In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# Hold-out evaluation of a plain linear regression on the sales data.
#
# Load the workbook and discard every row that contains a missing value.
df = pd.read_excel("satisverileri.xlsx")
df = df.dropna()

# "Sales" is the regression target; all remaining columns are used as features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

# Fixed seed: without it train_test_split shuffles differently on every run,
# so the metrics printed below could not be reproduced or compared.
RANDOM_STATE = 42

# The timer covers the split, the fit, the prediction and the metric calls.
start_time = time.time()

# 70 % of the rows are used for fitting, 30 % are held out for scoring.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)
df_model = LinearRegression().fit(X_train, y_train)
y_pred = df_model.predict(X_test)

r2Score = r2_score(y_test, y_pred)
meanAbsScore = mean_absolute_error(y_test, y_pred)
meanSqScore = mean_squared_error(y_test, y_pred)
MAPE = mean_absolute_percentage_error(y_test, y_pred)

end_time = time.time()

print("R2:" + str(r2Score))
print("Mean Absolute Error:" + str(meanAbsScore))
print("Mean Squared Error:" + str(meanSqScore))
print("MAPE:" + str(MAPE))
elapsed_time = end_time - start_time
print("Elapsed Time: ", elapsed_time, " seconds")

R2:0.5485498856095008
Mean Absolute Error:586.3402616239069
Mean Squared Error:549120.3436482291
MAPE:0.43338134232230974
Elapsed Time:  0.004494905471801758  seconds


In [34]:
# K-fold

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# 10-fold cross-validated evaluation of a plain linear regression.
#
# Load the workbook and discard every row that contains a missing value.
df = pd.read_excel("satisverileri.xlsx")
df = df.dropna()

# "Sales" is the regression target; all remaining columns are used as features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

# Fixed seed: KFold(shuffle=True) otherwise assigns rows to folds differently
# on every run, so the averaged metrics could not be reproduced or compared
# with other cells.
RANDOM_STATE = 42

# The timer covers fold generation, all fits/predictions and the averaging.
start_time = time.time()

n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_STATE)

# One entry per fold is appended to each of these lists.
r2_scores = []
mae_scores = []
mse_scores = []
mape_scores = []
# NOTE(review): nothing in this cell appends to elapsed_times; it is kept only
# because it is a notebook-level name that other cells might read.
elapsed_times = []

for train_idx, test_idx in kf.split(X):
    # kf.split yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # A fresh model is fitted for every fold.
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    r2Score = r2_score(y_test, y_pred)
    meanAbsScore = mean_absolute_error(y_test, y_pred)
    meanSqScore = mean_squared_error(y_test, y_pred)
    MAPE = mean_absolute_percentage_error(y_test, y_pred)

    r2_scores.append(r2Score)
    mae_scores.append(meanAbsScore)
    mse_scores.append(meanSqScore)
    mape_scores.append(MAPE)

# Unweighted average of the per-fold metrics.
mean_r2 = np.mean(r2_scores)
mean_mae = np.mean(mae_scores)
mean_mse = np.mean(mse_scores)
mean_mape = np.mean(mape_scores)

end_time = time.time()

# The labels say "Scores", but each line prints the mean over all folds.
print("R2 Scores:", mean_r2)
print("Mean Absolute Error Scores:", mean_mae)
print("Mean Squared Error Scores:", mean_mse)
print("MAPE Scores:", mean_mape)

elapsed_time = end_time - start_time
print("Elapsed Time:", elapsed_time, "seconds")

R2 Scores: 0.5621681112027784
Mean Absolute Error Scores: 613.5754913169255
Mean Squared Error Scores: 603428.0559426707
MAPE Scores: 0.4820968455411977
Elapsed Time: 0.03424239158630371 seconds


In [1]:
# Feature Selection

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# 10-fold cross-validated linear regression on a reduced feature set.
#
# Load the workbook and discard every row that contains a missing value.
df = pd.read_excel("satisverileri.xlsx")
df = df.dropna()

# Columns excluded from the model in this experiment. Dropping them in a
# single call is equivalent to dropping them one at a time.
dropped_features = [
    "Month",
    "Gold",
    "School",
    "Work",
    "CPI",
    "Unemployment",
    "Weather",
]
df = df.drop(columns=dropped_features)

# "Sales" is the regression target; the columns that remain are the features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

# Fixed seed: KFold(shuffle=True) otherwise assigns rows to folds differently
# on every run, which would make the effect of removing features
# indistinguishable from fold-to-fold noise.
RANDOM_STATE = 42

# The timer covers fold generation, all fits/predictions and the averaging.
start_time = time.time()

n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_STATE)

# One entry per fold is appended to each of these lists.
r2_scores = []
mae_scores = []
mse_scores = []
mape_scores = []
# NOTE(review): nothing in this cell appends to elapsed_times; it is kept only
# because it is a notebook-level name that other cells might read.
elapsed_times = []

for train_idx, test_idx in kf.split(X):
    # kf.split yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # A fresh model is fitted for every fold.
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    r2Score = r2_score(y_test, y_pred)
    meanAbsScore = mean_absolute_error(y_test, y_pred)
    meanSqScore = mean_squared_error(y_test, y_pred)
    MAPE = mean_absolute_percentage_error(y_test, y_pred)

    r2_scores.append(r2Score)
    mae_scores.append(meanAbsScore)
    mse_scores.append(meanSqScore)
    mape_scores.append(MAPE)

# Unweighted average of the per-fold metrics.
mean_r2 = np.mean(r2_scores)
mean_mae = np.mean(mae_scores)
mean_mse = np.mean(mse_scores)
mean_mape = np.mean(mape_scores)

end_time = time.time()

# The labels say "Scores", but each line prints the mean over all folds.
print("R2 Scores:", mean_r2)
print("Mean Absolute Error Scores:", mean_mae)
print("Mean Squared Error Scores:", mean_mse)
print("MAPE Scores:", mean_mape)

elapsed_time = end_time - start_time
print("Elapsed Time:", elapsed_time, "seconds")

R2 Scores: 0.5308900290006844
Mean Absolute Error Scores: 643.8202212219751
Mean Squared Error Scores: 680121.6732039818
MAPE Scores: 0.5000563700043112
Elapsed Time: 0.09073066711425781 seconds
