In [65]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# Hold-out evaluation of an AdaBoost regressor on the sales data.
#
# Fixed seed: both the train/test split and AdaBoost's internal resampling
# are random, so without a seed the reported metrics change on every run.
RANDOM_STATE = 42

# Load the spreadsheet and discard every row that has a missing value.
df = pd.read_excel("satisverileri.xlsx").dropna()

# "Sales" is the regression target; all remaining columns are features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

# perf_counter is the clock intended for measuring short durations.
start_time = time.perf_counter()

# 70 % of the rows train the model, 30 % are held out for scoring.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# specify decision tree as the weak learner
base_model = DecisionTreeRegressor(max_depth=3)
# The weak learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` across scikit-learn releases, while the
# first positional slot has stayed the same.
model = AdaBoostRegressor(base_model, random_state=RANDOM_STATE).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
r2Score = r2_score(y_test, y_pred)
meanAbsScore = mean_absolute_error(y_test, y_pred)
meanSqScore = mean_squared_error(y_test, y_pred)
MAPE = mean_absolute_percentage_error(y_test, y_pred)

end_time = time.perf_counter()

print("R2:" + str(r2Score))
print("Mean Absolute Error:" + str(meanAbsScore))
print("Mean Squared Error:" + str(meanSqScore))
print("MAPE:" + str(MAPE))
# Covers splitting, fitting, predicting and scoring (not the file load).
elapsed_time = end_time - start_time
print("Elapsed Time: ", elapsed_time, " seconds")

R2:0.6267194871688224
Mean Absolute Error:500.57905847736885
Mean Squared Error:495135.0926077143
MAPE:0.33582137898644565
Elapsed Time:  0.017640352249145508  seconds


In [77]:
# K-fold

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# 10-fold cross-validated evaluation of an AdaBoost regressor on the sales data.
#
# Fixed seed: the fold shuffling and AdaBoost's internal resampling are both
# random, so without a seed the averaged metrics change on every run.
RANDOM_STATE = 42

# Load the spreadsheet and discard every row that has a missing value.
df = pd.read_excel("satisverileri.xlsx").dropna()

# "Sales" is the regression target; all remaining columns are features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

# perf_counter is the clock intended for measuring short durations.
start_time = time.perf_counter()

n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_STATE)

# One entry per fold for each metric.
r2_scores = []
mae_scores = []
mse_scores = []
mape_scores = []

for train_idx, test_idx in kf.split(X):
    # kf.split yields positional indices, hence .iloc.
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Depth-3 decision tree as the weak learner. It is passed positionally
    # because the keyword was renamed from `base_estimator` to `estimator`
    # across scikit-learn releases.
    base_model = DecisionTreeRegressor(max_depth=3)
    model = AdaBoostRegressor(base_model, random_state=RANDOM_STATE).fit(X_train, y_train)

    y_pred = model.predict(X_test)
    r2Score = r2_score(y_test, y_pred)
    meanAbsScore = mean_absolute_error(y_test, y_pred)
    meanSqScore = mean_squared_error(y_test, y_pred)
    MAPE = mean_absolute_percentage_error(y_test, y_pred)

    r2_scores.append(r2Score)
    mae_scores.append(meanAbsScore)
    mse_scores.append(meanSqScore)
    mape_scores.append(MAPE)

# Average each metric over the folds.
mean_r2 = np.mean(r2_scores)
mean_mae = np.mean(mae_scores)
mean_mse = np.mean(mse_scores)
mean_mape = np.mean(mape_scores)

end_time = time.perf_counter()

# The values printed below are the fold averages, not the per-fold lists.
print("R2 Scores:", mean_r2)
print("Mean Absolute Error Scores:", mean_mae)
print("Mean Squared Error Scores:", mean_mse)
print("MAPE Scores:", mean_mape)

elapsed_time = end_time - start_time
print("Elapsed Time:", elapsed_time, "seconds")

R2 Scores: 0.6322320073856229
Mean Absolute Error Scores: 522.7479284075488
Mean Squared Error Scores: 496550.2334307979
MAPE Scores: 0.4231814450028484
Elapsed Time: 0.22944068908691406 seconds


In [2]:
# K-fold and Parameter Optimizations

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import time

# Exhaustive grid search over AdaBoost hyper-parameters, scored by the mean
# R2 of a 10-fold cross-validation.
#
# Fixed seed: the fold shuffling and AdaBoost's internal resampling are both
# random, so without a seed the winning combination can change between runs.
RANDOM_STATE = 42

# Load the spreadsheet and discard every row that has a missing value.
df = pd.read_excel("satisverileri.xlsx").dropna()

# "Sales" is the regression target; all remaining columns are features.
X = df.drop(columns=["Sales"])
y = df["Sales"]

n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_STATE)
# Materialise the folds once so every hyper-parameter combination is scored
# on exactly the same partitions; otherwise differences in mean R2 would mix
# the effect of the parameters with the effect of the split.
folds = list(kf.split(X))

# Search space: 4 learning rates x 3 losses x 4 ensemble sizes = 48 combinations,
# each fitted n_folds times.
learning_rate_values = [0.001, 0.01, 0.1, 0.5]
n_estimators_values = [100, 200, 500, 1000]
loss_values = ['linear', 'square', 'exponential']

# Best combination seen so far; the sentinels are overwritten on the first pass.
max_mean_r2 = -np.inf
best_LearningRate = -1
best_loss = ''
best_Estimator = -1

# perf_counter is the clock intended for measuring short durations.
start_time = time.perf_counter()

for learning in learning_rate_values:
    for loss in loss_values:
        for estimator in n_estimators_values:
            # R2 is the only selection criterion, so it is the only metric kept.
            r2_scores = []

            for train_idx, test_idx in folds:
                # The fold indices are positional, hence .iloc.
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

                # Depth-3 decision tree as the weak learner. It is passed
                # positionally because the keyword was renamed from
                # `base_estimator` to `estimator` across scikit-learn releases.
                base_model = DecisionTreeRegressor(max_depth=3)
                model = AdaBoostRegressor(
                    base_model,
                    n_estimators=estimator,
                    learning_rate=learning,
                    loss=loss,
                    random_state=RANDOM_STATE,
                ).fit(X_train, y_train)

                y_pred = model.predict(X_test)
                r2Score = r2_score(y_test, y_pred)
                r2_scores.append(r2Score)

            mean_r2 = np.mean(r2_scores)

            # Strict '>' keeps the earliest combination when means tie.
            if mean_r2 > max_mean_r2:
                max_mean_r2 = mean_r2
                best_LearningRate = learning
                best_loss = loss
                best_Estimator = estimator


end_time = time.perf_counter()
elapsed_time = end_time - start_time

print("Elapsed Time:", elapsed_time, "seconds")
print("Best Learning Rate:", best_LearningRate)
print("Best Loss Function:", best_loss)
print("Best Estimator:", best_Estimator)

print("Max Mean R2 Score:", max_mean_r2)

Elapsed Time: 185.99681997299194 seconds
Best Learning Rate: 0.1
Best Loss Function: linear
Best Estimator: 200
Max Mean R2 Score: 0.7028119350975319
