First, we need to import all necessary components.

In [8]:
import pickle
import pandas as pd
from src.custom_models import Average, Minimum, Maximum, SampleMean, Median, Mode
from src.metrics import get_mae_mse
from src.globals import (
    PREPROCESSING_IN_FILE,
    INPUTDATA_OBJECT,
    ATS_OUT_FILE,
    RANDOM_SEED,
    TARGET_COLUMN,
    DATE_COLS,
)


##### Loading the actual test values of the target variable y 
(with the same train-test-split as used in the experiment)

In [9]:

with open(f'data/y_test.pkl', 'rb') as file:
      y_test = pickle.load(file)

y_test = y_test.tolist()

##### Retrieving MAE and MSE for each model

In [10]:
with open(f'data/y_preds_Average().pkl', 'rb') as file:
      y_preds_average = pickle.load(file)

mae_average, mse_average, r2 = get_mae_mse(y_test, y_preds_average)

print(f'Average()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_average/ (60*60))} hours  = {round(mae_average / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_average/ (60*60))} hours  = {round(mse_average / (60*60*24))} days") # get difference in hours instead of seconds

Average()
R^2: 0.057
MAE: 213 hours  = 9 days
MSE: 885782416 hours  = 36907601 days


In [11]:
with open(f'data/y_preds_Median().pkl', 'rb') as file:
      y_preds_median = pickle.load(file)

mae_median, mse_median, r2 = get_mae_mse(y_test,y_preds_median)

print(f'Median()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_median/ (60*60))} hours  = {round(mae_median / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_median/ (60*60))} hours  = {round(mse_median / (60*60*24))} days") # get difference in hours instead of seconds

Median()
R^2: -0.052
MAE: 167 hours  = 7 days
MSE: 988154601 hours  = 41173108 days


In [18]:
with open(f'data/y_preds_Mode().pkl', 'rb') as file:
      y_preds_mode = pickle.load(file)

mae_mode, mse_mode, r2 = get_mae_mse(y_test,y_preds_mode)

print('Mode()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_mode/ (60*60))} hours  = {round(mae_mode / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_mode/ (60*60))} hours  = {round(mse_mode / (60*60*24))} days") # get difference in hours instead of seconds


Mode()
R^2: 0.9999956456799848
MAE: 174 hours  = 7 days
MSE: 4092 hours  = 170 days


In [13]:
with open(f'data/y_preds_HistGradientBoostingRegressor().pkl', 'rb') as file:
      y_preds_hgb = pickle.load(file)

mae_hgb, mse_hgb, r2 = get_mae_mse(y_test,y_preds_hgb)

print('HistGradientBoostingRegressor()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_hgb/ (60*60))} hours  = {round(mae_hgb / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_hgb/ (60*60))} hours  = {round(mse_hgb / (60*60*24))} days") # get difference in hours instead of seconds


HistGradientBoostingRegressor()
R^2: 0.847
MAE: 57 hours  = 2 days
MSE: 143324058 hours  = 5971836 days


In [14]:
with open(f'data/y_preds_LinearRegression().pkl', 'rb') as file:
      y_preds_lr = pickle.load(file)

mae_lr, mse_lr, r2 = get_mae_mse(y_test,y_preds_lr)

print('LinearRegression()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_lr/ (60*60))} hours  = {round(mae_lr / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_lr/ (60*60))} hours  = {round(mse_lr / (60*60*24))} days") # get difference in hours instead of seconds


LinearRegression()
R^2: 0.39
MAE: 220 hours  = 9 days
MSE: 572758631 hours  = 23864943 days


In [15]:
with open(f'data/y_preds_SVR().pkl', 'rb') as file:
      y_preds_svr = pickle.load(file)

mae_svr, mse_svr, r2 = get_mae_mse(y_test,y_preds_svr)

print('SVR()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_hgb/ (60*60))} hours  = {round(mae_hgb / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_hgb/ (60*60))} hours  = {round(mse_hgb / (60*60*24))} days") # get difference in hours instead of seconds


SVR()
R^2: -0.052
MAE: 57 hours  = 2 days
MSE: 143324058 hours  = 5971836 days


In [17]:
with open(f'data/y_preds_KNeighborsRegressor(n_jobs=-1).pkl', 'rb') as file:
      y_preds_knn = pickle.load(file)

mae_knn, mse_knn, r2 = get_mae_mse(y_test,y_preds_knn)

print('KNeighborsRegressor')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_knn/ (60*60))} hours  = {round(mae_knn / (60*60*24))} days") # get difference in hours instead of seconds
print(f"MSE: {round(mse_knn/ (60*60))} hours  = {round(mse_knn / (60*60*24))} days") # get difference in hours instead of seconds


KNeighborsRegressor
R^2: 0.799
MAE: 46 hours  = 2 days
MSE: 188426404 hours  = 7851100 days
