First, we need to import all necessary components.

In [12]:
import pickle
import pandas as pd
from src.custom_models import Mean, Minimum, Maximum, SampleMean, Median, Mode
from src.metrics import get_mae_rmse
from src.globals import (
    PREPROCESSING_IN_FILE,
    INPUTDATA_OBJECT,
    ATS_OUT_FILE,
    RANDOM_SEED,
    TARGET_COLUMN,
    DATE_COLS,
)


##### Loading the actual test values of the target variable y 
(with the same train-test-split as used in the experiment)

In [13]:

with open(f'data/y_test.pkl', 'rb') as file:
      y_test = pickle.load(file)

y_test = y_test.tolist()

##### Retrieving MAE and MSE for each model

In [14]:
with open(f'data/y_preds_Mean().pkl', 'rb') as file:
      y_preds_average = pickle.load(file)

mae_average, rmse_average, r2 = get_mae_rmse(y_test, y_preds_average)

print(f'Mean()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_average/ (60*60))} hours  = {round(mae_average / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_average/ (60*60))} hours  = {round(rmse_average / (60*60*24))} days") # get difference in hours instead of seconds

Mean()
R^2: 0.057
MAE: 213 hours  = 9 days
RMSE: 496 hours  = 21 days


In [15]:
with open(f'data/y_preds_Median().pkl', 'rb') as file:
      y_preds_median = pickle.load(file)

mae_median, rmse_median, r2 = get_mae_rmse(y_test,y_preds_median)

print(f'Median()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_median/ (60*60))} hours  = {round(mae_median / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_median/ (60*60))} hours  = {round(rmse_median / (60*60*24))} days") # get difference in hours instead of seconds

Median()
R^2: -0.052
MAE: 167 hours  = 7 days
RMSE: 524 hours  = 22 days


In [16]:
with open(f'data/y_preds_Mode().pkl', 'rb') as file:
      y_preds_mode = pickle.load(file)

mae_mode, rmse_mode, r2 = get_mae_rmse(y_test,y_preds_mode)

print('Mode()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_mode/ (60*60))} hours  = {round(mae_mode / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_mode/ (60*60))} hours  = {round(rmse_mode / (60*60*24))} days") # get difference in hours instead of seconds


Mode()
R^2: 1.0
MAE: 174 hours  = 7 days
RMSE: 1 hours  = 0 days


In [17]:
with open(f'data/y_preds_HistGradientBoostingRegressor().pkl', 'rb') as file:
      y_preds_hgb = pickle.load(file)

mae_hgb, rmse_hgb, r2 = get_mae_rmse(y_test,y_preds_hgb)

print('HistGradientBoostingRegressor()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_hgb/ (60*60))} hours  = {round(mae_hgb / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_hgb/ (60*60))} hours  = {round(rmse_hgb / (60*60*24))} days") # get difference in hours instead of seconds


HistGradientBoostingRegressor()
R^2: 0.839
MAE: 58 hours  = 2 days
RMSE: 205 hours  = 9 days


In [18]:
with open(f'data/y_preds_NaN_HistGradientBoostingRegressor().pkl', 'rb') as file:
      y_preds_hgb = pickle.load(file)

mae_hgb, rmse_hgb, r2 = get_mae_rmse(y_test,y_preds_hgb)

print('HistGradientBoostingRegressor() with NaNs')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_hgb/ (60*60))} hours  = {round(mae_hgb / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_hgb/ (60*60))} hours  = {round(rmse_hgb / (60*60*24))} days") # get difference in hours instead of seconds


FileNotFoundError: [Errno 2] No such file or directory: 'data/y_preds_NaN_HistGradientBoostingRegressor().pkl'

In [20]:
with open(f'data/y_preds_LinearRegression().pkl', 'rb') as file:
      y_preds_lr = pickle.load(file)

mae_lr, rmse_lr, r2 = get_mae_rmse(y_test,y_preds_lr)

print('LinearRegression()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_lr/ (60*60))} hours  = {round(mae_lr / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_lr/ (60*60))} hours  = {round(rmse_lr / (60*60*24))} days") # get difference in hours instead of seconds


LinearRegression()
R^2: 0.39
MAE: 220 hours  = 9 days
RMSE: 399 hours  = 17 days


In [21]:
with open(f'data/y_preds_SVR().pkl', 'rb') as file:
      y_preds_svr = pickle.load(file)

mae_svr, rmse_svr, r2 = get_mae_rmse(y_test,y_preds_svr)

print('SVR()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_hgb/ (60*60))} hours  = {round(mae_hgb / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_hgb/ (60*60))} hours  = {round(rmse_hgb / (60*60*24))} days") # get difference in hours instead of seconds


SVR()
R^2: -0.052
MAE: 58 hours  = 2 days
RMSE: 205 hours  = 9 days


In [22]:
with open(f'data/y_preds_KNeighborsRegressor(n_jobs=-1).pkl', 'rb') as file:
      y_preds_knn = pickle.load(file)

mae_knn, rmse_knn, r2 = get_mae_rmse(y_test,y_preds_knn)

print('KNeighborsRegressor')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_knn/ (60*60))} hours  = {round(mae_knn / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_knn/ (60*60))} hours  = {round(rmse_knn / (60*60*24))} days") # get difference in hours instead of seconds


KNeighborsRegressor
R^2: 0.799
MAE: 46 hours  = 2 days
RMSE: 229 hours  = 10 days


In [23]:
with open(f'data/y_preds_BaggingRegressor().pkl', 'rb') as file:
      y_preds_bgr = pickle.load(file)

mae_bgr, rmse_bgr, r2 = get_mae_rmse(y_test,y_preds_bgr)

print('BaggingRegressor()')
print(f'R^2: {round(r2,3)}')
print(f"MAE: {round(mae_bgr/ (60*60))} hours  = {round(mae_bgr / (60*60*24))} days") # get difference in hours instead of seconds
print(f"RMSE: {round(rmse_bgr/ (60*60))} hours  = {round(rmse_bgr / (60*60*24))} days") # get difference in hours instead of seconds


BaggingRegressor()
R^2: 0.858
MAE: 38 hours  = 2 days
RMSE: 192 hours  = 8 days
