First, we need to import all necessary components.

In [53]:
import pickle
from src.metrics import *
from src.globals import *
from src.custom_models import *
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import HistGradientBoostingRegressor, BaggingRegressor


##### Loading the actual test values of each target variable y
(using the same train-test split as in the experiment)

In [54]:
# Ground-truth test targets: one plain list per entry of TARGET_VARS, in the
# same order, so that y_test[k] belongs to TARGET_VARS[k].
y_test = []
for target_var in TARGET_VARS:
    with open(f'data/y_test_{target_var}.pkl', 'rb') as y_test_file:
        # The unpickled object exposes .tolist(); it is stored as a Python list.
        y_test.append(pickle.load(y_test_file).tolist())

##### Retrieving MAE, RMSE, and R^2 for each model

In [55]:
# Evaluate the stored predictions of the Mean model.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = Mean()

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")


--------------------------------
Mean()
R^2: 0.057
MAE: 213 hours  = 9 days
RMSE: 496 hours  = 21 days
--------------------------------
Mean()
R^2: 0.141
MAE: 3.96 activities
RMSE: 7.3 activities


In [56]:
# Evaluate the stored predictions of the Median model.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = Median()

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")


--------------------------------
Median()
R^2: -0.052
MAE: 167 hours  = 7 days
RMSE: 524 hours  = 22 days
--------------------------------
Median()
R^2: 0.039
MAE: 3.46 activities
RMSE: 7.72 activities


In [57]:
# Evaluate the stored predictions of the Mode model.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
#
# NOTE(review): the saved output of this cell reports R^2 = 1.0 and an RMSE of
# 1 hour next to an MAE of 174 hours for the time target. RMSE can never be
# smaller than MAE, so these numbers look corrupted (possibly a numeric
# overflow inside get_mae_rmse or in the stored predictions) - TODO investigate.
model = Mode()

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")
            

--------------------------------
Mode()
R^2: 1.0
MAE: 174 hours  = 7 days
RMSE: 1 hours  = 0 days
--------------------------------
Mode()
R^2: -0.12
MAE: 3.7 activities
RMSE: 8.33 activities


In [58]:
# Evaluate the stored predictions of the HistGradientBoostingRegressor.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = HistGradientBoostingRegressor(random_state=RANDOM_SEED)

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")


--------------------------------
HistGradientBoostingRegressor(random_state=42)
R^2: 0.852
MAE: 54 hours  = 2 days
RMSE: 197 hours  = 8 days
--------------------------------
HistGradientBoostingRegressor(random_state=42)
R^2: 0.408
MAE: 3.14 activities
RMSE: 6.06 activities


In [59]:
# Evaluate the stored predictions of the LinearRegression model.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = LinearRegression()

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")
            

--------------------------------
LinearRegression()
R^2: 0.39
MAE: 220 hours  = 9 days
RMSE: 399 hours  = 17 days
--------------------------------
LinearRegression()
R^2: 0.223
MAE: 3.83 activities
RMSE: 6.94 activities


In [60]:
# Evaluate the stored predictions of the SVR model.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = SVR()

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")

--------------------------------
SVR()
R^2: -0.052
MAE: 167 hours  = 7 days
RMSE: 524 hours  = 22 days
--------------------------------
SVR()
R^2: 0.044
MAE: 3.46 activities
RMSE: 7.7 activities


In [61]:
# Evaluate the stored predictions of the KNeighborsRegressor.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = KNeighborsRegressor(n_jobs=-1)

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")

--------------------------------
KNeighborsRegressor(n_jobs=-1)
R^2: 0.799
MAE: 46 hours  = 2 days
RMSE: 229 hours  = 10 days
--------------------------------
KNeighborsRegressor(n_jobs=-1)
R^2: 0.123
MAE: 3.36 activities
RMSE: 7.37 activities


In [62]:
# Evaluate the stored predictions of the BaggingRegressor.
# str(model) is embedded in the pickle file names below, so changing the
# constructor arguments changes which files are read and written.
model = BaggingRegressor(n_jobs=-1)

for idx, target_var in enumerate(TARGET_VARS):
    # Predictions were pickled per (target variable, model) pair.
    with open(f'data/y_preds_{target_var}_{model}.pkl', 'rb') as preds_file:
        y_preds = pickle.load(preds_file)

    # y_test[idx] holds the true values for TARGET_VARS[idx].
    mae, rmse, r2 = get_mae_rmse(y_test[idx], y_preds)
    ae = get_ae_vector(y_test[idx], y_preds)

    is_time_target = 'time' in target_var
    if is_time_target:
        # Convert the absolute errors to hours (source unit presumably seconds).
        ae = [err / 60 / 60 for err in ae]

    # Persist the absolute-error vector for this target/model pair.
    with open(f"data/ae_{target_var}_{model}.pkl", "wb") as ae_file:
        pickle.dump(ae, ae_file)

    print('-' * 32)
    print(model)
    print(f'R^2: {round(r2,3)}')
    if is_time_target:
        # Time errors are printed in whole hours and whole days instead of seconds.
        print(f"MAE: {round(mae/ (60*60))} hours  = {round(mae / (60*60*24))} days")
        print(f"RMSE: {round(rmse/ (60*60))} hours  = {round(rmse / (60*60*24))} days")
    else:
        print(f"MAE: {round(mae,2)} activities")
        print(f"RMSE: {round(rmse,2)} activities")

--------------------------------
BaggingRegressor(n_jobs=-1)
R^2: 0.86
MAE: 38 hours  = 2 days
RMSE: 191 hours  = 8 days
--------------------------------
BaggingRegressor(n_jobs=-1)
R^2: 0.357
MAE: 3.02 activities
RMSE: 6.31 activities
