 # 1-step Forecasting with linear and non-linear models (Nomothetic)

In [1]:
# Import the necessary libraries
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import LinearSVR
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from sklearn import linear_model as lm

import utils

# Plot settings
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['figure.dpi'] = 150
sns.set()

In [2]:
# Reading alcohol data
train_df, test_df, data_raw_list = utils.load_alcohol()

combined_data = []

# Create one big dataset that contains the information of all participants, and split into dependent/independent features
train_alcohol = pd.concat(train_df, ignore_index=True)
train_alcohol_X = train_alcohol.drop(train_alcohol.columns[range(0, 61)], axis=1).fillna(0)
train_alcohol_y = train_alcohol['craving']


# Function to split the test sets into dependent/independent features, since they are taken per individual
def prepare_data(idx, test_list):
    # print('Patient ID:', test_list[idx]['ID'][0])
    X_test = test_list[idx].drop(test_list[idx].columns[range(0, 61)], axis=1).fillna(0)
    y_test = test_list[idx]['craving']
    return X_test, y_test


In [3]:
# Loading train and test data of covid patients
covid_train_x_list, covid_test_x_list, covid_train_y_list, covid_test_y_list = utils.patients_covid()

# Concatenate the train set for the global models
covid_train_X = pd.concat(covid_train_x_list, ignore_index=True)
covid_train_y = pd.concat(covid_train_y_list, ignore_index=True)

Patient included in study:
[3, 5, 8, 11, 14, 15, 16, 24, 25, 26, 27, 31, 34, 35, 37, 39, 41, 42, 46, 50, 53, 54, 59, 63, 65, 66, 70, 72, 77]


# 1. Nomothetic Models Regression

In [4]:
# Elastic-Net

def elastic_net(train_x, train_y):
    # Standardize variables (mean 0, std 1)
    X_train_loc = utils.standardize(train_x).fillna(0)

    # Train Elastic Net model with 5-fold cross-validation
    l1_ratios = np.arange(0.01, 0.6, 0.05)
    elastic_reg = lm.ElasticNetCV(alphas=np.arange(0.01, 20, 0.05), l1_ratio=l1_ratios, cv=5, max_iter=100000,
                                  fit_intercept=True)
    elastic_reg.fit(X_train_loc, train_y)

    return elastic_reg


# Generate a model for both datasets, that will later be used for testing
print('--- Alcohol Data ---')
test_alcohol_X, test_alcohol_y = prepare_data(1, test_df)
elastic_alcohol = elastic_net(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
elastic_covid = elastic_net(covid_train_X, covid_train_y)
print('Trained')

--- Alcohol Data ---
Trained
--- Covid Data ---
Trained


In [5]:
# Linear-SVM

def linear_svm(train_x, train_y, params):
    # Standardize
    X_train_loc = utils.standardize(train_x).fillna(0)

    # Train a SVM with a linear kernel, 5-fold cross validation, also optimizing the hyperparameters from the provided 'params' variable
    clf = GridSearchCV(estimator=LinearSVR(), param_grid=params, scoring='neg_mean_squared_error', cv=5)
    clf.fit(X_train_loc, train_y)
    print(clf.best_params_)

    return clf


# Train model for the two datasets
print('--- Alcohol Data ---')
# Hyperparameter grid for 'Alcohol' dataset
param = [
    {'C': np.arange(0.1, 4, 0.1),
     'epsilon': np.arange(6, 7, 0.1),
     'loss': ['epsilon_insensitive'],
     'fit_intercept': [True],
     'max_iter': [10000]}]

svm_alcohol = linear_svm(train_alcohol_X, train_alcohol_y, param)
print('Trained')
print('--- Covid Data ---')
# Hyperparameter grid for COVID-19 dataset
param = [
    {'C': np.arange(0.1, 2, 0.1),
     'epsilon': np.arange(0, 0.5, 0.1),
     'loss': ['epsilon_insensitive'],
     'fit_intercept': [True],
     'max_iter': [10000]}]
svm_covid = linear_svm(covid_train_X, covid_train_y, param)
print('Trained')

--- Alcohol Data ---
Trained
--- Covid Data ---
Trained


In [6]:
# XGBoost

def xgboost_reg(train_x, train_y):
    # Hyperparameter grid
    params = [
        {'objective': ['reg:squarederror'],
         'n_estimators': [15, 20, 25, 30, 45, 60],
         'booster': ['gbtree'],
         'alpha': np.arange(0, 1, 0.1),
         'eval_metric': ['rmse'],
         'max_depth': np.arange(1, 8, 1)}]

    # Train a XGBoost model with 5-fold cross-validation
    reg_xgb = GridSearchCV(xgb.XGBRegressor(), params, cv=5, scoring='neg_mean_squared_error')
    reg_xgb.fit(train_x, train_y)
    print(reg_xgb.best_params_)

    return reg_xgb


# Train model for both datasets
print('--- Alcohol Data ---')
xgb_alcohol = xgboost_reg(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
xgb_covid = xgboost_reg(covid_train_X, covid_train_y)
print('Trained')

--- Alcohol Data ---
{'alpha': 0.9, 'booster': 'gbtree', 'eval_metric': 'rmse', 'max_depth': 1, 'n_estimators': 20, 'objective': 'reg:squarederror'}
Trained
--- Covid Data ---
{'alpha': 0.0, 'booster': 'gbtree', 'eval_metric': 'rmse', 'max_depth': 1, 'n_estimators': 30, 'objective': 'reg:squarederror'}
Trained


In [7]:
from sklearn.ensemble import RandomForestRegressor


# Random Forests

def random_forests(train_x, train_y):
    # Hyperparameter grid
    grid = [
        {'n_estimators': [50, 70, 85, 100, 120],
         'max_features': ['auto', 'sqrt'],
         'max_depth': [3, 5, 10, 15, 20],
         'min_samples_split': [2, 4, 6],
         'min_samples_leaf': [1],
         'bootstrap': [True]}]

    # Random forests 5-fold cross-validation
    rf = GridSearchCV(RandomForestRegressor(), param_grid=grid, cv=5, scoring='neg_mean_absolute_error')
    rf.fit(train_x, train_y)
    print(rf.best_params_)
    # utils.eval_results(actual=test_y, predicted=y_predicted_test, show=True)

    return rf


# Train model for two datasets
print('--- Alcohol Data ---')
rf_alcohol = random_forests(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
rf_covid = random_forests(covid_train_X, covid_train_y)
print('Trained')

--- Alcohol Data ---
{'bootstrap': True, 'max_depth': 5, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 70}
Trained
--- Covid Data ---
{'bootstrap': True, 'max_depth': 5, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 70}
Trained


In [8]:
import keras.layers as layer
from keras.models import Sequential


# LSTM 3-Layer - Recurrent Neural Network

def lstm_rnn(train_x, train_y):
    # Standardize data and then transform to a suitable input
    X_train_loc = utils.standardize(train_x).fillna(0)
    train_x_val, train_y_val = X_train_loc.values, train_y.values

    train_x_val = train_x_val.reshape((train_x_val.shape[0], 1, train_x_val.shape[1]))

    # Neural network architecture
    model = Sequential([
        layer.Input(shape=(train_x_val.shape[1], train_x_val.shape[2])),
        layer.LSTM(40, return_sequences=True),
        layer.Dropout(0.25),
        layer.LSTM(units=25, return_sequences=True),
        layer.Dropout(0.20),
        layer.LSTM(units=10, return_sequences=False),
        layer.Dense(units=1, activation='linear')
    ])

    # Compile model using the mean absolute error loss function and the adam optimizer
    model.compile(loss='mae', optimizer='adam')
    # Train for 40 epochs
    model.fit(train_x_val, train_y_val, epochs=40, batch_size=8, verbose=0, shuffle=False)

    return model


# Train a model for two datasets
print('--- Alcohol Data ---')
lstm_alcohol = lstm_rnn(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
lstm_covid = lstm_rnn(covid_train_X, covid_train_y)
print('Trained')

--- Alcohol Data ---
Trained
--- Covid Data ---
Trained


In [9]:
# LSTM 1-Layer - Recurrent Neural Network

def one_lstm_rnn(train_x, train_y):
    # Standardize then transform to suitable input
    X_train_loc = utils.standardize(train_x).fillna(0)
    train_x_val, train_y_val = X_train_loc.values, train_y.values

    train_x_val = train_x_val.reshape((train_x_val.shape[0], 1, train_x_val.shape[1]))

    # Neural network architecture
    model = Sequential([
        layer.Input(shape=(train_x_val.shape[1], train_x_val.shape[2])),
        layer.Bidirectional(layer.LSTM(32, return_sequences=True)),
        layer.Dropout(0.25),
        layer.Dense(units=1, activation='linear')
    ])

    # Compile and train model as the 3-Layer one
    model.compile(loss='mae', optimizer='adam')
    model.fit(train_x_val, train_y_val, epochs=40, batch_size=8, verbose=0, shuffle=False)

    return model


print('--- Alcohol Data ---')
lstm1_alcohol = one_lstm_rnn(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
lstm1_covid = one_lstm_rnn(covid_train_X, covid_train_y)
print('Trained')

--- Alcohol Data ---
Trained
--- Covid Data ---
Trained


In [None]:
# Gated Recurrent Unit - Recurrent Neural Network

def gru_rnn(train_x, train_y):
    # Standardize and transform to suitable input
    X_train_loc = utils.standardize(train_x).fillna(0)
    train_x_val, train_y_val = X_train_loc.values, train_y.values

    train_x_val = train_x_val.reshape((train_x_val.shape[0], 1, train_x_val.shape[1]))

    # Neural network architecture
    model = Sequential([
        layer.Input(shape=(train_x_val.shape[1], train_x_val.shape[2])),
        layer.Bidirectional(layer.GRU(units=24)),
        layer.RepeatVector(train_x_val.shape[1]),
        layer.Bidirectional(layer.GRU(units=24, return_sequences=True)),
        layer.TimeDistributed(layer.Dense(units=1, activation='linear'))
    ])
    # Compile and train model
    model.compile(loss='mae', optimizer='adam')
    model.fit(train_x_val, train_y_val, epochs=75, batch_size=4, verbose=0, shuffle=False)

    return model


# Train a GRU model for two datasets
print('--- Alcohol Data ---')
gru_alcohol = gru_rnn(train_alcohol_X, train_alcohol_y)
print('Trained')
print('--- Covid Data ---')
gru_covid = gru_rnn(covid_train_X, covid_train_y)
print('Trained')

### 2. Evaluating Performance on Entire Dataset (Alcohol)

In [10]:
# Lists to hold the metrics for all patients and models
mse_elastic, mse_svm, mse_one_lstm, mse_xgb, mse_rf, mse_lstm, mse_gru = ([] for _ in range(7))
rmse_elastic, rmse_svm, rmse_one_lstm, rmse_xgb, rmse_rf, rmse_lstm, rmse_gru = ([] for _ in range(7))
mae_elastic, mae_svm, mae_one_lstm, mae_xgb, mae_rf, mae_lstm, mae_gru = ([] for _ in range(7))

# Write results to text file
f = open("output_nomothethic_a.txt", "a")
f.write('- - - PER INDIVIDUAL RESULTS GLOBAL MODELS - - -\n')

for x in range(len(test_df)):
    # Build and evaluate a model for every single patient

    test_alcohol_X, test_alcohol_y = prepare_data(x, test_df)
    # Elastic-Net (baseline)
    mse, rmse, mae = utils.eval_results(test_alcohol_y,
                                         elastic_alcohol.predict(utils.standardize(test_alcohol_X).fillna(0)), False)

    # Elastic-Net metrics
    mse_elastic.append(mse)
    rmse_elastic.append(rmse)
    mae_elastic.append(mae)

    f.write("Patient ID: %s\n" % test_df[x]['ID'][0])
    f.write('\n')
    f.write('--- Elastic-Net ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # Linear-SVM

    params = [
        {'C': np.arange(0.1, 4, 0.1),
         'epsilon': np.arange(6, 7, 0.1),
         'loss': ['epsilon_insensitive'],
         'fit_intercept': [True],
         'max_iter': [10000]}]

    mse, rmse, mae = utils.eval_results(test_alcohol_y,
                                         svm_alcohol.predict(utils.standardize(test_alcohol_X).fillna(0)),
                                         False)
    # Linear-SVM metrics
    mse_svm.append(mse)
    rmse_svm.append(rmse)
    mae_svm.append(mae)

    f.write('--- Linear-SVM ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # XGBoost Regression
    mse, rmse, mae = utils.eval_results(test_alcohol_y, xgb_alcohol.predict(test_alcohol_X), False)
    # XGBoost metrics
    mse_xgb.append(mse)
    rmse_xgb.append(rmse)
    mae_xgb.append(mae)

    f.write('--- XGBoost ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # RF
    mse, rmse, mae = utils.eval_results(test_alcohol_y, rf_alcohol.predict(test_alcohol_X), False)
    # RF metrics
    mse_rf.append(mse)
    rmse_rf.append(rmse)
    mae_rf.append(mae)

    f.write('--- Random Forests ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # LSTM 3-Layer RNN
    test_x_val = utils.standardize(test_alcohol_X).fillna(0).values
    test_x_val = test_x_val.reshape((test_x_val.shape[0], 1, test_x_val.shape[1]))
    mse, rmse, mae = utils.eval_results(test_alcohol_y, lstm_alcohol.predict(
        test_x_val).flatten(), False)
    # LSTM metrics
    mse_lstm.append(mse)
    rmse_lstm.append(rmse)
    mae_lstm.append(mae)

    f.write('--- LSTM RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # LSTM 1-Layer RNN
    mse, rmse, mae = utils.eval_results(test_alcohol_y, lstm1_alcohol.predict(
        test_x_val).flatten(), False)
    # LSTM metrics
    mse_one_lstm.append(mse)
    rmse_one_lstm.append(rmse)
    mae_one_lstm.append(mae)

    f.write('--- 1-LSTM RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # GRU RNN

    mse, rmse, mae = utils.eval_results(test_alcohol_y, gru_alcohol.predict(test_x_val).flatten(), False)

    # GRU metrics
    mse_gru.append(mse)
    rmse_gru.append(rmse)
    mae_gru.append(mae)

    f.write('--- GRU RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

f.close()
# Compute average metrics for all models
print('---- Elastic-Net Results ----')
utils.average_metrics(mse_elastic, rmse_elastic, mae_elastic)
print('---------------------------------')
print('---- Linear SVM Results ----')
utils.average_metrics(mse_svm, rmse_svm, mae_svm)
print('---------------------------------')
print('---- XGBoost Results ----')
utils.average_metrics(mse_xgb, rmse_xgb, mae_xgb)
print('---------------------------------')
print('---- Random Forest Results ----')
utils.average_metrics(mse_rf, rmse_rf, mae_rf)
print('---------------------------------')
print('---- LSTM Results ----')
utils.average_metrics(mse_lstm, rmse_lstm, mae_lstm)
print('---------------------------------')
print('---- 1-LSTM Results ----')
utils.average_metrics(mse_one_lstm, rmse_one_lstm, mae_one_lstm)
print('---------------------------------')
print('---- GRU Results ----')
utils.average_metrics(mse_gru, rmse_gru, mae_gru)
print('---------------------------------')

rmse_overall = rmse_elastic + rmse_svm + rmse_xgb + rmse_rf + rmse_lstm + rmse_one_lstm + rmse_gru
n = len(rmse_rf)
model_names = ['Elastic Net'] * n + ['SVM'] * n + ['XGBoost'] * n + ['RF'] * n + ['3-Layer LSTM'] * n + [
    '1-Layer LSTM'] * n + ['GRU'] * n
dataset = ['Alcohol'] * len(model_names)

---- Elastic-Net Results ----
Average R_Squared: 0.24006335687457628
Average RMSE: 14.180332707305798
Average MAE: 10.682079363173175
---------------------------------
---- Linear SVM Results ----
Average R_Squared: 0.23470151268535994
Average RMSE: 14.401403063737932
Average MAE: 10.551223831581822
---------------------------------
---- XGBoost Results ----
Average R_Squared: 0.23293263945135248
Average RMSE: 13.960150864760047
Average MAE: 10.409002698725837
---------------------------------
---- Random Forest Results ----
Average R_Squared: 0.22552790896163133
Average RMSE: 13.97961379543016
Average MAE: 10.351286223765028
---------------------------------
---- LSTM Results ----
Average R_Squared: 0.16600599519943046
Average RMSE: 14.903098949406086
Average MAE: 10.747439404059012
---------------------------------
---- 1-LSTM Results ----
Average R_Squared: 0.010860335421724019
Average RMSE: 16.20896741199095
Average MAE: 11.328501175069308
---------------------------------


### 2.1 Evaluating Performance on Entire Dataset (COVID-19)

In [12]:
# Lists to hold the metrics for all patients and models
mse_elastic, mse_svm, mse_one_lstm, mse_xgb, mse_rf, mse_lstm, mse_gru = ([] for _ in range(7))
rmse_elastic, rmse_svm, rmse_one_lstm, rmse_xgb, rmse_rf, rmse_lstm, rmse_gru = ([] for _ in range(7))
mae_elastic, mae_svm, mae_one_lstm, mae_xgb, mae_rf, mae_lstm, mae_gru = ([] for _ in range(7))

# Write results to text file
f = open("output_nomothethic_c.txt", "a")
f.write('- - - PER INDIVIDUAL RESULTS GLOBAL MODELS - - -\n')

for z in range(len(covid_train_x_list)):
    # Build and evaluate a model for every single patient

    # Elastic-Net (baseline)
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z],
                                         elastic_covid.predict(utils.standardize(covid_test_x_list[z]).fillna(0)),
                                         False)

    # Elastic-Net metrics
    mse_elastic.append(mse)
    rmse_elastic.append(rmse)
    mae_elastic.append(mae)

    f.write("Patient ID: %s\n" % z)
    f.write('\n')
    f.write('--- Elastic-Net ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # Linear-SVM

    params = [
        {'C': np.arange(0.1, 2, 0.1),
         'epsilon': np.arange(0, 0.5, 0.1),
         'loss': ['epsilon_insensitive'],
         'fit_intercept': [True],
         'max_iter': [10000]}]

    mse, rmse, mae = utils.eval_results(covid_test_y_list[z],
                                         svm_covid.predict(utils.standardize(covid_test_x_list[z]).fillna(0)),
                                         False)
    # Linear-SVM metrics
    mse_svm.append(mse)
    rmse_svm.append(rmse)
    mae_svm.append(mae)

    f.write('--- Linear-SVM ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # XGBoost Regression
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z], xgb_covid.predict(covid_test_x_list[z]), False)
    # XGBoost metrics
    mse_xgb.append(mse)
    rmse_xgb.append(rmse)
    mae_xgb.append(mae)

    f.write('--- XGBoost ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # RF
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z], rf_covid.predict(covid_test_x_list[z]), False)
    # RF metrics
    mse_rf.append(mse)
    rmse_rf.append(rmse)
    mae_rf.append(mae)

    f.write('--- Random Forests ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # LSTM 3-Layer RNN
    test_x_val = utils.standardize(covid_test_x_list[z]).fillna(0).values
    test_x_val = test_x_val.reshape((test_x_val.shape[0], 1, test_x_val.shape[1]))
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z], lstm_covid.predict(
        test_x_val).flatten(), False)
    # LSTM metrics
    mse_lstm.append(mse)
    rmse_lstm.append(rmse)
    mae_lstm.append(mae)

    f.write('--- LSTM RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # LSTM 1-Layer RNN
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z], lstm1_covid.predict(
        test_x_val).flatten(), False)
    # LSTM metrics
    mse_one_lstm.append(mse)
    rmse_one_lstm.append(rmse)
    mae_one_lstm.append(mae)

    f.write('--- 1-LSTM RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

    # GRU RNN
    mse, rmse, mae = utils.eval_results(covid_test_y_list[z], gru_covid.predict(test_x_val).flatten(), False)

    # GRU metrics
    mse_gru.append(mse)
    rmse_gru.append(rmse)
    mae_gru.append(mae)

    f.write('--- GRU RNN ---\n')
    f.write("MSE: %s\n" % mse)
    f.write("RMSE: %s\n" % rmse)
    f.write("MAE: %s\n" % mae)
    f.write('\n')

f.close()
# Compute average metrics for all models
print('---- Elastic-Net Results ----')
utils.average_metrics(mse_elastic, rmse_elastic, mae_elastic)
print('---------------------------------')
print('---- Linear SVM Results ----')
utils.average_metrics(mse_svm, rmse_svm, mae_svm)
print('---------------------------------')
print('---- XGBoost Results ----')
utils.average_metrics(mse_xgb, rmse_xgb, mae_xgb)
print('---------------------------------')
print('---- Random Forest Results ----')
utils.average_metrics(mse_rf, rmse_rf, mae_rf)
print('---------------------------------')
print('---- LSTM Results ----')
utils.average_metrics(mse_lstm, rmse_lstm, mae_lstm)
print('---------------------------------')
print('---- 1-LSTM Results ----')
utils.average_metrics(mse_one_lstm, rmse_one_lstm, mae_one_lstm)
print('---------------------------------')
print('---- GRU Results ----')
utils.average_metrics(mse_gru, rmse_gru, mae_gru)
print('---------------------------------')

rmse_overall_covid = rmse_elastic + rmse_svm + rmse_xgb + rmse_rf + rmse_lstm + rmse_one_lstm + rmse_gru
n = len(rmse_rf)
model_names_covid = ['Elastic Net'] * n + ['SVM'] * n + ['XGBoost'] * n + ['RF'] * n + ['3-Layer LSTM'] * n + [
    '1-Layer LSTM'] * n + ['GRU'] * n
dataset_covid = ['COVID-19'] * len(model_names)

---- Elastic-Net Results ----
Average MAPE: 0.45307565997239435
Average RMSE: 0.883394075610157
Average MAE: 0.7302551453337811
---------------------------------
---- Linear SVM Results ----
Average MAPE: 0.48201753946287196
Average RMSE: 1.0121748809858786
Average MAE: 0.8007018151264567
---------------------------------
---- XGBoost Results ----
Average MAPE: 0.3040780971500327
Average RMSE: 0.6047595152268893
Average MAE: 0.48073697638237617
---------------------------------
---- Random Forest Results ----
Average MAPE: 0.28795423448046786
Average RMSE: 0.595598735796749
Average MAE: 0.4520136183894651
---------------------------------
---- LSTM Results ----
Average MAPE: 0.44200061972324667
Average RMSE: 0.9266709980892981
Average MAE: 0.7495321409165806
---------------------------------
---- 1-LSTM Results ----
Average MAPE: 0.39063713070946104
Average RMSE: 0.8641972047844878
Average MAE: 0.7047873287866011
---------------------------------


In [None]:
# Generate a box-plot
rmse = rmse_overall + rmse_overall_covid
models = model_names + model_names_covid
dataset_names = dataset + dataset_covid

dictionary = {'RMSE': rmse, 'Model': models, 'Dataset': dataset_names}
df = pd.DataFrame(data=dictionary)
boxplot = sns.catplot(x='Model', y='RMSE', col='Dataset',
                      data=df, kind="box",
                      height=4, aspect=.7)
# boxplot = sns.boxplot(data=df, y='RMSE', x='Model', palette="Set3")
boxplot.set(title='RMSE Values For All Individuals', ylabel='RMSE')