# 1-step Forecasting with linear and non-linear models

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import LinearSVR
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from sklearn import linear_model as lm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import utils

# Plot settings
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['figure.dpi'] = 150
sns.set()

In [2]:
# Loading alcohol data
train_df, test_df, data_raw_list = utils.load_alcohol()

combined_data = []

for i in range(len(train_df)):
    train = train_df[i]
    test = test_df[i]
    # Combine both train and test sets since the initial split was 50/50
    combined = pd.concat([train, test])
    # Sort by date
    combined['start'] = pd.to_datetime(combined['start'])
    combined = combined.sort_values(by='start')
    combined_data.append(combined)

combined_data[0].head()

Unnamed: 0.1,Unnamed: 0,ID,start,finish,drinks,comfortable,stressed,down,calm,pressure,...,cosT.1,sinT.1,cos2T.1,sin2T.1,cosW.1,sinW.1,dayvar.1,beepvar.1,filter.1,consec.1
0,1,1,2018-02-06 16:20:00,2/6/2018 16:22,3,7.382609,-9.817391,10.843478,-37.791304,6.173913,...,1.0,0.0,1.0,0.0,1.0,0.0,1,4,0,1
31,2,1,2018-02-06 18:54:00,2/6/2018 18:58,0,14.382609,47.182609,7.843478,7.208696,10.173913,...,0.892979,0.450098,0.594823,0.803857,0.997777,0.066647,1,5,0,2
1,3,1,2018-02-06 20:08:00,2/6/2018 20:22,0,15.382609,12.182609,10.843478,20.208696,18.173913,...,0.41866,0.908143,-0.649448,0.760406,0.986795,0.161973,1,6,0,3
2,4,1,2018-02-06 22:29:00,2/6/2018 22:46,0,21.382609,-5.817391,-2.156522,8.208696,5.173913,...,0.108867,0.994056,-0.976296,0.21644,0.978277,0.207302,1,7,0,4
36,5,1,2018-02-07 10:52:00,2/7/2018 11:23,0,-11.617391,5.182609,0.843478,-24.791304,-4.826087,...,0.043619,-0.999048,-0.996195,-0.087156,0.77793,0.628351,2,1,0,7


In [3]:
# Loading covid data
covid_train_x_list, covid_test_x_list, covid_train_y_list, covid_test_y_list = utils.patients_covid()

covid_train_x_list[0].head()

Patient included in study:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 46, 48, 49, 50, 52, 53, 54, 55, 57, 58, 59, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 77, 78]


Unnamed: 0,Relax_lag,Irritable_lag,Worry_lag,Nervous_lag,Future_lag,Anhedonia_lag,Tired_lag,Hungry_lag,Alone_lag,Angry_lag,Social_offline_lag,Social_online_lag,Music_lag,Procrastinate_lag,Outdoors_lag,C19_occupied_lag,C19_worry_lag,Home_lag,beepvar_lag
105,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,3.0,4.0,2.0,3.0,1.0,2.0,2.0,5.0,1.0
111,2.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,4.0,3.0,1.0,1.0,2.0,2.0,5.0,3.0
109,2.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,1.0,1.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,1.0
102,2.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,1.0,1.0,3.0,3.0,3.0,2.0,4.0,3.0,3.0,4.0,2.0
107,2.0,2.0,2.0,2.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,4.0,2.0,1.0,1.0,3.0,3.0,5.0,3.0


## 1. Idiographic Models Regression

In [4]:
# Predict craving

# Make own splits
def prepare_data_own(idx, combined_list, random_state):
    # print('Patient ID:', combined_list[idx].iloc[0]['ID'])
    X = combined_list[idx].drop(combined_list[idx].columns[range(0, 24)], axis=1).fillna(0)
    y = combined_list[idx]['craving']

    return train_test_split(X, y, test_size=0.3, random_state=random_state)


def prepare_data(idx, train_list, test_list):
    # print('Patient ID:', train_list[idx]['ID'][0])

    X_train = train_list[idx].drop(train_list[idx].columns[range(0, 61)], axis=1).fillna(0)
    y_train = train_list[idx]['craving']
    X_test = test_list[idx].drop(test_list[idx].columns[range(0, 61)], axis=1).fillna(0)
    y_test = test_list[idx]['craving']

    return X_train, X_test, y_train, y_test

### 1.1 Lasso Regression

In [6]:
X_train, X_test, y_train, y_test = prepare_data(1, train_list=train_df, test_list=test_df)


def lasso_reg(train_x, train_y, test_x, test_y, vis):
    X_train_loc = utils.standardize(train_x).fillna(0)
    X_test_loc = utils.standardize(test_x).fillna(0)

    alphas = np.arange(0.01, 20, 0.05)
    lasso = lm.LassoCV(alphas=alphas, cv=5, max_iter=100000, fit_intercept=True)
    lasso.fit(X_train_loc, train_y)
    y_predicted_test = lasso.predict(X_test_loc)

    # print('--- Lasso Regression Results ---')
    # print()
    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)

    return r2, rmse, mae


lasso_reg(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: 0.351568429169112
MAPE: 0.27455896945334624
RMSE: 0.48773572599444837
MAE: 0.3699025929245423
CORR: 0.6303501218634843


(0.351568429169112, 0.48773572599444837, 0.3699025929245423)

### 1.2 Elastic-Net Regression

In [7]:
def elastic_net(train_x, train_y, test_x, test_y, vis):
    X_train_loc = utils.standardize(train_x).fillna(0)
    X_test_loc = utils.standardize(test_x).fillna(0)

    l1_ratios = np.arange(0.01, 0.6, 0.05)
    elastic_reg = lm.ElasticNetCV(alphas=np.arange(0.01, 20, 0.05), l1_ratio=l1_ratios, cv=5, max_iter=100000,
                                  fit_intercept=True)
    elastic_reg.fit(X_train_loc, train_y)
    y_predicted_test = elastic_reg.predict(X_test_loc)

    # print('--- Elastic-Net Results ---')
    # print()
    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)
    return r2, rmse, mae


elastic_net(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: 0.32078006267206427
MAPE: 0.2872061920042228
RMSE: 0.49918060737876185
MAE: 0.3877675199797297
CORR: 0.6206417507150388


(0.32078006267206427, 0.49918060737876185, 0.3877675199797297)

### 1.3 Linear SVM Regression

In [8]:
def linear_svm(train_x, train_y, test_x, test_y, vis):
    X_train_loc = utils.standardize(train_x).fillna(0)
    X_test_loc = utils.standardize(test_x).fillna(0)

    params = [
        {'C': np.arange(0.1, 4, 0.1),
         'epsilon': np.arange(0, 0.5, 0.1),
         'loss': ['epsilon_insensitive'],
         'fit_intercept': [True],
         'max_iter': [10000]}]

    clf = GridSearchCV(estimator=LinearSVR(), param_grid=params, scoring='neg_mean_absolute_error', cv=5)
    clf.fit(X_train_loc, train_y)
    # best_params = clf.best_params_
    # print(best_params)
    y_predicted_test = clf.predict(X_test_loc)

    # print('--- Linear-SVM Results ---')
    # print()
    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)
    return r2, rmse, mae


linear_svm(X_train, y_train, X_test, y_test, True)

R_squared: 0.3015715948149953
MAPE: 0.8382106521210954
RMSE: 20.52628356908166
MAE: 15.886136952334763
CORR: 0.5688646194257785


(0.3015715948149953, 20.52628356908166, 15.886136952334763)

### 1.4 K-NN Regression

In [9]:
def knn_reg(train_x, train_y, test_x, test_y, vis):
    params = [
        {'weights': ['uniform', 'distance'],
         'n_neighbors': np.arange(2, 20, 1)}]

    clf = GridSearchCV(estimator=KNeighborsRegressor(), param_grid=params, scoring='neg_mean_absolute_error', cv=5)
    clf.fit(train_x, train_y)
    #best_params = clf.best_params_
    #print(best_params)

    y_predicted_test = clf.predict(test_x)

    # print('--- kNN Regression Results ---')
    # print()
    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)
    return r2, rmse, mae


knn_reg(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: 0.1969354838709676
MAPE: 0.3166666666666667
RMSE: 0.5427848419174808
MAE: 0.43846153846153857
CORR: 0.5796736197002106


(0.1969354838709676, 0.5427848419174808, 0.43846153846153857)

### 1.5 Symbolic Regressions (Genetic Algorithm basically)

In [10]:
from gplearn.genetic import SymbolicRegressor

function_set = ['add', 'sub', 'mul', 'div', 'sin', 'log']
model = SymbolicRegressor(population_size=3000, tournament_size=5,
                          generations=10, stopping_criteria=0.1,
                          function_set=function_set, metric='rmse',
                          p_crossover=0.65, p_subtree_mutation=0.15,
                          p_hoist_mutation=0.05, p_point_mutation=0.1,
                          verbose=1, random_state=None, n_jobs=-1)
model.fit(covid_train_x_list[4], covid_train_y_list[4])
predicted = model.predict(covid_test_x_list[4])

utils.eval_results(actual=covid_test_y_list[4], predicted=predicted, show=True)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    13.75          6.87119       23         0.514973              N/A     16.35s
   1     7.39           1.3895       12         0.480534              N/A     14.79s
   2     5.75          1.15081       12         0.480534              N/A     11.20s
   3     5.02          1.78437        8         0.453018              N/A     12.02s
   4     4.51          1.08731        8         0.427853              N/A      8.93s
   5     4.74          1.03721        7            0.448              N/A      6.69s
   6     4.99          1.02314        5         0.418158              N/A      5.08s
   7     5.44          1.03216        5         0.418158              N/A      3.48s
   8     5.90          1.03433       15         0.386311              N/A  

(-0.26400992062246864, 0.6809696144833945, 0.5273570769794537)

### 1.6 XGBoost Regression

In [11]:
def xgboost_reg(train_x, train_y, test_x, test_y, vis):
    # Very simple models work better here, since there are few datapoints
    params = [
        {'objective': ['reg:squarederror'],
         'n_estimators': np.arange(1, 10, 1),
         'eval_metric': ['mae'],
         'max_depth': np.arange(1, 5, 1)}]

    reg_xgb = GridSearchCV(xgb.XGBRegressor(), params, n_jobs=5, cv=5, scoring='neg_mean_absolute_error')
    reg_xgb.fit(train_x, train_y)

    #print(reg_xgb.best_params_)
    y_predicted_test = reg_xgb.predict(test_x)

    # print('--- XGBoost Regression Results ---')
    # print()
    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)
    return r2, rmse, mae


xgboost_reg(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: 0.23182835667086477
MAPE: 0.23507179358066657
RMSE: 0.5308619868025873
MAE: 0.349186502970182
CORR: 0.486300834844521


(0.23182835667086477, 0.5308619868025873, 0.349186502970182)

### 1.7 Random Forests

In [12]:
from sklearn.model_selection import RandomizedSearchCV


def random_forests(train_x, train_y, test_x, test_y, vis):
    grid = [
        {'n_estimators': [50, 70, 100],
         'max_features': ['auto', 'sqrt'],
         'max_depth': [5, 10, 15, 20],
         'min_samples_split': [2, 4, 6],
         'min_samples_leaf': [1],
         'bootstrap': [True]}]

    rf = GridSearchCV(RandomForestRegressor(), param_grid=grid, cv=5, scoring='neg_mean_absolute_error')
    rf.fit(train_x, train_y)
    y_predicted_test = rf.predict(test_x)
    # print(rf.best_params_)

    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test, show=vis)
    return r2, rmse, mae


random_forests(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: 0.13449435558440814
MAPE: 0.31011782661782655
RMSE: 0.5634915978002856
MAE: 0.3872832722832723
CORR: 0.47952686393476524


(0.13449435558440814, 0.5634915978002856, 0.3872832722832723)

### 1.8 LSTM RNN

In [13]:
from sklearn import metrics
import keras.layers as layer
from keras.models import Sequential


def lstm_rnn(train_x, train_y, test_x, test_y, vis):
    X_train_loc = utils.standardize(train_x).fillna(0)
    X_test_loc = utils.standardize(test_x).fillna(0)
    train_x_val, train_y_val, test_x_val, test_y_val = X_train_loc.values, train_y.values, X_test_loc.values, test_y.values

    train_x_val = train_x_val.reshape((train_x_val.shape[0], 1, train_x_val.shape[1]))
    test_x_val = test_x_val.reshape((test_x_val.shape[0], 1, test_x_val.shape[1]))

    # print(train_x_val.shape)
    # print(test_x_val.shape)

    model = Sequential([
        layer.LSTM(40, return_sequences=True, input_shape=(train_x_val.shape[1], train_x_val.shape[2])),
        layer.Dropout(0.25),
        layer.LSTM(units=25, return_sequences=True),
        layer.Dropout(0.20),
        layer.LSTM(units=10, return_sequences=False),
        layer.Dense(units=1, activation='linear'),
    ])
    model.compile(loss='mae', optimizer='adam')
    model.fit(train_x_val, train_y_val, epochs=15, batch_size=4, verbose=0, shuffle=False)

    y_predicted_test = model.predict(test_x_val)

    r2, rmse, mae = utils.eval_results(actual=test_y, predicted=y_predicted_test.flatten(), show=vis)

    return r2, rmse, mae


lstm_rnn(covid_train_x_list[4], covid_train_y_list[4], covid_test_x_list[4], covid_test_y_list[4], True)

R_squared: -0.6906279742717791
MAPE: 0.31541976256248283
RMSE: 0.7875470659546568
MAE: 0.5180214230830853
CORR: 0.026469629326447582


(-0.6906279742717791, 0.7875470659546568, 0.5180214230830853)

### 1.9 MTGNN

In [14]:
import torch
import torch.nn.functional as f
from torch_geometric_temporal.nn.recurrent.gconv_gru import GConvGRU


class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features, filters):
        super(RecurrentGCN, self).__init__()
        self.recurrent = GConvGRU(node_features, filters, 2)
        self.linear = torch.nn.Linear(filters, 1)

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x, edge_index, edge_weight)
        h = f.relu(h)
        h = self.linear(h)
        return h

### 2. Evaluating Performance on Entire Dataset (Alcohol Data)

In [19]:
import warnings


def average_metrics(r2_list, rmse_list, mae_list):
    print('Average R_Squared:', np.mean(r2_list))
    print('Average RMSE:', np.mean(rmse_list))
    print('Average MAE:', np.mean(mae_list))


def evaluate_models(train_list, test_list):
    assert len(train_list) == len(test_list)
    r2_lasso, r2_elastic, r2_svm, r2_knn, r2_xgb, r2_rf, r2_lstm, r2_mtgnn = ([] for _ in range(8))
    rmse_lasso, rmse_elastic, rmse_svm, rmse_knn, rmse_xgb, rmse_rf, rmse_lstm, rmse_mtgnn = ([] for _ in range(8))
    mae_lasso, mae_elastic, mae_svm, mae_knn, mae_xgb, mae_rf, mae_lstm, mae_mtgnn = ([] for _ in range(8))

    patient_ids = []
    f = open("output_idiographic_a.txt", "a")
    f.write('- - - PER INDIVIDUAL RESULTS - - -\n')
    for x in range(len(train_list)):
        # Build and evaluate a model for every single patient
        train_x, test_x, train_y, test_y = prepare_data(x, train_list=train_list, test_list=test_list)
        # Elastic-Net (baseline)
        r2, rmse, mae = elastic_net(train_x, train_y, test_x, test_y,
                                    False)  # only continue with other models if this one can get a positive r2

        # Elastic-Net metrics
        patient_ids.append(train_list[x]['ID'][0])
        r2_elastic.append(max(0, r2))
        rmse_elastic.append(rmse)
        mae_elastic.append(mae)

        f.write("Patient ID: %s\n" % train_list[x]['ID'][0])
        f.write('\n')
        f.write('--- Elastic-Net ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # Lasso Regression
        r2, rmse, mae = lasso_reg(train_x, train_y, test_x, test_y, False)
        # Lasso metrics
        r2_lasso.append(max(0, r2))
        rmse_lasso.append(rmse)
        mae_lasso.append(mae)

        f.write('--- Lasso ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # Linear-SVM
        r2, rmse, mae = linear_svm(train_x, train_y, test_x, test_y, False)
        # Linear-SVM metrics
        r2_svm.append(max(0, r2))
        rmse_svm.append(rmse)
        mae_svm.append(mae)

        f.write('--- Linear-SVM ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # kNN Regression
        r2, rmse, mae = knn_reg(train_x, train_y, test_x, test_y, False)
        # kNN metrics
        r2_knn.append(max(0, r2))
        rmse_knn.append(rmse)
        mae_knn.append(mae)

        f.write('--- kNN Reg ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # XGBoost Regression
        r2, rmse, mae = xgboost_reg(train_x, train_y, test_x, test_y, False)
        # XGBoost metrics
        r2_xgb.append(max(0, r2))
        rmse_xgb.append(rmse)
        mae_xgb.append(mae)

        f.write('--- XGBoost ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # RF
        r2, rmse, mae = random_forests(train_x, train_y, test_x, test_y, False)
        # RF metrics
        r2_rf.append(max(0, r2))
        rmse_rf.append(rmse)
        mae_rf.append(mae)

        f.write('--- Random Forests ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # LSTM RNN
        r2, rmse, mae = lstm_rnn(train_x, train_y, test_x, test_y, False)
        # LSTM metrics
        r2_lstm.append(max(0, r2))
        rmse_lstm.append(rmse)
        mae_lstm.append(mae)

        f.write('--- LSTM RNN ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

    f.close()
    print('---- Lasso Regression Results ----')
    average_metrics(r2_lasso, rmse_lasso, mae_lasso)
    print('---------------------------------')
    print('---- Elastic-Net Results ----')
    average_metrics(r2_elastic, rmse_elastic, mae_elastic)
    print('---------------------------------')
    print('---- Linear SVM Results ----')
    average_metrics(r2_svm, rmse_svm, mae_svm)
    print('---------------------------------')
    print('---- kNN Regression Results ----')
    average_metrics(r2_knn, rmse_knn, mae_knn)
    print('---------------------------------')
    print('---- XGBoost Results ----')
    average_metrics(r2_xgb, rmse_xgb, mae_xgb)
    print('---------------------------------')
    print('---- Random Forest Results ----')
    average_metrics(r2_rf, rmse_rf, mae_rf)
    print('---------------------------------')
    print('---- LSTM Results ----')
    average_metrics(r2_lstm, rmse_lstm, mae_lstm)
    print('---------------------------------')

    print('Included patient list:')
    print(patient_ids)


warnings.filterwarnings("ignore")
evaluate_models(train_df, test_df)

---- Lasso Regression Results ----
Average R_Squared: 0.1770519351340509
Average RMSE: 14.717961396686716
Average MAE: 10.83399961626075
---------------------------------
---- Elastic-Net Results ----
Average R_Squared: 0.194006559997254
Average RMSE: 14.771363649360774
Average MAE: 10.985510678456176
---------------------------------
---- Linear SVM Results ----
Average R_Squared: 0.19834604439127085
Average RMSE: 14.363067724376505
Average MAE: 10.122403699592118
---------------------------------
---- kNN Regression Results ----
Average R_Squared: 0.11133822895682312
Average RMSE: 15.44039096008571
Average MAE: 10.784237269511237
---------------------------------
---- XGBoost Results ----
Average R_Squared: 0.12261621935718933
Average RMSE: 15.382442588198774
Average MAE: 10.944263945708332
---------------------------------
---- Random Forest Results ----
Average R_Squared: 0.18895880965097842
Average RMSE: 14.578414318202334
Average MAE: 10.655208158697407
--------------------------

### 2. Evaluating Performance on Entire Dataset (COVID-19 Data)

In [15]:
def evaluate_models(covid_train_x, covid_test_x, covid_train_y, covid_test_y):
    r2_elastic, r2_knn, r2_xgb, r2_rf, r2_lstm, r2_mtgnn = ([] for _ in range(6))
    rmse_elastic, rmse_knn, rmse_xgb, rmse_rf, rmse_lstm, rmse_mtgnn = ([] for _ in range(6))
    mae_elastic, mae_knn, mae_xgb, mae_rf, mae_lstm, mae_mtgnn = ([] for _ in range(6))

    f = open("output_idiographic_c.txt", "a")
    f.write('- - - PER INDIVIDUAL RESULTS - - -\n')
    for x in range(len(covid_train_x)):
        # Build and evaluate a model for every single patient
        # Elastic-Net (baseline)
        r2, rmse, mae = elastic_net(covid_train_x[x], covid_train_y[x], covid_test_x[x], covid_test_y[x],
                                    False)  # only continue with other models if this one can get a positive r2

        # Elastic-Net metrics
        r2_elastic.append(max(0, r2))
        rmse_elastic.append(rmse)
        mae_elastic.append(mae)

        f.write("Patient ID: %s\n" % x)
        f.write('\n')
        f.write('--- Elastic-Net ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # kNN Regression
        r2, rmse, mae = knn_reg(covid_train_x[x], covid_train_y[x], covid_test_x[x], covid_test_y[x], False)
        # kNN metrics
        r2_knn.append(max(0, r2))
        rmse_knn.append(rmse)
        mae_knn.append(mae)

        f.write('--- kNN Reg ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # XGBoost Regression
        r2, rmse, mae = xgboost_reg(covid_train_x[x], covid_train_y[x], covid_test_x[x], covid_test_y[x], False)
        # XGBoost metrics
        r2_xgb.append(max(0, r2))
        rmse_xgb.append(rmse)
        mae_xgb.append(mae)

        f.write('--- XGBoost ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # RF
        r2, rmse, mae = random_forests(covid_train_x[x], covid_train_y[x], covid_test_x[x], covid_test_y[x], False)
        # RF metrics
        r2_rf.append(max(0, r2))
        rmse_rf.append(rmse)
        mae_rf.append(mae)

        f.write('--- Random Forests ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

        # LSTM RNN
        r2, rmse, mae = lstm_rnn(covid_train_x[x], covid_train_y[x], covid_test_x[x], covid_test_y[x], False)
        # LSTM metrics
        r2_lstm.append(max(0, r2))
        rmse_lstm.append(rmse)
        mae_lstm.append(mae)

        f.write('--- LSTM RNN ---\n')
        f.write("R_squared: %s\n" % max(0, r2))
        f.write("RMSE: %s\n" % rmse)
        f.write("MAE: %s\n" % mae)
        f.write('\n')

    f.close()
    print('---- Elastic-Net Results ----')
    average_metrics(r2_elastic, rmse_elastic, mae_elastic)
    print('---------------------------------')
    print('---- kNN Regression Results ----')
    average_metrics(r2_knn, rmse_knn, mae_knn)
    print('---------------------------------')
    print('---- XGBoost Results ----')
    average_metrics(r2_xgb, rmse_xgb, mae_xgb)
    print('---------------------------------')
    print('---- Random Forest Results ----')
    average_metrics(r2_rf, rmse_rf, mae_rf)
    print('---------------------------------')
    print('---- LSTM Results ----')
    average_metrics(r2_lstm, rmse_lstm, mae_lstm)
    print('---------------------------------')

    warnings.filterwarnings("ignore")


evaluate_models(covid_train_x_list, covid_test_x_list, covid_train_y_list, covid_test_y_list)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  c /= stddev[:, None]
  c /= stddev[None, :]
Traceback (most recent call last):
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\metrics\_scorer.py", line 216, in __call__
    return self._score(
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\l



Traceback (most recent call last):
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\metrics\_scorer.py", line 216, in __call__
    return self._score(
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\metrics\_scorer.py", line 258, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\metrics\_scorer.py", line 68, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\neighbors\_regression.py", line 229, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "C:\Users\Alex\AppData\Local\Programs\Python\Python38\lib\site-pack

KeyboardInterrupt: 

### Nomothethic Approach In separate notebook