# Metrics Evaluator

### Introduction

To make sure that a particular train/test split doesn't lead me to choose the wrong model (the differences between the finalist models are very small), I will run all of them many times on different random splits to get a consistent metric for each one, averaging their scores and computing the standard deviation across runs.

In [2]:
# Load the necessary modules
# Relative paths: the notebook is expected to be run from inside `src`, so the
# directory one level above the current working directory is made importable.
import os, sys
# Absolute path of the parent of the current working directory.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Append only when missing so that re-running the cell does not add duplicates.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# plotting libraries
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

# Data management libraries 
import numpy as np 
import pandas as pd 
import math
import scipy.stats as stats

# My code
from data_processing import get_path
from Models.RandomForest import Random_Forest_ensemble
from Models.BaggingEnsemble import Bagging_Ensemble
from Models.LinearRegressor import LinearRegressor
from Models.LogisticRegressor import LogisticRegressor
from Models.LinearRegressionVariants import LR_Relations, LR_ensemble
from Models.SVR import SVR_manual
from utils import evaluate_classification_metrics, evaluate_regression_metrics, cross_validation, plot_residuals_color
from data_processing import standarize_numerical_variables

# Machine learning libraries 
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import BaggingClassifier, BaggingRegressor, RandomForestClassifier, RandomForestRegressor,StackingRegressor,GradientBoostingRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error,mean_squared_error,confusion_matrix, make_scorer
from sklearn.tree import DecisionTreeRegressor, export_graphviz, DecisionTreeClassifier
from sklearn.decomposition import KernelPCA, PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import Isomap
from sklearn.datasets import make_regression
from sklearn.svm import SVR, SVC
from sklearn.utils import shuffle

# Parameters: load settings from the INI file located one directory up.
import configparser
config = configparser.ConfigParser()
# `read` returns the list of files it parsed successfully; a missing file is
# skipped without raising, so an empty list means nothing was loaded.
config.read('../configuration.ini')

['../configuration.ini']

In [3]:
# Read the training CSV from its path relative to the project root.
file = get_path('created_files', 'train', parent_dir)
data = pd.read_csv(file, sep=',')

# 'T3' is the target column; every other column is kept as a feature.
y_train_p = data.loc[:, 'T3'].reset_index(drop=True)
X_train_p = data.drop('T3', axis=1).reset_index(drop=True)

# Split first and standardize AFTERWARDS so that no information from the
# test partition leaks into the training partition.
X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(
    X_train_p,
    y_train_p,
    test_size=0.2,
)
# NOTE(review): the trailing 1 presumably selects the "Model 1" variant of
# the helper; confirm against data_processing.standarize_numerical_variables.
X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(
    X_train, X_test, y_train_unstd, y_test_unstd, 1
)


### Model 1: Final Models

I will run this code twice, so each model will be fitted 600 times in total (2 runs × 300 random splits). In my paper, I will also average the results of both runs.

Models to try out in the Metrics Evaluator

In [31]:
# Stand-alone candidates for Model 1.
svr = SVR(C=1, gamma='scale', kernel='linear')
gb = GradientBoostingRegressor(
    n_estimators=50,
    max_features=0.8,
    subsample=0.9,
    random_state=0,
)
rf = RandomForestRegressor(
    random_state=0,
    n_estimators=100,
    max_features=0.8,
)

# Stacked ensemble: a Lasso and a random forest feed a linear meta-model,
# which also receives the original features (passthrough=True).
base_models = [
    ('lasso', LassoCV(cv=5)),
    ('RF', RandomForestRegressor(random_state=42, n_estimators=100)),
]
stack = StackingRegressor(
    estimators=base_models,
    final_estimator=LinearRegression(),
    cv=5,
    passthrough=True,
)

# `models` and `names` are parallel lists: names[i] labels models[i].
models = [
    LinearRegression(),
    stack,
    BaggingRegressor(random_state=0, n_estimators=100),
    rf,
    svr,
    gb,
]
names = [
    'LinReg',
    'Stack',
    'Bagging',
    'Random Forest',
    'SVR',
    'Boosting',
]



### Metrics Evaluator Code

In [32]:
# Repeated hold-out evaluation of every estimator in `models`.
# Each estimator is refitted on 300 independent random 80/20 splits and one
# summary row per estimator is stored as
# [mean test R², std test R², mean MAE, mean MSE, mean train R²].
final_scores = []

for model in models:
    # Per-split metrics for the current estimator.
    scores = []
    scores_train = []
    MAE = []
    MSE = []
    for _ in range(300):
        # NOTE(review): train_test_split already shuffles by default, so this
        # explicit shuffle only adds one more random permutation on top of it.
        X_shuffled, y_shuffled = shuffle(X_train_p, y_train_p, random_state=None)
        X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(X_shuffled, y_shuffled, test_size=0.2)
        # Standardize after splitting to avoid leakage between partitions.
        X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(X_train, X_test, y_train_unstd, y_test_unstd, 1)

        # The target is deliberately left unstandardized for fitting and
        # scoring, so the error metrics are in the original units of T3.
        model.fit(X_train, y_train_unstd)
        predictions = model.predict(X_test)
        predictions_train = model.predict(X_train)

        score = evaluate_regression_metrics(y_test_unstd, y_train_unstd, predictions, predictions_train)
        scores.append(score['R² Score Test:'])
        scores_train.append(score['R² Score Train:'])
        MAE.append(score['Mean Absolute Error:'])
        MSE.append(score['Mean Squared Error:'])

    # Collapse the 300 splits into one row; the spread is reported only for
    # the test R².
    final_scores.append([np.mean(scores), np.std(scores), np.mean(MAE), np.mean(MSE), np.mean(scores_train)])

# Rows follow the order of `models`, which is labelled by the parallel list `names`.
results_df = pd.DataFrame(final_scores, columns=['R² Mean Test', 'R² Std Test', 'Mean Absolute Error', 'Mean Squared Error', 'R² Mean Train'], index=names)


In [33]:
# Same repeated-split protocol for the custom LR_Relations model, whose
# fit/predict interface differs from the scikit-learn estimators.
scores, scores_train, MAE, MSE = [], [], [], []

for _ in range(300):
    # Fresh random 80/20 partition, standardized after splitting.
    X_shuffled, y_shuffled = shuffle(X_train_p, y_train_p, random_state=None)
    X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(
        X_shuffled, y_shuffled, test_size=0.2
    )
    X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(
        X_train, X_test, y_train_unstd, y_test_unstd, 1
    )

    # A new model instance per split; `fit` receives both feature sets and
    # `predict` hands back the predictions together with the metrics mapping.
    LR_Rel = LR_Relations()
    LR_Rel.fit(X_train, X_test, y_train_unstd)
    predictions, score = LR_Rel.predict(y_test_unstd, y_train_unstd)

    MSE.append(score['Mean Squared Error:'])
    MAE.append(score['Mean Absolute Error:'])
    scores_train.append(score['R² Score Train:'])
    scores.append(score['R² Score Test:'])

# Summary row in the column order of results_df.
final_score = [
    np.mean(scores),
    np.std(scores),
    np.mean(MAE),
    np.mean(MSE),
    np.mean(scores_train),
]
results_df.loc['LR_Relations'] = final_score

In [34]:
# Same repeated-split protocol for the custom LR_ensemble model.
scores, scores_train, MAE, MSE = [], [], [], []

for _ in range(300):
    # Fresh random 80/20 partition, standardized after splitting.
    X_shuffled, y_shuffled = shuffle(X_train_p, y_train_p, random_state=None)
    X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(
        X_shuffled, y_shuffled, test_size=0.2
    )
    X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(
        X_train, X_test, y_train_unstd, y_test_unstd, 1
    )

    # A new ensemble per split. `predict` is called before `score_complete`,
    # exactly as in the original cell; the metrics come from `score_complete`.
    LR_E = LR_ensemble()
    LR_E.fit(X_train, X_test, y_train_unstd)
    predictions = LR_E.predict()
    score = LR_E.score_complete(y_test_unstd, y_train_unstd)

    MSE.append(score['Mean Squared Error:'])
    MAE.append(score['Mean Absolute Error:'])
    scores_train.append(score['R² Score Train:'])
    scores.append(score['R² Score Test:'])

# Summary row in the column order of results_df.
final_score = [
    np.mean(scores),
    np.std(scores),
    np.mean(MAE),
    np.mean(MSE),
    np.mean(scores_train),
]
results_df.loc['LR_Ensemble'] = final_score

In [35]:
# Display the aggregated Model 1 metrics, one row per evaluated model.
print(results_df)

               R² Mean Test  R² Std Test  Mean Absolute Error  \
LinReg             0.830731     0.032955             0.998602   
Stack              0.844978     0.032056             0.970253   
Bagging            0.850915     0.034963             0.929575   
Random Forest      0.856393     0.034352             0.919295   
SVR                0.838306     0.035417             0.899967   
Boosting           0.865615     0.031133             0.894528   
LR_Relations       0.833887     0.034867             0.998346   
LR_Ensemble        0.831899     0.035824             0.991089   

               Mean Squared Error  R² Mean Train  
LinReg                   2.688406       0.852955  
Stack                    2.474833       0.939786  
Bagging                  2.317076       0.979435  
Random Forest            2.265342       0.979889  
SVR                      2.542479       0.838930  
Boosting                 2.133524       0.926174  
LR_Relations             2.649568       0.854351  
LR_Ens

### Model 2: Final Models
Models to try out in the Metrics Evaluator.

Curiously, the n_estimators values that I got from cross-validation for the three ensemble models are each exactly twice the value used for Model 1.

In [8]:
# Read the training CSV from its path relative to the project root.
file = get_path('created_files', 'train', parent_dir)
data = pd.read_csv(file, sep=',')

# 'T3' is the target; for this model 'T1' and 'T2' are removed from the
# features together with it.
y_train_p = data.loc[:, 'T3'].reset_index(drop=True)
X_train_p = data.drop(['T3', 'T2', 'T1'], axis=1).reset_index(drop=True)

# Split first and standardize AFTERWARDS so that no information from the
# test partition leaks into the training partition.
X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(
    X_train_p,
    y_train_p,
    test_size=0.2,
)
# NOTE(review): the trailing 2 presumably selects the "Model 2" variant of
# the helper; confirm against data_processing.standarize_numerical_variables.
X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(
    X_train, X_test, y_train_unstd, y_test_unstd, 2
)

In [None]:
# Stand-alone candidates for Model 2.
svr = SVR(C=10, gamma='scale', kernel='rbf')
bg = BaggingRegressor(random_state=0, n_estimators=200)
gb = GradientBoostingRegressor(
    n_estimators=100,
    max_features=0.8,
    subsample=0.5,
    random_state=0,
)
rf = RandomForestRegressor(
    random_state=0,
    n_estimators=200,
    max_features=0.8,
)

# Stacked ensemble: three tree-based learners feed a LassoCV meta-model,
# which also receives the original features (passthrough=True).
base_models = [
    (
        'gbr',
        GradientBoostingRegressor(
            n_estimators=100,
            max_features=0.8,
            subsample=0.5,
            random_state=42,
        ),
    ),
    (
        'RF',
        RandomForestRegressor(
            random_state=42,
            n_estimators=200,
            max_features=0.8,
        ),
    ),
    ('bag', BaggingRegressor(random_state=42, n_estimators=200)),
]
stack = StackingRegressor(
    estimators=base_models,
    final_estimator=LassoCV(),
    cv=5,
    passthrough=True,
)

# `models_m2` and `names_m2` are parallel lists: names_m2[i] labels models_m2[i].
models_m2 = [
    LinearRegression(),
    stack,
    bg,
    rf,
    svr,
    gb,
]
names_m2 = [
    'LinReg',
    'Stack',
    'Bagging',
    'Random Forest',
    'SVR',
    'Boosting',
]

### Metrics Evaluator Code

In [10]:
# Repeated hold-out evaluation of every estimator in `models_m2`.
# Each estimator is refitted on 300 independent random 80/20 splits and one
# summary row per estimator is stored as
# [mean test R², std test R², mean MAE, mean MSE, mean train R²].
final_scores_m2 = []

for model in models_m2:
    # Per-split metrics for the current estimator.
    scores = []
    scores_train = []
    MAE = []
    MSE = []
    for _ in range(300):
        # NOTE(review): train_test_split already shuffles by default, so this
        # explicit shuffle only adds one more random permutation on top of it.
        X_shuffled, y_shuffled = shuffle(X_train_p, y_train_p, random_state=None)
        X_train, X_test, y_train_unstd, y_test_unstd = train_test_split(X_shuffled, y_shuffled, test_size=0.2)
        # Standardize after splitting to avoid leakage between partitions.
        X_train, X_test, y_train, y_test, scaler_y = standarize_numerical_variables(X_train, X_test, y_train_unstd, y_test_unstd, 2)

        # The target is deliberately left unstandardized for fitting and
        # scoring, so the error metrics are in the original units of T3.
        model.fit(X_train, y_train_unstd)
        predictions = model.predict(X_test)
        predictions_train = model.predict(X_train)

        score = evaluate_regression_metrics(y_test_unstd, y_train_unstd, predictions, predictions_train)
        scores.append(score['R² Score Test:'])
        scores_train.append(score['R² Score Train:'])
        MAE.append(score['Mean Absolute Error:'])
        MSE.append(score['Mean Squared Error:'])

    # Collapse the 300 splits into one row; the spread is reported only for
    # the test R².
    final_scores_m2.append([np.mean(scores), np.std(scores), np.mean(MAE), np.mean(MSE), np.mean(scores_train)])

# Rows follow the order of `models_m2`, which is labelled by the parallel list `names_m2`.
results_df_m2 = pd.DataFrame(final_scores_m2, columns=['R² Mean Test', 'R² Std Test', 'Mean Absolute Error', 'Mean Squared Error', 'R² Mean Train'], index=names_m2)

In [7]:
# Display the aggregated Model 2 metrics, one row per evaluated model.
print(results_df_m2)

               R² Mean Test  R² Std Test  Mean Absolute Error  \
LinReg             0.199626     0.062545             2.593248   
Stack              0.315392     0.069937             2.409327   
Bagging            0.299883     0.080667             2.395442   
Random Forest      0.305675     0.076834             2.395336   
SVR                0.285689     0.065760             2.419332   
Boosting           0.300555     0.081004             2.470903   

               Mean Squared Error  R² Mean Train  
LinReg                  12.763015       0.302269  
Stack                   10.817083       0.783729  
Bagging                 10.971332       0.904835  
Random Forest           10.935720       0.905903  
SVR                     11.423480       0.730712  
Boosting                11.025666       0.681296  
