# Tuning hyperparameters for 4 models using MinMax scaler:
- Random Forest
- SVR
- Ridge Regression
- Gradient Boosting

Using 10-fold cross-validation, with an internal 10-fold cross-validation inside each fold of the external one (nested cross-validation).

In [1]:
# Useful starting lines
%matplotlib inline
import sys
sys.path.append('../')

import sklearn
import numpy as np
import pandas as pd
import scipy as ski
from math import sqrt
import matplotlib.pyplot as plt
from scripts.helpers import *
from scripts.GridSearch_helpers import *

%load_ext autoreload
%autoreload 2

## Loading the data from files:
- true_ees.csv : csv file with the values of end-systolic elastance (*EES*) from the lab dataset  => *outputs*
- u2.csv       : csv file with the values of the features from the lab dataset including the EF => *inputs*



In [2]:
# Locations of the two lab-dataset CSV files, relative to this notebook's directory:
# the EES targets first, then the input features.
EES_PATH, U2_PATH = "../Data/true_ees.csv", "../Data/u2.csv"

In [3]:
# Both files are loaded the same way: read the CSV, then promote its "ID" column
# to the row index. `ees` holds the targets, `data` the input features.
ees, data = (pd.read_csv(csv_path).set_index("ID") for csv_path in (EES_PATH, U2_PATH))

The end-systolic elastance values of the dataset are passed through the `inverse` function to convert them to compliance, so that the target is consistent with what the applied regression models predict.

In [4]:
# `inverse` is not defined in this notebook; presumably it comes from the
# star-imports of scripts.helpers / scripts.GridSearch_helpers -- TODO confirm.
# NOTE(review): this cell rebinds `ees` to the helper's output, so running it a
# second time without re-running the loading cell applies `inverse` to values that
# were already converted. Re-load the data before re-running this cell.
ees = inverse(ees)

## Splitting the data into:
- 80% : training set 
- 20% : testing set  

In [5]:
from sklearn.model_selection import train_test_split

# Seed for the shuffle performed by train_test_split. Without it every kernel
# restart produces a different 80/20 split, so neither the metrics reported below
# nor the hyperparameters hard-coded in the "Testing the model" cells can be
# reproduced. (Outputs saved in this notebook predate the seed and will differ.)
SPLIT_SEED = 42

# 80% of the rows go to training, 20% to testing; features and targets are split
# together so their rows stay aligned.
X_train, X_test, y_train, y_test = train_test_split(
    data, ees, test_size=0.20, random_state=SPLIT_SEED
)

## Normalizing the data with MinMax scaler


In [6]:
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training split only and then applied unchanged to
# the test split, so nothing from the test set influences the fit.
scaler = MinMaxScaler()
# NOTE: re-wrapping the scaler output in a bare pd.DataFrame (assuming the default
# ndarray output) gives X_train / X_test fresh default row and column labels
# instead of the ID index and feature names they had before.
# NOTE(review): the cell overwrites X_train / X_test, so re-running it without
# re-running the split cell scales already-scaled data.
X_train = pd.DataFrame(scaler.fit_transform(X_train))
X_test = pd.DataFrame(scaler.transform(X_test))

# Random Forest


#### Parameter Tuning

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Candidate hyperparameter values handed to the project helper `crossvalidation`
# (not defined in this notebook; presumably star-imported from scripts.* -- confirm).
rf_param_grid = {
    "max_depth": [5, 10, 20],
    "n_estimators": [500, 700, 1000],
}

# The trailing 10 is passed positionally; presumably the number of folds -- TODO confirm.
# The call is the cell's last expression, so its return value is displayed.
crossvalidation(X_train, y_train, RandomForestRegressor(), "Random Forest", rf_param_grid, 10)

Best Parameters for 0 fold: {'max_depth': 20, 'n_estimators': 700}
fold:0, r:0.9494181967461612, R2:0.9009470826799836, RMSE:0.0008513662445949117, MAE:0.022501749183850703
Best Parameters for 1 fold: {'max_depth': 20, 'n_estimators': 1000}
fold:1, r:0.952766532518137, R2:0.901372306810513, RMSE:0.0007548134731107308, MAE:0.020868863341821493
Best Parameters for 2 fold: {'max_depth': 10, 'n_estimators': 1000}
fold:2, r:0.9487844289032829, R2:0.8997827571254051, RMSE:0.0007323830583488572, MAE:0.021449560145649135
Best Parameters for 3 fold: {'max_depth': 20, 'n_estimators': 1000}
fold:3, r:0.9394595932212227, R2:0.8821044264407387, RMSE:0.0008703974222239792, MAE:0.022254508797813707
Best Parameters for 4 fold: {'max_depth': 20, 'n_estimators': 1000}
fold:4, r:0.947483247672323, R2:0.8976334906661589, RMSE:0.000811276741411441, MAE:0.022221563346282072
Best Parameters for 5 fold: {'max_depth': 20, 'n_estimators': 1000}
fold:5, r:0.9561889484224204, R2:0.9141310548801801, RMSE:0.0006256

(0.9593399228388181,
 0.9184078509446257,
 0.0006256122504974232,
 0.01974388716278874,
 [{'max_depth': 10, 'n_estimators': 1000},
  {'max_depth': 10, 'n_estimators': 1000},
  {'max_depth': 20, 'n_estimators': 1000},
  {'max_depth': 20, 'n_estimators': 1000}])

#### Testing the model

In [None]:
# Imported here as well so this cell runs after the data-preparation cells alone,
# without first executing the expensive tuning cell. The metrics are imported by
# name because a bare `import sklearn` does not guarantee that the
# `sklearn.metrics` submodule has been loaded.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Hyperparameters reported by the cross-validation cell above. A fixed
# random_state makes the fitted forest, and hence the metrics, repeatable.
# (Outputs saved in this notebook were produced without a seed and will differ.)
regressor = RandomForestRegressor(n_estimators=1000, max_depth=20, random_state=42)
# np.ravel flattens the target before it is handed to fit.
regressor.fit(X_train, np.ravel(y_train))
y_predict = regressor.predict(X_test)

# Held-out metrics: coefficient of determination, Pearson correlation,
# root-mean-squared error and mean absolute error.
R2 = regressor.score(X_test, y_test)
r = np.corrcoef(np.ravel(y_test), y_predict)[0, 1]
RMSE = sqrt(mean_squared_error(y_test, y_predict))
MAE = mean_absolute_error(y_test, y_predict)
print("Results for Random Forest:")
print('r:{}, R2:{}, RMSE:{}, MAE:{}'.format(r, R2, RMSE, MAE))

Results for Random Forest:
r:0.94753884457801, R2:0.8973215410413728, RMSE:0.027155442708822645, MAE:0.020692766618872357


# SVR

#### Parameter Tuning

In [7]:
from sklearn.svm import SVR

# Candidate hyperparameter values handed to the project helper `crossvalidation`
# (not defined in this notebook; presumably star-imported from scripts.* -- confirm).
svr_param_grid = {
    "C": [1, 10, 100],
    "gamma": [0.001, 0.01, 0.1, 1],
}

# The trailing 10 is passed positionally; presumably the number of folds -- TODO confirm.
# The call is the cell's last expression, so its return value is displayed.
crossvalidation(X_train, y_train, SVR(), "SVR", svr_param_grid, 10)

Best Parameters for 0 fold: {'C': 1, 'gamma': 1}
fold:0, r:0.9188698063108378, R2:0.8201644560155981, RMSE:0.0013945114638559532, MAE:0.02848113716997249
Best Parameters for 1 fold: {'C': 10, 'gamma': 1}
fold:1, r:0.8916766949479635, R2:0.7913720104925872, RMSE:0.0013816424393529778, MAE:0.028401352656756845
Best Parameters for 2 fold: {'C': 100, 'gamma': 1}
fold:2, r:0.9232420629420808, R2:0.840003275142291, RMSE:0.0012329267098044787, MAE:0.028421668100620773
Best Parameters for 3 fold: {'C': 10, 'gamma': 0.1}
fold:3, r:0.9306078407784225, R2:0.8528489978427831, RMSE:0.0012933923954918589, MAE:0.028812897430734417
Best Parameters for 4 fold: {'C': 10, 'gamma': 1}
fold:4, r:0.9162758574561345, R2:0.8296247719308203, RMSE:0.0011314327718641567, MAE:0.02632766682339098
Best Parameters for 5 fold: {'C': 100, 'gamma': 1}
fold:5, r:0.9017000577933119, R2:0.8041959563240382, RMSE:0.0014223396617304965, MAE:0.029564657603265408
Best Parameters for 6 fold: {'C': 100, 'gamma': 1}
fold:6, r:0.9

(0.9306078407784225,
 0.8528489978427831,
 0.0011314327718641567,
 0.02632766682339098,
 [{'C': 10, 'gamma': 0.1},
  {'C': 10, 'gamma': 0.1},
  {'C': 10, 'gamma': 1},
  {'C': 10, 'gamma': 1}])

#### Testing the model

In [8]:
# Imported here as well so this cell runs after the data-preparation cells alone,
# without first executing the expensive tuning cell. The metrics are imported by
# name because a bare `import sklearn` does not guarantee that the
# `sklearn.metrics` submodule has been loaded.
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.svm import SVR

# Hyperparameters chosen from the cross-validation output above.
regressor = SVR(C=10, gamma=1)
# np.ravel flattens the target before it is handed to fit.
regressor.fit(X_train, np.ravel(y_train))
y_predict = regressor.predict(X_test)

# Held-out metrics: coefficient of determination, Pearson correlation,
# root-mean-squared error and mean absolute error.
R2 = regressor.score(X_test, y_test)
r = np.corrcoef(np.ravel(y_test), y_predict)[0, 1]
RMSE = sqrt(mean_squared_error(y_test, y_predict))
MAE = mean_absolute_error(y_test, y_predict)
print("Results for SVR:")
print('r:{}, R2:{}, RMSE:{}, MAE:{}'.format(r, R2, RMSE, MAE))

Results for SVR:
r:0.9077087571863297, R2:0.8164300433587737, RMSE:0.036698375689317116, MAE:0.029449006558468035


# Ridge Regression

#### Parameter Tuning

In [None]:
from sklearn.linear_model import Ridge

# Candidate hyperparameter values handed to the project helper `crossvalidation`
# (not defined in this notebook; presumably star-imported from scripts.* -- confirm).
# NOTE(review): in the output saved below, alpha=0.1 -- the largest candidate -- is
# selected in every fold shown, so the best value may lie outside this grid.
ridge_param_grid = {"alpha": [0.001, 0.01, 0.1]}

# The trailing 10 is passed positionally; presumably the number of folds -- TODO confirm.
# The call is the cell's last expression, so its return value is displayed.
crossvalidation(X_train, y_train, Ridge(), "Ridge Regression", ridge_param_grid, 10)

Best Parameters for 0 fold: {'alpha': 0.1}
fold:0, r:0.9153017367454845, R2:0.8372887478691551, RMSE:0.001398513758382562, MAE:0.02907887437651101
Best Parameters for 1 fold: {'alpha': 0.1}
fold:1, r:0.9128264354793488, R2:0.832037615914693, RMSE:0.0012854429256478124, MAE:0.02719995148651939
Best Parameters for 2 fold: {'alpha': 0.1}
fold:2, r:0.9136880318119727, R2:0.8342239849027989, RMSE:0.001211483587606783, MAE:0.027650269639583284
Best Parameters for 3 fold: {'alpha': 0.1}
fold:3, r:0.9193053309585936, R2:0.8439796802017002, RMSE:0.0011518641461016371, MAE:0.026100815068034806
Best Parameters for 4 fold: {'alpha': 0.1}
fold:4, r:0.9078000108155344, R2:0.8228511142224006, RMSE:0.0014039432597005375, MAE:0.028772471352192314
Best Parameters for 5 fold: {'alpha': 0.1}
fold:5, r:0.9094079782747303, R2:0.8263069954255635, RMSE:0.001265468806398449, MAE:0.0287329287141101
Best Parameters for 6 fold: {'alpha': 0.1}
fold:6, r:0.8814381309161061, R2:0.769692077825772, RMSE:0.001385601847

(0.9193053309585936,
 0.8439796802017002,
 0.0011518641461016371,
 0.026100815068034806,
 [{'alpha': 0.1}, {'alpha': 0.1}, {'alpha': 0.1}, {'alpha': 0.1}])

#### Testing the model

In [None]:
# Imported here as well so this cell runs after the data-preparation cells alone,
# without first executing the tuning cell. The metrics are imported by name
# because a bare `import sklearn` does not guarantee that the `sklearn.metrics`
# submodule has been loaded.
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Regularisation strength selected in the cross-validation output above.
regressor = Ridge(alpha=0.1)
# np.ravel flattens the target before it is handed to fit.
regressor.fit(X_train, np.ravel(y_train))
y_predict = regressor.predict(X_test)

# Held-out metrics: coefficient of determination, Pearson correlation,
# root-mean-squared error and mean absolute error.
R2 = regressor.score(X_test, y_test)
r = np.corrcoef(np.ravel(y_test), y_predict)[0, 1]
RMSE = sqrt(mean_squared_error(y_test, y_predict))
MAE = mean_absolute_error(y_test, y_predict)
print("Results for Ridge Regression:")
print('r:{}, R2:{}, RMSE:{}, MAE:{}'.format(r, R2, RMSE, MAE))

Results for Ridge Regression:
r:0.8968434602682603, R2:0.8033789130663713, RMSE:0.03757787891539698, MAE:0.029032660688129756


# Gradient Boosting

#### Parameter Tuning

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Candidate hyperparameter values handed to the project helper `crossvalidation`
# (not defined in this notebook; presumably star-imported from scripts.* -- confirm).
# NOTE(review): in the output saved below, n_estimators=1000 -- the smallest
# candidate -- is selected in every fold shown, so smaller values may do as well.
gb_param_grid = {
    "learning_rate": [0.001, 0.01, 0.1, 1],
    "n_estimators": [1000, 1750, 2500, 3000],
}

# The trailing 10 is passed positionally; presumably the number of folds -- TODO confirm.
# The call is the cell's last expression, so its return value is displayed.
crossvalidation(X_train, y_train, GradientBoostingRegressor(), "Gradient Boosting", gb_param_grid, 10)

Best Parameters for 0 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:0, r:0.957141489689406, R2:0.9159748444862165, RMSE:0.0006302847080024128, MAE:0.01937678232198121
Best Parameters for 1 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:1, r:0.9637114925793848, R2:0.9271436995688056, RMSE:0.0006881515729795329, MAE:0.02077889104468467
Best Parameters for 2 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:2, r:0.9578386267469311, R2:0.9170156365385005, RMSE:0.0006432781587828538, MAE:0.019580416115716058
Best Parameters for 3 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:3, r:0.947657514488872, R2:0.8975491632740051, RMSE:0.0007006850528661222, MAE:0.020521040046334132
Best Parameters for 4 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:4, r:0.949551824587114, R2:0.9014756386800322, RMSE:0.0006638456925921475, MAE:0.020311048385555322
Best Parameters for 5 fold: {'learning_rate': 0.1, 'n_estimators': 1000}
fold:5, r:0.9552463233653969, R2:0.90

(0.9637114925793848,
 0.9271436995688056,
 0.0006007729828754824,
 0.019103844134928986,
 [{'learning_rate': 0.1, 'n_estimators': 1000},
  {'learning_rate': 0.1, 'n_estimators': 1000},
  {'learning_rate': 0.1, 'n_estimators': 1000},
  {'learning_rate': 0.1, 'n_estimators': 1000}])

#### Testing the model

In [None]:
# Imported here as well so this cell runs after the data-preparation cells alone,
# without first executing the expensive tuning cell. The metrics are imported by
# name because a bare `import sklearn` does not guarantee that the
# `sklearn.metrics` submodule has been loaded.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Hyperparameters selected in the cross-validation output above. A fixed
# random_state makes the fitted model, and hence the metrics, repeatable.
# (Outputs saved in this notebook were produced without a seed and may differ.)
regressor = GradientBoostingRegressor(learning_rate=0.1, n_estimators=1000, random_state=42)
# np.ravel flattens the target before it is handed to fit.
regressor.fit(X_train, np.ravel(y_train))
y_predict = regressor.predict(X_test)

# Held-out metrics: coefficient of determination, Pearson correlation,
# root-mean-squared error and mean absolute error.
R2 = regressor.score(X_test, y_test)
r = np.corrcoef(np.ravel(y_test), y_predict)[0, 1]
RMSE = sqrt(mean_squared_error(y_test, y_predict))
MAE = mean_absolute_error(y_test, y_predict)
print("Results for Gradient Boosting:")
print('r:{}, R2:{}, RMSE:{}, MAE:{}'.format(r, R2, RMSE, MAE))

Results for Gradient Boosting:
r:0.9563145341408525, R2:0.9143962728533649, RMSE:0.025764945036356936, MAE:0.02003376158504617
