In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import pickle


#### Loading data after preprocessing

In [2]:
data_train  = pd.read_csv("../data/alt_maccsfp_after_preprocessing.csv")

#### Splitting data into training and test sets

In [3]:
y = data_train['ALT']
del data_train['ALT']
X = data_train.values
y = y.values

# podzielenie danych na dwa zbiory testowy i treningowy
# 
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43122)
# print(X_train.shape)
# print(X_test.shape)

#### Split dataset into k consecutive folds

In [4]:
cv_outer = KFold(n_splits=5, shuffle=True, random_state=132312)

#### LASSO 

In [5]:
cv_results=pd.DataFrame()
outer_results = []
for train_index, test_index in cv_outer.split(X):
    results = []
    # split data
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # configure the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=75579)
    # define the model
    model = Lasso()
    # define search space
    hyperparams_grid = {'alpha': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1], 'random_state': [321321, 321]}
    # define search
    search = GridSearchCV(model, hyperparams_grid, scoring='r2', cv=cv_inner, return_train_score=True, verbose=1000)
    search_fit = search.fit(X_train, y_train)
    cv_result = pd.DataFrame(search_fit.cv_results_)
    cv_results = cv_results.append(cv_result, ignore_index=True)
    
#     # get the best performing model fit on the whole training set and evaluate model on the hold out dataset
#     best_model = search_fit.best_estimator_
#     yhat = best_model.predict(X_test)
#     # evaluate the model
#     r2 = r2_score(y_test, yhat)
#     # store the result
#     outer_results.append((r2, search_fit.best_estimator_, search_fit.best_params_))
    

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.909, test=0.082), total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.937, test=-0.577), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.902, test=0.659), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.904, test=0.1

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.001, random_state=321, score=(train=0.901, test=0.752), total=   0.0s
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.903, test=0.118), total=   0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.856, test=0.601), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.894, test=-0.334), total=   0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.9

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)



[Parallel(n_jobs=1)]: Done  56 out of  56 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.1, random_state=321 .....................................
[CV]  alpha=0.1, random_state=321, score=(train=0.144, test=-0.099), total=   0.0s
[Parallel(n_jobs=1)]: Done  57 out of  57 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.1, random_state=321 .....................................
[CV]  alpha=0.1, random_state=321, score=(train=0.206, test=0.200), total=   0.0s
[Parallel(n_jobs=1)]: Done  58 out of  58 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.1, random_state=321 .....................................
[CV]  alpha=0.1, random_state=321, score=(train=0.249, test=-0.093), total=   0.0s
[Parallel(n_jobs=1)]: Done  59 out of  59 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.1, random_state=321 .....................................
[CV]  alpha=0.1, random_state=321, score=(train=0.044, test=-0.546), total=   0.0s
[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    0.3s rem

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.0001, random_state=321, score=(train=0.863, test=-0.150), total=   0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.856, test=-0.110), total=   0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.790, test=0.389), total=   0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.788, test=0.820), total=   0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(tra

  positive)



[CV]  alpha=0.05, random_state=321, score=(train=0.405, test=0.196), total=   0.0s
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321 ....................................
[CV]  alpha=0.05, random_state=321, score=(train=0.524, test=-0.549), total=   0.0s
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321 ....................................
[CV]  alpha=0.05, random_state=321, score=(train=0.507, test=-0.145), total=   0.0s
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.1, random_state=321321 ..................................
[CV]  alpha=0.1, random_state=321321, score=(train=0.000, test=-0.030), total=   0.0s
[Parallel(n_jobs=1)]: Done  51 out of  51 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.1, random_state=321321 ..................................
[CV]  alpha=0.1, random_state=321321, score=(train=0.000, 

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.0001, random_state=321, score=(train=0.926, test=-0.097), total=   0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.930, test=0.568), total=   0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.938, test=0.193), total=   0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.942, test=-0.422), total=   0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)



[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321 ...................................
[CV]  alpha=0.005, random_state=321, score=(train=0.904, test=0.613), total=   0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321 ...................................
[CV]  alpha=0.005, random_state=321, score=(train=0.916, test=-0.213), total=   0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321 ...................................
[CV]  alpha=0.005, random_state=321, score=(train=0.923, test=-0.534), total=   0.0s
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.01, random_state=321321 .................................
[CV]  alpha=0.01, random_state=321321, score=(train=0.826, test=0.396), total=   0.0s
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:   

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)



[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.803, test=0.223), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.891, test=-1.973), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.874, test=0.445), total=   0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.843, test=0.723), total=   0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.001, random_state=321, score=(train=0.873, test=0.441), total=   0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.841, test=0.712), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.898, test=-1.339), total=   0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.786, test=0.139), total=   0.0s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.831, test=0.284), total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.962, test=0.207), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.849, test=0.702), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.888, test=0.18

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)



[CV]  alpha=0.001, random_state=321, score=(train=0.961, test=0.150), total=   0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.847, test=0.550), total=   0.0s
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.887, test=-0.064), total=   0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.889, test=-0.271), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.80

[CV]  alpha=1, random_state=321321, score=(train=0.000, test=-0.036), total=   0.0s
[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.077), total=   0.0s
[Parallel(n_jobs=1)]: Done  76 out of  76 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.354), total=   0.0s
[Parallel(n_jobs=1)]: Done  77 out of  77 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.145), total=   0.0s
[Parallel(n_jobs=1)]: Done  78 out of  78 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.014), to

  positive)
  positive)
  positive)
  positive)


In [6]:
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.009582,1.361630e-03,0.000398,0.000487,0.0001,321321,"{'alpha': 0.0001, 'random_state': 321321}",0.081784,-0.576876,0.658899,...,0.215315,0.473908,1,0.908730,0.936930,0.901935,0.903999,0.858035,0.901926,0.025307
1,0.008973,6.284088e-04,0.000599,0.000489,0.0001,321,"{'alpha': 0.0001, 'random_state': 321}",0.081784,-0.576876,0.658899,...,0.215315,0.473908,1,0.908730,0.936930,0.901935,0.903999,0.858035,0.901926,0.025307
2,0.009365,7.890553e-04,0.000614,0.000502,0.001,321321,"{'alpha': 0.001, 'random_state': 321321}",-0.488586,-0.652391,0.752100,...,0.066022,0.562743,3,0.907960,0.935206,0.900654,0.902892,0.855584,0.900459,0.025625
3,0.009170,1.172666e-03,0.000597,0.000487,0.001,321,"{'alpha': 0.001, 'random_state': 321}",-0.488586,-0.652391,0.752100,...,0.066022,0.562743,3,0.907960,0.935206,0.900654,0.902892,0.855584,0.900459,0.025625
4,0.008177,2.308838e-03,0.000604,0.000494,0.005,321321,"{'alpha': 0.005, 'random_state': 321321}",-0.333851,-0.644716,0.729874,...,0.038449,0.530305,5,0.893734,0.914382,0.880782,0.884284,0.820435,0.878723,0.031399
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0.000997,3.989506e-07,0.000000,0.000000,0.1,321,"{'alpha': 0.1, 'random_state': 321}",-0.076896,-0.350389,-0.120338,...,-0.092804,0.147203,11,0.000000,0.236312,0.070218,0.231905,0.225216,0.152730,0.098634
76,0.000798,3.989220e-04,0.000200,0.000399,0.5,321321,"{'alpha': 0.5, 'random_state': 321321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
77,0.000399,4.886360e-04,0.000399,0.000488,0.5,321,"{'alpha': 0.5, 'random_state': 321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
78,0.000599,4.887334e-04,0.000199,0.000399,1,321321,"{'alpha': 1, 'random_state': 321321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [7]:
groups = cv_results.groupby(by=["param_alpha"])

In [8]:
mean_values_for_param_alpha = groups.mean()
mean_values_for_param_alpha

Unnamed: 0_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
param_alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0.0001,0.009993,0.001665,0.000459,0.000489,-0.053721,0.124893,0.098021,0.169409,0.161991,0.100119,0.534449,3.0,0.867638,0.883648,0.891274,0.89326,0.877819,0.882728,0.033169
0.001,0.008598,0.001266,0.000321,0.000325,-0.26156,0.036106,-0.15641,0.108239,0.058184,-0.043088,0.631843,6.2,0.866083,0.882247,0.890045,0.892037,0.876225,0.881327,0.033469
0.005,0.005087,0.00181,0.000379,0.000404,-0.129376,0.050434,0.02262,-0.00144,0.136494,0.015747,0.517264,7.0,0.847257,0.862738,0.86767,0.871698,0.852495,0.860372,0.036214
0.01,0.002094,0.000615,0.00034,0.000404,-0.090861,0.100947,0.118463,-0.114334,0.112757,0.025394,0.491311,7.8,0.81256,0.827379,0.828403,0.835696,0.803777,0.821563,0.041201
0.05,0.000999,0.000491,0.000359,0.000404,-0.103428,-0.005061,0.151957,-0.224915,-0.109746,-0.058239,0.23422,8.2,0.401796,0.41365,0.440236,0.519158,0.427757,0.440519,0.092154
0.1,0.000997,0.000365,0.000258,0.000296,-0.171579,-0.071401,0.02353,-0.119659,-0.195387,-0.106899,0.149966,10.2,0.073404,0.109274,0.095722,0.188208,0.122144,0.117751,0.069424
0.5,0.000878,0.000514,0.0003,0.000365,-0.157959,-0.107466,-0.05427,-0.130134,-0.182887,-0.126543,0.104771,9.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,0.000937,0.00057,0.00022,0.000379,-0.157959,-0.107466,-0.05427,-0.130134,-0.182887,-0.126543,0.104771,9.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
mean_values_for_param_alpha.sort_values('mean_test_score', ascending=False).iloc[:1]

Unnamed: 0_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
param_alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0.0001,0.009993,0.001665,0.000459,0.000489,-0.053721,0.124893,0.098021,0.169409,0.161991,0.100119,0.534449,3.0,0.867638,0.883648,0.891274,0.89326,0.877819,0.882728,0.033169


In [10]:
results_train_score = []
results_test_score = []

for i, (train_index, test_index) in enumerate(cv_outer.split(X)):
    # split data
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    lasso_model = Lasso(alpha=0.0001)
    lasso_model.fit(X_train, y_train)
    # save the model to disk
    filename = f"../models/finalized_lasso_model_{i}.pickle"
    pickle.dump(lasso_model, open(filename, 'wb'))
    Y_pred_train = lasso_model.predict(X_train)
    train_score = lasso_model.score(X_train, y_train)
    Y_pred_test = lasso_model.predict(X_test)
    test_score = lasso_model.score(X_test, y_test)
    results_train_score.append(train_score)
    results_test_score.append(test_score) 

  positive)
  positive)
  positive)
  positive)
  positive)


In [11]:
results_train_score

[0.8978198129574055,
 0.8316430940888204,
 0.9243966868013909,
 0.8600882740545753,
 0.8797293821531054]

In [12]:
results_test_score

[-0.3355183296514179,
 0.6352750539824603,
 0.45292336384936305,
 0.45103901296219606,
 0.792432980678393]

In [13]:
np.mean(results_train_score)

0.8787354500110596

In [14]:
np.mean(results_test_score)

0.39923041636419887

In [15]:
# print(grid_cv_lr_fit.best_score_)
# print(grid_cv_lr_fit.best_params_)
# print(grid_cv_lr_fit.cv_results_)

In [16]:
# lasso_model = Lasso(alpha =  0.5)
# lasso_model.fit(X_train, y_train)

In [17]:
# Y_pred_train = lasso_model.predict(X_train)
# print("Accuracy R2 --> ", lasso_model.score(X_train, y_train))

In [18]:
# Y_pred_test = lasso_model.predict(X_test)
# print("Accuracy R2 --> ", lasso_model.score(X_test, y_test))

#### RANDOM FOREST REGRESSOR

In [25]:
# param_grid_rfr={"n_estimators": [1, 5, 10, 20, 30, 40, 50, 100],
#             "max_features": ["auto", "sqrt", "log2"],
#             "min_samples_split": [2, 4, 8, 16],
#             "bootstrap": [True, False],
# }

In [27]:
# grid_RandomForestRegressor = GridSearchCV(RandomForestRegressor(), param_grid_rfr, scoring='r2', cv=kf, n_jobs=-1, return_train_score=True, verbose=1000)

# grid_RandomForestRegressor.fit(X_train, y_train)

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pick


Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    2.0s
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed:    2.0s
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    2.0s

Pickling array (shape=(79,), dt

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  72 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:    2.6s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done  7


Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    3.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:    3.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed:    3.5s

Pickling array 

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 186 tasks      | elapsed:    4.3s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 188 tasks      | elapsed:    4.3s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), 

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.1s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 254 tasks      | elapsed:    5.1s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 256 tasks      | elapsed:    5.1s
Pickling array (


Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 302 tasks      | elapsed:    5.7s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 304 tasks      | elapsed:    5.7s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:    5.8s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array 

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 364 tasks      | elapsed:    6.5s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 366 tasks      | elapsed:    6.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), 

[Parallel(n_jobs=-1)]: Done 422 tasks      | elapsed:    7.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:    7.5s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:    7.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs


Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).[Parallel(n_jobs=-1)]: Done 484 tasks      | elapsed:    8.6s

Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 486 tasks      | elapsed:    8.6s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 488 tasks      | elapsed:    8.6s

Pickling array 

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 538 tasks      | elapsed:    9.2s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 540 tasks      | elapsed:    9.2s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 54

Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 608 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 610 tasks      | elapsed:   10.1s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=

[Parallel(n_jobs=-1)]: Done 666 tasks      | elapsed:   10.7s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 668 tasks      | elapsed:   10.7s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
[Parallel(n_jobs=-1)]: Done 670 tasks      | elapsed:   10.7s
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 736 tasks      | elapsed:   11.3s
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 738 tasks      | elapsed:   11.3s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 740 tasks      | elapsed:   11.3s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 788 tasks      | elapsed:   11.8s
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 790 tasks      | elapsed:   11.8s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:   11.9s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (

Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 852 tasks      | elapsed:   12.4s
Pickling array (shape=(79, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 854 tasks      | elapsed:   12.4s

Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 856 tasks      | elapsed:   12.5s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs


Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(64,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 914 tasks      | elapsed:   13.0s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(79, 124), dtype=float64).
Pickling array (shape=(79,), dtype=float64).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(63,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 916 tasks      | elapsed:   13.0s
[Parallel(n_jobs=-1)]: Done 918 tasks      | elapsed:   13.1s
Pickling array (shape=(79, 124), dtype=float64).
Pickling array 

GridSearchCV(cv=KFold(n_splits=5, random_state=132312, shuffle=True),
             error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=None,
                                             v

In [28]:
# print(grid_RandomForestRegressor.best_score_)
# print(grid_RandomForestRegressor.best_params_)
# print(grid_RandomForestRegressor.cv_results_)

0.4809421135443153
{'bootstrap': True, 'max_features': 'log2', 'min_samples_split': 2, 'n_estimators': 1}
{'mean_fit_time': array([0.00738039, 0.02274146, 0.03949647, 0.07160854, 0.11210103,
       0.1364356 , 0.16176844, 0.3283484 , 0.00359964, 0.03111753,
       0.03111649, 0.06443572, 0.09315605, 0.10212665, 0.13284187,
       0.33131509, 0.00439224, 0.01596026, 0.03191462, 0.09374981,
       0.12307177, 0.15079608, 0.17194018, 0.26747899, 0.005583  ,
       0.01615834, 0.02473445, 0.04807086, 0.0746007 , 0.10571756,
       0.12366328, 0.23098283, 0.00319514, 0.01076884, 0.02872343,
       0.0440824 , 0.06821547, 0.08258123, 0.11230054, 0.30757737,
       0.00378923, 0.01555719, 0.03630395, 0.05685377, 0.09175682,
       0.12526002, 0.13463492, 0.21801782, 0.00458794, 0.01176758,
       0.02074642, 0.04648166, 0.07140822, 0.08038268, 0.10930815,
       0.22220588, 0.00359168, 0.01136937, 0.02154236, 0.05186462,
       0.06502271, 0.0835772 , 0.11289868, 0.28882666, 0.00418653,
     

In [29]:
# rfr_model = RandomForestRegressor(bootstrap =  False, max_features = 'log2', min_samples_split = 8, n_estimators = 40, random_state=12312)
# rfr_model.fit(X_train, y_train)
# filename = 'finalized_model.sav'
# pickle.dump(rfr_model, open(filename, 'wb'))

In [23]:
# Y_pred_train_rfr = rfr_model.predict(X_train)
# print("Accuracy R2 --> ", rfr_model.score(X_train, y_train))

In [24]:
# Y_pred_test_rfr = rfr_model.predict(X_test)
# print("Accuracy R2 --> ", rfr_model.score(X_test, y_test))