In [2]:
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import pickle


#### Loading data after preprocessing

In [3]:
data_train  = pd.read_csv("../data/alt_maccsfp_after_preprocessing.csv")

#### Splitting data into training and test sets

In [4]:
y = data_train['ALT']
del data_train['ALT']
X = data_train.values
y = y.values

# podzielenie danych na dwa zbiory testowy i treningowy
# 
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43122)
# print(X_train.shape)
# print(X_test.shape)

#### Split dataset into k consecutive folds

In [5]:
cv_outer = KFold(n_splits=5, shuffle=True, random_state=132312)


#### LASSO 

In [6]:
cv_results=pd.DataFrame()
outer_results = []
for train_index, test_index in cv_outer.split(X):
    results = []
    # split data
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # configure the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=75579)
    # define the model
    model = Lasso()
    # define search space
    hyperparams_grid = {'alpha': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1], 'random_state': [321321, 321]}
    # define search
    search = GridSearchCV(model, hyperparams_grid, scoring='r2', cv=cv_inner, return_train_score=True, verbose=1000)
    search_fit = search.fit(X_train, y_train)
    cv_result = pd.DataFrame(search_fit.cv_results_)
    cv_results = cv_results.append(cv_result, ignore_index=True)
    
#     # get the best performing model fit on the whole training set and evaluate model on the hold out dataset
#     best_model = search_fit.best_estimator_
#     yhat = best_model.predict(X_test)
#     # evaluate the model
#     r2 = r2_score(y_test, yhat)
#     # store the result
#     outer_results.append((r2, search_fit.best_estimator_, search_fit.best_params_))
    

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.909, test=0.082), total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.937, test=-0.577), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.902, test=0.659), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.904, test=0.1

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.856, test=0.601), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.894, test=-0.334), total=   0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.914, test=-0.645), total=   0.0s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.881, test=0.730), total=   0.0s
[Parallel(n_jobs=1)]: Done  23 out of  23 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ...........

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=1, random_state=321321, score=(train=0.000, test=-0.260), total=   0.0s
[Parallel(n_jobs=1)]: Done  74 out of  74 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321321 ....................................
[CV]  alpha=1, random_state=321321, score=(train=0.000, test=-0.577), total=   0.0s
[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.341), total=   0.0s
[Parallel(n_jobs=1)]: Done  76 out of  76 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.142), total=   0.0s
[Parallel(n_jobs=1)]: Done  77 out of  77 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=1, random_state=321 .......................................
[CV]  alpha=1, random_state=321, score=(train=0.000, test=-0.099),

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.0001, random_state=321, score=(train=0.863, test=-0.150), total=   0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.856, test=-0.110), total=   0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.790, test=0.389), total=   0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.788, test=0.820), total=   0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(tra

  positive)
  positive)


[CV]  alpha=0.05, random_state=321321, score=(train=0.524, test=-0.549), total=   0.0s
[Parallel(n_jobs=1)]: Done  44 out of  44 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321321 .................................
[CV]  alpha=0.05, random_state=321321, score=(train=0.507, test=-0.145), total=   0.0s
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321 ....................................
[CV]  alpha=0.05, random_state=321, score=(train=0.296, test=0.201), total=   0.0s
[Parallel(n_jobs=1)]: Done  46 out of  46 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321 ....................................
[CV]  alpha=0.05, random_state=321, score=(train=0.234, test=0.219), total=   0.0s
[Parallel(n_jobs=1)]: Done  47 out of  47 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.05, random_state=321 ....................................
[CV]  alpha=0.05, random_state=321, score=(train=0.405, 

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.0001, random_state=321321, score=(train=0.942, test=-0.422), total=   0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.892, test=0.565), total=   0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.926, test=-0.097), total=   0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=0.930, test=0.568), total=   0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321 ..................................
[CV]  alpha=0.0001, random_state=321, score=(train=

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.001, random_state=321, score=(train=0.941, test=-0.658), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.864, test=0.409), total=   0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.906, test=-0.163), total=   0.0s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(train=0.904, test=0.613), total=   0.0s
[Parallel(n_jobs=1)]: Done  23 out of  23 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.005, random_state=321321 ................................
[CV]  alpha=0.005, random_state=321321, score=(tr

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.914, test=-1.740), total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.803, test=0.223), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.891, test=-1.973), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001, random_state=321321 ...............................
[CV]  alpha=0.0001, random_state=321321, score=(train=0.874, test=0.

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)



[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.873, test=0.441), total=   0.0s
[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321321 ................................
[CV]  alpha=0.001, random_state=321321, score=(train=0.841, test=0.712), total=   0.0s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.913, test=-1.874), total=   0.0s
[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.802, test=-0.112), total=   0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed

[CV]  alpha=0.5, random_state=321321, score=(train=0.000, test=-0.111), total=   0.0s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.5, random_state=321321 ..................................
[CV]  alpha=0.5, random_state=321321, score=(train=0.000, test=-0.162), total=   0.0s
[Parallel(n_jobs=1)]: Done  65 out of  65 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.5, random_state=321 .....................................
[CV]  alpha=0.5, random_state=321, score=(train=0.000, test=-0.341), total=   0.0s
[Parallel(n_jobs=1)]: Done  66 out of  66 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.5, random_state=321 .....................................
[CV]  alpha=0.5, random_state=321, score=(train=0.000, test=-0.028), total=   0.0s
[Parallel(n_jobs=1)]: Done  67 out of  67 | elapsed:    0.3s remaining:    0.0s
[CV] alpha=0.5, random_state=321 .....................................
[CV]  alpha=0.5, random_state=321, score=(train=0.000, tes

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV]  alpha=0.001, random_state=321321, score=(train=0.889, test=-0.271), total=   0.0s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.830, test=0.218), total=   0.0s
[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.961, test=0.150), total=   0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.847, test=0.550), total=   0.0s
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001, random_state=321 ...................................
[CV]  alpha=0.001, random_state=321, score=(train=0.887,

[CV]  alpha=0.5, random_state=321321, score=(train=0.000, test=-0.145), total=   0.0s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5, random_state=321321 ..................................
[CV]  alpha=0.5, random_state=321321, score=(train=0.000, test=-0.014), total=   0.0s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5, random_state=321321 ..................................
[CV]  alpha=0.5, random_state=321321, score=(train=0.000, test=-0.036), total=   0.0s
[Parallel(n_jobs=1)]: Done  65 out of  65 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5, random_state=321 .....................................
[CV]  alpha=0.5, random_state=321, score=(train=0.000, test=-0.077), total=   0.0s
[Parallel(n_jobs=1)]: Done  66 out of  66 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5, random_state=321 .....................................
[CV]  alpha=0.5, random_state=321, score=(train=0.000, 

  positive)
  positive)
  positive)
  positive)


In [7]:
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.011234,0.004052,0.000580,0.000477,0.0001,321321,"{'alpha': 0.0001, 'random_state': 321321}",0.081784,-0.576876,0.658899,...,0.215315,0.473908,1,0.908730,0.936930,0.901935,0.903999,0.858035,0.901926,0.025307
1,0.008772,0.000396,0.000193,0.000386,0.0001,321,"{'alpha': 0.0001, 'random_state': 321}",0.081784,-0.576876,0.658899,...,0.215315,0.473908,1,0.908730,0.936930,0.901935,0.903999,0.858035,0.901926,0.025307
2,0.007779,0.000746,0.000200,0.000399,0.001,321321,"{'alpha': 0.001, 'random_state': 321321}",-0.488586,-0.652391,0.752100,...,0.066022,0.562743,3,0.907960,0.935206,0.900654,0.902892,0.855584,0.900459,0.025625
3,0.007580,0.000489,0.000599,0.000489,0.001,321,"{'alpha': 0.001, 'random_state': 321}",-0.488586,-0.652391,0.752100,...,0.066022,0.562743,3,0.907960,0.935206,0.900654,0.902892,0.855584,0.900459,0.025625
4,0.007181,0.001716,0.000598,0.000488,0.005,321321,"{'alpha': 0.005, 'random_state': 321321}",-0.333851,-0.644716,0.729874,...,0.038449,0.530305,5,0.893734,0.914382,0.880782,0.884284,0.820435,0.878723,0.031399
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0.000792,0.000396,0.000200,0.000399,0.1,321,"{'alpha': 0.1, 'random_state': 321}",-0.076896,-0.350389,-0.120338,...,-0.092804,0.147203,11,0.000000,0.236312,0.070218,0.231905,0.225216,0.152730,0.098634
76,0.000591,0.000483,0.000399,0.000489,0.5,321321,"{'alpha': 0.5, 'random_state': 321321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
77,0.000602,0.000492,0.000199,0.000399,0.5,321,"{'alpha': 0.5, 'random_state': 321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
78,0.000794,0.000736,0.000405,0.000497,1,321321,"{'alpha': 1, 'random_state': 321321}",-0.076896,-0.353958,-0.144606,...,-0.125246,0.122645,13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [8]:
groups = cv_results.groupby(by=["param_alpha"])

In [12]:
mean_values_for_param_alpha = groups.agg({'mean_test_score': "mean", 'std_test_score': "mean",'mean_train_score': 'mean', 'std_train_score': 'mean'})
mean_values_for_param_alpha

Unnamed: 0_level_0,mean_test_score,std_test_score,mean_train_score,std_train_score
param_alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0001,0.100119,0.534449,0.882728,0.033169
0.001,-0.043088,0.631843,0.881327,0.033469
0.005,0.015747,0.517264,0.860372,0.036214
0.01,0.025394,0.491311,0.821563,0.041201
0.05,-0.058239,0.23422,0.440519,0.092154
0.1,-0.106899,0.149966,0.117751,0.069424
0.5,-0.126543,0.104771,0.0,0.0
1.0,-0.126543,0.104771,0.0,0.0


In [13]:
mean_values_for_param_alpha.sort_values('mean_test_score', ascending=False).iloc[:1]

Unnamed: 0_level_0,mean_test_score,std_test_score,mean_train_score,std_train_score
param_alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0001,0.100119,0.534449,0.882728,0.033169


In [30]:
results_train_score = []
results_test_score = []

for i, (train_index, test_index) in enumerate(cv_outer.split(X)):
    # split data
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    lasso_model = Lasso(alpha=0.0001)
    lasso_model.fit(X_train, y_train)
    # save the model to disk
    filename = f"../models/finalized_lasso_model_{i}.pickle"
    pickle.dump(lasso_model, open(filename, 'wb'))
    Y_pred_train = lasso_model.predict(X_train)
    train_score = lasso_model.score(X_train, y_train)
    Y_pred_test = lasso_model.predict(X_test)
    test_score = lasso_model.score(X_test, y_test)
    results_train_score.append(train_score)
    results_test_score.append(test_score) 

  positive)
  positive)
  positive)
  positive)
  positive)


In [17]:
results_train_score

[0.8978198129574055,
 0.8316430940888204,
 0.9243966868013909,
 0.8600882740545753,
 0.8797293821531054]

In [18]:
results_test_score

[-0.3355183296514179,
 0.6352750539824603,
 0.45292336384936305,
 0.45103901296219606,
 0.792432980678393]

In [19]:
np.mean(results_train_score)

0.8787354500110596

In [None]:
np.mean(results_test_score)

In [None]:
# print(grid_cv_lr_fit.best_score_)
# print(grid_cv_lr_fit.best_params_)
# print(grid_cv_lr_fit.cv_results_)

In [None]:
# lasso_model = Lasso(alpha =  0.5)
# lasso_model.fit(X_train, y_train)

In [None]:
# Y_pred_train = lasso_model.predict(X_train)
# print("Accuracy R2 --> ", lasso_model.score(X_train, y_train))

In [None]:
# Y_pred_test = lasso_model.predict(X_test)
# print("Accuracy R2 --> ", lasso_model.score(X_test, y_test))

#### RANDOM FOREST REGRESSOR

In [None]:
# param_grid_rfr={"n_estimators": [1, 5, 10, 20, 30, 40, 50, 100],
#             "max_features": ["auto", "sqrt", "log2"],
#             "min_samples_split": [2, 4, 8, 16],
#             "bootstrap": [True, False],
# }

In [None]:
# grid_RandomForestRegressor = GridSearchCV(RandomForestRegressor(), param_grid_rfr, scoring='r2', cv=kf, n_jobs=-1, return_train_score=True, verbose=1000)

# grid_RandomForestRegressor.fit(X_train, y_train)

In [None]:
# print(grid_RandomForestRegressor.best_score_)
# print(grid_RandomForestRegressor.best_params_)
# print(grid_RandomForestRegressor.cv_results_)

In [None]:
# rfr_model = RandomForestRegressor(bootstrap =  False, max_features = 'log2', min_samples_split = 8, n_estimators = 40, random_state=12312)
# rfr_model.fit(X_train, y_train)
# filename = 'finalized_model.sav'
# pickle.dump(rfr_model, open(filename, 'wb'))

In [None]:
# Y_pred_train_rfr = rfr_model.predict(X_train)
# print("Accuracy R2 --> ", rfr_model.score(X_train, y_train))

In [None]:
# Y_pred_test_rfr = rfr_model.predict(X_test)
# print("Accuracy R2 --> ", rfr_model.score(X_test, y_test))