In [1]:
import pandas as pd
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, RepeatedKFold, StratifiedKFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor


#### Loading data after preprocessing

In [2]:
data_train  = pd.read_csv("../data/alt_maccsfp_after_preprocessing.csv")

#### Splitting data into training and test sets

In [3]:
y = data_train['ALT']
del data_train['ALT']
X = data_train.values
y = y.values

# podzielenie danych na dwa zbiory testowy i treningowy

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(X_test.shape)

(77, 124)
(20, 124)


#### Split dataset into k consecutive folds

In [4]:
kf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=666)
kf_split_generator = kf.split(X)

#### LASSO 

In [5]:
param_grid = {'alpha':[0.0001,0.001, 0.005, 0.01,0.05,0.1,0.5,1]}
grid_cv_lr = GridSearchCV(Lasso(), param_grid, scoring='r2', cv=kf, return_train_score=True, verbose=1000)
grid_cv_lr_fit = grid_cv_lr.fit(X_train, y_train)

Fitting 10 folds for each of 8 candidates, totalling 80 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] alpha=0.0001 ....................................................
[CV] ... alpha=0.0001, score=(train=0.933, test=-0.277), total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001 ....................................................
[CV] .... alpha=0.0001, score=(train=0.930, test=0.085), total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001 ....................................................
[CV] .... alpha=0.0001, score=(train=0.957, test=0.605), total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] alpha=0.0001 ....................................................
[CV] .. alpha=0.0001, score=(train=0.943, test=-10.005), total=   0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapse

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[CV] ..... alpha=0.001, score=(train=0.890, test=0.037), total=   0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.001 .....................................................
[CV] ..... alpha=0.001, score=(train=0.942, test=0.388), total=   0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005 .....................................................
[CV] .... alpha=0.005, score=(train=0.910, test=-0.241), total=   0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005 .....................................................
[CV] ..... alpha=0.005, score=(train=0.919, test=0.047), total=   0.0s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.1s remaining:    0.0s
[CV] alpha=0.005 .....................................................
[CV] ..... alpha=0.005, score=(train=0.937, test=0.624), total=   0.0s
[Parallel(n_jobs=1)]: Done  23 out of  23

[CV] ...... alpha=0.5, score=(train=0.000, test=-0.000), total=   0.0s
[Parallel(n_jobs=1)]: Done  65 out of  65 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5 .......................................................
[CV] ...... alpha=0.5, score=(train=0.000, test=-0.005), total=   0.0s
[Parallel(n_jobs=1)]: Done  66 out of  66 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5 .......................................................
[CV] ...... alpha=0.5, score=(train=0.000, test=-0.407), total=   0.0s
[Parallel(n_jobs=1)]: Done  67 out of  67 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5 .......................................................
[CV] ...... alpha=0.5, score=(train=0.000, test=-0.249), total=   0.0s
[Parallel(n_jobs=1)]: Done  68 out of  68 | elapsed:    0.2s remaining:    0.0s
[CV] alpha=0.5 .......................................................
[CV] ...... alpha=0.5, score=(train=0.000, test=-0.188), total=   0.0s
[Parallel(n_jobs=1)]: Done  69 out of  69

  positive)


In [6]:
print(grid_cv_lr_fit.best_score_)
print(grid_cv_lr_fit.best_params_)
print(grid_cv_lr_fit.cv_results_)

-0.17553082962797123
{'alpha': 0.5}
{'mean_fit_time': array([0.0095022 , 0.00759869, 0.00508685, 0.00199568, 0.00069821,
       0.00049875, 0.00049865, 0.00049896]), 'std_fit_time': array([0.0016866 , 0.00048628, 0.0023718 , 0.00063237, 0.00045709,
       0.00049875, 0.00049865, 0.00049896]), 'mean_score_time': array([0.00028899, 0.00049191, 0.00040443, 0.00030077, 0.00019944,
       0.00019937, 0.00029926, 0.00030258]), 'std_score_time': array([0.00044144, 0.00049206, 0.00049535, 0.00045948, 0.00039887,
       0.00039873, 0.00045713, 0.00046228]), 'param_alpha': masked_array(data=[0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'params': [{'alpha': 0.0001}, {'alpha': 0.001}, {'alpha': 0.005}, {'alpha': 0.01}, {'alpha': 0.05}, {'alpha': 0.1}, {'alpha': 0.5}, {'alpha': 1}], 'split0_test_score': array([-0.27739531, -0.34364342, -0.24071569, -0.02145069,  0.1624372

In [7]:
lasso_model = Lasso(alpha =  0.5)
lasso_model.fit(X_train, y_train)

Lasso(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [8]:
Y_pred_train = lasso_model.predict(X_train)
print("Accuracy R2 --> ", lasso_model.score(X_train, y_train))

Accuracy R2 -->  0.0


In [9]:
Y_pred_test = lasso_model.predict(X_test)
print("Accuracy R2 --> ", lasso_model.score(X_test, y_test))

Accuracy R2 -->  -0.42003467540699146


#### RANDOM FOREST REGRESSOR

In [10]:
param_grid_rfr={"n_estimators": [1, 5, 10, 20, 30, 40, 50, 100],
            "max_features": ["auto", "sqrt", "log2"],
            "min_samples_split": [2, 4, 8, 16],
            "bootstrap": [True, False],
}

In [11]:
grid_RandomForestRegressor = GridSearchCV(RandomForestRegressor(), param_grid_rfr, scoring='r2', cv=kf, n_jobs=-1, return_train_score=True, verbose=1000)

grid_RandomForestRegressor.fit(X_train, y_train)

Fitting 10 folds for each of 192 candidates, totalling 1920 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pi

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed:    1.6s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    1.6s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1952s.) Setting batch_size=2.
Pickling ar

[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed:    2.1s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:    2.1s
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    2.1s
[Parallel(n_jobs


Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed:    2.9s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    3.0s


[Parallel(n_jobs=-1)]: Done 204 tasks      | elapsed:    3.6s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 208 tasks      | elapsed:    3.7s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).


[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:    4.2s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    4.2s


[Parallel(n_jobs=-1)]: Done 344 tasks      | elapsed:    4.9s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).[Parallel(n_jobs=-1)]: Done 348 tasks      | elapsed:    4.9s

Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).


Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:    5.5s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 428 tasks      | elapsed:    5.6s


Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 496 tasks      | elapsed:    6.1s
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jo

Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 568 tasks      | elapsed:    6.8s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 572 tasks      | elapsed:    6.8s



Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:    7.4s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 648 tasks      | elapsed:    7.5s
Pickling array (shape=(77, 124), dtype=float64).

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 744 tasks      | elapsed:    8.3s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 748 tasks      | elapsed:    8.3s



Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 828 tasks      | elapsed:    9.0s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 832 tasks      | elapsed:    9.1s
Pickling array (shape=(77, 124), dtype=float64).


Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 904 tasks      | elapsed:    9.7s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 908 tasks      | elapsed:    9.7s
Pickling array (shape=(77, 124), dtype=float64).


Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 976 tasks      | elapsed:   10.3s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 980 tasks      | elapsed:   10.3s
Pickling array (shape=(77, 124), dtype=float64).

Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1044 tasks      | elapsed:   10.9s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), d

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 1224 tasks      | elapsed:   12.5s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 1228 tasks      | elapsed:   12.5s

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1300 tasks      | elapsed:   13.1s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_j

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1368 tasks      | elapsed:   13.6s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_j


Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1460 tasks      | elapsed:   14.2s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 1464 tasks      | elapsed:   14.3

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 1544 tasks      | elapsed:   14.8s

Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).[Parallel(n_jobs=-1)]: Done 1548 tasks      | elapsed:   14.9s

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1632 tasks      | elapsed:   15.4s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1636 tasks      | elapsed:   15.5

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1716 tasks      | elapsed:   16.0s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(77, 124), dtype=float64).
Pickling arr

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1800 tasks      | elapsed:   16.6s
Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1804 tasks      | elapsed:   16.7

Pickling array (shape=(77, 124), dtype=float64).
Pickling array (shape=(77,), dtype=float64).
Pickling array (shape=(61,), dtype=int32).
Pickling array (shape=(16,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
Pickling array (shape=(62,), dtype=int32).
Pickling array (shape=(15,), dtype=int32).
[Parallel(n_jobs=-1)]: Done 1920 out of 1920 | elapsed:   17.6s finished


GridSearchCV(cv=RepeatedKFold(n_repeats=2, n_splits=5, random_state=666),
             error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=...
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=None,
                                           

In [12]:
print(grid_RandomForestRegressor.best_score_)
print(grid_RandomForestRegressor.best_params_)
print(grid_RandomForestRegressor.cv_results_)

0.35469867273169475
{'bootstrap': False, 'max_features': 'log2', 'min_samples_split': 4, 'n_estimators': 5}
{'mean_fit_time': array([0.0048877 , 0.01795211, 0.02802849, 0.05266051, 0.07041178,
       0.10003045, 0.12207284, 0.23367646, 0.00319099, 0.01176715,
       0.02293837, 0.05485377, 0.10651491, 0.10352411, 0.12336843,
       0.22779129, 0.00299134, 0.0124655 , 0.02503316, 0.04338419,
       0.0656234 , 0.0831773 , 0.10681353, 0.20854349, 0.00249214,
       0.01146986, 0.02213953, 0.04118726, 0.06183584, 0.08148134,
       0.10023279, 0.20355806, 0.00358832, 0.01017189, 0.02034781,
       0.03849258, 0.05794461, 0.07799129, 0.09514449, 0.19118991,
       0.00299358, 0.01047187, 0.01934855, 0.0368016 , 0.0545542 ,
       0.07460096, 0.09195142, 0.18300996, 0.00259326, 0.00927472,
       0.01864941, 0.03710108, 0.05455391, 0.07260525, 0.09165547,
       0.17862103, 0.00219407, 0.01017206, 0.01934862, 0.03769708,
       0.05365512, 0.07041256, 0.09135752, 0.17712591, 0.00299191,
   

In [13]:
rfr_model = RandomForestRegressor(bootstrap =  False, max_features = 'log2', min_samples_split = 8, n_estimators = 40, random_state=12312)
rfr_model.fit(X_train, y_train)

RandomForestRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='log2', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=8, min_weight_fraction_leaf=0.0,
                      n_estimators=40, n_jobs=None, oob_score=False,
                      random_state=12312, verbose=0, warm_start=False)

In [14]:
Y_pred_train_rfr = rfr_model.predict(X_train)
print("Accuracy R2 --> ", rfr_model.score(X_train, y_train))

Accuracy R2 -->  0.8714416125621454


In [15]:
Y_pred_test_rfr = rfr_model.predict(X_test)
print("Accuracy R2 --> ", rfr_model.score(X_test, y_test))

Accuracy R2 -->  0.30129275158332314
