Random Search Cross Validation in Scikit-Learn


```
`# This is formatted as code`
```
![alt text](https://)


In [1]:
import pandas as pd
import numpy as np

data = pd.read_csv('20000_outband_QoS_ITU-QoE.csv')

X = data[['outbandQoS_DL_TP (kbps)', 'outbandQoS_UL_TP (kbps)', 'outbandQoS_RTT (ms)', 'outbandQoS_LOSS (ratio)']]
y = data[['QoE_ITU_JT_046_VP9_1280x780']]


In [2]:
from sklearn.model_selection import train_test_split

# Setting random seed
seed = 100
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.34, random_state = seed )
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = seed)

display(X_train.head(5))
display(y_train.shape)
display(y_test.shape)

Unnamed: 0,outbandQoS_DL_TP (kbps),outbandQoS_UL_TP (kbps),outbandQoS_RTT (ms),outbandQoS_LOSS (ratio)
1259,4249.768555,823.830444,26.435621,0.011158
5991,5614.546875,5574.541504,89.251411,0.001102
8998,1281.026489,217.695068,21.618298,0.009606
18072,774.683411,1045.737671,1.940019,0.024569
12949,275.871155,1180.889526,24.316751,0.001282


(16000, 1)

(4000, 1)

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

rf = RandomForestRegressor(random_state = 100)
from pprint import pprint
# Look at parameters used by our current forest
print('Parameters currently in use:\n')
pprint(rf.get_params())


# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 1000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(4, 20, num = 10)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 7]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 3]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Altogether, there are 2 * 12 * 2 * 3 * 3 * 10 = 4320 settings
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
pprint(random_grid)

Parameters currently in use:

{'bootstrap': True,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 10,
 'n_jobs': 1,
 'oob_score': False,
 'random_state': 100,
 'verbose': 0,
 'warm_start': False}
{'bootstrap': [True, False],
 'max_depth': [4, 5, 7, 9, 11, 12, 14, 16, 18, 20, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 3],
 'min_samples_split': [2, 5, 7],
 'n_estimators': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000]}


  from numpy.core.umath_tests import inner1d


In [4]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestRegressor()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 5, verbose=2, random_state=100, n_jobs = -1)
# Fit the random search model
y_train = np.ravel(y_train)
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] bootstrap=True, n_estimators=670, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=2 
[CV] bootstrap=True, n_estimators=670, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=2 
[CV] bootstrap=True, n_estimators=670, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=2 
[CV] bootstrap=True, n_estimators=670, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=2 
[CV] bootstrap=True, n_estimators=670, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=2 
[CV] bootstrap=True, n_estimators=890, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=1 
[CV] bootstrap=True, n_estimators=890, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=1 
[CV] bootstrap=True, n_estimators=890, max_features=auto, min_samples_split=5, max_depth=12, min_samples_leaf=1 
[CV]  bootstrap=True, n_estimator

[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  1.5min


[CV] bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2 
[CV]  bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2, total=   0.2s
[CV] bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2 
[CV]  bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2, total=   0.2s
[CV] bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2 
[CV]  bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2, total=   0.2s
[CV] bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2 
[CV]  bootstrap=False, n_estimators=10, max_features=sqrt, min_samples_split=7, max_depth=5, min_samples_leaf=2, total=   0.2s
[CV] bootstrap=False, n_estimators=10, max_f

[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  8.1min


[CV]  bootstrap=False, n_estimators=10, max_features=auto, min_samples_split=5, max_depth=20, min_samples_leaf=1, total=   1.4s
[CV] bootstrap=False, n_estimators=450, max_features=auto, min_samples_split=7, max_depth=9, min_samples_leaf=3 
[CV]  bootstrap=False, n_estimators=10, max_features=auto, min_samples_split=5, max_depth=20, min_samples_leaf=1, total=   1.3s
[CV] bootstrap=False, n_estimators=450, max_features=auto, min_samples_split=7, max_depth=9, min_samples_leaf=3 
[CV]  bootstrap=False, n_estimators=1000, max_features=sqrt, min_samples_split=7, max_depth=16, min_samples_leaf=2, total= 1.0min
[CV] bootstrap=False, n_estimators=450, max_features=auto, min_samples_split=7, max_depth=9, min_samples_leaf=3 
[CV]  bootstrap=False, n_estimators=1000, max_features=sqrt, min_samples_split=7, max_depth=16, min_samples_leaf=2, total= 1.0min
[CV] bootstrap=False, n_estimators=450, max_features=auto, min_samples_split=7, max_depth=9, min_samples_leaf=3 
[CV]  bootstrap=False, n_estimat

[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed: 22.0min


[CV]  bootstrap=False, n_estimators=450, max_features=sqrt, min_samples_split=5, max_depth=20, min_samples_leaf=1, total=  32.7s
[CV] bootstrap=False, n_estimators=670, max_features=auto, min_samples_split=2, max_depth=18, min_samples_leaf=1 
[CV]  bootstrap=False, n_estimators=560, max_features=auto, min_samples_split=5, max_depth=11, min_samples_leaf=3, total=  48.0s
[CV] bootstrap=False, n_estimators=670, max_features=auto, min_samples_split=2, max_depth=18, min_samples_leaf=1 
[CV]  bootstrap=False, n_estimators=560, max_features=auto, min_samples_split=5, max_depth=11, min_samples_leaf=3, total=  47.8s
[CV] bootstrap=False, n_estimators=670, max_features=auto, min_samples_split=2, max_depth=18, min_samples_leaf=1 
[CV]  bootstrap=False, n_estimators=560, max_features=auto, min_samples_split=5, max_depth=11, min_samples_leaf=3, total=  47.8s
[CV] bootstrap=False, n_estimators=450, max_features=sqrt, min_samples_split=5, max_depth=7, min_samples_leaf=1 
[CV]  bootstrap=False, n_esti

[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 32.2min finished


RandomizedSearchCV(cv=5, error_score='raise',
          estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'bootstrap': [True, False], 'n_estimators': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'max_features': ['auto', 'sqrt'], 'min_samples_split': [2, 5, 7], 'max_depth': [4, 5, 7, 9, 11, 12, 14, 16, 18, 20, None], 'min_samples_leaf': [1, 2, 3]},
          pre_dispatch='2*n_jobs', random_state=100, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [5]:
rf_random.best_params_

{'bootstrap': True,
 'max_depth': 9,
 'max_features': 'sqrt',
 'min_samples_leaf': 2,
 'min_samples_split': 5,
 'n_estimators': 230}

In [6]:
def evaluate(model, X_test, y_test):
    predictions = model.predict(X_test)
    y_test = np.ravel(y_test)
    residules = predictions - y_test
    errors = abs(residules)
    mape = 100 * np.mean(errors / y_test)
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    
    MSE = (1/(len(y_test)))*np.sum((residules)**2)
    RMSE_1234= np.sqrt(MSE)
    print('MSE = {:.4f}, RMSE = {:.4f}'.format(MSE,RMSE_1234))
    #print('R^2 = {}'.format(linreg_1234.score(X, y)))

    return accuracy

# rfr = RandomForestRegressor(max_depth=best_params["max_depth"], n_estimators=best_params["n_estimators"], random_state=False, verbose=False)
base_model = RandomForestRegressor(n_estimators = 10, random_state = 100)
base_model.fit(X_train, y_train)
base_accuracy = evaluate(base_model, X_test, y_test)

random_grid = rf_random.best_estimator_
random_grid_accuracy = evaluate(random_grid, X_test, y_test)

print('Improvement of {:0.2f}%.'.format( 100 * (random_grid_accuracy - base_accuracy) / base_accuracy))

Model Performance
Average Error: 0.3526 degrees.
Accuracy = 89.52%.
MSE = 0.2348, RMSE = 0.4845
Model Performance
Average Error: 0.3314 degrees.
Accuracy = 90.11%.
MSE = 0.2036, RMSE = 0.4512
Improvement of 0.65%.


**Grid Search with Cross Validation**


In [7]:
from sklearn.model_selection import GridSearchCV
# Create the parameter grid based on the results of random search 
param_grid = {
    'bootstrap': [True],
    'max_depth': [3, 5, 6, 10, 15, 20],
    'max_features': [2, 3],
    'min_samples_leaf': [2, 3, 4],
    'min_samples_split': [3, 5, 7],
    'n_estimators': [200, 300, 450, 1000]
}
# Create a based model
rf = RandomForestRegressor()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 5, n_jobs = -1, verbose = 2)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 432 candidates, totalling 2160 fits
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV] bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=200, max_features=2, min_samp

[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   17.3s


[CV]  bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5, total=   3.3s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5, total=   3.3s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3, total=  11.0s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=3, total=  11.0s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=2, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=450, max_features=2, min_sam

[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  1.7min


[CV]  bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=3, total=  11.0s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=3, total=  11.0s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5, total=   3.3s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5, total=   3.3s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=3, min_samples_split=5 
[CV]  bootstrap=True, n_estimators=450, max_features=2, min_sam

[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  4.7min


[CV]  bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7, total=   4.6s
[CV] bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7, total=   6.9s
[CV] bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7, total=   6.9s
[CV] bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=3, min_samples_leaf=4, max_depth=3, min_samples_split=7, total=   6.9s
[CV] bootstrap=True, n_estimators=200, max_features=2, min_samples_leaf=2, max_depth=5, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=450, max_features=3, min_sampl

[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed: 10.2min


[CV]  bootstrap=True, n_estimators=450, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=5, total=  10.3s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=5, total=  10.4s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7, total=   4.6s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7, total=   4.6s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=5, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_

[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed: 18.5min


[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7, total=   5.2s
[CV] bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7, total=   5.2s
[CV] bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7, total=   5.2s
[CV] bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7, total=   7.8s
[CV] bootstrap=True, n_estimators=300, max_features=3, min_samples_leaf=3, max_depth=6, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=300, max_features=3, min_samples_

[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed: 33.8min


[CV]  bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=10, min_samples_split=7, total=  39.6s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=2, max_depth=15, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=10, min_samples_split=7, total=  39.5s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=2, max_depth=15, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=1000, max_features=3, min_samples_leaf=4, max_depth=10, min_samples_split=7, total=  39.5s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=2, max_depth=15, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=300, max_features=2, min_samples_leaf=2, max_depth=15, min_samples_split=3, total=  11.6s
[CV] bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=2, max_depth=15, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=1000, max_features=3, 

[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed: 59.5min


[CV]  bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7, total=  19.7s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7, total=  19.4s
[CV] bootstrap=True, n_estimators=1000, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7 
[CV]  bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7, total=  19.3s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=2, max_depth=20, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=450, max_features=2, min_samples_leaf=4, max_depth=20, min_samples_split=7, total=  19.3s
[CV] bootstrap=True, n_estimators=200, max_features=3, min_samples_leaf=2, max_depth=20, min_samples_split=3 
[CV]  bootstrap=True, n_estimators=450, max_features=2, mi

[Parallel(n_jobs=-1)]: Done 2160 out of 2160 | elapsed: 72.2min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'bootstrap': [True], 'n_estimators': [200, 300, 450, 1000], 'max_features': [2, 3], 'min_samples_leaf': [2, 3, 4], 'max_depth': [3, 5, 6, 10, 15, 20], 'min_samples_split': [3, 5, 7]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=2)

In [8]:
grid_search.best_params_

{'bootstrap': True,
 'max_depth': 10,
 'max_features': 2,
 'min_samples_leaf': 2,
 'min_samples_split': 7,
 'n_estimators': 1000}

In [1]:
def evaluate(model, X_test, y_test):
    predictions = model.predict(X_test)
    y_test = np.ravel(y_test)
    residules = predictions - y_test
    errors = abs(residules)
    mape = 100 * np.mean(errors / y_test)
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    
    MSE = (1/(len(y_test)))*np.sum((residules)**2)
    RMSE_1234= np.sqrt(MSE)
    print('MSE = {:.4f}, RMSE = {:.4f}'.format(MSE,RMSE_1234))
    #print('R^2 = {}'.format(linreg_1234.score(X, y)))

    return accuracy

# rfr = RandomForestRegressor(max_depth=best_params["max_depth"], n_estimators=best_params["n_estimators"], random_state=False, verbose=False)
base_model = RandomForestRegressor(n_estimators = 10, random_state = 100)
base_model.fit(X_train, y_train)
base_accuracy = evaluate(base_model, X_test, y_test)

best_grid = grid_search.best_estimator_
grid_accuracy = evaluate(best_grid, X_test, y_test)

print('Improvement of {:0.2f}%.'.format( 100 * (grid_accuracy - base_accuracy) / base_accuracy))

NameError: name 'RandomForestRegressor' is not defined

In [None]:

# def save_model(file, model):
#   """
#   The pickle module implements binary protocols
#   for serializing and de-serializing a Python object structure.
#   """
#   fluff, id = file.split('=')
#   #filename = file

#   import pickle
#   _model_str = pickle.dumps(grid_search.best_estimator_)

#   upload = drive.CreateFile({'id': id})
#   upload.SetContentString(str(_model_str))
#   upload.Upload()


# def load_model(file):
#   """
#   The pickle module implements binary protocols
#   for serializing and de-serializing a Python object structure.
#   """
#   fluff, id = file.split('=')

#   import pickle

#   downloaded = drive.CreateFile({'id':id}) 
#   path_to_pickle = 'itu_model.pickle'
#   downloaded.GetContentFile(path_to_pickle)
#   try:
#       with open(path_to_pickle, 'rb') as handle:
#         my_str_as_bytes = str.encode(handle)
#         model = pickle.load(my_str_as_bytes)
#         return model
#   except pickle.UnpicklingError as error:
#       raise Exception(error)
  
#   return None

# file = 'https://drive.google.com/open?id=16ZEqlBFr_5__mN0gV-zxPBiJi9lnRsCR'
# save_model(file, grid_search.best_estimator_)

# model = load_model(file)
# display(model)