## GRID SEARCH

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_diabetes
from sklearn.metrics import root_mean_squared_error


In [3]:
# Load the diabetes regression dataset once and take both the features and the
# target from the same Bunch, instead of calling load_diabetes() twice.
diabetes = load_diabetes(as_frame=True)

X = diabetes['data']                # feature matrix as a pandas DataFrame
y = diabetes['target'].to_numpy()   # target kept as a NumPy array, as before

### Base Model

In [11]:
# Baseline: hold out 20% of the rows, fit a RandomForestRegressor with default
# hyper-parameters on the rest, and report RMSE on the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)

# fit() returns the estimator itself, so construction and training can be chained.
base_model = RandomForestRegressor(random_state=23).fit(X_train, y_train)

test_preds = base_model.predict(X_test)
print(f'test rmse: {root_mean_squared_error(y_test, test_preds)}')

test rmse: 59.80016244200251


In [None]:
# Hyper-parameter grid: 6 * 5 * 2 = 60 candidate configurations.
param_grid = {
    'n_estimators': [5, 10, 20, 30, 40, 50],
    'max_depth': [2, 3, 4, 5, 6],
    'criterion': ['squared_error', 'absolute_error'],
}

# 5-fold cross validation, shuffled with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=23)

# Base estimator handed to the search; the seed keeps each forest deterministic.
regressor = RandomForestRegressor(random_state=23)

# Exhaustive search over param_grid, scored by negated RMSE (higher is better).
#
# The splitter object itself is passed as `cv` rather than `k_fold.split(...)`:
# the latter is a one-shot generator, so a second `model.fit(...)` on the same
# search object (or a clone of it) would find the folds already exhausted.
# Because k_fold has a fixed integer random_state, the folds are the same as before.
#
# NOTE(review): the search is fitted on the full X, y (not X_train), so
# best_score_ is a cross-validated estimate and is not directly comparable to
# the hold-out RMSE printed for the base model.
model = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=k_fold,
    verbose=3,
    return_train_score=True,
)
model.fit(X=X, y=y)

In [15]:
# The search was scored with 'neg_root_mean_squared_error', so best_score_ is a
# negated RMSE; flip the sign to read it as a plain (positive) RMSE.
-model.best_score_

56.89101506653416

In [16]:
# Show the winning estimator chosen by the search (available because `refit`
# was left at its default when the search was constructed).
model.best_estimator_

In [17]:
# Hyper-parameter combination from param_grid that achieved best_score_.
model.best_params_

{'criterion': 'squared_error', 'max_depth': 3, 'n_estimators': 40}

## RANDOMIZED SEARCH

In [20]:
from sklearn.model_selection import RandomizedSearchCV

In [23]:
# Candidate values to sample from: 10 * 9 * 4 = 360 possible combinations,
# of which the search below evaluates n_iter=100.
param_dist = {
    'n_estimators': list(range(2, 21, 2)),
    'max_depth': list(range(2, 11)),
    'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],
}

# Randomized search reusing `regressor` and `k_fold` from the grid-search cell.
#
# The splitter object itself is passed as `cv` rather than `k_fold.split(...)`:
# the latter is a one-shot generator, so a second `model.fit(...)` on the same
# search object (or a clone of it) would find the folds already exhausted.
# Because k_fold has a fixed integer random_state, the folds are the same as before.
#
# NOTE: this rebinds `model`, replacing the fitted GridSearchCV from the
# previous section.
model = RandomizedSearchCV(
    estimator=regressor,
    param_distributions=param_dist,
    n_iter=100,
    scoring='neg_root_mean_squared_error',
    cv=k_fold,
    verbose=3,
    return_train_score=True,
    random_state=23,  # fixes which parameter combinations get sampled
)
model.fit(X=X, y=y)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5] END criterion=friedman_mse, max_depth=5, n_estimators=6;, score=(train=-41.693, test=-59.722) total time=   0.0s
[CV 2/5] END criterion=friedman_mse, max_depth=5, n_estimators=6;, score=(train=-43.498, test=-57.464) total time=   0.0s
[CV 3/5] END criterion=friedman_mse, max_depth=5, n_estimators=6;, score=(train=-41.089, test=-63.552) total time=   0.0s
[CV 4/5] END criterion=friedman_mse, max_depth=5, n_estimators=6;, score=(train=-43.467, test=-58.168) total time=   0.0s
[CV 5/5] END criterion=friedman_mse, max_depth=5, n_estimators=6;, score=(train=-41.062, test=-58.050) total time=   0.1s
[CV 1/5] END criterion=poisson, max_depth=7, n_estimators=2;, score=(train=-43.138, test=-69.621) total time=   0.0s
[CV 2/5] END criterion=poisson, max_depth=7, n_estimators=2;, score=(train=-40.407, test=-62.525) total time=   0.0s
[CV 3/5] END criterion=poisson, max_depth=7, n_estimators=2;, score=(train=-42.330, test=-70.

In [25]:
# `model` is now the randomized search; its best_score_ is a negated RMSE
# (scoring='neg_root_mean_squared_error'), so negate it to get a positive RMSE.
-model.best_score_

56.9237440630359