In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

In [2]:
data = load_iris()
X = data.data
y = data.target

### GridSearchCV
Finds best parameters but will take too much time if our dataset is too large & if we have many hyperparameters to be tuned.

In [6]:
param_grid = {
    "n_estimators":[20,60,100,120],
    "max_features":[.2, .6, 1.0],
    "max_depth":[2,8],
    "max_samples":[.5, .75, 1.0]
}
# (4X3X2X3) = 72 combinations of hyperparameters

In [7]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()

In [8]:
from sklearn.model_selection import GridSearchCV

rf_grid = GridSearchCV(estimator=rf,
                      param_grid=param_grid,
                      cv = 5,   # number of folds.  'cv' : int or kfold object
                      verbose=2,
                      n_jobs=-1)

In [9]:
rf_grid.fit(X, y)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


 0.96       0.94666667        nan        nan        nan        nan
 0.95333333 0.95333333 0.94666667 0.95333333 0.94666667 0.95333333
 0.95333333 0.94666667        nan        nan        nan        nan
 0.95333333 0.95333333 0.96       0.96       0.94666667 0.94666667
 0.95333333 0.95333333        nan        nan        nan        nan
 0.94666667 0.95333333 0.96       0.94666667 0.94666667 0.94666667
 0.94666667 0.94666667        nan        nan        nan        nan
 0.96       0.95333333 0.94666667 0.96666667 0.95333333 0.96
 0.95333333 0.95333333        nan        nan        nan        nan
 0.95333333 0.94666667 0.96       0.95333333 0.96666667 0.96
 0.96666667 0.95333333        nan        nan        nan        nan]


GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': [2, 8], 'max_features': [0.2, 0.6, 1.0],
                         'max_samples': [0.5, 0.75, 1.0],
                         'n_estimators': [20, 60, 100, 120]},
             verbose=2)

In [10]:
rf_grid.best_params_

{'max_depth': 8, 'max_features': 0.6, 'max_samples': 0.5, 'n_estimators': 120}

In [11]:
rf_grid.best_score_

0.9666666666666668

### RandomSearchCV
From total combinations of hyperparameters, it will take only few combinations randomly.

Find better hyperparameters. It will take few time even if we have large dataset & more number of hyperparameters to be tuned. 

In [12]:
param_grid = {
    "n_estimators":[20,60,100,120],
    "max_features":[.2, .6, 1.0],
    "max_depth":[2,8],
    "max_samples":[.5, .75, 1.0],
    "bootstrap":[True, False],
    "min_samples_split":[2, 5],
    "min_samples_leaf":[1, 2]
}

In [13]:
from sklearn.model_selection import RandomizedSearchCV

rf_grid = RandomizedSearchCV(estimator=rf,
                            param_distributions=param_grid,
                            cv = 5,
                            verbose=2,
                            n_jobs=-1)

In [14]:
rf_grid.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


 0.95333333 0.92666667 0.95333333 0.93333333]


RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': [2, 8],
                                        'max_features': [0.2, 0.6, 1.0],
                                        'max_samples': [0.5, 0.75, 1.0],
                                        'min_samples_leaf': [1, 2],
                                        'min_samples_split': [2, 5],
                                        'n_estimators': [20, 60, 100, 120]},
                   verbose=2)

In [15]:
rf_grid.best_params_

{'n_estimators': 120,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_samples': 0.75,
 'max_features': 0.2,
 'max_depth': 8,
 'bootstrap': True}

In [16]:
rf_grid.best_score_

0.9533333333333334

### GridSearchCV using Pipeline

In [17]:
from sklearn.datasets import load_boston
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Lasso

In [2]:
data = load_boston()
X = data.data
y = data.target

In [10]:
pipe = Pipeline([('scaling', StandardScaler()),
                ("poly", PolynomialFeatures()),
                ("las", Lasso())])
pipe

Pipeline(steps=[('scaling', StandardScaler()), ('poly', PolynomialFeatures()),
                ('las', Lasso())])

In [25]:
param_grid = {
    "poly__degree":[1, 2, 3],
    "las__alpha":[0.1, 0.5, 1.0, 1.2]
}

In [26]:
grid = GridSearchCV(estimator=pipe,
            param_grid=param_grid,
            cv = KFold(n_splits=5, shuffle=True),  # cv = int or kfold object.   here we can write 'cv = 5'
            verbose=2,
            n_jobs=-1)

In [27]:
grid.fit(X, y)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=True),
             estimator=Pipeline(steps=[('scaling', StandardScaler()),
                                       ('poly', PolynomialFeatures()),
                                       ('las', Lasso())]),
             n_jobs=-1,
             param_grid={'las__alpha': [0.1, 0.5, 1.0, 1.2],
                         'poly__degree': [1, 2, 3]},
             verbose=2)

In [28]:
grid.best_params_

{'las__alpha': 0.1, 'poly__degree': 3}

In [29]:
grid.best_score_

0.8664450945020479