In [1]:
import numpy as np

from scipy.stats import randint as sp_randint

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from sklearn.datasets import load_digits

from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the handwritten-digits dataset bundled with scikit-learn.
# Source: UCI "Optical Recognition of Handwritten Digits"
# https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
digits = load_digits()
X = digits.data    # feature matrix passed to the searches below
y = digits.target  # class labels passed to the searches below

In [3]:
# build a classifier
# random_state pins the forest's internal randomness (bootstrap samples and
# per-split feature sub-sampling) so cross-validation scores, and therefore the
# selected best_params_, are repeatable after Restart Kernel -> Run All.
clf = RandomForestClassifier(n_estimators=50, random_state=42)

### Random Search CV

In [4]:
# Search space for the randomized search.
# Plain lists enumerate the allowed choices; sp_randint(low, high) is a frozen
# scipy distribution over the integers low .. high - 1 (high is exclusive).
param_dist = dict(
    max_depth=[3, None],
    max_features=sp_randint(1, 11),       # integers 1..10
    min_samples_split=sp_randint(2, 11),  # integers 2..10
    min_samples_leaf=sp_randint(1, 11),   # integers 1..10
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [6]:
# run randomized search
samples = 10  # number of parameter settings drawn from param_dist

# random_state fixes which candidate settings are sampled from param_dist, so
# the same 10 configurations are evaluated on every run. cv is not passed and
# therefore stays at the scikit-learn default (5-fold).
randomCV = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=samples,
    random_state=42,
)

In [7]:
# Run the randomized search on the full dataset (X, y). Leaving the call as the
# cell's last bare expression makes the notebook display the search object.
randomCV.fit(X, y)

RandomizedSearchCV(estimator=RandomForestClassifier(n_estimators=50),
                   param_distributions={'bootstrap': [True, False],
                                        'criterion': ['gini', 'entropy'],
                                        'max_depth': [3, None],
                                        'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000016E5B7CF670>,
                                        'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000016E5855F370>,
                                        'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000016E585692E0>})

In [8]:
# Show the parameter setting selected as best by the randomized search.
randomCV.best_params_

{'bootstrap': True,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 5,
 'min_samples_leaf': 2,
 'min_samples_split': 3}

### Grid Search CV

In [9]:
# Exhaustive grid: every combination of the values below is a candidate,
# i.e. 2 * 3 * 3 * 3 * 2 * 2 = 216 parameter settings.
param_grid = dict(
    max_depth=[3, None],
    max_features=[1, 3, 10],
    min_samples_split=[2, 3, 10],
    min_samples_leaf=[1, 3, 10],
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [10]:
# Set up the exhaustive search over param_grid for the shared classifier.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid)

# Fit on the full dataset; kept as the last bare expression so the notebook
# displays the search object.
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [11]:
# Show the parameter setting selected as best by the grid search.
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 2}