# Randomized Search


1. Random search **differs from grid search**. Instead of providing a discrete set of values to explore for each hyperparameter (a parameter grid), we provide a statistical distribution for each hyperparameter to sample from.
2. Values for the **different hyperparameters are picked at random** from this combined distribution.
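3. The motivation for using random search in place of grid search is that, in many cases, hyperparameters are not equally important. With the same budget of trials, random search tries more distinct values of each important hyperparameter, whereas a grid repeats the same few values many times.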

### Importing Libraries

In [2]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
%matplotlib inline

### Loading dataset

In [3]:
# Load the handwritten-digits dataset bundled with scikit-learn.
# Source: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
digits = load_digits()
X = digits.data    # feature matrix
y = digits.target  # class labels

### Selecting a Classifier 

In [6]:
# Base estimator whose hyperparameters are tuned by the searches below.
n_trees = 50  # number of trees in the forest
clf = RandomForestClassifier(n_estimators=n_trees)

### Specify Parameters

In [7]:
# Search space for the randomized search.
# Lists are sampled uniformly; sp_randint(low, high) draws integers from the
# half-open range [low, high), so the upper bound itself is never returned.
param_dist = {
    "max_depth": [3, None],                  # shallow trees vs. unlimited depth
    "max_features": sp_randint(1, 11),       # integers 1..10
    "min_samples_split": sp_randint(2, 11),  # integers 2..10
    "min_samples_leaf": sp_randint(1, 11),   # integers 1..10
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

### Running RandomizedSearch

In [15]:
# Configure the randomized search: `samples` parameter settings are drawn from
# param_dist and each one is cross-validated.
# cv is left at the library default (5-fold in scikit-learn >= 0.22; older
# releases defaulted to 3-fold).
samples = 10  # number of random parameter settings to try
randomCV = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_dist,
    n_iter=samples,
)

In [16]:
# Fit the randomized search on the full dataset, then report the best
# parameter setting it found.
randomCV.fit(X, y)
best_random_params = randomCV.best_params_
print(best_random_params)

{'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'max_features': 6, 'min_samples_leaf': 4, 'min_samples_split': 10}


In [17]:
# Discrete grid for the exhaustive search: every combination of the values
# below is evaluated (2 * 3 * 3 * 3 * 2 * 2 = 216 candidates).
param_grid = {
    "max_depth": [3, None],
    "max_features": [1, 3, 10],
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

In [11]:
# Exhaustive search: cross-validate every combination in param_grid.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid)
# NOTE(review): `start` records the wall-clock time just before fitting, but
# nothing in this cell reads it back, so no elapsed time is reported here.
start = time()
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [12]:
# Display the parameter combination that scored best in the grid search.
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [13]:
# Display the mean cross-validated test score, one entry per parameter
# combination tried by the grid search.
grid_search.cv_results_['mean_test_score']

array([0.81969824, 0.82139431, 0.82082173, 0.81583411, 0.83863045,
       0.8141597 , 0.81801764, 0.82583565, 0.80525379, 0.85142371,
       0.84696998, 0.84196534, 0.83864593, 0.84364903, 0.84754565,
       0.84808883, 0.84474466, 0.8375178 , 0.81803931, 0.81024915,
       0.81805169, 0.82582482, 0.82306097, 0.82416744, 0.8258403 ,
       0.81302228, 0.82527546, 0.93156763, 0.93046425, 0.92209842,
       0.91153822, 0.90874342, 0.91153358, 0.87313835, 0.8814918 ,
       0.87645621, 0.93769421, 0.93045961, 0.92712628, 0.92377283,
       0.93100898, 0.92766481, 0.89929434, 0.90875271, 0.91042556,
       0.92934386, 0.9349087 , 0.92768647, 0.92266636, 0.92323429,
       0.92600898, 0.90820644, 0.90709223, 0.89985608, 0.80967502,
       0.82305323, 0.81303002, 0.81082637, 0.80467348, 0.81414887,
       0.82081244, 0.80244197, 0.81025224, 0.8508697 , 0.84141597,
       0.83974776, 0.83696534, 0.84531414, 0.83472764, 0.84253791,
       0.84140823, 0.85142216, 0.82415816, 0.82193903, 0.82304

In [14]:
# Display the estimator configured with the best parameters found by the
# grid search.
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, max_features=3, min_samples_split=3,
                       n_estimators=50)