# Randomized Grid Search

<span>Manual hyperparameter searching? No way. Scikit-learn has an amazing randomized grid search class, <code>RandomizedSearchCV</code>, that can give us a hint about the best parameters: instantiate it, hand it a dictionary of candidate values for each parameter, and let it fly. The example below uses a K-Nearest Neighbours model. After the randomized grid search is done, you can pull the best parameters for your model, as well as take a look at the history of the parameter combinations that were tried.</span>
    
### Import Preliminaries

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier



# Load the iris dataset and pull out the feature matrix and the label vector.
iris = load_iris()
X = iris.data
y = iris.target

# Baseline k-NN estimator; the search below overrides n_neighbors, weights and p.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    metric='minkowski',
    p=2,
)

# Candidate values for every hyperparameter the search is allowed to vary
# (10 * 2 * 2 = 40 possible combinations in total).
grid = {
    'n_neighbors': list(range(1, 11)),
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
}

# Try 10 sampled combinations, scoring each by accuracy under 10-fold
# cross-validation. No random_state is passed, so the sampled combinations
# can differ from one run to the next.
random_search = RandomizedSearchCV(
    estimator=classifier,
    param_distributions=grid,
    n_iter=10,
    scoring='accuracy',
    n_jobs=1,
    refit=True,
    cv=10,
    return_train_score=True,
)
random_search.fit(X, y)

RandomizedSearchCV(cv=10, error_score='raise',
          estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['uniform', 'distance'], 'p': [1, 2]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='accuracy', verbose=0)

### Randomized Grid Search Score

In [5]:
# Report the winning parameter combination and its mean cross-validated accuracy.
best_params = random_search.best_params_
best_score = random_search.best_score_
print('Best parameters: %s' % (best_params,))
print('CV Accuracy of best parameters: %.3f' % (best_score,))

Best parameters: {'weights': 'uniform', 'p': 2, 'n_neighbors': 5}
CV Accuracy of best parameters: 0.967


- This method is more computationally viable than a full grid search
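- The result will change each time the model is fitted, because no `random_state` is set and a different set of parameter combinations is sampled on every run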

### Baseline Cross Validation Score

In [6]:
# Baseline: mean 10-fold cross-validated accuracy of the untuned classifier,
# for comparison with the best score found by the randomized search.
# cross_val_score is imported from sklearn.model_selection in the import cell;
# the old sklearn.cross_validation module was deprecated in scikit-learn 0.18
# and removed in 0.20, so importing from it fails on current versions.
baseline_scores = cross_val_score(classifier, X, y, cv=10,
                                  scoring='accuracy', n_jobs=1)
print('Baesline with default parameters: %.3f' % np.mean(baseline_scores))

Baesline with default parameters: 0.967


### Viewing Randomized Grid Score

In [7]:
# The grid_scores_ attribute is deprecated,
# but I'll use it until it is removed completely.
# NOTE(review): scikit-learn's documentation says grid_scores_ is unavailable
# from version 0.20 onward, where cv_results_ is the replacement -- confirm
# against the installed version before re-running this cell.
random_search.grid_scores_



[mean: 0.96667, std: 0.04472, params: {'weights': 'uniform', 'p': 2, 'n_neighbors': 5},
 mean: 0.96667, std: 0.04472, params: {'weights': 'uniform', 'p': 2, 'n_neighbors': 6},
 mean: 0.94000, std: 0.06289, params: {'weights': 'uniform', 'p': 1, 'n_neighbors': 2},
 mean: 0.96667, std: 0.04472, params: {'weights': 'uniform', 'p': 2, 'n_neighbors': 10},
 mean: 0.96667, std: 0.04472, params: {'weights': 'distance', 'p': 2, 'n_neighbors': 3},
 mean: 0.96667, std: 0.04472, params: {'weights': 'distance', 'p': 2, 'n_neighbors': 8},
 mean: 0.96667, std: 0.04472, params: {'weights': 'distance', 'p': 2, 'n_neighbors': 4},
 mean: 0.95333, std: 0.05207, params: {'weights': 'uniform', 'p': 1, 'n_neighbors': 7},
 mean: 0.96000, std: 0.04422, params: {'weights': 'uniform', 'p': 1, 'n_neighbors': 10},
 mean: 0.96000, std: 0.05333, params: {'weights': 'distance', 'p': 2, 'n_neighbors': 2}]

In [8]:
# The newer cv_results_ attribute holds the results as a dict of arrays,
# one array entry per sampled parameter combination.
# Load it into a DataFrame to make it readable and preview the first three rows.
cv_results_frame = pd.DataFrame(random_search.cv_results_)
cv_results_frame.head(3)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_weights,param_p,param_n_neighbors,params,split0_test_score,split1_test_score,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0.00027,3.6e-05,0.000499,0.000112,uniform,2,5,"{'weights': 'uniform', 'p': 2, 'n_neighbors': 5}",1.0,0.933333,...,0.962963,0.97037,0.985185,0.962963,0.977778,0.962963,0.962963,0.97037,0.968889,0.007258
1,0.000277,2e-05,0.00048,7e-05,uniform,2,6,"{'weights': 'uniform', 'p': 2, 'n_neighbors': 6}",1.0,0.933333,...,0.97037,0.977778,0.985185,0.955556,0.985185,0.97037,0.97037,0.97037,0.972593,0.008148
2,0.000273,1.9e-05,0.000436,2.4e-05,uniform,1,2,"{'weights': 'uniform', 'p': 1, 'n_neighbors': 2}",1.0,0.933333,...,0.97037,0.977778,0.97037,0.962963,0.985185,0.97037,0.97037,0.97037,0.972593,0.005785


In [9]:
# Here is the raw output: cv_results_ displayed as-is, a dict whose values
# are arrays holding one entry per sampled parameter combination.
random_search.cv_results_

{'mean_fit_time': array([0.00026991, 0.00027733, 0.00027254, 0.00025287, 0.0002615 ,
        0.00026371, 0.0002893 , 0.00025344, 0.00025284, 0.00027139]),
 'std_fit_time': array([3.61297300e-05, 2.04961784e-05, 1.91800153e-05, 1.24148105e-05,
        3.63166831e-05, 2.07953627e-05, 7.74256544e-05, 1.33292832e-05,
        1.25028894e-05, 4.69197890e-05]),
 'mean_score_time': array([0.00049944, 0.00047967, 0.00043576, 0.00044491, 0.00044494,
        0.00049927, 0.0004462 , 0.0004365 , 0.00044463, 0.00045156]),
 'std_score_time': array([1.11876495e-04, 6.98170411e-05, 2.39042539e-05, 2.31534885e-05,
        2.36329944e-05, 6.97777868e-05, 2.43472535e-05, 2.11077342e-05,
        2.22234893e-05, 4.22938700e-05]),
 'param_weights': masked_array(data=['uniform', 'uniform', 'uniform', 'uniform', 'distance',
                    'distance', 'distance', 'uniform', 'uniform',
                    'distance'],
              mask=[False, False, False, False, False, False, False, False,
              

Author: Kavi Sekhon