In [27]:
%matplotlib inline

In [28]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [29]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [30]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [31]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [32]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [34]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)
#print(randomCV.cv_results_)



{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'max_features': 4, 'min_samples_leaf': 3, 'min_samples_split': 2}


In [35]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [36]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [3, None], 'max_features': [1, 3, 10], 'min_samples_split': [2, 3, 10], 'min_samples_leaf': [1, 3, 10], 'bootstrap': [True, False], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [37]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [38]:
grid_search.cv_results_['mean_test_score']

array([0.83528102, 0.80411797, 0.81413467, 0.7902059 , 0.81691708,
       0.80523094, 0.82025598, 0.80523094, 0.82415136, 0.84974958,
       0.84195882, 0.82860323, 0.83806344, 0.85475793, 0.84529772,
       0.83528102, 0.84529772, 0.84084585, 0.81413467, 0.83472454,
       0.82136895, 0.82470785, 0.82804674, 0.82192543, 0.83249861,
       0.83361157, 0.83027268, 0.92932666, 0.92821369, 0.90984975,
       0.90762382, 0.90873678, 0.91263216, 0.88425153, 0.87312187,
       0.87479132, 0.93656093, 0.93322204, 0.93210907, 0.92431831,
       0.93600445, 0.9148581 , 0.91040623, 0.9148581 , 0.90317195,
       0.93989983, 0.92877017, 0.92264886, 0.93043962, 0.92654424,
       0.91986644, 0.90094602, 0.91263216, 0.90372844, 0.78797997,
       0.79521425, 0.83027268, 0.81023929, 0.80634391, 0.81691708,
       0.80411797, 0.82248191, 0.82749026, 0.85754035, 0.83528102,
       0.83861992, 0.8558709 , 0.82081247, 0.83249861, 0.84641068,
       0.84474124, 0.82526433, 0.84140234, 0.82971619, 0.82749

In [39]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=None, max_features=3,
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=3, min_weight_fraction_leaf=0.0,
            n_estimators=50, n_jobs=None, oob_score=False,
            random_state=None, verbose=0, warm_start=False)

In [None]:
yy = linreg.predict(x_test2)