# Hyperparameters Tuning with GridSearchCV and RandomizedSearchCV

### Using techniques Grid search and Randomized search for choosing best parameters from the set of given hyperparameters

**IMPORTING LIBRARIES AND DATASET**

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
trainX, testX, train_y, test_y = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

In [6]:
print(trainX.shape)
print(testX.shape)
print(train_y.shape)
print(test_y.shape)

(120, 4)
(30, 4)
(120,)
(30,)


### Simple SVM model

In [7]:
from sklearn.svm import SVC

In [8]:
model = SVC(kernel='rbf', C=10, gamma='auto')
model.fit(trainX, train_y)

SVC(C=10, gamma='auto')

In [9]:
y_pred = model.predict(testX)

In [10]:
model.score(trainX, train_y), model.score(testX, test_y)

(0.9833333333333333, 1.0)

### Trying cross_val_score for hyperparameter tuning using best guess

In [11]:
from sklearn.model_selection import cross_val_score

In [25]:
from sklearn import svm

In [30]:
cross_val_score(svm.SVC(kernel='linear', C=10, gamma='auto'), trainX, train_y, cv=10).mean()

0.95

In [29]:
cross_val_score(svm.SVC(kernel='rbf', C=10, gamma='auto'), trainX, train_y, cv=10).mean()

0.9583333333333334

In [31]:
cross_val_score(svm.SVC(kernel='rbf', C=20, gamma='auto'), trainX, train_y, cv=10).mean()

0.9333333333333332

**METHOD 1 FOR HYPERPARAMETER TUNING**

In [34]:
kernels = ['linear', 'rbf', 'poly']
C = [1, 10, 20, 50]
avg_scores = {}

for kernel in kernels:
    for c in C:
        scores = cross_val_score(svm.SVC(kernel=kernel, C=c, gamma='auto'), iris.data, iris.target, cv=10)
        avg_scores[kernel + '_' + str(c)] = np.mean(scores)
        
        
avg_scores

{'linear_1': 0.9733333333333334,
 'linear_10': 0.9800000000000001,
 'linear_20': 0.9666666666666668,
 'linear_50': 0.9733333333333334,
 'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9733333333333334,
 'rbf_20': 0.9666666666666668,
 'rbf_50': 0.9600000000000002,
 'poly_1': 0.9733333333333334,
 'poly_10': 0.9733333333333334,
 'poly_20': 0.9533333333333334,
 'poly_50': 0.96}

**METHOD 2 FOR HYPERPARAMETER TUNING - USING GridSearchCV and RandomizedSearchCV**

# GridSearchCV

In [35]:
from sklearn.model_selection import GridSearchCV

In [36]:
clf = GridSearchCV(svm.SVC(gamma='auto'), {
    'C': [1, 10, 20, 50],
    'kernel': ['linear', 'rbf', 'poly'],
}, cv=10, return_train_score=False)

clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([0.0010982 , 0.00139565, 0.00289452, 0.0004986 , 0.00089679,
        0.00468709, 0.0004988 , 0.00069835, 0.00668228, 0.00069795,
        0.00079784, 0.01147213]),
 'std_fit_time': array([0.00069876, 0.00048876, 0.00169076, 0.0004986 , 0.00029893,
        0.00244464, 0.0004988 , 0.00045719, 0.00452861, 0.00045692,
        0.00039892, 0.00847107]),
 'mean_score_time': array([5.98406792e-04, 8.97598267e-04, 5.99026680e-04, 3.98588181e-04,
        1.99508667e-04, 0.00000000e+00, 2.99048424e-04, 2.99024582e-04,
        5.00369072e-04, 9.99450684e-05, 2.99239159e-04, 1.98197365e-04]),
 'std_score_time': array([0.0004886 , 0.00053677, 0.00066498, 0.00048817, 0.00039902,
        0.        , 0.00045681, 0.00045677, 0.00050048, 0.00029984,
        0.0004571 , 0.0003964 ]),
 'param_C': masked_array(data=[1, 1, 1, 10, 10, 10, 20, 20, 20, 50, 50, 50],
              mask=[False, False, False, False, False, False, False, False,
                    False, False, False, False],


In [38]:
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001098,0.000699,0.000598,0.000489,1,linear,"{'C': 1, 'kernel': 'linear'}",1.0,0.933333,1.0,1.0,0.866667,1.0,0.933333,1.0,1.0,1.0,0.973333,0.044222,3
1,0.001396,0.000489,0.000898,0.000537,1,rbf,"{'C': 1, 'kernel': 'rbf'}",1.0,0.933333,1.0,1.0,1.0,0.933333,0.933333,1.0,1.0,1.0,0.98,0.030551,1
2,0.002895,0.001691,0.000599,0.000665,1,poly,"{'C': 1, 'kernel': 'poly'}",1.0,1.0,1.0,1.0,0.933333,0.933333,0.866667,1.0,1.0,1.0,0.973333,0.044222,3
3,0.000499,0.000499,0.000399,0.000488,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,1.0,1.0,0.866667,1.0,0.933333,1.0,1.0,1.0,0.98,0.042687,1
4,0.000897,0.000299,0.0002,0.000399,10,rbf,"{'C': 10, 'kernel': 'rbf'}",1.0,0.933333,1.0,1.0,0.866667,1.0,0.933333,1.0,1.0,1.0,0.973333,0.044222,3
5,0.004687,0.002445,0.0,0.0,10,poly,"{'C': 10, 'kernel': 'poly'}",1.0,1.0,1.0,1.0,0.933333,0.933333,0.866667,1.0,1.0,1.0,0.973333,0.044222,3
6,0.000499,0.000499,0.000299,0.000457,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,1.0,1.0,0.866667,0.933333,0.866667,1.0,1.0,1.0,0.966667,0.053748,8
7,0.000698,0.000457,0.000299,0.000457,20,rbf,"{'C': 20, 'kernel': 'rbf'}",1.0,0.933333,1.0,1.0,0.866667,0.933333,0.933333,1.0,1.0,1.0,0.966667,0.044721,8
8,0.006682,0.004529,0.0005,0.0005,20,poly,"{'C': 20, 'kernel': 'poly'}",1.0,0.933333,1.0,0.933333,0.866667,0.933333,0.866667,1.0,1.0,1.0,0.953333,0.052068,12
9,0.000698,0.000457,0.0001,0.0003,50,linear,"{'C': 50, 'kernel': 'linear'}",1.0,1.0,1.0,1.0,0.933333,1.0,0.866667,0.933333,1.0,1.0,0.973333,0.044222,3


In [40]:
df[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.973333
1,1,rbf,0.98
2,1,poly,0.973333
3,10,linear,0.98
4,10,rbf,0.973333
5,10,poly,0.973333
6,20,linear,0.966667
7,20,rbf,0.966667
8,20,poly,0.953333
9,50,linear,0.973333


In [41]:
dir(clf)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_pairwise',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_select_best_index',
 '_validate_data',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 'classes_',
 'cv',
 'cv_results_',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'get_params',
 'inverse_transform',
 'multimetric_',
 'n_features_

In [42]:
clf.best_score_

0.9800000000000001

In [43]:
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

# RandomizeSearchCV

In [44]:
from sklearn.model_selection import RandomizedSearchCV

In [50]:
rclf = RandomizedSearchCV(svm.SVC(gamma='auto'), {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [1, 10, 20, 50]
}, cv=5, return_train_score=False, n_iter=3)

rclf.fit(iris.data, iris.target)
pd.DataFrame(rclf.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,rbf,0.98
1,1,linear,0.98
2,20,poly,0.953333


# CHOOSING THE BEST MODEL

In [53]:
from sklearn import svm
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [54]:
model_params = {
    'svm': {
        'model':svm.SVC(gamma='auto'),
        'params': {
            'kernel': ['linear', 'rbf', 'poly'],
            'C': [1, 10, 20, 50]
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
    'linear_regression': {
        'model': LogisticRegression(),
        'params': {
            'C': [1, 5, 10]
        }
    }
}

In [60]:
scores = []

for model in model_params.values():
    mclf = GridSearchCV(model['model'], model['params'], cv=5, return_train_score=False)
    mclf.fit(iris.data, iris.target)
    scores.append({'Model': model['model'], 'Best score': mclf.best_score_, 'Best Parameters': mclf.best_params_})
    
print(pd.DataFrame(scores))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

                      Model  Best score               Best Parameters
0         SVC(gamma='auto')    0.980000  {'C': 1, 'kernel': 'linear'}
1  RandomForestClassifier()    0.966667           {'n_estimators': 5}
2      LogisticRegression()    0.980000                     {'C': 10}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist