# Hyperparameter Tuning

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

In [4]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [6]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
df['target'] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [9]:
from sklearn.model_selection import train_test_split
inputs = df.drop(['target'], axis='columns')
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(inputs, y, test_size=0.2)
X_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
14,5.8,4.0,1.2,0.2
100,6.3,3.3,6.0,2.5
19,5.1,3.8,1.5,0.3
40,5.0,3.5,1.3,0.3
45,4.8,3.0,1.4,0.3


In [10]:
from sklearn.svm import SVC
model = SVC()
model.fit(X_train, y_train)
model.score(X_test, y_test)

0.9

In [11]:
model1 = SVC(kernel='linear',C=10, gamma='auto')
model1.fit(X_train, y_train)
model1.score(X_test, y_test)

0.9666666666666667

In [12]:
model2 = SVC(kernel='rbf', C=10, gamma='auto')
model2.fit(X_train, y_train)
model2.score(X_test, y_test)

0.9333333333333333

### Cross Validation Score

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
cross_val_score(SVC(kernel='linear', C=10, gamma='auto'), X_train, y_train, cv=5)

array([0.95833333, 0.95833333, 1.        , 0.91666667, 0.91666667])

In [15]:
cross_val_score(SVC(kernel='poly', C=10, gamma='auto'), X_train, y_train, cv=5)

array([0.95833333, 1.        , 1.        , 0.95833333, 0.875     ])

In [16]:
cross_val_score(SVC(kernel='rbf', C=10, gamma='auto'), X_train, y_train, cv=5)

array([0.95833333, 1.        , 1.        , 1.        , 0.91666667])

## GridSearchCV

In [17]:
from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(SVC(gamma='auto'),{
    'C':[1,10,20],
    'kernel':['rbf', 'linear']
}, cv=5, return_train_score=False)
clf.fit(inputs, y)
clf.cv_results_

{'mean_fit_time': array([0.01585822, 0.01021686, 0.00524688, 0.00536442, 0.00279779,
        0.00253687]),
 'mean_score_time': array([0.01518588, 0.00604382, 0.00598874, 0.0035615 , 0.00206032,
        0.00192928]),
 'mean_test_score': array([0.98      , 0.98      , 0.98      , 0.97333333, 0.96666667,
        0.96666667]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],
 'rank_test_score': array([1, 1, 1, 4, 5, 6], dtype=int32),
 'split0_test_score': array([0.96666667, 0.96

In [18]:
df =pd.DataFrame(clf.cv_results_)
df.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.015858,0.009187,0.015186,0.008067,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.010217,0.010956,0.006044,0.004704,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.005247,0.000361,0.005989,0.00335,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.005364,0.000946,0.003561,0.000491,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.002798,0.000135,0.00206,0.000138,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5


In [19]:
clf.best_score_

0.9800000000000001

In [20]:
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

## RandomizedSearchCV

In [25]:
from sklearn.model_selection import RandomizedSearchCV
rs = RandomizedSearchCV(SVC(gamma='auto'),{
    'C':[1,10,20],
    'kernel':['rbf','linear']
},
cv=5,
return_train_score=False,
n_iter=2
)
rs.fit(inputs, y)
pd.DataFrame(rs.cv_results_)[['param_C','param_kernel','mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,20,linear,0.966667


In [31]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [32]:
model_params ={
    'svm':{
        'model':svm.SVC(gamma='auto'),
        'params':{
            'C':[1,10,20],
            'kernel':['rbf','linear']
        }
    },
    'random_forest':{
        'model':RandomForestClassifier(),
        'params':{
            'n_estimmators':[1,5,10]
        }
    },
    'logistic_regression':{
        'model':LogisticRegression(solver='liblinear',multi_class='auto'),
        'params':{
            'C':[1,5,10]
        }
    }
}