## HyperParameter Tuning
- Train test split to find model performance
- K fold cross validation
- Grid Search CV for hyperparameter tuning
- Random search CV
- Choosing best model 
- Exercise

- Learn: from sklearn import svm,datasets

In [3]:
from sklearn import svm,datasets

In [4]:
# Load the iris dataset; later cells read iris.data, iris.target and iris.feature_names.
iris = datasets.load_iris()

In [5]:
import pandas as pd

In [13]:
# Put the iris measurements into a DataFrame, labelling the columns with the
# dataset's own feature names, then preview the first rows.
df = pd.DataFrame(iris.data,columns = iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [14]:
# Add the class label (iris.target) as a 'flower' column.
df['flower']= iris.target

In [15]:
# Display the frame, now including the 'flower' label column.
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [16]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. No random_state is passed, so each
# execution of this cell draws a different split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
)

In [19]:
# Support vector classifier with an RBF kernel and regularization parameter C=30.
model = svm.SVC(kernel='rbf',C=30,gamma='auto')

In [20]:
# Train the classifier on the training portion of the split.
model.fit(X_train,y_train)

SVC(C=30, gamma='auto')

In [21]:
# Score the fitted classifier on the held-out test portion.
model.score(X_test,y_test)

0.9555555555555556

### run again

In [30]:
from sklearn.model_selection import train_test_split

# Second draw of the same 70/30 split; still unseeded, so the samples that land
# in the test set differ from the previous run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
)

- Learn : model = svm.SVC(kernel='rbf',C=30,gamma='auto')

In [31]:
# Same classifier configuration as before (RBF kernel, C=30), rebuilt for the new split.
model = svm.SVC(kernel='rbf',C=30,gamma='auto')

In [32]:
# Train on the training portion of the new split.
model.fit(X_train,y_train)

SVC(C=30, gamma='auto')

In [33]:
# Score on the new held-out test portion.
model.score(X_test,y_test)

0.9111111111111111

### run again

- Learn : from sklearn.model_selection import train_test_split

In [34]:
from sklearn.model_selection import train_test_split

# Third unseeded 70/30 split of the same data.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
)

In [35]:
# Same classifier configuration again (RBF kernel, C=30).
model = svm.SVC(kernel='rbf',C=30,gamma='auto')

In [36]:
# Train on the training portion of the third split.
model.fit(X_train,y_train)

SVC(C=30, gamma='auto')

In [37]:
# Score on the third held-out test portion.
model.score(X_test,y_test)

0.9777777777777777

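A single train/test split is not a reliable estimate of model performance: the score changes every time the samples are re-split (0.956, 0.911 and 0.978 in the three runs above). K-fold cross validation evaluates the model on several splits, so we use that instead.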

- Learn: from sklearn.model_selection import cross_val_score

In [39]:
from sklearn.model_selection import cross_val_score

In [40]:
# 5-fold cross-validation of a linear-kernel SVC with C=10; one score per fold.
cross_val_score(
    svm.SVC(kernel='linear', C=10, gamma='auto'),
    iris.data,
    iris.target,
    cv=5,
)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [41]:
# 5-fold cross-validation of an RBF-kernel SVC with C=10; one score per fold.
cross_val_score(
    svm.SVC(kernel='rbf', C=10, gamma='auto'),
    iris.data,
    iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [42]:
# 5-fold cross-validation of an RBF-kernel SVC with C=20; one score per fold.
cross_val_score(
    svm.SVC(kernel='rbf', C=20, gamma='auto'),
    iris.data,
    iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.9       , 0.96666667, 1.        ])

In [50]:
import numpy as np

In [62]:
# Manual grid search: evaluate every kernel/C pair with 5-fold cross-validation
# and print the average fold score for each combination.
Kernel = ['linear', 'rbf']
C = [1, 10, 20]

# Iterate over `Kernel` (the list defined above). The loop previously referenced
# a lowercase `kernel` that this cell never defines, so it raised NameError on a
# fresh kernel.
for kval in Kernel:
    for cval in C:
        cv_score = cross_val_score(
            svm.SVC(kernel=kval, C=cval, gamma='auto'),
            iris.data,
            iris.target,
            cv=5,
        )
        print("Average score for " + kval + "_" + str(cval), np.average(cv_score))

Average score for linear_1 0.9800000000000001
Average score for linear_10 0.9733333333333334
Average score for linear_20 0.9666666666666666
Average score for rbf_1 0.9800000000000001
Average score for rbf_10 0.9800000000000001
Average score for rbf_20 0.9666666666666668


#### From the above results we can say:
- The linear kernel gives its best result with C = 1
- For the rbf kernel, C = 1 and C = 10 both score higher than C = 20

### Approach: Use Grid Search CV

Grid Search CV does exactly the same thing as the for loop above, but in a single line of code.

In [63]:
from sklearn.model_selection import GridSearchCV

In [68]:
# Exhaustive 5-fold search over every kernel/C combination (2 x 3 = 6 candidates).
clf = GridSearchCV(
    svm.SVC(gamma='auto'),
    {'kernel': ['linear', 'rbf'], 'C': [1, 10, 20]},
    cv=5,
    return_train_score=False,
)
clf.fit(iris.data, iris.target)

GridSearchCV(cv=5, estimator=SVC(gamma='auto'),
             param_grid={'C': [1, 10, 20], 'kernel': ['linear', 'rbf']})

In [69]:
# Raw per-candidate results of the search: timings, parameters and test scores.
clf.cv_results_

{'mean_fit_time': array([0.00139618, 0.001616  , 0.0011127 , 0.00257602, 0.00219331,
        0.00219393]),
 'std_fit_time': array([0.00048852, 0.00050606, 0.00023043, 0.00051858, 0.00039895,
        0.00039911]),
 'mean_score_time': array([0.00099812, 0.00137582, 0.00079751, 0.00116029, 0.00119758,
        0.00163789]),
 'std_score_time': array([1.57861188e-06, 5.06006004e-04, 3.98755198e-04, 3.36063943e-04,
        3.98993873e-04, 4.42066788e-04]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
  {'C': 1, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'},
  {'C':

In [70]:
# Tabulate the grid-search results.
# NOTE(review): this rebinds `df`, which earlier held the iris feature frame;
# re-running the earlier `df` cells after this one will show different data.
df = pd.DataFrame(clf.cv_results_)

In [71]:
# Display the full results table, one row per parameter combination.
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001396,0.000489,0.000998,2e-06,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.001616,0.000506,0.001376,0.000506,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.001113,0.00023,0.000798,0.000399,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
3,0.002576,0.000519,0.00116,0.000336,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
4,0.002193,0.000399,0.001198,0.000399,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6
5,0.002194,0.000399,0.001638,0.000442,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5


In [74]:
# Keep only the columns needed to compare candidates: kernel, parameter set and mean test score.
df[['param_kernel','params','mean_test_score']]

Unnamed: 0,param_kernel,params,mean_test_score
0,linear,"{'C': 1, 'kernel': 'linear'}",0.98
1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.98
2,linear,"{'C': 10, 'kernel': 'linear'}",0.973333
3,rbf,"{'C': 10, 'kernel': 'rbf'}",0.98
4,linear,"{'C': 20, 'kernel': 'linear'}",0.966667
5,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667


In [76]:
# Parameter combination that the search ranked best.
clf.best_params_

{'C': 1, 'kernel': 'linear'}

In [77]:
# Mean cross-validated score achieved by the best parameter combination.
clf.best_score_

0.9800000000000001

#### Use RandomizedSearchCV to reduce the number of iterations by trying only a random subset of parameter combinations. This is useful when you have too many parameters to try and training takes too long. It helps reduce the cost of computation.

In [93]:
from sklearn.model_selection import RandomizedSearchCV

# Evaluate only n_iter=2 randomly chosen kernel/C combinations instead of all 6.
# random_state pins which combinations are drawn, so re-running the cell
# (e.g. Restart & Run All) reproduces the same candidates and scores.
clf = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['linear', 'rbf'],
    },
    cv=5,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)
clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([0.00119677, 0.0017447 ]),
 'std_fit_time': array([0.00039916, 0.00038629]),
 'mean_score_time': array([0.00055428, 0.0006536 ]),
 'std_score_time': array([0.00045974, 0.00045124]),
 'param_kernel': masked_array(data=['linear', 'rbf'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[20, 1],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'linear', 'C': 20}, {'kernel': 'rbf', 'C': 1}],
 'split0_test_score': array([1.        , 0.96666667]),
 'split1_test_score': array([1., 1.]),
 'split2_test_score': array([0.9       , 0.96666667]),
 'split3_test_score': array([0.93333333, 0.96666667]),
 'split4_test_score': array([1., 1.]),
 'mean_test_score': array([0.96666667, 0.98      ]),
 'std_test_score': array([0.0421637 , 0.01632993]),
 'rank_test_score': array([2, 1])}

In [94]:
# Tabulate the randomized-search results.
dfr = pd.DataFrame(clf.cv_results_)

In [95]:
# Keep only the columns needed to compare the sampled candidates.
dfr[['param_kernel','params','mean_test_score']]

Unnamed: 0,param_kernel,params,mean_test_score
0,linear,"{'kernel': 'linear', 'C': 20}",0.966667
1,rbf,"{'kernel': 'rbf', 'C': 1}",0.98


### Different model performance or model selection with different hyperparameters.

In [118]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [125]:
# Candidate models and the hyperparameter grid to search for each one.
# Each entry maps a label to {'model': estimator, 'params': parameter grid}.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'kernel': ['linear', 'rbf'],
            'C': [1, 10, 20],
        },
    },
    'random_forest': {
        # Fixed seed: forest construction is randomized, so without it the best
        # score and best n_estimators can change from one run to the next.
        'model': RandomForestClassifier(random_state=42),
        'params': {'n_estimators': [1, 5, 10]},
    },
    'logistic_regression': {
        # NOTE(review): newer scikit-learn releases deprecate the `multi_class`
        # argument - confirm against the installed version before upgrading.
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10],
        },
    },
}

In [126]:
# Inspect the (label, config) pairs stored in model_params.
model_params.items()

dict_items([('svm', {'model': SVC(gamma='auto'), 'params': {'kernel': ['linear', 'rbf'], 'C': [1, 10, 20]}}), ('random_forest', {'model': RandomForestClassifier(), 'params': {'n_estimators': [1, 5, 10]}}), ('logistic_regression', {'model': LogisticRegression(solver='liblinear'), 'params': {'C': [1, 5, 10]}})])

In [132]:
# Run a 5-fold grid search for each candidate model and record its best
# cross-validated score together with the parameters that achieved it.
Scores = []

for model_name, mp in model_params.items():
    clf = GridSearchCV(
        mp['model'],
        mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(iris.data, iris.target)
    Scores.append(
        dict(model=model_name, best_score=clf.best_score_, best_param=clf.best_params_)
    )

# One row per model, columns in a fixed order for display.
kj = pd.DataFrame(Scores, columns=['model', 'best_score', 'best_param'])
kj

Unnamed: 0,model,best_score,best_param
0,svm,0.98,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.966667,{'n_estimators': 5}
2,logistic_regression,0.966667,{'C': 5}


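#### Based on the above, I can conclude that SVM with C = 1 and kernel = 'linear' is the best model for solving the iris classification problem (it scores 0.98, tied with the rbf kernel at C = 1 and C = 10 in the grid search above).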