# Best Parameters

![](img/ex6_3.png)

In [1]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

import numpy as np
import pandas as pd
import scipy.io as sio

## load data

In [2]:
# Read the exercise's MATLAB file into a dict and list the variables it holds.
mat = sio.loadmat('./data/ex6data3.mat')
mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y', 'yval', 'Xval'])

In [3]:
def _as_frame(features, labels):
    """Return a DataFrame with feature columns 'X1', 'X2' and a label column 'y'."""
    frame = pd.DataFrame(features, columns=['X1', 'X2'])
    frame['y'] = labels
    return frame


# Training split (X, y) and cross-validation split (Xval, yval) from the .mat dict.
training = _as_frame(mat.get('X'), mat.get('y'))
cv = _as_frame(mat.get('Xval'), mat.get('yval'))

training.shape, cv.shape

((211, 3), (200, 3))

In [4]:
# Peek at the first rows of the training split.
training.head()

Unnamed: 0,X1,X2,y
0,-0.158986,0.423977,1
1,-0.347926,0.47076,1
2,-0.504608,0.353801,1
3,-0.596774,0.114035,1
4,-0.518433,-0.172515,1


In [5]:
# Peek at the first rows of the cross-validation split.
cv.head()

Unnamed: 0,X1,X2,y
0,-0.353062,-0.673902,0
1,-0.227126,0.44732,1
2,0.092898,-0.753524,0
3,0.148243,-0.718473,0
4,-0.001512,0.162928,0


## manual grid search for C and $\gamma$ (the RBF kernel parameter, $\gamma = \frac{1}{2\sigma^2}$)

In [6]:
# Values tried for both C and gamma in the searches below.
candidate = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

In [7]:
# Every (C, gamma) pair drawn from the candidate values, C varying slowest.
combination = [(C, gamma) for C in candidate for gamma in candidate]
# Number of pairs the manual search will evaluate.
len(combination)

81

In [8]:
# Manual grid search: fit one SVC per (C, gamma) pair on the training split and
# record its accuracy on the cv split, in the same order as `combination`.
feature_cols = ['X1', 'X2']
search = []

for C, gamma in combination:
    # fit() returns the estimator itself, so the call can be chained.
    svc = svm.SVC(C=C, gamma=gamma).fit(training[feature_cols], training['y'])
    search.append(svc.score(cv[feature_cols], cv['y']))

In [9]:
# Locate the highest cv accuracy once and reuse the index for both lookups;
# `search` and `combination` share the same ordering.
best_idx = np.argmax(search)
best_score = search[best_idx]
best_param = combination[best_idx]
best_score, best_param

(0.965, (0.3, 100))

In [10]:
# Refit an SVC with the winning (C, gamma) pair on the training split and
# print per-class metrics for its predictions on the cv split.
best_C, best_gamma = best_param
train_X, train_y = training[['X1', 'X2']], training['y']
cv_X, cv_y = cv[['X1', 'X2']], cv['y']

best_svc = svm.SVC(C=best_C, gamma=best_gamma)
best_svc.fit(train_X, train_y)
y_pred = best_svc.predict(cv_X)

print(metrics.classification_report(cv_y, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.96      0.97       113
           1       0.95      0.97      0.96        87

    accuracy                           0.96       200
   macro avg       0.96      0.97      0.96       200
weighted avg       0.97      0.96      0.97       200



## sklearn GridSearchCV

[GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html)

In [11]:
# Same grid, but driven by GridSearchCV, which cross-validates inside the data
# handed to fit() (here: the training split only).
parameters = dict(C=candidate, gamma=candidate)
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=parameters, n_jobs=-1)
clf.fit(training[['X1', 'X2']], training['y'])

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100],
                         'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [12]:
# Parameter setting selected by the grid search fitted on the training split.
clf.best_params_

{'C': 30, 'gamma': 3}

In [13]:
# Cross-validated score of the selected parameter setting.
clf.best_score_

0.9194905869324475

In [14]:
# Predict on the cv split with the fitted grid search and print per-class metrics.
y_pred = clf.predict(cv[['X1', 'X2']])
print(metrics.classification_report(cv['y'], y_pred))

              precision    recall  f1-score   support

           0       0.95      0.96      0.96       113
           1       0.95      0.93      0.94        87

    accuracy                           0.95       200
   macro avg       0.95      0.95      0.95       200
weighted avg       0.95      0.95      0.95       200



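The results differ because GridSearchCV only sees the training set: it runs k-fold cross-validation inside it (5 folds by default), so each candidate is trained on just part of the training data and scored on the held-out fold. The manual grid search, in contrast, trains on the whole training set and selects the parameters using the additional cv data set.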

In [15]:
# Stack the training and cv rows into one frame with a fresh 0..n-1 index.
# NOTE(review): `all` shadows the Python builtin of the same name for the rest of
# the notebook, and `res` is not used in any cell visible here — consider a single
# descriptive name.
all = res = pd.concat([training, cv], axis=0, ignore_index=True)
all.shape

(411, 3)

In [16]:
# Peek at the first rows of the combined frame.
all.head()

Unnamed: 0,X1,X2,y
0,-0.158986,0.423977,1
1,-0.347926,0.47076,1
2,-0.504608,0.353801,1
3,-0.596774,0.114035,1
4,-0.518433,-0.172515,1


In [17]:
# Repeat the GridSearchCV run, this time on the combined training + cv rows.
parameters = dict(C=candidate, gamma=candidate)
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=parameters, n_jobs=-1)
clf.fit(all[['X1', 'X2']], all['y'])

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100],
                         'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [18]:
# Parameter setting selected by the grid search fitted on the combined data.
clf.best_params_

{'C': 1, 'gamma': 10}

In [19]:
# Cross-validated score of the selected parameter setting on the combined data.
clf.best_score_

0.9489861886570672

In [20]:
# Predict on the cv split and print per-class metrics.
# NOTE(review): the cv rows are part of the combined frame this `clf` was fitted
# on, so this report is measured on data the model has already seen.
y_pred = clf.predict(cv[['X1', 'X2']])
print(metrics.classification_report(cv['y'], y_pred))

              precision    recall  f1-score   support

           0       0.97      0.96      0.97       113
           1       0.95      0.97      0.96        87

    accuracy                           0.96       200
   macro avg       0.96      0.97      0.96       200
weighted avg       0.97      0.96      0.97       200



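Using the training data and the cv data together as GridSearchCV's data set, we can see the score improve. Note, however, that the cv rows are now part of the data the final model was fitted on, so the classification report above is no longer an out-of-sample estimate.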