In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Load the iris dataset; the cells below use iris.data as features and
# iris.target as labels.
iris = load_iris()

In [3]:
# Hold out a test set; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)

In [4]:
# Baseline for the parameter search: any score above 0 replaces it.
best_score = 0

In [13]:
# Naive grid search: every (gamma, C) pair is fitted on the training split and
# scored directly on the test split. Because the test data is used to pick the
# parameters, the reported best score is not an independent estimate of how
# well the model generalizes.
#
# Reset the tracker inside this cell so that re-running it never compares
# against a stale best_score left in the kernel by another cell.
best_score = 0
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # One SVC per parameter combination.
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train, y_train)
        score = svm.score(X_test, y_test)
        # Strict '>' keeps the first combination that reaches the top score.
        if score > best_score:
            best_score = score
            best_C = C
            best_gamma = gamma
print("Best score:", best_score)
print("Best C and gamma:", best_C, best_gamma)

Best score: 0.9736842105263158
Best C and gamma: 100 0.001


### Using a validation set

In [29]:
# Recreate the random_state=0 train/test split so this section starts from
# known data regardless of what earlier cells left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)

In [30]:
# Split the training data again: X_train_pr is used for fitting candidate
# models, X_valid for choosing between them. The test set is left untouched.
X_train_pr, X_valid, y_train_pr, y_valid = train_test_split(
    X_train,
    y_train,
    random_state=1,
)
# The first two values are row counts; the last one is the full shape tuple
# of X_test.
print(
    "Sizes of train_pr, valid, and test sets:",
    len(X_train_pr),
    len(X_valid),
    X_test.shape,
)

Sizes of train_pr, valid, and test sets: 84 28 (38, 4)


In [20]:
# Baseline for the parameter search: any validation score above 0 replaces it.
best_score = 0

In [21]:
# Grid search with a separate validation set: candidates are fitted on
# X_train_pr and compared on X_valid, so the test set plays no part in
# choosing the parameters.
#
# Reset the tracker inside this cell so that re-running it never compares
# against a stale best_score left in the kernel by another cell.
best_score = 0
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train_pr, y_train_pr)
        score = svm.score(X_valid, y_valid)
        # Strict '>' keeps the first combination that reaches the top score.
        if score > best_score:
            best_score = score
            best_C = C
            best_gamma = gamma

# Refit with the selected parameters on the full training split
# (X_train_pr plus X_valid) and evaluate once on the held-out test set.
svm = SVC(C=best_C, gamma=best_gamma)
svm.fit(X_train, y_train)
test_score = svm.score(X_test, y_test)
print("Best score on validation set:", best_score)
print("Best parameters C and gamma:", best_C, best_gamma)
print("Best test score with best parameters:", test_score)

Best score on validation set: 0.9642857142857143
Best parameters C and gamma: 10 0.001
Best test score with best parameters: 0.9210526315789473


In [32]:
# Repeat the experiment with a different shuffle (random_state=1) of the
# train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=1,
)

In [33]:
# Carve the validation set out of the current training split.
X_train_pr, X_valid, y_train_pr, y_valid = train_test_split(
    X_train,
    y_train,
    random_state=1,
)
# The first two values are row counts; the last one is the full shape tuple
# of X_test.
print(
    "Sizes of train_pr, valid, and test sets:",
    len(X_train_pr),
    len(X_valid),
    X_test.shape,
)

Sizes of train_pr, valid, and test sets: 84 28 (38, 4)


In [24]:
# Validation-set grid search: fit each (gamma, C) candidate on X_train_pr,
# compare candidates on X_valid, then refit the winner on all of X_train.
best_score = 0
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # fit() returns the estimator itself, so construction and fitting
        # are chained here.
        candidate = SVC(gamma=gamma, C=C).fit(X_train_pr, y_train_pr)
        score = candidate.score(X_valid, y_valid)
        # Only a strictly better score replaces the current best, so ties
        # keep the earlier combination.
        if score > best_score:
            best_score, best_C, best_gamma = score, C, gamma

# Final model: selected parameters, full training split, single test evaluation.
svm = SVC(C=best_C, gamma=best_gamma).fit(X_train, y_train)
test_score = svm.score(X_test, y_test)
print("Best score on validation set:", best_score)
print("Best parameters C and gamma:", best_C, best_gamma)
print("Best test score with best parameters:", test_score)

Best score on validation set: 1.0
Best parameters C and gamma: 10 0.001
Best test score with best parameters: 0.9473684210526315


### Using cross-validation

In [34]:
from sklearn.model_selection import cross_val_score
import numpy as np
# Grid search scored by 5-fold cross-validation on the training split: each
# (gamma, C) candidate is judged by its mean accuracy over the folds.
best_score = 0
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        fold_scores = cross_val_score(SVC(gamma=gamma, C=C), X_train, y_train, cv=5)
        score = np.mean(fold_scores)
        # Only a strictly better mean replaces the current best, so ties
        # keep the earlier combination.
        if score > best_score:
            best_score, best_C, best_gamma = score, C, gamma

# Refit the selected parameters on the whole training split and evaluate once
# on the held-out test set.
svm = SVC(C=best_C, gamma=best_gamma)
svm.fit(X_train, y_train)
test_score = svm.score(X_test, y_test)
print("Best CV score:", best_score)
print("Best parameters C and gamma:", best_C, best_gamma)
print("Best test score with best parameters:", test_score)

Best CV score: 0.9731225296442687
Best parameters C and gamma: 100 0.001
Best test score with best parameters: 0.9736842105263158


The test set score seems to improve compared to using a validation set,<br>but we should at least try different random_states before drawing conclusions!

In [35]:
# Search space handed to GridSearchCV: six candidate values for each of the
# two SVC parameters, i.e. 6 x 6 = 36 combinations.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1, 10, 100],
)

In [36]:
from sklearn.model_selection import GridSearchCV
# Build the search object: an SVC with default settings, tuned over param_grid
# with 5-fold cross-validation. Nothing is fitted until .fit() is called.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=5)

In [37]:
# Recreate the random_state=0 train/test split for the GridSearchCV run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)

In [38]:
# Run the cross-validated search over param_grid using only the training split.
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]})

Fitting the GridSearchCV object not only searches for the best parameters,<br>
but also automatically fits a new model on the whole training dataset with the parameters that yielded the best cross-validation performance.

In [39]:
# Score the fitted search object on the held-out test split.
grid_search.score(X_test, y_test)

0.9736842105263158

In [40]:
# Show the winning parameter combination and its cross-validation score,
# one per line.
print(grid_search.best_params_, grid_search.best_score_, sep="\n")

{'C': 10, 'gamma': 0.1}
0.9731225296442687


In [41]:
# Display the estimator that GridSearchCV kept for the best parameters.
grid_search.best_estimator_

SVC(C=10, gamma=0.1)