In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [7]:
# Load the iris data directly as a (features, labels) pair.
X, y = load_iris(return_X_y=True)

## Hold-Out Cross Validation

In [14]:
def hold_out_cv(X, y, random_state = 0, test_size = .2, val_size = .25, stratify = False):
    """Split ``(X, y)`` into train / validation / test subsets (hold-out scheme).

    The data is split twice with ``train_test_split``: first the test set is
    carved off the full data, then the validation set is carved off what
    remains. With the defaults this gives roughly 60% train, 20% validation
    and 20% test.

    Parameters
    ----------
    X, y : array-like
        Features and targets, in any form ``train_test_split`` accepts.
    random_state : int, default 0
        Seed passed to both splits so the partition is reproducible.
    test_size : float or int, default .2
        Size of the test set, taken from the full data.
    val_size : float or int, default .25
        Size of the validation set, taken from the data left over after the
        test set is removed (not from the full data). The default matches
        the value ``train_test_split`` uses when no size is given.
    stratify : bool, default False
        When True, both splits are stratified on the labels so each subset
        keeps the class proportions of its parent set.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # First split: reserve the test set; everything else is "rest".
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y,
        random_state = random_state,
        test_size = test_size,
        stratify = y if stratify else None,
    )
    # Second split: divide "rest" into the final train and validation sets.
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest,
        random_state = random_state,
        test_size = val_size,
        stratify = y_rest if stratify else None,
    )
    return X_train, X_val, X_test, y_train, y_val, y_test
# Materialise the three-way split used by the cells below.
(X_train, X_val, X_test,
 y_train, y_val, y_test) = hold_out_cv(X, y)

In [15]:
# Train a linear-kernel SVM on the training split only...
clf = SVC(C=1, kernel='linear')
clf.fit(X_train, y_train)
# ...and display its score on the held-out validation split.
clf.score(X_val, y_val)

0.9333333333333333

In [16]:
# Score the same fitted model on the test split, which was not used for fitting.
clf.score(X=X_test, y=y_test)

1.0

## k-Fold Cross Validation

In [17]:
from sklearn.model_selection import cross_val_score, KFold, RepeatedKFold

In [19]:
# Cross-validate an unfitted linear SVM on the full dataset with cv=5,
# then summarise the per-fold scores.
clf = SVC(C=1, kernel='linear', random_state=42)
scores = cross_val_score(clf, X, y, cv=5)
pd.Series(scores).describe()

count    5.000000
mean     0.980000
std      0.018257
min      0.966667
25%      0.966667
50%      0.966667
75%      1.000000
max      1.000000
dtype: float64

## Hyperparameter Tuning

In [22]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [23]:
# Candidate hyperparameter values handed to the grid search below.
# NOTE(review): per the scikit-learn docs, `gamma` is the coefficient for the
# rbf/poly/sigmoid kernels, so pairing it with kernel="linear" presumably
# just repeats equivalent fits — consider a list of per-kernel grids.
param_grid = dict(
    C=[1, 10, 100, 1000],
    kernel=["linear", "rbf"],
    gamma=[0.001, 0.0001],
)

In [29]:
# Search over `param_grid` with a default SVC as the base estimator,
# fitting on the training split only.
svc = SVC()
clf = GridSearchCV(estimator=svc, param_grid=param_grid)
clf.fit(X_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                         'kernel': ['linear', 'rbf']})

In [30]:
# Display the parameter combination the fitted grid search selected as best.
clf.best_params_

{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}