Objective: Hyperparameter Tuning<br>
<br>
Data is split into:
* Train set (Model Fitting)
* Validation set (Hyperparam Selection)
* Test set (Model Evaluation)

# Import Modules

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load Data & Pre-Process

In [3]:
# Load the Iris dataset; `data_obj.data` and `data_obj.target` are what get split in the next cell.
data_obj = load_iris()

In [4]:
# Stage 1: hold out the test set, which is only used for the final evaluation.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    data_obj.data,
    data_obj.target,
    random_state=0,
)

# Stage 2: split the remainder into a training set (model fitting) and a
# validation set (hyperparameter selection). The fixed seed keeps both
# splits reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    random_state=0,
)

In [23]:
# Size of the training split (used to fit each candidate model).
X_train.shape

(84, 4)

In [24]:
# Size of the validation split (used to pick hyperparameters).
X_val.shape

(28, 4)

In [22]:
# Size of the held-out test split (used only for the final evaluation).
X_test.shape

(38, 4)

# Grid Search

## Method : train-validation-test split

In [6]:
# Manual grid search with a hold-out validation set: fit every (gamma, C)
# pair on the training split and keep the pair with the highest accuracy
# on the validation split.
grid_values = [0.001, 0.01, 0.1, 1, 10, 100]

# gamma varies in the outer position and C in the inner one, so candidates
# are visited in the same order as a gamma-outer / C-inner nested loop.
candidates = [{'gamma': g, 'C': c} for g in grid_values for c in grid_values]

best_score = 0
best_param = {}
for candidate in candidates:
    val_accuracy = SVC(**candidate).fit(X_train, y_train).score(X_val, y_val)
    # Strict '>' means the earliest candidate wins when several tie.
    if val_accuracy > best_score:
        best_score, best_param = val_accuracy, candidate

In [8]:
# Refit on train + validation with the selected hyperparameters, then
# report accuracy on the test set.
model_optimised = SVC(**best_param)
model_optimised.fit(X_trainval, y_trainval)
test_accuracy = model_optimised.score(X_test, y_test)
print(best_param, test_accuracy)

{'gamma': 0.001, 'C': 100} 0.9736842105263158


## Method : cross_validation

In [9]:
# Manual grid search scored by 5-fold cross-validation. The folds are drawn
# from the combined train+validation data, so no separate validation split
# is needed here.
search_values = (0.001, 0.01, 0.1, 1, 10, 100)

best_score, best_param = 0, {}
for gamma in search_values:
    for C in search_values:
        fold_scores = cross_val_score(
            SVC(gamma=gamma, C=C), X_trainval, y_trainval, cv=5
        )
        mean_score = fold_scores.mean()  # average accuracy over the 5 folds
        # Strict '>' means the earliest (gamma, C) pair wins when several tie.
        if mean_score > best_score:
            best_score = mean_score
            best_param = {'gamma': gamma, 'C': C}

In [10]:
# Train a fresh SVC on all train+validation data using the hyperparameters
# chosen by cross-validation, and report its accuracy on the test set.
model_optimised = SVC(**best_param)
model_optimised.fit(X_trainval, y_trainval)
held_out_accuracy = model_optimised.score(X_test, y_test)
print(best_param, held_out_accuracy)

{'gamma': 0.1, 'C': 10} 0.9736842105263158


## Method : Sklearn - GridSearchCV

In [14]:
# Same 6 x 6 grid as the manual searches, handed to GridSearchCV, which
# runs the 5-fold cross-validated search itself.
param = dict(
    gamma=[0.001, 0.01, 0.1, 1, 10, 100],
    C=[0.001, 0.01, 0.1, 1, 10, 100],
)
estimator = SVC()
model = GridSearchCV(estimator, param_grid=param, cv=5)

# Left as the final expression so the fitted search object is displayed.
model.fit(X_trainval, y_trainval)

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [15]:
# Report the winning hyperparameters and the fitted search's accuracy on the test set.
chosen_params = model.best_params_
test_accuracy = model.score(X_test, y_test)
print(chosen_params, test_accuracy)

{'C': 10, 'gamma': 0.1} 0.9736842105263158


In [21]:
# Tabulate the per-candidate cross-validation results and list them from
# best rank to worst, keeping only the columns needed to compare candidates.
df = pd.DataFrame(model.cv_results_)
summary_columns = ['rank_test_score', 'param_gamma', 'param_C', 'mean_test_score']
df[summary_columns].sort_values(by='rank_test_score')

Unnamed: 0,rank_test_score,param_gamma,param_C,mean_test_score
26,1,0.1,10.0,0.973123
31,2,0.01,100.0,0.964427
20,3,0.1,1.0,0.964032
30,3,0.001,100.0,0.964032
25,3,0.01,10.0,0.964032
15,6,1.0,0.1,0.955336
32,7,0.1,100.0,0.954941
21,8,1.0,1.0,0.94664
33,9,1.0,100.0,0.946245
27,9,1.0,10.0,0.946245
