### GridSearchCV class for Hyper-Parameter Tuning

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

In [1]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
import sklearn.metrics

In [2]:
# Load the bundled Iris dataset as a dict-like object ('data' plus further fields such as
# the targets and feature names used in later cells).
iris = datasets.load_iris()
# Disabled alternative: unpack the feature matrix and the labels directly.
# data, labels = datasets.load_iris(return_X_y = True)
# NOTE(review): the bare expression below prints the whole object, including every array
# in full; consider showing a small slice instead.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [3]:
# Disabled alternative way of loading the dataset: unpack the features and the labels
# into two separate variables and display the features.
# data, labels = datasets.load_iris(return_X_y = True)
# data

In [4]:
# Show the names of the four feature columns of the dataset.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [5]:
# Hyper-parameter grid to search: every combination of kernel and C is one candidate
# (2 kernels x 2 values of C = 4 candidates).
parameters = {
    'kernel': ('linear', 'rbf'),  # kernel functions to compare
    'C': [1, 10],                 # candidate values for the SVC `C` argument
}

In [6]:
# Base support-vector classifier created with default arguments; the kernel and C values
# to try are listed in `parameters`.
svc = svm.SVC()

In [7]:
# Grid search over `parameters` for the SVC, scored by accuracy with 5-fold
# cross-validation; verbose=3 logs the score of every individual fit.
clf = GridSearchCV(
    estimator=svc,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    verbose=3,
)

In [8]:
# Run the search on the full Iris feature matrix and labels
# (5 folds x 4 candidates = 20 fits, as reported in the log below).
clf.fit(iris.data, iris.target)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ................C=1, kernel=linear;, score=0.967 total time=   0.0s
[CV 2/5] END ................C=1, kernel=linear;, score=1.000 total time=   0.0s
[CV 3/5] END ................C=1, kernel=linear;, score=0.967 total time=   0.0s
[CV 4/5] END ................C=1, kernel=linear;, score=0.967 total time=   0.0s
[CV 5/5] END ................C=1, kernel=linear;, score=1.000 total time=   0.0s
[CV 1/5] END ...................C=1, kernel=rbf;, score=0.967 total time=   0.0s
[CV 2/5] END ...................C=1, kernel=rbf;, score=0.967 total time=   0.0s
[CV 3/5] END ...................C=1, kernel=rbf;, score=0.967 total time=   0.0s
[CV 4/5] END ...................C=1, kernel=rbf;, score=0.933 total time=   0.0s
[CV 5/5] END ...................C=1, kernel=rbf;, score=1.000 total time=   0.0s
[CV 1/5] END ...............C=10, kernel=linear;, score=1.000 total time=   0.0s
[CV 2/5] END ...............C=10, kernel=linear;,

In [9]:
# Alphabetical list of the entries recorded in the search results
# (iterating the results dict yields its keys).
sorted(clf.cv_results_)

['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_C',
 'param_kernel',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [10]:
# Parameter combination selected by the search as the best-scoring candidate.
# NOTE(review): the recorded scores show another candidate with the same mean score;
# presumably the first of the tied candidates is the one reported — confirm.
clf.best_params_

{'C': 1, 'kernel': 'linear'}

In [11]:
# Report, for every candidate parameter combination, the mean cross-validated score
# together with a +/- band of two standard deviations across the folds.
cv_results = clf.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']

print("Grid scores on training set:")
for mean, std, params in zip(means, stds, cv_results['params']):
    spread = std * 2  # width of the reported +/- band
    print("%f (+/-%f) for %r"% (mean, spread, params))

Grid scores on training set:
0.980000 (+/-0.032660) for {'C': 1, 'kernel': 'linear'}
0.966667 (+/-0.042164) for {'C': 1, 'kernel': 'rbf'}
0.973333 (+/-0.077746) for {'C': 10, 'kernel': 'linear'}
0.980000 (+/-0.032660) for {'C': 10, 'kernel': 'rbf'}


In [12]:
# List the scorer names known to scikit-learn (the string 'accuracy' passed as `scoring`
# to the grid search is one of them).
# Alternative spelling, presumably from an older scikit-learn release — kept for reference:
# sklearn.metrics.SCORERS.keys()
sklearn.metrics.get_scorer_names()

['accuracy',
 'adjusted_mutual_info_score',
 'adjusted_rand_score',
 'average_precision',
 'balanced_accuracy',
 'completeness_score',
 'd2_absolute_error_score',
 'explained_variance',
 'f1',
 'f1_macro',
 'f1_micro',
 'f1_samples',
 'f1_weighted',
 'fowlkes_mallows_score',
 'homogeneity_score',
 'jaccard',
 'jaccard_macro',
 'jaccard_micro',
 'jaccard_samples',
 'jaccard_weighted',
 'matthews_corrcoef',
 'max_error',
 'mutual_info_score',
 'neg_brier_score',
 'neg_log_loss',
 'neg_mean_absolute_error',
 'neg_mean_absolute_percentage_error',
 'neg_mean_gamma_deviance',
 'neg_mean_poisson_deviance',
 'neg_mean_squared_error',
 'neg_mean_squared_log_error',
 'neg_median_absolute_error',
 'neg_negative_likelihood_ratio',
 'neg_root_mean_squared_error',
 'neg_root_mean_squared_log_error',
 'normalized_mutual_info_score',
 'positive_likelihood_ratio',
 'precision',
 'precision_macro',
 'precision_micro',
 'precision_samples',
 'precision_weighted',
 'r2',
 'rand_score',
 'recall',
 'recall

In [14]:
# Scorer function used on the held out data to choose the best parameters for the model.
clf.scorer_

make_scorer(accuracy_score, response_method='predict')