In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import cross_val_score

# Import and prepare dataset
# The loaded object's .data and .target are what the searches below are trained on.
iris = datasets.load_iris()
# The triple-quoted block below is a disabled preview: it would build a DataFrame from the
# features, add a 'flower' column with the target names, and show rows 47-51.
# NOTE: because it is a bare string and the last expression in the cell, the notebook
# echoes the string itself as the cell output instead of running it.
'''
df = pd.DataFrame(iris.data,columns=iris.feature_names)
df['flower'] = iris.target
df['flower'] = df['flower'].apply(lambda x: iris.target_names[x])
df[47:52]
'''

"\ndf = pd.DataFrame(iris.data,columns=iris.feature_names)\ndf['flower'] = iris.target\ndf['flower'] = df['flower'].apply(lambda x: iris.target_names[x])\ndf[47:52]\n"

In [3]:
# Selected model: SVM (assuming you've picked this model after testing it against other models)
from sklearn.svm import SVC

In [4]:
#######################################################################################################################
# Hyperparameter tuning with GridSearchCV
#######################################################################################################################

from sklearn.model_selection import GridSearchCV

In [5]:
# The search object (clf) is assembled from three pieces:
#   1. estimator   - the model to tune; gamma is pinned to 'auto' here to avoid warnings,
#                    but it could be given tuning values in the grid as well
#   2. param_grid  - the values to loop through; every combination of them is evaluated
#   3. cv          - GridSearchCV scores each combination with k-fold cross validation,
#                    so the number of folds is set here too
# return_train_score=False keeps training-set scores out of the results, and since no
# 'scoring' argument is passed the model's native scoring is used.
clf = GridSearchCV(
    estimator=SVC(gamma='auto'),
    param_grid={
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
)

# Train: fits and cross-validates every parameter combination on the iris data
clf.fit(iris.data, iris.target)

# Raw results dictionary (one entry per evaluated combination)
clf.cv_results_

{'mean_fit_time': array([0.0008038 , 0.00020013, 0.00059986, 0.00020008, 0.00059652,
        0.00060058]),
 'std_fit_time': array([0.00040195, 0.00040026, 0.00048978, 0.00040016, 0.0004871 ,
        0.00049037]),
 'mean_score_time': array([0.00059934, 0.00059996, 0.00019999, 0.00040007, 0.00020347,
        0.00019989]),
 'std_score_time': array([0.00048936, 0.00048986, 0.00039997, 0.00048998, 0.00040693,
        0.00039978]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],


In [6]:
# Import results into a dataframe for easy viewing.
# cv_results_ lists the candidates in parameter-grid order, NOT by score, so sort on
# rank_test_score (1 = best) to actually show them from best to worst.
# mergesort is a stable sort: candidates with tied ranks keep their original grid order.
df = pd.DataFrame(clf.cv_results_).sort_values('rank_test_score', kind='mergesort')

# The results are now ordered and ranked from best to worst
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000804,0.000402,0.000599,0.000489,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.0002,0.0004,0.0006,0.00049,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.0006,0.00049,0.0002,0.0004,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.0002,0.0004,0.0004,0.00049,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.000597,0.000487,0.000203,0.000407,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.000601,0.00049,0.0002,0.0004,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,5


In [7]:
# Narrow the results table down to the most important columns:
# the tuned parameters plus the mean/std test score and the resulting rank
df[[
    'param_C',
    'param_kernel',
    'mean_test_score',
    'std_test_score',
    'rank_test_score',
]]

Unnamed: 0,param_C,param_kernel,mean_test_score,std_test_score,rank_test_score
0,1,rbf,0.98,0.01633,1
1,1,linear,0.98,0.01633,1
2,10,rbf,0.98,0.01633,1
3,10,linear,0.973333,0.038873,4
4,20,rbf,0.966667,0.036515,5
5,20,linear,0.966667,0.042164,5


In [8]:
## Interpreting results - The table above shows that the first 3 parameter combinations all reach a mean cross-validated accuracy of 98%, so any of those three combos can be used

In [9]:
# Quick check of which are the best parameters:
# NOTE: three combinations tie at rank 1 in the results table, and only the first of them
# is reported here (presumably the first candidate in grid order wins ties -- confirm in the sklearn docs).
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

In [53]:
# Other options for results that are provided by GridSearch:
# dir() lists every attribute name on the fitted search object. The result-related entries
# to look for in the listing include best_estimator_, best_index_, best_params_, best_score_ and cv_results_.
dir(clf)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_is_fitted',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_get_tags',
 '_required_parameters',
 '_run_search',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 'classes_',
 'cv',
 'cv_results_',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'get_params',
 'iid',
 'inverse_transform',
 'multimetric_',
 'n_jobs',
 'n_splits_',
 'param_grid',
 'pre_dispatch',
 'predict',
 'predict_log_proba',
 'predict_proba',
 'refit',
 'refit_time_',
 'return_train_score',
 'score',
 'scorer_',
 'scoring',
 '

In [None]:
#######################################################################################################################
# Alternative - RandomizedSearchCV
#######################################################################################################################

# This is useful when computing power is limited: instead of evaluating every parameter combination (as GridSearchCV does), it evaluates only a limited number of randomly sampled combinations

In [20]:
# Key difference here is the "n_iter" parameter, which caps how many parameter combinations are sampled and evaluated

from sklearn.model_selection import RandomizedSearchCV

rs = RandomizedSearchCV(
    estimator=SVC(gamma='auto'),
    param_distributions={
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    n_iter=2,
    cv=5,
    return_train_score=False,
)

## Then train the model (only the sampled combinations are fitted and cross-validated)
rs.fit(iris.data, iris.target)

# Keep just the columns needed to compare the sampled combinations
df = pd.DataFrame(rs.cv_results_)[[
    'param_C',
    'param_kernel',
    'mean_test_score',
    'std_test_score',
    'rank_test_score',
]]

# NOTE: No random_state is set, so the combinations that get sampled can differ every time this code block runs
# Useful in case you have limited computing power: check a few combinations per run and note down the scores
# Repeated runs are drawn independently, so they tend to cover the grid over time, but unlike GridSearchCV nothing guarantees every combination gets tried
df

Unnamed: 0,param_C,param_kernel,mean_test_score,std_test_score,rank_test_score
0,10,linear,0.973333,0.038873,2
1,10,rbf,0.98,0.01633,1
