# Hyperparameter Tuning

- Grid Search
- Randomized Search
- Bayesian Optimization

The following examples use a Random Forest Classifier on the MNIST dataset, but the process is generally applicable to all kinds of models and problems.

In [9]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch version 1 of the "mnist_784" dataset from OpenML.
mnist = fetch_openml('mnist_784', version=1)

# Feature matrix and label vector as exposed by the returned dataset object.
X = mnist["data"]
y = mnist["target"]

# Hold out 40% of the samples for testing; the fixed seed keeps the
# shuffled split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    shuffle=True,
    random_state=0,
)

In [10]:
# Baseline model: a random forest with default hyperparameters
# (only the random seed is set).

from sklearn.ensemble import RandomForestClassifier

# A random forest is stochastic; without a fixed seed the scores reported
# below change on every re-run. Use the same seed as the train/test split.
rnd_clf = RandomForestClassifier(random_state=0)

rnd_clf.fit(X_train, y_train)

RandomForestClassifier()

In [12]:
# Mean accuracy of the baseline: first on the training split, then on the
# held-out test split (one value per line, in that order).
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(rnd_clf.score(features, labels))

1.0
0.96475


In [19]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the baseline on the held-out test split.
y_test_pred = rnd_clf.predict(X_test)
baseline_report = classification_report(y_test, y_test_pred)
print(baseline_report)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2812
           1       0.98      0.99      0.99      3179
           2       0.96      0.97      0.96      2787
           3       0.96      0.95      0.95      2815
           4       0.96      0.96      0.96      2678
           5       0.97      0.95      0.96      2523
           6       0.97      0.98      0.98      2740
           7       0.97      0.96      0.97      2927
           8       0.94      0.95      0.95      2716
           9       0.95      0.95      0.95      2823

    accuracy                           0.96     28000
   macro avg       0.96      0.96      0.96     28000
weighted avg       0.96      0.96      0.96     28000



In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1,1, 10, 100], 
              'gamma': [1,0.1,0.01,0.001]}

lin_svc = LinearSVC()

grid_search_clf = GridSearchCV(SVC(), param_grid, cv=3, refit=True, verbose=10, n_jobs=-1)

%time grid_search_clf.fit(X_train, y_train)