## What are hyperparamters?

Hyperparamters are parameters which are not directly learned within estimators. In Sci-kit learn they are passed as arguments to the constructor of the estimator classes. For example: "alpha" is a hyperparamter for the LASSO estimator, "n_estimators" is a hyperparamter for RandomForest.

## Why do we need to tune hyperparameters?

To achieve the best performance from the estimator model.

How can we tune hyperparameters?

1) Brute Force approach

2) Exhaustive Grid Search

3) Halving Grid Search

## Grid Search

In [41]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Loading the Digits dataset
digits = datasets.load_digits()

In [3]:
# To apply an classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
X = digits.images.reshape((n_samples, -1))
y = digits.target

In [4]:
# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

In [6]:
# Set the parameters by cross-validation
tuned_parameters = [
    {"criterion": ["gini"],"n_estimators": [50,100,150,200]},
    {"criterion": ["entropy"],"n_estimators": [50,100,150,200]},
]

scores = ["precision", "recall"]

In [None]:
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, scoring="%s_macro" % score, cv = 5)
    # clf = GridSearchCV(SVC(), tuned_parameters, scoring= score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()

In [None]:
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

In [7]:
# # Set the parameters by cross-validation
# tuned_parameters = [
#     {"criterion": ["gini","entropy"],"n_estimators": [50,100,150,200]},
# ]

# scores = ["precision", "recall"]

In [17]:
# Set the parameters by cross-validation
tuned_parameters = [
    {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100, 1000]},
    {"kernel": ["linear"], "C": [1, 10, 100, 1000]},
]

scores = ["precision", "recall"]

In [18]:
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVC(), tuned_parameters, scoring="%s_macro" % score)
    # clf = GridSearchCV(SVC(), tuned_parameters, scoring= score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation set.

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        89
           1       0.97      1.00      0.98        90
           2       0.99      0.98      0.98        92
           3       1.00      0.99      0.99        93
           4       1.00      1.00      1.00        76
           5       0.99      0.98      0.99       108
           6       0.99      1.00      0.99        89
           7       0.99      1.00      0.99        78
           8       1.00      0.98      0.99        92
           9       0.99      0.99      0.99        92

    accuracy                           0.99       899
   macro avg       0.99      0.99      0.99       899
weighted avg       0.99     

## HalvingGridsearch

In [19]:
# explicitly require this experimental feature
from sklearn.experimental import enable_halving_search_cv  # noqa
# now you can import normally from model_selection
from sklearn.model_selection import HalvingGridSearchCV

In [22]:
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # clf = HalvingGridSearchCV(RandomForestClassifier(), tuned_parameters, scoring="%s_macro" % score, cv = 5, resource = 'n_samples', factor = 3)
    clf = GridSearchCV(SVC(), tuned_parameters, scoring= "%s_macro" % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_["mean_test_score"]
    stds = clf.cv_results_["std_test_score"]
    for mean, std, params in zip(means, stds, clf.cv_results_["params"]):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.986 (+/-0.016) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.959 (+/-0.028) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.982 (+/-0.026) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.983 (+/-0.026) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.983 (+/-0.026) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.974 (+/-0.012) for {'C': 1, 'kernel': 'linear'}
0.974 (+/-0.012) for {'C': 10, 'kernel': 'linear'}
0.974 (+/-0.012) for {'C': 100, 'kernel': 'linear'}
0.974 (+/-0.012) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model is trained on the full development set.
The scores are computed o

## Pipeline

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [24]:
pipe = Pipeline([('scaler', StandardScaler()), ('rf', RandomForestClassifier())])

In [25]:
pipe.fit(X_train, y_train)

In [26]:
pipe.score(X_test, y_test)

0.9721913236929922

## Putting it all together

In [27]:
from sklearn.decomposition import PCA

In [28]:
# Define a pipeline to search for the best combination of PCA truncation
# and classifier regularization.
pca = PCA()

In [29]:
# Define a Standard Scaler to normalize inputs
scaler = StandardScaler()

In [30]:
# set the tolerance to a large value to make the example faster
RF =  RandomForestClassifier()

In [31]:
pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("RandomForest", RF)])

In [32]:
## Parameter Grid
param_grid = [
    {"pca__n_components": [5,15,30,45]},
    {"RandomForest__criterion": ["gini"],"RandomForest__n_estimators": [50,100,150,200]},
    {"RandomForest__criterion": ["entropy"],"RandomForest__n_estimators": [50,100,150,200]},
]

In [33]:
search = GridSearchCV(pipe, param_grid)

In [34]:
search.fit(X_train, y_train)

In [37]:
y_true, y_pred = y_test, search.predict(X_test)

In [38]:
print(search.best_params_)

{'RandomForest__criterion': 'entropy', 'RandomForest__n_estimators': 200}


In [39]:
print(search.best_estimator_)

Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA()),
                ('RandomForest',
                 RandomForestClassifier(criterion='entropy',
                                        n_estimators=200))])


In [40]:
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99        89
           1       0.96      0.98      0.97        90
           2       0.99      0.98      0.98        92
           3       0.96      0.94      0.95        93
           4       0.99      0.97      0.98        76
           5       0.94      0.94      0.94       108
           6       0.96      0.98      0.97        89
           7       0.92      0.99      0.95        78
           8       0.96      0.84      0.90        92
           9       0.89      0.93      0.91        92

    accuracy                           0.95       899
   macro avg       0.95      0.95      0.95       899
weighted avg       0.95      0.95      0.95       899



In [50]:
pipeline = Pipeline(steps=[("scaler", scaler), ("pca", pca),('estimator', SVC())])

In [51]:
param_grid = [{
    'estimator': [KNeighborsClassifier()],
    'estimator__n_neighbors': [2, 5, 10]
},
{
    'estimator': [SVC()],
    'estimator__C': [0.0001, 0.01, 0.1, 1, 10, 1000]
}]

In [52]:
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

In [53]:
grid_search.fit(X_train, y_train)

In [54]:
grid_search.best_estimator_

In [57]:
grid_search.best_params_

{'estimator': SVC(C=10), 'estimator__C': 10}

In [58]:
y_hat_grid = grid_search.predict(X_test)

In [59]:
print(classification_report(y_true, y_hat_grid))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        89
           1       0.97      0.99      0.98        90
           2       0.98      0.99      0.98        92
           3       0.99      0.98      0.98        93
           4       0.95      1.00      0.97        76
           5       0.97      0.97      0.97       108
           6       0.99      0.99      0.99        89
           7       0.97      0.99      0.98        78
           8       1.00      0.93      0.97        92
           9       0.98      0.97      0.97        92

    accuracy                           0.98       899
   macro avg       0.98      0.98      0.98       899
weighted avg       0.98      0.98      0.98       899

