# Hyperparameter Tuning

## In machine learning, a hyperparameter is a parameter whose value is set before the learning process begins.  By contrast, the values of other parameters are derived via training.

In [1]:
import sklearn.datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split

  from numpy.core.umath_tests import inner1d


## Optuna - Automatic Hyperparameter Optimizer

In [2]:
import optuna

# RandomForestClassifier

In [16]:
iris = sklearn.datasets.load_iris()

x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target)

model = RandomForestClassifier(n_estimators=5, max_depth=3)

model.fit(x_train, y_train)

Accuracy = cross_val_score(model, x_test, y_test, n_jobs=-1, cv=3).mean()

print('Accuracy: ', Accuracy)  

Accuracy:  0.8964368964368964


# RandomForestClassifier - Optuna

In [18]:
def objective(trial):
    iris =  sklearn.datasets.load_iris()
    
    #x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target)
    
    n_estimators = trial.suggest_int('n_estimators', 2, 20)
    max_depth = int(trial.suggest_loguniform('max_depth', 1, 32))
    
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    
    #model.fit(x_train, y_test)
    
    return cross_val_score(model, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print('Best Hyperparameters: {}'.format(trial.params))

[I 2020-01-22 06:53:29,443] Finished trial#0 resulted in value: 0.9673202614379085. Current best value is 0.9673202614379085 with parameters: {'n_estimators': 20, 'max_depth': 3.914205323532008}.
[I 2020-01-22 06:53:35,595] Finished trial#1 resulted in value: 0.9464869281045751. Current best value is 0.9673202614379085 with parameters: {'n_estimators': 20, 'max_depth': 3.914205323532008}.
[I 2020-01-22 06:53:42,165] Finished trial#2 resulted in value: 0.9599673202614379. Current best value is 0.9673202614379085 with parameters: {'n_estimators': 20, 'max_depth': 3.914205323532008}.
[I 2020-01-22 06:53:48,680] Finished trial#3 resulted in value: 0.7516339869281046. Current best value is 0.9673202614379085 with parameters: {'n_estimators': 20, 'max_depth': 3.914205323532008}.
[I 2020-01-22 06:53:55,216] Finished trial#4 resulted in value: 0.960375816993464. Current best value is 0.9673202614379085 with parameters: {'n_estimators': 20, 'max_depth': 3.914205323532008}.
[I 2020-01-22 06:54:0

[I 2020-01-22 06:58:05,197] Finished trial#41 resulted in value: 0.960375816993464. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 06:58:12,999] Finished trial#42 resulted in value: 0.9673202614379085. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 06:58:21,272] Finished trial#43 resulted in value: 0.8778594771241831. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 06:58:31,200] Finished trial#44 resulted in value: 0.9473039215686274. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 06:58:38,690] Finished trial#45 resulted in value: 0.960375816993464. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 06:

[I 2020-01-22 07:02:48,637] Finished trial#82 resulted in value: 0.9538398692810457. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 07:02:55,390] Finished trial#83 resulted in value: 0.954248366013072. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 07:03:03,180] Finished trial#84 resulted in value: 0.9738562091503268. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 07:03:11,368] Finished trial#85 resulted in value: 0.960375816993464. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 07:03:18,517] Finished trial#86 resulted in value: 0.9673202614379085. Current best value is 0.9738562091503268 with parameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}.
[I 2020-01-22 07:

Accuracy: 0.9738562091503268
Best Hyperparameters: {'n_estimators': 15, 'max_depth': 5.908638057207142}


# Applying Optuna over RandomForest, SVC

In [20]:
from sklearn.svm import SVC

def objective(trial):
    iris = sklearn.datasets.load_iris()

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])
    
    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 2, 20)
        max_depth = int(trial.suggest_loguniform('max_depth', 1, 32))

        clf = RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        c = trial.suggest_loguniform('svc_c', 1e-10, 1e10)
        
        clf = SVC(C=c, gamma='auto')

    return cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[I 2020-01-22 08:33:02,062] Finished trial#0 resulted in value: 0.9673202614379085. Current best value is 0.9673202614379085 with parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 7.203349677542514}.
[I 2020-01-22 08:33:08,497] Finished trial#1 resulted in value: 0.9399509803921569. Current best value is 0.9673202614379085 with parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 7.203349677542514}.
[I 2020-01-22 08:33:14,773] Finished trial#2 resulted in value: 0.8366013071895425. Current best value is 0.9673202614379085 with parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 7.203349677542514}.
[I 2020-01-22 08:33:21,187] Finished trial#3 resulted in value: 0.960375816993464. Current best value is 0.9673202614379085 with parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 7.203349677542514}.
[I 2020-01-22 08:33:29,453] Finished trial#4 resulted in value: 0.9538398692810457. Current best valu

[I 2020-01-22 08:37:09,438] Finished trial#36 resulted in value: 0.9865196078431372. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:37:16,054] Finished trial#37 resulted in value: 0.9342320261437909. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:37:22,419] Finished trial#38 resulted in value: 0.9538398692810457. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:37:28,929] Finished trial#39 resulted in value: 0.9342320261437909. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:37:35,645] Finished trial#40 resulted in value: 0.9342320261437909. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:37:42,441] Fi

[I 2020-01-22 08:42:09,961] Finished trial#78 resulted in value: 0.9669117647058824. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:42:17,946] Finished trial#79 resulted in value: 0.9538398692810457. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:42:27,200] Finished trial#80 resulted in value: 0.9399509803921569. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:42:33,688] Finished trial#81 resulted in value: 0.9342320261437909. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:42:41,139] Finished trial#82 resulted in value: 0.9673202614379085. Current best value is 0.9865196078431372 with parameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}.
[I 2020-01-22 08:42:48,052] Fi

Accuracy: 0.9865196078431372
Best hyperparameters: {'classifier': 'SVC', 'svc_c': 1.887947872892547}
