In [1]:
!pip install --quiet optuna

[K     |████████████████████████████████| 308 kB 15.5 MB/s 
[K     |████████████████████████████████| 209 kB 66.7 MB/s 
[K     |████████████████████████████████| 80 kB 9.1 MB/s 
[K     |████████████████████████████████| 75 kB 5.0 MB/s 
[K     |████████████████████████████████| 49 kB 6.2 MB/s 
[K     |████████████████████████████████| 112 kB 58.7 MB/s 
[K     |████████████████████████████████| 149 kB 63.6 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [2]:
import optuna
optuna.__version__

'2.10.0'

The Case of a Random Forest Classifier

In [3]:
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection

def objective():
    iris = sklearn.datasets.load_iris()  # Prepare the data.
    
    clf = sklearn.ensemble.RandomForestClassifier(    
        n_estimators=5, max_depth=3)  # Define the model.
    
    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()  # Train and evaluate the model.

print('Accuracy: {}'.format(objective()))

Accuracy: 0.9533333333333333


In [4]:
import optuna

def objective(trial):
    iris = sklearn.datasets.load_iris()
    
    n_estimators = trial.suggest_int('n_estimators', 2, 20)
    max_depth = int(trial.suggest_float('max_depth', 1, 32, log=True))
    
    clf = sklearn.ensemble.RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth)
    
    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2022-01-01 18:14:03,395][0m A new study created in memory with name: no-name-14fcda6c-05c0-45eb-b7d6-55f4598f156e[0m
[32m[I 2022-01-01 18:14:03,445][0m Trial 0 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 6, 'max_depth': 1.0151786767689153}. Best is trial 0 with value: 0.8666666666666667.[0m
[32m[I 2022-01-01 18:14:03,535][0m Trial 1 finished with value: 0.96 and parameters: {'n_estimators': 17, 'max_depth': 21.932164584821482}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-01-01 18:14:03,591][0m Trial 2 finished with value: 0.7933333333333334 and parameters: {'n_estimators': 10, 'max_depth': 1.7254223670553028}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-01-01 18:14:03,665][0m Trial 3 finished with value: 0.9466666666666667 and parameters: {'n_estimators': 13, 'max_depth': 2.3514966911249777}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-01-01 18:14:03,761][0m Trial 4 finished with value: 0.9666666666666667 and parameter

Accuracy: 0.9733333333333333
Best hyperparameters: {'n_estimators': 15, 'max_depth': 27.690898006311016}


The case of SVC

In [5]:
import sklearn.svm

def objective(trial):
    iris = sklearn.datasets.load_iris()

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])
    
    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 2, 20)
        max_depth = int(trial.suggest_float('max_depth', 1, 32, log=True))

        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)
        
        clf = sklearn.svm.SVC(C=c, gamma='auto')

    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2022-01-01 18:14:41,893][0m A new study created in memory with name: no-name-6cd992e3-7f69-41a4-a1d7-8a11fa219cc6[0m
[32m[I 2022-01-01 18:14:41,914][0m Trial 0 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 8.507344340903596e-05}. Best is trial 0 with value: 0.32.[0m
[32m[I 2022-01-01 18:14:41,950][0m Trial 1 finished with value: 0.8266666666666667 and parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 1.8110976460098747}. Best is trial 1 with value: 0.8266666666666667.[0m
[32m[I 2022-01-01 18:14:41,967][0m Trial 2 finished with value: 0.96 and parameters: {'classifier': 'SVC', 'svc_c': 82.70812812893928}. Best is trial 2 with value: 0.96.[0m
[32m[I 2022-01-01 18:14:41,983][0m Trial 3 finished with value: 0.96 and parameters: {'classifier': 'SVC', 'svc_c': 1151.1733771157194}. Best is trial 2 with value: 0.96.[0m
[32m[I 2022-01-01 18:14:42,070][0m Trial 4 finished with value: 0.9533333333333333 and parameters: {'

Accuracy: 0.9866666666666667
Best hyperparameters: {'classifier': 'SVC', 'svc_c': 4.6057290517024505}


Optimization Plots

In [6]:
# Plotting the optimization history of the study.
optuna.visualization.plot_optimization_history(study)

In [7]:
# Plotting the accuracies for each hyperparameter for each trial.
optuna.visualization.plot_slice(study)

In [8]:
# Plotting the accuracy surface for the hyperparameters involved in the random forest model.
optuna.visualization.plot_contour(study, params=['n_estimators', 'max_depth'])