In [1]:
import cudf
import dask.array as da
from cuml.linear_model import LogisticRegression
from cuml.preprocessing.model_selection import train_test_split
from sklearn.datasets import load_iris

import pandas as pd
import optuna
import numpy as np
import mlflow
import cuml
from cuml.ensemble import RandomForestRegressor
from cuml.metrics import accuracy_score

In [2]:
def objective(trial):
    iris = load_iris()
    X, y = cudf.DataFrame(pd.DataFrame(iris.data.astype('float32'))), cudf.DataFrame(pd.DataFrame(iris.target.astype('float32')))
    max_depth = trial.suggest_int("max_depth", 5, 15)
    n_estimators = trial.suggest_int("n_estimators", 100, 750)
    max_leaves = trial.suggest_int("max_leaves", -1, 20, 2)

    classifier = RandomForestRegressor(max_depth=max_depth,
                                       n_estimators=n_estimators,
                                      max_leaves=max_leaves)

    X_train, X_valid, y_train, y_valid = train_test_split(X, y)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_valid)
    
    score = accuracy_score(y_valid, y_pred)
    return score

In [3]:
N_TRIALS = 10

In [4]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS)

print("Number of finished trials: ", len(study.trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

  low=low, old_high=old_high, high=high, step=step
[32m[I 2020-06-11 15:04:57,558][0m Finished trial#0 with value: 0.7105262875556946 with parameters: {'max_depth': 8, 'n_estimators': 582, 'max_leaves': 9}. Best is trial#0 with value: 0.7105262875556946.[0m
[32m[I 2020-06-11 15:04:58,170][0m Finished trial#1 with value: 0.7894737124443054 with parameters: {'max_depth': 9, 'n_estimators': 507, 'max_leaves': 7}. Best is trial#1 with value: 0.7894737124443054.[0m
[32m[I 2020-06-11 15:04:58,848][0m Finished trial#2 with value: 0.7894737124443054 with parameters: {'max_depth': 11, 'n_estimators': 446, 'max_leaves': 19}. Best is trial#1 with value: 0.7894737124443054.[0m
[32m[I 2020-06-11 15:04:59,452][0m Finished trial#3 with value: 0.7894737124443054 with parameters: {'max_depth': 8, 'n_estimators': 563, 'max_leaves': 17}. Best is trial#1 with value: 0.7894737124443054.[0m
[32m[I 2020-06-11 15:05:00,144][0m Finished trial#4 with value: 0.6052631735801697 with parameters: {'ma

Number of finished trials:  10
Best trial:
  Value:  0.7894737124443054
  Params: 
    max_depth: 9
    n_estimators: 507
    max_leaves: 7


In [5]:
def mlflow_callback(study, trial):
    trial_value = trial.value if trial.value is not None else float("nan")
    with mlflow.start_run(run_name=study.study_name):
        mlflow.log_params(trial.params)
        mlflow.log_metrics({"accuracy": trial_value})

In [6]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS, timeout=600, callbacks=[mlflow_callback])

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2020-06-11 15:05:02,594][0m Finished trial#0 with value: 0.3684210479259491 with parameters: {'max_depth': 14, 'n_estimators': 121, 'max_leaves': 11}. Best is trial#0 with value: 0.3684210479259491.[0m
  low=low, old_high=old_high, high=high, step=step
[32m[I 2020-06-11 15:05:03,379][0m Finished trial#1 with value: 0.6315789222717285 with parameters: {'max_depth': 6, 'n_estimators': 616, 'max_leaves': 1}. Best is trial#1 with value: 0.6315789222717285.[0m
[32m[I 2020-06-11 15:05:03,582][0m Finished trial#2 with value: 0.8421052694320679 with parameters: {'max_depth': 5, 'n_estimators': 149, 'max_leaves': 11}. Best is trial#2 with value: 0.8421052694320679.[0m
[32m[I 2020-06-11 15:05:04,077][0m Finished trial#3 with value: 0.6842105388641357 with parameters: {'max_depth': 8, 'n_estimators': 356, 'max_leaves': 15}. Best is trial#2 with value: 0.8421052694320679.[0m
[32m[I 2020-06-11 15:05:04,798][0m Finished trial#4 with value: 0.6842105388641357 with parameters: {'m

Number of finished trials: 10
Best trial:
  Value: 0.8421052694320679
  Params: 
    max_depth: 5
    n_estimators: 149
    max_leaves: 11
