In [None]:
import lightgbm as lgb
import numpy as np
import optuna
import sklearn.datasets
import sklearn.metrics
from optuna.visualization import (
    plot_contour,
    plot_edf,
    plot_intermediate_values,
    plot_optimization_history,
    plot_parallel_coordinate,
    plot_param_importances,
    plot_slice,
)
from sklearn.model_selection import train_test_split

# Seed value used for NumPy's global RNG just below and passed to the Optuna
# TPE sampler when the study is created.
SEED = 42

# Seed NumPy's legacy global random state.
np.random.seed(SEED)

In [None]:
def objective(trial):
    """Sample LightGBM bagging hyper-parameters and score them (no pruning).

    A later cell defines another function named ``objective``; once that cell
    has run, it replaces this definition.

    Args:
        trial: Optuna trial used to draw ``bagging_fraction``,
            ``bagging_freq`` and ``min_child_samples``.

    Returns:
        The value returned by ``hyperopt_train_evaluate`` for the sampled
        parameter dictionary.
    """
    param = {
        # Fixed LightGBM settings.
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        # Tunable settings drawn from the trial.
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    # Actually evaluate the sampled parameters and return the score, instead of
    # returning a bare (and undefined) function name. The helper lives in a
    # later cell; the name is resolved when this function is called.
    return hyperopt_train_evaluate(param)

# Optuna

In [None]:
def objective(trial):
    """Optuna objective: train LightGBM on breast-cancer data and score it.

    Args:
        trial: Optuna trial used to draw ``bagging_fraction``,
            ``bagging_freq`` and ``min_child_samples``; it is also handed to
            the LightGBM pruning callback.

    Returns:
        Accuracy of the rounded predictions on the held-out 25% split.
    """
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Fix the split with SEED so every trial is scored on the same validation
    # set; without it each call draws a new split from the global RNG state,
    # which makes trials (and the pruner's intermediate values) incomparable
    # and makes re-running the study cell give different results.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.25, random_state=SEED
    )
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    # Add a callback for pruning. It is given the "auc" metric, whereas the
    # value returned below is accuracy.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
    # NOTE(review): `verbose_eval` is deprecated/removed in recent LightGBM
    # releases in favour of a logging callback — confirm against the
    # installed version.
    gbm = lgb.train(
        param,
        dtrain,
        valid_sets=[dvalid],
        verbose_eval=False,
        callbacks=[pruning_callback],
    )

    # Round the predicted probabilities to 0/1 labels before scoring.
    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

In [None]:
# Build the sampler and pruner up front: a TPE sampler seeded with SEED and a
# median pruner configured with 10 warm-up steps.
study_sampler = optuna.samplers.TPESampler(seed=SEED)
study_pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)

# The study maximises the value returned by `objective`.
study = optuna.create_study(
    sampler=study_sampler,
    pruner=study_pruner,
    direction="maximize",
)

# Search budget: 10 trials with a 600 second timeout.
study.optimize(objective, timeout=600, n_trials=10)

In [None]:
# Show Optuna's optimization-history figure for the study run above.
plot_optimization_history(study)

In [None]:
# Show Optuna's intermediate-values figure for the study's trials.
plot_intermediate_values(study)

In [None]:
# Show Optuna's parallel-coordinate figure for the study.
plot_parallel_coordinate(study)

In [None]:
# Display the study's trials as a DataFrame.
study.trials_dataframe()

# Hyperopt

In [None]:
import pandas as pd
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe


def hyperopt_train_evaluate(params):
    """Train LightGBM with ``params`` on breast-cancer data and score it.

    Args:
        params: LightGBM parameter dictionary passed straight to ``lgb.train``.

    Returns:
        Accuracy of the rounded predictions on the held-out 25% split.
    """
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Fix the split with SEED so every evaluation uses the same validation
    # set; without it each call draws a new split from the global RNG state,
    # so scores of different parameter sets are not directly comparable.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.25, random_state=SEED
    )
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)
    # NOTE(review): `verbose_eval` is deprecated/removed in recent LightGBM
    # releases in favour of a logging callback — confirm against the
    # installed version.
    gbm = lgb.train(params, dtrain, valid_sets=[dvalid], verbose_eval=False)

    # Round the predicted probabilities to 0/1 labels before scoring.
    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

In [None]:
# Hyperopt search space using the same parameters and bounds as the Optuna
# objective: fixed LightGBM settings are plain values, tunable ones are hp nodes.
space = {
    "objective": "binary",
    "metric": "auc",
    "verbosity": -1,
    "boosting_type": "gbdt",
    "bagging_fraction": hp.uniform("bagging_fraction", 0.4, 1.0),
    # `range` excludes its stop value, so stop one past the intended maximum
    # to cover the same inclusive bounds as suggest_int(1, 7) and
    # suggest_int(5, 100) in the Optuna objective.
    "bagging_freq": hp.choice("bagging_freq", list(range(1, 8))),
    "min_child_samples": hp.choice("min_child_samples", list(range(5, 101))),
}

In [None]:
def f(params):
    """Hyperopt objective wrapper around ``hyperopt_train_evaluate``.

    Returns a result dictionary whose ``loss`` is the negated accuracy, merged
    with the evaluated ``params`` so they are stored alongside each result.
    """
    accuracy = hyperopt_train_evaluate(params)
    outcome = {"loss": -accuracy, "status": STATUS_OK}
    # Keys from `params` are appended after (and take precedence over) the
    # outcome keys.
    return outcome | params


# Collect every evaluation in a Trials object so the results can be inspected
# after the search.
# NOTE(review): no `rstate` is passed to fmin, so this search is presumably
# not tied to SEED — confirm whether that is intended.
trials = Trials()
best = fmin(
    fn=f,
    space=space,
    algo=tpe.suggest,
    max_evals=10,
    trials=trials,
)

In [None]:
# One row per Hyperopt evaluation, built from the result dictionaries returned
# by `f` (loss, status and the evaluated parameters).
df = pd.DataFrame(trials.results)

In [None]:
import plotly.express as px

# Parallel-coordinates view of the Hyperopt results, coloured by row index
# (i.e. evaluation order, since `df` is built from the list of results).
fig = px.parallel_coordinates(
    df,
    color=df.index,
    color_continuous_scale=px.colors.diverging.Tealrose,
    # Centre the diverging colour scale on the middle of the 0..len(df)-1
    # index range rather than on a hard-coded value.
    color_continuous_midpoint=(len(df) - 1) / 2,
)
fig.show()