In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

import lightgbm as lgb
import optuna

In [2]:
# Fetch the covertype dataset and carve out a held-out test split.
# The split is seeded so repeated runs produce the same partition.
dataset = datasets.fetch_covtype()

X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    random_state=179,
)

In [3]:
# Baseline classifier: 150 boosting rounds, no explicit device setting,
# LightGBM log output suppressed (verbose=-1).
model = lgb.LGBMClassifier(verbose=-1, n_estimators=150)

In [4]:
%%time
# Time one full fit of the classifier on the training split.
model = model.fit(X_train, y_train)

CPU times: user 16min 17s, sys: 4.35 s, total: 16min 22s
Wall time: 2min 51s


In [4]:
# Same 150-round, quiet configuration as the earlier classifier, but with
# training dispatched to LightGBM's CUDA device backend.
model = lgb.LGBMClassifier(verbose=-1, device="cuda", n_estimators=150)

In [5]:
%%time
# Time one full fit of the CUDA-configured classifier on the training split.
model = model.fit(X_train, y_train)

CPU times: user 10.6 s, sys: 1.56 s, total: 12.2 s
Wall time: 11 s


In [6]:
def objective(optimize_boosting_type=True):
    """Build an Optuna objective for tuning a CUDA LightGBM classifier.

    Args:
        optimize_boosting_type: When true, the boosting type ("dart" or
            "gbdt") is part of the search space; otherwise "gbdt" is used
            for every trial.

    Returns:
        A callable that accepts an Optuna trial, samples one set of
        hyperparameters from it, and returns the mean macro-F1
        cross-validation score of the resulting model on the notebook-level
        ``X_train`` / ``y_train``.
    """
    def _objective(trial):
        if optimize_boosting_type:
            boosting_type = trial.suggest_categorical(
                "boosting_type", ["dart", "gbdt"])
        else:
            boosting_type = "gbdt"

        # Each entry is a plain scalar. The earlier version ended most of
        # these assignments with a trailing comma, which silently wrapped
        # every sampled value in a 1-tuple before it reached LightGBM.
        # Dict entries are evaluated top to bottom, so the suggestion order
        # seen by the sampler is unchanged.
        sampled_params = {
            "lambda_l1": trial.suggest_float(
                'lambda_l1', 1e-8, 10.0, log=True),
            "lambda_l2": trial.suggest_float(
                'lambda_l2', 1e-8, 10.0, log=True),
            "num_leaves": trial.suggest_int('num_leaves', 2, 256),
            "feature_fraction": trial.suggest_float(
                'feature_fraction', 0.4, 1.0),
            "bagging_fraction": trial.suggest_float(
                'bagging_fraction', 0.4, 1.0),
            "bagging_freq": trial.suggest_int('bagging_freq', 1, 7),
            "min_child_samples": trial.suggest_int(
                'min_child_samples', 5, 100),
            "learning_rate": trial.suggest_float(
                "learning_rate", 0.0001, 0.5, log=True),
            # `step` is passed by keyword: Optuna treats it as keyword-only
            # and the positional form is deprecated.
            "max_bin": trial.suggest_int("max_bin", 128, 512, step=32),
            "n_estimators": trial.suggest_int(
                "n_estimators", 40, 400, step=20),
        }

        model = lgb.LGBMClassifier(
            force_row_wise=True,
            boosting_type=boosting_type,
            device="cuda",
            verbose=-1,
            **sampled_params)

        # Score with scikit-learn's default cross-validation splitting; the
        # trial value is the average macro-F1 across folds.
        scores = cross_val_score(model, X_train, y_train, scoring="f1_macro")
        return scores.mean()

    return _objective

In [None]:
# Search strategy: TPE proposes each trial's hyperparameters.
# NOTE(review): a Hyperband pruner is attached, but the objective defined in
# this notebook never calls trial.report() / trial.should_prune(), so no
# trial can actually be stopped early -- confirm whether pruning is intended.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=10,
    max_resource=400,
    reduction_factor=3,
)
sampler = optuna.samplers.TPESampler()

study = optuna.create_study(
    sampler=sampler,
    pruner=pruner,
    direction='maximize',
)

# Run 10 sequential trials (n_jobs=1) of the default objective, which also
# searches over the boosting type, collecting garbage after each trial.
study.optimize(
    objective(),
    n_trials=10,
    n_jobs=1,
    gc_after_trial=True,
)

[I 2023-08-06 14:36:28,978] A new study created in memory with name: no-name-9230bbce-53d5-4404-9c8a-2f87baec52d3
