In [1]:
import catboost as cb
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import optuna

In [2]:
def objective(trial, split_seed=42):
    """Optuna objective: train a CatBoost classifier and return validation accuracy.

    Loads the breast-cancer dataset, splits it 70/30 into training and
    validation parts, fits a ``CatBoostClassifier`` with hyperparameters
    sampled from ``trial`` and scores it on the validation part.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.
        split_seed: Value forwarded as ``random_state`` to
            ``train_test_split``. The fixed default makes every trial use the
            same partition, so differences in the returned score come from the
            hyperparameters rather than from the luck of the split. Pass
            ``None`` to draw a new random split on every call.

    Returns:
        Accuracy of the fitted model on the validation split.
    """
    data, target = load_breast_cancer(return_X_y=True)
    # Same partition for every trial (unless split_seed is None) so that
    # trial scores are comparable with each other.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.3, random_state=split_seed
    )

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "used_ram_limit": "3gb",
    }

    # Conditional search space: each bootstrap type gets only its own extra
    # knob; "MVS" is used without any additional sampled parameter.
    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    gbm = cb.CatBoostClassifier(**param)

    # NOTE(review): the validation split drives early stopping and is also the
    # data the returned accuracy is computed on, so the score is likely
    # optimistic; confirm the final model on a separate held-out set.
    gbm.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)

    preds = gbm.predict(valid_x)
    # Round to the nearest integer so predictions compare cleanly with valid_y.
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(valid_y, pred_labels)
    return accuracy

In [3]:
    # Search for the hyperparameters that maximize the objective's return
    # value, bounded by n_trials=100 and timeout=600.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    # Summary: how many trials the study recorded.
    n_recorded = len(study.trials)
    print("Number of finished trials: {}".format(n_recorded))

    # Summary: the best trial's score followed by its sampled parameters.
    print("Best trial:")
    trial = study.best_trial

    best_value = trial.value
    print("  Value: {}".format(best_value))

    print("  Params: ")
    best_params = trial.params
    for key, value in best_params.items():
        param_line = "    {}: {}".format(key, value)
        print(param_line)


[32m[I 2021-05-16 12:01:12,253][0m A new study created in memory with name: no-name-0447d5d4-1205-4a17-93de-be9dc3fb34e5[0m
[32m[I 2021-05-16 12:01:23,820][0m Trial 0 finished with value: 0.9590643274853801 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.09930166116574451, 'depth': 11, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.24828896022205982}. Best is trial 0 with value: 0.9590643274853801.[0m
[32m[I 2021-05-16 12:01:24,360][0m Trial 1 finished with value: 0.9824561403508771 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05820031109628691, 'depth': 3, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS'}. Best is trial 1 with value: 0.9824561403508771.[0m
[32m[I 2021-05-16 12:01:28,616][0m Trial 2 finished with value: 0.9649122807017544 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08728164755911122, 'depth': 10, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.875523

[32m[I 2021-05-16 12:01:53,349][0m Trial 26 finished with value: 0.9590643274853801 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05237480635482974, 'depth': 4, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS'}. Best is trial 9 with value: 0.9941520467836257.[0m
[32m[I 2021-05-16 12:01:53,782][0m Trial 27 finished with value: 0.9181286549707602 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.03363261870628953, 'depth': 1, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 9.947357161491894}. Best is trial 9 with value: 0.9941520467836257.[0m
[32m[I 2021-05-16 12:01:54,221][0m Trial 28 finished with value: 0.9707602339181286 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.019498909832476534, 'depth': 2, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'subsample': 0.6003072571782037}. Best is trial 9 with value: 0.9941520467836257.[0m
[32m[I 2021-05-16 12:01:54,586][0m Trial 29 finished w

[32m[I 2021-05-16 12:02:37,048][0m Trial 53 finished with value: 0.9707602339181286 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06940769861457972, 'depth': 3, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 3.2960200975100262}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:02:37,776][0m Trial 54 finished with value: 0.9649122807017544 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08568717673104453, 'depth': 4, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 8.7576997550502}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:02:38,457][0m Trial 55 finished with value: 0.9883040935672515 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06341972391937352, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 7.3023300832581}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:02:39,109][0m Trial 56 fin

[32m[I 2021-05-16 12:03:04,314][0m Trial 80 finished with value: 0.9707602339181286 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.052044347772298195, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 9.355318509252244}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:03:04,863][0m Trial 81 finished with value: 0.9766081871345029 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07148828877623069, 'depth': 3, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 8.592382291262552}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:03:05,472][0m Trial 82 finished with value: 0.9707602339181286 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06096036238120871, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 6.432316288441001}. Best is trial 46 with value: 1.0.[0m
[32m[I 2021-05-16 12:03:05,850][0m Trial 83

Number of finished trials: 100
Best trial:
  Value: 1.0
  Params: 
    objective: Logloss
    colsample_bylevel: 0.08591610256307494
    depth: 4
    boosting_type: Plain
    bootstrap_type: Bayesian
    bagging_temperature: 8.018830019177148
