In [1]:

import numpy as np
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import*
import optuna
import catboost as cb
from sklearn.metrics import accuracy_score
from optuna.integration import CatBoostPruningCallback
from sklearn.metrics import log_loss
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
# load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
dataset = read_csv(url, header=None)
data = dataset.values
x_data, y_data = data[:, :-1],data[:, -1]
x_data = pd.DataFrame(x_data.astype('float64'))
#Label Encoding Target
le = preprocessing.LabelEncoder()
le.fit(y_data)
y_data = le.transform(y_data)
y_data = pd.DataFrame(y_data.astype('float64'))
##compare a validation set with the kfold output from optuna 
features, test_features, target, test_target = train_test_split(x_data, y_data, test_size=0.3, random_state=17)



def objective(trial: optuna.Trial) -> float:
    train_x, valid_x, train_y, valid_y = train_test_split(x_data, y_data, test_size=0.25)
    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "used_ram_limit": "3gb",
        "eval_metric": "Accuracy",
    }
    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1, log=True)
    gbm = cb.CatBoostClassifier(**param)
    pruning_callback = CatBoostPruningCallback(trial, "Accuracy")
    gbm.fit(
        train_x,
        train_y,
        eval_set=[(valid_x, valid_y)],
        verbose=0,
        early_stopping_rounds=100,
        callbacks=[pruning_callback],
    )
    # evoke pruning manually.
    pruning_callback.check_pruned()
    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(valid_y, pred_labels)
    return accuracy

if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
    study.optimize(objective, n_trials=10, timeout=600)
    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))



[I 2024-04-17 16:57:17,451] A new study created in memory with name: no-name-c863b8cb-7a0c-4715-9a6a-6856423bbe62
[I 2024-04-17 16:57:18,164] Trial 0 finished with value: 0.8653846153846154 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.016325390054085447, 'depth': 2, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.3009087858120316}. Best is trial 0 with value: 0.8653846153846154.
[I 2024-04-17 16:57:18,722] Trial 1 finished with value: 0.8269230769230769 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.033146296470581246, 'depth': 11, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS'}. Best is trial 0 with value: 0.8653846153846154.
[I 2024-04-17 16:57:19,489] Trial 2 finished with value: 0.7884615384615384 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.04516337901565217, 'depth': 10, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS'}. Best is trial 0 with value: 0.8653846153846154.
[I 2024-04-17 16:57:

Number of finished trials: 10
Best trial:
  Value: 0.9230769230769231
  Params: 
    objective: Logloss
    colsample_bylevel: 0.030751131108929176
    depth: 6
    boosting_type: Ordered
    bootstrap_type: MVS
