In [1]:
import pandas as pd
# Load the training set from train.csv (path is relative to the notebook's working directory).
df_train = pd.read_csv("train.csv")

In [2]:
# Split the frame into the target column ("label") and the feature matrix
# (every remaining column). df_train itself is left unmodified.
y_train = df_train["label"]
X_train = df_train.drop("label", axis=1)

In [3]:
pip install bayesian-optimization


Note: you may need to restart the kernel to use updated packages.


In [4]:
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
import numpy as np

In [5]:
def objective_bo(max_depth, min_samples_split, min_samples_leaf, criterion):
    """Objective for Bayesian optimisation of a decision tree.

    The optimiser supplies every hyperparameter as a continuous value, so the
    integer-valued ones are truncated with ``int`` and ``criterion`` is decoded
    by thresholding at 0.5.

    Args:
        max_depth: Continuous proxy for the tree's ``max_depth``.
        min_samples_split: Continuous proxy for ``min_samples_split``.
        min_samples_leaf: Continuous proxy for ``min_samples_leaf``.
        criterion: Continuous proxy for the split criterion; values below 0.5
            select ``'gini'``, anything else selects ``'entropy'``.

    Returns:
        Mean accuracy over a shuffled, seeded 5-fold stratified
        cross-validation on the notebook-level ``X_train`` / ``y_train``.
    """
    # Decode the continuous criterion value into one of the two categories.
    if criterion < 0.5:
        split_criterion = 'gini'
    else:
        split_criterion = 'entropy'

    tree = DecisionTreeClassifier(
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        criterion=split_criterion,
        random_state=42,
    )

    # Fixed seed so every candidate is scored on the same fold assignment.
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = cross_val_score(tree, X_train, y_train, scoring='accuracy', cv=folds)
    return fold_scores.mean()

# Search space handed to the optimiser: (lower, upper) bounds per parameter.
# `criterion` is a continuous proxy that objective_bo thresholds at 0.5.
pbounds = dict(
    max_depth=(5, 60),
    min_samples_split=(2, 10),
    min_samples_leaf=(1, 5),
    criterion=(0, 1),
)


In [6]:
# Build the optimiser over the decision-tree search space; the seed makes the
# sequence of probed points reproducible.
optimizer = BayesianOptimization(
    f=objective_bo,
    pbounds=pbounds,
    random_state=42,
)

# 30 initial exploration points followed by 80 optimisation iterations.
optimizer.maximize(init_points=30, n_iter=80)

# optimizer.max holds the best result found so far.
print("Best parameters:", optimizer.max)

|   iter    |  target   | max_depth | min_sa... | min_sa... | criterion |
-------------------------------------------------------------------------
| [39m1        [39m | [39m0.9348418[39m | [39m25.599706[39m | [39m9.6057144[39m | [39m3.9279757[39m | [39m0.5986584[39m |
| [39m2        [39m | [39m0.8828101[39m | [39m13.581025[39m | [39m3.2479561[39m | [39m1.2323344[39m | [39m0.8661761[39m |
| [35m3        [39m | [35m0.9389635[39m | [35m38.061325[39m | [35m7.6645806[39m | [35m1.0823379[39m | [35m0.9699098[39m |
| [35m4        [39m | [35m0.9390608[39m | [35m50.784345[39m | [35m3.6987128[39m | [35m1.7272998[39m | [35m0.1834045[39m |
| [39m5        [39m | [39m0.9236169[39m | [39m21.733323[39m | [39m6.1980514[39m | [39m2.7277800[39m | [39m0.2912291[39m |
| [39m6        [39m | [39m0.9332563[39m | [39m38.651909[39m | [39m3.1159508[39m | [39m2.1685785[39m | [39m0.3663618[39m |
| [39m7        [39m | [39m0.9385331[39m | [

In [7]:
# Keep the optimiser's best result (a dict that includes a 'params' entry).
best_dt_BOGP = optimizer.max

In [8]:
import joblib

In [9]:
# Extract the winning hyperparameters straight from the optimiser rather than
# from best_dt_BOGP itself: the self-referential form raised KeyError when the
# cell was re-run, because the name was already bound to the params dict.
# dict(...) makes a private copy that later cells can modify freely.
best_dt_BOGP = dict(optimizer.max['params'])

In [10]:
# Decode the optimiser's continuous `criterion` value with the same 0.5
# threshold that objective_bo applies, instead of hard-coding 'entropy'
# (which would silently be wrong whenever the best point selected 'gini').
# The isinstance guard keeps the cell safe to re-run after decoding.
if not isinstance(best_dt_BOGP['criterion'], str):
    best_dt_BOGP['criterion'] = 'gini' if best_dt_BOGP['criterion'] < 0.5 else 'entropy'

In [11]:
# Truncate every real-valued hyperparameter to int, mirroring the int() casts
# in objective_bo. Plain Python floats are matched as well as NumPy scalars,
# so the cast does not depend on which scalar type the optimiser returns.
# Non-numeric entries (e.g. the criterion string) pass through unchanged.
best_dt_BOGP = {
    name: int(value) if isinstance(value, (float, np.integer, np.floating)) else value
    for name, value in best_dt_BOGP.items()
}

In [12]:
# Persist the tuned hyperparameter dict to disk for later reuse.
joblib.dump(value=best_dt_BOGP, filename="best_dt_BOGP.pkl")

['best_dt_BOGP.pkl']