In [1]:
import pandas as pd
# Load the training set from the working directory into a DataFrame.
df_train = pd.read_csv("train.csv")

In [2]:
# Split the training frame: the "label" column is the target,
# every remaining column is used as a feature.
y_train = df_train["label"]
X_train = df_train.drop("label", axis=1)

In [3]:
pip install bayesian-optimization


Note: you may need to restart the kernel to use updated packages.


In [4]:
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
import numpy as np

In [5]:
def objective_bo(max_depth, n_estimators, min_samples_split, min_samples_leaf, criterion):
    """Score one ExtraTrees configuration for the Bayesian optimiser.

    All arguments arrive as continuous values. The four tree-size arguments
    are truncated with ``int``; ``criterion`` is thresholded at 0.5
    (below 0.5 -> 'gini', otherwise -> 'entropy').

    Returns:
        Mean accuracy over a shuffled 5-fold stratified cross-validation
        (seeded with 42) on the notebook-level ``X_train`` / ``y_train``.
    """
    if criterion < 0.5:
        split_rule = 'gini'
    else:
        split_rule = 'entropy'

    # Hyperparameters after casting the optimiser's floats to what sklearn expects.
    tree_params = dict(
        max_depth=int(max_depth),
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        criterion=split_rule,
        random_state=42,
    )
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    fold_scores = cross_val_score(
        ExtraTreesClassifier(**tree_params),
        X_train,
        y_train,
        scoring='accuracy',
        cv=folds,
    )
    return fold_scores.mean()

# Search space for the optimiser: one (lower, upper) pair per objective_bo argument.
# 'criterion' is a continuous stand-in that objective_bo thresholds at 0.5.
pbounds = dict(
    max_depth=(5, 60),
    n_estimators=(30, 100),
    min_samples_split=(2, 10),
    min_samples_leaf=(1, 5),
    criterion=(0, 1),
)


In [6]:
# Run the Bayesian search over `pbounds`, scoring every candidate with objective_bo:
# init_points=30 initial evaluations, then n_iter=80 optimisation steps.
optimizer = BayesianOptimization(
    f=objective_bo,
    pbounds=pbounds,
    random_state=42,
)
optimizer.maximize(n_iter=80, init_points=30)

# Report the best result recorded by the optimiser.
print("Best parameters:", optimizer.max)

|   iter    |  target   | max_depth | n_esti... | min_sa... | min_sa... | criterion |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.8592855[39m | [39m25.599706[39m | [39m96.550001[39m | [39m7.8559515[39m | [39m3.3946339[39m | [39m0.1560186[39m |
| [39m2        [39m | [39m0.7151472[39m | [39m13.579698[39m | [39m34.065852[39m | [39m8.9294091[39m | [39m3.4044600[39m | [39m0.7080725[39m |
| [39m3        [39m | [39m0.5620650[39m | [39m6.1321471[39m | [39m97.893689[39m | [39m8.6595411[39m | [39m1.8493564[39m | [39m0.1818249[39m |
| [39m4        [39m | [39m0.7440750[39m | [39m15.087248[39m | [39m51.296957[39m | [39m6.1980514[39m | [39m2.7277800[39m | [39m0.2912291[39m |
| [35m5        [39m | [35m0.9323784[39m | [35m38.651909[39m | [35m39.764570[39m | [35m4.3371571[39m | [35m2.4654473[39m | [35m0.4560699[39m |
| [39m6        [39m | [39m0.9225834[39m | [

In [7]:
# Best result found by the optimiser; later cells use its 'target' and 'params' entries.
best_et_BOGP = optimizer.max

In [8]:
import joblib

In [9]:
# Discard the best score so that only the hyperparameters remain.
# pop() with a default (instead of `del`) keeps this cell re-runnable: when the
# 'target' key is already gone the call is a no-op rather than a KeyError.
# The trailing semicolon stops the popped score from being echoed as cell output.
best_et_BOGP.pop('target', None);

In [10]:
# Unwrap the hyperparameter dict from the optimiser result.
# .get() with the dict itself as fallback makes a second run of this cell harmless:
# once 'params' has been unwrapped the key no longer exists and the value is kept as is.
best_et_BOGP = best_et_BOGP.get('params', best_et_BOGP)

In [11]:
# Decode the optimiser's continuous 'criterion' value with the same 0.5 threshold
# that objective_bo applies, instead of hard-coding 'gini' (which would silently
# save the wrong criterion whenever the best point lies at or above 0.5).
# The isinstance guard keeps the cell re-runnable once the value is already a string.
if not isinstance(best_et_BOGP['criterion'], str):
    best_et_BOGP['criterion'] = 'gini' if best_et_BOGP['criterion'] < 0.5 else 'entropy'

In [12]:
# Truncate every NumPy numeric value to a built-in int (the same int() cast that
# objective_bo applies); any other value, such as the criterion string, is kept unchanged.
best_et_BOGP = dict(
    (name, int(value)) if isinstance(value, (np.integer, np.floating)) else (name, value)
    for name, value in best_et_BOGP.items()
)

In [13]:
# Display the final hyperparameter dict for inspection.
best_et_BOGP

{'max_depth': 60,
 'n_estimators': 100,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'criterion': 'gini'}

In [14]:
# Persist the chosen hyperparameters to disk; the call's return value is shown as the cell output.
joblib.dump(value=best_et_BOGP, filename="best_et_BOGP.pkl")

['best_et_BOGP.pkl']