In [1]:
import pandas as pd
# Load the training set; the path is relative to the notebook's working directory.
df_train = pd.read_csv("train.csv")

In [2]:
# Split the training frame: the "label" column is the target, every other
# column is a feature. df_train itself is left unmodified.
y_train = df_train["label"]
X_train = df_train.drop(columns=["label"])

In [3]:
# Install hyperopt into the environment of the running kernel (%pip magic).
# NOTE(review): the version is not pinned, so a later run may pull a different
# release -- consider pinning once the working version is confirmed.
%pip install hyperopt

Note: you may need to restart the kernel to use updated packages.


In [4]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
import numpy as np

In [5]:
# Objective function minimised by hyperopt
def objective_et(params):
    """Score one ExtraTrees configuration by cross-validated accuracy.

    Parameters
    ----------
    params : dict
        Sampled point with the keys 'n_estimators', 'max_depth' and
        'criterion'. The two numeric entries are cast to int before use.

    Returns
    -------
    dict
        ``{'loss': -mean_accuracy, 'status': STATUS_OK}`` where
        ``mean_accuracy`` is the mean accuracy over a 5-fold stratified,
        shuffled split (seed 42) of the notebook-level X_train / y_train.
    """
    n_trees = int(params['n_estimators'])
    depth = int(params['max_depth'])
    split_rule = params['criterion']

    model = ExtraTreesClassifier(
        n_estimators=n_trees,
        max_depth=depth,
        criterion=split_rule,
        random_state=42,
        n_jobs=-1,
    )
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=folds)
    mean_accuracy = fold_scores.mean()

    # fmin minimises the loss, so accuracy is negated.
    return {'loss': -mean_accuracy, 'status': STATUS_OK}

In [6]:
# Search space for the ExtraTrees tuning run.
# hp.quniform yields floats quantised to multiples of q=1 (the objective casts
# them to int). hp.choice is reported by fmin as the *index* of the selected
# option, not the option itself.
space_et = {
    'n_estimators': hp.quniform('n_estimators', 50, 150, 1),
    'max_depth': hp.quniform('max_depth', 5, 30, 1),
    'criterion': hp.choice('criterion', ['gini', 'entropy'])
}

trials_et = Trials()

# Seed the TPE sampler so that re-running this (multi-hour) search proposes the
# same sequence of candidates; without rstate every run explores differently.
# NOTE(review): recent hyperopt releases expect a numpy Generator here; older
# ones expect np.random.RandomState(42) -- confirm against the installed version.
best_et = fmin(
    fn=objective_et,
    space=space_et,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials_et,
    rstate=np.random.default_rng(42),
)
print("Best hyperparameters:", best_et)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 100/100 [5:29:30<00:00, 197.71s/trial, best loss: -0.9275758091578922] 
Best hyperparameters: {'criterion': 1, 'max_depth': 30.0, 'n_estimators': 68.0}


In [7]:
import joblib

In [8]:
# Convert numeric parameters to plain Python ints for the estimator.
# Builtin int/float are matched as well as NumPy scalars, so a value such as
# 68.0 is cast whichever numeric type the optimiser returned. Every numeric
# dimension of the search space is quantised with q=1, so the cast is lossless.
# Non-numeric entries (e.g. an already decoded criterion string) pass through.
best_et = {
    name: int(value) if isinstance(value, (int, float, np.integer, np.floating)) else value
    for name, value in best_et.items()
}

In [9]:
# fmin reports an hp.choice dimension as the index of the selected option, so
# map that index back to the string ExtraTreesClassifier expects instead of
# hard-coding one value read off a previous run's output.
# The list must stay in the same order as the 'criterion' options in space_et.
criterion_options = ['gini', 'entropy']
if not isinstance(best_et['criterion'], str):  # skip if already decoded (cell re-run)
    best_et['criterion'] = criterion_options[int(best_et['criterion'])]

In [10]:
# Persist the tuned hyperparameter dict next to the notebook for later reuse.
joblib.dump(value=best_et, filename="best_et.pkl")

['best_et.pkl']