In [1]:
import pandas as pd

In [2]:
# Read the training split and the test split from CSV files located in the
# current working directory.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test_f.csv")

In [3]:
# Split each frame into its feature matrix (every column except "label")
# and its target vector (the "label" column), for train and test alike.
X_train, y_train = df_train.drop(columns=["label"]), df_train["label"]
X_test, y_test = df_test.drop(columns=["label"]), df_test["label"]

In [4]:
# Install hyperopt through the %pip magic before it is imported below.
# NOTE(review): the version is not pinned, so a later re-run may resolve a
# different hyperopt release than the one that produced the recorded results;
# consider pinning it.
%pip install hyperopt

Note: you may need to restart the kernel to use updated packages.


In [5]:
import numpy as np
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials, space_eval
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold

In [6]:

# Objective function
def objective(params):
    """Score one hyperparameter candidate for the hyperopt search.

    Builds a RandomForestClassifier from ``params`` (keys 'n_estimators',
    'max_depth' and 'criterion'; the two numeric values are cast to int) and
    evaluates it on the module-level ``X_train`` / ``y_train`` using 5-fold
    stratified, shuffled cross-validation scored by accuracy.

    Returns:
        dict with 'loss' set to the negated mean fold accuracy (hyperopt
        minimizes, so a lower loss means a higher accuracy) and 'status'
        set to STATUS_OK.
    """
    forest = RandomForestClassifier(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        criterion=params['criterion'],
        random_state=42,
        n_jobs=-1,
    )
    # Fixed seed on the splitter so every candidate is scored on the same folds.
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = cross_val_score(forest, X_train, y_train, scoring='accuracy', cv=folds)
    mean_accuracy = fold_accuracies.mean()
    return {'loss': -mean_accuracy, 'status': STATUS_OK}

In [7]:

# Hyperparameter space
# n_estimators and max_depth are drawn with quniform (step 1); the objective
# casts them to int before building the classifier. criterion is a
# categorical choice between the two listed split criteria.
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 150, 1),
    'max_depth': hp.quniform('max_depth', 5, 30, 1),
    'criterion': hp.choice('criterion', ['gini', 'entropy'])
}

# Seeded generator handed to fmin so the search can be repeated.
rng=np.random.default_rng(42)
# Trials object that fmin fills with the history of evaluated candidates.
trials = Trials()
# NOTE: the recorded run of these 100 evaluations took roughly 14 hours.
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials,
            rstate=rng
            )

print("Best hyperparameters:", best)
# `best` reports hp.choice parameters as the index of the chosen option
# (e.g. 'criterion': 1), which is easy to misread. space_eval maps the raw
# result back through the search space so the actual option is shown too.
print("Best hyperparameters (decoded):", space_eval(space, best))


  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 100/100 [14:06:37<00:00, 507.98s/trial, best loss: -0.9580575347129343] 
Best hyperparameters: {'criterion': 1, 'max_depth': 30.0, 'n_estimators': 148.0}


In [8]:
import joblib

In [9]:
# Take a shallow copy of the raw fmin result instead of aliasing it, so that
# in-place edits made to best_rf afterwards cannot also change `best`.
best_rf = dict(best)

In [10]:
# Convert numeric parameters to int
# The check accepts built-in int/float as well as NumPy scalars: the recorded
# fmin output shows plain values such as 30.0 and 148.0, and a built-in float
# is not an instance of np.floating, so testing only the NumPy types would
# leave those floats unconverted. bool is excluded so flags are never turned
# into 0/1; non-numeric values (e.g. strings) pass through unchanged.
best_rf = {
    name: (
        int(value)
        if isinstance(value, (int, float, np.integer, np.floating))
        and not isinstance(value, bool)
        else value
    )
    for name, value in best_rf.items()
}

In [13]:
# fmin reports an hp.choice parameter as the index of the chosen option.
# Decode that index through the search space instead of hard-coding the
# winning name, so the stored value stays correct if the search is re-run
# and selects a different criterion. The guard makes the cell safe to run
# again once the name has already been filled in.
if not isinstance(best_rf['criterion'], str):
    best_rf['criterion'] = space_eval(space, best)['criterion']

In [14]:
# Persist the selected hyperparameter dict to best_rf.pkl in the working
# directory; the call's return value is shown as the cell output.
joblib.dump(value=best_rf, filename="best_rf.pkl")

['best_rf.pkl']