In [1]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("heart.csv")
df = pd.get_dummies(df)
X = df.drop("HeartDisease", axis=1).copy()
y = df["HeartDisease"].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=369)


In [3]:
model = HistGradientBoostingClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.8876811594202898

In [9]:
def objective(trial):
    hiper = {
        #"loss": trial.suggest_categorical("loss", ["log_loss", "binary_crossentropy"]),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.5, log=True),
        "max_iter": trial.suggest_int("max_iter", 100, 1000),
        "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 20, 150),
        "max_depth": trial.suggest_int("max_depth", 5, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 20, 100),
        "l2_regularization": trial.suggest_float("l2_regularization", 0, 10),
        "max_bins": trial.suggest_int("max_bins", 100, 255),
    }
    model = HistGradientBoostingClassifier(**hiper, random_state=369)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_jobs=-1, n_trials=1000)

[I 2025-08-22 20:31:52,932] A new study created in memory with name: no-name-39660152-7df9-42a8-91de-2875063cb44a
[I 2025-08-22 20:31:54,810] Trial 14 finished with value: 0.855072463768116 and parameters: {'learning_rate': 0.008638589877257444, 'max_iter': 206, 'max_leaf_nodes': 132, 'max_depth': 11, 'min_samples_leaf': 99, 'l2_regularization': 8.359102991216858, 'max_bins': 161}. Best is trial 14 with value: 0.855072463768116.
[I 2025-08-22 20:31:55,124] Trial 7 finished with value: 0.8804347826086957 and parameters: {'learning_rate': 0.01565979361202416, 'max_iter': 182, 'max_leaf_nodes': 122, 'max_depth': 14, 'min_samples_leaf': 79, 'l2_regularization': 5.954880611276103, 'max_bins': 130}. Best is trial 7 with value: 0.8804347826086957.
[I 2025-08-22 20:31:55,271] Trial 12 finished with value: 0.8623188405797102 and parameters: {'learning_rate': 0.4697552406224356, 'max_iter': 205, 'max_leaf_nodes': 119, 'max_depth': 7, 'min_samples_leaf': 83, 'l2_regularization': 8.74231577846449,

In [13]:
model = HistGradientBoostingClassifier(**study.best_params)
print(study.best_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
acc  = accuracy_score(y_test, y_pred)
print(acc)

{'learning_rate': 0.009018207065489423, 'max_iter': 385, 'max_leaf_nodes': 138, 'max_depth': 5, 'min_samples_leaf': 36, 'l2_regularization': 6.559352388506162, 'max_bins': 183}
0.9239130434782609
