In [1]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("heart.csv")
df = pd.get_dummies(df)
df.info()
X = df.drop("HeartDisease", axis=1).copy()
y = df["HeartDisease"].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=369)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    int64  
 1   RestingBP          918 non-null    int64  
 2   Cholesterol        918 non-null    int64  
 3   FastingBS          918 non-null    int64  
 4   MaxHR              918 non-null    int64  
 5   Oldpeak            918 non-null    float64
 6   HeartDisease       918 non-null    int64  
 7   Sex_F              918 non-null    bool   
 8   Sex_M              918 non-null    bool   
 9   ChestPainType_ASY  918 non-null    bool   
 10  ChestPainType_ATA  918 non-null    bool   
 11  ChestPainType_NAP  918 non-null    bool   
 12  ChestPainType_TA   918 non-null    bool   
 13  RestingECG_LVH     918 non-null    bool   
 14  RestingECG_Normal  918 non-null    bool   
 15  RestingECG_ST      918 non-null    bool   
 16  ExerciseAngina_N   918 non

In [21]:
model = GradientBoostingClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.9057971014492754

In [4]:
def objective(trial):
    hiper = {
        "loss": trial.suggest_categorical("loss", ["log_loss", "deviance", "exponential"]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 1.0, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        "ccp_alpha": trial.suggest_float("ccp_alpha", 0.0, 0.1)
    }
    model = GradientBoostingClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_jobs=-1, n_trials=1000)

[I 2025-08-22 19:04:42,743] A new study created in memory with name: no-name-7b01566f-2ad2-4b73-a0ba-0b19ea877089
[I 2025-08-22 19:04:44,258] Trial 1 finished with value: 0.9057971014492754 and parameters: {'loss': 'exponential', 'learning_rate': 0.236055101641847, 'n_estimators': 114, 'subsample': 0.8635620362320121, 'criterion': 'squared_error', 'min_samples_split': 11, 'min_samples_leaf': 18, 'max_depth': 9, 'max_features': 'log2', 'ccp_alpha': 0.07729114385431099}. Best is trial 1 with value: 0.9057971014492754.
[I 2025-08-22 19:04:44,283] Trial 14 finished with value: 0.9057971014492754 and parameters: {'loss': 'log_loss', 'learning_rate': 0.17834880358691396, 'n_estimators': 977, 'subsample': 0.785505677281327, 'criterion': 'friedman_mse', 'min_samples_split': 7, 'min_samples_leaf': 12, 'max_depth': 3, 'max_features': None, 'ccp_alpha': 0.07137135338891563}. Best is trial 1 with value: 0.9057971014492754.
[I 2025-08-22 19:04:44,285] Trial 0 finished with value: 0.9057971014492754

In [44]:
model = GradientBoostingClassifier(**study.best_params)
print(study.best_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

{'loss': 'exponential', 'learning_rate': 0.236055101641847, 'n_estimators': 114, 'subsample': 0.8635620362320121, 'criterion': 'squared_error', 'min_samples_split': 11, 'min_samples_leaf': 18, 'max_depth': 9, 'max_features': 'log2', 'ccp_alpha': 0.07729114385431099}


0.8695652173913043