In [4]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [5]:
df = pd.read_csv("heart.csv")
df = pd.get_dummies(df)
df.info()
X = df.drop("HeartDisease", axis=1).copy()
y = df["HeartDisease"].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=369)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    int64  
 1   RestingBP          918 non-null    int64  
 2   Cholesterol        918 non-null    int64  
 3   FastingBS          918 non-null    int64  
 4   MaxHR              918 non-null    int64  
 5   Oldpeak            918 non-null    float64
 6   HeartDisease       918 non-null    int64  
 7   Sex_F              918 non-null    bool   
 8   Sex_M              918 non-null    bool   
 9   ChestPainType_ASY  918 non-null    bool   
 10  ChestPainType_ATA  918 non-null    bool   
 11  ChestPainType_NAP  918 non-null    bool   
 12  ChestPainType_TA   918 non-null    bool   
 13  RestingECG_LVH     918 non-null    bool   
 14  RestingECG_Normal  918 non-null    bool   
 15  RestingECG_ST      918 non-null    bool   
 16  ExerciseAngina_N   918 non

In [39]:
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.8768115942028986

In [8]:
def objective(trial):
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])
    hiper = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy', 'log_loss']),
        'max_depth': trial.suggest_int('max_depth', 3, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 1000),
        'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.1),
        'bootstrap': bootstrap,
        'oob_score': trial.suggest_categorical('oob_score', [True, False]) if bootstrap else False,
        'n_jobs': -1,
        'random_state': 42,
        'verbose': 0,
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced', 'balanced_subsample']),
        'ccp_alpha': trial.suggest_float('ccp_alpha', 0.0, 0.1)
    }
    if bootstrap:
        hiper['max_samples'] = trial.suggest_float('max_samples', 0.1, 1.0)
    model = RandomForestClassifier(**hiper)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=1000, n_jobs=-1)

[I 2025-08-21 21:54:54,251] A new study created in memory with name: no-name-2ebc6bdb-ea1b-4629-a411-75e33fb186d6
[I 2025-08-21 21:54:59,753] Trial 9 finished with value: 0.8623188405797102 and parameters: {'bootstrap': False, 'n_estimators': 129, 'criterion': 'entropy', 'max_depth': 22, 'min_samples_split': 8, 'min_samples_leaf': 19, 'min_weight_fraction_leaf': 0.02351128167965927, 'max_features': 'log2', 'max_leaf_nodes': 513, 'min_impurity_decrease': 0.030849223707709983, 'warm_start': True, 'class_weight': 'balanced_subsample', 'ccp_alpha': 0.07308457059629019}. Best is trial 9 with value: 0.8623188405797102.
[I 2025-08-21 21:55:00,597] Trial 11 finished with value: 0.8369565217391305 and parameters: {'bootstrap': False, 'n_estimators': 289, 'criterion': 'entropy', 'max_depth': 21, 'min_samples_split': 9, 'min_samples_leaf': 12, 'min_weight_fraction_leaf': 0.36519911932918425, 'max_features': None, 'max_leaf_nodes': 324, 'min_impurity_decrease': 0.005428833584599325, 'warm_start': 

In [34]:
model = RandomForestClassifier(**study.best_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.8913043478260869