In [6]:
import pandas as pd
import numpy as np
import optuna
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [7]:
df = pd.read_csv("heart.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [8]:
df_o = pd.get_dummies(df) #One hot encoding
df_o.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    int64  
 1   RestingBP          918 non-null    int64  
 2   Cholesterol        918 non-null    int64  
 3   FastingBS          918 non-null    int64  
 4   MaxHR              918 non-null    int64  
 5   Oldpeak            918 non-null    float64
 6   HeartDisease       918 non-null    int64  
 7   Sex_F              918 non-null    bool   
 8   Sex_M              918 non-null    bool   
 9   ChestPainType_ASY  918 non-null    bool   
 10  ChestPainType_ATA  918 non-null    bool   
 11  ChestPainType_NAP  918 non-null    bool   
 12  ChestPainType_TA   918 non-null    bool   
 13  RestingECG_LVH     918 non-null    bool   
 14  RestingECG_Normal  918 non-null    bool   
 15  RestingECG_ST      918 non-null    bool   
 16  ExerciseAngina_N   918 non

In [9]:
X = df_o.drop("HeartDisease", axis=1).copy()
y = df_o["HeartDisease"].copy()
X.shape, y.shape

((918, 20), (918,))

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=369)

In [15]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
reporte = classification_report(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
print(acc)

0.7608695652173914


In [19]:
def objective(trial):
    global X_train, y_train, X_test
    params = {
        "criterion" : trial.suggest_categorical("criterion", ["gini", "entropy", "log_loss"]),
        "splitter" : trial.suggest_categorical("splitter", ["best", "random"]),
        "max_depth" : trial.suggest_int("max_depth", 1,300)
    }
    model = DecisionTreeClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials= 1000, n_jobs=-1)


[I 2025-08-20 23:46:49,512] A new study created in memory with name: no-name-80689447-a351-4366-a218-111e9d756043
[I 2025-08-20 23:46:49,537] Trial 1 finished with value: 0.8804347826086957 and parameters: {'criterion': 'log_loss', 'splitter': 'random', 'max_depth': 7}. Best is trial 1 with value: 0.8804347826086957.
[I 2025-08-20 23:46:49,547] Trial 0 finished with value: 0.8586956521739131 and parameters: {'criterion': 'gini', 'splitter': 'random', 'max_depth': 59}. Best is trial 1 with value: 0.8804347826086957.
[I 2025-08-20 23:46:49,552] Trial 3 finished with value: 0.7934782608695652 and parameters: {'criterion': 'entropy', 'splitter': 'best', 'max_depth': 258}. Best is trial 1 with value: 0.8804347826086957.
[I 2025-08-20 23:46:49,558] Trial 6 finished with value: 0.8260869565217391 and parameters: {'criterion': 'log_loss', 'splitter': 'random', 'max_depth': 119}. Best is trial 1 with value: 0.8804347826086957.
[I 2025-08-20 23:46:49,562] Trial 2 finished with value: 0.804347826

In [23]:
best_param = study.best_params
model_tuneado = DecisionTreeClassifier(**best_param)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.782608695652174