In [1]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("heart.csv")
df = pd.get_dummies(df)
X = df.drop("HeartDisease", axis=1).copy()
y = df["HeartDisease"].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=369)


In [3]:
model = xgb.XGBRFClassifier(device = "cuda")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)


0.8876811594202898

In [4]:
def objective(trial):
    hiper = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 0.5),
        'device': 'cuda',  # SI TIENES GPU
    }
    model = xgb.XGBRFClassifier(**hiper)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc
study  = optuna.create_study(direction="maximize")
study.optimize(objective, n_jobs=-1, n_trials=50)

[I 2025-08-22 21:38:06,708] A new study created in memory with name: no-name-e34e487e-a952-44a6-83af-3ae599bc3838
[I 2025-08-22 21:38:29,598] Trial 11 finished with value: 0.5434782608695652 and parameters: {'n_estimators': 244, 'max_depth': 8, 'learning_rate': 0.017663641520000436, 'subsample': 0.9757166366167122, 'colsample_bytree': 0.9530742428662575, 'reg_alpha': 0.00021839497067271387, 'reg_lambda': 0.00021476635465458207, 'min_child_weight': 10, 'gamma': 0.36750006048328343}. Best is trial 11 with value: 0.5434782608695652.
[I 2025-08-22 21:38:32,877] Trial 4 finished with value: 0.5434782608695652 and parameters: {'n_estimators': 323, 'max_depth': 9, 'learning_rate': 0.011852633407766348, 'subsample': 0.7019234466737588, 'colsample_bytree': 0.7918688859370825, 'reg_alpha': 8.730309993686564e-05, 'reg_lambda': 1.6179465240937942e-07, 'min_child_weight': 9, 'gamma': 0.4321597544161573}. Best is trial 11 with value: 0.5434782608695652.
[I 2025-08-22 21:38:33,405] Trial 8 finished w

In [6]:
model = xgb.XGBRFClassifier(**study.best_params)
print(study.best_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
acc  = accuracy_score(y_test, y_pred)
print(acc)

{'n_estimators': 728, 'max_depth': 4, 'learning_rate': 0.288058717735281, 'subsample': 0.8933646878557893, 'colsample_bytree': 0.8632449905269044, 'reg_alpha': 1.7857889671944856e-06, 'reg_lambda': 7.565528848893675e-06, 'min_child_weight': 1, 'gamma': 0.4924820159874104}
0.8768115942028986
