# XGBoost Tuning with Optuna + Early Stopping

Bayesian optimization of hyperparameters using Optuna.

In [None]:
import pandas as pd
import optuna
import xgboost as xgb
from xgboost.callback import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
import shap
import seaborn as sns

# Read the Framingham CSV, then discard every row containing a missing value
df = pd.read_csv('../data/framingham.csv')
df = df.dropna()

# Target is the TenYearCHD column; every other column is a feature
y = df['TenYearCHD']
X = df.drop(columns='TenYearCHD')

# Stratified 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Wrap both partitions in the DMatrix containers passed to xgb.train
dtest = xgb.DMatrix(X_test, label=y_test)
dtrain = xgb.DMatrix(X_train, label=y_train)


ModuleNotFoundError: No module named 'pandas'

In [None]:

# Define objective function for Optuna
def objective(trial):
    """Train one XGBoost candidate and return its ROC AUC on ``dtest``.

    Args:
        trial: Optuna trial used to sample ``max_depth``, ``learning_rate``,
            ``subsample``, ``colsample_bytree``, ``lambda`` and ``alpha``.

    Returns:
        ROC AUC of the best-iteration model's predictions on ``dtest``,
        scored against ``y_test``.
    """
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'max_depth': trial.suggest_int('max_depth', 3, 7),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        # Regularisation strengths are sampled on a log scale.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'verbosity': 0,
    }
    # NOTE(review): dtest drives early stopping here and is also the set the
    # returned AUC is computed on, so the tuned score is optimistic. Consider
    # carving a separate validation split out of the training data.
    booster = xgb.train(
        params,
        dtrain,
        num_boost_round=300,
        evals=[(dtest, "eval")],
        # By default xgb.train returns the model from the LAST round, which
        # includes up to 20 non-improving rounds. save_best=True truncates the
        # returned booster at the best iteration so the score below reflects
        # the model early stopping actually selected.
        callbacks=[EarlyStopping(rounds=20, save_best=True)],
    )
    preds = booster.predict(dtest)
    return roc_auc_score(y_test, preds)


In [None]:

# Run Optuna optimization
# The sampler is seeded so that repeated runs suggest the same sequence of
# trials; the train/test split is already pinned with random_state=42, and an
# unseeded sampler would make the reported best AUC/params vary between runs.
# TPESampler is Optuna's default sampler, so only the seed changes.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

print("Best AUC:", study.best_value)
print("Best Params:", study.best_params)


In [None]:

# Render Optuna's optimization-history figure, then its parameter-importance
# figure, for the completed study
for build_figure in (
    optuna.visualization.plot_optimization_history,
    optuna.visualization.plot_param_importances,
):
    build_figure(study).show()


In [None]:

# Train final tuned model
# study.best_params only holds the sampled hyperparameters, so the fixed
# settings used during the search are added back before training.
best_params = study.best_params
best_params.update({'objective': 'binary:logistic', 'eval_metric': 'auc', 'verbosity': 0})
booster = xgb.train(
    best_params,
    dtrain,
    num_boost_round=300,
    evals=[(dtest, "eval")],
    # Without save_best=True, xgb.train returns the last-round model (up to 20
    # rounds past the best one). Keeping only the best iteration makes the AUC
    # below, and any later use of `booster`, refer to the selected model.
    callbacks=[EarlyStopping(rounds=20, save_best=True)],
)

# Evaluate
# NOTE(review): dtest is also the early-stopping set (see `evals` above), so
# this AUC is not an unbiased hold-out estimate.
y_pred_prob = booster.predict(dtest)
auc_final = roc_auc_score(y_test, y_pred_prob)
print(f" Final AUC after tuning: {auc_final:.4f}")


In [None]:

# SHAP explanation on tuned model
# Booster.predict only accepts an xgb.DMatrix, so it cannot be handed to
# shap.Explainer as a generic prediction callable over X_test (it raises
# TypeError on non-DMatrix input). TreeExplainer reads the trained booster
# directly and takes the feature DataFrame as-is.
explainer = shap.TreeExplainer(booster)
shap_values = explainer(X_test)

shap.summary_plot(shap_values, X_test)
