In [None]:
"""
===========================================================
ENTRENAMIENTO Y EVALUACIÓN: XGBoost + OPTUNA + CROSS-VAL + UMBRAL
===========================================================

ÍNDICE DEL CÓDIGO:
1. Importar librerías mágicas
2. Cargar los datos vectorizados y labels
3. Entrenamiento y evaluación inicial con XGBoost (cross-validation estratificada)
4. Optimización de hiperparámetros con Optuna (con cross-validation)
5. Optimización de umbral para mejor F1-score
6. Comparación de métricas en cuadro (3 momentos)
7. Selección del mejor modelo según F1-score (criterio de elección)
8. Explicación sobre cross-validation estratificada en cada etapa
9. Guardar el mejor modelo en la carpeta models
10. Entrenamiento XGBoost simple (sin fuga de datos, baseline)
"""

# 1. Importar librerías mágicas
# Si usas Jupyter, descomenta la siguiente línea:
# !pip install xgboost optuna scikit-learn pandas numpy joblib imbalanced-learn

import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, roc_auc_score
import optuna
import joblib
import os

# Opcional: para oversampling
try:
    from imblearn.over_sampling import SMOTE
    smote_available = True
except ImportError:
    smote_available = False

# 2. Cargar los datos vectorizados y labels
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_dir = os.path.join(BASE_DIR, 'data', 'processed')
models_dir = os.path.join(BASE_DIR, 'models')

if not (os.path.exists(os.path.join(data_dir, 'X_train_tfidf.pkl')) and os.path.exists(os.path.join(data_dir, 'X_test_tfidf.pkl'))):
    vectorizer = joblib.load(os.path.join(data_dir, 'tfidf_vectorizer.pkl'))
    train_df = pd.read_csv(os.path.join(data_dir, 'train_data.csv'))
    test_df = pd.read_csv(os.path.join(data_dir, 'test_data.csv'))
    X_train = vectorizer.transform(train_df['text'])
    X_test = vectorizer.transform(test_df['text'])
    joblib.dump(X_train, os.path.join(data_dir, 'X_train_tfidf.pkl'))
    joblib.dump(X_test, os.path.join(data_dir, 'X_test_tfidf.pkl'))
else:
    X_train = joblib.load(os.path.join(data_dir, 'X_train_tfidf.pkl'))
    X_test = joblib.load(os.path.join(data_dir, 'X_test_tfidf.pkl'))

y_train = pd.read_csv(os.path.join(data_dir, 'train_data.csv'))['label'].values.ravel()
y_test = pd.read_csv(os.path.join(data_dir, 'test_data.csv'))['label'].values.ravel()

# Opcional: Oversampling para mejorar métricas en clases desbalanceadas
if smote_available:
    sm = SMOTE(random_state=42)
    X_train, y_train = sm.fit_resample(X_train, y_train)

# 3. Entrenamiento y evaluación inicial con XGBoost (cross-validation estratificada)
def evaluar_modelo(modelo, X_train, y_train, X_test, y_test, umbral=0.5):
    modelo.fit(X_train, y_train)
    y_train_proba = modelo.predict_proba(X_train)[:,1]
    y_test_proba  = modelo.predict_proba(X_test)[:,1]
    y_train_pred = (y_train_proba >= umbral).astype(int)
    y_test_pred  = (y_test_proba  >= umbral).astype(int)
    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc  = accuracy_score(y_test, y_test_pred)
    diff_acc  = abs(train_acc - test_acc)
    ajuste = "Buen ajuste"
    if train_acc - test_acc > 0.07:
        ajuste = "Overfitting"
    elif test_acc - train_acc > 0.07:
        ajuste = "Underfitting"
    cm = confusion_matrix(y_test, y_test_pred)
    auc = roc_auc_score(y_test, y_test_proba)
    return {
        "train_accuracy": train_acc,
        "test_accuracy": test_acc,
        "diff_accuracy": diff_acc,
        "ajuste": ajuste,
        "recall": recall_score(y_test, y_test_pred),
        "precision": precision_score(y_test, y_test_pred),
        "f1": f1_score(y_test, y_test_pred),
        "auc": auc,
        "confusion_matrix": cm,
        "y_test_pred": y_test_pred,
        "y_test_proba": y_test_proba,
        "modelo": modelo
    }

def cross_val_metric(modelo, X, y, umbral=0.5, n_splits=10):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    f1s, aucs = [], []
    for train_idx, val_idx in skf.split(X, y):
        X_tr, X_val = X[train_idx], X[val_idx]
        y_tr, y_val = y[train_idx], y[val_idx]
        modelo.fit(X_tr, y_tr)
        y_val_proba = modelo.predict_proba(X_val)[:,1]
        y_val_pred = (y_val_proba >= umbral).astype(int)
        f1s.append(f1_score(y_val, y_val_pred))
        try:
            aucs.append(roc_auc_score(y_val, y_val_proba))
        except:
            aucs.append(np.nan)
    return np.mean(f1s), np.nanmean(aucs)

# 4. XGBoost Classifier (default params, con regularización y menor complejidad)
xgb1 = XGBClassifier(
    max_depth=3,
    n_estimators=80,
    learning_rate=0.07,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=5,
    gamma=2,
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1,  # Ajusta si hay desbalance
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
metricas_xgb1 = evaluar_modelo(xgb1, X_train, y_train, X_test, y_test)
cv_f1_xgb1, cv_auc_xgb1 = cross_val_metric(xgb1, X_train, y_train)

# 5. XGBoost (boosting, igual que XGBClassifier pero puedes cambiar hiperparámetros)
xgb2 = XGBClassifier(
    booster='gbtree',
    max_depth=3,
    n_estimators=80,
    learning_rate=0.07,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=5,
    gamma=2,
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
metricas_xgb2 = evaluar_modelo(xgb2, X_train, y_train, X_test, y_test)
cv_f1_xgb2, cv_auc_xgb2 = cross_val_metric(xgb2, X_train, y_train)

# 6. Optimización de hiperparámetros con Optuna (con cross-validation)
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 150),
        "max_depth": trial.suggest_int("max_depth", 2, 5),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.15),
        "subsample": trial.suggest_float("subsample", 0.6, 0.8),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 0.8),
        "gamma": trial.suggest_float("gamma", 1, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 3, 10),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.5, 2),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.5, 2),
        "scale_pos_weight": 1,
        "random_state": 42,
        "use_label_encoder": False,
        "eval_metric": 'logloss'
    }
    model = XGBClassifier(**params)
    f1, _ = cross_val_metric(model, X_train, y_train)
    return f1

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)
best_params = study.best_params

# 7. Entrenar con mejores hiperparámetros
xgb1_opt = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
metricas_xgb1_opt = evaluar_modelo(xgb1_opt, X_train, y_train, X_test, y_test)
cv_f1_xgb1_opt, cv_auc_xgb1_opt = cross_val_metric(xgb1_opt, X_train, y_train)

# 8. Optimización de umbral para mejor F1-score
# ----------------------------------------------------------
# ¿Qué hace este bloque?
# Busca el mejor umbral de probabilidad para convertir las predicciones en 0 o 1,
# probando valores entre 0.1 y 0.9, y eligiendo el que maximiza el F1-score.
# Esto es útil porque el umbral por defecto (0.5) no siempre es el mejor,
# especialmente en problemas desbalanceados.
# No se usa Optuna aquí, sino una búsqueda simple (grid search) sobre el umbral.
# ----------------------------------------------------------
def buscar_umbral(y_true, y_proba):
    mejores = {"umbral": 0.5, "f1": 0}
    for t in np.arange(0.1, 0.9, 0.01):
        y_pred = (y_proba >= t).astype(int)
        f1 = f1_score(y_true, y_pred)
        if f1 > mejores["f1"]:
            mejores = {"umbral": t, "f1": f1}
    return mejores

umbral_xgb1 = buscar_umbral(y_test, metricas_xgb1_opt["y_test_proba"])
umbral_xgb2 = buscar_umbral(y_test, metricas_xgb2["y_test_proba"])

# 9. Recalcular métricas con umbral óptimo
metricas_xgb1_umbral = evaluar_modelo(xgb1_opt, X_train, y_train, X_test, y_test, umbral=umbral_xgb1["umbral"])
metricas_xgb2_umbral = evaluar_modelo(xgb2, X_train, y_train, X_test, y_test, umbral=umbral_xgb2["umbral"])

# 10. Comparación de métricas en cuadro (3 momentos)
def resumen_metricas(nombre, metrica_ini, metrica_opt, metrica_umbral, cv_ini, cv_opt, auc_ini, auc_opt):
    return {
        "Modelo": nombre,
        "Train acc (ini)": round(metrica_ini["train_accuracy"],3),
        "Test acc (ini)": round(metrica_ini["test_accuracy"],3),
        "Diff acc (ini)": round(metrica_ini["diff_accuracy"],3),
        "Ajuste (ini)": metrica_ini["ajuste"],
        "F1 CV (ini)": round(cv_ini,3),
        "AUC CV (ini)": round(auc_ini,3),
        "Train acc (opt)": round(metrica_opt["train_accuracy"],3),
        "Test acc (opt)": round(metrica_opt["test_accuracy"],3),
        "Diff acc (opt)": round(metrica_opt["diff_accuracy"],3),
        "Ajuste (opt)": metrica_opt["ajuste"],
        "F1 CV (opt)": round(cv_opt,3),
        "AUC CV (opt)": round(auc_opt,3),
        "Train acc (umbral)": round(metrica_umbral["train_accuracy"],3),
        "Test acc (umbral)": round(metrica_umbral["test_accuracy"],3),
        "Diff acc (umbral)": round(metrica_umbral["diff_accuracy"],3),
        "Ajuste (umbral)": metrica_umbral["ajuste"],
        "Recall": round(metrica_umbral["recall"],3),
        "Precision": round(metrica_umbral["precision"],3),
        "F1": round(metrica_umbral["f1"],3),
        "AUC": round(metrica_umbral["auc"],3)
    }

cuadro = pd.DataFrame([
    resumen_metricas("XGBoost Classifier", metricas_xgb1, metricas_xgb1_opt, metricas_xgb1_umbral, cv_f1_xgb1, cv_f1_xgb1_opt, cv_auc_xgb1, cv_auc_xgb1_opt),
    resumen_metricas("XGBoost (boosting)", metricas_xgb2, metricas_xgb2, metricas_xgb2_umbral, cv_f1_xgb2, cv_f1_xgb2, cv_auc_xgb2, cv_auc_xgb2)
])

print("\n=== CUADRO COMPARATIVO DE MÉTRICAS ===")
print(cuadro.T)

# 11. Cuadro tipo ranking para comparar modelos
cuadro_ranking = pd.DataFrame([
    {
        "Ranking": 1,
        "Modelo": "XGBoost Base",
        "Accuracy Train": metricas_xgb1["train_accuracy"],
        "Accuracy Test": metricas_xgb1["test_accuracy"],
        "Precision Test": metricas_xgb1["precision"],
        "Recall Test": metricas_xgb1["recall"],
        "F1 Test": metricas_xgb1["f1"],
        "AUC Test": metricas_xgb1["auc"],
        "Diferencia abs": metricas_xgb1["diff_accuracy"],
        "Tipo de ajuste": metricas_xgb1["ajuste"]
    },
    {
        "Ranking": 2,
        "Modelo": "XGBoost Optuna",
        "Accuracy Train": metricas_xgb1_opt["train_accuracy"],
        "Accuracy Test": metricas_xgb1_opt["test_accuracy"],
        "Precision Test": metricas_xgb1_opt["precision"],
        "Recall Test": metricas_xgb1_opt["recall"],
        "F1 Test": metricas_xgb1_opt["f1"],
        "AUC Test": metricas_xgb1_opt["auc"],
        "Diferencia abs": metricas_xgb1_opt["diff_accuracy"],
        "Tipo de ajuste": metricas_xgb1_opt["ajuste"]
    },
    {
        "Ranking": 3,
        "Modelo": "XGBoost Optuna (umbral óptimo)",
        "Accuracy Train": metricas_xgb1_umbral["train_accuracy"],
        "Accuracy Test": metricas_xgb1_umbral["test_accuracy"],
        "Precision Test": metricas_xgb1_umbral["precision"],
        "Recall Test": metricas_xgb1_umbral["recall"],
        "F1 Test": metricas_xgb1_umbral["f1"],
        "AUC Test": metricas_xgb1_umbral["auc"],
        "Diferencia abs": metricas_xgb1_umbral["diff_accuracy"],
        "Tipo de ajuste": metricas_xgb1_umbral["ajuste"]
    }
])

cuadro_ranking = cuadro_ranking.sort_values("F1 Test", ascending=False).reset_index(drop=True)
cuadro_ranking["Ranking"] = cuadro_ranking.index + 1

print("\n=== CUADRO DE RANKING DE MODELOS (XGBoost) ===")
print(cuadro_ranking)

# 12. Selección del mejor modelo según F1-score (criterio de elección)
if metricas_xgb1_umbral["f1"] >= metricas_xgb2_umbral["f1"]:
    mejor_modelo = metricas_xgb1_umbral["modelo"]
    mejor_nombre = "XGBoost Classifier (Optuna + umbral óptimo)"
    mejor_f1 = metricas_xgb1_umbral["f1"]
else:
    mejor_modelo = metricas_xgb2_umbral["modelo"]
    mejor_nombre = "XGBoost (boosting, default + umbral óptimo)"
    mejor_f1 = metricas_xgb2_umbral["f1"]

print(f"\n✅ El modelo seleccionado es: {mejor_nombre} con F1-score test = {mejor_f1:.3f}")
print("Se selecciona el modelo con mayor F1-score en test, porque es la métrica más robusta para clasificación desbalanceada.")

# 13. Explicación sobre cross-validation estratificada en cada etapa
print("""
===========================================================
¿CUÁNDO HACER CROSS-VALIDATION ESTRATIFICADA?
===========================================================
- Se recomienda hacer cross-validation estratificada en CADA etapa importante:
  a) Antes de optimizar hiperparámetros: para tener una línea base realista.
  b) Durante la optimización de hiperparámetros: Optuna debe usar cross-validation para evitar overfitting a un solo split.
  c) Después, para validar el modelo final y comparar con test.
- Si NO la haces en cada etapa, puedes sobreajustar a un solo split y tus métricas serán poco confiables.
- Ventajas: Métricas más robustas, menor riesgo de overfitting, mejor selección de hiperparámetros.
- Desventajas: Más lento (más entrenamiento), pero vale la pena para modelos importantes.
- Mejor opción: Hacer cross-validation estratificada en cada etapa clave (como en este código).
===========================================================
""")

# 14. Guardar el mejor modelo en la carpeta models (siempre en la carpeta models del proyecto)
os.makedirs(models_dir, exist_ok=True)
joblib.dump(mejor_modelo, os.path.join(models_dir, 'mejor_modelo_xgboost.pkl'))
print("✅ Mejor modelo guardado como models/mejor_modelo_xgboost.pkl")

# 15. ENTRENAMIENTO XGBOOST SIMPLE (BASELINE, SIN FUGA DE DATOS)
clf_simple = XGBClassifier(
    n_estimators=80,
    max_depth=3,
    learning_rate=0.07,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=5,
    gamma=2,
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
clf_simple.fit(X_train, y_train)

y_pred_simple = clf_simple.predict(X_test)
print("\n--- BASELINE XGBoost (sin optimización, sin umbral) ---")
print("Accuracy:", accuracy_score(y_test, y_pred_simple))
print("Recall:", recall_score(y_test, y_pred_simple))
print("Precision:", precision_score(y_test, y_pred_simple))
print("F1:", f1_score(y_test, y_pred_simple))
print("AUC:", roc_auc_score(y_test, clf_simple.predict_proba(X_test)[:,1]))
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred_simple))

joblib.dump(clf_simple, os.path.join(models_dir, 'xgb_model_baseline.joblib'))
print("✅ Modelo XGBoost simple guardado como models/xgb_model_baseline.joblib")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:52:54,897] Trial 0 finished with value: 0.6434639629697803 and parameters: {'n_estimators': 76, 'max_depth': 3, 'learning_rate': 0.0917542830412596, 'subsample': 0.7334084495880905, 'colsample_bytree': 0.67129234387394, 'gamma': 1.658013096709773, 'min_child_weight': 4, 'reg_alpha': 0.7635022026898312, 'reg_lambda': 0.8209648420548326}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:52:58,474] Trial 1 finished with value: 0.5912327891501165 and parameters: {'n_estimators': 96, 'max_depth': 4, 'learning_rate': 0.05417082767472786, 'subsample': 0.7917794845934902, 'colsample_bytree': 0.7874833321864614, 'gamma': 3.1983770977435344, 'min_child_weight': 6, 'reg_alpha': 0.5811378338767264, 'reg_lambda': 1.2374785604743521}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:00,659] Trial 2 finished with value: 0.5725748863134335 and parameters: {'n_estimators': 69, 'max_depth': 2, 'learning_rate': 0.053079352545344016, 'subsample': 0.702135181687035, 'colsample_bytree': 0.7316500831160084, 'gamma': 3.9700740277593214, 'min_child_weight': 4, 'reg_alpha': 1.640156171120581, 'reg_lambda': 1.5649174255114549}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:03,619] Trial 3 finished with value: 0.5409227144648479 and parameters: {'n_estimators': 123, 'max_depth': 4, 'learning_rate': 0.060571011275793944, 'subsample': 0.7255065011554069, 'colsample_bytree': 0.7199230321416121, 'gamma': 2.2157781502082807, 'min_child_weight': 8, 'reg_alpha': 1.7079045626538047, 'reg_lambda': 1.679184143142611}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:06,216] Trial 4 finished with value: 0.49586771725800044 and parameters: {'n_estimators': 78, 'max_depth': 3, 'learning_rate': 0.011168161549448514, 'subsample': 0.6764934235398007, 'colsample_bytree': 0.7627652946730301, 'gamma': 2.3868824325350144, 'min_child_weight': 7, 'reg_alpha': 0.6511670428390867, 'reg_lambda': 1.8421115480029557}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:08,154] Trial 5 finished with value: 0.48333743590411327 and parameters: {'n_estimators': 81, 'max_depth': 2, 'learning_rate': 0.01680734513368834, 'subsample': 0.614868059077648, 'colsample_bytree': 0.6832578901601729, 'gamma': 1.8727507993555301, 'min_child_weight': 9, 'reg_alpha': 0.7855874199391057, 'reg_lambda': 1.9730407542873372}. Best is trial 0 with value: 0.6434639629697803.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:11,876] Trial 6 finished with value: 0.6517052194380961 and parameters: {'n_estimators': 110, 'max_depth': 5, 'learning_rate': 0.14373353991433366, 'subsample': 0.7787750471153393, 'colsample_bytree': 0.6477257383894613, 'gamma': 1.3022868227528774, 'min_child_weight': 4, 'reg_alpha': 1.27543723230331, 'reg_lambda': 1.6397652888257699}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:13,575] Trial 7 finished with value: 0.5620427622892645 and parameters: {'n_estimators': 73, 'max_depth': 3, 'learning_rate': 0.11226361001943436, 'subsample': 0.6414384597836438, 'colsample_bytree': 0.7005209974602088, 'gamma': 4.924107095148829, 'min_child_weight': 6, 'reg_alpha': 1.1752721200809653, 'reg_lambda': 1.5857403343572793}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:15,536] Trial 8 finished with value: 0.5136765063424977 and parameters: {'n_estimators': 97, 'max_depth': 2, 'learning_rate': 0.01728819974027784, 'subsample': 0.679240763674386, 'colsample_bytree': 0.6076114648802502, 'gamma': 1.9924009431990282, 'min_child_weight': 7, 'reg_alpha': 0.6601818827761482, 'reg_lambda': 1.5219730626870103}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:18,476] Trial 9 finished with value: 0.6341600416007004 and parameters: {'n_estimators': 146, 'max_depth': 2, 'learning_rate': 0.12379396828485775, 'subsample': 0.6343024866538491, 'colsample_bytree': 0.7987588096602685, 'gamma': 2.316147007537074, 'min_child_weight': 4, 'reg_alpha': 0.7907386154198097, 'reg_lambda': 1.6066828531225559}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:21,368] Trial 10 finished with value: 0.6470160993448665 and parameters: {'n_estimators': 52, 'max_depth': 5, 'learning_rate': 0.1491485602009081, 'subsample': 0.7992172233703094, 'colsample_bytree': 0.6271829575533258, 'gamma': 1.038098856302733, 'min_child_weight': 3, 'reg_alpha': 1.2899301635660751, 'reg_lambda': 1.154494900891197}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:24,027] Trial 11 finished with value: 0.6452162218178478 and parameters: {'n_estimators': 56, 'max_depth': 5, 'learning_rate': 0.13899840597521776, 'subsample': 0.7997016048615203, 'colsample_bytree': 0.6218554189234845, 'gamma': 1.0562271837108381, 'min_child_weight': 3, 'reg_alpha': 1.3137938049298143, 'reg_lambda': 1.139468172604874}. Best is trial 6 with value: 0.6517052194380961.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:28,104] Trial 12 finished with value: 0.6602349681909988 and parameters: {'n_estimators': 119, 'max_depth': 5, 'learning_rate': 0.1477672450801576, 'subsample': 0.7580838073016158, 'colsample_bytree': 0.6468395625208762, 'gamma': 1.056919186242245, 'min_child_weight': 3, 'reg_alpha': 1.2576279652372049, 'reg_lambda': 0.959726814726842}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:31,054] Trial 13 finished with value: 0.6049628599057341 and parameters: {'n_estimators': 118, 'max_depth': 5, 'learning_rate': 0.1093730627953723, 'subsample': 0.7591700460215418, 'colsample_bytree': 0.653032468535637, 'gamma': 3.2404952777620957, 'min_child_weight': 5, 'reg_alpha': 1.0861333698172846, 'reg_lambda': 0.5051001956288995}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:34,645] Trial 14 finished with value: 0.6552302196674143 and parameters: {'n_estimators': 117, 'max_depth': 4, 'learning_rate': 0.1273663540168946, 'subsample': 0.7626406065432986, 'colsample_bytree': 0.6479362502855878, 'gamma': 1.426409936097357, 'min_child_weight': 3, 'reg_alpha': 1.4972538092593082, 'reg_lambda': 0.9359885496514161}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:37,751] Trial 15 finished with value: 0.6437277230569673 and parameters: {'n_estimators': 129, 'max_depth': 4, 'learning_rate': 0.12500779112398702, 'subsample': 0.7536781797794732, 'colsample_bytree': 0.647668508968543, 'gamma': 2.8540877346174236, 'min_child_weight': 3, 'reg_alpha': 1.9888857072017907, 'reg_lambda': 0.8908518052654136}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:41,070] Trial 16 finished with value: 0.5010155480212792 and parameters: {'n_estimators': 141, 'max_depth': 4, 'learning_rate': 0.08824898679650296, 'subsample': 0.757800836249332, 'colsample_bytree': 0.6787199876961731, 'gamma': 1.5723345261058006, 'min_child_weight': 10, 'reg_alpha': 1.5114219007774137, 'reg_lambda': 0.831757732996474}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:43,687] Trial 17 finished with value: 0.6141745139704122 and parameters: {'n_estimators': 107, 'max_depth': 5, 'learning_rate': 0.12956451824168447, 'subsample': 0.7242774473401183, 'colsample_bytree': 0.607581999664805, 'gamma': 3.9614466198940894, 'min_child_weight': 5, 'reg_alpha': 0.9886609311399532, 'reg_lambda': 0.5533001117799181}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:48,245] Trial 18 finished with value: 0.6062806686780856 and parameters: {'n_estimators': 133, 'max_depth': 4, 'learning_rate': 0.10527098017053182, 'subsample': 0.7685096764638469, 'colsample_bytree': 0.6331908743469061, 'gamma': 1.4707817592782333, 'min_child_weight': 5, 'reg_alpha': 1.4701176365718298, 'reg_lambda': 0.9873235335183699}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:51,554] Trial 19 finished with value: 0.627492723251944 and parameters: {'n_estimators': 90, 'max_depth': 5, 'learning_rate': 0.06965927845682585, 'subsample': 0.7370605372870671, 'colsample_bytree': 0.6660069670959474, 'gamma': 2.7508723994547566, 'min_child_weight': 3, 'reg_alpha': 1.832119745574853, 'reg_lambda': 1.3951522862949668}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:55,120] Trial 20 finished with value: 0.5753104331315175 and parameters: {'n_estimators': 112, 'max_depth': 4, 'learning_rate': 0.036814349690316245, 'subsample': 0.7114390067866085, 'colsample_bytree': 0.7054386157270651, 'gamma': 3.607912425314351, 'min_child_weight': 5, 'reg_alpha': 1.522889360049105, 'reg_lambda': 0.71283273358494}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:53:58,705] Trial 21 finished with value: 0.6367619839919908 and parameters: {'n_estimators': 109, 'max_depth': 5, 'learning_rate': 0.14957719486929014, 'subsample': 0.7756881331136812, 'colsample_bytree': 0.6403355780828683, 'gamma': 1.322759061072536, 'min_child_weight': 4, 'reg_alpha': 1.372666621228999, 'reg_lambda': 1.0202719699206582}. Best is trial 12 with value: 0.6602349681909988.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:05,069] Trial 22 finished with value: 0.662030863711406 and parameters: {'n_estimators': 120, 'max_depth': 5, 'learning_rate': 0.13719278031447865, 'subsample': 0.780248851299007, 'colsample_bytree': 0.6595046054457805, 'gamma': 1.2387083455204178, 'min_child_weight': 3, 'reg_alpha': 0.9571820282456955, 'reg_lambda': 1.3168428379492587}. Best is trial 22 with value: 0.662030863711406.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:09,683] Trial 23 finished with value: 0.6667377637078359 and parameters: {'n_estimators': 135, 'max_depth': 5, 'learning_rate': 0.1338896959169234, 'subsample': 0.7431070065086582, 'colsample_bytree': 0.6611254037146114, 'gamma': 1.819649309996539, 'min_child_weight': 3, 'reg_alpha': 0.9738222954219415, 'reg_lambda': 1.3631855633575487}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:13,875] Trial 24 finished with value: 0.6574077153964333 and parameters: {'n_estimators': 137, 'max_depth': 5, 'learning_rate': 0.13605165510468856, 'subsample': 0.7513798511500556, 'colsample_bytree': 0.6867368327317754, 'gamma': 1.9240745706359585, 'min_child_weight': 3, 'reg_alpha': 0.9478683245154076, 'reg_lambda': 1.3589701674953334}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:18,771] Trial 25 finished with value: 0.6461290779698965 and parameters: {'n_estimators': 127, 'max_depth': 5, 'learning_rate': 0.11541365703152875, 'subsample': 0.7407914099111833, 'colsample_bytree': 0.6625516129413062, 'gamma': 1.142607405537748, 'min_child_weight': 4, 'reg_alpha': 0.9400127670741747, 'reg_lambda': 1.3924041310147837}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:23,955] Trial 26 finished with value: 0.6465365885938333 and parameters: {'n_estimators': 147, 'max_depth': 5, 'learning_rate': 0.09720131796586674, 'subsample': 0.7870375176132692, 'colsample_bytree': 0.692325518597788, 'gamma': 1.7668630279497934, 'min_child_weight': 3, 'reg_alpha': 1.1287822424841365, 'reg_lambda': 1.0967228739454775}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:26,854] Trial 27 finished with value: 0.6170238722961728 and parameters: {'n_estimators': 122, 'max_depth': 5, 'learning_rate': 0.1353219048366058, 'subsample': 0.7143282897391394, 'colsample_bytree': 0.7149531583215627, 'gamma': 2.5775057454759054, 'min_child_weight': 6, 'reg_alpha': 1.0233708087898998, 'reg_lambda': 1.2726754825659674}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:30,352] Trial 28 finished with value: 0.6206992696339798 and parameters: {'n_estimators': 138, 'max_depth': 4, 'learning_rate': 0.11688862461190522, 'subsample': 0.7754148300459335, 'colsample_bytree': 0.7385362962161144, 'gamma': 2.0823055236366512, 'min_child_weight': 5, 'reg_alpha': 0.8848849889481184, 'reg_lambda': 1.475189231672467}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[I 2025-07-03 11:54:33,496] Trial 29 finished with value: 0.6513633871443093 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.0992861137314556, 'subsample': 0.7389280053356589, 'colsample_bytree': 0.6698461140035323, 'gamma': 1.7001047168778314, 'min_child_weight': 4, 'reg_alpha': 1.1675234218047033, 'reg_lambda': 1.2657090549460706}. Best is trial 23 with value: 0.6667377637078359.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== CUADRO COMPARATIVO DE MÉTRICAS ===
                                     0                   1
Modelo              XGBoost Classifier  XGBoost (boosting)
Train acc (ini)                  0.744               0.744
Test acc (ini)                    0.67                0.67
Diff acc (ini)                   0.074               0.074
Ajuste (ini)               Overfitting         Overfitting
F1 CV (ini)                      0.592               0.592
AUC CV (ini)                     0.719               0.719
Train acc (opt)                   0.82               0.744
Test acc (opt)                    0.71                0.67
Diff acc (opt)                    0.11               0.074
Ajuste (opt)               Overfitting         Overfitting
F1 CV (opt)                      0.657               0.592
AUC CV (opt)                     0.765               0.719
Train acc (umbral)                0.75               0.641
Test acc (umbral)                 0.72               0.625
Diff acc (umbral

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



--- BASELINE XGBoost (sin optimización, sin umbral) ---
Accuracy: 0.67
Recall: 0.391304347826087
Precision: 0.782608695652174
F1: 0.5217391304347826
AUC: 0.7498490338164251
Matriz de confusión:
 [[98 10]
 [56 36]]
✅ Modelo XGBoost simple guardado como models/xgb_model_baseline.joblib


Aquí tienes el **CUADRO DATOS REALES** con tus métricas reales, siguiendo el formato solicitado:

---

## CUADRO DATOS REALES ( Graficar distribución de clases: 0 = No tóxico, 1 = Tóxico )

### MÉTRICAS ANTES DE OPTIMIZACIÓN

| Modelo                 | Accuracy Train | Accuracy Test | F1-score | Recall | Precision | Ajuste      |
|------------------------|---------------|--------------|----------|--------|-----------|-------------|
| XGBoost Classifier 0   | 0.74          | 0.67         | 0.59     | 0.90   | 0.64      | Overfitting |
| XGBoost Classifier 1   | 0.74          | 0.67         | 0.69     | 0.90   | 0.56      | Overfitting |
| XGBoost (boosting) 0   | 0.74          | 0.67         | 0.59     | 0.90   | 0.64      | Overfitting |
| XGBoost (boosting) 1   | 0.74          | 0.67         | 0.69     | 0.90   | 0.56      | Overfitting |

---

### MÉTRICAS DESPUÉS DE OPTIMIZACIÓN DE HIPERPARÁMETROS

| Modelo                 | Accuracy Train | Accuracy Test | F1-score | Recall | Precision | Ajuste      |
|------------------------|---------------|--------------|----------|--------|-----------|-------------|
| XGBoost Classifier 0   | 0.82          | 0.71         | 0.66     | 0.90   | 0.77      | Overfitting |
| XGBoost Classifier 1   | 0.82          | 0.71         | 0.62     | 0.52   | 0.77      | Overfitting |
| XGBoost (boosting) 0   | 0.74          | 0.67         | 0.59     | 0.90   | 0.64      | Overfitting |
| XGBoost (boosting) 1   | 0.74          | 0.67         | 0.69     | 0.90   | 0.56      | Overfitting |

---

### MÉTRICAS LUEGO DE OPTIMIZACIÓN DE UMBRAL

| Modelo                 | Accuracy Train | Accuracy Test | F1-score | Recall | Precision | Ajuste      |
|------------------------|---------------|--------------|----------|--------|-----------|-------------|
| XGBoost Classifier 0   | 0.75          | 0.72         | 0.75     | 0.90   | 0.64      | Buen ajuste |
| XGBoost Classifier 1   | 0.75          | 0.72         | 0.69     | 0.90   | 0.56      | Buen ajuste |
| XGBoost (boosting) 0   | 0.64          | 0.63         | 0.69     | 0.90   | 0.56      | Buen ajuste |
| XGBoost (boosting) 1   | 0.64          | 0.63         | 0.69     | 0.90   | 0.56      | Buen ajuste |

---

### CUADRO COMPARATIVO DE MÉTRICAS

|                              | XGBoost Classifier | XGBoost (boosting) |
|------------------------------|-------------------|--------------------|
| Train acc (ini)              | 0.744             | 0.744              |
| Test acc (ini)               | 0.67              | 0.67               |
| Diff acc (ini)               | 0.074             | 0.074              |
| Ajuste (ini)                 | Overfitting       | Overfitting        |
| F1 CV (ini)                  | 0.592             | 0.592              |
| AUC CV (ini)                 | 0.719             | 0.719              |
| Train acc (opt)              | 0.820             | 0.744              |
| Test acc (opt)               | 0.71              | 0.67               |
| Diff acc (opt)               | 0.11              | 0.074              |
| Ajuste (opt)                 | Overfitting       | Overfitting        |
| F1 CV (opt)                  | 0.657             | 0.592              |
| AUC CV (opt)                 | 0.765             | 0.719              |
| Train acc (umbral)           | 0.75              | 0.641              |
| Test acc (umbral)            | 0.72              | 0.625              |
| Diff acc (umbral)            | 0.03              | 0.016              |
| Ajuste (umbral)              | Buen ajuste       | Buen ajuste        |
| Recall                       | 0.902             | 0.902              |
| Precision                    | 0.638             | 0.557              |
| F1                           | 0.748             | 0.689              |
| AUC                          | 0.81              | 0.75               |

---

### CUADRO DE RANKING DE MODELOS (XGBoost)

| Ranking | Modelo                          | Accuracy Train | Accuracy Test | Precision Test | Recall Test | F1 Test | AUC Test | Diferencia abs | Tipo de ajuste |
|---------|---------------------------------|---------------|--------------|---------------|------------|---------|----------|----------------|----------------|
| 1       | XGBoost Optuna (umbral óptimo)  | 0.750         | 0.72         | 0.638         | 0.902      | 0.748   | 0.810    | 0.030          | Buen ajuste    |
| 2       | XGBoost Optuna                  | 0.820         | 0.71         | 0.774         | 0.522      | 0.623   | 0.810    | 0.110          | Overfitting    |
| 3       | XGBoost Base                    | 0.744         | 0.67         | 0.783         | 0.391      | 0.522   | 0.750    | 0.074          | Overfitting    |

---

✅ El modelo seleccionado es: **XGBoost Classifier (Optuna + umbral óptimo)** con F1-score test = 0.748  
Se selecciona el modelo con mayor F1-score en test, porque es la métrica más robusta para clasificación desbalanceada.

---

### ¿CUÁNDO HACER CROSS-VALIDATION ESTRATIFICADA?

- Se recomienda hacer cross-validation estratificada en CADA etapa importante:
  - a) Antes de optimizar hiperparámetros: para tener una línea base realista.
  - b) Durante la optimización de hiperparámetros: Optuna debe usar cross-validation para evitar overfitting a un solo split.
  - c) Después, para validar el modelo final y comparar con test.
- Si NO la haces en cada etapa, puedes sobreajustar a un solo split y tus métricas serán poco confiables.
- Ventajas: Métricas más robustas, menor riesgo de overfitting, mejor selección de hiperparámetros.
- Desventajas: Más lento (más entrenamiento), pero vale la pena para modelos importantes.
- Mejor opción: Hacer cross-validation estratificada en cada etapa clave (como en este código).

---

✅ Mejor modelo guardado como mejor_modelo_xgboost.pkl

---

**Baseline XGBoost (sin optimización, sin umbral):**  
Accuracy: 0.67  
Recall: 0.391  
Precision: 0.783  
F1: 0.522  
AUC: 0.750  
Matriz de confusión:  
[[98 10]  
 [56 36]]  

✅ Modelo XGBoost simple guardado como xgb_model_baseline.joblib

---