In [2]:
# ==========================================
# OPTUNA BLEND
# ==========================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (roc_auc_score, classification_report, confusion_matrix,
                             precision_recall_curve, f1_score, average_precision_score)
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import optuna
from optuna.samplers import TPESampler
import warnings
warnings.filterwarnings('ignore')
optuna.logging.set_verbosity(optuna.logging.WARNING)

print("="*60)
print("üöÄ MODELO FINAL V8 - OPTUNA OPTIMIZATION")
print("="*60)

# ==========================================
# 1. CARGA Y PREPARACI√ìN
# ==========================================
print("\nüìÇ Cargando datos...")
df = pd.read_parquet("../data/interim/train_final_advanced_features.parquet")

df = df.loc[:, ~df.columns.duplicated()]
cols_to_drop = [c for c in df.columns if c.endswith('_x') or c.endswith('_y')]
df = df.drop(columns=cols_to_drop, errors='ignore')

X = df.drop(['TARGET', 'SK_ID_CURR'], axis=1, errors='ignore')
y = df['TARGET']

# Encoding
cat_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
for col in cat_cols:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# ==========================================
# 2. FEATURE ENGINEERING COMPLETO
# ==========================================
print("\nüîß Feature Engineering...")

def complete_features(data):
    df = data.copy()

    ext1 = df.get('EXT_SOURCE_1', pd.Series([0.5]*len(df))).replace(0, 0.5)
    ext2 = df.get('EXT_SOURCE_2', pd.Series([0.5]*len(df))).replace(0, 0.5)
    ext3 = df.get('EXT_SOURCE_3', pd.Series([0.5]*len(df))).replace(0, 0.5)

    # Todas las combinaciones EXT_SOURCE
    df['EXT_mean'] = (ext1 + ext2 + ext3) / 3
    df['EXT_std'] = pd.concat([ext1, ext2, ext3], axis=1).std(axis=1)
    df['EXT_min'] = pd.concat([ext1, ext2, ext3], axis=1).min(axis=1)
    df['EXT_max'] = pd.concat([ext1, ext2, ext3], axis=1).max(axis=1)
    df['EXT_sum'] = ext1 + ext2 + ext3
    df['EXT_prod'] = ext1 * ext2 * ext3

    for w1, w2, w3 in [(0.1, 0.6, 0.3), (0.05, 0.7, 0.25), (0.15, 0.55, 0.30)]:
        df[f'EXT_w_{w1}_{w2}_{w3}'] = ext1*w1 + ext2*w2 + ext3*w3

    df['EXT_1x2'] = ext1 * ext2
    df['EXT_2x3'] = ext2 * ext3
    df['EXT_1x3'] = ext1 * ext3
    df['EXT_1x2x3'] = ext1 * ext2 * ext3

    for p in [2, 3, 0.5]:
        df[f'EXT_2_pow{p}'] = ext2 ** p
        df[f'EXT_mean_pow{p}'] = df['EXT_mean'] ** p

    df['EXT_2_log'] = np.log1p(ext2)
    df['EXT_1d2'] = ext1 / (ext2 + 0.001)
    df['EXT_2d3'] = ext2 / (ext3 + 0.001)
    df['EXT_1m2'] = ext1 - ext2
    df['EXT_2m3'] = ext2 - ext3
    df['EXT_harmonic'] = 3 / (1/(ext1+0.01) + 1/(ext2+0.01) + 1/(ext3+0.01))
    df['EXT_geometric'] = (ext1 * ext2 * ext3) ** (1/3)

    # Age & Employment
    if 'DAYS_BIRTH' in df.columns:
        df['age'] = -df['DAYS_BIRTH'] / 365.25
        df['age_sq'] = df['age'] ** 2
        df['age_cb'] = df['age'] ** 3

    if 'DAYS_EMPLOYED' in df.columns:
        days_emp = df['DAYS_EMPLOYED'].replace(365243, np.nan)
        df['emp_years'] = (-days_emp / 365.25).clip(lower=0)
        df['is_unemployed'] = (df['DAYS_EMPLOYED'] == 365243).astype(int)

    if 'age' in df.columns and 'emp_years' in df.columns:
        df['emp_ratio'] = df['emp_years'].fillna(0) / (df['age'] + 0.01)

    # Financial ratios
    income = df.get('AMT_INCOME_TOTAL', pd.Series([1]*len(df))).replace(0, np.nan)
    income = income.fillna(income.median())
    credit = df.get('AMT_CREDIT', pd.Series([1]*len(df))).replace(0, np.nan)
    credit = credit.fillna(credit.median())
    annuity = df.get('AMT_ANNUITY', pd.Series([1]*len(df))).replace(0, np.nan)
    annuity = annuity.fillna(annuity.median())

    df['cr_inc'] = (credit / income).clip(upper=50)
    df['an_inc'] = (annuity / income).clip(upper=5)
    df['cr_an'] = (credit / annuity).clip(upper=100)
    df['cr_inc_sq'] = df['cr_inc'] ** 2

    # Interactions EXT x other
    if 'age' in df.columns:
        df['EXT2_age'] = ext2 * df['age']
        df['EXTm_age'] = df['EXT_mean'] * df['age']
    df['EXT2_d_crInc'] = ext2 / (df['cr_inc'] + 0.01)
    df['EXTm_d_crInc'] = df['EXT_mean'] / (df['cr_inc'] + 0.01)

    # Risk score
    risk_parts = []
    if 'bureau_dpd_count' in df.columns:
        df['risk_bur'] = df['bureau_dpd_count'].clip(upper=20) / 20
        risk_parts.append(df['risk_bur'])
    if 'inst_late_ratio_total' in df.columns:
        df['risk_inst'] = df['inst_late_ratio_total'].clip(upper=1)
        risk_parts.append(df['risk_inst'])
    if 'pos_dpd_mean' in df.columns:
        df['risk_pos'] = df['pos_dpd_mean'].clip(upper=30) / 30
        risk_parts.append(df['risk_pos'])
    if len(risk_parts) > 0:
        df['risk_score'] = sum(risk_parts) / len(risk_parts)
        df['EXT2_noRisk'] = ext2 * (1 - df['risk_score'])
        df['EXTm_noRisk'] = df['EXT_mean'] * (1 - df['risk_score'])

    return df

X = complete_features(X)
X = X.loc[:, ~X.columns.duplicated()]
X = X.replace([np.inf, -np.inf], np.nan)
for col in X.columns:
    if X[col].isna().any():
        X[col] = X[col].fillna(X[col].median() if pd.notna(X[col].median()) else 0)

print(f"Features: {X.shape[1]}")

# ==========================================
# 3. SPLIT
# ==========================================
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Split adicional para optimizaci√≥n de Optuna
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.15, random_state=42, stratify=y_train_full
)

scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()
print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# ==========================================
# 4. ENTRENAR MODELOS BASE
# ==========================================
print("\nüîÑ Entrenando modelos base...")

# XGBoost
print("Training XGBoost...")
xgb_model = XGBClassifier(
    n_estimators=5000,
    max_depth=5,
    learning_rate=0.006,
    min_child_weight=80,
    subsample=0.8,
    colsample_bytree=0.3,
    gamma=4,
    reg_alpha=4,
    reg_lambda=5,
    scale_pos_weight=scale_pos_weight,
    max_delta_step=1,
    device='cuda',
    tree_method='hist',
    random_state=42,
    eval_metric='auc',
    early_stopping_rounds=300,
    verbosity=0
)
xgb_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

pred_xgb_val = xgb_model.predict_proba(X_val)[:, 1]
pred_xgb_test = xgb_model.predict_proba(X_test)[:, 1]
print(f"XGBoost Val: {roc_auc_score(y_val, pred_xgb_val):.4f}")

# CatBoost
print("Training CatBoost...")
cb_model = CatBoostClassifier(
    iterations=5000,
    depth=5,
    learning_rate=0.01,
    l2_leaf_reg=25,
    scale_pos_weight=scale_pos_weight,
    task_type='GPU',
    devices='0',
    random_seed=42,
    verbose=0,
    early_stopping_rounds=300,
    eval_metric='AUC'
)
cb_model.fit(X_train, y_train, eval_set=(X_val, y_val), use_best_model=True)

pred_cb_val = cb_model.predict_proba(X_val)[:, 1]
pred_cb_test = cb_model.predict_proba(X_test)[:, 1]
print(f"CatBoost Val: {roc_auc_score(y_val, pred_cb_val):.4f}")

# XGBoost con diferentes hiperpar√°metros
print("Training XGBoost v2...")
xgb_model2 = XGBClassifier(
    n_estimators=4000,
    max_depth=6,
    learning_rate=0.008,
    min_child_weight=60,
    subsample=0.75,
    colsample_bytree=0.4,
    gamma=3,
    reg_alpha=3,
    reg_lambda=4,
    scale_pos_weight=scale_pos_weight * 1.1,
    max_delta_step=2,
    device='cuda',
    tree_method='hist',
    random_state=123,
    eval_metric='auc',
    early_stopping_rounds=250,
    verbosity=0
)
xgb_model2.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

pred_xgb2_val = xgb_model2.predict_proba(X_val)[:, 1]
pred_xgb2_test = xgb_model2.predict_proba(X_test)[:, 1]
print(f"XGBoost v2 Val: {roc_auc_score(y_val, pred_xgb2_val):.4f}")

# CatBoost v2
print("Training CatBoost v2...")
cb_model2 = CatBoostClassifier(
    iterations=4000,
    depth=6,
    learning_rate=0.015,
    l2_leaf_reg=20,
    scale_pos_weight=scale_pos_weight,
    task_type='GPU',
    devices='0',
    random_seed=123,
    verbose=0,
    early_stopping_rounds=250,
    eval_metric='AUC'
)
cb_model2.fit(X_train, y_train, eval_set=(X_val, y_val), use_best_model=True)

pred_cb2_val = cb_model2.predict_proba(X_val)[:, 1]
pred_cb2_test = cb_model2.predict_proba(X_test)[:, 1]
print(f"CatBoost v2 Val: {roc_auc_score(y_val, pred_cb2_val):.4f}")

# ==========================================
# 5. OPTUNA PARA OPTIMIZAR PESOS
# ==========================================
print("\nüîç Optimizando pesos con Optuna...")

def objective(trial):
    w1 = trial.suggest_float('w_xgb', 0.1, 0.5)
    w2 = trial.suggest_float('w_cb', 0.1, 0.5)
    w3 = trial.suggest_float('w_xgb2', 0.05, 0.3)
    w4 = trial.suggest_float('w_cb2', 0.05, 0.3)

    # Normalizar
    total = w1 + w2 + w3 + w4
    w1, w2, w3, w4 = w1/total, w2/total, w3/total, w4/total

    pred = w1*pred_xgb_val + w2*pred_cb_val + w3*pred_xgb2_val + w4*pred_cb2_val
    return roc_auc_score(y_val, pred)

study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=20000, show_progress_bar=True)

# Mejores pesos
best_params = study.best_params
total = sum(best_params.values())
w_xgb = best_params['w_xgb'] / total
w_cb = best_params['w_cb'] / total
w_xgb2 = best_params['w_xgb2'] / total
w_cb2 = best_params['w_cb2'] / total

print(f"\nMejores pesos: XGB:{w_xgb:.3f}, CB:{w_cb:.3f}, XGB2:{w_xgb2:.3f}, CB2:{w_cb2:.3f}")
print(f"Mejor AUC en Val: {study.best_value:.4f}")

# Predicci√≥n final con pesos optimizados
pred_optuna_test = w_xgb*pred_xgb_test + w_cb*pred_cb_test + w_xgb2*pred_xgb2_test + w_cb2*pred_cb2_test
print(f"Optuna Blend Test: {roc_auc_score(y_test, pred_optuna_test):.4f}")

# ==========================================
# 6. META-LEARNER FINAL
# ==========================================
print("\nüéØ Meta-Learner Final...")

# Features para meta-learner
val_meta = np.column_stack([pred_xgb_val, pred_cb_val, pred_xgb2_val, pred_cb2_val,
                            pred_xgb_val * pred_cb_val,
                            np.max([pred_xgb_val, pred_cb_val, pred_xgb2_val, pred_cb2_val], axis=0),
                            np.min([pred_xgb_val, pred_cb_val, pred_xgb2_val, pred_cb2_val], axis=0)])

test_meta = np.column_stack([pred_xgb_test, pred_cb_test, pred_xgb2_test, pred_cb2_test,
                             pred_xgb_test * pred_cb_test,
                             np.max([pred_xgb_test, pred_cb_test, pred_xgb2_test, pred_cb2_test], axis=0),
                             np.min([pred_xgb_test, pred_cb_test, pred_xgb2_test, pred_cb2_test], axis=0)])

lr = LogisticRegression(C=0.1, max_iter=1000, random_state=42)
lr.fit(val_meta, y_val)
pred_lr_test = lr.predict_proba(test_meta)[:, 1]
print(f"LR Meta Test: {roc_auc_score(y_test, pred_lr_test):.4f}")

# ==========================================
# 7. RESULTADOS FINALES
# ==========================================
print("\n" + "="*60)
print("üèÜ RESULTADOS FINALES EN TEST SET")
print("="*60)

results = {
    'XGBoost': roc_auc_score(y_test, pred_xgb_test),
    'CatBoost': roc_auc_score(y_test, pred_cb_test),
    'XGBoost v2': roc_auc_score(y_test, pred_xgb2_test),
    'CatBoost v2': roc_auc_score(y_test, pred_cb2_test),
    'Simple Avg': roc_auc_score(y_test, (pred_xgb_test + pred_cb_test + pred_xgb2_test + pred_cb2_test) / 4),
    'Optuna Blend': roc_auc_score(y_test, pred_optuna_test),
    'LR Meta': roc_auc_score(y_test, pred_lr_test),
}

# Combinar todo
final_ensemble = 0.5 * pred_optuna_test + 0.5 * pred_lr_test
results['Final Ensemble'] = roc_auc_score(y_test, final_ensemble)

print(f"\n{'Modelo':<18} {'AUC':>10}")
print("-"*30)
for name, score in sorted(results.items(), key=lambda x: x[1], reverse=True):
    marker = " ‚úÖ" if score >= 0.80 else ""
    print(f"{name:<18} {score:>10.4f}{marker}")

best_name = max(results, key=results.get)
best_score = results[best_name]

print(f"\nüèÜ MEJOR: {best_name} = {best_score:.4f}")

if best_score >= 0.80:
    print("‚úÖ ¬°¬°OBJETIVO ALCANZADO!!")
    best_pred = final_ensemble if best_name == 'Final Ensemble' else pred_optuna_test
else:
    print(f"‚ö†Ô∏è Falta {0.80 - best_score:.4f}")
    best_pred = final_ensemble

# ==========================================
# 8. REPORTE COMPLETO
# ==========================================
print("\n" + "="*60)
print("üìä REPORTE COMPLETO DEL MODELO")
print("="*60)

# Threshold √≥ptimo
prec, rec, thresh = precision_recall_curve(y_test, best_pred)
f1 = 2 * prec * rec / (prec + rec + 1e-10)
opt_thresh = thresh[np.argmax(f1)]

y_pred_binary = (best_pred >= opt_thresh).astype(int)

print(f"\nüìà M√âTRICAS DE RENDIMIENTO:")
print(f"  ‚Ä¢ ROC-AUC: {best_score:.4f}")
print(f"  ‚Ä¢ Average Precision (AP): {average_precision_score(y_test, best_pred):.4f}")
print(f"  ‚Ä¢ F1-Score √≥ptimo: {f1_score(y_test, y_pred_binary):.4f}")
print(f"  ‚Ä¢ Threshold √≥ptimo: {opt_thresh:.4f}")

print(f"\nüìä Classification Report (threshold={opt_thresh:.3f}):")
print(classification_report(y_test, y_pred_binary, target_names=['No Default', 'Default']))

# Matriz de confusi√≥n
cm = confusion_matrix(y_test, y_pred_binary)
print(f"\nüìä Matriz de Confusi√≥n:")
print(f"                 Predicho")
print(f"               No Def  Default")
print(f"Real No Def    {cm[0,0]:6d}   {cm[0,1]:6d}")
print(f"Real Default   {cm[1,0]:6d}   {cm[1,1]:6d}")

# M√©tricas por threshold (continuaci√≥n)
print(f"\nüìä M√©tricas por diferentes thresholds:")
print(f"{'Thresh':<10} {'Precision':<12} {'Recall':<12} {'F1':<12} {'Especificidad':<12}")
print("-"*58)

for t in [0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.50, opt_thresh]:
    y_pred_t = (best_pred >= t).astype(int)

    # Calcular m√©tricas
    tp = ((y_pred_t == 1) & (y_test == 1)).sum()
    fp = ((y_pred_t == 1) & (y_test == 0)).sum()
    tn = ((y_pred_t == 0) & (y_test == 0)).sum()
    fn = ((y_pred_t == 0) & (y_test == 1)).sum()

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1_t = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    marker = " üëà" if abs(t - opt_thresh) < 0.01 else ""
    print(f"{t:<10.3f} {precision:<12.4f} {recall:<12.4f} {f1_t:<12.4f} {specificity:<12.4f}{marker}")



# ==========================================
# 10. FEATURE IMPORTANCE DETALLADO
# ==========================================
print("\nüìä TOP 30 FEATURES M√ÅS IMPORTANTES:")

fi_xgb = pd.DataFrame({
    'feature': X_train.columns,
    'xgb_importance': xgb_model.feature_importances_
})

fi_cb = pd.DataFrame({
    'feature': X_train.columns,
    'cb_importance': cb_model.feature_importances_
})

fi = fi_xgb.merge(fi_cb, on='feature')
fi['avg_importance'] = (fi['xgb_importance'] + fi['cb_importance']) / 2
fi = fi.sort_values('avg_importance', ascending=False)

print(f"\n{'Feature':<40} {'XGB':<12} {'CB':<12} {'Avg':<12}")
print("-"*76)
for _, row in fi.head(30).iterrows():
    print(f"{row['feature']:<40} {row['xgb_importance']:<12.6f} {row['cb_importance']:<12.6f} {row['avg_importance']:<12.6f}")

# ==========================================
# 11. GUARDAR MODELO Y ARTEFACTOS
# ==========================================
print("\nüíæ Guardando modelo y artefactos...")

import joblib
import json
import os

os.makedirs('../models', exist_ok=True)
os.makedirs('../reports/figures', exist_ok=True)

# Guardar modelos
joblib.dump(xgb_model, '../models/xgb_final.pkl')
joblib.dump(xgb_model2, '../models/xgb_final_v2.pkl')
cb_model.save_model('../models/cb_final.cbm')
cb_model2.save_model('../models/cb_final_v2.cbm')
joblib.dump(lr, '../models/meta_learner_lr.pkl')

# Guardar predicciones
np.save('../models/predictions_test.npy', best_pred)

# Guardar configuraci√≥n
config = {
    'best_score': float(best_score),
    'optimal_threshold': float(opt_thresh),
    'weights_optuna': {
        'xgb': float(w_xgb),
        'cb': float(w_cb),
        'xgb2': float(w_xgb2),
        'cb2': float(w_cb2)
    },
    'model_scores': {k: float(v) for k, v in results.items()},
    'features_count': X_train.shape[1],
    'train_samples': len(X_train),
    'test_samples': len(X_test)
}

with open('../models/final_config.json', 'w') as f:
    json.dump(config, f, indent=4)

# Guardar feature importance
fi.to_csv('../models/feature_importance.csv', index=False)

print("‚úÖ Modelos guardados!")

# ==========================================
# 12. RESUMEN EJECUTIVO
# ==========================================
print("\n" + "="*60)
print("üìã RESUMEN EJECUTIVO")
print("="*60)

print(f"""
üéØ OBJETIVO: ROC-AUC > 0.80
üìä RESULTADO: ROC-AUC = {best_score:.4f} {'‚úÖ ALCANZADO' if best_score >= 0.80 else f'‚ö†Ô∏è Falta {0.80-best_score:.4f}'}

üìà M√âTRICAS PRINCIPALES:
   ‚Ä¢ ROC-AUC: {best_score:.4f}
   ‚Ä¢ Average Precision: {average_precision_score(y_test, best_pred):.4f}
   ‚Ä¢ F1-Score (threshold √≥ptimo): {f1_score(y_test, y_pred_binary):.4f}

‚öôÔ∏è CONFIGURACI√ìN DEL MODELO:
   ‚Ä¢ Tipo: Ensemble (XGBoost + CatBoost + Meta-Learner)
   ‚Ä¢ Features: {X_train.shape[1]}
   ‚Ä¢ Threshold √≥ptimo: {opt_thresh:.4f}

üìä RENDIMIENTO EN CLASE MINORITARIA (Default):
   ‚Ä¢ Precision: {cm[1,1]/(cm[0,1]+cm[1,1]):.4f}
   ‚Ä¢ Recall: {cm[1,1]/(cm[1,0]+cm[1,1]):.4f}
   ‚Ä¢ Casos detectados: {cm[1,1]} de {cm[1,0]+cm[1,1]} ({100*cm[1,1]/(cm[1,0]+cm[1,1]):.1f}%)

üíæ ARCHIVOS GENERADOS:
   ‚Ä¢ ../models/xgb_final.pkl
   ‚Ä¢ ../models/cb_final.cbm
   ‚Ä¢ ../models/meta_learner_lr.pkl
   ‚Ä¢ ../models/final_config.json
   ‚Ä¢ ../models/feature_importance.csv
   ‚Ä¢ ../reports/figures/final_model_report.png
""")

# ==========================================
# 13. RECOMENDACIONES DE NEGOCIO
# ==========================================
print("\n" + "="*60)
print("üíº RECOMENDACIONES DE NEGOCIO")
print("="*60)

print(f"""
üìå INTERPRETACI√ìN DEL MODELO:

1. THRESHOLD RECOMENDADO: {opt_thresh:.3f}
   - Con este threshold se detecta el {100*cm[1,1]/(cm[1,0]+cm[1,1]):.1f}% de los defaults
   - {100*cm[0,1]/(cm[0,0]+cm[0,1]):.1f}% de falsos positivos (clientes buenos clasificados como malos)

2. ALTERNATIVAS DE THRESHOLD:
   - Threshold 0.20: Mayor detecci√≥n (~55-60% recall) pero m√°s falsos positivos
   - Threshold 0.50: Menor detecci√≥n (~35-40% recall) pero menos falsos positivos

3. TOP 5 FACTORES DE RIESGO:
""")

for i, row in fi.head(5).iterrows():
    print(f"   {i+1}. {row['feature']}")

print(f"""
4. APLICACI√ìN PR√ÅCTICA:
   - Clientes con score > 0.50: Revisar manualmente / Rechazar
   - Clientes con score 0.30-0.50: Solicitar documentaci√≥n adicional
   - Clientes con score < 0.30: Aprobar con condiciones est√°ndar
   - Clientes con score < 0.15: Fast-track de aprobaci√≥n

5. MONITOREO RECOMENDADO:
   - Recalibrar modelo cada 3-6 meses
   - Monitorear drift en distribuci√≥n de scores
   - Validar tasa de default real vs predicha
""")

print("="*60)
print("‚úÖ REPORTE COMPLETADO")
print("="*60)

üöÄ MODELO FINAL V8 - OPTUNA OPTIMIZATION

üìÇ Cargando datos...

üîß Feature Engineering...
Features: 261
Train: (209106, 261), Val: (36902, 261), Test: (61503, 261)

üîÑ Entrenando modelos base...
Training XGBoost...
XGBoost Val: 0.7843
Training CatBoost...


Default metric period is 5 because AUC is/are not implemented for GPU


CatBoost Val: 0.7845
Training XGBoost v2...
XGBoost v2 Val: 0.7834
Training CatBoost v2...


Default metric period is 5 because AUC is/are not implemented for GPU


CatBoost v2 Val: 0.7841

üîç Optimizando pesos con Optuna...


  0%|          | 0/20000 [00:00<?, ?it/s]


Mejores pesos: XGB:0.363, CB:0.454, XGB2:0.047, CB2:0.137
Mejor AUC en Val: 0.7850
Optuna Blend Test: 0.7898

üéØ Meta-Learner Final...
LR Meta Test: 0.7900

üèÜ RESULTADOS FINALES EN TEST SET

Modelo                    AUC
------------------------------
Simple Avg             0.7900
LR Meta                0.7900
Final Ensemble         0.7899
Optuna Blend           0.7898
XGBoost                0.7896
XGBoost v2             0.7894
CatBoost v2            0.7893
CatBoost               0.7887

üèÜ MEJOR: Simple Avg = 0.7900
‚ö†Ô∏è Falta 0.0100

üìä REPORTE COMPLETO DEL MODELO

üìà M√âTRICAS DE RENDIMIENTO:
  ‚Ä¢ ROC-AUC: 0.7900
  ‚Ä¢ Average Precision (AP): 0.2932
  ‚Ä¢ F1-Score √≥ptimo: 0.3464
  ‚Ä¢ Threshold √≥ptimo: 0.4287

üìä Classification Report (threshold=0.429):
              precision    recall  f1-score   support

  No Default       0.95      0.91      0.93     56538
     Default       0.29      0.43      0.35      4965

    accuracy                           0.87     61