# Model Comparison & Selection

Benchmarking Logistic Regression, Random Forest, XGBoost, CatBoost, and LightGBM.

In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append(os.path.abspath('../src'))
from data_utils import load_data, split_data
from model import RiskModel

# Load the processed BNPL feature table and name the label column.
target = 'is_default'
data_path = '../data/processed/real_bnpl_features.csv'
df = load_data(data_path)

# Rebuild the frame with the label as its last column: split_data is handed
# features and label together, plus the name of the label column.
y = df[target]
X = df.drop(columns=[target]).assign(**{target: y})
X_train_full, X_test_full = split_data(X, target)

# Separate the features from the label in each partition.
X_train, y_train = X_train_full.drop(columns=[target]), X_train_full[target]
X_test, y_test = X_test_full.drop(columns=[target]), X_test_full[target]

print(f"Data loaded. Train: {X_train.shape}")

In [None]:
# Candidate model types, passed to RiskModel(model_type=...).
models_to_test = ['logreg', 'rf', 'xgboost', 'catboost', 'lightgbm']
results = []

# Hold out part of the training data for validation so the test set is only
# ever used for the reported score. Passing the test set as X_val and then
# scoring on it would make the leaderboard optimistic whenever training uses
# the validation data (presumably for early stopping -- confirm in model.py).
VAL_FRACTION = 0.2
row_positions = pd.Series(range(len(X_train)))
val_positions = row_positions.sample(frac=VAL_FRACTION, random_state=42)
# Positional boolean mask: works even if the index labels are not unique.
in_val = row_positions.isin(val_positions).to_numpy()
X_fit, y_fit = X_train.iloc[~in_val], y_train.iloc[~in_val]
X_val, y_val = X_train.iloc[in_val], y_train.iloc[in_val]

for m_type in models_to_test:
    print(f"Training {m_type}...")
    model = RiskModel(model_type=m_type)
    try:
        model.train(X_fit, y_fit, X_val=X_val, y_val=y_val)
        metrics = model.evaluate(X_test, y_test)
        metrics['model'] = m_type
        results.append(metrics)
        print(f"  AUC: {metrics['auc']:.4f}")
    except Exception as e:
        # Deliberate best-effort: one failing candidate must not abort the
        # whole benchmark. It is reported and left off the leaderboard.
        print(f"  Failed: {e}")

# Trained instances are not kept; the champion is picked from `results`
# by highest AUC once every candidate has been scored.

In [None]:
# Rank the benchmarked models by AUC, best first.
if not results:
    # Without this guard an empty list surfaces as an opaque KeyError: 'auc'
    # from sort_values, because the frame has no columns at all.
    raise RuntimeError(
        "No model finished training and evaluation; nothing to rank."
    )

results_df = pd.DataFrame(results).sort_values('auc', ascending=False)
print("\n--- LEADERBOARD ---\n")
print(results_df)

# Start from the usual 0.5-0.8 window and widen it only as far as needed so
# that no bar is clipped at the top or pushed below the bottom of the axis.
auc_floor = max(0.0, min(0.5, results_df['auc'].min() - 0.02))
auc_ceiling = min(1.0, max(0.8, results_df['auc'].max() + 0.02))

plt.figure(figsize=(10, 5))
sns.barplot(data=results_df, x='model', y='auc')
plt.title('Model AUC Comparison')
plt.ylim(auc_floor, auc_ceiling)
plt.show()

In [None]:
# Pick the champion: results_df is sorted by AUC descending, so row 0 wins.
if results_df.empty:
    raise RuntimeError("Leaderboard is empty; there is no champion to save.")
champion_name = results_df.iloc[0]['model']
print(f"Champion: {champion_name}")

# The benchmark loop did not keep its trained instances, so the champion is
# trained again here with the same arguments as the benchmark cell (training
# split for fitting, test split passed as validation data).
# NOTE(review): if training is not deterministic, the saved model may differ
# slightly from the one that produced the leaderboard score.
final_model = RiskModel(model_type=champion_name)
final_model.train(X_train, y_train, X_val=X_test, y_val=y_test)

# Make sure the output directory exists in case save() does not create it.
model_dir = '../models'
os.makedirs(model_dir, exist_ok=True)

final_model.save('../models/champion_model.pkl')
# Second copy under a model-agnostic name, so a consumer can load a fixed
# path regardless of which model type won.
# NOTE(review): the app may still load a different file name -- confirm.
final_model.save('../models/risk_model_prod.pkl')