# Modeling & Validation

Tune LightGBM hyperparameters with Optuna, then train a final model and evaluate it on a time-based hold-out (the last 10% of rows).

In [None]:
import pandas as pd, numpy as np, lightgbm as lgb, optuna
from pathlib import Path
from sklearn.metrics import roc_auc_score, roc_curve
from matplotlib import pyplot as plt

# Locate the project root. `__file__` only exists when this code runs as a
# script/module; inside a Jupyter kernel it raises NameError, so fall back to
# the kernel's working directory in that case.
try:
    BASE = Path(__file__).resolve().parents[2]
except NameError:
    # NOTE(review): assumes the kernel's cwd is the notebook's own directory,
    # so cwd.parents[1] is the same folder as <file>.parents[2] -- confirm.
    BASE = Path.cwd().resolve().parents[1]
DATA = BASE / "data" / "processed" / "train_ready.parquet"
df = pd.read_parquet(DATA)

# Separate the target column from the feature matrix.
y = df['isFraud']
X = df.drop(columns=['isFraud'])

# Hold out the last 10% of rows for validation.
# NOTE(review): this is only a time-based split if df is already sorted
# chronologically -- confirm in the upstream preprocessing step.
split_idx = int(len(X)*0.90)
X_train, X_val = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_val = y.iloc[:split_idx], y.iloc[split_idx:]

# Ratio of (rows - label sum) to label sum on the training split; passed to
# LightGBM as `scale_pos_weight` in the cells below.
# NOTE(review): equals negatives/positives only if isFraud is coded 0/1 -- confirm.
ratio = (y_train.shape[0]-y_train.sum())/y_train.sum()
print("pos_weight:", ratio)


In [None]:
def objective(trial):
    """Optuna objective: train one LightGBM model and score it on the hold-out.

    Samples learning rate, number of leaves, max depth and feature fraction
    from ``trial``, trains on ``X_train``/``y_train`` for up to 500 boosting
    rounds with early stopping (patience 50) monitored on ``X_val``/``y_val``,
    and returns the ROC AUC of the resulting predictions on ``X_val``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        ROC AUC on the validation split, as computed by ``roc_auc_score``.
    """
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'verbosity': -1,
        'learning_rate': trial.suggest_float('lr', 0.01, 0.2, log=True),
        'num_leaves': trial.suggest_int('leaves', 63, 511, step=64),
        'max_depth': trial.suggest_int('depth', 6, 16),
        'feature_fraction': trial.suggest_float('ff', 0.5, 1.0),
        'scale_pos_weight': ratio,
    }
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)
    # The `early_stopping_rounds=` / `verbose_eval=` keyword arguments were
    # removed from lgb.train in LightGBM 4.0; the callback equivalents below
    # work on both 3.3+ and 4.x.
    gbm = lgb.train(
        params,
        lgb_train,
        num_boost_round=500,
        valid_sets=[lgb_val],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50, verbose=False),
            lgb.log_evaluation(period=0),  # period <= 0 disables per-round logging
        ],
    )
    preds = gbm.predict(X_val)
    return roc_auc_score(y_val, preds)


In [None]:
# Run the hyperparameter search (20 trials, maximizing validation AUC).
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
print("Best AUC:", study.best_value)

# `study.best_params` is keyed by the names given to trial.suggest_* in
# `objective` ('lr', 'leaves', 'depth', 'ff'), which are NOT LightGBM parameter
# names. Passing them through unchanged would leave the final model on default
# hyperparameters, so translate them back first.
_optuna_to_lgbm = {
    'lr': 'learning_rate',
    'leaves': 'num_leaves',
    'depth': 'max_depth',
    'ff': 'feature_fraction',
}
best_params = {
    _optuna_to_lgbm.get(name, name): value
    for name, value in study.best_params.items()
}
best_params.update({'objective':'binary','metric':'auc','verbosity':-1,'scale_pos_weight':ratio})

# NOTE(review): the number of trees is left at LGBMClassifier's default here,
# while the search trained up to 500 rounds with early stopping -- consider
# carrying the best iteration over as `n_estimators`.
model = lgb.LGBMClassifier(**best_params)
model.fit(X_train, y_train)

# Probability of the positive class on the hold-out split.
val_probs = model.predict_proba(X_val)[:,1]
print("Hold‑out AUROC:", roc_auc_score(y_val, val_probs))


In [None]:
# Choose the decision threshold at the last ROC point whose false-positive
# rate on the hold-out split is still at or below 3%.
import numpy as np
fpr, tpr, thr = roc_curve(y_val, val_probs)
thr_fpr = thr[np.flatnonzero(fpr <= 0.03)[-1]]
print("Threshold at 3% FPR:", thr_fpr)
