In [2]:
import sys
from pathlib import Path

# Make the repository root importable so the `src.*` imports used by this
# notebook resolve. The root is the parent of the current working directory,
# so this relies on the kernel's working directory being the notebook folder.
project_root = Path("..").resolve()

# Only append when the entry is missing: re-running this cell must not pile
# up duplicate entries on sys.path.
_project_root_entry = str(project_root)
if _project_root_entry not in sys.path:
    sys.path.append(_project_root_entry)

In [3]:
import joblib
import numpy as np
import pandas as pd

from src.data import load_data
from src.utils import simulate_imbalance
from src.train import temporal_split
from src.evaluate import threshold_metrics

In [4]:
# Load the transactions and pass them straight through simulate_imbalance.
# Keep fraud_rate equal to the fraud rate you are optimizing for.
df = simulate_imbalance(
    load_data("../data/creditcard2023.csv"),
    target_col="Class",
    fraud_rate=0.05,
)

# Split into train / validation / test frames via temporal_split.
train_df, val_df, test_df = temporal_split(df)

# Separate the label column from the validation features.
y_val = val_df["Class"]
X_val = val_df.drop(columns=["Class"])

In [7]:
import mlflow.sklearn

# Load the serialized model artifact from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
model_path = "../artifacts/xgb_model.pkl"
model = joblib.load(model_path)

In [8]:
# Keep column index 1 of the predict_proba output for every validation row.
# NOTE(review): presumably the fraud (Class == 1) probability — confirm the
# column order against model.classes_.
class_probs = model.predict_proba(X_val)
val_probs = class_probs[:, 1]

In [9]:
# Candidate decision thresholds: 200 evenly spaced values from 0.001 to 0.5.
thresholds = np.linspace(start=0.001, stop=0.5, num=200)

# Compute the per-threshold metrics table from the validation labels and the
# predicted probabilities.
df_thresh = threshold_metrics(y_val, val_probs, thresholds)

# Display the five rows with the highest recall.
df_thresh.sort_values(by="recall", ascending=False).head(5)

Unnamed: 0,threshold,precision,recall,fpr
0,0.001,0.113208,0.857143,0.001002
1,0.003508,0.193548,0.857143,0.000533
2,0.006015,0.26087,0.857143,0.000362
3,0.008523,0.333333,0.857143,0.000256
4,0.01103,0.333333,0.857143,0.000256


In [10]:
# Keep only thresholds whose false-positive rate is at most MAX_FPR (0.01,
# i.e. 1%), then display the five with the highest recall.
MAX_FPR = 0.01
within_fpr_budget = df_thresh["fpr"] <= MAX_FPR
(
    df_thresh.loc[within_fpr_budget]
    .sort_values(by="recall", ascending=False)
    .head()
)

Unnamed: 0,threshold,precision,recall,fpr
0,0.001,0.113208,0.857143,0.001002
1,0.003508,0.193548,0.857143,0.000533
2,0.006015,0.26087,0.857143,0.000362
3,0.008523,0.333333,0.857143,0.000256
4,0.01103,0.333333,0.857143,0.000256


Based on a threshold sweep over the validation set under a fixed false-positive-rate constraint (≤1%, i.e. `fpr <= 0.01`), a probability threshold of 0.006 was selected.
This achieves ~86% recall while limiting customer friction (the false-positive rate) to <0.04%, demonstrating that the choice of decision threshold is more critical than raw model accuracy in fraud detection.