# 04 — Threshold Optimization & Calibration

Fraud detection requires tuning thresholds based on:
- Precision-Recall tradeoffs
- Business cost matrix
- Investigator workload constraints


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys
sys.path.append('../')

from src.models.thresholding import (
    find_optimal_threshold, plot_precision_recall_curve,
    plot_roc_curve, plot_cost_vs_threshold
)
from src.evaluation.calibration import (
    calibrate_probabilities, plot_calibration_curve,
    calculate_brier_score, calculate_ece
)


## 1. Load Predictions


In [None]:
# Load test data and predictions (from previous notebook).
# In practice, load from saved model outputs.
df_test = pd.read_csv("../data/processed/features.csv")
# ... load predictions from trained model

# For demo, we'll generate example predictions.
# Cap the sample size at the number of available rows: Series.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
n_samples = min(10000, len(df_test))
y_test = df_test['fraud'].sample(n_samples, random_state=42)

# Seed a dedicated generator so the placeholder scores are reproducible across
# runs, matching the fixed random_state used for the label sample above.
# NOTE: these scores are drawn independently of y_test, so every metric
# computed from them downstream is illustrative only.
rng = np.random.default_rng(42)
y_proba = rng.beta(2, 5, size=len(y_test))  # Example probabilities


## 2. ROC / PR Curve Analysis


In [None]:
# Both plotting helpers are imported from src.models.thresholding and are
# called with the true labels first, then the predicted probabilities.

# Plot PR curve
plot_precision_recall_curve(y_test, y_proba)

# Plot ROC curve
plot_roc_curve(y_test, y_proba)


## 3. Cost-Based Thresholding


In [None]:
# Business cost matrix, defined once so that the threshold search and the
# cost plot below are guaranteed to use the same costs.
FRAUD_LOSS_COST = 1000.0  # Cost of missing fraud
FALSE_POSITIVE_COST = 10.0  # Cost of false alarm

# Find optimal threshold based on cost
optimal_threshold, metrics = find_optimal_threshold(
    y_test, y_proba,
    fraud_loss_cost=FRAUD_LOSS_COST,
    false_positive_cost=FALSE_POSITIVE_COST,
    max_investigations=None,  # No constraint
)

# Report the chosen threshold and the metrics returned alongside it.
print(f"Optimal threshold: {optimal_threshold:.4f}")
print(f"Total cost: ${metrics['total_cost']:,.2f}")
print(f"Precision: {metrics['precision']:.4f}")
print(f"Recall: {metrics['recall']:.4f}")

# Plot cost vs threshold (same cost matrix as the search above)
plot_cost_vs_threshold(
    y_test, y_proba,
    fraud_loss_cost=FRAUD_LOSS_COST,
    false_positive_cost=FALSE_POSITIVE_COST,
)


## 4. Calibration: Platt Scaling / Isotonic Regression


In [None]:
# Calibrate the raw scores against the labels using the 'isotonic' method.
# NOTE(review): y_test is passed to the calibrator here and is also the
# reference for the metrics below, so the "calibrated" numbers may be
# optimistic -- confirm whether calibrate_probabilities holds data out.
y_proba_calibrated = calibrate_probabilities(y_proba, y_test, method='isotonic')

# Evaluate each metric on the (uncalibrated, calibrated) pair, in that order.
_proba_pair = (y_proba, y_proba_calibrated)
brier_uncal, brier_cal = [calculate_brier_score(y_test, p) for p in _proba_pair]
ece_uncal, ece_cal = [calculate_ece(y_test, p) for p in _proba_pair]

# Before/after summary of both calibration metrics.
print(f"Brier Score - Uncalibrated: {brier_uncal:.4f}, Calibrated: {brier_cal:.4f}")
print(f"ECE - Uncalibrated: {ece_uncal:.4f}, Calibrated: {ece_cal:.4f}")

# Calibration curve comparing the raw and calibrated probabilities.
plot_calibration_curve(y_test, y_proba, y_proba_calibrated)
