# Solution: The Accuracy Trap

This is the answer key for `drill_09_accuracy_trap.ipynb`.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score
)

# Seed NumPy's global RNG so the np.random draws in the cells below are reproducible.
np.random.seed(42)

In [None]:
# Build a synthetic, heavily imbalanced fraud dataset and fit a baseline model.
n_samples = 10000

# Raw transaction features. The order of these np.random calls is significant:
# under the fixed seed, reordering them would change every value drawn.
amount = np.random.exponential(100, n_samples)
hour = np.random.randint(0, 24, n_samples)
is_foreign = np.random.binomial(1, 0.1, n_samples)
customer_age_days = np.random.uniform(1, 1000, n_samples)

# Night-time indicator: hours 0-5 and hour 23.
is_night = ((hour < 6) | (hour > 22)).astype(int)

# Log-odds of fraud: larger amounts, foreign transactions and night-time hours
# raise the risk; customer account age lowers it.
fraud_logit = (
    -7 + 0.01 * amount + 2 * is_foreign +
    0.1 * is_night -
    0.003 * customer_age_days
)

# Logistic transform of the log-odds, then one Bernoulli draw per transaction.
fraud_prob = 1 / (1 + np.exp(-fraud_logit))
uniform_draw = np.random.random(n_samples)
fraud = (uniform_draw < fraud_prob).astype(int)

df = pd.DataFrame({
    'amount': amount,
    'hour': hour,
    'is_foreign': is_foreign,
    'customer_age_days': customer_age_days,
    'fraud': fraud,
})

feature_cols = ['amount', 'hour', 'is_foreign', 'customer_age_days']
X = df[feature_cols]
y = df['fraud']

# Stratify on the label so train and test keep the same fraud rate.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predicted fraud probability (positive-class column) and hard labels.
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)  # Default threshold 0.5

## SOLUTION: Cost-Optimal Threshold

In [None]:
# Business costs (per misclassified transaction)
COST_FN = 1000  # Missing fraud
COST_FP = 10    # False alarm

# Sweep candidate decision thresholds below the 0.5 default and record the
# total business cost plus recall/precision that each one yields.
# NOTE: the threshold is selected on y_test, the same data later used to report
# its cost, so the reported savings are optimistic. Outside of this drill,
# tune the threshold on a separate validation split.
thresholds = np.arange(0.001, 0.5, 0.005)
results = []

for thresh in thresholds:
    preds = (y_prob >= thresh).astype(int)
    # Pin the label set so the matrix is always 2x2 and the four-way unpack
    # below is safe regardless of which classes appear in y_test and preds.
    cm_t = confusion_matrix(y_test, preds, labels=[0, 1])
    tn_t, fp_t, fn_t, tp_t = cm_t.ravel()

    # Total cost = missed frauds * FN cost + false alarms * FP cost.
    cost = fn_t * COST_FN + fp_t * COST_FP
    recall = recall_score(y_test, preds)
    # zero_division=0: a threshold that flags nothing has undefined precision.
    precision = precision_score(y_test, preds, zero_division=0)

    results.append({
        'threshold': thresh, 'cost': cost,
        'recall': recall, 'precision': precision,
        'fn': fn_t, 'fp': fp_t
    })

# One row per threshold; idxmin returns the first row with the lowest cost.
results_df = pd.DataFrame(results)
optimal_idx = results_df['cost'].idxmin()
optimal_threshold = results_df.loc[optimal_idx, 'threshold']

print(f"=== Cost-Optimal Threshold: {optimal_threshold:.3f} ===")
print(f"Recall at optimal: {results_df.loc[optimal_idx, 'recall']:.1%}")
print(f"Precision at optimal: {results_df.loc[optimal_idx, 'precision']:.1%}")

In [None]:
# Compare the default 0.5 threshold against the cost-optimal threshold.
y_pred_fixed = (y_prob >= optimal_threshold).astype(int)

# Pin the label set so the matrix stays 2x2 (and the unpack is safe) even if
# the default threshold flags no transaction as fraud.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
original_cost = fn * COST_FN + fp * COST_FP
optimal_cost = results_df.loc[optimal_idx, 'cost']

# Compute each metric once. zero_division=0 matches the threshold sweep and
# avoids an UndefinedMetricWarning when a model predicts no positives at all.
recall_original = recall_score(y_test, y_pred)
recall_optimized = recall_score(y_test, y_pred_fixed)
precision_original = precision_score(y_test, y_pred, zero_division=0)
precision_optimized = precision_score(y_test, y_pred_fixed, zero_division=0)

print("=== Comparison ===")
print(f"\n                    Original (0.5)    Optimized ({optimal_threshold:.3f})")
print(f"  Recall:           {recall_original:>10.1%}      {recall_optimized:>10.1%}")
print(f"  Precision:        {precision_original:>10.1%}      {precision_optimized:>10.1%}")
print(f"  Total Cost:       ${original_cost:>10,}      ${optimal_cost:>10,}")
print(f"\n  💰 Savings: ${original_cost - optimal_cost:,}")

In [None]:
# Self-check: the optimized threshold must beat the 0.5 default on recall and cost.
# NOTE: every threshold in the sweep grid is below 0.5, so the first assertion
# cannot fail with the current grid; the recall and cost checks are the
# informative ones.
assert optimal_threshold < 0.5, "Optimal threshold should be below 0.5 for costly FN"
assert recall_score(y_test, y_pred_fixed) > recall_score(y_test, y_pred), "Should improve recall"
assert optimal_cost < original_cost, "Should reduce total cost"

print("✓ Accuracy trap escaped!")
print(f"✓ Threshold: 0.5 → {optimal_threshold:.3f}")
print(f"✓ Recall: {recall_score(y_test, y_pred):.1%} → {recall_score(y_test, y_pred_fixed):.1%}")

## Sample Postmortem

### What happened:
- Model had 99.5% accuracy but caught very few actual fraudsters, leading to increased fraud losses.

### Root cause:
- With only 0.5% fraud rate, predicting "no fraud" for everyone gets 99.5% accuracy. The default 0.5 threshold is inappropriate when FN costs (missed fraud: $1000) vastly exceed FP costs (false alarm: $10).

### How to prevent:
- Never use accuracy alone on imbalanced data. Use recall (to catch fraud) and precision (to avoid wasted reviews).
- Set threshold based on business costs: optimal ≈ FP_cost / (FP_cost + FN_cost) = 10 / 1010 ≈ 0.01 (this rule of thumb assumes the predicted probabilities are well calibrated).
- Report PR-AUC instead of ROC-AUC for highly imbalanced problems.