# 04 — Threshold Tuning & Expected Loss (Decision Policy)

Convert risk scores (probabilities) into a **tiered decision policy**.

**Key idea:** choose thresholds that minimize **expected loss** rather than relying on the default 0.5 cutoff.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the scored rows: true label, predicted probability and transaction amount.
scoring = pd.read_csv('../data/xgb_test_scoring.csv')

# Fail fast with a readable message instead of a KeyError further down.
required_cols = ['y_true', 'proba', 'amount']
missing_cols = [c for c in required_cols if c not in scoring.columns]
if missing_cols:
    raise ValueError(f'Scoring file is missing required columns: {missing_cols}')

# Missing values would distort the analysis silently: a NaN score never clears
# any threshold, and a NaN amount can turn a summed missed-fraud cost into NaN.
if scoring[required_cols].isna().any().any():
    raise ValueError('Scoring file has missing values in y_true/proba/amount')

y_true = scoring['y_true'].astype(int).values
proba = scoring['proba'].astype(float).values
amount = scoring['amount'].astype(float).values

# The rest of the notebook compares labels against 0 and 1 only, and sweeps
# thresholds over [0, 1], so anything outside those ranges would be ignored.
if not np.isin(y_true, (0, 1)).all():
    raise ValueError('y_true must contain only 0/1 labels')
if ((proba < 0) | (proba > 1)).any():
    raise ValueError('proba must lie in [0, 1]')

print('Test size:', len(y_true))
print('Fraud rate:', y_true.mean())

## 1) Cost model

Simple cost model for a portfolio-ready narrative:
- **FN (missed fraud)** cost = missed transaction amount
- **FP (wrongly intervened)** cost = fixed friction + ops cost (`FP_COST`)

Tune `FP_COST` to reflect how strict you want the system to be.


In [None]:
# Fixed cost charged for each false positive (friction + ops cost).
FP_COST = 10.0

def expected_loss_at_threshold(t, y, p, amt, fp_cost=FP_COST):
    """Compute the total loss of intervening on every score at or above ``t``.

    Parameters
    ----------
    t : float
        Decision threshold; a row is treated as "intervene" when ``p >= t``.
    y : array-like
        True labels, compared against 0 (legitimate) and 1 (fraud).
    p : array-like
        Predicted scores, same shape as ``y``.
    amt : array-like
        Transaction amounts, same shape as ``y``.
    fp_cost : float, optional
        Fixed cost per false positive. Defaults to ``FP_COST``.

    Returns
    -------
    tuple
        ``(loss, fp, fn, fn_cost)`` where ``fp`` / ``fn`` are the false
        positive / false negative counts, ``fn_cost`` is the summed amount of
        the false negatives and ``loss = fp * fp_cost + fn_cost``.

    Raises
    ------
    ValueError
        If ``y``, ``p`` and ``amt`` do not share the same shape.
    """
    # Coerce so lists and pandas Series work the same as ndarrays.
    y = np.asarray(y)
    p = np.asarray(p)
    amt = np.asarray(amt)
    if not (y.shape == p.shape == amt.shape):
        # Without this check a length-1 input would broadcast silently.
        raise ValueError(
            f'y, p and amt must have the same shape, got {y.shape}, {p.shape}, {amt.shape}'
        )

    pred = p >= t
    fp = (pred & (y == 0)).sum()
    fn_mask = ~pred & (y == 1)
    fn = fn_mask.sum()
    # Missed fraud costs the full transaction amount.
    fn_cost = amt[fn_mask].sum()
    loss = fp * fp_cost + fn_cost
    return loss, fp, fn, fn_cost

# Sweep 501 evenly spaced thresholds over [0, 1] and record the loss at each.
# NOTE(review): the threshold is picked on the same scored rows that the policy
# is evaluated on later in this notebook, so the reported loss may be
# optimistic — confirm whether a separate validation split should be used.
thresholds = np.linspace(0, 1, 501)
rows = [
    (t, *expected_loss_at_threshold(t, y_true, proba, amount))
    for t in thresholds
]

loss_columns = ['threshold','expected_loss','fp','fn','fn_cost_amount']
loss_df = pd.DataFrame(rows, columns=loss_columns)

# idxmin returns the first minimum, so ties resolve to the lowest threshold.
best_idx = loss_df['expected_loss'].idxmin()
best = loss_df.loc[best_idx]
best

In [None]:
# Draw the loss curve from the sweep on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(loss_df['threshold'], loss_df['expected_loss'])
ax.set_title('Expected Loss vs Threshold')
ax.set_xlabel('Threshold')
ax.set_ylabel('Expected Loss')
plt.show()

## 2) Tiered policy: Allow / Flag / Block

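- `t_flag`: scores at or above this threshold are flagged for review
- `t_block`: scores at or above this threshold are blocked; it is set so that only the top-risk tail is blocked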


In [None]:
# The Flag tier starts at the loss-minimizing threshold from the sweep.
t_flag = float(best['threshold'])
block_top_pct = 0.001  # top 0.1% risk
t_block = float(np.quantile(proba, 1 - block_top_pct))

# Keep the tiers ordered. Block takes precedence over Flag when actions are
# assigned, so a block threshold below t_flag would block transactions that
# the loss sweep said to allow and leave the Flag tier empty.
if t_block < t_flag:
    print('Note: block quantile fell below t_flag; raising t_block to t_flag')
    t_block = t_flag

print('t_flag:', t_flag)
print('t_block:', t_block)

In [None]:
def assign_action(p, t_flag, t_block):
    """Map scores to action codes: 0 below ``t_flag``, 1 from ``t_flag``, 2 from ``t_block``.

    Both comparisons are inclusive (``>=``). The block check is applied last,
    so a score that clears ``t_block`` gets code 2 regardless of ``t_flag``.
    """
    below_block_code = np.where(p >= t_flag, 1, 0)
    return np.where(p >= t_block, 2, below_block_code)

# Code -> label lookup used for display and for the exported file.
action_map = {0:'Allow', 1:'Flag', 2:'Block'}
actions = assign_action(proba, t_flag, t_block)

# Show how many rows land in each tier.
action_labels = pd.Series(actions).map(action_map)
print(action_labels.value_counts())

In [None]:
# Evaluate tiered policy (Flag+Block => intervention)
intervene = (actions >= 1).astype(int)
tp = ((intervene==1) & (y_true==1)).sum()
fp = ((intervene==1) & (y_true==0)).sum()
fn = ((intervene==0) & (y_true==1)).sum()
tn = ((intervene==0) & (y_true==0)).sum()

precision = tp / max(tp + fp, 1)
recall = tp / max(tp + fn, 1)
fn_cost = amount[(intervene==0) & (y_true==1)].sum()
policy_loss = fp * FP_COST + fn_cost

print('TP:', tp, 'FP:', fp, 'FN:', fn, 'TN:', tn)
print(f'Precision: {precision:.4f} | Recall: {recall:.4f}')
print(f'Expected Loss: {policy_loss:.2f}')

block_mask = (actions == 2)
print('Block count:', int(block_mask.sum()))
if block_mask.sum() > 0:
    print('Block fraud rate:', float(y_true[block_mask].mean()))

In [None]:
# Export decision policy output
out = scoring.copy()
out['action'] = pd.Series(actions).map(action_map)
out.to_csv('../data/decision_policy_output.csv', index=False)
print('Saved ../data/decision_policy_output.csv')

Next: Notebook 05 uses SHAP to provide a global explanation for the model.
