In [1]:
# Fraud Decisioning & Risk Scoring Framework (Credit Card / Customer Fraud)
# ✅ Colab / Jupyter-friendly (no __file__ dependency)
# ✅ Generates synthetic transaction data (safe), trains model, scores, decisioning, saves outputs + charts

import os
import math
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    roc_auc_score, average_precision_score, classification_report,
    confusion_matrix, precision_recall_curve, roc_curve
)
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

# ----------------------------
# 0) Paths (Notebook-safe)
# ----------------------------
# Anchor all artifacts to the current working directory so the script behaves
# the same in a notebook (where __file__ is undefined) and from the CLI.
ROOT = os.getcwd()
DATA_DIR = os.path.join(ROOT, "data")
OUT_DIR = os.path.join(ROOT, "outputs")
for _artifact_dir in (DATA_DIR, OUT_DIR):
    # exist_ok=True makes re-running the cell safe.
    os.makedirs(_artifact_dir, exist_ok=True)

# One seed drives both the stdlib and NumPy global generators.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# ----------------------------
# 1) Synthetic Data Generator
# ----------------------------
def generate_synthetic_fraud_data(
    n_customers: int = 6000,
    n_transactions: int = 80000,
    start_date: str = "2025-01-01",
    days: int = 120,
    base_logit: float = -6.2,
) -> pd.DataFrame:
    """
    Build a synthetic credit-card transaction table with a fraud label.

    Each row is one transaction carrying:
    - customer profile (account_age_days, prior_chargebacks)
    - transaction attributes (amount, merchant category, country, card_present)
    - risk signals (device_change, velocity_1h/24h, failed_logins_24h, travel_flag)
    - label: is_fraud (0/1)

    All randomness comes from the global ``np.random`` state, so call
    ``np.random.seed(...)`` beforehand for reproducible output.

    Args:
        n_customers: Number of distinct customers (ids 1..n_customers).
        n_transactions: Number of transaction rows to generate.
        start_date: First day of the simulated window, formatted ``YYYY-MM-DD``.
        days: Length of the simulated window in days; timestamps are drawn
            at minute resolution inside ``[start_date, start_date + days)``.
        base_logit: Intercept of the logistic fraud-probability model.
            Lower values make fraud rarer, higher values make it more common.
            NOTE: with the default (-6.2) the label is *not* rare -- a recorded
            run with the default arguments shows roughly 30% positives in the
            test split, not the 1-3% typical of real card portfolios. Pass a
            lower value to simulate a more imbalanced portfolio.

    Returns:
        DataFrame with columns transaction_id, customer_id, transaction_time,
        transaction_amount, merchant_category, transaction_country,
        card_present, device_change, failed_logins_24h, travel_flag,
        velocity_1h, velocity_24h, account_age_days, prior_chargebacks,
        is_fraud. The latent per-customer propensity used to generate the
        label is dropped before returning.

    Raises:
        ValueError: If ``start_date`` does not match ``%Y-%m-%d`` (raised by
            ``datetime.strptime``).
    """
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")

    # NOTE: the order of the np.random draws below determines the output for a
    # given seed; keep it stable when editing.

    # ---- Customer-level attributes ----
    customer_ids = np.arange(1, n_customers + 1)

    account_age_days = np.random.gamma(shape=2.0, scale=250.0, size=n_customers).astype(int) + 10
    account_age_days = np.clip(account_age_days, 10, 3650)

    # Prior chargebacks: a rare-ish history signal, capped at 6.
    prior_chargebacks = np.clip(np.random.poisson(lam=0.12, size=n_customers), 0, 6)

    # Latent baseline risk per customer; Beta(2, 15) puts most mass near zero.
    cust_risk = np.random.beta(a=2.0, b=15.0, size=n_customers)

    customers = pd.DataFrame({
        "customer_id": customer_ids,
        "account_age_days": account_age_days,
        "prior_chargebacks": prior_chargebacks,
        "cust_risk_propensity": cust_risk,
    })

    # ---- Transaction-level attributes ----
    # Assign each transaction to a customer (with replacement).
    tx_customer = np.random.choice(customer_ids, size=n_transactions, replace=True)

    # Minute offsets spread uniformly over the window.
    tx_time_offsets = np.random.randint(0, days * 24 * 60, size=n_transactions)
    tx_time = [start_dt + timedelta(minutes=int(m)) for m in tx_time_offsets]

    # Log-normal amounts, clipped to [1, 2500] and rounded to cents.
    amount = np.random.lognormal(mean=3.5, sigma=0.9, size=n_transactions)
    amount = np.clip(amount, 1, 2500).round(2)

    # Merchant categories (MCC-like).
    mcc = np.random.choice(
        ["grocery", "gas", "electronics", "online_retail", "travel", "restaurants", "pharmacy", "luxury"],
        size=n_transactions,
        p=[0.18, 0.12, 0.10, 0.22, 0.08, 0.16, 0.08, 0.06],
    )

    # Transaction country.
    country = np.random.choice(
        ["US", "CA", "MX", "GB", "IN", "AE", "NG", "BR", "FR", "DE"],
        size=n_transactions,
        p=[0.72, 0.06, 0.05, 0.03, 0.03, 0.02, 0.02, 0.03, 0.02, 0.02],
    )

    # Card-present is likelier for in-person categories (78% vs 22%).
    card_present = np.where(
        np.isin(mcc, ["grocery", "gas", "restaurants", "pharmacy"]),
        np.random.binomial(1, 0.78, size=n_transactions),
        np.random.binomial(1, 0.22, size=n_transactions),
    )

    # Device change (new device / browser) is likelier for online-leaning categories.
    device_change = np.where(
        np.isin(mcc, ["online_retail", "electronics", "travel"]),
        np.random.binomial(1, 0.16, size=n_transactions),
        np.random.binomial(1, 0.06, size=n_transactions),
    )

    # Failed logins in the last 24h (account-takeover signal), mostly zero.
    failed_logins_24h = np.clip(np.random.poisson(lam=0.25, size=n_transactions), 0, 8)

    # Travel flag (geo-mismatch proxy), rare.
    travel_flag = np.random.binomial(1, 0.04, size=n_transactions)

    # Velocity counts: Poisson base plus uplift for online categories and
    # ATO-like patterns (two or more failed logins).
    base_v1h = np.random.poisson(lam=0.18, size=n_transactions)
    base_v24 = np.random.poisson(lam=1.6, size=n_transactions)
    uplift_online = np.where(np.isin(mcc, ["online_retail", "electronics"]), 1, 0)
    uplift_ato = np.where(failed_logins_24h >= 2, 1, 0)

    velocity_1h = base_v1h + np.random.binomial(1, 0.12, size=n_transactions) + uplift_online + uplift_ato
    velocity_24h = base_v24 + np.random.poisson(lam=0.6, size=n_transactions) + (2 * uplift_online) + (2 * uplift_ato)

    velocity_1h = np.clip(velocity_1h, 0, 15)
    velocity_24h = np.clip(velocity_24h, 0, 60)

    df = pd.DataFrame({
        "transaction_id": np.arange(1, n_transactions + 1),
        "customer_id": tx_customer,
        "transaction_time": pd.to_datetime(tx_time),
        "transaction_amount": amount,
        "merchant_category": mcc,
        "transaction_country": country,
        "card_present": card_present.astype(int),
        "device_change": device_change.astype(int),
        "failed_logins_24h": failed_logins_24h.astype(int),
        "travel_flag": travel_flag.astype(int),
        "velocity_1h": velocity_1h.astype(int),
        "velocity_24h": velocity_24h.astype(int),
    })

    # Attach all customer attributes in a single left join; the latent
    # propensity rides along only until the label has been generated.
    df = df.merge(customers, on="customer_id", how="left")

    # ---- Fraud probability (logistic structure) ----
    # Risk increases with: amount, risky merchant category / country,
    # card-not-present, device change, travel flag, failed logins, velocity,
    # newer accounts, prior chargebacks and the latent customer propensity.
    mcc_risk_map = {
        "grocery": 0.2,
        "gas": 0.35,
        "electronics": 0.8,
        "online_retail": 0.9,
        "travel": 0.85,
        "restaurants": 0.3,
        "pharmacy": 0.25,
        "luxury": 0.95,
    }
    country_risk_map = {
        "US": 0.25, "CA": 0.30, "MX": 0.45, "GB": 0.35, "IN": 0.50,
        "AE": 0.55, "NG": 0.75, "BR": 0.60, "FR": 0.35, "DE": 0.33,
    }

    mcc_risk = df["merchant_category"].map(mcc_risk_map).astype(float)
    ctry_risk = df["transaction_country"].map(country_risk_map).astype(float)

    log_amt = np.log1p(df["transaction_amount"].astype(float))

    # Newness: 1 for a brand-new account, 0 for one at the 3650-day cap.
    acc_age = df["account_age_days"].astype(float)
    newness = 1.0 - np.clip(acc_age / 3650.0, 0, 1)

    prior_cb = df["prior_chargebacks"].astype(float)

    z = (
        base_logit
        + 0.55 * log_amt
        + 1.1 * (1 - df["card_present"].astype(float))
        + 0.9 * df["device_change"].astype(float)
        + 1.25 * df["travel_flag"].astype(float)
        + 0.38 * df["failed_logins_24h"].astype(float)
        + 0.22 * df["velocity_1h"].astype(float)
        + 0.10 * df["velocity_24h"].astype(float)
        + 0.9 * mcc_risk
        + 0.7 * ctry_risk
        + 1.1 * newness
        + 0.75 * prior_cb
        + 2.2 * df["cust_risk_propensity"].astype(float)
    )

    p = 1 / (1 + np.exp(-z))

    # Draw the label from the per-row probability. The resulting fraud rate is
    # governed by base_logit (see docstring); it is not forced to a target.
    is_fraud = np.random.binomial(1, np.clip(p.to_numpy(), 0, 1))

    # Flip ~0.2% of labels to mimic imperfect ground truth.
    flip_mask = np.random.binomial(1, 0.002, size=n_transactions).astype(bool)
    is_fraud[flip_mask] = 1 - is_fraud[flip_mask]

    df["is_fraud"] = is_fraud.astype(int)

    # The latent propensity is generator-only; it must not leak into features.
    return df.drop(columns=["cust_risk_propensity"])


# ----------------------------
# 2) Risk Score + Decisioning
# ----------------------------
def prob_to_risk_score(prob: np.ndarray, min_score=0, max_score=1000) -> np.ndarray:
    """
    Map fraud probabilities linearly onto an integer risk scale.

    Probabilities are first bounded to [0, 1]; 0 maps to ``min_score`` and
    1 maps to ``max_score`` (defaults 0 and 1000, higher = riskier). The
    result is rounded to the nearest integer.
    """
    span = max_score - min_score
    bounded = np.clip(prob, 0, 1)
    return np.round(bounded * span + min_score).astype(int)

def apply_decision(score: np.ndarray, approve_max=300, review_max=700) -> np.ndarray:
    """
    Translate risk scores into APPROVE / REVIEW / DECLINE labels.

    Scores up to and including ``approve_max`` are approved, scores up to and
    including ``review_max`` go to review, and everything else is declined.
    """
    # Conditions are checked in order; the first one that holds wins.
    tiers = [score <= approve_max, score <= review_max]
    labels = ["APPROVE", "REVIEW"]
    return np.select(tiers, labels, default="DECLINE")

# ----------------------------
# 3) Build + Train Model
# ----------------------------
def train_fraud_model(df: pd.DataFrame):
    """
    Fit the logistic-regression fraud pipeline and score a held-out split.

    Args:
        df: Transaction table containing the feature columns listed below,
            a ``transaction_time`` column and the ``is_fraud`` label.

    Returns:
        ``(model, pack)`` where ``model`` is the fitted sklearn Pipeline and
        ``pack`` is the tuple
        ``(X_train, X_test, y_train, y_test, p_test, roc, ap)``:
        the split frames/labels, the predicted class-1 probabilities on the
        test split, and the test ROC-AUC and average precision.
    """
    label_col = "is_fraud"

    # How each model input is preprocessed.
    scaled_cols = [
        "transaction_amount",
        "failed_logins_24h",
        "velocity_1h",
        "velocity_24h",
        "account_age_days",
        "prior_chargebacks",
    ]
    onehot_cols = ["merchant_category", "transaction_country"]
    flag_cols = ["card_present", "device_change", "travel_flag"]

    # Column order of the returned X_train / X_test frames.
    model_inputs = [
        "transaction_amount",
        "merchant_category",
        "transaction_country",
        "card_present",
        "device_change",
        "failed_logins_24h",
        "travel_flag",
        "velocity_1h",
        "velocity_24h",
        "account_age_days",
        "prior_chargebacks",
    ]

    # Work on a time-ordered copy; the caller's frame is left untouched.
    ordered = df.sort_values("transaction_time").reset_index(drop=True)
    X = ordered[model_inputs].copy()
    y = ordered[label_col].copy()

    # Stratified random 80/20 split (not a temporal hold-out).
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.20,
        random_state=RANDOM_SEED,
        stratify=y,
    )

    numeric_branch = Pipeline(steps=[("scaler", StandardScaler())])
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_branch, scaled_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), onehot_cols),
            ("pass", "passthrough", flag_cols),
        ],
        remainder="drop",
    )

    # Interpretable baseline; class_weight="balanced" reweights the classes
    # inversely to their frequency.
    classifier = LogisticRegression(
        max_iter=500,
        class_weight="balanced",
        n_jobs=None,
    )

    model = Pipeline(steps=[
        ("preprocess", preprocessor),
        ("clf", classifier),
    ])
    model.fit(X_train, y_train)

    # Column 1 of predict_proba holds the probability of the positive class.
    p_test = model.predict_proba(X_test)[:, 1]

    roc = roc_auc_score(y_test, p_test)
    ap = average_precision_score(y_test, p_test)

    return model, (X_train, X_test, y_train, y_test, p_test, roc, ap)

# ----------------------------
# 4) Threshold Calibration (Optional but nice)
# ----------------------------
def find_threshold_for_precision(y_true, y_prob, target_precision=0.80):
    """
    Locate the lowest probability cutoff whose precision meets the target.

    Returns a ``(threshold, precision, recall)`` tuple of floats. If no cutoff
    reaches ``target_precision``, the highest available threshold is returned
    together with the precision and recall observed at that threshold.
    Useful for picking a "DECLINE" cutoff in fraud systems.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)

    # precision/recall carry one extra trailing entry with no matching
    # threshold, so only the first len(thresholds) entries are candidates.
    qualifying = np.flatnonzero(precision[:-1] >= target_precision)

    # First hit = smallest qualifying threshold; otherwise fall back to the
    # last (highest) threshold.
    idx = qualifying[0] if qualifying.size else len(thresholds) - 1
    return float(thresholds[idx]), float(precision[idx]), float(recall[idx])

# ----------------------------
# 5) Plot Helpers
# ----------------------------
def save_confusion_matrix(cm, path):
    """
    Render a 2x2 confusion matrix as an annotated heatmap and save it.

    Args:
        cm: Confusion matrix with actual classes on rows and predicted
            classes on columns (the axis labels below assume this layout).
        path: Destination image path passed to ``savefig``.
    """
    fig = plt.figure()
    try:
        plt.imshow(cm, interpolation="nearest")
        plt.title("Confusion Matrix (Test)")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.xticks([0, 1], ["Not Fraud", "Fraud"])
        plt.yticks([0, 1], ["Not Fraud", "Fraud"])
        # Write each cell's count at its (column, row) position.
        for (i, j), v in np.ndenumerate(cm):
            plt.text(j, i, str(v), ha="center", va="center")
        plt.tight_layout()
        fig.savefig(path, dpi=160)
    finally:
        # Close this specific figure even if drawing or saving fails, so a
        # failed run does not leave an open figure behind.
        plt.close(fig)

def save_risk_score_distribution(df_scored, path):
    """
    Save a 50-bin histogram of the ``risk_score`` column.

    Args:
        df_scored: Frame containing a ``risk_score`` column.
        path: Destination image path passed to ``savefig``.
    """
    fig = plt.figure()
    try:
        plt.hist(df_scored["risk_score"], bins=50)
        plt.title("Risk Score Distribution")
        plt.xlabel("Risk Score (0–1000)")
        plt.ylabel("Count")
        plt.tight_layout()
        fig.savefig(path, dpi=160)
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)

def save_roc_curve(y_true, y_prob, path):
    """
    Plot the ROC curve with a dashed chance diagonal and save it.

    Args:
        y_true: True binary labels.
        y_prob: Predicted scores/probabilities for the positive class.
        path: Destination image path passed to ``savefig``.
    """
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    fig = plt.figure()
    try:
        plt.plot(fpr, tpr)
        # Reference line for a no-skill classifier.
        plt.plot([0, 1], [0, 1], linestyle="--")
        plt.title("ROC Curve (Test)")
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.tight_layout()
        fig.savefig(path, dpi=160)
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)

def save_pr_curve(y_true, y_prob, path):
    """
    Plot the precision-recall curve (recall on x, precision on y) and save it.

    Args:
        y_true: True binary labels.
        y_prob: Predicted scores/probabilities for the positive class.
        path: Destination image path passed to ``savefig``.
    """
    precision, recall, _ = precision_recall_curve(y_true, y_prob)
    fig = plt.figure()
    try:
        plt.plot(recall, precision)
        plt.title("Precision-Recall Curve (Test)")
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.tight_layout()
        fig.savefig(path, dpi=160)
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)

# ----------------------------
# 6) Main Run
# ----------------------------
def _render_executive_summary(roc, ap, decline_thr, achieved_prec, achieved_rec):
    """Fill the markdown executive-summary template with the run's metrics."""
    return f"""# Fraud Decisioning & Risk Scoring Framework (Credit Card / Customer Fraud)

## What this does
This project builds a production-style fraud decisioning framework:
- Train a supervised model to estimate fraud probability for transactions
- Convert probability into a **0–1000 risk score**
- Apply thresholds to produce decisions: **APPROVE / REVIEW / DECLINE**

## Dataset
Synthetic but realistic credit card transactions including:
- Amount, merchant category, country
- Card present vs not present
- Velocity (1h/24h), device change
- Failed logins (ATO proxy), travel flag
- Customer tenure and prior chargebacks

## Model
- Logistic Regression (interpretable baseline)
- Class imbalance handled via `class_weight="balanced"`

## Test Performance (probability model)
- ROC-AUC: **{roc:.4f}**
- Average Precision (PR-AUC): **{ap:.4f}**

## Decisioning Logic (risk score thresholds)
- 0–300: APPROVE
- 301–700: REVIEW
- 701–1000: DECLINE

## Precision-based decline calibration (optional)
A probability threshold targeting precision ~0.80 produced:
- Threshold: **{decline_thr:.4f}**
- Achieved precision: **{achieved_prec:.4f}**
- Achieved recall: **{achieved_rec:.4f}**

## Outputs
- `outputs/scored_test_transactions.csv`
- `outputs/decision_summary.csv`
- `outputs/fraud_rate_by_decision.csv`
- Charts: ROC, PR, confusion matrix, risk score distribution
"""


def main():
    """
    Run the end-to-end pipeline: generate data, train, score, decide, report.

    Writes the raw dataset to DATA_DIR and the scored test set, summary
    tables, charts and a markdown executive summary to OUT_DIR, then prints
    a short console recap.
    """

    def out_file(name):
        # All reporting artifacts live directly under OUT_DIR.
        return os.path.join(OUT_DIR, name)

    # A) Generate and persist the raw synthetic dataset.
    transactions = generate_synthetic_fraud_data(
        n_customers=6000,
        n_transactions=80000,
        start_date="2025-01-01",
        days=120,
    )
    data_path = os.path.join(DATA_DIR, "synthetic_fraud_transactions.csv")
    transactions.to_csv(data_path, index=False)

    # B) Train the model; only the test-side pieces are needed from here on.
    model, pack = train_fraud_model(transactions)
    _, X_test, _, y_test, p_test, roc, ap = pack

    # C) Data-driven decline cutoff for a target precision. It is reported in
    #    the executive summary only; decisions below use fixed score bands.
    decline_thr, achieved_prec, achieved_rec = find_threshold_for_precision(
        y_true=y_test.values,
        y_prob=p_test,
        target_precision=0.80,
    )

    # Fixed bands: APPROVE <= 300, REVIEW 301-700, DECLINE > 700.
    risk_scores = prob_to_risk_score(p_test)  # 0-1000
    decisions = apply_decision(risk_scores, approve_max=300, review_max=700)

    # D) Evaluate with DECLINE treated as the positive ("fraud") prediction.
    y_pred_decline = (decisions == "DECLINE").astype(int)
    cm = confusion_matrix(y_test, y_pred_decline)
    report = classification_report(y_test, y_pred_decline, digits=4)

    # E) Scored test set: features plus label, probability, score, decision.
    df_test_scored = X_test.copy()
    df_test_scored["actual_is_fraud"] = y_test.values
    df_test_scored["fraud_probability"] = p_test
    df_test_scored["risk_score"] = risk_scores
    df_test_scored["decision"] = decisions

    # Row counts per (decision, actual label).
    decision_counts = df_test_scored.groupby(["decision", "actual_is_fraud"]).size()
    decision_summary = (
        decision_counts
        .reset_index(name="count")
        .sort_values(["decision", "actual_is_fraud"])
    )

    # Observed fraud rate within each decision bucket, highest first.
    bucket_rates = df_test_scored.groupby("decision")["actual_is_fraud"].mean()
    fraud_rate_by_decision = (
        bucket_rates
        .reset_index(name="fraud_rate")
        .sort_values("fraud_rate", ascending=False)
    )

    # Persist the tables.
    scored_path = out_file("scored_test_transactions.csv")
    decision_summary_path = out_file("decision_summary.csv")
    fraud_rate_path = out_file("fraud_rate_by_decision.csv")
    for frame, target in (
        (df_test_scored, scored_path),
        (decision_summary, decision_summary_path),
        (fraud_rate_by_decision, fraud_rate_path),
    ):
        frame.to_csv(target, index=False)

    # Persist the charts.
    save_confusion_matrix(cm, out_file("confusion_matrix_decline_vs_actual.png"))
    save_risk_score_distribution(df_test_scored, out_file("risk_score_distribution.png"))
    save_roc_curve(y_test.values, p_test, out_file("roc_curve.png"))
    save_pr_curve(y_test.values, p_test, out_file("pr_curve.png"))

    # Executive summary markdown.
    exec_md = _render_executive_summary(roc, ap, decline_thr, achieved_prec, achieved_rec)
    with open(out_file("executive_summary.md"), "w", encoding="utf-8") as f:
        f.write(exec_md)

    # Quick console recap.
    print("✅ Done.")
    print(f"ROOT: {ROOT}")
    print(f"Data saved: {data_path}")
    print(f"Scored output saved: {scored_path}")
    print(f"ROC-AUC: {roc:.4f} | PR-AUC: {ap:.4f}\n")
    print("Decisioning evaluation (DECLINE as positive prediction):")
    print(report)
    print("Saved outputs in:", OUT_DIR)

# Entry point: run the pipeline when this module is executed directly
# rather than imported.
if __name__ == "__main__":
    main()


✅ Done.
ROOT: /content
Data saved: /content/data/synthetic_fraud_transactions.csv
Scored output saved: /content/outputs/scored_test_transactions.csv
ROC-AUC: 0.7705 | PR-AUC: 0.5974

Decisioning evaluation (DECLINE as positive prediction):
              precision    recall  f1-score   support

           0     0.7713    0.9137    0.8365     11139
           1     0.6573    0.3791    0.4809      4861

    accuracy                         0.7513     16000
   macro avg     0.7143    0.6464    0.6587     16000
weighted avg     0.7367    0.7513    0.7285     16000

Saved outputs in: /content/outputs
