In [1]:
# ==================================================
# SCRIPT 3: THRESHOLDING STRATEGIES (ALL MODELS)
# ==================================================
import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score, balanced_accuracy_score,
    classification_report, confusion_matrix, precision_recall_curve
)

# ------------------
# Setup & Artifacts
# ------------------
# Seed NumPy's global RNG. The split and SMOTE below additionally receive
# RANDOM_STATE explicitly, so they do not rely on the global seed.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Load the preprocessing object and the train/test split from disk
# (presumably written by an earlier script in this series — TODO confirm).
# NOTE(review): joblib.load unpickles arbitrary objects; only load these
# files from a trusted location.
preprocessor = joblib.load("preprocessor.pkl")
X_train, X_test, y_train, y_test = joblib.load("splits.pkl")

# The loaded preprocessor is (re)fitted on the full training split here and
# then applied unchanged to the test split.
# NOTE(review): this fit happens BEFORE the train/validation split below, so
# whatever the preprocessor learns during fit also reflects the X_val rows.
# Confirm this is acceptable for validation-based threshold selection.
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc  = preprocessor.transform(X_test)

# Hold out 20% of the processed training data (stratified on the label) as
# the validation fold used for picking thresholds.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_proc, y_train, test_size=0.2, random_state=RANDOM_STATE, stratify=y_train
)

# Use a common SMOTE on the train fold (so every model sees the same balanced train data)
# Only X_tr / y_tr are resampled; X_val and X_test_proc are left as-is.
sm = SMOTE(random_state=RANDOM_STATE)
X_tr_res, y_tr_res = sm.fit_resample(X_tr, y_tr)

# ----------------------
# Candidate classifiers, keyed by display name.
# The estimators are kept plain on purpose: the decision threshold is the
# variable under study, not the model configuration.
# Insertion order is the order in which the models are later evaluated.
# ----------------------
models = {}

models["Logistic Regression"] = LogisticRegression(
    max_iter=1000,
    random_state=RANDOM_STATE,
)

models["Random Forest"] = RandomForestClassifier(
    n_estimators=300,
    random_state=RANDOM_STATE,
    n_jobs=-1,
)

models["Gradient Boosting"] = GradientBoostingClassifier(
    random_state=RANDOM_STATE,
)

models["XGBoost"] = XGBClassifier(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=RANDOM_STATE,
)

# ----------------------
# Helper: evaluate with different threshold objectives
# ----------------------
def choose_threshold_from_validation(val_proba, objective="F1", fixed_recall=0.80,
                                     y_true=None):
    """Pick a decision threshold from validation scores for a given objective.

    Candidate thresholds are the ones returned by
    ``precision_recall_curve(y_true, val_proba)``; a sample is treated as
    predicted positive when its score is ``>=`` the threshold.

    Parameters
    ----------
    val_proba : array-like of shape (n_samples,)
        Positive-class scores for the validation samples.
    objective : {"F1", "BalancedAcc", "FixedRecall"}, default="F1"
        * ``"F1"`` – threshold with the highest F1 on the validation data.
        * ``"BalancedAcc"`` – threshold with the highest balanced accuracy
          (mean of per-class recall) on the validation data.
        * ``"FixedRecall"`` – among thresholds whose recall is at least
          ``fixed_recall``, the one with the highest precision; if none
          qualifies, the threshold with the highest recall.
    fixed_recall : float, default=0.80
        Minimum recall required by the ``"FixedRecall"`` objective.
    y_true : array-like of shape (n_samples,), optional
        Binary ground-truth labels (1 = positive class) aligned with
        ``val_proba``. When omitted, the module-level ``y_val`` is used,
        which preserves the original call signature.

    Returns
    -------
    float
        The selected threshold.

    Raises
    ------
    ValueError
        If ``objective`` is not one of the supported names.
    """
    # Reject bad objectives up front instead of after computing the curve.
    if objective not in ("F1", "BalancedAcc", "FixedRecall"):
        raise ValueError("Unknown objective")

    if y_true is None:
        y_true = y_val  # backward-compatible default: the global validation labels

    precisions, recalls, thresholds = precision_recall_curve(y_true, val_proba)

    if objective == "F1":
        # The small epsilon avoids 0/0 where precision and recall are both zero.
        f1s = (2 * precisions * recalls) / (precisions + recalls + 1e-9)
        # The final curve point (precision=1, recall=0) has no threshold.
        best_idx = int(np.argmax(f1s[:-1]))
    elif objective == "BalancedAcc":
        # Vectorised balanced accuracy for every candidate threshold.
        # Sorting each class's scores once lets searchsorted count, per
        # threshold, how many scores fall below it, which replaces one
        # full metric evaluation per threshold.
        labels = np.asarray(y_true).ravel()
        scores = np.asarray(val_proba).ravel()
        neg_scores = np.sort(scores[labels != 1])
        pos_scores = np.sort(scores[labels == 1])

        # Recall is averaged only over classes that actually occur in y_true.
        class_recalls = []
        if neg_scores.size:
            # Negatives strictly below the threshold are true negatives.
            true_neg = np.searchsorted(neg_scores, thresholds, side="left")
            class_recalls.append(true_neg / neg_scores.size)
        if pos_scores.size:
            # Positives at or above the threshold are true positives.
            pos_below = np.searchsorted(pos_scores, thresholds, side="left")
            class_recalls.append((pos_scores.size - pos_below) / pos_scores.size)

        bal_scores = np.mean(class_recalls, axis=0)
        best_idx = int(np.argmax(bal_scores))
    else:  # "FixedRecall"
        cand = np.where(recalls[:-1] >= fixed_recall)[0]
        if len(cand):
            best_idx = int(cand[np.argmax(precisions[cand])])
        else:
            # No threshold reaches the target recall: take the best available.
            best_idx = int(np.argmax(recalls[:-1]))

    return float(thresholds[best_idx])

def evaluate_model_all_thresholds(model, model_name,
                                  objectives=("F1", "BalancedAcc", "FixedRecall"),
                                  fixed_recall=0.80):
    """Fit ``model`` and report test metrics for each thresholding objective.

    The model is fitted on the module-level resampled training fold
    (``X_tr_res``, ``y_tr_res``). For every objective a threshold is chosen
    from the model's scores on ``X_val`` and then applied to its scores on
    ``X_test_proc``; metrics are computed against ``y_test``. A
    classification report and confusion matrix are printed per objective.

    Parameters
    ----------
    model : estimator
        Object providing ``fit`` and ``predict_proba``. It is fitted in place.
    model_name : str
        Label used in the printed headers and in the result rows.
    objectives : iterable of str, default=("F1", "BalancedAcc", "FixedRecall")
        Objective names forwarded to ``choose_threshold_from_validation``.
    fixed_recall : float, default=0.80
        Recall target forwarded to ``choose_threshold_from_validation``.

    Returns
    -------
    list of dict
        One row per objective with the chosen threshold and test metrics.
    """
    model.fit(X_tr_res, y_tr_res)

    # Scores do not depend on the objective, so compute them once.
    val_proba = model.predict_proba(X_val)[:, 1]
    test_proba = model.predict_proba(X_test_proc)[:, 1]

    # Ranking metrics are threshold-free, hence identical for every objective.
    roc_auc = roc_auc_score(y_test, test_proba)
    pr_auc = average_precision_score(y_test, test_proba)

    results = []
    for objective in objectives:
        best_thr = choose_threshold_from_validation(
            val_proba, objective=objective, fixed_recall=fixed_recall
        )
        y_pred = (test_proba >= best_thr).astype(int)

        print(f"\n=== {model_name} | Threshold Objective: {objective} ===")
        print(f"Chosen threshold (val): {best_thr:.4f}")
        print(classification_report(y_test, y_pred, zero_division=0))
        print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

        results.append({
            "Model": model_name,
            "Objective": objective,
            "Threshold": best_thr,
            "Accuracy": accuracy_score(y_test, y_pred),
            "Balanced_Acc": balanced_accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, zero_division=0),
            "Recall": recall_score(y_test, y_pred, zero_division=0),
            "F1": f1_score(y_test, y_pred, zero_division=0),
            "ROC_AUC": roc_auc,
            "PR_AUC": pr_auc,
        })
    return results

# -----------------
# Run all models across all threshold strategies
# -----------------
# Collect one result row per (model, objective) pair, in dict order.
rows = []
for mname, model in models.items():
    rows += evaluate_model_all_thresholds(model, mname)

# Tabulate and show the rows ranked by PR_AUC, ties broken by ROC_AUC
# (both descending), with a fixed column layout.
df_thr = pd.DataFrame(rows)
print("\n=== Thresholding Strategies (All Models) — Summary (sorted by PR_AUC) ===")
print(
    df_thr.sort_values(by=["PR_AUC", "ROC_AUC"], ascending=False).loc[
        :,
        [
            "Model", "Objective",
            "Accuracy", "Balanced_Acc", "Precision", "Recall", "F1",
            "ROC_AUC", "PR_AUC",
            "Threshold",
        ],
    ]
)



=== Logistic Regression | Threshold Objective: F1 ===
Chosen threshold (val): 0.4560
              precision    recall  f1-score   support

           0       0.90      0.08      0.15     17762
           1       0.10      0.92      0.18      1976

    accuracy                           0.17     19738
   macro avg       0.50      0.50      0.17     19738
weighted avg       0.82      0.17      0.15     19738

Confusion Matrix:
 [[ 1464 16298]
 [  163  1813]]

=== Logistic Regression | Threshold Objective: BalancedAcc ===
Chosen threshold (val): 0.4787
              precision    recall  f1-score   support

           0       0.90      0.22      0.36     17762
           1       0.10      0.78      0.18      1976

    accuracy                           0.28     19738
   macro avg       0.50      0.50      0.27     19738
weighted avg       0.82      0.28      0.34     19738

Confusion Matrix:
 [[ 3988 13774]
 [  431  1545]]

=== Logistic Regression | Threshold Objective: FixedRecall ===
C