In [None]:
from pathlib import Path

# Every artefact location is resolved relative to the directory the notebook runs from.
current_working_dir = Path.cwd()

# Folder (sibling of the working directory) holding models, scaler and feature lists
model_dir = current_working_dir.parent / "models"

# Calibrated model files: one dict per algorithm, keyed by outcome
LR_models_paths = {
    key: f'{model_dir}/{key}_LR_Calibrated.joblib' for key in ('Hb', 'PLT', 'WBC_Neut')
}
XGB_models_paths = {
    key: f'{model_dir}/{key}_XGBoost_Calibrated.joblib' for key in ('Hb', 'PLT', 'WBC_Neut')
}
LGBM_models_paths = {
    key: f'{model_dir}/{key}_LightGBM_Calibrated.joblib' for key in ('Hb', 'PLT', 'WBC_Neut')
}
TabPFN_models_paths = {
    key: f'{model_dir}/{key}_TabPFN_Calibrated.joblib' for key in ('Hb', 'PLT', 'WBC_Neut')
}

# Fitted scaler and the list of columns it is applied to
scaler_path = f'{model_dir}/scaler_continuous.joblib'
scaler_features_path = f'{model_dir}/scaler_continuous_features.joblib'

# Per-outcome CSV listing the features selected for the LR model
LR_features = {
    key: f'{model_dir}/selected_features_{key}.csv' for key in ('Hb', 'PLT', 'WBC_Neut')
}

# Column lists used to align inputs for the other model families
XGB_features = f'{model_dir}/xgboost_feature_names.joblib'
LGBM_features = f'{model_dir}/lightgbm_feature_names.joblib'
TabPFN_features = f'{model_dir}/tabPFN_feature_names.joblib'

In [None]:
import pandas as pd
# All parquet inputs live in ../data relative to the working directory
data_file_path = current_working_dir.parent / "data"


def _read_split(stem):
    """Read `<data_file_path>/<stem>.parquet` and return it as a DataFrame."""
    return pd.read_parquet(f'{data_file_path}/{stem}.parquet')


# Load datasets
### BuildingCohort
X_train, y_train = _read_split('X_train'), _read_split('y_train')  # training set
X_test, y_test = _read_split('X_test'), _read_split('y_test')      # tuning set
X_val, y_val = _read_split('X_val'), _read_split('y_val')          # validation set

### RetroCohort
X_retro, y_retro = _read_split('X_retro'), _read_split('y_retro')

### ProsCohort
X_pros, y_pros = _read_split('X_pros'), _read_split('y_pros')

In [None]:
# Outcome key -> name of the label column read from the y_* frames
outcome_map = {key: f"outcome_{key}" for key in ("Hb", "PLT", "WBC_Neut")}

# Model families evaluated below (TabPFN is currently switched off)
model_paths = dict(
    LR=LR_models_paths,
    XGBoost=XGB_models_paths,
    LGBM=LGBM_models_paths,
    #TabPFN=TabPFN_models_paths
)

# Building-cohort splits as (features, labels) pairs
datasets = dict(
    X_train=(X_train, y_train),
    X_test=(X_test, y_test),
    X_val=(X_val, y_val),
)

# Model key -> spelling used when building row keys for the cutoff table
model_name_map = dict(
    LR="LR",
    XGBoost="XGBoost",
    LGBM="LightGBM",
    TabPFN="TabPFN",
)

# Model key -> matplotlib colormap used for that model's confusion matrices
model_cmaps = dict(
    LR="Blues",
    XGBoost="Greens",
    LGBM="Purples",
    TabPFN="Oranges",
)

In [None]:
from sklearn.linear_model import LogisticRegression

class PlattScalingCalibrator:
    """Recalibrate the scores of an already-fitted base model with a logistic regression.

    The base model is never refitted here; only the one-feature logistic
    regression (`platt_lr`) that maps raw scores to calibrated probabilities is.

    NOTE(review): presumably this class is defined in the notebook so that the
    calibrated `.joblib` models can be unpickled - confirm.
    """

    def __init__(self, base_model):
        self.base_model = base_model
        self.platt_lr = LogisticRegression(max_iter=1000)

    def _raw_scores(self, X):
        """Return column 1 of `base_model.predict_proba(X)`, or `predict(X)` if it has no `predict_proba`."""
        if not hasattr(self.base_model, "predict_proba"):
            return self.base_model.predict(X)
        return self.base_model.predict_proba(X)[:, 1]

    def fit(self, X, y):
        """Fit the calibrating regression on the base model's scores for `X` against `y`; returns self."""
        scores = self._raw_scores(X)
        self.platt_lr.fit(scores.reshape(-1, 1), y)
        return self

    def predict_proba(self, X):
        """Return the calibrating regression's `predict_proba` output for the base model's scores on `X`."""
        scores = self._raw_scores(X)
        return self.platt_lr.predict_proba(scores.reshape(-1, 1))

In [None]:
import numpy as np
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix

# Global matplotlib defaults shared by every figure in this notebook
plt.rcParams.update({
    'pdf.fonttype': 42,   # Type 42 (TrueType) fonts in PDF output
    'ps.fonttype': 42,    # same setting for PostScript output
    'font.sans-serif': ['Arial'],
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'axes.titleweight': 'bold',
    'legend.fontsize': 9,
})

# Per-dataset line colour and dash style for the ROC curves
roc_colors = dict(X_train="#1F77B4", X_test="#2CA02C", X_val="#D62728")
roc_linestyles = dict(X_train="-", X_test="--", X_val="-.")

# Column lists used to align the inputs of each model family
xgb_feature_names = joblib.load(XGB_features)
lgbm_feature_names = joblib.load(LGBM_features)
tabpfn_feature_names = joblib.load(TabPFN_features)

# Fitted scaler plus the columns it transforms
scaler = joblib.load(scaler_path)
scaler_feature_names = joblib.load(scaler_features_path)

def get_prob(model, X):
    """Return one score per row of `X`.

    Uses column 1 of `model.predict_proba(X)` when the model offers
    `predict_proba`; otherwise falls back to `model.predict(X)`.
    """
    if not hasattr(model, "predict_proba"):
        return model.predict(X)
    return model.predict_proba(X)[:, 1]

def apply_scaler(X):
    """Return a copy of `X` whose `scaler_feature_names` columns are replaced by `scaler.transform` output.

    The input frame is not modified, and columns outside
    `scaler_feature_names` are carried over unchanged.
    """
    scaled = X.copy()
    # Reindex so the scaler sees its columns in the order listed in scaler_feature_names
    scaled.loc[:, scaler_feature_names] = scaler.transform(
        scaled.reindex(columns=scaler_feature_names)
    )
    return scaled

def align_features(X, feature_names):
    """Return `X` restricted and reordered to `feature_names`.

    Relies on `DataFrame.reindex`, so a name absent from `X` becomes an
    all-NaN column instead of raising.
    """
    aligned = X.reindex(columns=feature_names)
    return aligned

def bootstrap_metrics(y_true, prob, cutoff, n_boot=1000, seed=42):
    """Bootstrap AUC and threshold-based metrics and format them as "mean (lo-hi)".

    Parameters
    ----------
    y_true : array-like of 0/1 labels.
    prob : array-like of predicted scores, same length as `y_true`.
    cutoff : score threshold; rows with `prob >= cutoff` are predicted positive.
    n_boot : number of resamples drawn with replacement.
    seed : seed for the NumPy random generator, so results are repeatable.

    Returns
    -------
    pandas.Series indexed by AUC, Accuracy, Sensitivity, Specificity, PPV and
    NPV. Each value is a string holding the mean over resamples and the
    2.5th-97.5th percentile interval; NaN resamples are ignored.
    """
    # Coerce once so positional indexing with `idx` works for any array-like input
    y_true = np.asarray(y_true)
    prob = np.asarray(prob)

    rng = np.random.default_rng(seed)
    stats = []
    n = len(y_true)
    for _ in range(n_boot):
        idx = rng.integers(0, n, n)
        y_b = y_true[idx]
        p_b = prob[idx]
        y_hat = (p_b >= cutoff).astype(int)
        tn, fp, fn, tp = confusion_matrix(y_b, y_hat, labels=[0, 1]).ravel()
        # AUC is undefined when a resample contains a single class; record NaN
        # (as eval_metrics does) instead of letting one draw abort the bootstrap.
        auc = roc_auc_score(y_b, p_b) if len(np.unique(y_b)) > 1 else np.nan
        acc = (tp + tn) / (tp + tn + fp + fn)
        sen = tp / (tp + fn) if (tp + fn) > 0 else np.nan
        spe = tn / (tn + fp) if (tn + fp) > 0 else np.nan
        ppv = tp / (tp + fp) if (tp + fp) > 0 else np.nan
        npv = tn / (tn + fn) if (tn + fn) > 0 else np.nan
        stats.append([auc, acc, sen, spe, ppv, npv])
    stats = np.array(stats)
    metrics = ["AUC", "Accuracy", "Sensitivity", "Specificity", "PPV", "NPV"]
    out = []
    for i, m in enumerate(metrics):
        vals = stats[:, i]
        mean = np.nanmean(vals)
        lci, uci = np.nanpercentile(vals, [2.5, 97.5])
        out.append(f"{mean:.3f} ({lci:.3f}-{uci:.3f})")
    return pd.Series(out, index=metrics)

def format_metrics_table(metrics_dict, outcome_key,
                         model_names=("LR", "XGBoost", "LGBM"),
                         dataset_names=("X_train", "X_test", "X_val")):
    """Lay out the metrics of one outcome as a table with one column per model.

    Parameters
    ----------
    metrics_dict : mapping keyed by (outcome_key, model_name, dataset_name)
        whose values can be indexed by metric name.
    outcome_key : outcome whose metrics are tabulated.
    model_names : models to include, in column order. The default reproduces
        the previously hard-coded list.
    dataset_names : datasets to include, in row-block order. The default
        reproduces the previously hard-coded list.

    Returns
    -------
    pandas.DataFrame with columns "index" (metric name), one column per model,
    and "Datasets"; one row per (dataset, metric) pair.
    """
    metric_names = ["AUC", "Accuracy", "Sensitivity", "Specificity", "PPV", "NPV"]
    model_names = list(model_names)
    rows = []
    for dataset_name in dataset_names:
        for metric in metric_names:
            row = {"index": metric, "Datasets": dataset_name}
            for model_name in model_names:
                row[model_name] = metrics_dict[(outcome_key, model_name, dataset_name)][metric]
            rows.append(row)
    # Passing `columns` fixes the column order and keeps the header when no rows exist
    return pd.DataFrame(rows, columns=["index", *model_names, "Datasets"])

In [None]:
import joblib

# Column lists used to align the inputs of the non-LR model families
aligned_feature_names = {
    "XGBoost": xgb_feature_names,
    "LGBM": lgbm_feature_names,
    "TabPFN": tabpfn_feature_names,
}

# Scaling depends on neither the outcome nor the model, so do it once per split
# instead of once per (outcome, model, split) combination.
scaled_inputs = {split_name: apply_scaler(X_raw) for split_name, (X_raw, _) in datasets.items()}

# (outcome_key, model_name) -> {split_name: scores returned by get_prob}
prob_dict = {}

for outcome_key, outcome_col in outcome_map.items():
    for model_name, path_dict in model_paths.items():
        model = joblib.load(path_dict[outcome_key])

        # The LR feature list depends only on the outcome: read it once per model,
        # not once per split.
        if model_name == "LR":
            selected = pd.read_csv(LR_features[outcome_key]).iloc[:, 0].tolist()
        feature_names = aligned_feature_names.get(model_name)

        probs = {}
        for split_name, X_scaled in scaled_inputs.items():
            if model_name == "LR":
                # Strict selection: a missing selected feature raises KeyError
                X_use = X_scaled[selected]
            elif feature_names is not None:
                X_use = align_features(X_scaled, feature_names)
            else:
                X_use = X_scaled

            probs[split_name] = get_prob(model, X_use)

        prob_dict[(outcome_key, model_name)] = probs

In [None]:
# Cutoff Selection and Evaluation
def pick_cutoff_by_sens(y_true, y_prob, target_sens=0.85):
    """Pick the ROC threshold with the best specificity among those reaching `target_sens`.

    If no ROC point reaches the target sensitivity, the threshold of the
    point with the highest sensitivity is returned instead.
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    eligible = np.flatnonzero(tpr >= target_sens)
    if eligible.size == 0:
        return thresholds[np.argmax(tpr)]
    specificity = 1 - fpr
    # argmax returns the first maximum, i.e. the earliest eligible ROC point
    best = eligible[np.argmax(specificity[eligible])]
    return thresholds[best]

def eval_compact(y_true, y_prob, cutoff):
    """Return (sensitivity, specificity, PPV, false-positive count) at `cutoff`.

    Rows with `y_prob >= cutoff` are predicted positive. A ratio whose
    denominator is zero is reported as NaN.
    """
    predicted = (y_prob >= cutoff).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, predicted, labels=[0, 1]).ravel()

    def _ratio(num, den):
        return num / den if den > 0 else np.nan

    return _ratio(tp, tp + fn), _ratio(tn, tn + fp), _ratio(tp, tp + fp), fp

# Target sensitivities for which a cutoff is derived
sens_targets = [0.75, 0.80, 0.85, 0.90]
rows = []

for outcome_key, outcome_col in outcome_map.items():
    for model_name in model_paths.keys():
        probs = prob_dict[(outcome_key, model_name)]
        for s in sens_targets:
            # The cutoff is chosen on X_test only and then applied unchanged to all splits.
            # It is named `cutoff_s` rather than `cutoff` because `cutoff` is the name of the
            # table loaded from Cutoff.csv; reusing it here would replace that table with a
            # float whenever this cell is re-run after the loading cell.
            cutoff_s = pick_cutoff_by_sens(y_test[outcome_col].values, probs["X_test"], target_sens=s)
            for split_name, y_df in [("Train", y_train), ("Test", y_test), ("Val", y_val)]:
                sens, spec, ppv, fp = eval_compact(y_df[outcome_col].values, probs[f"X_{split_name.lower()}"], cutoff_s)
                rows.append({
                    "Outcome": outcome_key,
                    "Model": model_name,
                    "Dataset": split_name,
                    "Target_Sensitivity": s,
                    "Sensitivity": sens,
                    "Cutoff": cutoff_s,
                    "Specificity": spec,
                    "PPV": ppv,
                    "FP_count": fp
                })

# One row per (outcome, model, target sensitivity, split)
cutoff_summary = pd.DataFrame(rows)

In [None]:
# Cutoff loading
# Cutoff.csv is read with its first column as the row index. Later cells look rows up
# with keys such as "<outcome>_<model>_Calibrated" and read the "CutOFF" column.
cutoff = pd.read_csv(f'{model_dir}/Cutoff.csv', index_col=0)

In [None]:
from sklearn.calibration import calibration_curve

# results directory
results_file_path = current_working_dir.parent / "results"
# savefig does not create missing folders, so make sure the target exists up front
(results_file_path / "BuildingCohort").mkdir(parents=True, exist_ok=True)

metrics_dict = {}  # (outcome_key, model_name, dataset_name) -> formatted bootstrap metrics

roc_figs = {}  # (outcome_col, model_name) -> ROC / calibration / DCA figure
cm_figs = {}   # dataset_name -> confusion-matrix figure

# Column lists used to align the inputs of the non-LR model families
feature_names_by_model = {
    "XGBoost": xgb_feature_names,
    "LGBM": lgbm_feature_names,
    "TabPFN": tabpfn_feature_names,
}

# Scaling depends on neither the outcome nor the model, so do it once per dataset
scaled_datasets = {name: apply_scaler(X_raw) for name, (X_raw, _) in datasets.items()}

for outcome_key, outcome_col in outcome_map.items():
    for model_name, path_dict in model_paths.items():
        model = joblib.load(path_dict[outcome_key])

        # Resolve the model's input columns once rather than once per dataset
        if model_name == "LR":
            selected = pd.read_csv(LR_features[outcome_key]).iloc[:, 0].tolist()
        aligned_names = feature_names_by_model.get(model_name)

        probs = {}
        for dataset_name, X_scaled in scaled_datasets.items():
            if model_name == "LR":
                X_use = X_scaled[selected]
            elif aligned_names is not None:
                X_use = align_features(X_scaled, aligned_names)
            else:
                X_use = X_scaled
            probs[dataset_name] = get_prob(model, X_use)

        fig, axes = plt.subplots(1, 3, figsize=(18, 5))

        # Panel 1: ROC curve of every dataset
        for dataset_name, (X, y) in datasets.items():
            y_true = y[outcome_col].values
            fpr, tpr, _ = roc_curve(y_true, probs[dataset_name])
            auc = roc_auc_score(y_true, probs[dataset_name])
            axes[0].plot(
                fpr, tpr,
                color=roc_colors[dataset_name],
                linestyle=roc_linestyles[dataset_name],
                lw=2.2,
                label=f"{dataset_name} AUC={auc:.3f}"
            )
        axes[0].plot([0, 1], [0, 1], color="#4DBBD5", lw=2, linestyle="--", alpha=0.6)
        axes[0].set_xlim(0, 1)
        axes[0].set_ylim(0, 1)
        axes[0].set_xlabel("False Positive Rate")
        axes[0].set_ylabel("True Positive Rate")
        axes[0].set_title(f"{outcome_col} | {model_name} | ROC")
        axes[0].legend(loc="lower right")
        axes[0].grid(alpha=0.3, linestyle="--")

        # Panel 2: calibration curve on X_val, zoomed to the occupied range
        y_val_true = y_val[outcome_col].values
        prob_val = probs["X_val"]
        prob_true, prob_pred = calibration_curve(y_val_true, prob_val, n_bins=10, strategy="quantile")
        axes[1].plot(prob_pred, prob_true, marker="o", markersize=7, color="#00A087", linewidth=2.2)
        axes[1].plot([0, 1], [0, 1], linestyle="--", color="gray", lw=2, alpha=0.6)
        x_min, x_max = prob_pred.min(), prob_pred.max()
        y_min, y_max = prob_true.min(), prob_true.max()
        x_pad = (x_max - x_min) * 0.15 if x_max > x_min else 0.02
        y_pad = (y_max - y_min) * 0.15 if y_max > y_min else 0.02
        axes[1].set_xlim(max(0, x_min - x_pad), min(1, x_max + x_pad))
        axes[1].set_ylim(max(0, y_min - y_pad), min(1, y_max + y_pad))
        axes[1].set_xlabel("Predicted Probability")
        axes[1].set_ylabel("Observed Proportion")
        axes[1].set_title(f"{outcome_col} | {model_name} | Calibration")
        axes[1].grid(alpha=0.3, linestyle="--")

        # Panel 3: decision curve on X_val (model vs. treat-all vs. treat-none)
        thresholds_dca = np.linspace(0.01, 0.99, 50)
        net_benefit_model = []
        for t in thresholds_dca:
            tp = np.sum((prob_val >= t) & (y_val_true == 1))
            fp = np.sum((prob_val >= t) & (y_val_true == 0))
            nb = (tp / len(y_val_true)) - (fp / len(y_val_true)) * (t / (1 - t))
            net_benefit_model.append(nb)
        nb_all = (np.sum(y_val_true == 1) / len(y_val_true)) - (np.sum(y_val_true == 0) / len(y_val_true)) * (thresholds_dca / (1 - thresholds_dca))
        axes[2].plot(thresholds_dca, net_benefit_model, color="#E64B35", linewidth=2.5, label="Model")
        axes[2].plot(thresholds_dca, nb_all, color="black", linestyle=":", linewidth=2, label="Treat All")
        axes[2].axhline(y=0, color="gray", linestyle="-", linewidth=2, label="Treat None")
        axes[2].set_ylim(-0.05, max(max(net_benefit_model), max(nb_all)) * 1.2)
        axes[2].set_xlabel("Threshold Probability")
        axes[2].set_ylabel("Net Benefit")
        axes[2].set_title(f"{outcome_col} | {model_name} | DCA")
        axes[2].legend(loc="upper right")
        axes[2].grid(alpha=0.3, linestyle="--")

        plt.tight_layout()
        fig_path = f"{results_file_path}/BuildingCohort/ROC_{outcome_col}_{model_name}.pdf"
        # Save through the figure object instead of relying on pyplot's current figure
        fig.savefig(fig_path, format="pdf", dpi=300, bbox_inches="tight")
        roc_figs[(outcome_col, model_name)] = fig

        # Bootstrap metrics at the stored cutoff for this outcome/model pair
        cutoff_key = f"{outcome_key}_{model_name_map[model_name]}_Calibrated"
        cutoff_val = cutoff.loc[cutoff_key, "CutOFF"]
        for dataset_name, (X, y) in datasets.items():
            y_true = y[outcome_col].values
            prob = probs[dataset_name]
            metrics_dict[(outcome_key, model_name, dataset_name)] = bootstrap_metrics(y_true, prob, cutoff_val)

# Column lists used to align the inputs of the non-LR model families. TabPFN is included
# so this loop handles the same models as the probability loops elsewhere in the notebook.
confusion_feature_names = {
    "XGBoost": xgb_feature_names,
    "LGBM": lgbm_feature_names,
    "TabPFN": tabpfn_feature_names,
}

# Size the grid from the configuration so enabling another model cannot index past the axes
n_model_rows, n_outcome_cols = len(model_paths), len(outcome_map)

# savefig does not create missing folders
(results_file_path / "BuildingCohort").mkdir(parents=True, exist_ok=True)

for dataset_name, (X, y) in datasets.items():
    # Scaling is identical for every model/outcome pair: do it once per dataset
    X_scaled = apply_scaler(X)
    fig, axes = plt.subplots(
        n_model_rows, n_outcome_cols,
        figsize=(4 * n_outcome_cols, 4 * n_model_rows),  # (12, 12) for the 3x3 case
        squeeze=False,  # keep `axes` two-dimensional even for a single row or column
    )
    for i, (model_name, path_dict) in enumerate(model_paths.items()):
        for j, (outcome_key, outcome_col) in enumerate(outcome_map.items()):
            model = joblib.load(path_dict[outcome_key])
            if model_name == "LR":
                selected = pd.read_csv(LR_features[outcome_key]).iloc[:, 0].tolist()
                X_use = X_scaled[selected]
            elif model_name in confusion_feature_names:
                X_use = align_features(X_scaled, confusion_feature_names[model_name])
            else:
                X_use = X_scaled
            prob = get_prob(model, X_use)
            cutoff_key = f"{outcome_key}_{model_name_map[model_name]}_Calibrated"
            cutoff_val = cutoff.loc[cutoff_key, "CutOFF"]
            y_true = y[outcome_col].values
            y_pred = (prob >= cutoff_val).astype(int)
            cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
            ax = axes[i, j]
            ax.imshow(cm, cmap=model_cmaps[model_name])
            ax.set_title(f"{dataset_name} | {model_name} | {outcome_col}")
            ax.set_xticks([0, 1])
            ax.set_yticks([0, 1])
            # Write each count into its cell (x = matrix column, y = matrix row)
            for (r, c), v in np.ndenumerate(cm):
                ax.text(c, r, str(v), ha="center", va="center")
    plt.tight_layout()
    fig_path = f"{results_file_path}/BuildingCohort/Confusion_{dataset_name.replace(' ', '_')}.pdf"
    fig.savefig(fig_path, format="pdf", dpi=300, bbox_inches="tight")
    cm_figs[dataset_name] = fig

# One worksheet per outcome, all written into a single workbook
metric_sheet_names = {
    "Hb": "sTable3F. Hb Results",
    "PLT": "sTable3G. PLT Results",
    "WBC_Neut": "sTable3H. WBC_Neut Results",
}
with pd.ExcelWriter(f"{results_file_path}/BuildingCohort/sTable3_Results.xlsx") as writer:
    for sheet_outcome, sheet_name in metric_sheet_names.items():
        format_metrics_table(metrics_dict, sheet_outcome).to_excel(writer, sheet_name=sheet_name, index=False)

# Bundle everything produced by this cell; the bare name below displays it as the cell output
results = dict(
    roc_figs=roc_figs,
    metrics_tables=metrics_dict,
    confusion_figs=cm_figs,
)

results

In [None]:
# Evaluation on External Cohorts

# Upper bound applied to both axes of each calibration panel in plot_cohort_panel,
# looked up as calib_xlim[cohort_label][outcome_key].
# NOTE(review): the values look hand-tuned per cohort and outcome - confirm how they were chosen.
calib_xlim = {
    "Prospective": {"Hb": 0.15, "PLT": 0.20, "WBC_Neut": 0.50},
    "Retrospective": {"Hb": 0.25, "PLT": 0.05, "WBC_Neut": 0.30}
}

def dca_curve(y_true, prob):
    """Compute decision-curve net benefit on 50 thresholds evenly spaced in [0.01, 0.99].

    Returns (thresholds, net benefit of the model, net benefit of treating
    everyone), each an array with one entry per threshold.
    """
    thresholds = np.linspace(0.01, 0.99, 50)
    n_obs = len(y_true)

    def _model_net_benefit(t):
        # Rows flagged positive at threshold t, split into true and false positives
        flagged = prob >= t
        true_pos = np.sum(flagged & (y_true == 1))
        false_pos = np.sum(flagged & (y_true == 0))
        return (true_pos / n_obs) - (false_pos / n_obs) * (t / (1 - t))

    nb_model = np.array([_model_net_benefit(t) for t in thresholds])
    # "Treat all": every positive counts as a true positive, every negative as a false positive
    odds = thresholds / (1 - thresholds)
    nb_all = (np.sum(y_true == 1) / n_obs) - (np.sum(y_true == 0) / n_obs) * odds
    return thresholds, nb_model, nb_all

def eval_metrics(y_true, prob, cutoff):
    """Return (AUC, accuracy, sensitivity, specificity, PPV, NPV, false-positive count).

    Rows with `prob >= cutoff` are predicted positive. AUC is NaN when
    `y_true` holds a single class, and any ratio with a zero denominator is NaN.
    """
    predicted = (prob >= cutoff).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, predicted, labels=[0, 1]).ravel()

    def _ratio(num, den):
        return num / den if den > 0 else np.nan

    if len(np.unique(y_true)) > 1:
        auc = roc_auc_score(y_true, prob)
    else:
        auc = np.nan
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    return (
        auc,
        accuracy,
        _ratio(tp, tp + fn),
        _ratio(tn, tn + fp),
        _ratio(tp, tp + fp),
        _ratio(tn, tn + fn),
        fp,
    )

def eval_one_cohort(X, y, cohort_label):
    """Evaluate the LightGBM model of every outcome on one cohort at its stored cutoff.

    Cutoffs come from the `cutoff` table, row "<outcome>_LightGBM_Calibrated",
    column "CutOFF". Returns a DataFrame with one row per outcome.

    NOTE(review): unlike the building-cohort cells, `apply_scaler` is not
    called here - confirm the external cohort frames are already scaled.
    """
    metric_names = ("AUC", "Accuracy", "Sensitivity", "Specificity", "PPV", "NPV", "FP_count")
    records = []
    for outcome_key, outcome_col in outcome_map.items():
        lgbm_model = joblib.load(LGBM_models_paths[outcome_key])
        scores = get_prob(lgbm_model, align_features(X, lgbm_feature_names))
        threshold = cutoff.loc[f"{outcome_key}_LightGBM_Calibrated", "CutOFF"]
        metric_values = eval_metrics(y[outcome_col].values, scores, threshold)

        # Assemble the row in the same column order as the output table
        record = {"Cohort": cohort_label, "Outcome": outcome_key}
        record.update(zip(metric_names, metric_values))
        record["Cutoff"] = threshold
        records.append(record)
    return pd.DataFrame(records)

def plot_cohort_panel(X, y, cohort_label):
    """Draw and save a 3x3 figure (ROC, calibration, DCA per outcome) for one cohort.

    Parameters
    ----------
    X : feature frame of the cohort; aligned to `lgbm_feature_names` before scoring.
    y : label frame holding the outcome columns listed in `outcome_map`.
    cohort_label : key into `calib_xlim`, also used in the output file name.

    The figure is written to
    `<results_file_path>/ExternalValidation/<cohort_label>_3x3.pdf`; the folder
    is created when missing.

    NOTE(review): unlike the building-cohort cells, `apply_scaler` is not
    called here - confirm the external cohort frames are already scaled.
    """
    fig, axes = plt.subplots(3, 3, figsize=(18, 12))
    for i, (outcome_key, outcome_col) in enumerate(outcome_map.items()):
        model = joblib.load(LGBM_models_paths[outcome_key])
        X_use = align_features(X, lgbm_feature_names)
        prob = get_prob(model, X_use)
        y_true = y[outcome_col].values

        roc_ax, calib_ax, dca_ax = axes[i]

        # ROC is only defined when both classes are present
        has_both_classes = len(np.unique(y_true)) > 1
        if has_both_classes:
            fpr, tpr, _ = roc_curve(y_true, prob)
            auc = roc_auc_score(y_true, prob)
            roc_ax.plot(fpr, tpr, color="#D62728", lw=2.2, label=f"AUC={auc:.3f}")
        roc_ax.plot([0, 1], [0, 1], color="#4DBBD5", lw=2, linestyle="--", alpha=0.6)
        roc_ax.set_xlim(0, 1)
        roc_ax.set_ylim(0, 1)
        roc_ax.set_xlabel("False Positive Rate")
        roc_ax.set_ylabel("True Positive Rate")
        roc_ax.set_title(f"{outcome_col} | ROC")
        if has_both_classes:
            # Without a labelled curve there is nothing to list, and legend() would only warn
            roc_ax.legend(loc="lower right")
        roc_ax.grid(alpha=0.3, linestyle="--")

        prob_true, prob_pred = calibration_curve(y_true, prob, n_bins=10, strategy="quantile")
        calib_ax.plot(prob_pred, prob_true, marker="o", markersize=7, color="#00A087", linewidth=2.2)
        calib_ax.plot([0, 1], [0, 1], linestyle="--", color="gray", lw=2, alpha=0.6)
        # Cohort- and outcome-specific zoom applied to both axes
        lim = calib_xlim[cohort_label][outcome_key]
        calib_ax.set_xlim(0, lim)
        calib_ax.set_ylim(0, lim)
        calib_ax.set_xlabel("Predicted Probability")
        calib_ax.set_ylabel("Observed Proportion")
        calib_ax.set_title(f"{outcome_col} | Calibration")
        calib_ax.grid(alpha=0.3, linestyle="--")

        thresholds, nb_model, nb_all = dca_curve(y_true, prob)
        dca_ax.plot(thresholds, nb_model, color="#E64B35", linewidth=2.5, label="Model")
        dca_ax.plot(thresholds, nb_all, color="black", linestyle=":", linewidth=2, label="Treat All")
        dca_ax.axhline(y=0, color="gray", linestyle="-", linewidth=2, label="Treat None")
        dca_ax.set_xlim(0, 0.5)
        dca_ax.set_ylim(-0.02, max(max(nb_model), max(nb_all)) * 1.2)
        dca_ax.set_xlabel("Threshold Probability")
        dca_ax.set_ylabel("Net Benefit")
        dca_ax.set_title(f"{outcome_col} | DCA")
        dca_ax.legend(loc="upper right")
        dca_ax.grid(alpha=0.3, linestyle="--")

    plt.tight_layout()
    fig_path = f"{results_file_path}/ExternalValidation/{cohort_label}_3x3.pdf"
    # savefig does not create missing folders
    Path(fig_path).parent.mkdir(parents=True, exist_ok=True)
    # Save through the figure object instead of relying on pyplot's current figure
    fig.savefig(fig_path, format="pdf", dpi=300, bbox_inches="tight")

def plot_confusion_2x3(X_pros, y_pros, X_retro, y_retro):
    """Draw and save a 2x3 grid of LightGBM confusion matrices (rows: cohorts, columns: outcomes).

    Predictions use the cutoffs stored in the `cutoff` table under
    "<outcome>_LightGBM_Calibrated". The figure is saved as
    `<results_file_path>/ExternalValidation/Confusion_2x3.pdf`.
    """
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    cohort_rows = (
        ("Prospective", X_pros, y_pros, "Blues"),
        ("Retrospective", X_retro, y_retro, "Purples"),
    )
    for row_idx, (cohort_name, X_cohort, y_cohort, color_map) in enumerate(cohort_rows):
        for col_idx, (outcome_key, outcome_col) in enumerate(outcome_map.items()):
            lgbm_model = joblib.load(LGBM_models_paths[outcome_key])
            scores = get_prob(lgbm_model, align_features(X_cohort, lgbm_feature_names))
            threshold = cutoff.loc[f"{outcome_key}_LightGBM_Calibrated", "CutOFF"]
            observed = y_cohort[outcome_col].values
            predicted = (scores >= threshold).astype(int)
            counts = confusion_matrix(observed, predicted, labels=[0, 1])

            panel = axes[row_idx, col_idx]
            panel.imshow(counts, cmap=color_map)
            panel.set_title(f"{cohort_name} | {outcome_col}")
            panel.set_xticks([0, 1])
            panel.set_yticks([0, 1])
            # Annotate every cell with its count (x = matrix column, y = matrix row)
            for (cell_row, cell_col), count in np.ndenumerate(counts):
                panel.text(cell_col, cell_row, str(count), ha="center", va="center")
    plt.tight_layout()
    plt.savefig(
        f"{results_file_path}/ExternalValidation/Confusion_2x3.pdf",
        format="pdf", dpi=300, bbox_inches="tight",
    )

# to_csv and savefig do not create missing folders, and the CSVs below are the first
# files written into ExternalValidation, so create it before anything is saved.
(results_file_path / "ExternalValidation").mkdir(parents=True, exist_ok=True)

# Metrics of the LightGBM models on both external cohorts at the stored cutoffs
pros_table = eval_one_cohort(X_pros, y_pros, "Prospective")
retro_table = eval_one_cohort(X_retro, y_retro, "Retrospective")

pros_table.to_csv(f"{results_file_path}/ExternalValidation/Prospective_Eval.csv", index=False)
retro_table.to_csv(f"{results_file_path}/ExternalValidation/Retrospective_Eval.csv", index=False)

# ROC / calibration / DCA panels per cohort, then the combined confusion-matrix figure
plot_cohort_panel(X_pros, y_pros, "Prospective")
plot_cohort_panel(X_retro, y_retro, "Retrospective")
plot_confusion_2x3(X_pros, y_pros, X_retro, y_retro)

pros_table, retro_table

In [None]:
# External Cohort Cutoff Grid Evaluation
# Target sensitivities for which run_cutoff_eval derives a cutoff. Same values as the
# list used in the building-cohort cutoff-selection cell; this assignment rebinds that name.
sens_targets = [0.75, 0.80, 0.85, 0.90]

def run_cutoff_eval(X, y, cohort_label):
    """Derive and evaluate a cutoff per outcome and target sensitivity on one cohort.

    For every outcome the LightGBM model scores `X`; for each value in
    `sens_targets` a cutoff is picked with `pick_cutoff_by_sens` and then
    evaluated with `eval_compact` on that same cohort. Returns a DataFrame
    with one row per (outcome, target sensitivity).
    """
    records = []
    for outcome_key, outcome_col in outcome_map.items():
        lgbm_model = joblib.load(LGBM_models_paths[outcome_key])
        scores = get_prob(lgbm_model, align_features(X, lgbm_feature_names))
        labels = y[outcome_col].values
        for target in sens_targets:
            # Local name chosen so it does not shadow the global `cutoff` table
            threshold = pick_cutoff_by_sens(labels, scores, target_sens=target)
            sens, spec, ppv, fp = eval_compact(labels, scores, threshold)
            records.append({
                "Cohort": cohort_label,
                "Outcome": outcome_key,
                "Target_Sensitivity": target,
                "Sensitivity": sens,
                "Specificity": spec,
                "PPV": ppv,
                "FP_count": fp,
                "Cutoff": threshold,
            })
    return pd.DataFrame(records)

# Cutoff grid for each external cohort
pros_cutoff_table = run_cutoff_eval(X_pros, y_pros, "Prospective")
retro_cutoff_table = run_cutoff_eval(X_retro, y_retro, "Retrospective")

# Persist both grids next to the other external-validation outputs
for _grid_label, _grid_table in (
    ("Prospective", pros_cutoff_table),
    ("Retrospective", retro_cutoff_table),
):
    _grid_table.to_csv(f"{results_file_path}/ExternalValidation/{_grid_label}_CutoffGrid.csv", index=False)

pros_cutoff_table, retro_cutoff_table

In [None]:
# Load External Cutoff Results
# ExternalCutoff.csv is read with its first column as the row index. The following cell
# looks rows up as "<outcome>_LightGBM_<suffix>" (suffix "Pros" or "Retro") and reads the
# "CutOFF" column.
ExterCutoff = pd.read_csv(f"{model_dir}/ExternalCutoff.csv", index_col=0)

In [None]:
def eval_with_external_cutoff(X, y, cohort_label, cutoff_suffix):
    """Evaluate the LightGBM model of every outcome on one cohort using cohort-specific cutoffs.

    Cutoffs come from `ExterCutoff`, row "<outcome>_LightGBM_<cutoff_suffix>",
    column "CutOFF". Returns a DataFrame with one row per outcome and a fixed
    column order.
    """
    column_order = [
        "Cohort", "Outcome", "AUC", "Accuracy", "Sensitivity",
        "Specificity", "PPV", "NPV", "FP_count", "Cutoff",
    ]
    # Names of the seven values returned by eval_metrics, in return order
    metric_names = column_order[2:-1]

    records = []
    for outcome_key, outcome_col in outcome_map.items():
        lgbm_model = joblib.load(LGBM_models_paths[outcome_key])
        scores = get_prob(lgbm_model, align_features(X, lgbm_feature_names))
        threshold = ExterCutoff.loc[f"{outcome_key}_LightGBM_{cutoff_suffix}", "CutOFF"]
        metric_values = eval_metrics(y[outcome_col].values, scores, threshold)

        record = {"Cohort": cohort_label, "Outcome": outcome_key}
        record.update(zip(metric_names, metric_values))
        record["Cutoff"] = threshold
        records.append(record)

    summary = pd.DataFrame(records)
    return summary[column_order]

def plot_confusion_2x3_external(X_pros, y_pros, X_retro, y_retro):
    """Draw and save a 2x3 grid of LightGBM confusion matrices using cohort-specific cutoffs.

    Rows are cohorts, columns are outcomes. Cutoffs come from `ExterCutoff`
    under "<outcome>_LightGBM_Pros" and "<outcome>_LightGBM_Retro".

    NOTE(review): the figure is written directly into `results_file_path`,
    whereas the other external-cohort outputs go to its `ExternalValidation`
    subfolder - confirm this is intended.
    """
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    cohort_rows = (
        ("Prospective", X_pros, y_pros, "Blues", "Pros"),
        ("Retrospective", X_retro, y_retro, "Purples", "Retro"),
    )
    for row_idx, (cohort_name, X_cohort, y_cohort, color_map, key_suffix) in enumerate(cohort_rows):
        for col_idx, (outcome_key, outcome_col) in enumerate(outcome_map.items()):
            lgbm_model = joblib.load(LGBM_models_paths[outcome_key])
            scores = get_prob(lgbm_model, align_features(X_cohort, lgbm_feature_names))
            threshold = ExterCutoff.loc[f"{outcome_key}_LightGBM_{key_suffix}", "CutOFF"]
            observed = y_cohort[outcome_col].values
            predicted = (scores >= threshold).astype(int)
            counts = confusion_matrix(observed, predicted, labels=[0, 1])

            panel = axes[row_idx, col_idx]
            panel.imshow(counts, cmap=color_map)
            panel.set_title(f"{cohort_name} | {outcome_col}")
            panel.set_xticks([0, 1])
            panel.set_yticks([0, 1])
            # Annotate every cell with its count (x = matrix column, y = matrix row)
            for (cell_row, cell_col), count in np.ndenumerate(counts):
                panel.text(cell_col, cell_row, str(count), ha="center", va="center")
    plt.tight_layout()
    plt.savefig(
        f"{results_file_path}/Confusion_2x3_ExternalCutoff.pdf",
        format="pdf", dpi=300, bbox_inches="tight",
    )

# Metrics of both external cohorts at their cohort-specific cutoffs
pros_ext = eval_with_external_cutoff(X_pros, y_pros, "Prospective", "Pros")
retro_ext = eval_with_external_cutoff(X_retro, y_retro, "Retrospective", "Retro")

# Persist both tables
for _ext_label, _ext_table in (("Prospective", pros_ext), ("Retrospective", retro_ext)):
    _ext_table.to_csv(
        f"{results_file_path}/ExternalValidation/{_ext_label}_Eval_ExternalCutoff.csv",
        index=False,
    )

plot_confusion_2x3_external(X_pros, y_pros, X_retro, y_retro)

pros_ext, retro_ext