In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    roc_auc_score, average_precision_score, brier_score_loss,
    f1_score, confusion_matrix, accuracy_score, balanced_accuracy_score
)

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from tqdm.auto import tqdm

# ML

In [None]:
# -------------------------
# Configuration: input file, grouping key, prediction targets
# -------------------------
CSV_PATH = r"CSV/Exports/Temp/12_ML/ML_Stroke_df.csv"
GROUP_COL = "subject_id"  # passed as `groups` to the group-aware CV splitter
TARGETS = ["mort_30d", "mort_180d", "mort_360d"]

# Identifier columns; the row-deduplication sort in build_snapshot orders by these.
ID_COLS = ["subject_id", "hadm_id", "stay_id", "Time_Zone"]

# Columns that are always removed before a feature matrix is built.
DROP_ALWAYS = [
    # death date / time-to-event columns
    "dod",
    "time_to_death_days",
    "event_30d",
    "duration_30d",
    "event_180d",
    "duration_180d",
    "event_360d",
    "duration_360d",
    "hospital_expire_flag",
    # timestamps and length of stay
    "icu_intime",
    "icu_outtime",
    "hosp_dischtime",
    "los",
]

# Time_Zone value used for the single-snapshot view.
SNAPSHOT_TZ = 8

In [None]:
# =========================
# DATA LOADING + FEATURES
# =========================
def load_df(path: str) -> pd.DataFrame:
    """Read the CSV at ``path`` and coerce the target columns to numeric.

    Every name in ``TARGETS`` that exists in the file is passed through
    ``pd.to_numeric(..., errors="coerce")``, so unparseable entries become NaN.
    """
    frame = pd.read_csv(path)

    present_targets = [name for name in TARGETS if name in frame.columns]
    for name in present_targets:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    return frame


def add_rar_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add RDW-to-albumin ratio (RAR) columns to a copy of ``df``.

    For every (RDW column, albumin column, output name) pair whose two source
    columns are both present, the sources are coerced to numeric (unparseable
    values become NaN) and their ratio is stored under the output name.

    A zero albumin value would produce +/-inf; those results are replaced with
    NaN so that the new columns only ever contain finite numbers or NaN.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    df = df.copy()

    # NOTE: the albumin column names contain a double space before the dash.
    pairs = [
        ("RDW - Median", "Albumin  - Median", "RAR_median"),
        ("RDW - Mean",   "Albumin  - Mean",   "RAR_mean"),
        ("RDW - Max",    "Albumin  - Min",    "RAR_max_over_min"),
    ]

    for rdw, alb, out in pairs:
        if rdw in df.columns and alb in df.columns:
            df[rdw] = pd.to_numeric(df[rdw], errors="coerce")
            df[alb] = pd.to_numeric(df[alb], errors="coerce")
            ratio = df[rdw] / df[alb]
            # Division by zero yields +/-inf; treat it as missing instead.
            df[out] = ratio.replace([np.inf, -np.inf], np.nan)

    return df

# =========================
# VIEWS
# =========================
def build_snapshot(df: pd.DataFrame, tz: int = SNAPSHOT_TZ) -> pd.DataFrame:
    """Return a single-Time_Zone view of ``df`` with duplicate rows removed.

    If a ``Time_Zone`` column exists, only rows whose value equals ``tz`` are
    kept; when ``tz`` is not among the observed values, the largest observed
    value is used instead. Rows are then de-duplicated to one per ``stay_id``
    (or one per ``GROUP_COL`` when ``stay_id`` is absent).

    Args:
        df: Long-format frame; it is not modified.
        tz: Requested Time_Zone value.

    Returns:
        A filtered, de-duplicated copy of ``df``.
    """
    df = df.copy()

    if "Time_Zone" in df.columns:
        tzs = df["Time_Zone"].dropna().unique()
        if len(tzs) > 0 and tz not in set(tzs):
            tz = int(np.nanmax(df["Time_Zone"].values))
        df = df[df["Time_Zone"] == tz].copy()

    # One row per stay_id (preferred over collapsing to one row per subject_id).
    if "stay_id" in df.columns:
        # Sort only by the ID columns that exist: the function tolerates a
        # missing Time_Zone column, so sorting by all of ID_COLS would raise KeyError.
        sort_cols = [c for c in ID_COLS if c in df.columns]
        df = df.sort_values(sort_cols).drop_duplicates(subset=["stay_id"], keep="first")
    else:
        df = df.drop_duplicates(subset=[GROUP_COL], keep="first")

    return df


def build_wide_pivot(df: pd.DataFrame, max_tz: int = None) -> pd.DataFrame:
    """Pivot the long frame to one row per (stay_id, GROUP_COL).

    Every column that is not an ID, target or always-dropped column is spread
    across the ``Time_Zone`` values and named ``<feature>__tz<k>``. The target
    columns are then attached once per ``stay_id``.

    Args:
        df: Long-format frame containing a ``Time_Zone`` column.
        max_tz: If given, only rows with ``Time_Zone <= max_tz`` are used.

    Raises:
        ValueError: if ``df`` has no ``Time_Zone`` column.
    """
    long_df = df.copy()
    if "Time_Zone" not in long_df.columns:
        raise ValueError("Δεν βρέθηκε Time_Zone για wide pivot.")

    if max_tz is not None:
        long_df = long_df[long_df["Time_Zone"] <= max_tz].copy()

    label_cols = [c for c in TARGETS if c in long_df.columns]
    leak_cols = [c for c in DROP_ALWAYS if c in long_df.columns]
    excluded = set(ID_COLS) | set(label_cols) | set(leak_cols)
    value_cols = [c for c in long_df.columns if c not in excluded]

    # One row per stay_id; subject_id is carried along for group-aware CV.
    pivoted = long_df.pivot_table(
        index=["stay_id", GROUP_COL],
        columns="Time_Zone",
        values=value_cols,
        aggfunc="first",
    )

    # Flatten the (feature, Time_Zone) column pairs into single names.
    pivoted.columns = [f"{feature}__tz{zone}" for feature, zone in pivoted.columns]
    pivoted = pivoted.reset_index()

    labels = long_df[["stay_id"] + label_cols].drop_duplicates(subset=["stay_id"], keep="first")
    return pivoted.merge(labels, on="stay_id", how="left")


# =========================
# PREPROCESS (scaling -> helps logistic regression converge)
# =========================
def make_preprocess(X: pd.DataFrame) -> ColumnTransformer:
    """Build an unfitted preprocessing transformer from the columns of ``X``.

    Columns with dtype ``object`` are treated as categorical (most-frequent
    imputation, then one-hot encoding that ignores unseen categories); all
    other columns are treated as numeric (median imputation with missing
    indicators, then scaling without centering).
    """
    categorical = [name for name in X.columns if X[name].dtype == "object"]
    numeric = [name for name in X.columns if name not in categorical]

    num_steps = [
        ("imputer", SimpleImputer(strategy="median", add_indicator=True)),
        # Scaling was added for the SAG/SAGA solvers.
        ("scaler", StandardScaler(with_mean=False)),
    ]
    cat_steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]

    transformer = ColumnTransformer(
        transformers=[
            ("num", Pipeline(steps=num_steps), numeric),
            ("cat", Pipeline(steps=cat_steps), categorical),
        ],
        remainder="drop",
        sparse_threshold=0.3,
    )
    return transformer



In [None]:
# =========================
# MODELS
# =========================
def make_lr():
    """Return an unfitted, class-balanced L2 logistic regression (SAGA solver)."""
    params = dict(
        max_iter=10000,
        tol=1e-3,
        solver="saga",
        penalty="l2",
        class_weight="balanced",
        n_jobs=-1,
        random_state=42,
    )
    return LogisticRegression(**params)


def make_rf():
    """Return an unfitted random forest with per-bootstrap class balancing."""
    params = dict(
        n_estimators=600,
        max_depth=None,
        min_samples_leaf=2,
        class_weight="balanced_subsample",
        n_jobs=-1,
        random_state=42,
    )
    return RandomForestClassifier(**params)


def make_xgb(y_train: np.ndarray):
    """Return an unfitted XGBoost classifier weighted for the given labels.

    ``scale_pos_weight`` is set to (#negatives / #positives) counted in
    ``y_train``; the denominator is floored at 1.0 so that a training set
    without positives does not divide by zero.
    """
    n_pos = np.sum(y_train == 1)
    n_neg = np.sum(y_train == 0)
    imbalance_ratio = n_neg / max(n_pos, 1.0)

    params = dict(
        n_estimators=1200,
        learning_rate=0.03,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=2,
        reg_lambda=1.0,
        reg_alpha=0.0,
        scale_pos_weight=imbalance_ratio,
        objective="binary:logistic",
        eval_metric="auc",
        tree_method="hist",
        n_jobs=-1,
        random_state=42,
    )
    return XGBClassifier(**params)

In [None]:
# =========================
# HELPERS
# =========================
def prepare_xy(df_view: pd.DataFrame, target: str):
    """Split a view into features, labels and CV groups for one target.

    Rows with a missing ``target`` are discarded. The feature matrix excludes
    the always-dropped columns, every target column and every ID column. The
    grouping column is excluded as well: it is returned separately as
    ``groups``, and leaving it in ``X`` would hand the patient identifier to
    the models as a feature.

    Args:
        df_view: One-row-per-sample frame that contains ``target`` and ``GROUP_COL``.
        target: Name of the label column.

    Returns:
        ``(X, y, groups)``: feature DataFrame, integer label Series and a
        NumPy array of group identifiers aligned with ``X``.
    """
    dfv = df_view[df_view[target].notna()].copy()

    # dict.fromkeys removes repeated names while keeping a stable order.
    candidates = dict.fromkeys([*DROP_ALWAYS, *TARGETS, *ID_COLS, GROUP_COL])
    drop_cols = [c for c in candidates if c in dfv.columns]

    X = dfv.drop(columns=drop_cols, errors="ignore")
    y = dfv[target].astype(int)
    groups = dfv[GROUP_COL].values  # read from dfv, so dropping it from X is safe

    return X, y, groups


def best_threshold_f1(y_true, p):
    """Grid-search the probability cutoff that maximises F1.

    The grid is 91 evenly spaced thresholds from 0.05 to 0.95. When several
    thresholds tie, the smallest one is returned.

    Returns:
        ``(threshold, f1)`` as Python floats.
    """
    grid = np.linspace(0.05, 0.95, 91)
    scores = [f1_score(y_true, (p >= cut).astype(int)) for cut in grid]
    # argmax returns the first maximum, i.e. the lowest tied threshold.
    winner = int(np.argmax(scores))
    return float(grid[winner]), float(scores[winner])

In [None]:
# =========================
# CV EVAL (OOF preds) + ✅ progress
# =========================
def cv_oof_metrics(X, y, groups, model_name: str, n_splits=5, show_progress=True):
    """Run group-aware stratified CV for one model and summarise OOF metrics.

    For each fold a fresh preprocessor and classifier are fitted on the
    training part and used to predict the validation part; the predictions are
    collected into one out-of-fold (OOF) vector.

    Note that the decision threshold is chosen on the same OOF predictions on
    which the ``@thr`` metrics are then computed.

    Args:
        X: Feature DataFrame.
        y: Label Series aligned with ``X``.
        groups: Group identifiers aligned with ``X``.
        model_name: One of ``"lr"``, ``"rf"``, ``"xgb"``.
        n_splits: Number of CV folds.
        show_progress: Whether to wrap the fold loop in a tqdm bar.

    Returns:
        A dict of metrics (one leaderboard row).

    Raises:
        ValueError: if ``model_name`` is not recognised.
    """
    splitter = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=42)
    oof_pred = np.zeros(len(y), dtype=float)
    auc_per_fold, ap_per_fold, brier_per_fold = [], [], []

    folds = list(splitter.split(X, y, groups=groups))
    if show_progress:
        fold_iter = tqdm(folds, desc=f"CV folds | {model_name}", total=n_splits, leave=False)
    else:
        fold_iter = folds

    simple_factories = {"lr": make_lr, "rf": make_rf}

    for train_idx, valid_idx in fold_iter:
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train = y.iloc[train_idx].values
        y_valid = y.iloc[valid_idx].values

        preprocessor = make_preprocess(X_train)

        if model_name == "xgb":
            # XGBoost needs the fold's labels to derive its class weight.
            estimator = make_xgb(y_train)
        elif model_name in simple_factories:
            estimator = simple_factories[model_name]()
        else:
            raise ValueError("Unknown model_name")

        model = Pipeline(steps=[("pre", preprocessor), ("clf", estimator)])
        model.fit(X_train, y_train)

        valid_pred = model.predict_proba(X_valid)[:, 1]
        oof_pred[valid_idx] = valid_pred

        this_auc = roc_auc_score(y_valid, valid_pred)
        this_ap = average_precision_score(y_valid, valid_pred)
        this_brier = brier_score_loss(y_valid, valid_pred)
        auc_per_fold.append(this_auc)
        ap_per_fold.append(this_ap)
        brier_per_fold.append(this_brier)

        if show_progress and hasattr(fold_iter, "set_postfix"):
            fold_iter.set_postfix({
                "fold_auc": f"{this_auc:.3f}",
                "mean_auc": f"{np.mean(auc_per_fold):.3f}",
            })

    # Threshold-free metrics on the pooled OOF predictions.
    pooled_auc = roc_auc_score(y, oof_pred)
    pooled_ap = average_precision_score(y, oof_pred)
    pooled_brier = brier_score_loss(y, oof_pred)

    # Threshold-dependent metrics at the F1-optimal cutoff.
    cutoff, top_f1 = best_threshold_f1(y.values.astype(int), oof_pred)
    hard_pred = (oof_pred >= cutoff).astype(int)

    accuracy = accuracy_score(y, hard_pred)
    balanced = balanced_accuracy_score(y, hard_pred)

    tn, fp, fn, tp = confusion_matrix(y, hard_pred).ravel()
    sensitivity = tp / (tp + fn + 1e-12)
    specificity = tn / (tn + fp + 1e-12)

    summary = {
        "model": model_name,
        "AUC_oof": float(pooled_auc),
        "PR_AUC_oof": float(pooled_ap),
        "Brier_oof": float(pooled_brier),
        "AUC_mean_fold": float(np.mean(auc_per_fold)),
        "AUC_std_fold": float(np.std(auc_per_fold, ddof=1)),
        "best_thr_F1_oof": float(cutoff),
        "best_F1_oof": float(top_f1),
        "Accuracy@thr": float(accuracy),
        "BalancedAcc@thr": float(balanced),
        "Sensitivity@thr": float(sensitivity),
        "Specificity@thr": float(specificity),
        "n": int(len(y)),
        "pos_rate": float(np.mean(y)),
    }
    return summary



In [None]:
# =========================
# RUN LEADERBOARD + ✅ progress
# =========================
def run_leaderboard(view_name: str, df_view: pd.DataFrame, show_progress=True):
    """Evaluate every model on every available target of one data view.

    For each name in ``TARGETS`` that is a column of ``df_view``, the models
    ``"lr"``, ``"rf"`` and ``"xgb"`` are cross-validated with
    ``cv_oof_metrics`` (5 folds). Rows are ranked within each target by OOF
    AUC (descending), then PR-AUC (descending), then Brier score (ascending).

    Args:
        view_name: Label stored in the ``view`` column of every row.
        df_view: One-row-per-sample frame for this view.
        show_progress: Whether to display tqdm progress bars.

    Returns:
        The leaderboard DataFrame; an empty DataFrame when ``df_view``
        contains none of the target columns.
    """
    rows = []

    targets_iter = [t for t in TARGETS if t in df_view.columns]
    if show_progress:
        targets_iter = tqdm(targets_iter, desc=f"Targets | {view_name}")

    for target in targets_iter:
        X, y, groups = prepare_xy(df_view, target)

        models_iter = ["lr", "rf", "xgb"]
        if show_progress:
            models_iter = tqdm(models_iter, desc=f"Models | {view_name} | {target}", leave=False)

        for m in models_iter:
            r = cv_oof_metrics(X, y, groups, model_name=m, n_splits=5, show_progress=show_progress)
            r["target"] = target
            r["view"] = view_name
            rows.append(r)

    out = pd.DataFrame(rows)
    if out.empty:
        # Nothing was evaluated: the sort keys do not exist, so sorting would raise KeyError.
        return out

    out = out.sort_values(
        by=["target", "AUC_oof", "PR_AUC_oof", "Brier_oof"],
        ascending=[True, False, False, True]
    ).reset_index(drop=True)

    return out

In [None]:
# =========================
# RUN
# =========================
df = load_df(CSV_PATH)
df = add_rar_features(df)

snap = build_snapshot(df, tz=SNAPSHOT_TZ)
wide = build_wide_pivot(df, max_tz=None)

leader_snap = run_leaderboard("snapshot_tz", snap, show_progress=True)
leader_wide = run_leaderboard("wide_pivot", wide, show_progress=True)

leader = pd.concat([leader_snap, leader_wide], ignore_index=True)
leader.to_csv("leaderboard_subjectid_cv.csv", index=False)
print("Saved: leaderboard_subjectid_cv.csv")

# Each view's leaderboard is sorted on its own, so the concatenated frame is
# not ranked across views. Re-rank within every target before taking the top 3;
# otherwise head(3) would just return the rows of the first view.
for t in TARGETS:
    top = (
        leader[leader["target"] == t]
        .sort_values(
            by=["AUC_oof", "PR_AUC_oof", "Brier_oof"],
            ascending=[False, False, True],
        )
        .head(3)
    )
    print("\n=== TOP 3 for", t, "===")
    print(top[[
        "view", "model", "AUC_oof", "PR_AUC_oof", "Brier_oof",
        "best_thr_F1_oof", "best_F1_oof",
        "Accuracy@thr", "BalancedAcc@thr", "Sensitivity@thr", "Specificity@thr",
        "n", "pos_rate"
    ]])

In [None]:
def load_df(path: str) -> pd.DataFrame:
    """Load the CSV at ``path``; target columns are coerced to numeric.

    Only names from ``TARGETS`` that occur in the file are converted, and
    values that cannot be parsed as numbers become NaN.
    """
    loaded = pd.read_csv(path)

    for col in TARGETS:
        if col not in loaded.columns:
            continue
        loaded[col] = pd.to_numeric(loaded[col], errors="coerce")

    return loaded


# -------------------------
# 2) Feature engineering: RAR (optional but useful)
# -------------------------
def add_rar_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add the median-based RDW-to-albumin ratio (``RAR_median``) to a copy of ``df``.

    The ratio is only added when both source columns are present. The sources
    are coerced to numeric (unparseable values become NaN). A zero albumin
    value would give +/-inf; such results are replaced with NaN.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    df = df.copy()

    rdw = "RDW - Median"
    alb = "Albumin  - Median"  # note: two spaces before the dash
    if rdw in df.columns and alb in df.columns:
        df[rdw] = pd.to_numeric(df[rdw], errors="coerce")
        df[alb] = pd.to_numeric(df[alb], errors="coerce")
        ratio = df[rdw] / df[alb]
        # Division by zero yields +/-inf; treat it as missing instead.
        df["RAR_median"] = ratio.replace([np.inf, -np.inf], np.nan)

    return df


# -------------------------
# 3) Choosing "one sample per patient"
#    A) Snapshot: keep a single Time_Zone (e.g. 8)
#    B) Wide pivot: one row per stay, features from all Time_Zone values
# -------------------------
ID_COLS = [
    "subject_id",
    "hadm_id",
    "stay_id",
    "Time_Zone",
]

# Columns that MUST be discarded (leakage / outcome-derived / future information).
DROP_ALWAYS = [
    "dod",
    "time_to_death_days",
    "event_30d",
    "duration_30d",
    "event_180d",
    "duration_180d",
    "event_360d",
    "duration_360d",
    "hospital_expire_flag",
    "icu_intime",
    "icu_outtime",
    "hosp_dischtime",
    # For mortality prediction, length of stay often leaks (it "knows" the duration).
    "los",
]

def build_snapshot(df: pd.DataFrame, tz: int = 8) -> pd.DataFrame:
    """Return a single-Time_Zone view of ``df`` with one row per group.

    If a ``Time_Zone`` column exists, only rows whose value equals ``tz`` are
    kept; when ``tz`` is not among the observed values (e.g. in a small
    sample), the largest observed value is used instead. Rows are then
    de-duplicated to one per ``GROUP_COL``.

    Args:
        df: Long-format frame; it is not modified.
        tz: Requested Time_Zone value.

    Returns:
        A filtered, de-duplicated copy of ``df``.
    """
    df = df.copy()
    if "Time_Zone" in df.columns:
        observed = df["Time_Zone"].dropna().unique()
        # Fall back only when there is something to fall back to: with an
        # all-NaN column np.nanmax returns NaN and int(NaN) raises ValueError.
        if len(observed) > 0 and tz not in set(observed):
            tz = int(np.nanmax(df["Time_Zone"].values))
        df = df[df["Time_Zone"] == tz].copy()

    # One row per group (if duplicates exist). Sort only by the ID columns that
    # are present so that a frame lacking some of them does not raise KeyError.
    sort_cols = [c for c in ID_COLS if c in df.columns]
    if sort_cols:
        df = df.sort_values(sort_cols)
    df = df.drop_duplicates(subset=[GROUP_COL], keep="first")

    return df

def build_wide_pivot(df: pd.DataFrame, max_tz: int = None) -> pd.DataFrame:
    """Pivot the long frame to one row per ``GROUP_COL`` value.

    Every column that is not an ID, target or always-dropped column is spread
    across the ``Time_Zone`` values and named ``<feature>__tz<k>``. The target
    columns are then attached once per group (first occurrence).

    Args:
        df: Long-format frame containing a ``Time_Zone`` column.
        max_tz: If given, only rows with ``Time_Zone <= max_tz`` are used.

    Raises:
        ValueError: if ``df`` has no ``Time_Zone`` column.
    """
    long_df = df.copy()
    if "Time_Zone" not in long_df.columns:
        raise ValueError("Δεν βρέθηκε Time_Zone για wide pivot.")

    if max_tz is not None:
        long_df = long_df[long_df["Time_Zone"] <= max_tz].copy()

    # Keep only feature columns (no targets, IDs or leakage columns).
    label_cols = [c for c in TARGETS if c in long_df.columns]
    leak_cols = [c for c in DROP_ALWAYS if c in long_df.columns]
    excluded = set(ID_COLS) | set(label_cols) | set(leak_cols)
    value_cols = [c for c in long_df.columns if c not in excluded]

    pivoted = long_df.pivot_table(
        index=GROUP_COL,
        columns="Time_Zone",
        values=value_cols,
        aggfunc="first",
    )

    # Flatten the (feature, Time_Zone) column pairs into "<feature>__tz<k>".
    pivoted.columns = [f"{feature}__tz{zone}" for feature, zone in pivoted.columns]
    pivoted = pivoted.reset_index()

    # Bring the targets down to one row per group (first occurrence).
    labels = long_df[[GROUP_COL] + label_cols].drop_duplicates(subset=[GROUP_COL], keep="first")
    return pivoted.merge(labels, on=GROUP_COL, how="left")


# -------------------------
# 4) Preprocess + Models
# -------------------------
def make_preprocess(X: pd.DataFrame):
    """Build an unfitted preprocessing transformer from the columns of ``X``.

    ``object`` columns are handled as categorical (most-frequent imputation
    plus one-hot encoding that ignores unseen categories); every other column
    is handled as numeric (median imputation with missing indicators).
    """
    categorical, numeric = [], []
    for name in X.columns:
        bucket = categorical if X[name].dtype == "object" else numeric
        bucket.append(name)

    num_branch = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median", add_indicator=True)),
    ])
    cat_branch = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ])

    return ColumnTransformer(
        transformers=[
            ("num", num_branch, numeric),
            ("cat", cat_branch, categorical),
        ],
        remainder="drop",
        sparse_threshold=0.3,
    )


def make_models():
    """Return the models to evaluate, keyed by short name.

    Only XGBoost is included here; a logistic baseline is not part of this
    version.
    """
    xgb_params = dict(
        n_estimators=800,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        reg_lambda=1.0,
        reg_alpha=0.0,
        objective="binary:logistic",
        eval_metric="auc",
        tree_method="hist",
        n_jobs=-1,
        random_state=42,
    )
    return {"xgb": XGBClassifier(**xgb_params)}


# -------------------------
# 5) Train / Evaluate (leakage-safe split by groups)
# -------------------------
def get_feature_matrix(df: pd.DataFrame, target: str):
    """Split ``df`` into features, labels and split groups for one target.

    The feature matrix excludes the always-dropped columns, every target
    column and every ID column. The grouping column is excluded as well: it is
    returned separately as ``groups``, and leaving it in ``X`` would hand the
    patient identifier to the model as a feature.

    Args:
        df: Frame containing ``target`` and ``GROUP_COL``; it is not modified.
        target: Name of the label column.

    Returns:
        ``(X, y, groups)``: feature DataFrame, float label Series and a NumPy
        array of group identifiers aligned with ``X``.
    """
    df = df.copy()

    # dict.fromkeys removes repeated names while keeping a stable order.
    candidates = dict.fromkeys([*DROP_ALWAYS, *TARGETS, *ID_COLS, GROUP_COL])
    cols_to_drop = [c for c in candidates if c in df.columns]

    X = df.drop(columns=cols_to_drop, errors="ignore")
    y = df[target].astype(float)

    # Groups are read from df, so dropping GROUP_COL from X is safe.
    groups = df[GROUP_COL].values
    return X, y, groups


def evaluate_holdout(df: pd.DataFrame, target: str, test_size=0.2):
    """Evaluate the models from ``make_models`` on one group-aware holdout split.

    Rows with a missing ``target`` are removed, the data is split once with
    ``GroupShuffleSplit`` so that no group appears on both sides, and every
    model is fitted on the training part and scored on the test part.
    Threshold-dependent metrics use a fixed cutoff of 0.5.

    Args:
        df: One-row-per-sample frame containing ``target`` and ``GROUP_COL``.
        target: Name of the label column.
        test_size: Fraction of groups assigned to the test split.

    Returns:
        A DataFrame with one row of metrics per model.
    """
    # Imported here because the notebook's import cell does not bring in
    # GroupShuffleSplit; without this the function raises NameError.
    from sklearn.model_selection import GroupShuffleSplit

    # Keep only rows where the label is available.
    df = df[df[target].notna()].copy()

    X, y, groups = get_feature_matrix(df, target)

    # Group-aware holdout split.
    gss = GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=42)
    tr_idx, te_idx = next(gss.split(X, y, groups=groups))

    X_tr, X_te = X.iloc[tr_idx], X.iloc[te_idx]
    y_tr, y_te = y.iloc[tr_idx], y.iloc[te_idx]

    models = make_models()

    results = []

    for name, clf in models.items():
        # A fresh preprocessor per model, so fitted state is never shared
        # between pipelines.
        pre = make_preprocess(X_tr)
        pipe = Pipeline(steps=[("pre", pre), ("clf", clf)])
        pipe.fit(X_tr, y_tr)

        p_te = pipe.predict_proba(X_te)[:, 1]
        yhat = (p_te >= 0.5).astype(int)

        auc = roc_auc_score(y_te, p_te)
        ap = average_precision_score(y_te, p_te)
        brier = brier_score_loss(y_te, p_te)

        acc = accuracy_score(y_te, yhat)
        bacc = balanced_accuracy_score(y_te, yhat)
        f1 = f1_score(y_te, yhat)

        tn, fp, fn, tp = confusion_matrix(y_te, yhat).ravel()
        # The tiny epsilon avoids a zero denominator.
        sens = tp / (tp + fn + 1e-12)
        spec = tn / (tn + fp + 1e-12)

        results.append({
            "target": target,
            "model": name,
            "AUC": auc,
            "PR_AUC": ap,
            "Brier": brier,
            "Accuracy": acc,
            "BalancedAcc": bacc,
            "F1": f1,
            "Sensitivity": sens,
            "Specificity": spec,
            "n_test": len(y_te),
            "pos_rate_test": float(np.mean(y_te)),
        })

    return pd.DataFrame(results)


# -------------------------
# 6) RUN
# -------------------------
df = add_rar_features(load_df(CSV_PATH))

# Scenario selection:
# (A) Snapshot at Time_Zone=8
snap = build_snapshot(df, tz=8)

# (B) Wide pivot (all Time_Zone values) - start here once the snapshot run works
# wide = build_wide_pivot(df, max_tz=None)

OUT = []
for t in TARGETS:
    if t not in snap.columns:
        continue
    OUT.append(evaluate_holdout(snap, t))

if OUT:
    metrics_df = pd.concat(OUT, ignore_index=True)
else:
    metrics_df = pd.DataFrame()
display(metrics_df)

# Export the holdout metrics.
metrics_df.to_csv("ml_metrics_holdout.csv", index=False)
print("Saved: ml_metrics_holdout.csv")

# Leaderboard

In [None]:
# Outcome-derived / future-information / leakage columns: always dropped.
DROP_ALWAYS = [
    "dod",
    "time_to_death_days",
    "event_30d",
    "duration_30d",
    "event_180d",
    "duration_180d",
    "event_360d",
    "duration_360d",
    "icu_intime",
    "icu_outtime",
    "hosp_dischtime",
    "hospital_expire_flag",
    "los",
]

# Requested snapshot Time_Zone; build_snapshot falls back to the largest
# available value when this one is not present.
SNAPSHOT_TZ = 8


# =========================
# DATA LOADING + FEATURES
# =========================
def load_df(path: str) -> pd.DataFrame:
    """Read ``path`` as CSV and make the available target columns numeric.

    Targets listed in ``TARGETS`` but absent from the file are skipped;
    unparseable target values are coerced to NaN.
    """
    table = pd.read_csv(path)

    available = set(table.columns)
    for target_name in TARGETS:
        if target_name in available:
            table[target_name] = pd.to_numeric(table[target_name], errors="coerce")

    return table

def add_rar_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add RDW-to-albumin ratio (RAR) columns to a copy of ``df``.

    For every (RDW column, albumin column, output name) pair whose two source
    columns are both present, the sources are coerced to numeric (unparseable
    values become NaN) and their ratio is stored under the output name.
    A zero albumin value would give +/-inf; such results are replaced with NaN.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    df = df.copy()
    # Note the two spaces before the dash in the albumin column names.
    pairs = [
        ("RDW - Median", "Albumin  - Median", "RAR_median"),
        ("RDW - Mean",   "Albumin  - Mean",   "RAR_mean"),
        ("RDW - Max",    "Albumin  - Min",    "RAR_max_over_min"),  # optional feature
    ]
    for rdw, alb, out in pairs:
        if rdw in df.columns and alb in df.columns:
            df[rdw] = pd.to_numeric(df[rdw], errors="coerce")
            df[alb] = pd.to_numeric(df[alb], errors="coerce")
            ratio = df[rdw] / df[alb]
            # Division by zero yields +/-inf; treat it as missing instead.
            df[out] = ratio.replace([np.inf, -np.inf], np.nan)
    return df


# =========================
# VIEWS
# =========================
def build_snapshot(df: pd.DataFrame, tz: int = SNAPSHOT_TZ) -> pd.DataFrame:
    """Return a single-Time_Zone view of ``df`` with one row per ``stay_id``.

    If a ``Time_Zone`` column exists but holds no non-missing values, the copy
    is returned unchanged. Otherwise only rows whose value equals ``tz`` are
    kept; when ``tz`` is not among the observed values, the largest observed
    value is used instead. If a ``stay_id`` column exists, rows are then
    de-duplicated to one per stay.

    Args:
        df: Long-format frame; it is not modified.
        tz: Requested Time_Zone value.

    Returns:
        A filtered, de-duplicated copy of ``df``.
    """
    df = df.copy()
    if "Time_Zone" in df.columns:
        tzs = df["Time_Zone"].dropna().unique()
        if len(tzs) == 0:
            return df
        if tz not in set(tzs):
            tz = int(np.nanmax(df["Time_Zone"].values))
        df = df[df["Time_Zone"] == tz].copy()

    # One row per stay_id.
    if "stay_id" in df.columns:
        # Sort only by the ID columns that exist: the function tolerates a
        # missing Time_Zone column, so sorting by all of ID_COLS would raise KeyError.
        sort_cols = [c for c in ID_COLS if c in df.columns]
        df = df.sort_values(sort_cols).drop_duplicates(subset=["stay_id"], keep="first")

    return df

def build_wide_pivot(df: pd.DataFrame, max_tz: int = None) -> pd.DataFrame:
    """Pivot the long frame to one row per (stay_id, GROUP_COL).

    Feature columns (everything except IDs, targets and always-dropped
    columns) are spread over the ``Time_Zone`` values as ``<feature>__tz<k>``;
    the targets are merged back once per ``stay_id``.

    Args:
        df: Long-format frame containing a ``Time_Zone`` column.
        max_tz: If given, only rows with ``Time_Zone <= max_tz`` are used.

    Raises:
        ValueError: if ``df`` has no ``Time_Zone`` column.
    """
    source = df.copy()
    if "Time_Zone" not in source.columns:
        raise ValueError("Δεν βρέθηκε Time_Zone για wide pivot.")

    if max_tz is not None:
        within_range = source["Time_Zone"] <= max_tz
        source = source[within_range].copy()

    present_targets = [name for name in TARGETS if name in source.columns]
    present_leaks = [name for name in DROP_ALWAYS if name in source.columns]
    skip = set(ID_COLS + present_targets + present_leaks)
    features = [name for name in source.columns if name not in skip]

    # One row per stay_id, with subject_id kept in the index.
    table = source.pivot_table(
        index=["stay_id", GROUP_COL],
        columns="Time_Zone",
        values=features,
        aggfunc="first",
    )
    flat_names = []
    for feature, zone in table.columns:
        flat_names.append(f"{feature}__tz{zone}")
    table.columns = flat_names
    table = table.reset_index()

    # Targets once per stay_id.
    target_rows = source[["stay_id"] + present_targets].drop_duplicates(
        subset=["stay_id"], keep="first"
    )
    merged = table.merge(target_rows, on="stay_id", how="left")
    return merged


# =========================
# PREPROCESS
# =========================
def make_preprocess(X: pd.DataFrame):
    """Build an unfitted preprocessing transformer from the columns of ``X``.

    Columns with dtype ``object`` are treated as categorical (most-frequent
    imputation, then one-hot encoding that ignores unseen categories). All
    other columns are treated as numeric: median imputation with missing
    indicators, followed by variance scaling without centering.

    The scaling step matches the scaled ``make_preprocess`` variant defined
    earlier in this notebook; the logistic regression built by ``make_lr``
    uses the ``saga`` solver, which is trained on this transformer's output.
    """
    cat_cols = [c for c in X.columns if X[c].dtype == "object"]
    num_cols = [c for c in X.columns if c not in cat_cols]

    numeric_pipe = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median", add_indicator=True)),
        # Put numeric features on a comparable scale for the saga solver.
        ("scaler", StandardScaler(with_mean=False)),
    ])
    categorical_pipe = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ])

    return ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, num_cols),
            ("cat", categorical_pipe, cat_cols),
        ],
        remainder="drop",
        sparse_threshold=0.3,
    )


# =========================
# MODEL FACTORIES (scale_pos_weight per fold)
# =========================
def make_lr():
    """Return an unfitted, class-balanced L2 logistic regression (SAGA solver)."""
    settings = {
        "max_iter": 3000,
        "solver": "saga",
        "penalty": "l2",
        "class_weight": "balanced",
        "n_jobs": -1,
        "random_state": 42,
    }
    return LogisticRegression(**settings)

def make_rf():
    """Return an unfitted random forest with per-bootstrap class balancing."""
    settings = {
        "n_estimators": 600,
        "max_depth": None,
        "min_samples_leaf": 2,
        "class_weight": "balanced_subsample",
        "n_jobs": -1,
        "random_state": 42,
    }
    return RandomForestClassifier(**settings)

def make_xgb(y_train: np.ndarray):
    """Return an unfitted XGBoost classifier weighted for ``y_train``.

    ``scale_pos_weight`` is the negative-to-positive count ratio in
    ``y_train``, with the positive count floored at 1.0 to avoid dividing by
    zero.
    """
    positives = np.sum(y_train == 1)
    negatives = np.sum(y_train == 0)
    pos_weight = negatives / max(positives, 1.0)

    settings = {
        "n_estimators": 1200,
        "learning_rate": 0.03,
        "max_depth": 4,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "min_child_weight": 2,
        "reg_lambda": 1.0,
        "reg_alpha": 0.0,
        "scale_pos_weight": pos_weight,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "tree_method": "hist",
        "n_jobs": -1,
        "random_state": 42,
    }
    return XGBClassifier(**settings)


# =========================
# CV EVAL (OOF preds)
# =========================
def best_threshold_f1(y_true, p):
    """Find the probability cutoff that maximises F1 on the given predictions.

    91 evenly spaced thresholds between 0.05 and 0.95 are tried; on ties the
    lowest threshold wins.

    Returns:
        ``(threshold, f1)`` as Python floats.
    """
    candidates = np.linspace(0.05, 0.95, 91)
    f1_values = [f1_score(y_true, (p >= cut).astype(int)) for cut in candidates]
    # max() keeps the first of several equal maxima, i.e. the lowest threshold.
    top_idx = max(range(len(candidates)), key=f1_values.__getitem__)
    return float(candidates[top_idx]), float(f1_values[top_idx])

def cv_oof_metrics(X, y, groups, model_name: str, n_splits=5):
    """Run group-aware stratified CV for one model and summarise OOF metrics.

    A fresh preprocessor and classifier are fitted per fold; validation
    predictions are pooled into one out-of-fold (OOF) vector on which the
    summary metrics are computed. The decision threshold is chosen on the
    same OOF predictions that the ``@thr`` metrics are computed on.

    Args:
        X: Feature DataFrame.
        y: Label Series aligned with ``X``.
        groups: Group identifiers aligned with ``X``.
        model_name: One of ``"lr"``, ``"rf"``, ``"xgb"``.
        n_splits: Number of CV folds.

    Returns:
        A dict of metrics (one leaderboard row).

    Raises:
        ValueError: if ``model_name`` is not recognised.
    """
    splitter = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=42)
    pooled = np.zeros(len(y), dtype=float)

    auc_scores, ap_scores, brier_scores = [], [], []

    for train_idx, valid_idx in splitter.split(X, y, groups=groups):
        X_train = X.iloc[train_idx]
        X_valid = X.iloc[valid_idx]
        y_train = y.iloc[train_idx].values
        y_valid = y.iloc[valid_idx].values

        preprocessor = make_preprocess(X_train)

        if model_name == "xgb":
            # XGBoost derives its class weight from the fold's labels.
            estimator = make_xgb(y_train)
        elif model_name == "rf":
            estimator = make_rf()
        elif model_name == "lr":
            estimator = make_lr()
        else:
            raise ValueError("Unknown model_name")

        fitted = Pipeline(steps=[("pre", preprocessor), ("clf", estimator)])
        fitted.fit(X_train, y_train)

        valid_proba = fitted.predict_proba(X_valid)[:, 1]
        pooled[valid_idx] = valid_proba

        auc_scores.append(roc_auc_score(y_valid, valid_proba))
        ap_scores.append(average_precision_score(y_valid, valid_proba))
        brier_scores.append(brier_score_loss(y_valid, valid_proba))

    # Metrics over the pooled OOF predictions.
    pooled_auc = roc_auc_score(y, pooled)
    pooled_ap = average_precision_score(y, pooled)
    pooled_brier = brier_score_loss(y, pooled)

    cutoff, top_f1 = best_threshold_f1(y.values.astype(int), pooled)
    decisions = (pooled >= cutoff).astype(int)
    tn, fp, fn, tp = confusion_matrix(y, decisions).ravel()
    sensitivity = tp / (tp + fn + 1e-12)
    specificity = tn / (tn + fp + 1e-12)

    result = {
        "model": model_name,
        "AUC_oof": pooled_auc,
        "PR_AUC_oof": pooled_ap,
        "Brier_oof": pooled_brier,
        "AUC_mean_fold": float(np.mean(auc_scores)),
        "AUC_std_fold": float(np.std(auc_scores, ddof=1)),
        "best_thr_F1_oof": cutoff,
        "best_F1_oof": top_f1,
        "Sensitivity@thr": float(sensitivity),
        "Specificity@thr": float(specificity),
        "n": int(len(y)),
        "pos_rate": float(np.mean(y)),
    }
    return result


# =========================
# RUN LEADERBOARD
# =========================
def prepare_xy(df_view: pd.DataFrame, target: str):
    """Split a view into features, labels and CV groups for one target.

    Rows with a missing ``target`` are discarded. The feature matrix excludes
    the always-dropped columns, every target column and every ID column. The
    grouping column is excluded too: it is returned separately as ``groups``,
    and leaving it in ``X`` would hand the patient identifier to the models as
    a feature.

    Args:
        df_view: One-row-per-sample frame that contains ``target`` and ``GROUP_COL``.
        target: Name of the label column.

    Returns:
        ``(X, y, groups)``: feature DataFrame, integer label Series and a
        NumPy array of group identifiers aligned with ``X``.
    """
    dfv = df_view[df_view[target].notna()].copy()

    # dict.fromkeys removes repeated names while keeping a stable order.
    candidates = dict.fromkeys([*DROP_ALWAYS, *TARGETS, *ID_COLS, GROUP_COL])
    drop_cols = [c for c in candidates if c in dfv.columns]

    X = dfv.drop(columns=drop_cols, errors="ignore")
    y = dfv[target].astype(int)
    groups = dfv[GROUP_COL].values  # read from dfv, so dropping it from X is safe

    return X, y, groups

def run_leaderboard(df, view_name: str, df_view: pd.DataFrame):
    """Evaluate every model on every available target of one data view.

    For each name in ``TARGETS`` that is a column of ``df_view``, the models
    ``"lr"``, ``"rf"`` and ``"xgb"`` are cross-validated with
    ``cv_oof_metrics`` (5 folds). Rows are ranked within each target by OOF
    AUC (descending), then PR-AUC (descending), then Brier score (ascending,
    lower is better).

    Args:
        df: Not used by this function; kept so existing calls keep working.
        view_name: Label stored in the ``view`` column of every row.
        df_view: One-row-per-sample frame for this view.

    Returns:
        The leaderboard DataFrame; an empty DataFrame when ``df_view``
        contains none of the target columns.
    """
    rows = []
    for target in TARGETS:
        if target not in df_view.columns:
            continue

        X, y, groups = prepare_xy(df_view, target)

        for m in ["lr", "rf", "xgb"]:
            r = cv_oof_metrics(X, y, groups, model_name=m, n_splits=5)
            r["target"] = target
            r["view"] = view_name
            rows.append(r)

    out = pd.DataFrame(rows)
    if out.empty:
        # Nothing was evaluated: the sort keys do not exist, so sorting would raise KeyError.
        return out

    out = out.sort_values(by=["target", "AUC_oof", "PR_AUC_oof", "Brier_oof"],
                          ascending=[True, False, False, True]).reset_index(drop=True)
    return out


df = load_df(CSV_PATH)
df = add_rar_features(df)

snap = build_snapshot(df, tz=SNAPSHOT_TZ)
wide = build_wide_pivot(df, max_tz=None)

leader_snap = run_leaderboard(df, "snapshot_tz", snap)
leader_wide = run_leaderboard(df, "wide_pivot", wide)

leader = pd.concat([leader_snap, leader_wide], ignore_index=True)

leader.to_csv("leaderboard_subjectid_cv.csv", index=False)
print("Saved: leaderboard_subjectid_cv.csv")

# Show the top 3 per target. Each view's leaderboard is sorted on its own, so
# the concatenated frame is not ranked across views; re-rank within the target
# first, otherwise head(3) would just return the rows of the first view.
for t in TARGETS:
    top = (
        leader[leader["target"] == t]
        .sort_values(
            by=["AUC_oof", "PR_AUC_oof", "Brier_oof"],
            ascending=[False, False, True],
        )
        .head(3)
    )
    print("\n=== TOP 3 for", t, "===")
    print(top[["view","model","AUC_oof","PR_AUC_oof","Brier_oof","best_thr_F1_oof","best_F1_oof","Sensitivity@thr","Specificity@thr","n","pos_rate"]])

# Andromeda

In [2]:
# Result files of the "andromeda" run: CV leaderboard and the two holdout metric tables.
andromeda_head_path = "CSV/Exports/Temp/12_ML/andromeda/leaderboard_subjectid_cv.csv"
andromeda_metrics_path = "CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout.csv"
andromeda_metrics_full_path = "CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout_full.csv"


In [3]:
# Load the three result tables into DataFrames.
andromeda_head = pd.read_csv(filepath_or_buffer=andromeda_head_path)
andromeda_metrics = pd.read_csv(filepath_or_buffer=andromeda_metrics_path)
andromeda_metrics_full = pd.read_csv(filepath_or_buffer=andromeda_metrics_full_path)

In [4]:
# Render the leaderboard, then the holdout metrics, then the full holdout metrics.
display(andromeda_head, andromeda_metrics, andromeda_metrics_full)

Unnamed: 0,model,AUC_oof,PR_AUC_oof,Brier_oof,AUC_mean_fold,AUC_std_fold,best_thr_F1_oof,best_F1_oof,Accuracy@thr,BalancedAcc@thr,Sensitivity@thr,Specificity@thr,n,pos_rate,target,view
0,lr,0.837939,0.711487,0.163483,0.839343,0.022631,0.55,0.668861,0.778481,0.758006,0.701467,0.814545,3634,0.318932,mort_180d,snapshot_tz
1,xgb,0.83774,0.7261,0.154437,0.839415,0.020911,0.34,0.671156,0.758118,0.762325,0.773943,0.750707,3634,0.318932,mort_180d,snapshot_tz
2,rf,0.826091,0.713087,0.154955,0.828023,0.01945,0.37,0.659366,0.751789,0.752173,0.753236,0.751111,3634,0.318932,mort_180d,snapshot_tz
3,xgb,0.85961,0.708998,0.126252,0.859997,0.011064,0.37,0.637591,0.807925,0.770676,0.698521,0.842831,3634,0.241882,mort_30d,snapshot_tz
4,lr,0.858206,0.659603,0.149441,0.858872,0.014913,0.56,0.647902,0.810677,0.77985,0.720137,0.839564,3634,0.241882,mort_30d,snapshot_tz
5,rf,0.85414,0.698242,0.124094,0.854458,0.014726,0.32,0.629432,0.790039,0.772049,0.737201,0.806897,3634,0.241882,mort_30d,snapshot_tz
6,xgb,0.832077,0.739954,0.162682,0.832446,0.01161,0.34,0.692229,0.750413,0.760233,0.793774,0.726692,3634,0.353605,mort_360d,snapshot_tz
7,lr,0.826189,0.721363,0.169401,0.826648,0.017657,0.46,0.681446,0.742983,0.750785,0.777432,0.724138,3634,0.353605,mort_360d,snapshot_tz
8,rf,0.824947,0.734089,0.162847,0.825891,0.015248,0.39,0.680469,0.745184,0.750197,0.767315,0.733078,3634,0.353605,mort_360d,snapshot_tz
9,lr,0.859868,0.745393,0.149573,0.861006,0.022051,0.49,0.697276,0.785911,0.7825,0.77308,0.791919,3634,0.318932,mort_180d,wide_pivot


Unnamed: 0,target,model,AUC,PR_AUC,Brier,Accuracy,BalancedAcc,F1,Sensitivity,Specificity,n_test,pos_rate_test
0,mort_30d,lr,0.886445,0.761004,0.121763,0.851444,0.792369,0.684211,0.680233,0.904505,727,0.236589
1,mort_180d,lr,0.876534,0.762982,0.138105,0.799175,0.797575,0.703252,0.793578,0.801572,727,0.299862
2,mort_360d,lr,0.864266,0.77455,0.145956,0.806052,0.779387,0.711656,0.693227,0.865546,727,0.345254


Unnamed: 0,target,view,model,AUC,PR_AUC,Brier,Accuracy,BalancedAcc,F1,Sensitivity,Specificity,thr_used,best_F1_train_oof,n_train,n_test,pos_rate_train,pos_rate_test
0,mort_180d,snapshot_tz,lr,0.855653,0.722015,0.1538,0.774415,0.765469,0.663934,0.743119,0.787819,0.5,0.671154,2907,727,0.323701,0.299862
1,mort_180d,snapshot_tz,xgb,0.834745,0.704846,0.154852,0.759285,0.77171,0.666667,0.802752,0.740668,0.28,0.671147,2907,727,0.323701,0.299862
2,mort_180d,snapshot_tz,rf,0.823381,0.684464,0.151929,0.766162,0.763509,0.66,0.756881,0.770138,0.35,0.65897,2907,727,0.323701,0.299862
3,mort_30d,snapshot_tz,lr,0.865661,0.707372,0.140052,0.825309,0.783276,0.655827,0.703488,0.863063,0.57,0.641558,2907,727,0.243206,0.236589
4,mort_30d,snapshot_tz,xgb,0.854096,0.706888,0.127081,0.817056,0.769846,0.637602,0.680233,0.859459,0.39,0.634179,2907,727,0.243206,0.236589
5,mort_30d,snapshot_tz,rf,0.851299,0.695961,0.122909,0.799175,0.730049,0.585227,0.598837,0.861261,0.36,0.627708,2907,727,0.243206,0.236589
6,mort_360d,snapshot_tz,lr,0.844236,0.747374,0.157808,0.793673,0.784999,0.716981,0.756972,0.813025,0.5,0.67706,2907,727,0.355693,0.345254
7,mort_360d,snapshot_tz,rf,0.84191,0.749001,0.155793,0.76066,0.771088,0.698962,0.804781,0.737395,0.37,0.674542,2907,727,0.355693,0.345254
8,mort_360d,snapshot_tz,xgb,0.837984,0.755068,0.158299,0.744154,0.760366,0.686869,0.812749,0.707983,0.31,0.693455,2907,727,0.355693,0.345254
9,mort_180d,wide_pivot,lr,0.876534,0.762982,0.138105,0.799175,0.797575,0.703252,0.793578,0.801572,0.46,0.690587,2907,727,0.323701,0.299862


In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# -------------------------
# PATHS
# -------------------------
LEADERBOARD_PATH = Path("CSV/Exports/Temp/12_ML/andromeda/leaderboard_subjectid_cv.csv")

# Two candidate holdout exports: the "full" file is preferred by the loading
# code below, the summary file is only used when the full one is absent.
HOLDOUT_FULL_PATH = Path("CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout_full.csv")
HOLDOUT_PATH_FALLBACK = Path("CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout.csv")

OUT_DIR = Path("CSV/Exports/Temp/12_ML/andromeda/plots")
# parents=True: create the whole directory chain, otherwise mkdir raises
# FileNotFoundError whenever an intermediate directory does not exist yet.
OUT_DIR.mkdir(parents=True, exist_ok=True)


# -------------------------
# HELPERS
# -------------------------
# Target column name -> mortality horizon in days.
# Used to order targets and as the x value of the horizon trend plots.
TARGET_TO_DAYS = {
    "mort_30d": 30,
    "mort_180d": 180,
    "mort_360d": 360,
}

def _ensure_loaded(path: Path) -> pd.DataFrame:
    if not path.exists():
        raise FileNotFoundError(f"Missing file: {path.resolve()}")
    return pd.read_csv(path)

def add_days_col(df: pd.DataFrame, target_col: str = "target") -> pd.DataFrame:
    """Return a copy of ``df`` with a ``days`` column derived from ``target_col``.

    Each target name is looked up in ``TARGET_TO_DAYS``; names without an entry
    map to NaN. The input frame is not modified.
    """
    horizon_days = df[target_col].map(TARGET_TO_DAYS)
    out = df.copy()
    out["days"] = horizon_days
    return out

def grouped_bar_by_model_view(
    df: pd.DataFrame,
    metric: str,
    title: str,
    outpath: Path,
    target: str,
    view_col: str = "view",
    model_col: str = "model",
    value_fmt: str = "{:.3f}",
):
    """
    Save one grouped bar chart for a single target.

      x-axis: model
      bars:   one bar per view inside each model group

    Args:
        df: metrics table with a ``target`` column plus ``view_col``,
            ``model_col`` and ``metric`` columns.
        metric: name of the column whose value becomes the bar height.
        title: figure title.
        outpath: file the figure is written to (dpi=200).
        target: value of ``df["target"]`` to plot.
        view_col / model_col: column names for the bar series / x groups.
        value_fmt: ``str.format`` template for the value label above each bar.

    Returns:
        None. If no row matches ``target`` a warning is printed and no file
        is written.
    """
    tmp = df[df["target"] == target]
    if tmp.empty:
        print(f"[WARN] No rows for target={target} in {metric}")
        return

    # Ensure consistent ordering
    views = sorted(tmp[view_col].unique())
    present_models = sorted(tmp[model_col].unique())

    # Put the three standard models first, but keep any additional model
    # instead of silently dropping it from the chart.
    preferred = ["lr", "rf", "xgb"]
    if set(preferred) <= set(present_models):
        models = preferred + [m for m in present_models if m not in preferred]
    else:
        models = present_models

    x = np.arange(len(models))
    width = 0.8 / max(len(views), 1)

    fig, ax = plt.subplots(figsize=(10, 5))

    for i, v in enumerate(views):
        # First matching row per (view, model); NaN leaves a gap for missing combos.
        vals = []
        for m in models:
            r = tmp[(tmp[view_col] == v) & (tmp[model_col] == m)]
            vals.append(r[metric].iloc[0] if len(r) else np.nan)

        # Centre the cluster of view bars on each model tick.
        centers = x + i * width - (len(views) - 1) * width / 2
        ax.bar(centers, vals, width=width, label=v)

        # annotate each finite bar with its value
        for cx, yi in zip(centers, vals):
            if np.isfinite(yi):
                ax.text(
                    cx,
                    yi,
                    value_fmt.format(yi),
                    ha="center",
                    va="bottom",
                    fontsize=9,
                    rotation=90,
                )

    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, axis="y", alpha=0.3)

    fig.tight_layout()
    fig.savefig(outpath, dpi=200)
    # Close explicitly: this is called in a loop and figures would otherwise accumulate.
    plt.close(fig)

def horizon_lineplot_best_per_view(
    df: pd.DataFrame,
    metric: str,
    title: str,
    outpath: Path,
    view_col: str = "view",
    higher_is_better: bool | None = None,
):
    """
    For each view, pick the best row per target and plot ``metric`` vs days.

    "Best" is decided by ``metric`` first, then by ``PR_AUC`` (higher wins) and
    ``Brier`` (lower wins) as tie-breakers when those columns exist.

    Args:
        df: metrics table with ``target``, ``view_col`` and ``metric`` columns;
            a ``days`` column is added via ``add_days_col`` when missing.
        metric: column to rank by and to plot on the y-axis.
        title: figure title.
        outpath: file the figure is written to (dpi=200).
        view_col: column whose values become the plotted lines.
        higher_is_better: ranking direction for ``metric``. ``None`` (default)
            infers it from the name: metrics containing "brier" are treated as
            losses (lower is better), everything else as scores.
    """
    tmp = df.copy()

    if "days" not in tmp.columns:
        tmp = add_days_col(tmp, target_col="target")

    if higher_is_better is None:
        # Brier score is an error measure: ranking it descending would select
        # the worst model as "best".
        higher_is_better = "brier" not in metric.lower()

    # pick best per (target, view)
    sort_cols = [metric]
    asc = [not higher_is_better]
    if "PR_AUC" in tmp.columns and metric != "PR_AUC":
        sort_cols.append("PR_AUC"); asc.append(False)
    if "Brier" in tmp.columns and metric != "Brier":
        sort_cols.append("Brier"); asc.append(True)

    # Keep the top row of every (target, view) group as a whole row.
    # groupby().first() would take the first non-null value per column and
    # could therefore combine values (e.g. metric and model) from different rows.
    best = (
        tmp.dropna(subset=["target", view_col])
           .sort_values(["target", view_col] + sort_cols, ascending=[True, True] + asc)
           .drop_duplicates(subset=["target", view_col], keep="first")
    )

    views = sorted(best[view_col].unique())

    fig, ax = plt.subplots(figsize=(9, 5))
    for v in views:
        vv = best[best[view_col] == v].sort_values("days")
        ax.plot(vv["days"], vv[metric], marker="o", label=v)

        # annotate each point with the name of the winning model
        if "model" in vv.columns:
            for _, row in vv.iterrows():
                ax.text(row["days"], row[metric], f"{row['model']}", fontsize=9, ha="left", va="bottom")

    ax.set_xlabel("Mortality horizon (days)")
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig(outpath, dpi=200)
    plt.close(fig)


# -------------------------
# LOAD FILES
# -------------------------
leader = _ensure_loaded(LEADERBOARD_PATH)

# The full holdout export wins; the summary file is only a fallback.
holdout_source = HOLDOUT_FULL_PATH if HOLDOUT_FULL_PATH.exists() else HOLDOUT_PATH_FALLBACK
holdout = _ensure_loaded(holdout_source)

# The small summary file may lack view/model columns; fill in placeholders so
# the plotting helpers (which index by view and model) still work.
for missing_col, placeholder in (("view", "holdout_summary"), ("model", "unknown")):
    if missing_col not in holdout.columns:
        holdout[missing_col] = placeholder

# Horizon in days for both tables
leader = add_days_col(leader, target_col="target")
holdout = add_days_col(holdout, target_col="target")


# -------------------------
# PLOTS: LEADERBOARD (OOF CV)
# -------------------------
# One AUC / PR-AUC / Brier bar chart per target, targets ordered by horizon
# (targets unknown to TARGET_TO_DAYS sort last).
for t in sorted(leader["target"].unique(), key=lambda x: TARGET_TO_DAYS.get(x, 9999)):
    bar_jobs = [
        ("AUC_oof", f"Leaderboard (OOF CV) | AUC | {t}", OUT_DIR / f"leaderboard_auc_{t}.png", "{:.3f}"),
        ("PR_AUC_oof", f"Leaderboard (OOF CV) | PR-AUC | {t}", OUT_DIR / f"leaderboard_prauc_{t}.png", "{:.3f}"),
        ("Brier_oof", f"Leaderboard (OOF CV) | Brier | {t}", OUT_DIR / f"leaderboard_brier_{t}.png", "{:.4f}"),
    ]
    for metric_name, plot_title, png_path, fmt in bar_jobs:
        grouped_bar_by_model_view(
            df=leader,
            metric=metric_name,
            title=plot_title,
            outpath=png_path,
            target=t,
            view_col="view",
            model_col="model",
            value_fmt=fmt,
        )

# Trend plot (best per view).
# The trend helper looks for un-suffixed PR_AUC / Brier tie-breaker columns,
# so the *_oof leaderboard columns are renamed first.
tmp_leader = leader.rename(columns={
    "AUC_oof": "AUC",
    "PR_AUC_oof": "PR_AUC",
    "Brier_oof": "Brier",
})
trend_jobs = [
    ("AUC", "Leaderboard (OOF CV) | Best model per view | AUC vs horizon", OUT_DIR / "leaderboard_best_auc_vs_horizon.png"),
    ("PR_AUC", "Leaderboard (OOF CV) | Best model per view | PR-AUC vs horizon", OUT_DIR / "leaderboard_best_prauc_vs_horizon.png"),
    ("Brier", "Leaderboard (OOF CV) | Best model per view | Brier vs horizon", OUT_DIR / "leaderboard_best_brier_vs_horizon.png"),
]
for metric_name, plot_title, png_path in trend_jobs:
    horizon_lineplot_best_per_view(
        df=tmp_leader,
        metric=metric_name,
        title=plot_title,
        outpath=png_path,
    )


# -------------------------
# PLOTS: HOLDOUT
# -------------------------
# If you have full holdout, you'll get view/model comparison; if not, it's still OK.
for t in sorted(holdout["target"].unique(), key=lambda x: TARGET_TO_DAYS.get(x, 9999)):
    # Bar charts compare views; with a single view there is nothing to compare, so skip.
    if not ("view" in holdout.columns and "model" in holdout.columns and holdout["view"].nunique() > 1):
        continue

    bar_jobs = [
        ("AUC", f"Holdout (Group split) | AUC | {t}", OUT_DIR / f"holdout_auc_{t}.png", "{:.3f}"),
        ("PR_AUC", f"Holdout (Group split) | PR-AUC | {t}", OUT_DIR / f"holdout_prauc_{t}.png", "{:.3f}"),
        ("Brier", f"Holdout (Group split) | Brier | {t}", OUT_DIR / f"holdout_brier_{t}.png", "{:.4f}"),
    ]
    for metric_name, plot_title, png_path, fmt in bar_jobs:
        grouped_bar_by_model_view(
            df=holdout,
            metric=metric_name,
            title=plot_title,
            outpath=png_path,
            target=t,
            view_col="view",
            model_col="model",
            value_fmt=fmt,
        )

# Trend plot on holdout (best per view)
if "view" in holdout.columns:
    trend_jobs = [
        ("AUC", "Holdout (Group split) | Best model per view | AUC vs horizon", OUT_DIR / "holdout_best_auc_vs_horizon.png"),
        ("PR_AUC", "Holdout (Group split) | Best model per view | PR-AUC vs horizon", OUT_DIR / "holdout_best_prauc_vs_horizon.png"),
        ("Brier", "Holdout (Group split) | Best model per view | Brier vs horizon", OUT_DIR / "holdout_best_brier_vs_horizon.png"),
    ]
    for metric_name, plot_title, png_path in trend_jobs:
        horizon_lineplot_best_per_view(
            df=holdout,
            metric=metric_name,
            title=plot_title,
            outpath=png_path,
        )

print(f"Saved plots into: {OUT_DIR.resolve()}")


In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# -------------------------
# PATHS
# -------------------------
LEADERBOARD_PATH = Path("CSV/Exports/Temp/12_ML/andromeda/leaderboard_subjectid_cv.csv")

# Two candidate holdout exports: the "full" file is preferred by the loading
# code below, the summary file is only used when the full one is absent.
HOLDOUT_FULL_PATH = Path("CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout_full.csv")
HOLDOUT_PATH_FALLBACK = Path("CSV/Exports/Temp/12_ML/andromeda/ml_metrics_holdout.csv")

OUT_DIR = Path("CSV/Exports/Temp/12_ML/andromeda/plots")
# parents=True: create the whole directory chain, otherwise mkdir raises
# FileNotFoundError whenever an intermediate directory does not exist yet.
OUT_DIR.mkdir(parents=True, exist_ok=True)


# -------------------------
# HELPERS
# -------------------------
# Target column name -> mortality horizon in days.
# Used to order targets, in figure titles and as the x value of trend plots.
TARGET_TO_DAYS = {
    "mort_30d": 30,
    "mort_180d": 180,
    "mort_360d": 360,
}

def _ensure_loaded(path: Path) -> pd.DataFrame:
    if not path.exists():
        raise FileNotFoundError(f"Missing file: {path.resolve()}")
    return pd.read_csv(path)

def add_days_col(df: pd.DataFrame, target_col: str = "target") -> pd.DataFrame:
    """Copy ``df`` and attach a ``days`` column looked up from ``TARGET_TO_DAYS``.

    Values of ``target_col`` that are not keys of ``TARGET_TO_DAYS`` become NaN.
    The caller's frame is left untouched.
    """
    with_days = df.copy()
    with_days["days"] = with_days[target_col].map(TARGET_TO_DAYS)
    return with_days

def _pick_first_existing_col(df: pd.DataFrame, candidates: list[str]) -> str:
    for c in candidates:
        if c in df.columns:
            return c
    raise KeyError(f"None of these columns exist: {candidates}. Available: {list(df.columns)[:30]} ...")

def grouped_bar_by_model_view_ax(
    df: pd.DataFrame,
    metric: str,
    target: str,
    ax: plt.Axes,
    view_col: str = "view",
    model_col: str = "model",
    view_order: list[str] | None = None,
    model_order: list[str] | None = None,
    value_fmt: str = "{:.3f}",
    annotate: bool = True,
    show_legend: bool = False,
):
    """
    Draw a grouped bar chart for one target on the provided axis:
      x-axis: model
      bars: each view

    Args:
        df: metrics table with a ``target`` column plus ``view_col``,
            ``model_col`` and ``metric`` columns.
        metric: column whose value becomes the bar height.
        target: value of ``df["target"]`` to plot.
        ax: axis to draw on.
        view_order / model_order: optional explicit ordering. Entries not
            present for this target are skipped; if that leaves nothing, the
            sorted values present in the data are used instead.
        value_fmt: ``str.format`` template for the value labels.
        annotate: write the value above each finite bar.
        show_legend: add a per-axis legend.

    Returns:
        ``ax``, or ``None`` when no row matches ``target`` (the axis is then
        switched off and titled "(no data) ...").
    """
    tmp = df[df["target"] == target]
    if tmp.empty:
        ax.set_title(f"(no data) {target} | {metric}")
        ax.axis("off")
        return None

    # Values present for this target; the sets are built once so filtering the
    # requested orders does not rebuild them for every element.
    present_views = sorted(tmp[view_col].unique())
    present_models = sorted(tmp[model_col].unique())
    view_lookup = set(present_views)
    model_lookup = set(present_models)

    # ordering
    views = present_views if view_order is None else [v for v in view_order if v in view_lookup]
    models = present_models if model_order is None else [m for m in model_order if m in model_lookup]

    # fallback if order filters everything
    if not views:
        views = present_views
    if not models:
        models = present_models

    x = np.arange(len(models))
    width = 0.8 / max(len(views), 1)

    for i, v in enumerate(views):
        # First matching row per (view, model); NaN leaves a gap for missing combos.
        vals = []
        for m in models:
            r = tmp[(tmp[view_col] == v) & (tmp[model_col] == m)]
            vals.append(r[metric].iloc[0] if len(r) else np.nan)

        # Centre the cluster of view bars on each model tick.
        centers = x + i * width - (len(views) - 1) * width / 2
        ax.bar(centers, vals, width=width, label=v)

        if annotate:
            for cx, yi in zip(centers, vals):
                if np.isfinite(yi):
                    ax.text(
                        cx,
                        yi,
                        value_fmt.format(yi),
                        ha="center",
                        va="bottom",
                        fontsize=8,
                        rotation=90,
                    )

    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.grid(True, axis="y", alpha=0.3)

    if show_legend:
        ax.legend()

    return ax

def panel_barplots_leader_holdout(
    leader: pd.DataFrame,
    holdout: pd.DataFrame,
    target: str,
    outpath: Path,
    view_col: str = "view",
    model_col: str = "model",
):
    """
    One figure per target:
      columns: Leaderboard (OOF CV) | Holdout (Group split)
      rows: AUC | PR-AUC | Brier

    Args:
        leader: leaderboard metrics (``AUC_oof``/``PR_AUC_oof``/``Brier_oof``
            or their un-suffixed equivalents).
        holdout: holdout metrics (``AUC``/``PR_AUC``/``Brier`` or the
            ``*_holdout`` / ``*_test`` variants).
        target: target name to plot; must be matched against ``df["target"]``.
        outpath: file the figure is written to (dpi=200).
        view_col / model_col: column names for the bar series / x groups.

    Raises:
        KeyError: if a required metric column is missing from either frame
            (raised by ``_pick_first_existing_col``).
    """

    # union ordering so both panels use the same model/view order
    all_views = sorted(set(leader[view_col].unique()).union(set(holdout[view_col].unique())))
    all_models = sorted(set(leader[model_col].unique()).union(set(holdout[model_col].unique())))

    # common preferred model ordering if present
    preferred = ["lr", "rf", "xgb"]
    if set(preferred).issubset(set(all_models)):
        model_order = preferred + [m for m in all_models if m not in preferred]
    else:
        model_order = all_models

    view_order = all_views

    # One fixed colour per view. Without this each axis colours its bars by
    # position in its own colour cycle, so the same view gets different colours
    # whenever the two frames contain different subsets of views.
    cycle_colors = plt.rcParams["axes.prop_cycle"].by_key().get("color", [])
    view_colors = {}
    if cycle_colors:
        view_colors = {
            str(v): cycle_colors[i % len(cycle_colors)] for i, v in enumerate(view_order)
        }

    # Robust column mapping (in case names differ slightly)
    leader_auc = _pick_first_existing_col(leader, ["AUC_oof", "AUC"])
    leader_pr  = _pick_first_existing_col(leader, ["PR_AUC_oof", "PR_AUC"])
    leader_bri = _pick_first_existing_col(leader, ["Brier_oof", "Brier"])

    hold_auc = _pick_first_existing_col(holdout, ["AUC", "AUC_holdout", "AUC_test"])
    hold_pr  = _pick_first_existing_col(holdout, ["PR_AUC", "PR_AUC_holdout", "PR_AUC_test"])
    hold_bri = _pick_first_existing_col(holdout, ["Brier", "Brier_holdout", "Brier_test"])

    rows = [
        ("AUC", leader_auc, hold_auc, "{:.3f}"),
        ("PR-AUC", leader_pr, hold_pr, "{:.3f}"),
        ("Brier", leader_bri, hold_bri, "{:.4f}"),
    ]

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(18, 12))
    fig.suptitle(f"Leaderboard vs Holdout | {target} ({TARGET_TO_DAYS.get(target, '?')} days)", fontsize=14)

    # Column headers
    axes[0, 0].set_title("Leaderboard (OOF CV)", fontsize=12)
    axes[0, 1].set_title("Holdout (Group split)", fontsize=12)

    # label -> bar container, collected from every axis so the global legend is
    # complete even when one side has no data (or fewer views) for this target.
    legend_by_label = {}

    for r, (metric_label, m_leader, m_hold, fmt) in enumerate(rows):
        sides = ((leader, m_leader), (holdout, m_hold))

        for c, (frame, metric_col) in enumerate(sides):
            ax = axes[r, c]
            grouped_bar_by_model_view_ax(
                df=frame, metric=metric_col, target=target, ax=ax,
                view_col=view_col, model_col=model_col,
                view_order=view_order, model_order=model_order,
                value_fmt=fmt, annotate=True, show_legend=False
            )
            # Recolour each view's bars with its fixed colour.
            for container in ax.containers:
                label = container.get_label()
                color = view_colors.get(label)
                if color is not None:
                    for patch in container:
                        patch.set_facecolor(color)
                legend_by_label.setdefault(label, container)

        # y labels (row metric label)
        axes[r, 0].set_ylabel(metric_label)

        # share y-limit per row for fair comparison; non-finite maxima (e.g. a
        # side with no rows for this target) are ignored
        row_maxima = [
            frame.loc[frame["target"] == target, metric_col].max()
            for frame, metric_col in sides
        ]
        finite_maxima = [v for v in row_maxima if pd.notna(v) and np.isfinite(v)]
        if finite_maxima:
            ymax = max(finite_maxima)
            # headroom for the rotated value labels above the bars
            pad = 0.12 * ymax if ymax > 0 else 0.05
            for c in [0, 1]:
                axes[r, c].set_ylim(0, ymax + pad)

    # One global legend (views) for the whole figure, in view order
    legend_labels = [str(v) for v in view_order if str(v) in legend_by_label]
    if legend_labels:
        fig.legend(
            [legend_by_label[lbl] for lbl in legend_labels], legend_labels,
            loc="lower center",
            ncol=min(len(legend_labels), 6),
            frameon=False
        )

    # leave room for the suptitle (top) and the global legend (bottom)
    fig.tight_layout(rect=[0, 0.05, 1, 0.96])
    fig.savefig(outpath, dpi=200)
    plt.close(fig)


def horizon_lineplot_best_per_view(
    df: pd.DataFrame,
    metric: str,
    title: str,
    outpath: Path,
    view_col: str = "view",
    higher_is_better: bool | None = None,
):
    """
    For each view, pick the best row per target and plot ``metric`` vs days.

    "Best" is decided by ``metric`` first, then by ``PR_AUC`` (higher wins) and
    ``Brier`` (lower wins) as tie-breakers when those columns exist.

    Args:
        df: metrics table with ``target``, ``view_col`` and ``metric`` columns;
            a ``days`` column is added via ``add_days_col`` when missing.
        metric: column to rank by and to plot on the y-axis.
        title: figure title.
        outpath: file the figure is written to (dpi=200).
        view_col: column whose values become the plotted lines.
        higher_is_better: ranking direction for ``metric``. ``None`` (default)
            infers it from the name: metrics containing "brier" are treated as
            losses (lower is better), everything else as scores.
    """
    tmp = df.copy()

    if "days" not in tmp.columns:
        tmp = add_days_col(tmp, target_col="target")

    if higher_is_better is None:
        # Brier score is an error measure: ranking it descending would select
        # the worst model as "best".
        higher_is_better = "brier" not in metric.lower()

    # pick best per (target, view)
    sort_cols = [metric]
    asc = [not higher_is_better]
    if "PR_AUC" in tmp.columns and metric != "PR_AUC":
        sort_cols.append("PR_AUC"); asc.append(False)
    if "Brier" in tmp.columns and metric != "Brier":
        sort_cols.append("Brier"); asc.append(True)

    # Keep the top row of every (target, view) group as a whole row.
    # groupby().first() would take the first non-null value per column and
    # could therefore combine values (e.g. metric and model) from different rows.
    best = (
        tmp.dropna(subset=["target", view_col])
           .sort_values(["target", view_col] + sort_cols, ascending=[True, True] + asc)
           .drop_duplicates(subset=["target", view_col], keep="first")
    )

    views = sorted(best[view_col].unique())

    fig, ax = plt.subplots(figsize=(9, 5))
    for v in views:
        vv = best[best[view_col] == v].sort_values("days")
        ax.plot(vv["days"], vv[metric], marker="o", label=v)

        # annotate each point with the name of the winning model
        if "model" in vv.columns:
            for _, row in vv.iterrows():
                ax.text(row["days"], row[metric], f"{row['model']}", fontsize=9, ha="left", va="bottom")

    ax.set_xlabel("Mortality horizon (days)")
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig(outpath, dpi=200)
    plt.close(fig)


# -------------------------
# LOAD FILES
# -------------------------
leader = _ensure_loaded(LEADERBOARD_PATH)

# Use the full holdout export when it exists, the summary file otherwise.
holdout = _ensure_loaded(
    HOLDOUT_FULL_PATH if HOLDOUT_FULL_PATH.exists() else HOLDOUT_PATH_FALLBACK
)

# The small summary file may lack view/model columns; add placeholder values
# so the view/model based plotting helpers still work.
placeholder_cols = {"view": "holdout_summary", "model": "unknown"}
for col_name, fill_value in placeholder_cols.items():
    if col_name not in holdout.columns:
        holdout[col_name] = fill_value

# Add days columns
leader = add_days_col(leader, target_col="target")
holdout = add_days_col(holdout, target_col="target")


# -------------------------
# NEW: GROUPED PANELS (LEADERBOARD + HOLDOUT) PER TARGET
# -------------------------
# If the holdout contains targets that are missing from the leaderboard (or
# vice versa), take the union so every target still gets a panel.
# Targets are ordered by horizon; names unknown to TARGET_TO_DAYS sort last.
targets_sorted = sorted(
    set(leader["target"].unique()).union(set(holdout["target"].unique())),
    key=lambda x: TARGET_TO_DAYS.get(x, 9999)
)

for t in targets_sorted:
    panel_barplots_leader_holdout(
        leader=leader,
        holdout=holdout,
        target=t,
        outpath=OUT_DIR / f"panel_bars_leaderboard_vs_holdout_{t}.png",
        view_col="view",
        model_col="model",
    )


# -------------------------
# KEEP: TREND PLOTS (BEST PER VIEW) AS YOU HAD THEM
# -------------------------
# The trend helper looks for un-suffixed PR_AUC / Brier tie-breaker columns,
# so the *_oof leaderboard columns are renamed first.
tmp_leader = leader.rename(columns={
    "AUC_oof": "AUC",
    "PR_AUC_oof": "PR_AUC",
    "Brier_oof": "Brier",
})

leader_trend_jobs = [
    ("AUC", "Leaderboard (OOF CV) | Best model per view | AUC vs horizon", OUT_DIR / "leaderboard_best_auc_vs_horizon.png"),
    ("PR_AUC", "Leaderboard (OOF CV) | Best model per view | PR-AUC vs horizon", OUT_DIR / "leaderboard_best_prauc_vs_horizon.png"),
    ("Brier", "Leaderboard (OOF CV) | Best model per view | Brier vs horizon", OUT_DIR / "leaderboard_best_brier_vs_horizon.png"),
]
for metric_name, plot_title, png_path in leader_trend_jobs:
    horizon_lineplot_best_per_view(
        df=tmp_leader,
        metric=metric_name,
        title=plot_title,
        outpath=png_path,
    )

if "view" in holdout.columns:
    holdout_trend_jobs = [
        ("AUC", "Holdout (Group split) | Best model per view | AUC vs horizon", OUT_DIR / "holdout_best_auc_vs_horizon.png"),
        ("PR_AUC", "Holdout (Group split) | Best model per view | PR-AUC vs horizon", OUT_DIR / "holdout_best_prauc_vs_horizon.png"),
        ("Brier", "Holdout (Group split) | Best model per view | Brier vs horizon", OUT_DIR / "holdout_best_brier_vs_horizon.png"),
    ]
    for metric_name, plot_title, png_path in holdout_trend_jobs:
        horizon_lineplot_best_per_view(
            df=holdout,
            metric=metric_name,
            title=plot_title,
            outpath=png_path,
        )

print(f"Saved plots into: {OUT_DIR.resolve()}")
