In [None]:

# Step 1: Create a new environment
#!python -m venv cleanenv

# Step 2: Activate it
# On Windows:
#!cleanenv\Scripts\activate
# On Mac/Linux:
#source cleanenv/bin/activate

# Step 3: Install only what you need
#!pip install numpy==1.26.4 scipy==1.13.0 scikit-learn==1.5.0 imbalanced-learn==0.13.0 tensorflow==2.18.0
# GOOD (pick one)
# 1) Install into the running kernel
!pip install --upgrade pip
!pip install numpy==1.26.4 scipy==1.13.0 scikit-learn==1.5.1 imbalanced-learn==0.13.0 tensorflow==2.18.0


!pip install tensorflow

!pip install tensorflow==2.18.0
import torch                         # PyTorch only
# OR
import tensorflow as tf              # TensorFlow only
# OR
import jax                           # JAX only

# Import TF first so cuDNN is registered once
import tensorflow as tf
import torch

import numpy, scipy, sklearn, imblearn, tensorflow as tf

print("numpy:", numpy.__version__)
print("scipy:", scipy.__version__)
print("scikit-learn:", sklearn.__version__)
print("imbalanced-learn:", imblearn.__version__)
print("tensorflow:", tf.__version__)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.combine import SMOTEENN, SMOTETomek
from sklearn.utils import resample
from collections import Counter

In [None]:
import pandas as pd

# Load the raw intraday export.
df = pd.read_csv("/kaggle/input/hmcdataset/intraday.csv")

# Parse timestamps (unparseable values become NaT) and derive the date and hour bucket.
df['start'] = pd.to_datetime(df['start'], errors='coerce')
df['date'] = df['start'].dt.date
df['hour'] = df['start'].dt.floor('h')  # lowercase 'h' avoids the deprecation warning for 'H'

# Keep only the columns needed for hourly aggregation and drop rows without a CGM value.
# (An earlier intermediate frame that also selected 'date'/'steps' and added a 'hypo'
# column was overwritten right away, and assigning to that slice triggered pandas'
# SettingWithCopyWarning, so it has been removed.)
df_cgm = df[['patientID', 'hour', 'cgm']].dropna(subset=['cgm'])

# STEP 1: Filter for complete hours (>= 4 CGM readings per patient-hour)
grouped = df_cgm.groupby(['patientID', 'hour'])
valid_hours = grouped.filter(lambda x: len(x) >= 4)

# STEP 2: Create per-hour features and the label.
# hypo_label is 1 when any reading within the hour is below 70, otherwise 0.
features = valid_hours.groupby(['patientID', 'hour']).agg(
    cgm_std=('cgm', 'std'),
    cgm_min=('cgm', 'min'),
    cgm_mean=('cgm', 'mean'),
    cgm_max=('cgm', 'max'),
    hypo_label=('cgm', lambda x: int((x < 70).any()))
).reset_index()

# STEP 3: Sort for time-series modeling
features = features.sort_values(['patientID', 'hour']).reset_index(drop=True)

# STEP 4: Display preview of the processed data
#print("LSTM-ready features (preview):")
#print(features.head())

import numpy as np

#Train/Test Split by Sample vs. by Patient
# Assign entire patients to either the training or testing set.
#Group and build sequences separately for each set to avoid data leakage.

from sklearn.model_selection import train_test_split

# Step 1: Split at the patient level (80/20) so no patient contributes rows to both sets.
unique_patients = features['patientID'].unique()
train_patients, test_patients = train_test_split(unique_patients, test_size=0.2, random_state=42)

# Step 2: Route every hourly feature row to the set its patient was assigned to.
train_df = features[features['patientID'].isin(train_patients)]
test_df = features[features['patientID'].isin(test_patients)]

# Configuration
# sequence_length is the number of consecutive hourly rows per input window.
sequence_length = 24
feature_cols = ['cgm_mean']  # You can include others like 'cgm_min', 'cgm_std' if available

def build_sequences(df, feature_cols, label_col='hypo_label', seq_len=None):
    """Turn per-hour feature rows into sliding windows with a next-row label.

    For each patient, rows are sorted by 'hour'; every run of `seq_len`
    consecutive rows becomes one sample and the label is taken from the row
    immediately after the window. Windows are built from consecutive *rows*;
    gaps in 'hour' are not detected here.

    Parameters
    ----------
    df : DataFrame with 'patientID', 'hour', `feature_cols` and `label_col`.
    feature_cols : list of column names used as window features.
    label_col : column holding the label.
    seq_len : window length in rows; defaults to the module-level
        `sequence_length` when omitted.

    Returns
    -------
    (X, y) with X of shape (n_windows, seq_len, len(feature_cols)) and y of
    shape (n_windows,). When no patient has more than `seq_len` rows, X is
    returned with shape (0, seq_len, len(feature_cols)) so that downstream
    code can still read the window/feature dimensions.
    """
    if seq_len is None:
        seq_len = sequence_length  # resolved at call time from the config above

    X_parts, y_parts = [], []
    for _, group in df.groupby('patientID'):
        group = group.sort_values('hour')
        # Convert once to NumPy; slicing arrays avoids one pandas .loc per window.
        values = group[feature_cols].to_numpy()
        labels = group[label_col].to_numpy()
        n_windows = len(group) - seq_len
        if n_windows <= 0:
            continue
        X_parts.append(np.stack([values[i:i + seq_len] for i in range(n_windows)]))
        y_parts.append(labels[seq_len:])  # label of the row right after each window

    if not X_parts:
        return np.empty((0, seq_len, len(feature_cols))), np.empty((0,), dtype=int)
    return np.concatenate(X_parts), np.concatenate(y_parts)

# Build sequences for each subset
X_train, y_train = build_sequences(train_df, feature_cols)
X_test, y_test = build_sequences(test_df, feature_cols)

#print(X_train[:5])   # Preview first 5 sequences from training set
#print(y_train[:20])  # Preview first 20 labels from training set



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l1
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf


In [None]:
# ================================
# Balanced LSTM Pipeline (All-in-1)
# ================================

# ------ Optional installs (Kaggle/Colab) ------
# !pip install --upgrade pip
# !pip install numpy==1.26.4 scipy==1.13.0 scikit-learn==1.5.1 imbalanced-learn==0.13.0 tensorflow==2.18.0

# ------ Imports ------
import os, time, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, roc_curve, precision_recall_curve,
    average_precision_score, auc, mean_squared_error
)

from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTEENN, SMOTETomek

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Bidirectional, Dense, Dropout, TimeDistributed, Flatten
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --------------------------
# Config
# --------------------------
# CSV read by the data-loading section below.
DATA_PATH         = "/kaggle/input/hmcdataset/intraday.csv"
# Default window length (number of hourly rows per sample) for build_sequences.
SEQUENCE_LENGTH   = 24
FEATURE_COLS      = ["cgm_mean"]      # you can add: "cgm_min","cgm_std","cgm_max"
# Decision thresholds are searched only inside [THR_MIN, THR_MAX].
THR_MIN, THR_MAX  = 0.40, 0.60
# Seed shared by the patient split and the resampling helpers.
RANDOM_STATE      = 42
AUGMENT_SIGMA     = 0.01              # std-dev of Gaussian jitter added to train windows; set None to disable
# Resampling strategies compared in the training loop; each is passed to seq_resample(method=...).
RESAMPLE_METHODS  = [
    "none",           # baseline: no resampling (training still uses class_weight + focal loss)
    "oversample_seq", # duplicate minority sequences
    "undersample_seq",# downsample majority sequences
    "smote",          # SMOTE on flattened sequences
    "smoteenn",       # SMOTE+ENN on flattened sequences
    "smotetomek"      # SMOTE+Tomek on flattened sequences
]

# --------------------------
# Utilities
# --------------------------
def _best_threshold_in_range(thresholds, scores, thr_min=THR_MIN, thr_max=THR_MAX):
    """Pick the threshold with the highest score, preferring the allowed window.

    Returns a `(threshold, in_window)` pair. When at least one finite threshold
    lies inside [thr_min, thr_max], the best-scoring one of those is returned
    with `True`. Otherwise the globally best-scoring threshold is clipped into
    the window and returned with `False`. NaN scores are ignored by the argmax.
    """
    thr_arr = np.asarray(thresholds, dtype=float)
    score_arr = np.asarray(scores, dtype=float)

    in_window = np.isfinite(thr_arr) & (thr_arr >= thr_min) & (thr_arr <= thr_max)
    if not in_window.any():
        # Nothing usable inside the window: take the global optimum and clamp it.
        best = int(np.nanargmax(score_arr))
        return float(np.clip(thr_arr[best], thr_min, thr_max)), False

    candidate_positions = np.where(in_window)[0]
    winner = candidate_positions[int(np.nanargmax(score_arr[in_window]))]
    return float(thr_arr[winner]), True

def focal_loss(gamma=2.0, alpha=0.25):
    """Build a binary focal loss for a sigmoid (probability) output.

    Per element the loss is ``alpha_t * (1 - p_t) ** gamma * CE`` where CE is
    the binary cross-entropy, ``p_t`` is the probability assigned to the true
    class and ``alpha_t`` is ``alpha`` for positives and ``1 - alpha`` for
    negatives.

    Parameters
    ----------
    gamma : focusing exponent; 0 reduces to alpha-weighted cross-entropy.
    alpha : weight given to the positive class.

    Returns
    -------
    A ``loss(y_true, y_pred)`` callable returning one loss value per sample.

    Notes
    -----
    The cross-entropy is computed element-wise here. The previous version used
    ``BinaryCrossentropy(reduction=NONE)``, which averages over the last axis
    and returns shape ``(batch,)``; multiplying that by the ``(batch, 1)``
    weights broadcast to ``(batch, batch)`` and mixed every sample's weight
    with every other sample's cross-entropy.
    """
    eps = tf.keras.backend.epsilon()

    def loss(y_true, y_pred):
        # Clip so that both logarithms below stay finite.
        y_pred = tf.clip_by_value(tf.convert_to_tensor(y_pred), eps, 1.0 - eps)
        # Give labels the dtype and shape of the predictions so every product
        # below is strictly element-wise (no (batch,) vs (batch, 1) broadcasting).
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))

        ce = -(y_true * tf.math.log(y_pred) + (1.0 - y_true) * tf.math.log(1.0 - y_pred))
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        modulating = tf.pow(1.0 - p_t, gamma)
        # Average over the output axis -> one value per sample, as Keras expects.
        return tf.reduce_mean(alpha_t * modulating * ce, axis=-1)

    return loss

def _safe_confusion_matrix(y_true, y_pred):
    """Return the confusion matrix with rows/columns fixed to labels [0, 1].

    Passing ``labels=[0, 1]`` makes the result 2x2 even when one class is
    missing from the inputs, so `cm.ravel()` always yields (tn, fp, fn, tp).
    The former zero-padding branch could never run for that reason and was
    removed.
    """
    return confusion_matrix(y_true, y_pred, labels=[0, 1])

def _specificity_from_cm(cm, pos_label=1):
    tn, fp, fn, tp = cm.ravel()
    if pos_label == 1:  # negatives are 0
        return tn / (tn + fp + 1e-8)
    else:               # negatives are 1
        return tp / (tp + fn + 1e-8)

def evaluate_full_metrics(y_true, y_pred, y_prob=None):
    """Compute per-class and overall classification metrics as a flat dict.

    Parameters
    ----------
    y_true : true labels (cast to int and flattened).
    y_pred : hard predictions (cast to int and flattened).
    y_prob : optional predicted probabilities; when given, ROC-AUC, PR-AUC and
        the MSE/RMSE of the probabilities are added, otherwise those four
        entries are NaN.

    Returns
    -------
    dict keyed "Class<lbl>/<Metric>" and "Overall/<Metric>". A ValueError from
    either AUC scorer is recorded as NaN instead of propagating.
    """
    truth = np.asarray(y_true).astype(int).ravel()
    hard = np.asarray(y_pred).astype(int).ravel()
    cm = _safe_confusion_matrix(truth, hard)
    scorers = (("Precision", precision_score), ("Recall", recall_score), ("F1", f1_score))

    metrics = {}

    # Per-class view: each label in turn is treated as the positive class.
    for lbl in (0, 1):
        for name, scorer in scorers:
            metrics[f"Class{lbl}/{name}"] = scorer(truth, hard, pos_label=lbl, zero_division=0)
        metrics[f"Class{lbl}/Specificity"] = _specificity_from_cm(cm, pos_label=lbl)
        metrics[f"Class{lbl}/Support"] = int(np.sum(truth == lbl))

    # Overall metrics from the hard predictions.
    metrics["Overall/Accuracy"] = accuracy_score(truth, hard)
    for name, scorer in scorers:
        for averaging in ("macro", "weighted"):
            metrics[f"Overall/{name}_{averaging}"] = scorer(
                truth, hard, average=averaging, zero_division=0
            )
    mse_pred = mean_squared_error(truth, hard)
    metrics["Overall/MSE_pred"] = mse_pred
    metrics["Overall/RMSE_pred"] = float(np.sqrt(mse_pred))

    # Probability-based metrics.
    if y_prob is None:
        for key in ("Overall/ROC-AUC", "Overall/PR-AUC", "Overall/MSE_prob", "Overall/RMSE_prob"):
            metrics[key] = np.nan
        return metrics

    prob = np.asarray(y_prob, dtype=float).ravel()
    for key, scorer in (("Overall/ROC-AUC", roc_auc_score),
                        ("Overall/PR-AUC", average_precision_score)):
        try:
            metrics[key] = scorer(truth, prob)
        except ValueError:
            metrics[key] = np.nan
    mse_prob = mean_squared_error(truth, prob)
    metrics["Overall/MSE_prob"] = mse_prob
    metrics["Overall/RMSE_prob"] = float(np.sqrt(mse_prob))

    return metrics

def make_class_weight(y):
    """Inverse-frequency class weights for binary labels, as {0: w0, 1: w1}.

    Each weight is ``N / (2 * n_class)``. A class with no samples is counted
    as one sample so the division is always defined; N is the sum of the two
    (floored) counts.
    """
    labels = np.asarray(y).astype(int).ravel()
    counts = {cls: max(1, (labels == cls).sum()) for cls in (0, 1)}
    total = counts[0] + counts[1]
    return {cls: float(total / (2.0 * n_cls)) for cls, n_cls in counts.items()}

# --------------------------
# Data loading & preprocessing
# --------------------------
# Read the raw export and bucket each reading into its hour.
# errors="coerce" turns unparseable timestamps into NaT.
df = pd.read_csv(DATA_PATH)
df["start"] = pd.to_datetime(df["start"], errors="coerce")
df["hour"]  = df["start"].dt.floor("h")

# keep & aggregate (≥4 CGM readings per hour)
# Rows without a CGM value are dropped before counting readings per patient-hour.
df_cgm = df[["patientID","hour","cgm"]].dropna(subset=["cgm"])
valid  = df_cgm.groupby(["patientID","hour"]).filter(lambda x: len(x) >= 4)

# One row per (patient, hour): summary statistics of the CGM readings plus
# hypo_label = 1 when any reading in that hour is below 70, else 0.
# Hours dropped by the filter above leave gaps in a patient's hourly series.
features = (valid.groupby(["patientID","hour"])
                 .agg(cgm_std=("cgm","std"),
                      cgm_min=("cgm","min"),
                      cgm_mean=("cgm","mean"),
                      cgm_max=("cgm","max"),
                      hypo_label=("cgm", lambda x: int((x<70).any())))
                 .reset_index()
                 .sort_values(["patientID","hour"])
                 .reset_index(drop=True))

# patient-level split
# Whole patients go to either train or test (80/20), so no patient is in both.
unique_patients = features["patientID"].unique()
train_pat, test_pat = train_test_split(unique_patients, test_size=0.20, random_state=RANDOM_STATE)
train_df = features[features["patientID"].isin(train_pat)]
test_df  = features[features["patientID"].isin(test_pat)]

# --------------------------
# Sequences
# --------------------------
def build_sequences(df, feature_cols, label_col="hypo_label", seq_len=SEQUENCE_LENGTH):
    """Turn per-hour feature rows into sliding windows with a next-row label.

    For each patient, rows are sorted by "hour"; every run of `seq_len`
    consecutive rows becomes one sample and the label (cast to int) is taken
    from the row immediately after the window. Windows are built from
    consecutive *rows*; gaps in "hour" are not detected here.

    Parameters
    ----------
    df : DataFrame with "patientID", "hour", `feature_cols` and `label_col`.
    feature_cols : list of column names used as window features.
    label_col : column holding the label.
    seq_len : window length in rows.

    Returns
    -------
    (X, y) with X of shape (n_windows, seq_len, len(feature_cols)) and y of
    shape (n_windows,). When no patient has more than `seq_len` rows, X is
    returned with shape (0, seq_len, len(feature_cols)) instead of a 1-D empty
    array, so callers that unpack `X.shape` keep working.
    """
    X_parts, y_parts = [], []
    for _, grp in df.groupby("patientID"):
        grp = grp.sort_values("hour")
        # Convert once to NumPy; slicing arrays avoids one pandas .loc per window.
        values = grp[feature_cols].to_numpy()
        labels = grp[label_col].to_numpy().astype(int)
        n_windows = len(grp) - seq_len
        if n_windows <= 0:
            continue
        X_parts.append(np.stack([values[i:i + seq_len] for i in range(n_windows)]))
        y_parts.append(labels[seq_len:])  # label of the row right after each window

    if not X_parts:
        return np.empty((0, seq_len, len(feature_cols))), np.empty((0,), dtype=int)
    return np.concatenate(X_parts), np.concatenate(y_parts)

X_train, y_train = build_sequences(train_df, FEATURE_COLS)
X_test,  y_test  = build_sequences(test_df,  FEATURE_COLS)

# sanity: no leakage
assert set(train_df.patientID).isdisjoint(set(test_df.patientID))

# tiny augmentation (optional)
def augment(X, y, sigma=AUGMENT_SIGMA, random_state=RANDOM_STATE):
    """Double the training set by appending a Gaussian-jittered copy of X.

    Parameters
    ----------
    X : array of training windows.
    y : labels aligned with X; the jittered copies reuse the same labels.
    sigma : standard deviation of the additive noise, in the same units as X
        (it is absolute, not relative to the feature scale). ``None`` or a
        non-positive value disables augmentation and returns the inputs as is.
    random_state : seed for the noise generator. The noise was previously
        drawn from the unseeded global ``np.random`` state, which made every
        run different even though the rest of the pipeline is seeded.

    Returns
    -------
    (X_aug, y_aug): the originals followed by the jittered copies.
    """
    if sigma is None or sigma <= 0:
        return X, y
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)
    rng = np.random.default_rng(random_state)
    jittered = X + rng.normal(0.0, sigma, size=X.shape)
    return np.concatenate([X, jittered], axis=0), np.concatenate([y, y], axis=0)

X_train, y_train = augment(X_train, y_train)

# --------------------------
# Resampling (sequence level & SMOTE family)
# --------------------------
RNG = np.random.default_rng(RANDOM_STATE)

def seq_resample(X, y, method="none", random_state=RANDOM_STATE):
    """
    Rebalance windowed training data.

    method ∈ {
      "none",
      "oversample_seq", "undersample_seq",        # window-level (no interpolation)
      "smote", "smoteenn", "smotetomek"           # flattened window resampling
    }

    Parameters
    ----------
    X : array of shape (n, T, F).
    y : binary labels (0/1) aligned with X.
    method : one of the names above.
    random_state : seed used by every method. The window-level methods now
        build their own generator from it; they previously drew from a shared
        module-level generator, so results depended on call order and this
        argument was ignored.

    Returns
    -------
    (X_resampled, y_resampled); X keeps the (T, F) window shape.

    Raises
    ------
    ValueError for an unknown `method` or when X is not 3-dimensional.

    Notes
    -----
    If one class is absent the data is returned unchanged for every method.
    The SMOTE family is also skipped (with a printed notice) when the minority
    class has fewer than 2 windows, because SMOTE needs at least one
    neighbour. `k_neighbors` is capped at `minority - 1` for all three SMOTE
    variants.
    """
    X = np.asarray(X)
    y = np.asarray(y).astype(int).ravel()
    if X.ndim != 3:
        raise ValueError(f"Expected X with shape (n, T, F); got shape {X.shape}")
    n, T, F = X.shape

    if method == "none":
        return X, y

    seq_methods = {"oversample_seq", "undersample_seq"}
    smote_methods = {"smote", "smoteenn", "smotetomek"}
    if method not in seq_methods | smote_methods:
        raise ValueError(f"Unknown resampling method: {method}")

    idx0 = np.where(y == 0)[0]
    idx1 = np.where(y == 1)[0]
    n0, n1 = len(idx0), len(idx1)
    if n0 == 0 or n1 == 0:
        # Nothing to balance against.
        return X, y

    if method in seq_methods:
        rng = np.random.default_rng(random_state)
        minority, majority = (idx1, idx0) if n1 <= n0 else (idx0, idx1)
        if method == "oversample_seq":
            # Repeat minority windows until both classes have the same count.
            extra = rng.choice(minority, size=len(majority) - len(minority), replace=True)
            keep = np.concatenate([idx0, idx1, extra])
        else:
            # Keep a random majority subset as large as the minority.
            kept_majority = rng.choice(majority, size=len(minority), replace=False)
            keep = np.concatenate([kept_majority, minority])
        rng.shuffle(keep)
        return X[keep], y[keep]

    # SMOTE family on flattened sequences
    minority_n = min(n0, n1)
    if minority_n < 2:
        print(f"seq_resample: minority class has {minority_n} window(s); "
              f"skipping '{method}' and returning the data unchanged")
        return X, y

    # Size the neighbourhood from the actual minority class (either label).
    k_neighbors = min(5, minority_n - 1)
    smote = SMOTE(random_state=random_state, k_neighbors=k_neighbors)
    if method == "smote":
        sampler = smote
    elif method == "smoteenn":
        sampler = SMOTEENN(random_state=random_state, smote=smote)
    else:
        sampler = SMOTETomek(random_state=random_state, smote=smote)

    Xr, yr = sampler.fit_resample(X.reshape(n, -1), y)
    return Xr.reshape(-1, T, F), yr

def make_balanced_test(X_test, y_test, random_state=RANDOM_STATE):
    """Subsample the test set to an equal number of class-0 and class-1 windows.

    The larger class is randomly reduced (without replacement) to the size of
    the smaller one and the result is shuffled. If either class is missing,
    the inputs are returned unchanged (as arrays, labels cast to int).
    """
    features = np.asarray(X_test)
    labels = np.asarray(y_test).astype(int).ravel()

    neg_idx = np.where(labels == 0)[0]
    pos_idx = np.where(labels == 1)[0]
    per_class = min(len(neg_idx), len(pos_idx))
    if per_class == 0:
        return features, labels

    sampler = np.random.RandomState(random_state)
    # Draw order (class 0 first, then class 1) is part of the seeded result.
    picked_neg = sampler.choice(neg_idx, per_class, replace=False)
    picked_pos = sampler.choice(pos_idx, per_class, replace=False)
    selection = np.concatenate([picked_neg, picked_pos])
    sampler.shuffle(selection)
    return features[selection], labels[selection]

X_test_bal, y_test_bal = make_balanced_test(X_test, y_test)

# --------------------------
# Models
# --------------------------
def _stacked_lstm(input_shape, first_units, second_units, dense_units,
                  make_regularizer=None, bidirectional=False):
    """Two recurrent layers (each followed by Dropout 0.2), a ReLU Dense layer and a sigmoid output."""

    def reg():
        # A fresh regularizer instance per layer, or no keyword at all.
        return {} if make_regularizer is None else {"kernel_regularizer": make_regularizer(1e-5)}

    def recurrent(units, **kwargs):
        layer = LSTM(units, **kwargs)
        return Bidirectional(layer) if bidirectional else layer

    return Sequential([
        Input(shape=input_shape),
        recurrent(first_units, return_sequences=True, **reg()), Dropout(0.2),
        recurrent(second_units, **reg()), Dropout(0.2),
        Dense(dense_units, activation='relu', **reg()),
        Dense(1, activation='sigmoid')
    ])


def define_models(input_shape):
    """Return freshly built, uncompiled candidate models keyed by name.

    Every model shares the same layout and differs only in layer widths,
    kernel regularisation (L1 / L2 at 1e-5) and whether the recurrent layers
    are bidirectional.
    """
    return {
        "LSTM_100":   _stacked_lstm(input_shape, 100, 50, 25),
        "LSTM_50":    _stacked_lstm(input_shape, 50, 25, 10),
        "LSTM_25_L1": _stacked_lstm(input_shape, 50, 25, 10, make_regularizer=l1),
        "LSTM_25_L2": _stacked_lstm(input_shape, 50, 25, 10, make_regularizer=l2),
        "BiLSTM":     _stacked_lstm(input_shape, 64, 32, 16, bidirectional=True),
    }

# --------------------------
# Train + evaluate
# --------------------------
# Output folders for model checkpoints, figures and CSV summaries.
for _out_dir in ("checkpoints", "plots", "outputs"):
    os.makedirs(_out_dir, exist_ok=True)

# Stores filled in by train_eval_one.
results = {}          # key -> metrics dict
roc_data = {}         # (method, model) -> (fpr, tpr, auc)
pr_data = {}          # (method, model) -> (recall, precision, ap)
best_thresholds = {}  # (method, model) -> {"youden": t, "f1": t}

def _holdout_split(X, y, val_frac=0.10, random_state=RANDOM_STATE):
    """Carve a validation slice out of the training windows (stratified when possible).

    Returns (X_fit, X_val, y_fit, y_val). With too few windows to split, the
    training data itself is returned as the validation data.
    """
    y = np.asarray(y).astype(int).ravel()
    n_val = int(np.ceil(val_frac * len(y)))
    n_fit = len(y) - n_val
    if n_val < 1 or n_fit < 1:
        return X, X, y, y
    _, counts = np.unique(y, return_counts=True)
    can_stratify = counts.min() >= 2 and n_val >= counts.size and n_fit >= counts.size
    return train_test_split(X, y, test_size=val_frac, random_state=random_state,
                            stratify=y if can_stratify else None)


def _tune_thresholds(y_true, y_prob):
    """Return (Youden-J threshold, max-F1 threshold), both restricted to [THR_MIN, THR_MAX]."""
    y_true = np.asarray(y_true).astype(int).ravel()
    if np.unique(y_true).size < 2:
        # ROC / PR curves are undefined with a single class: use the window midpoint.
        midpoint = 0.5 * (THR_MIN + THR_MAX)
        return midpoint, midpoint
    fpr, tpr, thr_roc = roc_curve(y_true, y_prob)
    t_roc, _ = _best_threshold_in_range(thr_roc, tpr - fpr)
    prec, rec, thr_pr = precision_recall_curve(y_true, y_prob)
    # The last precision/recall point has no associated threshold.
    f1s = 2 * prec[:-1] * rec[:-1] / (prec[:-1] + rec[:-1] + 1e-8)
    t_pr, _ = _best_threshold_in_range(thr_pr, f1s)
    return t_roc, t_pr


def train_eval_one(method_name, model_name, model, Xtr, ytr, Xte, yte, XteB, yteB,
                   Xval=None, yval=None):
    """Train one model and record its metrics at several decision thresholds.

    Parameters
    ----------
    method_name, model_name : labels used in result keys and log messages.
    model : uncompiled Keras model; compiled here with Adam and focal loss.
    Xtr, ytr : (possibly resampled) training windows and labels.
    Xte, yte : original test windows and labels (evaluation only).
    XteB, yteB : class-balanced test windows and labels (evaluation only).
    Xval, yval : optional validation data for early stopping, checkpointing
        and threshold tuning. When omitted, 10% of the training windows are
        held out. A patient-level validation split passed explicitly is the
        better choice, because a random hold-out of resampled/augmented
        windows can contain near-duplicates of training windows.

    Side effects
    ------------
    Fills the module-level `results`, `roc_data`, `pr_data` and
    `best_thresholds` dicts and writes a checkpoint under ``checkpoints/``.

    Notes
    -----
    * The test set is no longer used as `validation_data` or for choosing
      thresholds; previously both early stopping and the thresholds were
      fitted on the data they were later reported on.
    * Result keys use four decimals (``thr_0.4032``). With two decimals a
      tuned threshold such as 0.4032 produced the same key as 0.40 and
      overwrote its metrics.
    * "train" metrics are computed on all of `Xtr`, including any windows
      held out for validation.
    """
    tag = f"{method_name}__{model_name}"
    print(f"\n🚀 Training [{tag}] with class-weighted focal loss")

    if Xval is None or yval is None:
        Xfit, Xval, yfit, yval = _holdout_split(Xtr, ytr)
    else:
        Xfit, yfit = Xtr, ytr

    es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
    cp = ModelCheckpoint(f"checkpoints/{tag}.h5", save_best_only=True, monitor='val_loss', verbose=0)
    model.compile(optimizer='adam', loss=focal_loss(), metrics=['accuracy'])

    class_weight = make_class_weight(yfit)
    t0 = time.time()
    model.fit(Xfit, yfit, epochs=5, batch_size=32,
              validation_data=(Xval, yval),
              callbacks=[es, cp], verbose=1,
              class_weight=class_weight)
    print(f"⏱️ Training Time: {time.time()-t0:.2f}s")

    # probabilities
    p_tr  = model.predict(Xtr,  verbose=0).ravel()
    p_val = model.predict(Xval, verbose=0).ravel()
    p_te  = model.predict(Xte,  verbose=0).ravel()
    p_teB = model.predict(XteB, verbose=0).ravel()

    # thresholds tuned on VALIDATION data, constrained to [THR_MIN, THR_MAX]
    t_roc, t_pr = _tune_thresholds(yval, p_val)

    # curves on the ORIGINAL test set, for reporting/plots only
    yte_flat = np.asarray(yte).astype(int).ravel()
    if np.unique(yte_flat).size == 2:
        fpr, tpr, _ = roc_curve(yte_flat, p_te)
        auc_roc = auc(fpr, tpr)
        prec, rec, _ = precision_recall_curve(yte_flat, p_te)
        ap_val = average_precision_score(yte_flat, p_te)
    else:
        # Curves are undefined with a single class in the test labels.
        fpr, tpr, auc_roc = np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.nan
        prec, rec, ap_val = np.array([]), np.array([]), np.nan

    roc_data[(method_name, model_name)] = (fpr, tpr, auc_roc)
    pr_data[(method_name, model_name)]  = (rec, prec, ap_val)
    best_thresholds[(method_name, model_name)] = {"youden": t_roc, "f1": t_pr}
    print(f"📌 [{tag}] thresholds → Youden={t_roc:.4f}, PR-F1={t_pr:.4f} (window [{THR_MIN},{THR_MAX}])")

    # Round before de-duplicating so each threshold maps to exactly one result key.
    eval_ts = sorted({round(float(t), 4) for t in (THR_MIN, 0.50, THR_MAX, t_roc, t_pr)})
    # evaluate at all thresholds on train / test / testBalanced
    for t in eval_ts:
        yhat_tr  = (p_tr  >= t).astype(int)
        yhat_te  = (p_te  >= t).astype(int)
        yhat_teB = (p_teB >= t).astype(int)

        results[f"{tag}__thr_{t:.4f}__train"]         = evaluate_full_metrics(ytr,  yhat_tr,  p_tr)
        results[f"{tag}__thr_{t:.4f}__test"]          = evaluate_full_metrics(yte,  yhat_te,  p_te)
        results[f"{tag}__thr_{t:.4f}__testBalanced"]  = evaluate_full_metrics(yteB, yhat_teB, p_teB)

# run all methods x models
# (The former `models = define_models(...)` built five models that were never used.)
input_shape = (X_train.shape[1], X_train.shape[2])

for METHOD in RESAMPLE_METHODS:
    Xtr_rs, ytr_rs = seq_resample(X_train, y_train, method=METHOD)
    print(f"\n🔁 Resampling: {METHOD} → X={Xtr_rs.shape}, y={Counter(ytr_rs)}")
    # Release Keras state left by the previous method's models before building new ones,
    # so memory does not keep growing across the whole methods x models grid.
    tf.keras.backend.clear_session()
    for mname, model in define_models(input_shape).items():
        train_eval_one(METHOD, mname, model, Xtr_rs, ytr_rs, X_test, y_test, X_test_bal, y_test_bal)

# --------------------------
# Curves (optional plots)
# --------------------------
# Side-by-side ROC (left) and precision-recall (right) curves, one line per (method, model).
fig, (ax_roc, ax_pr) = plt.subplots(1, 2, figsize=(14, 6))

# ROC
for (meth, mname), (fpr, tpr, auc_roc) in roc_data.items():
    ax_roc.plot(fpr, tpr, label=f'{meth}/{mname} (AUC={auc_roc:.3f})')
ax_roc.plot([0, 1], [0, 1], '--', label='Random')
ax_roc.set_xlabel('FPR')
ax_roc.set_ylabel('TPR')
ax_roc.set_title('ROC')
ax_roc.legend(fontsize=8)

# PR
for (meth, mname), (rec, prec, ap) in pr_data.items():
    ax_pr.plot(rec, prec, label=f'{meth}/{mname} (AP={ap:.3f})')
ax_pr.set_xlabel('Recall')
ax_pr.set_ylabel('Precision')
ax_pr.set_title('PR')
ax_pr.legend(fontsize=8)

fig.tight_layout()
fig.savefig("plots/combined_roc_pr_curves.png", dpi=300)
plt.show()

# --------------------------
# --------------------------
# Summaries (robust)
# --------------------------
results_df = pd.DataFrame(results).T

# Keep the original key for debugging
results_df = results_df.reset_index().rename(columns={"index":"Key"})

# Key format: "<method>__<model>__thr_<thr>__<split>"
k = results_df["Key"].str.strip()
parts = k.str.split("__")

# Split is the last chunk. Unknown values become a real missing value; the earlier
# nested np.where(..., np.nan) mixed strings with NaN and produced the *string* 'nan',
# which value_counts(dropna=False) then reported as an ordinary category.
split = parts.str[-1]
split = split.where(split.isin(["train", "test", "testBalanced"]))
results_df["Split"] = split

# Method, Model, Threshold extraction
results_df["Method"] = parts.str[0]
results_df["Model"]  = parts.str[1]
# threshold lives in the third chunk like "thr_0.40"
thr_str = parts.str[2].str.replace("thr_","", regex=False)
results_df["Threshold"] = pd.to_numeric(thr_str, errors="coerce")

# Save
results_df.round(6).to_csv("outputs/results_summary_all.csv", index=False)
print("\n📁 Saved files:")
print(" - plots/combined_roc_pr_curves.png")
print(" - outputs/results_summary_all.csv")

# Quick sanity
print("\nSplit counts:")
print(results_df["Split"].value_counts(dropna=False))

# Leaderboards
eval_test_df  = results_df[results_df["Split"]=="test"].copy()
eval_tbal_df  = results_df[results_df["Split"]=="testBalanced"].copy()

def top_k(df, by_col, k=10, cols=None):
    """Return the `k` best rows of `df` ranked by `by_col` (descending), rounded to 4 decimals.

    Only the columns listed in `cols` that actually exist in `df` are shown;
    `cols` defaults to a compact leaderboard selection. An empty `df` yields
    an empty frame whose columns are `cols` (or none when `cols` is omitted).
    """
    if df.empty:
        return pd.DataFrame(columns=(cols or []))

    wanted = cols
    if wanted is None:
        wanted = ['Method','Model','Threshold','Overall/F1_weighted','Overall/Recall_weighted',
                  'Overall/Precision_weighted','Overall/ROC-AUC','Overall/PR-AUC','Overall/Accuracy']

    visible = [name for name in wanted if name in df.columns]
    ranked = df.sort_values(by_col, ascending=False)
    return ranked[visible].head(k).round(4)

# Leaderboards for the original and the class-balanced test sets.
for _board_name, _board_df in (("ORIGINAL TEST", eval_test_df), ("BALANCED TEST", eval_tbal_df)):
    print(f"\n🔽 {_board_name} — top by Overall/F1_weighted")
    print(top_k(_board_df, 'Overall/F1_weighted'))
    if _board_name == "ORIGINAL TEST":
        print()

def best_per_model(df):
    """For each (Method, Model) pair keep the row with the highest weighted F1.

    The surviving rows are returned sorted by that score, best first. An empty
    frame is returned as is.
    """
    if df.empty:
        return df
    score = 'Overall/F1_weighted'
    winner_index = df.groupby(['Method', 'Model'])[score].idxmax()
    winners = df.loc[winner_index]
    return winners.sort_values([score], ascending=False)

# Columns shown in the "overall" table; names missing from the results are skipped when printing.
overall_cols = [
    'Overall/Accuracy','Overall/Precision_macro','Overall/Recall_macro','Overall/F1_macro',
    'Overall/Precision_weighted','Overall/Recall_weighted','Overall/F1_weighted',
    'Overall/Specificity(label=1)','Overall/ROC-AUC','Overall/PR-AUC',
    'Overall/MSE_pred','Overall/RMSE_pred','Overall/MSE_prob','Overall/RMSE_prob'
]
# Columns shown in the per-class breakdown.
class_cols = [
    'Class0/Precision','Class0/Recall','Class0/F1','Class0/Specificity','Class0/Support',
    'Class1/Precision','Class1/Recall','Class1/F1','Class1/Specificity','Class1/Support'
]


def _report_best(heading, frame, missing_msg):
    """Print the best row per (Method, Model) of `frame`: overall metrics, then per-class metrics."""
    print(heading)
    best = best_per_model(frame)
    if best.empty:
        print(missing_msg)
        return best
    id_cols = ['Method','Model','Threshold']
    print(best[id_cols + [c for c in overall_cols if c in best.columns]].round(4))
    print("\n--- Per-class breakdown:")
    print(best[id_cols + [c for c in class_cols if c in best.columns]].round(4))
    return best


best_test = _report_best(
    "\n=== ORIGINAL TEST — best per (Method,Model) ===",
    eval_test_df,
    "⚠️ No TEST rows found — check Split counts above.",
)

best_tbal = _report_best(
    "\n=== BALANCED TEST — best per (Method,Model) ===",
    eval_tbal_df,
    "⚠️ No BALANCED TEST rows found — check Split counts above.",
)


In [None]:
# ===== ✅ Step 7B: Inline displays (no Excel needed) =====
from IPython.display import display
import pandas as pd

pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 180)
pd.set_option("display.precision", 4)

# --------------------------------------
# Flatten results -> DataFrame
# --------------------------------------
# NOTE: this rebuilds `results_df` indexed by the raw result key.
results_df = pd.DataFrame(results).T

# Keys look like "<method>__<model>__thr_<thr>__<split>" (double underscores).
# Extract from a Series that carries the same index as results_df so the new columns
# align row by row. `Index.str.extract` returns a frame with a fresh 0..n-1 index, so
# assigning its columns matched no rows and left Split/Model/Threshold entirely NaN;
# the old patterns also expected single underscores and never matched these keys.
_key_fields = results_df.index.to_series().str.extract(
    r'^(?P<Model>.+)__thr_(?P<Threshold>[0-9.]+)__(?P<Split>train|testBalanced|test)$'
)
results_df["Split"] = _key_fields["Split"]
# "Model" is everything before the threshold, i.e. "<method>__<model>".
results_df["Model"] = _key_fields["Model"]
results_df["Threshold"] = _key_fields["Threshold"].astype(float)

# --------------------------------------
# Define column groups (safe + flexible)
# --------------------------------------
overall_cols = [
    'Overall/Accuracy','Overall/Precision_macro','Overall/Recall_macro','Overall/F1_macro',
    'Overall/Precision_weighted','Overall/Recall_weighted','Overall/F1_weighted',
    'Overall/ROC-AUC','Overall/PR-AUC',
    'Overall/MSE_pred','Overall/RMSE_pred','Overall/MSE_prob','Overall/RMSE_prob'
]

class_cols = [
    'Class0/Precision','Class0/Recall','Class0/F1','Class0/Specificity','Class0/Support',
    'Class1/Precision','Class1/Recall','Class1/F1','Class1/Specificity','Class1/Support'
]

# helper: keep only existing columns
def safe_cols(df, cols):
    """Return the entries of `cols` that are columns of `df`, keeping their given order."""
    available = df.columns
    return list(filter(lambda name: name in available, cols))

# --------------------------------------
# Select best rows per model
# --------------------------------------
def best_per_model(df):
    """Keep, for each value of 'Model', the row with the highest weighted F1, best first."""
    score = 'Overall/F1_weighted'
    winner_index = df.groupby('Model')[score].idxmax()
    winners = df.loc[winner_index]
    return winners.sort_values(score, ascending=False)

# --------------------------------------
# TEST — Top 20 by F1_weighted
# --------------------------------------
def _show_table(title, frame, value_cols, head=None):
    """Print `title`, then display Model/Threshold plus the available `value_cols`, rounded to 4 decimals."""
    print(title)
    view = frame[['Model','Threshold'] + safe_cols(frame, value_cols)]
    if head is not None:
        view = view.head(head)
    display(view.round(4))


# --------------------------------------
# TEST — Top 20 by F1_weighted
# --------------------------------------
test_all = results_df[results_df['Split'] == 'test'].copy()
test_sorted = test_all.sort_values('Overall/F1_weighted', ascending=False)
_show_table("\n🔎 TEST — All models & thresholds (Top 20 by F1_weighted)",
            test_sorted, overall_cols, head=20)

# --------------------------------------
# TRAIN — Top 20 by F1_weighted
# --------------------------------------
train_all = results_df[results_df['Split'] == 'train'].copy()
train_sorted = train_all.sort_values('Overall/F1_weighted', ascending=False)
_show_table("\n🔎 TRAIN — All models & thresholds (Top 20 by F1_weighted)",
            train_sorted, overall_cols, head=20)

# --------------------------------------
# BEST PER MODEL (TEST)
# --------------------------------------
test_best = best_per_model(test_all)
_show_table("\n🏁 TEST — Best per model (overall metrics @ best threshold)",
            test_best, overall_cols)
_show_table("\n🏁 TEST — Best per model (per-label metrics @ best threshold)",
            test_best, class_cols)

# --------------------------------------
# BEST PER MODEL (TRAIN)
# --------------------------------------
train_best = best_per_model(train_all)
_show_table("\n🏋️ TRAIN — Best per model (overall metrics @ best threshold)",
            train_best, overall_cols)
_show_table("\n🏋️ TRAIN — Best per model (per-label metrics @ best threshold)",
            train_best, class_cols)

# --------------------------------------
# Compact view: TEST
# --------------------------------------
compact_cols = ['Overall/F1_weighted','Overall/Recall_weighted','Overall/PR-AUC','Overall/Accuracy']
print("\n📋 TEST — Compact table per model & threshold (sorted by F1_weighted)")
# Within each model, list thresholds from best to worst weighted F1.
compact = test_all[['Model','Threshold'] + safe_cols(test_all, compact_cols)]
compact = compact.sort_values(['Model','Overall/F1_weighted'], ascending=[True, False])
display(compact.round(4).reset_index(drop=True))


In [None]:
from collections import Counter
import numpy as np
import pandas as pd

def _to_1d_int(y):
    y = np.asarray(y).ravel()
    # cast to int if your labels are floats like 0.0/1.0
    try:
        return y.astype(int)
    except Exception:
        return y

# Flatten the window labels of each split to 1-D integer arrays.
# NOTE(review): these are whatever `y_train` / `y_test` currently hold in the kernel; if
# this runs after the pipeline cell, `y_train` has already been through `augment` — confirm
# that is the distribution you want to report.
y_tr = _to_1d_int(y_train)
y_te = _to_1d_int(y_test)
# "Full" here means train and test labels concatenated.
y_all = np.concatenate([y_tr, y_te])

def class_counts(y):
    """Return {class label: number of occurrences}, with keys in ascending label order."""
    tally = Counter(y.tolist())
    # Sorted insertion order keeps the printed dict stable between runs.
    return dict(sorted(tally.items()))

def class_props(y):
    """Return {class label: "count (share%)"} with keys in ascending label order."""
    tally = Counter(y.tolist())
    n_total = sum(tally.values())
    summary = {}
    for label in sorted(tally.keys()):
        count = tally[label]
        summary[label] = f"{count} ({count/n_total:.2%})"
    return summary

print("🔢 Class Distribution Summary")
print(f"➡️ Full Dataset : {class_counts(y_all)}")
print(f"➡️ Training Set : {class_counts(y_tr)}")
print(f"➡️ Testing Set  : {class_counts(y_te)}")

print("\n📊 Class Proportions")
print(f"Full  : {class_props(y_all)}")
print(f"Train : {class_props(y_tr)}")
print(f"Test  : {class_props(y_te)}")
from IPython.display import display

def df_counts_props(y, split_name):
    """Build one row per class with its count and proportion, tagged with `split_name`."""
    tally = Counter(y.tolist())
    n_total = sum(tally.values())
    records = []
    for label in sorted(tally.keys()):
        records.append({
            "Split": split_name,
            "Class": label,
            "Count": tally[label],
            "Proportion": tally[label] / n_total,
        })
    return pd.DataFrame(records)

dist_df = pd.concat([
    df_counts_props(y_tr, "Train"),
    df_counts_props(y_te, "Test"),
    df_counts_props(y_all, "Full")
], ignore_index=True)

display(dist_df.pivot(index="Class", columns="Split", values=["Count","Proportion"]).round(4))
