In [None]:
import pandas as pd
import numpy as np
from sksurv.util import Surv
from sksurv.metrics import cumulative_dynamic_auc, concordance_index_ipcw
import warnings
from tqdm import tqdm
import os

# Silence every warning category for the rest of the session
warnings.simplefilter("ignore")

def load_and_clean_data(filepath: str) -> pd.DataFrame:
    """Read a model-output CSV and keep only complete survival rows.

    Only the true time, true event flag and predicted risk score columns are
    retained; any row with a missing value in one of them is dropped.
    """
    required_columns = [
        "tte_cox_true_time",
        "tte_cox_true_event",
        "tte_cox_risk_score",
    ]
    raw = pd.read_csv(filepath)
    selected = raw[required_columns]
    return selected.dropna()

def create_surv_object(df: pd.DataFrame):
    """Split `df` into a structured survival array and its risk-score vector.

    Returns a `(surv, risks)` pair: `surv` is built by `Surv.from_arrays` from
    the boolean-cast event column and the time column, and `risks` holds the
    raw values of the risk-score column.
    """
    time_col, event_col, risk_col = (
        "tte_cox_true_time",
        "tte_cox_true_event",
        "tte_cox_risk_score",
    )
    durations = df[time_col].values
    observed = df[event_col].values.astype(bool)
    structured = Surv.from_arrays(event=observed, time=durations)
    return structured, df[risk_col].values

def evaluate_day_365(surv, risks: np.ndarray) -> dict:
    """Compute the IPCW C-index and the cumulative/dynamic AUC at day 365.

    `surv` is passed as both the "train" and the "test" survival data, so the
    censoring distribution used for the inverse-probability weights is
    estimated from the evaluated sample itself.

    Parameters
    ----------
    surv : structured array as produced by `Surv.from_arrays`.
    risks : per-sample risk estimates, passed unchanged to both metrics.

    Returns
    -------
    dict with keys ``"AUC@365"`` and ``"C-index"``, both plain floats.
    """
    eval_times = [365]
    c_index = concordance_index_ipcw(surv, surv, risks)[0]
    # cumulative_dynamic_auc returns (auc_per_time, mean_auc) in that order;
    # take the per-time array explicitly instead of relying on the mean being
    # equal to the single entry when only one time point is requested.
    auc_per_time, _mean_auc = cumulative_dynamic_auc(surv, surv, risks, eval_times)
    auc_365 = float(np.asarray(auc_per_time).ravel()[0])
    return {"AUC@365": auc_365, "C-index": float(c_index)}

# Main evaluation block: score every model's output file at the 365-day mark
filepaths = [
    "./365day_future_prediction_outputs_50/DeepSurv_LSTM_365DayFutureTarget_detailed_outputs.csv",
    "./365day_future_prediction_outputs_50/DeepSurv_MLP_365DayFutureTarget_detailed_outputs.csv",
    "./365day_future_prediction_outputs_50/DeepSurv_RNN_365DayFutureTarget_detailed_outputs.csv",
    "./365day_future_prediction_outputs_50/DeepSurv_TCN_365DayFutureTarget_detailed_outputs.csv",
    "./365day_future_prediction_outputs_50/DeepSurv_Transformer_365DayFutureTarget_detailed_outputs.csv",
]

print("\n--- Time-dependent AUC and C-index at Day 365 ---")
for filepath in tqdm(filepaths, desc="Evaluating models at Day 365"):
    # File name without directory or extension doubles as the model label
    fn = os.path.basename(filepath)
    model_label, _extension = os.path.splitext(fn)

    df = load_and_clean_data(filepath)
    surv, risks = create_surv_object(df)
    metrics = evaluate_day_365(surv, risks)

    print(f"\nModel: {model_label}")
    for metric_key in ("AUC@365", "C-index"):
        print(f"  {metric_key}: {metrics[metric_key]:.4f}")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter

filepaths = [
    "./365day_future_prediction_outputs/DeepSurv_LSTM_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_MLP_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_RNN_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_TCN_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_Transformer_365DayFuture_detailed_outputs.csv",
]

# Short model labels: file name without its directory and the shared suffix.
model_names = [fp.split("/")[-1].replace("_365DayFuture_detailed_outputs.csv", "") for fp in filepaths]
# Evaluation horizons 30, 60, ..., 360 (the stop value 366 is exclusive).
eval_times = np.arange(30, 366, 30)

# One record per (model, horizon); turned into a DataFrame at the end.
combined_brier_scores = []

for fp, model_name in zip(filepaths, model_names):
    df = pd.read_csv(fp)
    df_clean = df.dropna(subset=['tte_cox_true_time', 'tte_cox_true_event', 'cl_prob_1'])

    event_times = df_clean['tte_cox_true_time'].values
    event_observed = df_clean['tte_cox_true_event'].values
    predicted_probs = df_clean['cl_prob_1'].values

    # NOTE(review): cl_prob_1 is treated as P(event) and the same prediction
    # is scored at every horizon -- confirm this is the intended comparison.
    # It does not depend on t, so it is computed once per model.
    y_pred = 1 - predicted_probs  # survival prob

    # Kaplan-Meier estimate of the censoring distribution G: "events" for
    # this fit are the censored observations.
    kmf_censor = KaplanMeierFitter()
    kmf_censor.fit(event_times, event_observed == 0)

    # G evaluated at each subject's own observed time, needed to weight
    # subjects whose event happened before the horizon. Evaluated on the
    # sorted unique times and mapped back to the per-subject order.
    unique_times, inverse = np.unique(event_times, return_inverse=True)
    G_unique = np.atleast_1d(np.asarray(kmf_censor.predict(unique_times), dtype=float))
    G_at_own_time = np.clip(G_unique[inverse], 1e-5, None)
    had_event = event_observed == 1

    for t in eval_times:
        G_t = np.clip(kmf_censor.predict(t), 1e-5, None)

        survived_past_t = event_times > t
        failed_by_t = had_event & ~survived_past_t
        y_true = survived_past_t.astype(int)

        # IPCW weights: subjects still under observation after t are weighted
        # by 1/G(t); subjects with an observed event by t are weighted by
        # 1/G(T_i); subjects censored before t have unknown status at t and
        # get weight 0. (Previously events before t were dropped as well,
        # leaving only survivors in the score.)
        weights = np.where(
            survived_past_t,
            1.0 / G_t,
            np.where(failed_by_t, 1.0 / G_at_own_time, 0.0),
        )

        brier_score_t = np.mean(weights * (y_pred - y_true) ** 2)
        combined_brier_scores.append({
            'time': t,
            'brier_score': brier_score_t,
            'model': model_name
        })

# Convert to DataFrame
brier_df = pd.DataFrame(combined_brier_scores)

# Plot one Brier-score curve per model on a shared set of axes
plt.figure(figsize=(10, 6))
for model_name, df_plot in brier_df.groupby('model', sort=False):
    plt.plot(df_plot['time'], df_plot['brier_score'], marker='o', label=model_name)

axes = plt.gca()
axes.set_title("Temporal Brier Scores by Model")
axes.set_xlabel("Time (days)")
axes.set_ylabel("Brier Score")
axes.legend()
axes.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Load the LSTM model's detailed outputs and display the frame for inspection
lstm_outputs_csv = "./365day_future_prediction_outputs_50/DeepSurv_LSTM_365DayFutureTarget_detailed_outputs.csv"
df = pd.read_csv(lstm_outputs_csv)
df

In [None]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from lifelines import KaplanMeierFitter
from lifelines.utils import concordance_index
from sksurv.util import Surv
from sksurv.metrics import cumulative_dynamic_auc, brier_score

# --- Configuration ---
# Horizon at which the shared test set is built and AUC / Brier are evaluated.
EVAL_DAY = 365
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# a bootstrap step is still planned or this constant can go.
N_BOOTSTRAP_RUNS = 100
# Upper bound on the number of failure rows drawn per progressing patient.
N_PROGRESSOR_SAMPLES = 5
# Base seed; also passed explicitly as `random_state` to the pandas sampling
# in sample_shared_test_set.
RANDOM_SEED = 42

# Seed the global NumPy and stdlib generators. The DataFrame sampling below
# passes its own `random_state`, so it does not depend on these calls.
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Detailed prediction outputs, one CSV per model architecture.
filepaths = [
    "./365day_future_prediction_outputs/DeepSurv_LSTM_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_MLP_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_RNN_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_TCN_365DayFuture_detailed_outputs.csv",
    "./365day_future_prediction_outputs/DeepSurv_Transformer_365DayFuture_detailed_outputs.csv",
]

def sample_shared_test_set(df, horizon, n_progressor_samples=None, random_seed=None):
    """Draw a per-patient evaluation sample from a model-output frame.

    For every patient (rows grouped by ``PatientID``):

    * if the first row's ``cl_true_label`` is 1, up to `n_progressor_samples`
      rows with an observed event are drawn *without replacement* from the
      rows with distinct ``tte_cox_true_time`` values;
    * otherwise one censored row whose ``tte_cox_true_time`` is at least
      `horizon` is drawn; patients without such a row are skipped.

    Parameters
    ----------
    df : DataFrame with columns ``PatientID``, ``cl_true_label``,
        ``tte_cox_true_event`` and ``tte_cox_true_time``.
    horizon : minimum follow-up time required of sampled censored rows.
    n_progressor_samples : optional override for the module-level
        ``N_PROGRESSOR_SAMPLES``.
    random_seed : optional override for the module-level ``RANDOM_SEED``.

    Returns
    -------
    DataFrame with the sampled rows (original columns, dtypes and row labels).
    An empty frame with the input's columns is returned when nothing could be
    sampled.
    """
    if n_progressor_samples is None:
        n_progressor_samples = N_PROGRESSOR_SAMPLES
    if random_seed is None:
        random_seed = RANDOM_SEED

    picked = []
    for _pid, group in df.groupby("PatientID"):
        if group['cl_true_label'].iloc[0] == 1:
            # Only rows where failure actually occurred, one per failure time,
            # so that a single draw without replacement yields distinct times.
            # (Repeated n=1 draws with different seeds could return the same
            # row several times and silently shrink the sample.)
            fails = group[group['tte_cox_true_event'] == 1].drop_duplicates(
                subset="tte_cox_true_time"
            )
            n_draw = min(n_progressor_samples, len(fails))
            if n_draw > 0:
                picked.append(fails.sample(n=n_draw, random_state=random_seed))
        else:
            # A censored row that is known to survive at least to the horizon.
            cens = group[(group['tte_cox_true_event'] == 0) &
                         (group['tte_cox_true_time'] >= horizon)]
            if cens.empty:
                # No follow-up past the horizon: skip this patient.
                continue
            picked.append(cens.sample(n=1, random_state=random_seed))

    if not picked:
        # Keep the schema so downstream column access and merges still work.
        return df.iloc[0:0].copy()

    # Concatenating frames (rather than rebuilding from row Series) preserves
    # the column dtypes of the input.
    return pd.concat(picked).drop_duplicates(subset=["PatientID", "tte_cox_true_time"])

def _breslow_survival_at(T, E, R, horizon):
    """Convert Cox-style log-risk scores into survival probabilities at `horizon`.

    Uses the Breslow estimate of the baseline cumulative hazard computed on
    the same sample: S(t | x) = exp(-H0(t) * exp(score)).
    """
    # Centring the scores leaves S(t | x) unchanged (the baseline absorbs the
    # shift) but keeps exp() in a comfortable numeric range.
    rel_hazard = np.exp(R - R.mean())
    order = np.argsort(T, kind="stable")
    t_sorted = T[order]
    # Reverse cumulative sum = total relative hazard of everyone whose time is
    # >= t_sorted[i]; tied times all read the first tied position so they
    # share one risk set.
    at_risk = np.cumsum(rel_hazard[order][::-1])[::-1]
    at_risk = at_risk[np.searchsorted(t_sorted, t_sorted, side="left")]
    counted = E[order] & (t_sorted <= horizon)
    baseline_cum_hazard = np.sum(1.0 / at_risk[counted])
    return np.exp(-baseline_cum_hazard * rel_hazard)


def _metric_or_nan(label, compute):
    """Run `compute()`; on a data-related failure warn and return NaN."""
    import warnings

    try:
        return float(compute())
    except (ValueError, ZeroDivisionError) as exc:
        # Degenerate samples (e.g. no comparable pairs, horizon outside the
        # follow-up range) are reported instead of being swallowed silently.
        # Programming errors such as TypeError are allowed to propagate.
        warnings.warn(f"{label} could not be computed: {exc}", RuntimeWarning, stacklevel=2)
        return np.nan


def evaluate_model_fullsample(df, eval_day):
    """
    Compute C-index, time-dependent AUC@eval_day, and IPCW Brier@eval_day
    using the entire test sample without further dropping.

    Rows with a missing time, event flag or risk score are removed first. The
    sample is used as both "train" and "test" data for the scikit-survival
    metrics, i.e. the censoring distribution is estimated in-sample.

    ``tte_cox_risk_score`` is treated as "higher = higher risk": it is negated
    for lifelines' ``concordance_index`` (which expects higher = longer
    survival) and passed unchanged to ``cumulative_dynamic_auc``.

    NOTE(review): the Brier score needs survival probabilities, so the risk
    score is assumed to be a Cox log partial hazard and is converted with an
    in-sample Breslow baseline -- confirm this matches the model output.

    Returns
    -------
    (c_index, auc_at_eval_day, brier_at_eval_day); each entry is NaN when the
    metric cannot be computed on this sample, and all three are NaN when no
    complete rows remain.
    """
    df = df.dropna(subset=['tte_cox_true_time', 'tte_cox_true_event', 'tte_cox_risk_score'])
    if df.empty:
        return np.nan, np.nan, np.nan

    T = df['tte_cox_true_time'].values.astype(float)
    E = df['tte_cox_true_event'].astype(bool).values
    R = df['tte_cox_risk_score'].values.astype(float)

    surv = Surv.from_arrays(E, T)
    times = np.array([eval_day])

    def _c_index():
        return concordance_index(T, -R, event_observed=E)

    def _auc():
        # Positional call: the parameters are (survival_train, survival_test,
        # estimate, times) and the return value is (auc_per_time, mean_auc).
        auc_per_time, _mean_auc = cumulative_dynamic_auc(surv, surv, R, times)
        return auc_per_time[0]

    def _brier():
        # brier_score expects an (n_samples, n_times) array of survival
        # probabilities and returns (times, scores).
        surv_prob = _breslow_survival_at(T, E, R, eval_day).reshape(-1, 1)
        _times, scores = brier_score(surv, surv, surv_prob, times)
        return scores[0]

    c_index = _metric_or_nan("C-index", _c_index)
    auc_365 = _metric_or_nan(f"AUC@{eval_day}", _auc)
    brier_365 = _metric_or_nan(f"Brier@{eval_day}", _brier)

    return c_index, auc_365, brier_365

def plot_risk_distribution(df, model_name):
    """Show a 50-bin histogram of the predicted risk scores for one model."""
    scores = df['tte_cox_risk_score']
    axes = plt.gca()
    axes.hist(scores, bins=50, edgecolor='black')
    axes.set_title(f"Risk Score Distribution: {model_name}")
    axes.set_xlabel("Risk Score")
    axes.set_ylabel("Count")
    axes.grid(True)
    plt.tight_layout()
    plt.show()

def plot_risk_vs_tte(df, model_name):
    """Scatter predicted risk against observed time, coloured by event flag."""
    risk = df['tte_cox_risk_score']
    observed_time = df['tte_cox_true_time']
    event_flag = df['tte_cox_true_event']

    points = plt.scatter(risk, observed_time, c=event_flag, cmap='coolwarm', alpha=0.6)

    axes = plt.gca()
    axes.set_title(f"Risk vs Time-to-Event: {model_name}")
    axes.set_xlabel("Predicted Risk Score")
    axes.set_ylabel("Time to Event")
    # Colour bar is tied explicitly to the scatter points drawn above.
    plt.colorbar(points, label="Event (1) vs Censored (0)")
    axes.grid(True)
    plt.tight_layout()
    plt.show()

# --- Build a shared test set from the first model's outputs ---
base_df = pd.read_csv(filepaths[0])
test_df = sample_shared_test_set(base_df, EVAL_DAY)

# Report prevalence (share of rows with an observed event) in the shared sample
p_count = test_df['tte_cox_true_event'].eq(1).sum()
tot = len(test_df)
if tot > 0:
    prev = 100 * p_count / tot
else:
    prev = 0
print(f"Shared-test prevalence: {prev:.1f}% ({p_count}/{tot})")

# --- Evaluate each model on that same sample ---
# Rows are matched between the shared sample and each model's file on these keys.
merge_keys = ['PatientID', 'tte_cox_true_time']
for path in filepaths:
    model_name, _extension = os.path.splitext(os.path.basename(path))
    df_full = pd.read_csv(path)

    sampled_truth = test_df[merge_keys + ['tte_cox_true_event']]
    model_scores = df_full[merge_keys + ['tte_cox_risk_score']]
    merged = sampled_truth.merge(model_scores, on=merge_keys, how='inner')

    c_idx, auc365, bri365 = evaluate_model_fullsample(merged, EVAL_DAY)

    print(f"\n--- {model_name} ---")
    print(f"C-index            : {c_idx:.3f}")
    print(f"AUC@{EVAL_DAY}      : {auc365:.3f}")
    print(f"Brier@{EVAL_DAY}    : {bri365:.3f}")

    plot_risk_distribution(merged, model_name)
    plot_risk_vs_tte(merged, model_name)
