In [None]:
# Make sure scikit-survival is installed into the interpreter running this kernel
import subprocess
import sys

# Invoke pip through the current interpreter so the package lands in the
# same environment the notebook is executing in.
pip_install_cmd = [sys.executable, "-m", "pip", "install", "scikit-survival"]
subprocess.check_call(pip_install_cmd)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%pip install scikit-learn==1.3.2 scikit-survival==0.22.2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from lifelines import CoxPHFitter, WeibullAFTFitter
from lifelines.utils import concordance_index
from sksurv.util import Surv
from sksurv.metrics import (
    concordance_index_ipcw,
    integrated_brier_score,
    brier_score,
    cumulative_dynamic_auc
)


# Load datasets
# The three CSVs (one per censoring-handling method) live in the same folder,
# so the folder is spelled out once instead of being repeated in every path.
from pathlib import Path

DATA_DIR = Path(
    r"C:\Users\04ama\OneDrive\pension survival analysis\notebooks"
    r"\ipcw_and_other_censoring\data\censoring_methods"
)

df_zero    = pd.read_csv(DATA_DIR / "data_zero.csv")
df_discard = pd.read_csv(DATA_DIR / "data_discard.csv")
df_ipcw    = pd.read_csv(DATA_DIR / "data_ipcw.csv")

# Keyed by censoring-handling method name
datasets = {"zero": df_zero, "discard": df_discard, "ipcw": df_ipcw}

X_COLS = ["age_at_entry", "income_level", "health_score", "pension_contrib_rate"]  # predictor columns
DUR = "time_to_event"    # duration column
EVT = "event_observed"   # event indicator column
T_STAR = 15.0            # horizon used for the binary targets and the reported AUC
# Evaluation time grid. Per the original note these must lie inside the valid
# follow-up range [0.04, 25.0) -- that range is not checked here; verify it
# against the data if the datasets change.
TIMES = np.array([1.0, 5.0, 10.0, 15.0, 17.0])


In [None]:
from lifelines import KaplanMeierFitter
from sksurv.metrics import concordance_index_ipcw, integrated_brier_score, cumulative_dynamic_auc
from sksurv.util import Surv
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
import numpy as np

# ------------------------------------------------------
# Convert dataframe into sksurv-compatible survival object
# ------------------------------------------------------
def make_surv(df):
    """
    Build the structured (event, time) array from a DataFrame.

    Parameters:
    - df: DataFrame holding the event indicator column (EVT) and the
      duration column (DUR)

    Returns:
    - Structured array produced by Surv.from_arrays, as consumed by the
      sksurv metrics used in this notebook
    """
    # The event indicator is cast to boolean before being handed to sksurv
    event_flags = df[EVT].astype(bool)
    durations = df[DUR]
    return Surv.from_arrays(event=event_flags, time=durations)


# ------------------------------------------------------
# Evaluate survival models using IPCW-based metrics
# ------------------------------------------------------
def evaluate_survival_model(model_name, model, train_df, test_df):
    """
    Evaluate a fitted survival model using:
    - Uno's C-index (concordance_index_ipcw), truncated at the last entry of TIMES
    - Integrated Brier Score (IBS) over TIMES
    - Time-dependent AUC at the entry of TIMES closest to T_STAR

    Parameters:
    - model_name: string name of the model
    - model: a trained survival model with predict_survival_function()
    - train_df, test_df: DataFrames containing duration, event, and predictors

    Returns:
    - Dictionary containing C-index, IBS, and AUC at T_STAR
    """
    y_tr = make_surv(train_df)
    y_te = make_surv(test_df)

    # Predict S(t|x) on the TIMES grid. The transpose is presumably needed
    # because the model returns one column per sample, while the sksurv
    # metrics below take one row per sample -- confirm for non-lifelines models.
    S_pred = model.predict_survival_function(test_df[X_COLS], times=TIMES).T.values

    # Event risk = 1 - survival probability at the final time point
    risk_scores = 1 - S_pred[:, -1]

    # IPCW-adjusted C-index (Uno's version). The estimate must be a RISK score
    # (higher = event expected sooner), so it is passed un-negated; negating
    # it would report 1 - C instead of C.
    c_uno = concordance_index_ipcw(y_tr, y_te, risk_scores, tau=TIMES[-1])[0]

    # Integrated Brier Score over TIMES
    ibs = integrated_brier_score(y_tr, y_te, S_pred, TIMES)

    # Time-dependent AUC. cumulative_dynamic_auc returns
    # (auc_per_time, mean_auc): the first element is aligned with TIMES and
    # the second is a single summary number, not a vector of time points.
    aucs, _mean_auc = cumulative_dynamic_auc(y_tr, y_te, risk_scores, TIMES)
    aucs = np.atleast_1d(aucs)

    # Extract AUC at T_STAR (closest evaluated time point)
    eval_times = np.atleast_1d(np.asarray(TIMES, dtype=float))
    closest_idx = int(np.argmin(np.abs(eval_times - T_STAR)))
    auc_15 = float(aucs[closest_idx])

    return {"Model": model_name, "C_index": c_uno, "IBS": ibs, "AUC@15": auc_15}


# ------------------------------------------------------
# Evaluate classifier + optionally compute NRI vs baseline
# ------------------------------------------------------
def _sample_weight_fit_kwargs(model, sample_weight):
    """Return the keyword arguments that hand ``sample_weight`` to ``model.fit``.

    Returns an empty dict when no weights are given or the model cannot use them.
    """
    import inspect  # local import keeps this block self-contained

    def accepts_weight(fit_method):
        # inspect.signature follows functools.wraps chains, so decorated fit
        # methods are inspected through to the real signature.
        try:
            return "sample_weight" in inspect.signature(fit_method).parameters
        except (TypeError, ValueError):
            # Signature not introspectable: treat as "no weight support"
            return False

    if sample_weight is None:
        return {}
    if accepts_weight(model.fit):
        return {"sample_weight": sample_weight}

    # Pipeline-like objects take fit parameters as "<step>__<param>";
    # route the weights to the final step when it accepts them.
    steps = getattr(model, "steps", None)
    if steps:
        final_name, final_estimator = steps[-1]
        if hasattr(final_estimator, "fit") and accepts_weight(final_estimator.fit):
            return {f"{final_name}__sample_weight": sample_weight}
    return {}


def evaluate_classifier(model_name, model, X_train, y_train, X_test, y_test,
                        sample_weight=None, baseline_proba=None, cutoff=0.5):
    """
    Train and evaluate a binary classifier with accuracy, AUC, F1, and optional NRI.

    Parameters:
    - model_name: str, name of the classifier
    - model: sklearn model or pipeline
    - X_train, y_train: training data and binary labels
    - X_test, y_test: test data and labels
    - sample_weight: optional IPCW or class weights; used when the model's
      fit (or, for a pipeline, its final step's fit) accepts sample_weight,
      otherwise the model is fitted unweighted
    - baseline_proba: predicted probabilities from baseline model (for NRI);
      must have one entry per test row
    - cutoff: threshold for risk reclassification (default = 0.5)

    Returns:
    - metrics: dict with Accuracy, AUC, F1, and NRI values (NRI entries are
      NaN when no baseline is given or the NRI could not be computed)
    - y_pred_proba: predictions probabilities (used to store baseline for next models)
    """
    # Fit the model (if weights are supported, use them)
    model.fit(X_train, y_train, **_sample_weight_fit_kwargs(model, sample_weight))

    # Predict labels
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Get probability or score output for AUC/NRI
    if hasattr(model, 'predict_proba'):
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_pred_proba)
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(X_test)
        auc = roc_auc_score(y_test, scores)
        # Logistic squashing of the raw scores; these are not calibrated probabilities
        y_pred_proba = 1 / (1 + np.exp(-scores))
    else:
        auc = np.nan
        y_pred_proba = y_pred.astype(float)

    # Default NRI values
    nri_events = np.nan
    nri_nonevents = np.nan
    nri_total = np.nan

    # Compute NRI only if baseline probabilities are provided
    if baseline_proba is not None:
        try:
            baseline_arr = np.asarray(baseline_proba, dtype=float)
            new_arr = np.asarray(y_pred_proba, dtype=float)
            y_true = np.asarray(y_test)

            # NRI compares the two models row by row, so the inputs must align
            if not (len(baseline_arr) == len(new_arr) == len(y_true)):
                raise ValueError(
                    f"length mismatch (baseline={len(baseline_arr)}, "
                    f"new={len(new_arr)}, y_test={len(y_true)})"
                )

            # Binary classification based on threshold
            baseline_class = (baseline_arr >= cutoff).astype(int)
            new_class = (new_arr >= cutoff).astype(int)

            # NRI for events (y=1): moving up to the high-risk class is an improvement
            events = (y_true == 1)
            if np.sum(events) > 0:
                up = np.sum((new_class[events] == 1) & (baseline_class[events] == 0))
                down = np.sum((new_class[events] == 0) & (baseline_class[events] == 1))
                nri_events = (up - down) / np.sum(events)

            # NRI for non-events (y=0): moving down to the low-risk class is an improvement
            nonevents = (y_true == 0)
            if np.sum(nonevents) > 0:
                up = np.sum((new_class[nonevents] == 0) & (baseline_class[nonevents] == 1))
                down = np.sum((new_class[nonevents] == 1) & (baseline_class[nonevents] == 0))
                nri_nonevents = (up - down) / np.sum(nonevents)

            # Total NRI (stays NaN if either component is undefined)
            nri_total = nri_events + nri_nonevents

        except (ValueError, TypeError, IndexError) as e:
            # Best effort: report the problem and keep the NRI fields as NaN
            print(f"NRI could not be calculated for {model_name}: {e}")
            nri_events = nri_nonevents = nri_total = np.nan

    # Return metrics and predicted probabilities
    return {
        "Model": model_name,
        "Accuracy": accuracy,
        "AUC": auc,
        "F1": f1,
        "NRI_Events": nri_events,
        "NRI_Non_Events": nri_nonevents,
        "NRI_Total": nri_total
    }, y_pred_proba


In [None]:
# ----------------- Enhanced classifier evaluation with NRI -----------------
def evaluate_classifier_with_nri(model_name, model, X_train, y_train, X_test, y_test,
                                 sample_weight=None, baseline_proba=None, cutoff=0.5):
    """
    Evaluate a binary classifier and calculate NRI (Net Reclassification Improvement) if baseline predictions are provided.

    Parameters:
    - model_name: str, label stored in the result row
    - model: classifier or pipeline exposing fit() and predict()
    - X_train, y_train: training features and binary labels
    - X_test, y_test: test features and binary labels
    - sample_weight: optional per-row training weights (e.g. IPCW); applied
      when the model's fit (or, for a pipeline, its final step's fit) accepts
      sample_weight, otherwise the fit is unweighted
    - baseline_proba: baseline predicted probabilities, one per test row
    - cutoff: probability threshold separating high from low risk (default = 0.5)

    Returns:
    - dict with Accuracy, AUC, F1, NRI_Events, NRI_Non_Events, NRI_Total
    - predicted probabilities (for storing as baseline later when method='zero')
    """
    import inspect  # local import keeps this block self-contained

    def takes_sample_weight(fit_method):
        # inspect.signature follows functools.wraps chains, so decorated fit
        # methods are inspected through to the real signature.
        try:
            return "sample_weight" in inspect.signature(fit_method).parameters
        except (TypeError, ValueError):
            return False

    # Fit model (use IPCW if supported)
    fit_kwargs = {}
    if sample_weight is not None:
        if takes_sample_weight(model.fit):
            fit_kwargs["sample_weight"] = sample_weight
        else:
            # Pipeline-like objects take fit parameters as "<step>__<param>";
            # send the weights to the final step when it can use them.
            steps = getattr(model, "steps", None)
            if steps:
                last_name, last_estimator = steps[-1]
                if hasattr(last_estimator, "fit") and takes_sample_weight(last_estimator.fit):
                    fit_kwargs[f"{last_name}__sample_weight"] = sample_weight
    model.fit(X_train, y_train, **fit_kwargs)

    # Standard predictions
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Probability scores (for AUC + NRI)
    if hasattr(model, 'predict_proba'):
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_pred_proba)
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(X_test)
        auc = roc_auc_score(y_test, scores)
        # Logistic squashing of the raw scores; not calibrated probabilities
        y_pred_proba = 1 / (1 + np.exp(-scores))
    else:
        auc = np.nan
        y_pred_proba = y_pred.astype(float)

    f1 = f1_score(y_test, y_pred)

    # Default NRI values
    nri_events = nri_non_events = nri_total = np.nan

    # If baseline probabilities exist (from zero method), compute NRI
    if baseline_proba is not None:
        try:
            baseline_arr = np.asarray(baseline_proba, dtype=float)
            new_arr = np.asarray(y_pred_proba, dtype=float)
            y_true = np.asarray(y_test)

            # NRI is a row-by-row comparison, so all three inputs must align
            if not (len(baseline_arr) == len(new_arr) == len(y_true)):
                raise ValueError(
                    f"length mismatch (baseline={len(baseline_arr)}, "
                    f"new={len(new_arr)}, y_test={len(y_true)})"
                )

            baseline_class = (baseline_arr >= cutoff).astype(int)
            new_class = (new_arr >= cutoff).astype(int)

            # NRI for events: reclassification upwards counts as improvement
            events = (y_true == 1)
            if np.sum(events) > 0:
                up = np.sum((new_class[events] == 1) & (baseline_class[events] == 0))
                down = np.sum((new_class[events] == 0) & (baseline_class[events] == 1))
                nri_events = (up - down) / np.sum(events)

            # NRI for non-events: reclassification downwards counts as improvement
            nonevents = (y_true == 0)
            if np.sum(nonevents) > 0:
                up = np.sum((new_class[nonevents] == 0) & (baseline_class[nonevents] == 1))
                down = np.sum((new_class[nonevents] == 1) & (baseline_class[nonevents] == 0))
                nri_non_events = (up - down) / np.sum(nonevents)

            # Total NRI (stays NaN if either component is undefined)
            nri_total = nri_events + nri_non_events

        except (ValueError, TypeError, IndexError) as e:
            # Best effort: report and fall back to NaN for every NRI field
            print(f"Warning: NRI failed for {model_name}: {e}")
            nri_events = nri_non_events = nri_total = np.nan

    return {
        "Model": model_name,
        "Accuracy": accuracy,
        "AUC": auc,
        "F1": f1,
        "NRI_Events": nri_events,
        "NRI_Non_Events": nri_non_events,
        "NRI_Total": nri_total
    }, y_pred_proba


# ----------------- Main Evaluation Loop -----------------
all_results = []
baseline_probabilities = {}  # Save zero-method probabilities for NRI comparison

# For each censoring-handling dataset: split once, fit two lifelines survival
# models, then fit four classifiers on a binary "event by T_STAR" target.
# Every result row is appended to all_results.
for method, df in datasets.items():
    print(f"\n=== METHOD: {method.upper()} ===")
    # 70/30 split, stratified on the event indicator, with a fixed seed
    train_df, test_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df[EVT])

    # Only duration, event and predictors are passed to the lifelines fitters
    df_fit = train_df[[DUR, EVT] + X_COLS].copy()

    # Weights are used only for the "ipcw" method, and only if the column exists
    if method == "ipcw" and "ipcw" in train_df.columns:
        df_fit["ipcw"] = train_df["ipcw"]
        w_col = "ipcw"
    else:
        w_col = None

    # CoxPH
    cph = CoxPHFitter()
    cph.fit(df_fit, duration_col=DUR, event_col=EVT, weights_col=w_col, robust=True)
    # NOTE(review): evaluate_survival_model_robust is not defined in the visible
    # part of this notebook (the helper defined above is evaluate_survival_model)
    # -- confirm it is defined in another cell before running.
    res_cph = evaluate_survival_model_robust("CoxPH", cph, train_df, test_df)
    res_cph["Method"] = method
    res_cph["Model_Type"] = "Survival"
    all_results.append(res_cph)

    # Weibull AFT
    aft = WeibullAFTFitter()
    if w_col:
        # Weights are floored at 1e-6 on a copy before fitting; presumably the
        # fitter needs strictly positive weights -- TODO confirm.
        df_fit_aft = df_fit.copy()
        df_fit_aft[w_col] = np.maximum(df_fit_aft[w_col], 1e-6)
        aft.fit(df_fit_aft, duration_col=DUR, event_col=EVT, weights_col=w_col)
    else:
        aft.fit(df_fit, duration_col=DUR, event_col=EVT)
    res_aft = evaluate_survival_model_robust("WeibullAFT", aft, train_df, test_df)
    res_aft["Method"] = method
    res_aft["Model_Type"] = "Survival"
    all_results.append(res_aft)

    # Classification targets (binary by T_STAR)
    # Label is 1 only when an event was observed at or before T_STAR
    y_train = ((train_df[DUR] <= T_STAR) & (train_df[EVT] == 1)).astype(int)
    y_test = ((test_df[DUR] <= T_STAR) & (test_df[EVT] == 1)).astype(int)
    X_train, X_test = train_df[X_COLS], test_df[X_COLS]

    # Per-row training weights for the classifiers (ipcw method only)
    sw_train = train_df["ipcw"].copy() if method == "ipcw" and "ipcw" in train_df.columns else None
    if sw_train is not None:
        # NOTE(review): every label-0 row has its weight overwritten with 1e-6,
        # so negatives contribute almost nothing to a weighted fit -- confirm
        # this is intended.
        sw_train[y_train == 0] = 1e-6

    # Fresh, unfitted classifiers for every method
    classifiers = {
        "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=42)),
        "Random Forest": RandomForestClassifier(n_estimators=400, random_state=42),
        "SVM (RBF)": make_pipeline(StandardScaler(), SVC(probability=True, random_state=42)),
        "KNN": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=25))
    }

    for name, clf in classifiers.items():
        # The "zero" run is the baseline itself, so it gets no NRI; other
        # methods are compared against the stored zero-method probabilities.
        # NOTE(review): the baseline comes from the zero dataset's own test
        # split; NRI is only meaningful if those rows line up with this
        # method's test rows -- verify, especially for "discard".
        baseline_proba = baseline_probabilities.get(name, None) if method != "zero" else None

        res_clf, y_pred_proba = evaluate_classifier_with_nri(
            name, clf, X_train, y_train, X_test, y_test, sample_weight=sw_train, baseline_proba=baseline_proba
        )
        res_clf["Method"] = method
        res_clf["Model_Type"] = "Classification"
        all_results.append(res_clf)

        if method == "zero":  # Save baseline for NRI comparison
            baseline_probabilities[name] = y_pred_proba

# Save results
# One row per (method, model); survival and classification rows have different
# metric keys, so the unmatched columns are NaN in the combined frame.
res = pd.DataFrame(all_results)
res.to_csv("results.csv", index=False)
display(res)    