In [1]:
# Ensure repository root on sys.path for `import app.*`
import sys
from pathlib import Path
repo_root = (Path.cwd() / '..').resolve()
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
print('Repo root:', repo_root)


Repo root: C:\Users\Golds\Downloads\survival-readmission


# 03 - Survival Modeling

This notebook trains and evaluates survival models:
- Cox Proportional Hazards
- XGBoost Survival
- Other survival models


In [2]:
# Import libraries
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from app.evaluation import compute_concordance_index, compute_td_auc


## Quick Feature Importance (Tree-based proxy)

As a fast heuristic, train a tree-based classifier on the 30-day event label to inspect feature importances. This is a proxy (not a survival objective) to surface candidate predictors; we will follow up with survival-specific modeling (Cox, XGBoost Survival).


In [3]:
# Load cohort and build features
cohort = pd.read_csv('../data/processed/cohort_30d.csv', parse_dates=['admittime','dischtime','next_admittime'])

# Build features via app.feature_engineering (includes labs when available)
from app.feature_engineering import engineer_features
import os

# Set the MIMIC_DEMO_DIR environment variable to ensure data loader finds the data
os.environ['MIMIC_DEMO_DIR'] = '../data/raw/mimic-iv-demo'

X, y_df = engineer_features(cohort)
y = y_df['event'].astype(int)

print('Feature matrix shape:', X.shape)
print('Num lab-derived columns:', sum([1 for col in X.columns if str(col).startswith('lab_')]))
print('Sample lab columns:', [col for col in X.columns if str(col).startswith('lab_')][:5])


Feature matrix shape: (260, 40)
Num lab-derived columns: 20
Sample lab columns: ['lab_50868', 'lab_50882', 'lab_50893', 'lab_50902', 'lab_50912']


In [4]:
# Train/test split and RandomForest quick AUC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# Simple train/test split by index (for demo; prefer temporal split in practice)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

print(f"Train set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Event rate in train: {y_train.mean():.3f}")
print(f"Event rate in test: {y_test.mean():.3f}")

# Quick RandomForest for feature importance (proxy, not survival-specific)
rf = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)
rf.fit(X_train, y_train)

# Quick AUC (30-day classification proxy)
y_pred_proba = rf.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_pred_proba)
print(f"Quick AUC (30-day classification proxy): {auc:.3f}")

# Feature importance
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': rf.feature_importances_
}).sort_values('importance', ascending=False)

print("\nTop 15 features:")
print(feature_importance.head(15))


Train set: 182 samples
Test set: 78 samples
Event rate in train: 0.192
Event rate in test: 0.192
Quick AUC (30-day classification proxy): 0.692

Top 15 features:
      feature  importance
17  lab_51265    0.091135
20  lab_51301    0.088304
10  lab_50983    0.065512
18  lab_51277    0.063760
13  lab_51222    0.061051
4   lab_50902    0.048165
12  lab_51221    0.044855
19  lab_51279    0.041921
7   lab_50960    0.041769
3   lab_50893    0.041396
5   lab_50912    0.041395
6   lab_50931    0.039421
15  lab_51249    0.034529
9   lab_50971    0.032628
14  lab_51248    0.032163


In [5]:
# Load cohort from previous step (ensure variable exists before feature engineering)
import pandas as pd
cohort = pd.read_csv('../data/processed/cohort_30d.csv', parse_dates=['admittime','dischtime','next_admittime'])


In [6]:
# Build features via app.feature_engineering (includes labs when available)
from app.feature_engineering import engineer_features

X, y_df = engineer_features(cohort)
y = y_df['event'].astype(int)

print('Feature matrix shape:', X.shape)
print('Num lab-derived columns:', sum([1 for col in X.columns if str(col).startswith('lab_')]))


Feature matrix shape: (260, 40)
Num lab-derived columns: 20


In [7]:
# Setup: add repo root to path for `app.*`
import sys
from pathlib import Path
repo_root = (Path.cwd() / '..').resolve()
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
print('Repo root:', repo_root)


Repo root: C:\Users\Golds\Downloads\survival-readmission


## Cox Proportional Hazards (baseline)
Fit an interpretable Cox PH model and report C-index on a holdout split.


In [8]:
# Prepare data for Cox PH
from lifelines import CoxPHFitter
from app.evaluation import compute_concordance_index

cox_df = X.copy()
cox_df["time_to_event"] = y_df["time_to_event"].values
cox_df["event"] = y_df["event"].values

# Simple train/test split by index (for demo; prefer temporal split in practice)
train_idx, test_idx = X_train.index, X_test.index
cox_train = cox_df.loc[train_idx]
cox_test = cox_df.loc[test_idx]

cph = CoxPHFitter(penalizer=0.1, l1_ratio=0.1)
cph.fit(cox_train.assign(event=cox_train["event"].astype(bool)), duration_col="time_to_event", event_col="event", show_progress=False)

# Risk scores and C-index
risk_scores = cph.predict_partial_hazard(cox_test)
c_index = compute_concordance_index(cox_test["event"], cox_test["time_to_event"], risk_scores)
print(f"Cox PH C-index (holdout): {c_index:.3f}")


Cox PH C-index (holdout): 0.696


## XGBoost Survival (Cox objective)
Train an XGBoost model with survival:cox to produce risk scores and compute C-index.


In [9]:
import xgboost as xgb

# Prepare DMatrix for XGBoost Cox
# For Cox, label is time; event is provided implicitly via order weighting
# We'll follow the typical approach: sort by time and pass (time, event) via special settings.

# Create DMatrix with features
dtrain = xgb.DMatrix(X_train, label=y_df.loc[X_train.index, 'time_to_event'].values)
dtest = xgb.DMatrix(X_test, label=y_df.loc[X_test.index, 'time_to_event'].values)

params = {
    'objective': 'survival:cox',
    'eval_metric': 'cox-nloglik',
    'eta': 0.05,
    'max_depth': 3,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42,
}

bst = xgb.train(params, dtrain, num_boost_round=300)

# XGBoost Cox produces risk scores as predictions
xgb_risk = bst.predict(dtest)
cox_cindex = compute_concordance_index(
    y_df.loc[X_test.index, 'event'].values,
    y_df.loc[X_test.index, 'time_to_event'].values,
    xgb_risk,
)
print(f"XGBoost Cox C-index (holdout): {cox_cindex:.3f}")


XGBoost Cox C-index (holdout): 0.622


In [10]:
# Train/test split and RF model (ensure X_train/X_test defined)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

mask = X.notna().all(axis=1)
Xc = X[mask]
yc = y[mask]

X_train, X_test, y_train, y_test = train_test_split(Xc, yc, test_size=0.3, random_state=42, stratify=yc)
y_df_train = y_df.loc[X_train.index]
y_df_test = y_df.loc[X_test.index]

clf = RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)
proba = clf.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, proba)
print(f'Quick AUC (30-day classification proxy): {auc:.3f}')

imp = pd.Series(clf.feature_importances_, index=X_train.columns).sort_values(ascending=False)
print('Top 15 features:\n', imp.head(15))


Quick AUC (30-day classification proxy): 0.687
Top 15 features:
 lab_51301    0.085971
lab_51265    0.078831
lab_50983    0.060554
lab_51277    0.055757
lab_51222    0.053808
lab_50931    0.050722
lab_51221    0.048176
lab_50893    0.044860
lab_51249    0.043307
lab_51279    0.043026
lab_50970    0.040929
lab_50902    0.038081
lab_51248    0.037272
lab_50960    0.036535
lab_50912    0.035801
dtype: float64


## Comprehensive Model Evaluation

Now let's evaluate each model with proper survival analysis metrics.


### Cox PH Model Evaluation


In [11]:
# Comprehensive Cox PH Evaluation
from app.evaluation import compute_concordance_index, compute_td_auc, compute_brier_score, compute_calibration
import matplotlib.pyplot as plt
import numpy as np

print("COX PROPORTIONAL HAZARDS MODEL EVALUATION")
print("=" * 50)

# 1. Basic Performance Metrics
print(f"C-index: {c_index:.3f}")
print(f"Interpretation: {c_index:.1%} of patient pairs correctly ordered by risk")

# 2. Time-dependent AUC
time_horizons = [1, 7, 14, 21]
y_train_tuple = (y_df_train['event'].values, y_df_train['time_to_event'].values)
y_test_tuple = (y_df_test['event'].values, y_df_test['time_to_event'].values)

td_auc, mean_auc = compute_td_auc(y_train_tuple, y_test_tuple, risk_scores, time_horizons)
print(f"\nTime-dependent AUC:")
for t, auc in zip(time_horizons, td_auc):
    print(f"  Day {t:2d}: {auc:.3f}")
print(f"Mean AUC: {mean_auc:.3f}")

# 3. Integrated Brier Score
cox_test_df = X_test.copy()
cox_test_df['time_to_event'] = y_df_test['time_to_event'].values
cox_test_df['event'] = y_df_test['event'].values
survival_probs = cph.predict_survival_function(cox_test_df, times=time_horizons)
survival_probs_matrix = survival_probs.T.values
ibs = compute_brier_score(y_train_tuple, y_test_tuple, survival_probs_matrix, time_horizons)
print(f"\nIntegrated Brier Score: {ibs:.3f}")

# 4. Calibration
event_probs_30d = 1 - survival_probs_matrix[:, -1]
calibration = compute_calibration(event_probs_30d, y_df_test['event'].values, n_bins=5)
print(f"\nCalibration (5 bins):")
for i, bin_data in enumerate(calibration['bins']):
    diff = bin_data['obs'] - bin_data['pred']
    print(f"  Bin {i+1}: Pred={bin_data['pred']:.3f}, Obs={bin_data['obs']:.3f}, Diff={diff:+.3f}")

# Store Cox results for comparison
cox_results = {
    'model': 'Cox PH',
    'c_index': c_index,
    'td_auc_mean': mean_auc,
    'ibs': ibs,
    'calibration_error': abs(calibration['bins'][-1]['obs'] - calibration['bins'][-1]['pred'])
}


COX PROPORTIONAL HAZARDS MODEL EVALUATION
C-index: 0.696
Interpretation: 69.6% of patient pairs correctly ordered by risk

Time-dependent AUC:
  Day  1: 0.260
  Day  7: 0.625
  Day 14: 0.731
  Day 21: 0.693
Mean AUC: 0.662

Integrated Brier Score: 0.080

Calibration (5 bins):
  Bin 1: Pred=0.062, Obs=0.062, Diff=+0.001
  Bin 2: Pred=0.088, Obs=0.062, Diff=-0.025
  Bin 3: Pred=0.118, Obs=0.188, Diff=+0.070
  Bin 4: Pred=0.156, Obs=0.267, Diff=+0.111
  Bin 5: Pred=0.294, Obs=0.400, Diff=+0.106


### XGBoost Survival Model Evaluation


In [12]:
# Comprehensive XGBoost Evaluation
print("\nXGBOOST SURVIVAL MODEL EVALUATION")
print("=" * 50)

# XGBoost already trained above, get predictions
xgb_risk = bst.predict(dtest)
xgb_c_index = compute_concordance_index(
    y_df_test['event'].values,
    y_df_test['time_to_event'].values,
    xgb_risk
)

print(f"C-index: {xgb_c_index:.3f}")

# Time-dependent AUC for XGBoost
xgb_td_auc, xgb_mean_auc = compute_td_auc(y_train_tuple, y_test_tuple, xgb_risk, time_horizons)
print(f"\nTime-dependent AUC:")
for t, auc in zip(time_horizons, xgb_td_auc):
    print(f"  Day {t:2d}: {auc:.3f}")
print(f"Mean AUC: {xgb_mean_auc:.3f}")

# For XGBoost, we need to approximate survival probabilities
# This is a simplified approach - in practice, you'd use proper survival probability estimation
xgb_survival_probs = np.exp(-xgb_risk.reshape(-1, 1) * np.array(time_horizons).reshape(1, -1))
xgb_ibs = compute_brier_score(y_train_tuple, y_test_tuple, xgb_survival_probs, time_horizons)
print(f"\nIntegrated Brier Score: {xgb_ibs:.3f}")

# Calibration for XGBoost
xgb_event_probs_30d = 1 - xgb_survival_probs[:, -1]
xgb_calibration = compute_calibration(xgb_event_probs_30d, y_df_test['event'].values, n_bins=5)
print(f"\nCalibration (5 bins):")
for i, bin_data in enumerate(xgb_calibration['bins']):
    diff = bin_data['obs'] - bin_data['pred']
    print(f"  Bin {i+1}: Pred={bin_data['pred']:.3f}, Obs={bin_data['obs']:.3f}, Diff={diff:+.3f}")

# Store XGBoost results
xgb_results = {
    'model': 'XGBoost Cox',
    'c_index': xgb_c_index,
    'td_auc_mean': xgb_mean_auc,
    'ibs': xgb_ibs,
    'calibration_error': abs(xgb_calibration['bins'][-1]['obs'] - xgb_calibration['bins'][-1]['pred'])
}



XGBOOST SURVIVAL MODEL EVALUATION
C-index: 0.622

Time-dependent AUC:
  Day  1: 0.468
  Day  7: 0.652
  Day 14: 0.629
  Day 21: 0.575
Mean AUC: 0.610

Integrated Brier Score: 0.907

Calibration (5 bins):
  Bin 1: Pred=1.000, Obs=0.250, Diff=-0.750
  Bin 2: Pred=1.000, Obs=0.188, Diff=-0.812
  Bin 3: Pred=1.000, Obs=0.125, Diff=-0.875
  Bin 4: Pred=1.000, Obs=0.133, Diff=-0.867
  Bin 5: Pred=1.000, Obs=0.267, Diff=-0.733


### Model Comparison Summary


In [13]:
# Compare all models
import pandas as pd

results_df = pd.DataFrame([cox_results, xgb_results])
print("\nMODEL COMPARISON SUMMARY")
print("=" * 50)
print(results_df.round(3))

print(f"\n🏆 BEST PERFORMING MODEL:")
best_c_index = results_df.loc[results_df['c_index'].idxmax(), 'model']
best_ibs = results_df.loc[results_df['ibs'].idxmin(), 'model']
best_calibration = results_df.loc[results_df['calibration_error'].idxmin(), 'model']

print(f"  • Best C-index: {best_c_index}")
print(f"  • Best IBS (calibration): {best_ibs}")
print(f"  • Best calibration: {best_calibration}")

print(f"\n📊 PERFORMANCE RANKINGS:")
print("C-index ranking:")
for i, (_, row) in enumerate(results_df.sort_values('c_index', ascending=False).iterrows()):
    print(f"  {i+1}. {row['model']}: {row['c_index']:.3f}")

print("\nIBS ranking (lower is better):")
for i, (_, row) in enumerate(results_df.sort_values('ibs').iterrows()):
    print(f"  {i+1}. {row['model']}: {row['ibs']:.3f}")

print(f"\n✅ RECOMMENDATION:")
if cox_results['c_index'] > xgb_results['c_index']:
    print(f"  Cox PH model shows better discrimination ({cox_results['c_index']:.3f} vs {xgb_results['c_index']:.3f})")
    print(f"  Cox PH is more interpretable with hazard ratios")
    print(f"  Recommended for clinical use: Cox PH")
else:
    print(f"  XGBoost shows better discrimination ({xgb_results['c_index']:.3f} vs {cox_results['c_index']:.3f})")
    print(f"  XGBoost captures non-linear relationships")
    print(f"  Consider ensemble or Cox PH for interpretability")



MODEL COMPARISON SUMMARY
         model  c_index  td_auc_mean    ibs  calibration_error
0       Cox PH    0.696        0.662  0.080              0.106
1  XGBoost Cox    0.622        0.610  0.907              0.733

🏆 BEST PERFORMING MODEL:
  • Best C-index: Cox PH
  • Best IBS (calibration): Cox PH
  • Best calibration: Cox PH

📊 PERFORMANCE RANKINGS:
C-index ranking:
  1. Cox PH: 0.696
  2. XGBoost Cox: 0.622

IBS ranking (lower is better):
  1. Cox PH: 0.080
  2. XGBoost Cox: 0.907

✅ RECOMMENDATION:
  Cox PH model shows better discrimination (0.696 vs 0.622)
  Cox PH is more interpretable with hazard ratios
  Recommended for clinical use: Cox PH


In [14]:
# Build features via app.feature_engineering (includes labs when available)
from app.feature_engineering import engineer_features

X, y_df = engineer_features(cohort)
y = y_df['event'].astype(int)

print('Feature matrix shape:', X.shape)
print('Num lab-derived columns:', sum(col.startswith('"lab_"') for col in X.columns))


Feature matrix shape: (260, 40)
Num lab-derived columns: 0


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier

# Load cohort from previous step
cohort = pd.read_csv('../data/processed/cohort_30d.csv', parse_dates=['admittime','dischtime','next_admittime'])

# Basic numeric/categorical feature set for quick pass
X_cols = []
for c in ['age_at_discharge','los_days']:
    if c in cohort.columns:
        X_cols.append(c)

# One-hot encode a few categoricals
cat_cols = [c for c in ['gender','admission_type','discharge_location','insurance'] if c in cohort.columns]
X = pd.get_dummies(cohort[X_cols + cat_cols], drop_first=True)
y = cohort['event'].astype(int)

# Drop rows with any NA in features
mask = X.notna().all(axis=1)
X = X[mask]
y = y[mask]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

clf = RandomForestClassifier(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)

# AUC as quick heuristic
proba = clf.predict_proba(X_test)[:,1]
auc = roc_auc_score(y_test, proba)
print(f"Quick AUC (30-day classification proxy): {auc:.3f}")

# Feature importance
imp = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Top 15 features:\n", imp.head(15))


Quick AUC (30-day classification proxy): 0.576
Top 15 features:
 age_at_discharge                                   0.401644
gender_M                                           0.081105
discharge_location_SKILLED NURSING FACILITY        0.060272
insurance_Medicare                                 0.050945
insurance_Other                                    0.050063
admission_type_EW EMER.                            0.049381
discharge_location_HOME                            0.048827
admission_type_DIRECT EMER.                        0.038194
discharge_location_HOME HEALTH CARE                0.034777
admission_type_OBSERVATION ADMIT                   0.031774
admission_type_URGENT                              0.028284
discharge_location_REHAB                           0.024051
discharge_location_CHRONIC/LONG TERM ACUTE CARE    0.022864
admission_type_SURGICAL SAME DAY ADMISSION         0.020727
admission_type_EU OBSERVATION                      0.013956
dtype: float64
