In [None]:

import numpy as np
from pathlib import Path
import pandas as pd

# Survival analysis libraries
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest
from sksurv.util import Surv
from sklearn.model_selection import train_test_split
from sksurv.metrics import concordance_index_censored
import xgboost as xgb

# Input/output locations: preprocessed arrays are read from PREPROCESSED_DIR,
# MODEL_DIR is the folder reserved for prognostic models.
PREPROCESSED_DIR = Path("./experiments/preprocessed")
MODEL_DIR = Path("./experiments/models/prognostic")
# Create the model folder (and any missing parents); no error if it already exists.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Tabular feature matrix and its label vector, as saved by an earlier step.
X_tab = np.load(PREPROCESSED_DIR / "X_tab.npy")
y_tab = np.load(PREPROCESSED_DIR / "y_tab.npy")

# Synthetic survival outcome, one (event, time) pair per row of y_tab.
# Times are exponential draws (scale=50); the event flag is a Bernoulli(0.7)
# draw, so roughly 30% of samples are flagged as censored.
# NOTE: these outcomes are generated without reference to X_tab or y_tab values.
np.random.seed(42)
surv_times = np.random.exponential(scale=50, size=y_tab.shape[0])
events = np.random.binomial(1, 0.7, size=y_tab.shape[0])

# Pack both columns into the structured array layout used by sksurv:
# a boolean 'event' field followed by a float 'time' field.
y_surv = np.empty(events.shape[0], dtype=[('event', bool), ('time', float)])
y_surv['event'] = events.astype(bool)
y_surv['time'] = surv_times

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X_tab,
    y_surv,
    test_size=0.2,
    random_state=42,
)

print("Train features shape:", X_train.shape)
print("Test features shape:", X_test.shape)
print("Example survival data:", y_train[:3])


In [None]:

# Cox proportional-hazards baseline: construct and fit on the training split
# in one step (fit() hands back the fitted estimator).
cox_model = CoxPHSurvivalAnalysis().fit(X_train, y_train)

# One risk score per held-out sample.
cox_risk = cox_model.predict(X_test)

# Concordance between predicted risk and the held-out outcomes; the metric
# returns a tuple whose first entry is the index itself.
c_index = concordance_index_censored(
    event_indicator=y_test['event'],
    event_time=y_test['time'],
    estimate=cox_risk,
)[0]
print(f"Cox PH Model C-index: {c_index:.4f}")


In [None]:

# Random Survival Forest: 50 trees, with minimum split/leaf sizes of 5 and 3;
# seeded so repeated runs build the same forest. Constructed and fitted in
# one step (fit() hands back the fitted estimator).
rsf_model = RandomSurvivalForest(
    n_estimators=50,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42,
).fit(X_train, y_train)

# One risk score per held-out sample.
rsf_risk = rsf_model.predict(X_test)

# Concordance between predicted risk and the held-out outcomes; the metric
# returns a tuple whose first entry is the index itself.
c_index_rsf = concordance_index_censored(
    event_indicator=y_test['event'],
    event_time=y_test['time'],
    estimate=rsf_risk,
)[0]
print(f"RSF Model C-index: {c_index_rsf:.4f}")


In [None]:

# Build the XGBoost labels from the split survival arrays themselves.
# train_test_split shuffles the rows, so positional slices of the pre-split
# `surv_times` would not line up with the rows of X_train / X_test.
# The survival:cox objective also carries censoring in the label's sign:
# a positive label is an observed event time, a negative label means the
# sample was right-censored at |label|.
xgb_label_train = np.where(y_train['event'], y_train['time'], -y_train['time'])
xgb_label_test = np.where(y_test['event'], y_test['time'], -y_test['time'])

# Prepare data in DMatrix format
dtrain = xgb.DMatrix(X_train, label=xgb_label_train)
dtest = xgb.DMatrix(X_test, label=xgb_label_test)

# Define XGBoost parameters for Cox survival
params = {
    "objective": "survival:cox",    # Cox partial-likelihood objective
    "eval_metric": "cox-nloglik",   # negative partial log-likelihood
    "eta": 0.1,
    "max_depth": 3,
    "seed": 42
}

# Train model
xgb_model = xgb.train(params, dtrain, num_boost_round=50)

# Predict risk scores for the held-out samples (per the XGBoost docs these
# are on the hazard-ratio scale for survival:cox, so larger = higher risk).
xgb_risk = xgb_model.predict(dtest)

# Evaluate with concordance index against the true held-out outcomes
c_index_xgb = concordance_index_censored(y_test['event'], y_test['time'], xgb_risk)[0]
print(f"XGBoost Survival Model C-index: {c_index_xgb:.4f}")
