In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import mean_squared_error, roc_auc_score
from scipy.integrate import trapz
import torch
import torchtuples as tt
from pycox.models import CoxCC
from pycox.evaluation import EvalSurv
from survival_evaluation import d_calibration, l1
import statistics
from pycox.models.cox_time import MLPVanillaCoxTime
from pycox.models import LogisticHazard, PMF, DeepHitSingle, CoxPH, MTLR, CoxTime
from sksurv.metrics import cumulative_dynamic_auc
import warnings
warnings.filterwarnings('ignore')

In [5]:
AUC_scores = []

# Load and process data
path = '../../data/NASA-Turbofan/Signature/train_new.csv'
path1 = '../../data/NASA-Turbofan/Signature/test_new.csv'

def load_data(path):
    D = pd.read_csv(path)
    # Configure column names
    x_cols = D.iloc[:, 4:].columns.tolist()  # Feature columns starting from the 5th column
    event_col = ['event']   # Survival event column
    time_col = ['time']     # Survival time column
    # Data cleaning and column selection
    D = D[x_cols + event_col + time_col]
    return D, x_cols

# Load training and testing datasets
d, x_cols = load_data(path)
d1, x_cols = load_data(path1)

# Columns to standardize
cols_standardize = x_cols.copy()
n_exp = 30  # Number of experiments

# Model evaluation metrics
CI = []         # Concordance Index
IBS = []        # Integrated Brier Score
L1_hinge = []   # L1 loss for hinge risk prediction
RMSE = []       # Root Mean Squared Error
AUC = []        # Area Under the Curve

In [4]:
for i in range(n_exp):
    #df_train, df_val, df_test = train_val_test_stratified_split(d, 'event', frac_train=0.8, frac_val=0.05, frac_test=0.15, random_state=10)
    df_train=d
    df_val=d
    df_test=d1
    standardize = [([col], StandardScaler()) for col in cols_standardize]
    leave = []
    x_mapper = DataFrameMapper(standardize + leave)
    x_train = x_mapper.fit_transform(df_train).astype('float32')
    x_val = x_mapper.transform(df_val).astype('float32')
    x_test = x_mapper.transform(df_test).astype('float32')
    in_features = x_train.shape[1]

    num_durations = 10
    get_target = lambda df: (df['time'].values, df['event'].values)
    y_train = get_target(df_train)
    y_val = get_target(df_val)
    val = (x_val, y_val)
    durations_test, events_test = get_target(df_test)

    out_features = 1
    num_nodes = [32,32]
    batch_norm = True
    dropout = 0.1
    output_bias = False

    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

    model = CoxCC(net, tt.optim.Adam)

    batch_size = 256
   

    lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=10)
    model.optimizer.set_lr(0.01)

    epochs = 512
    callbacks = [tt.cb.EarlyStopping()]
    model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val, verbose=0, val_batch_size=batch_size)

    _ = model.compute_baseline_hazards()
    surv = model.predict_surv_df(x_test)

    surv_df = pd.DataFrame(surv)
    surv_df.index.name = 'time'
    surv_df.columns.name = 'survival_function'
    

    survival_predictions = pd.Series(trapz(surv.values.T, surv.index), index=df_test.index)
    l1_hinge = l1(df_test.time, df_test.event, survival_predictions, l1_type = 'hinge')

    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
    c_index = ev.concordance_td('antolini')
    time_grid = np.linspace(durations_test.min(), durations_test.max(), 10)
    brier = ev.integrated_brier_score(time_grid)

    quantiles = np.sort(df_test['time'].unique())
    quantiles = [q for q in quantiles if q >= surv.index.min() and q < surv.index.max()]

    labels_train = np.array([(e, t) for e, t in zip(df_train['event'], df_train['time'])], dtype=[('event', 'bool'), ('time', 'float')])
    labels_test = np.array([(e, t) for e, t in zip(df_test['event'], df_test['time'])], dtype=[('event', 'bool'), ('time', 'float')])
    time_grid_train = np.unique(df_train['time'])

    auc_scores = []
    for eval_time in quantiles:
        try:
            interp_time_index = np.argmin(np.abs(eval_time - surv.index))
            surv_values_at_eval_time = surv.iloc[interp_time_index].values
            estimated_risks = 1 - surv_values_at_eval_time
            if np.min(estimated_risks) == np.max(estimated_risks):
                continue
            auc = cumulative_dynamic_auc(labels_train, labels_test, estimated_risks, times=[eval_time])[0][0]
            if not np.isnan(auc) and not np.isinf(auc):
                auc_scores.append(auc)
        except Exception as e:
            #print(f"AUC calculation failed: {e}, eval_time={eval_time}")
            pass

    AUC_scores.append(np.mean(auc_scores) if auc_scores else 0.5)
    CI.append(c_index)
    IBS.append(brier)
    L1_hinge.append(l1_hinge)

def safe_stat(data):
    return round(statistics.mean(data), 3), round(statistics.stdev(data), 3) if len(data) > 1 else (0.0, 0.0)
auc_mean, auc_std = safe_stat(AUC_scores)

print('CI:', round(statistics.mean(CI), 3), '±', round(statistics.stdev(CI), 3))
print('IBS:', round(statistics.mean(IBS), 3), '±', round(statistics.stdev(IBS), 3))
print('L1_hinge:', round(statistics.mean(L1_hinge), 3), '±', round(statistics.stdev(L1_hinge), 3))
print(f'AUC: {auc_mean} ± {auc_std}')

d_calib = d_calibration(df_test['event'], surv.iloc[6])
print('d_calibration_p_value:', round(d_calib['p_value'], 3))
print('D-Calibration (bin proportions):', round(sum(d_calib['bin_proportions']), 3))
print('D-Calibration (censored contributions):', round(sum(d_calib['censored_contributions']), 3))
print('D-Calibration (uncensored contributions):', round(sum(d_calib['uncensored_contributions']), 3))

CI: 0.926 ± 0.013
IBS: 0.019 ± 0.003
L1_hinge: 4.157 ± 0.437
AUC: 0.978 ± 0.01
d_calibration_p_value: 0.0
D-Calibration (bin proportions): 1.0
D-Calibration (censored contributions): 0.573
D-Calibration (uncensored contributions): 0.427
