In [4]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn_pandas import DataFrameMapper
from scipy.integrate import trapz
import torch
import torchtuples as tt
from pycox.datasets import metabric
from pycox.models import LogisticHazard, PMF, DeepHitSingle, CoxPH, MTLR
from pycox.evaluation import EvalSurv
from survival_evaluation import d_calibration, l1, one_calibration
import random
import statistics
from pdb import set_trace
from sklearn.metrics import mean_squared_error, roc_auc_score
from sksurv.metrics import cumulative_dynamic_auc

In [5]:
# One entry per experiment: the mean time-dependent AUC appended by the
# experiment loop further down.
AUC_scores = []

# Read and process data
# Relative paths to the training CSV (`path`) and the test CSV (`path1`);
# both are handed to `load_data` below.
path = '../../data/NASA-Turbofan/Signature/train_new.csv'
path1 = '../../data/NASA-Turbofan/Signature/test_new.csv'

def load_data(path):
    """Read a survival dataset from CSV and keep only features and labels.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    D : pandas.DataFrame
        The feature columns followed by ``'event'`` and ``'time'``.
    x_cols : list
        Names of the feature columns, in file order.

    Raises
    ------
    KeyError
        If the file has no ``'event'`` or ``'time'`` column.
    """
    D = pd.read_csv(path)
    # Configure column names
    event_col = ['event']   # Survival event column
    time_col = ['time']     # Survival time column
    label_cols = set(event_col + time_col)
    # Feature columns start from the 5th column. The label columns are
    # excluded explicitly: if they are stored after the 4th column they would
    # otherwise be used as model inputs and appear twice in the selection
    # below.
    x_cols = [col for col in D.columns[4:] if col not in label_cols]
    # Data cleaning and column selection
    D = D[x_cols + event_col + time_col]
    return D, x_cols

# Load training and testing data
d, x_cols = load_data(path)
d1, x_cols_test = load_data(path1)

# A single column list drives the preprocessing of both frames, so the two
# files must expose the same feature columns. Fail early with a clear message
# instead of silently adopting whichever file was loaded last.
mismatched_cols = set(x_cols).symmetric_difference(x_cols_test)
if mismatched_cols:
    raise ValueError(
        f"Train and test feature columns differ: {sorted(mismatched_cols)}"
    )

# Columns to standardize
cols_standardize = x_cols.copy()

# Number of experiments
n_exp = 30

# Model evaluation metrics (one entry is appended per experiment)
CI = []
IBS = []
L1_hinge = []
L1_margin = []
# NOTE(review): RMSE and AUC are not referenced by the cells visible here
# (the AUC results are collected in AUC_scores); kept in case later cells use them.
RMSE = []
AUC = []


DeepHit

In [6]:
import warnings
warnings.filterwarnings('ignore')

# Start from empty accumulators so that re-running this cell does not mix the
# results of a previous run into the summary statistics.
for _scores in (CI, IBS, L1_hinge, L1_margin, AUC_scores):
    _scores.clear()

# --------------------------------------------------------------------------
# Preparation that is identical for every repetition (done once, not per run)
# --------------------------------------------------------------------------
df_train = d
# NOTE(review): the validation frame is the training frame itself, so early
# stopping monitors the loss on the training data rather than a held-out loss.
df_val = d
df_test = d1

# Standardize every feature column; the scalers are fitted on the training data only.
standardize = [([col], StandardScaler()) for col in cols_standardize]
x_mapper = DataFrameMapper(standardize)
x_train = x_mapper.fit_transform(df_train).astype('float32')
x_val = x_mapper.transform(df_val).astype('float32')
x_test = x_mapper.transform(df_test).astype('float32')
in_features = x_train.shape[1]


def get_target(df):
    """Return the (time, event) arrays of a data frame."""
    return df['time'].values, df['event'].values


# Discretize the durations into `num_durations` intervals for DeepHit.
num_durations = 10
labtrans = DeepHitSingle.label_transform(num_durations)
y_train = labtrans.fit_transform(*get_target(df_train))
y_val = labtrans.transform(*get_target(df_val))
val = tt.tuplefy(x_val, y_val)
durations_test, events_test = get_target(df_test)
out_features = labtrans.out_features

# Network / training hyper-parameters
num_nodes = [32, 32]
batch_norm = True
dropout = 0.1
batch_size = 256
epochs = 512

# Evaluation grids and structured label arrays for the time-dependent AUC
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
# Despite the name, these are all distinct observed test times, sorted.
quantiles = np.sort(df_test['time'].unique())
label_dtype = [('event', 'bool'), ('time', 'float')]
labels_train = np.array(list(zip(df_train['event'], df_train['time'])), dtype=label_dtype)
labels_test = np.array(list(zip(df_test['event'], df_test['time'])), dtype=label_dtype)

# (experiment, eval_time, message) for every AUC time point that raised;
# kept for inspection instead of being dropped silently.
auc_failures = []

for i in range(n_exp):
    # Seed the RNGs of `random`, NumPy and PyTorch per repetition so that the
    # whole experiment is reproducible while the repetitions still differ.
    random.seed(i)
    np.random.seed(i)
    torch.manual_seed(i)

    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
    model = DeepHitSingle(net, tt.optim.Adam, duration_index=labtrans.cuts)
    # Fixed learning rate; no learning-rate search is run because its result
    # would not be used.
    model.optimizer.set_lr(0.01)

    callbacks = [tt.cb.EarlyStopping()]
    model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val, verbose=0)

    # Predicted survival curves: rows are time points, columns are test subjects.
    surv = model.interpolate(10).predict_surv_df(x_test)
    surv_df = pd.DataFrame(surv)
    surv_df.index.name = 'time'
    surv_df.columns.name = 'survival_function'

    # Area under each survival curve, used as the predicted survival time.
    survival_predictions = pd.Series(trapz(surv.values.T, surv.index), index=df_test.index)
    l1_hinge_value = l1(df_test.time, df_test.event, survival_predictions, l1_type='hinge')
    l1_margin_value = l1(df_test.time, df_test.event, survival_predictions, df_train.time, df_train.event, l1_type='margin')

    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
    c_index = ev.concordance_td('antolini')
    brier = ev.integrated_brier_score(time_grid)

    auc_scores = []
    for eval_time in quantiles:
        # Use the row of `surv` whose time is closest to the evaluation time.
        interp_time_index = np.argmin(np.abs(eval_time - surv.index.values))
        estimated_risks = 1 - surv.iloc[interp_time_index].values

        # A constant risk score carries no ranking information; skip it.
        if np.min(estimated_risks) == np.max(estimated_risks):
            continue

        try:
            auc = cumulative_dynamic_auc(labels_train, labels_test, estimated_risks, times=[eval_time])[0][0]
        except Exception as e:
            # Best effort: a failure at one time point must not abort the
            # experiment, but it is recorded rather than swallowed.
            auc_failures.append((i, eval_time, str(e)))
            continue

        if np.isfinite(auc):
            auc_scores.append(auc)

    # Fall back to 0.5 when no time point produced a usable AUC.
    AUC_scores.append(np.mean(auc_scores) if auc_scores else 0.5)
    CI.append(c_index)
    IBS.append(brier)
    L1_hinge.append(l1_hinge_value)
    L1_margin.append(l1_margin_value)

def safe_stat(data):
    """Return ``(mean, stdev)`` of ``data``, each rounded to 3 decimals.

    The result is always a 2-tuple of scalars, so it can be unpacked safely:

    * empty input      -> ``(0.0, 0.0)``
    * a single value   -> ``(mean, 0.0)`` (a sample standard deviation needs
      at least two values)
    * two or more      -> ``(mean, stdev)``

    Parameters
    ----------
    data : sequence of numbers

    Returns
    -------
    tuple
        ``(mean, standard deviation)``.
    """
    if len(data) == 0:
        return 0.0, 0.0
    mean_value = round(statistics.mean(data), 3)
    if len(data) < 2:
        return mean_value, 0.0
    return mean_value, round(statistics.stdev(data), 3)

auc_mean, auc_std = safe_stat(AUC_scores)

# Mean and standard deviation over all experiments, via the same helper that
# is used for the AUC.
for metric_name, metric_values in (('CI', CI), ('IBS', IBS), ('L1_hinge', L1_hinge), ('L1_margin', L1_margin)):
    metric_mean, metric_std = safe_stat(metric_values)
    print(f'{metric_name}:', metric_mean, metric_std)
print(f'AUC: {auc_mean} ± {auc_std}')

# D-calibration is evaluated once and the result reused for every line below.
# `surv` is the prediction of the last experiment only.
# NOTE(review): `surv.iloc[6]` is the predicted survival of every test subject
# at one fixed row of the time grid. d_calibration presumably expects each
# subject's predicted survival probability at its own observed time — confirm
# against the survival_evaluation documentation before relying on these numbers.
d_cal = d_calibration(df_test.event, surv.iloc[6])

print('d_calibration_p_value:', round(d_cal['p_value'], 3))
print('D-Calibration:', round(sum(d_cal['bin_proportions']), 3))
print('d_calibration_bin_proportions:')
for proportion in d_cal['bin_proportions']:
    print(proportion)
print('D-Calibration_censored:', round(sum(d_cal['censored_contributions']), 3))
print('d_calibration_censored_contributions:')
for contribution in d_cal['censored_contributions']:
    print(contribution)
print('D-Calibration_uncensored:', round(sum(d_cal['uncensored_contributions']), 3))
print('d_calibration_uncensored_contributions:')
for contribution in d_cal['uncensored_contributions']:
    print(contribution)


CI: 0.845 0.035
IBS: 0.055 0.017
L1_hinge: 6.735 1.413
L1_margin: 17.177 3.055
AUC: 0.903 ± 0.042
d_calibration_p_value: 0.0
D-Calibration: 1.0
d_calibration_bin_proportions:
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.04780835041633017
0.43266929099192986
D-Calibration_censored: 0.573
d_calibration_censored_contributions:
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.06494029852060171
0.04780835041633017
0.005746214068852932
D-Calibration_uncensored: 0.427
d_calibration_uncensored_contributions:
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.4269230769230769
