In [None]:
import os
os.chdir('../')

import sys
sys.argv=['']
del sys

import argparse
import json
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import random
import scipy

from sklearn.preprocessing import StandardScaler
from sksurv.util import Surv as skSurv
from sksurv.metrics import concordance_index_ipcw, cumulative_dynamic_auc, brier_score, integrated_brier_score
from sklearn_pandas import DataFrameMapper
from sklearn.utils import resample
from utils.param_search import *
from utils.output_results import *

The dataset used here is the simulation dataset. 

The available uncertainty measures are: MCDropout, DeepEnsemble, VAEUnc, BMask and Bootstrap. Here, we apply the bootstrap method. The other methods can be applied as presented in the other scripts, just by changing the dataset argument to "Simulation".

We apply this method to different types of neural network models :
- CoxCC, CoxTime
- DeepHit

In [None]:
# Notebook configuration, expressed as command-line style options so the same
# code can run as a script. Each entry is (flags, add_argument keyword options),
# registered in the order listed.
parser = argparse.ArgumentParser()
for _flags, _options in (
    (('--dataset', '-dt'), dict(default='Simulation', type=str)),
    # NOTE(review): with default=True and action='store_true' this option is
    # True whether or not the flag is passed, so it can never be switched off.
    (('--plot_mode', '-pm'), dict(default=True, action='store_true')),
    # Model: CoxTime, DeepHit, CoxCC
    (('--name', '-n'), dict(type=str, default="DeepHit")),
    # Uncertainty method: Bootstrap, MCDropout, DeepEnsemble, VAE, BMask
    (('--uncertainty', '-u'), dict(type=str, default="Bootstrap")),
    # Evaluation time grid mode: "fixed" or "percentiles"
    (('--timepoints', '-tp'), dict(type=str, default="fixed")),
):
    parser.add_argument(*_flags, **_options)
config = parser.parse_args()

In [None]:
# Results go to results/<dataset>/<uncertainty>/<model>/ (created if missing);
# input data is read from data/<dataset>/. The trailing '' element keeps the
# trailing slash, since later code appends file names by plain concatenation.
dir_res = '/'.join(('results', config.dataset, config.uncertainty, config.name, ''))
os.makedirs(dir_res, exist_ok=True)
dir_data = '/'.join(('data', config.dataset, ''))

# Data Preparation

The data is split into train and test sets beforehand. As the data is simulated, it is possible to output the true value of survival times: it is stored in the st_test file. 

In [None]:
# Load the pre-split train and test sets.
df_train, df_test = (
    pd.read_csv(dir_data + file_name)
    for file_name in ("sim_train.csv", "sim_test.csv")
)

# The st_test file is transposed after loading, so its CSV header becomes the
# index, which is then cast to float.
# NOTE(review): presumably the index holds time points and each column one
# test subject's true survival curve — confirm against the simulation script.
st_test = pd.read_csv(dir_data + "st_test.csv").transpose()
st_test.index = st_test.index.astype('float')

The simulated data is composed of continuous variables (the X variables) and a binary variable (the Z variable). 

In [None]:
# Continuous covariates X1..X10.
ContVar = [f'X{k}' for k in range(1, 11)]
# Binary covariate.
CatVar = ['Z1']
# Every column of interest: covariates, then survival time, status and id.
AllVar = [*ContVar, *CatVar, 'yy', 'status', 'id']

The continuous variables are standardized. The yy variable corresponds to the survival time, the status variable is the censoring indicator (a value of 0 corresponds to censoring) and the id variable is the id of the patient.

In [None]:
# One independent StandardScaler per continuous column (the column is selected
# as a one-element list).
standardize = [([name], StandardScaler()) for name in ContVar]
# Survival time, status, id and the binary covariate are mapped with a None
# transformer.
leave = [(name, None) for name in ['yy', 'status', 'id', *CatVar]]
# df_out=True makes the mapper return a DataFrame.
df_mapper = DataFrameMapper([*standardize, *leave], df_out=True)

# Cross Validation and Hyperparameters Search

A simple 5-fold cross-validation is implemented on the training set to determine the hyperparameters of the neural network models. The optuna package is used to perform the hyperparameter search. The hyperparameters that are searched are the following:

| Hyperparameter | Values |
|----------|--------------|
| Activation function |  {tanh, relu} |
| Batch size |  {8,16,32,64,128} |
| Dropout rate |  [0.0,0.3] | 
|Layers | {1,2,3,4}|
|Learning rate|[1e-3, 1e-2]|
|Neurons|[4,128]|
|Optimizer|{adam, adam_amsgrad, RMSProp, SGDWR}|
|L2 penalty|[0,0.1]|
|Alpha (DeepHit)|[0,1]|
|Sigma(DeepHit)|{0.1,0.25,0.5,1,2.5,5,10,100}|
|Durations(DeepHit)|{10,50,100,200,400}|

In [None]:
# Hyperparameter search. The study is stored in a SQLite file inside the
# results directory and re-opened if it already exists (load_if_exists=True).
sampler = optuna.samplers.TPESampler()
study = optuna.create_study(
    study_name=config.name,
    storage='sqlite:///' + dir_res + config.name + '.db',
    sampler=sampler,
    direction='minimize',
    load_if_exists=True,
)


def _objective(trial):
    """Evaluate one optuna trial through objective_net on the training set."""
    return objective_net(trial, df_train, df_mapper, dir_res, config)


study.optimize(_objective, n_trials=2)

# Report the best trial and the full table of trials.
trial = study.best_trial
outer_loop = pd.DataFrame([trial.params])
df_results = study.trials_dataframe()

print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for _param_name, _param_value in trial.params.items():
    print(f"    {_param_name}: {_param_value}")
print(df_results)

# Persist the best parameters (read back later) and the table of trials.
outer_loop.to_csv(dir_res + 'best_param.csv', sep=';', index=False, header=True)
df_results.to_csv(dir_res + 'trials_dataframe.csv', sep=';', header=True)

# Uncertainty Measure using Bootstrap

We always build the model with the same hyperparameters, namely those selected by cross-validation. We sample data with replacement from the training set. Then, the model is trained on this bootstrapped dataset. We repeat the sampling M times, obtaining M predictions per point of the test set. This is the method called [Bootstrap](https://www.jstor.org/stable/2958830).

We load the hyperparameters selected previously.

In [None]:
# Reload the selected hyperparameters and copy them onto `config`, which is
# what build_model_net receives later.
outer_loop = pd.read_csv(dir_res + 'best_param.csv', sep=';')

# config attribute -> column name in best_param.csv
_param_columns = {
    'acti_func': 'activation',
    'batch_size': 'batch_size',
    'dr': 'dropout',
    'layers': 'n_layers',
    'lr': 'learning_rate',
    'neurons': 'neurons',
    'optim': 'optimizer',
    'pen_l2': 'l2',
}
if config.name == "DeepHit":
    # DeepHit has three extra tuned hyperparameters.
    _param_columns.update(alpha='alpha', sigma='sigma', num_durations='num_durations')

for _attribute, _column in _param_columns.items():
    setattr(config, _attribute, outer_loop[_column][0])

# Label transformer for the survival targets. The empty string is the
# "no transform" sentinel that the training loop tests against.
if config.name == "DeepHit":
    labtrans = DeepHitSingle.label_transform(config.num_durations)
elif config.name == "CoxTime":
    labtrans = CoxTime.label_transform()
else:
    labtrans = ""

We define the number of repetitions, M.

In [None]:
# Number of bootstrap replicates: the training loop runs `for j in range(M)`
# and uses j as the random seed of replicate j.
M=100

In [None]:
# Time points at which the model is evaluated.
if config.timepoints == "percentiles":
    # Data-driven grid: times taken from the Kaplan-Meier fit of the training
    # set at nine levels running from 0.9 down to 0.1.
    kmf = KaplanMeierFitter()
    kmf.fit(np.array(df_train['yy']), np.array(df_train['status']))
    time_grid = np.array(kmf.percentile(np.linspace(0.9, 0.1, 9)).iloc[:,0])
elif config.timepoints == "fixed":
    # Hand-picked grid per dataset; any other dataset (including the
    # simulation) uses the last one.
    if config.dataset == "Metabric":
        time_grid = [2,5]
    elif config.dataset == "LungCancerExplorer":
        time_grid = [24,60]
    else:
        time_grid = [0.5,1,2,3]
else:
    # Fail here instead of leaving time_grid undefined (or stale from an
    # earlier notebook run) and crashing much later in the training loop.
    raise ValueError(
        "Unknown --timepoints value {!r}; expected 'percentiles' or 'fixed'.".format(config.timepoints)
    )

We compute the Concordance Index (C-index), the Oracle C-index, the Brier score, the Oracle Brier score, and the survival predictions of the model at the timepoints of the time grid previously defined.

In [None]:
# Per-replicate results: one column per bootstrap replicate j, one row per
# time point of time_grid.
CAll = pd.DataFrame()      # C-index of the model
COrAll = pd.DataFrame()    # C-index of the oracle (true survival in st_test)
BSAll = pd.DataFrame()     # Brier score of the model
BSOrAll = pd.DataFrame()   # Brier score of the oracle
PredAll = []               # per replicate: predictions at each time point
measures = pd.DataFrame()  # summary table, filled after the loop

for j in range(M):
    print(j)

    # Bootstrap of the train set and split into validation and train set
    df_trainb = df_train.iloc[resample(df_train.index, replace=True, n_samples=len(df_train), random_state=j)]
    df_valb = df_trainb.sample(frac=0.2, random_state=j)
    # drop() works by label and the bootstrap sample repeats labels, so every
    # copy of a validation row is removed from the training part (which can
    # therefore be smaller than 80% of the sample).
    df_trainb = df_trainb.drop(df_valb.index)

    # The scalers are fitted on the bootstrap training part only.
    df_trainb = df_mapper.fit_transform(df_trainb)
    df_valb = df_mapper.transform(df_valb).astype('float32')
    # Transform into a per-replicate frame. Overwriting df_test here would
    # feed already-standardized data back into the mapper on the next
    # replicate and compound the scaling.
    df_testb = df_mapper.transform(df_test).astype('float32')

    x_train = np.array(df_trainb.drop(['yy','status','id'], axis=1)).astype('float32')
    x_val = np.array(df_valb.drop(['yy','status','id'], axis=1)).astype('float32')
    x_test = np.array(df_testb.drop(['yy','status','id'], axis=1)).astype('float32')
    y_train = (df_trainb['yy'].values, df_trainb['status'].values)
    y_val = (df_valb['yy'].values, df_valb['status'].values)
    y_test = (df_testb['yy'].values, df_testb['status'].values)

    # labtrans == "" means the model needs no label transform.
    if labtrans != "":
        y_train = labtrans.fit_transform(*y_train)
        y_val = labtrans.transform(*y_val)

    val = tt.tuplefy(x_val, y_val)

    in_features = x_train.shape[1]
    model, callbacks = build_model_net(config, in_features, labtrans)

    log = model.fit(x_train,
                    y_train,
                    int(config.batch_size),
                    epochs=500,
                    callbacks=callbacks,
                    verbose=False,
                    val_data=val,
                    shuffle=True)

    # Output of the survival probabilities
    if config.name in ["CoxCC", "CoxTime"]:
        _ = model.compute_baseline_hazards()
        surv = model.predict_surv_df(x_test)
    elif config.name == "DeepHit":
        surv = model.interpolate(10).predict_surv_df(x_test)
    else:
        # Without this, `surv` would be unbound (or stale from a previous run).
        raise ValueError("No prediction rule for model {!r}".format(config.name))

    # Output of the evaluation measures on the test set at predefined time points
    data_train = skSurv.from_arrays(event=df_trainb['status'], time=df_trainb['yy'])
    data_test = skSurv.from_arrays(event=df_testb['status'], time=df_testb['yy'])

    # Survival probabilities at each time point, computed once per replicate.
    model_surv = [determine_surv_prob(surv, t) for t in time_grid]
    oracle_surv = [determine_surv_prob(st_test, t) for t in time_grid]

    # The C-index expects a risk score (higher = earlier event), hence the
    # negated survival probability.
    CAll[j] = [concordance_index_ipcw(data_train, data_test, np.array(-s), t)[0]
               for s, t in zip(model_surv, time_grid)]
    COrAll[j] = [concordance_index_ipcw(data_train, data_test, np.array(-s), t)[0]
                 for s, t in zip(oracle_surv, time_grid)]

    # The Brier score expects the survival probability itself; the sign flip
    # used for the C-index must not be applied here.
    BSAll[j] = [brier_score(data_train, data_test, np.array(s), t)[1][0]
                for s, t in zip(model_surv, time_grid)]
    BSOrAll[j] = [brier_score(data_train, data_test, np.array(s), t)[1][0]
                  for s, t in zip(oracle_surv, time_grid)]

    Pred = np.asarray(model_surv)
    PredAll.append(Pred)

    # Drop the references to the fitted network before the next replicate.
    del model
    del log

For the coverage rate, the level of confidence is $\alpha = 0.95$.

In [None]:
# Average each metric over the M bootstrap replicates (columns), leaving one
# value per time point.
measures['C'] = CAll.mean(axis=1)
measures['C_Oracle'] = COrAll.mean(axis=1)
measures['BS'] = BSAll.mean(axis=1)
measures['BS_Oracle'] = BSOrAll.mean(axis=1)

# Mean prediction per replicate and time point (not used further in this cell).
res_all = np.empty((M,len(time_grid)))
for m in range(M):
    res_all[m,] = [np.mean(PredAll[m][t]) for t in range(len(time_grid))]

cr = []
BNAll = pd.DataFrame()
alpha = 0.95
for t, time_point in enumerate(time_grid):
    # Test points in rows, bootstrap replicates in columns.
    res_time = pd.DataFrame([PredAll[rep][t] for rep in range(M)]).T
    # Pass `alpha` instead of repeating the literal, so changing the level
    # above actually takes effect.
    cr.append(output_cr(res_time, st_test, time_point, alpha))

    # Prediction error against the true survival probability.
    true_surv = determine_surv_prob(st_test, time_point)
    errors = pd.DataFrame([PredAll[rep][t] - true_surv for rep in range(M)]).T
    # Explicit column-wise mean: np.mean(DataFrame) reduces over both axes in
    # pandas >= 2, which yields a scalar and leaves BNAll empty (NaN bias).
    BNAll[str(time_point)] = errors.mean(axis=0)

# Mean over replicates of the per-replicate bias, one value per time point.
measures['Bias_id'] = np.array(BNAll.mean())
measures['Coverage'] = cr
measures['Time'] = time_grid
measures.to_csv(dir_res+'measures_'+config.name+'.csv', sep = ';', header = True, index=True)

In [None]:
# Display the summary table (one row per time point of time_grid).
measures