This notebook analyzes the results obtained by running experiments_paper.

In [None]:
import os 
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys

sys.path.append('../')
sys.path.append('../DeepSurvivalMachines/')
from nfg import datasets

In [None]:
# Name of the dataset whose results are analyzed below
# (edit this value to analyze the results of another dataset)
dataset = "FRAMINGHAM"

In [None]:
# Folder in which the experiment results were saved
path = 'Results/'

# Load the data (covariates, times, events and covariate names),
# requesting competing risks and no normalization
x, t, e, covariates = datasets.load_dataset(
    dataset,
    competing=True,
    normalize=False,
)

In [None]:
from sksurv.metrics import concordance_index_ipcw, brier_score, cumulative_dynamic_auc, integrated_brier_score

### Utils
def evaluate(survival, e = e, t = t, groups = None):
    """
    Compute cause-specific performance metrics for each risk and each fold.

    For every risk (first column level of `survival`) and every fold in
    0..4, the following metrics are evaluated at each time of the second
    column level:
        - "BRS":  `brier_score`
        - "GCIS": `concordance_index_ipcw` without truncation time
        - "CIS":  `concordance_index_ipcw` truncated at the evaluated time
        - "ROCS": `cumulative_dynamic_auc`
        - "CIS_<group>": truncated concordance restricted to each group
          (only when `groups` is given)
    A metric that cannot be computed is reported as NaN.

    Args:
        survival (pd.DataFrame): Predictions used as survival probabilities
            (risk is taken as 1 - survival). The last column holds the fold
            of each row; all other columns are indexed by (risk, time).
        e (np.array): Event indicator of each row of `survival`.
            Defaults to the module-level `e` bound at definition time.
        t (np.array): Event time of each row of `survival`.
            Defaults to the module-level `t` bound at definition time.
        groups (pd.Series, optional): Group label of each row. When given,
            the truncated concordance is also computed within each group.

    Returns:
        pd.DataFrame: One row per (Risk, Fold, Metric), one column per time.
    """
    folds = survival.iloc[:, -1].values
    survival = survival.iloc[:, :-1]
    times = survival.columns.get_level_values(1).unique()
    risk = 1 - survival
    # Hoisted: the set of group labels does not depend on the risk or fold
    group_labels = [] if groups is None else list(groups.unique())
    et_dtype = [('e', bool), ('t', float)]

    results = {}

    # If multiple risk, compute cause specific metrics
    for r in survival.columns.get_level_values(0).unique():
        event = int(r)
        for fold in np.arange(5):
            in_train, in_test = folds != fold, folds == fold
            e_train, t_train = e[in_train], t[in_train]
            e_test,  t_test  = e[in_test], t[in_test]
            g_train, g_test = (None, None) if groups is None else (groups[in_train], groups[in_test])

            # Structured arrays (event of interest, time) expected by sksurv
            et_train = np.array(list(zip(e_train == event, t_train)), dtype = et_dtype) # For estimation censoring
            et_test = np.array(list(zip(e_test == event, t_test)), dtype = et_dtype) # For measure performance for given outcome

            # Keep test points observed before the last training time,
            # or that did not experience the event of interest
            selection = (t_test < t_train.max()) | (e_test != event)

            et_test = et_test[selection]
            g_test = None if groups is None else g_test[selection]
            survival_fold = survival[in_test][r][selection]
            risk_fold = risk[in_test][r][selection]

            try:
                brs = brier_score(et_train, et_test, survival_fold.values, times)[1]
            except Exception:
                brs = [np.nan] * len(times)

            # Concordance and ROC for each time
            gcis, cis, rocs = [], [], []
            res_group = {"CIS_{}".format(group): [] for group in group_labels}
            for time in times:
                try:
                    gcis.append(concordance_index_ipcw(et_train, et_test, risk_fold[time])[0])
                except Exception:
                    gcis.append(np.nan)

                try:
                    cis.append(concordance_index_ipcw(et_train, et_test, risk_fold[time], float(time))[0])
                except Exception:
                    cis.append(np.nan)

                try:
                    rocs.append(cumulative_dynamic_auc(et_train, et_test, risk_fold[time], float(time))[0][0])
                except Exception:
                    rocs.append(np.nan)

                # Each group is handled on its own so that a failure for one
                # group neither skips the following groups nor shifts the
                # values of later times: every list keeps one entry per time.
                for group in group_labels:
                    try:
                        score = concordance_index_ipcw(et_train[g_train == group], et_test[g_test == group], risk_fold[time][g_test == group], float(time))[0]
                    except Exception:
                        score = np.nan
                    res_group["CIS_{}".format(group)].append(score)

            res = {"GCIS": gcis, "CIS": cis, "BRS": brs, "ROCS": rocs}
            if groups is not None:
                res.update(res_group)
            results[(r, fold)] = pd.DataFrame.from_dict(res, orient='index', columns = times)
    results = pd.concat(results)
    results.index.set_names(['Risk', 'Fold', 'Metric'], inplace = True)

    return results

In [None]:
# To analyze group performance
# Only FRAMINGHAM is split into groups (age brackets built from the AGE covariate);
# for any other dataset no group analysis is performed.
if dataset == "FRAMINGHAM":
    groups = pd.DataFrame(x, columns = covariates).AGE
    groups = pd.cut(groups, [0, 40, 50, 60, 100], labels=["<40", "40-50", "50-60", "60+"])
else:
    groups = None

# Display the size of each group (nothing to display when there are no groups,
# which previously raised an AttributeError on None)
groups.value_counts() if groups is not None else None

In [None]:
# Open every result file of the selected dataset and compute its performance
predictions, clusters, results, likelihood = {}, {}, {}, {}
for file_name in os.listdir(path):
    if dataset in file_name and '.csv' in file_name:
        # The model name is the part of the file name between
        # the first '_' and the first '.'
        model = file_name[file_name.index('_') + 1: file_name.index('.')]

        print("Opening :", file_name, ' - ', model)
        if 'finegray' in model:
            # Reinitialize index: these files have a single header row, so the
            # (risk, time) two-level columns are rebuilt from the first three
            # column names, followed by the final ('Use', '') column
            predictions[model] = pd.read_csv(path + file_name, header = [0], index_col = 0)
            index = pd.DataFrame([[risk, time] for risk in ('1', '2') for time in predictions[model].columns[:3]] + [['Use', '']])
            predictions[model].columns = pd.MultiIndex.from_frame(index)
        else:
            predictions[model] = pd.read_csv(path + file_name, header = [0, 1], index_col = 0)
        results[model] = evaluate(predictions[model], groups = groups)

        cluster_file = file_name[: file_name.index('.')] + '_clusters.pickle'
        if os.path.isfile(path + cluster_file):
            # NOTE: pickle must only be used on trusted files (here, files
            # produced by the experiments). The context manager closes the
            # file handle, which was previously left open.
            with open(path + cluster_file, 'rb') as cluster_handle:
                clusters[model] = pickle.load(cluster_handle)
# Rename
# TODO: Add your method in the list for nicer display
dict_name = {'nfg': 'NeuralFG', 'nfgcs': 'NeuralFG OvA', 'finegray': 'Fine Gray', 'dsm': 'DSM', 'dsmcs': 'DSM OvA', 'dh': 'DeepHit', 'dhcs': 'DeepHit OvA'}

likelihood = pd.DataFrame.from_dict(likelihood, orient = 'index').rename(dict_name)
results = pd.concat(results).rename(dict_name)
# `level` is keyword-only in recent pandas versions
results.index.set_names('Model', level = 0, inplace = True)

In [None]:
def _mean_std(frame):
    """Format the mean and standard deviation of each column as 'mean (std)'."""
    template = "{:.3f} ({:.2f})"
    cells = [template.format(mean, std) for mean, std in zip(frame.mean(), frame.std())]
    return pd.Series(cells, index = frame.columns.astype(float))

# Aggregate the folds for each model, risk and metric
table = results.groupby(['Model', 'Risk', 'Metric']).apply(_mean_std)

# Keep the concordance and Brier score only, then pivot so that
# the columns are indexed by (metric, time)
is_reported = table.index.get_level_values(2).isin(['CIS', 'BRS'])
table = table.loc[is_reported]
table = table.unstack(level=-1).stack(level=0).unstack(level=-1)
table = table.loc[:, ['CIS', 'BRS']]
#table = table.loc[['NeuralFG', 'NeuralFG OvA', 'DSM', 'DeepHit', 'Fine Gray']]

# Group the rows by risk first, then by model
table = table.reorder_levels(['Risk', 'Model'])
table = table.sort_index(level = 0, sort_remaining = False)

table

In [None]:
# Export the current table as LaTeX source
latex_source = table.to_latex()
print(latex_source)

# Split by age

In [None]:
def _mean_std(frame):
    """Format the mean and standard deviation of each column as 'mean (std)'."""
    template = "{:.3f} ({:.2f})"
    cells = [template.format(mean, std) for mean, std in zip(frame.mean(), frame.std())]
    return pd.Series(cells, index = frame.columns.astype(float))

# Per-group concordance metrics reported in the tables below
group_metrics = ['CIS_<40', 'CIS_40-50', 'CIS_50-60', 'CIS_60+']

# Per-group performance of NeuralFG and NeuralFG OvA, aggregated over folds
table = results.groupby(['Model', 'Risk', 'Metric']).apply(_mean_std)
is_group_metric = table.index.get_level_values(2).str.contains('CIS_')
table = table.loc[is_group_metric].unstack(level=-1).stack(level=0)
table = table.loc[['NeuralFG', 'NeuralFG OvA'], group_metrics]
#table = table.loc[['NeuralFG', 'NeuralFG OvA', 'DSM', 'DeepHit', 'Fine Gray']]
table = table.reorder_levels(['Risk', 'Model', None])
table = table.sort_index(level = 0, sort_remaining = False)

# Difference between the two models, aggregated over folds
difference = results.loc['NeuralFG'] - results.loc['NeuralFG OvA']
difference = difference.groupby(['Risk', 'Metric']).apply(_mean_std)
is_group_metric = difference.index.get_level_values(1).str.contains('CIS_')
difference = difference.loc[is_group_metric].unstack(level=-1).stack(level=0)
difference = difference.loc[:, group_metrics]

In [None]:
# Restrict the table to risk '2' and put one model per column,
# with rows indexed by (time, metric)
risk_table = table.loc['2'].T.stack()
risk_table = risk_table.reorder_levels([None, 'Metric'])
table = risk_table.sort_index(level = 0, sort_remaining = False)

# Append the difference between the two models for the same risk
table['Difference'] = difference.loc['2'].stack()

table

In [None]:
# Export the current table as LaTeX source
latex_source = table.to_latex()
print(latex_source)

In [None]:
# Number of points per age group and event type, exported as LaTeX.
# Skipped when no groups are defined for the selected dataset.
if groups is not None:
    # `axis` is keyword-only in recent pandas versions
    group_events = pd.concat({"Age Group": groups, "Event": pd.Series(e)}, axis = 1)
    print(group_events.groupby(['Age Group', 'Event']).size().unstack().to_latex())