This notebook evaluates and compares the models whose predictions are saved in `results/`.
*Run it last, once the prediction files have been written.*

In [None]:
import os 
import numpy as np
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt

from pycox.evaluation import EvalSurv

### Evaluate all models

Enumerate all prediction csv files in `results/` and compare the models' performance (C-Index and integrated Brier score) on each fold of the different splits considered.

In [None]:
# Fold assignment: one column per split type, one row per sample; each value
# is the fold the sample belongs to (missing values are dropped downstream).
split = pd.read_csv('results/split.csv', index_col=[0])

# Observed outcomes; the `t` and `e` columns are the ones read at evaluation.
outcomes = pd.read_csv('data/TGCA_Merged.csv', index_col=0)

In [None]:
# Score every saved prediction file on every fold of every split type and
# gather the results in one DataFrame indexed by (Model, Split, Metric),
# with one column per fold.
performances = {}
for file_name in sorted(os.listdir('results/')):
    if 'predictions.csv' not in file_name:
        continue

    # Rows are indexed by (split type, sample id); the column labels are
    # converted to float below and used as the evaluation time grid.
    predictions = pd.read_csv(os.path.join('results/', file_name), index_col = [0, 1])

    # Model name is the file name up to (excluding) its last underscore.
    model = file_name[:file_name.rindex('_')]

    performances[model] = {}
    for split_type in split.columns:
        folds = split[split_type].dropna().unique()
        scores = pd.DataFrame(index = ['C-Index', 'Brier'], columns = folds)

        pred_split = predictions.loc[split_type]
        pred_split.columns = pred_split.columns.astype(float)
        # The time grid is the same for every fold: compute it once.
        times = pred_split.columns.to_numpy()

        for fold in folds:
            # Only the held-out samples of the fold are needed for scoring.
            in_fold = split[split_type] == fold
            test = in_fold[in_fold].index

            # EvalSurv receives the predictions transposed, so that the time
            # grid is on the index and each column is one sample.
            ev = EvalSurv(pred_split.loc[test].T, outcomes.t.loc[test].values, outcomes.e.loc[test].values, censor_surv='km')
            scores.loc['C-Index', fold] = ev.concordance_td()
            scores.loc['Brier', fold] = ev.integrated_brier_score(times)

        performances[model][split_type] = scores
    performances[model] = pd.concat(performances[model])
performances = pd.concat(performances)
performances.index.set_names(['Model', 'Split', 'Metric'], inplace = True)

In [None]:
# Display the metrics table, indexed by (Model, Split, Metric).
performances