# Collect Stats

In this notebook, we apply Neural Fine Gray to the FRAMINGHAM data and collect evaluation statistics from the saved model predictions.

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so the project folder under MyDrive is reachable.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/DLHC

/content/drive/MyDrive/DLHC


In [3]:
from getpass import getpass

# Enter token securely (prompted at run time instead of being hard-coded in the notebook)
token = getpass("Enter your GitHub token: ")

# Set remote URL with token
# NOTE(review): remote_url is not used by any later cell visible in this notebook.
# If it is ever handed to git, the token travels inside the URL in plain text
# (and may end up in .git/config or in saved cell output) — confirm before relying on it.
remote_url = f"https://DrEaston:{token}@github.com/DrEaston/DLHC.git"



Enter your GitHub token: ··········


In [None]:
! git status


In [None]:
! git add .

In [None]:
! git commit -m "add collect stats back"

In [None]:

!pip install scikit-survival
!pip install pycox
!pip install lifelines

In [None]:
import sys

# Make the project folder and the two model sub-folders importable.
# Order matters for module lookup, so the directories are appended in a fixed sequence.
for extra_path in (
    '/content/drive/MyDrive/DLHC',
    '/content/drive/MyDrive/DLHC/NeuralFineGray',
    '/content/drive/MyDrive/DLHC/DeepSurvivalMachines',
):
    sys.path.append(extra_path)

In [None]:
# --- Setup
import os
import sys
import numpy as np
import pandas as pd

from nfg import datasets
from experiment import Experiment
from metrics import truncated_concordance_td, auc_td, brier_score as bs

from pycox.evaluation import EvalSurv
from sksurv.metrics import concordance_index_ipcw, brier_score, cumulative_dynamic_auc, integrated_brier_score
from metrics import truncated_concordance_td, auc_td, brier_score as bs

# --- Set your dataset
dataset = 'FRAMINGHAM'

# --- Directory that holds the saved prediction files for this dataset
path = f'/content/drive/MyDrive/DLHC/Results/{dataset}/'

# --- Load your data
# NOTE(review): the data is loaded relative to the current working directory (path='./'),
# not from `path` above — confirm the notebook is run from the project root.
x, t, e, covariates = datasets.load_dataset(dataset, path='./', competing=True, normalize=False)

# --- Set evaluation times: quantiles of the times with e > 0 (presumably the uncensored events)
horizons = [0.25, 0.5, 0.75]
times_eval = np.quantile(t[e > 0], horizons)

groups = None  # <-- no subgroup-specific metrics for this dataset


### Utils: The evaluation metrics used
def evaluate(survival, e = e, t = t, groups = None, times_eval = []):
    """
    Compute per-risk, per-fold concordance (CIS) and Brier score (BRS) metrics
    from cross-validated survival predictions.

    Parameters
    ----------
    survival : pd.DataFrame
        One row per sample. The last column holds the fold assignment of each row;
        the remaining columns carry a two-level header whose labels are cast to
        float and read as (risk, time).
    e, t : array-like
        Event indicators and event times. They are indexed with boolean masks built
        from the fold column, so they must be aligned with the rows of `survival`.
        The defaults are bound to the module-level `e` and `t` at the moment this
        function is defined. `e == int(risk)` selects the events of the risk being
        scored (presumably 0 marks censoring — confirm against the data loader).
    groups : pd.Series, optional
        Group label per row. When given, per-group metrics (``CIS_<g>``, ``BRS_<g>``)
        and their difference to the rest of the fold (``Delta_*``) are added for
        every evaluation time.
    times_eval : sequence of float
        Time horizons at which the truncated metrics are computed.

    Returns
    -------
    pd.DataFrame
        Concatenation of the per-(risk, fold) tables, indexed by
        ('Risk', 'Fold', 'Metric'), with an 'Overall' column plus one column per
        evaluation time that could be computed.

    Notes
    -----
    Folds are expected to be labelled 0..4 (five folds are hard-coded).
    A metric that raises is skipped with a warning instead of aborting the run.
    """
    import warnings  # function-scope import so the notebook's import cell stays untouched

    # Split off the fold column and turn the (risk, time) header into floats
    folds = survival.iloc[:, -1].values
    survival = survival.iloc[:, :-1]
    survival.columns = pd.MultiIndex.from_frame(pd.DataFrame(index=survival.columns).reset_index().astype(float))

    times = survival.columns.get_level_values(1).unique()
    results = {}

    # If multiple risk, compute cause specific metrics
    for r in survival.columns.get_level_values(0).unique():
        risk = int(r)
        for fold in np.arange(5):
            res = {}
            is_train, is_test = folds != fold, folds == fold
            e_train, t_train = e[is_train], t[is_train]
            e_test,  t_test  = e[is_test], t[is_test]
            g_train, g_test = (None, None) if groups is None else (groups[is_train], groups[is_test])

            survival_train = survival[is_train][r]
            survival_fold = survival[is_test][r]

            # The training-fold evaluator is passed as `censor_surv` for the test-fold evaluator
            km = EvalSurv(survival_train.T, t_train, e_train != 0, censor_surv = 'km')
            test_eval = EvalSurv(survival_fold.T, t_test, e_test == risk, censor_surv = km)

            res['Overall'] = {
                    "CIS": test_eval.concordance_td(),
                }
            try:
                res['Overall']['BRS'] = test_eval.integrated_brier_score(times.to_numpy())
            except Exception as err:
                warnings.warn("Overall BRS skipped (risk {}, fold {}): {!r}".format(r, fold, err))

            # `km` starts as the raw training data; truncated_concordance_td hands back a
            # value that is fed in again as `km` at the next horizon. It must therefore
            # never be overwritten by the group-specific tuples below.
            km = (e_train, t_train)
            for te in times_eval:
                try:
                    ci, km = truncated_concordance_td(e_test, t_test, 1 - survival_fold.values, times, te, km = km, competing_risk = risk)
                    res[te] = {
                        "CIS": ci,
                        "BRS": bs(e_test, t_test, 1 - survival_fold.values, times, te, km = km, competing_risk = risk)[0]}
                except Exception as err:
                    warnings.warn("Metrics at t={} skipped (risk {}, fold {}): {!r}".format(te, r, fold, err))
                    # Without an overall entry there is nothing to attach group metrics to
                    continue

                for group in groups.unique() if groups is not None else []:
                    try:
                        # Samples inside the group
                        km_group = (e_train[g_train == group], t_train[g_train == group])
                        res[te]["CIS_{}".format(group)] = truncated_concordance_td(e_test[g_test == group], t_test[g_test == group], 1 - survival_fold[g_test == group].values, times, te, km = km_group, competing_risk = risk)[0]
                        res[te]["BRS_{}".format(group)] = bs(e_test[g_test == group], t_test[g_test == group], 1 - survival_fold[g_test == group].values, times, te, km = km_group, competing_risk = risk)[0]

                        # Samples outside the group, reported as a difference to the group value
                        km_rest = (e_train[g_train != group], t_train[g_train != group])
                        res[te]["Delta_CIS_{}".format(group)] = res[te]["CIS_{}".format(group)] - truncated_concordance_td(e_test[g_test != group], t_test[g_test != group], 1 - survival_fold[g_test != group].values, times, te, km = km_rest, competing_risk = risk)[0]
                        res[te]["Delta_BRS_{}".format(group)] = res[te]["BRS_{}".format(group)] - bs(e_test[g_test != group], t_test[g_test != group], 1 - survival_fold[g_test != group].values, times, te, km = km_rest, competing_risk = risk)[0]

                    except Exception as err:
                        warnings.warn("Group {} metrics at t={} skipped (risk {}, fold {}): {!r}".format(group, te, r, fold, err))
            results[(r, fold)] = pd.DataFrame.from_dict(res)
    results = pd.concat(results)
    results.index.set_names(['Risk', 'Fold', 'Metric'], inplace = True)

    return results

# --- Load predictions and compute metrics
predictions, results, models = {}, {}, {}
for file_name in os.listdir(path):
    if dataset in file_name and (('.csv' in file_name) or ('.csv.gz' in file_name)):
        model = file_name
        model = model[model.rindex('_') + 1: model.index('.')]
        print("Opening:", file_name, ' - ', model)

        predictions[model] = pd.read_csv(path + file_name, header=[0, 1], index_col=0)
        results[model] = evaluate(predictions[model], groups=groups, times_eval=times_eval)

# --- Rename models nicely
dict_name = {'dsm': 'DSM'}  # keep simple for now

results = pd.concat(results).rename(dict_name)
results.index.set_names('Model', level=0, inplace=True)

# --- Summarize results
table = results.groupby(['Model', 'Risk', 'Metric']).apply(
    lambda x: pd.Series(["{:.3f} ({:.3f})".format(mean, std) for mean, std in zip(x.mean(), x.std())], index=x.columns)
)
table = table.unstack(level=-1).stack(level=0).unstack(level=-1).loc[:, ['CIS', 'BRS']]
table = table.reorder_levels(['Risk', 'Model']).sort_index(level=0, sort_remaining=False)

# --- Display table
print(table)


In [None]:
! ls

In [None]:
! pwd

In [None]:
! git status

In [None]:
! git add .

In [None]:
! git commit -m "initial commit"

In [None]:
import os

# Replace with your actual GitHub username
# NOTE(review): `username` is not referenced by any later cell shown in this notebook.
username = "DrEaston"





In [None]:
! git config --global user.email "curtis.easton@gmail.com"

In [None]:
! git config --global user.name "DrEaston"

In [None]:
! git push origin main

In [None]:
! git status

In [None]:
! git add -A

In [None]:
! git rm --cached NeuralFineGray


In [None]:
! git add .


In [None]:

!git rm --cached NeuralFineGray
!rm -rf .git/modules/NeuralFineGray

In [None]:
pwd

In [None]:
cd NeuralFineGray

In [None]:
rm -f NeuralFineGray/.git


In [None]:
! git push origin main

In [None]:
! git checkout -b curtis

In [None]:
! git push origin curtis

In [None]:
! rm -fr ".git/rebase-merge"

In [None]:
! ls -al

In [None]:
rm-rf .git

In [None]:
! git add experiment.py
! git add examples/experiment_competing_risk.py

In [None]:
! git add "Collect  Stats.ipynb"

In [None]:
! git status

In [None]:
! git rm "../Collect  Stats.ipynb"
! git add "../CollectStats.ipynb"

In [None]:
! git push origin main

In [None]:
! git commit -m "fixed"

In [None]:
! git add CollectStats.ipynb

In [None]:
! git push origin main

In [None]:
! chmod +x .git/hooks/post-commit

In [None]:
rm .git/hooks/post-commit

In [None]:
ls -l .git/hooks/

In [None]:
pwd

In [None]:
import shutil

# Keep a backup copy of the notebook next to the original.
# shutil.copy is a Python function, so it must run as Python rather than through the `!` shell escape.
shutil.copy("CollectStats.ipynb", "CollectStats_backup.ipynb")