# Speed comparison

In this notebook, we train Neural Fine Gray (NFG) and DeSurv on the FRAMINGHAM dataset and compare their training times and the number of iterations they need to converge.

In [None]:
import sys

# Make the repository root and the bundled DeepSurvivalMachines checkout importable.
sys.path.extend(['../', '../DeepSurvivalMachines/'])

### Load the FRAMINGHAM Dataset

The package includes helper functions to load the dataset.

`x` is an np.array of features (covariates),
`t` contains the event/censoring times, and
`e` is the censoring indicator.

In [None]:
from nfg import datasets
# Load FRAMINGHAM: features (x), event/censoring times (t) and the indicator (e).
# NOTE(review): the fourth value, `columns`, is presumably the feature names - confirm in nfg.datasets.
# NOTE(review): `competing = True` presumably keeps the competing-risk event labels - confirm in nfg.datasets.
x, t, e, columns = datasets.load_dataset('FRAMINGHAM', competing = True)

### Compute horizons at which we evaluate the performance of Neural Fine Gray

Survival predictions are issued at specific time horizons. NFG is usually evaluated
at the 25th, 50th and 75th event-time quantiles, as is standard practice in survival analysis. Note that the cell below only imports the numerical libraries and fixes the random seeds; no horizons are computed in this speed comparison.

In [None]:
import pandas as pd
import numpy as np
import torch
# Fix the global NumPy and PyTorch seeds so that the notebook is reproducible.
SEED = 42
np.random.seed(SEED)
torch.random.manual_seed(SEED)

### Splitting the data into train, test and validation sets

We hold out 20% of the data as a test set. The remaining 80% is split again: 80% of it is used for training and 20% is kept as a validation set, which is passed to the models during fitting. Event times are rescaled by the largest training time.

In [None]:
from sklearn.model_selection import train_test_split

def obtain_split(seed = 42):
    """Split the notebook-level data (x, t, e) into train, validation and test sets.

    20% of the samples are held out as the test set; 20% of the remaining
    samples form the validation set. Every time vector is divided by the
    largest training time, so the largest training time maps to 1. Validation
    and test times share that scale and are not clipped.

    Args:
        seed: random_state forwarded to both train_test_split calls.

    Returns:
        Three (features, scaled_times, events) tuples, in the order
        train, validation, test. The scaled times are flattened.
    """
    # First cut: development part (80%) versus test part (20%).
    x_dev, x_test, t_dev, t_test, e_dev, e_test = train_test_split(
        x, t, e, test_size = 0.2, random_state = seed)
    # Second cut: train versus validation inside the development part.
    x_train, x_val, t_train, t_val, e_train, e_val = train_test_split(
        x_dev, t_dev, e_dev, test_size = 0.2, random_state = seed)

    # All three splits are rescaled by the same training maximum.
    longest_train_time = t_train.max()
    train_split = (x_train, (t_train / longest_train_time).flatten(), e_train)
    test_split = (x_test, (t_test / longest_train_time).flatten(), e_test)
    val_split = (x_val, (t_val / longest_train_time).flatten(), e_val)

    return train_split, val_split, test_split

### Model Training and Selection

In [None]:
from nfg import NeuralFineGray
from desurv import DeSurv
import time

In [None]:
# Benchmark: on 100 different random splits, train every model configuration
# and record, per configuration, the CPU time spent building + fitting the model
# (`times`) and the `speed` attribute the fitted model exposes (`speed`).
# NOTE(review): the original comments describe `model.speed` as the number of
# iterations needed to converge - confirm against the model implementations.
speed = {'Mono': [], 'NFG': [], 'n=1': [], 'n=15': [], 'n=100': []}
times = {'Mono': [], 'NFG': [], 'n=1': [], 'n=15': [], 'n=100': []}

# (result key, zero-argument model factory), listed in training order.
# Each factory call builds a fresh model with fresh layer lists.
# `n = n` binds the current value of n to each DeSurv factory.
model_builders = [
    ('n={}'.format(n), lambda n = n: DeSurv(layers = [50, 50, 50], layers_surv = [50, 50, 50], n = n))
    for n in [1, 15, 100]
]
model_builders += [
    ('NFG', lambda: NeuralFineGray(layers = [50], layers_surv = [50, 50], multihead = True)),
    ('Mono', lambda: NeuralFineGray(layers = [50], layers_surv = [50, 50], multihead = False)),
]

for i in range(100):
    (x_train, t_train_ddh, e_train), (x_val, t_val_ddh, e_val), (x_test, t_test_ddh, e_test) = obtain_split(i)

    for name, build_model in model_builders:
        # Re-seed before every model so each configuration starts from the same RNG state.
        np.random.seed(i)
        torch.random.manual_seed(i)

        # process_time counts CPU time of this process; model construction is
        # deliberately inside the timed region, as in the original measurements.
        start_time = time.process_time()
        model = build_model()
        # NOTE(review): the original comment says these settings ensure training
        # runs for n_iter iterations - confirm against the fit() implementations.
        model.fit(x_train, t_train_ddh, e_train, n_iter = 1000, bs = 100,
            lr = 1e-3, val_data = (x_val, t_val_ddh, e_val))
        times[name].append(time.process_time() - start_time)
        speed[name].append(model.speed)

In [None]:
# Turn each results dict into a DataFrame: one column per model configuration,
# one row per random split.
speed, times = (pd.DataFrame.from_dict(results) for results in (speed, times))

In [None]:
def _format_mean_std(results):
    """Return a Series with one 'mean (std)' string (two decimals) per column of `results`."""
    return pd.Series(
        ["{:.2f} ({:.2f})".format(mean, std) for mean, std in zip(results.mean(), results.std())],
        index = results.columns)

# Write the summaries to new names instead of overwriting the `speed` / `times`
# DataFrames, so this cell can be re-run without a fresh kernel.
speed_summary = _format_mean_std(speed)
times_summary = _format_mean_std(times)
speed_summary, times_summary