In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

# Hyperparameters

In [None]:
# Length of one time unit in days: ToS is later computed as elapsed days / window.
window = 30

# Preprocessing 

In [None]:
# Load the raw table from the Excel workbook (openpyxl is the reader for .xlsx files).
df = pd.read_excel('data.xlsx', engine='openpyxl')
# Preview the first 10 rows.
df.head(10)

### Remove every attribute with at least ~20% missing values

In [None]:
# Fraction of missing entries per column, keyed by column name.
n_rows = len(df)
missing_percent = {column: df[column].isna().sum() / n_rows for column in df.columns}

# Report the columns from the most to the least incomplete.
ranked = sorted(missing_percent.items(), key=lambda item: item[1], reverse=True)
for column, ratio in ranked:
    print('{}: {:.3f}'.format(column, ratio))

In [None]:
# Date columns are exempt from the missing-value filter: they are needed further down
# to derive the outcome flags and the time of survival.
date_columns = ['data_trapianto', 'data_decesso', 'lista_uscita_data', 'lista_ingresso_data']

# Columns whose missing fraction reaches the 0.198 cut-off (date columns excluded).
sparse_columns = [
    name
    for name, ratio in missing_percent.items()
    if ratio >= 0.198 and name not in date_columns
]
# Columns removed regardless of how complete they are.
always_dropped = ['diagnosi', 'ctx', 'codice_dm', 'lista_id', 'Deceduto iscritto in lista']

drop_list = sparse_columns + always_dropped
print(drop_list)
df = df.drop(columns=drop_list)

### Encoding Categorical Attributes

In [None]:
# Categorical attributes to be one-hot encoded.
categories = [
    'pers_sesso',
    'ric_HCV',
    'HCC',
    'trombosi_portale',
    'ric_HBsAg',
    'UNOS',
    'ric_HIV',
    'chirurgia_addom',
    'iscriz_tx_comb',
]

In [None]:
# One-hot encode the categorical columns. drop_first=True omits the first level of each
# column, so a variable with k levels yields k-1 indicator columns.
df = pd.get_dummies(df, columns=categories, drop_first=True)

### Define Censored Data and Compute Time of Survival (ToS)

In [None]:
# Outcome indicator columns, initialised to 0.0 for every row; the next cell switches
# the relevant rows to 1.
for flag in ('deceased', 'transplanted'):
    df[flag] = np.zeros(len(df))

In [None]:
# Classify every patient by outcome and derive the date that ends their observed time
# on the waiting list. Precedence:
#   1. transplanted - a transplant date is present (wins even if a death date exists)
#   2. deceased     - no transplant date, but a death date is present
#   3. censored     - neither a transplant date nor a death date is present
# notna() is used instead of an `is pd.NaT` identity test so that missing dates stored
# as NaN/None (e.g. in an object-dtype column) are also recognised as missing.
has_transplant = df['data_trapianto'].notna()
has_death_date = df['data_decesso'].notna()

is_deceased = ~has_transplant & has_death_date
is_censored = ~has_transplant & ~has_death_date

df.loc[has_transplant, 'transplanted'] = 1
df.loc[is_deceased, 'deceased'] = 1

# Censored patients have a date marking their exit from the waiting list but no
# confirmed death date, so the exit date is stored in `data_decesso` as the end of
# their observation period. Only deceased patients are uncensored.
# NOTE: this overwrites `data_decesso` in place, so this cell must not be re-run on its
# own: on a second pass the filled-in rows would be flagged as deceased. Re-run from
# the data-loading cell instead.
df['data_decesso'] = df['data_decesso'].mask(is_censored, df['lista_uscita_data'])

# Time of Survival: days spent on the list before dying or leaving it, expressed in
# units of `window` days.
# NOTE(review): rows whose end date is still missing get ToS = NaN - confirm whether
# such rows exist and how they should be handled.
df['ToS'] = (df['data_decesso'] - df['lista_ingresso_data']).dt.days / window

In [None]:
# Inspect the first 10 rows, which now include the `deceased`, `transplanted` and `ToS` columns.
df.head(10)

# Building Machine Learning Model

## Non-transplanted

In [None]:
# Keep only the rows whose `transplanted` flag is 0, then discard the raw date columns
# and the flag itself (constant after the filter).
not_transplanted = df['transplanted'] == 0
df_n_trans = df.loc[not_transplanted].drop(columns=date_columns + ['transplanted'])

In [None]:
# Features: every remaining column except the two target columns.
X = df_n_trans.drop(columns=['ToS', 'deceased'])
# Target: time of survival together with the `deceased` indicator.
y = df_n_trans.loc[:, ['ToS', 'deceased']]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
from src.model import *

# NOTE(review): the wildcard import hides where names come from. `SurvivalNN` arrives
# through it and, presumably, so does `plt`, which is never imported explicitly in the
# visible cells - TODO confirm and prefer explicit imports.

# Number of feature columns, read off the first row of X; passed as the first positional
# argument of SurvivalNN (presumably the input dimension - confirm in src.model).
n_features = len(X.iloc[0])
model = SurvivalNN(n_features, hidden=[32, 32])
model.compile(optimizer='Adam')

# Fit on float32 casts of the training split and keep the returned history object.
# NOTE(review): no imputation step is visible above, so X_train may still contain NaNs
# from columns below the missing-value cut-off - verify before trusting the loss.
history = model.fit(
    X_train.astype('float32'),
    y_train.astype('float32'),
    batch_size=32,
    epochs=200,
    verbose=1,
)

# Plot the loss values recorded in the history.
plt.plot(history.history['loss'])