This notebook runs the model on the preprocessed data. The goal is to predict whether a patient will survive their stay.

In [None]:
import sys
sys.path.append('../')
import pandas as pd

# Reload data

In [None]:
# Outcomes table: the first CSV column is used as the row index
outcomes = pd.read_csv('data/outcomes_1_day.csv', index_col=0)

# Labs table: the first two CSV columns form a two-level row index and the
# first two CSV rows form a two-level column header
labs = pd.read_csv(
    'data/labs_1_day.csv',
    header=[0, 1],
    index_col=[0, 1],
)

In [None]:
# Binarise the outcome: True where `Death` holds a value, False where it is missing
# NOTE(review): re-running this cell turns every entry True, since the column
# no longer contains missing values after the first run
outcomes['Death'] = outcomes['Death'].notna()

# Split 

In [None]:
# Common prefix of the `path` handed to every experiment created below
results = 'results/classification'

In [None]:
# Boolean mask aligned on `outcomes`: True for the rows drawn into the
# 80% training sample (fixed seed), False for the remaining rows
train_index = outcomes.sample(frac=0.8, random_state=0).index
training = pd.Series(outcomes.index.isin(train_index), index=outcomes.index)

In [None]:
# Report the size of the cohort and of the training split
n_total, n_train = len(training), training.sum()
print('Total patients: {}'.format(n_total))
print('Training patients: {}'.format(n_train))

# Imputation

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

def imputation(data, strategy):
    """
        Prepares the data for the given imputation strategy

        Args:
            data: DataFrame that can be grouped by 'Patient'
                (index level or column)
            strategy: one of
                'LOCF': forward fills the values within each patient;
                    the remaining missing values are to be filled with -1
                'Individual': leaves the data untouched; the missing values
                    are to be filled with the median of each patient
                'MICE': fills all missing values with IterativeImputer
                    (fitted on every row of data); -1 is returned as fill
                    value for consistency with the other strategies

        Returns:
            (data, impute): the (possibly transformed) data and the fill
            value to give to fillna, either the scalar -1 or a DataFrame
            of medians with one row per patient

        Raises:
            ValueError: if strategy is not one of the strategies above
    """
    if strategy == 'LOCF':
        # Carry the last observation forward, never across patients
        data = data.groupby('Patient').ffill()
        impute = -1

    elif strategy == 'Individual':
        impute = data.groupby('Patient').median()

    elif strategy == 'MICE':
        imputer = IterativeImputer(random_state = 0, max_iter = 50,
            imputation_order = 'random', initial_strategy = 'median')
        # fit_transform returns a bare array: restore index and columns
        data = pd.DataFrame(imputer.fit_transform(data),
            index = data.index, columns = data.columns)
        impute = -1

    else:
        # Previously fell through to the return with `impute` undefined,
        # which surfaced as an unrelated UnboundLocalError
        raise ValueError("Unknown imputation strategy: {!r} "
            "(expected 'LOCF', 'Individual' or 'MICE')".format(strategy))

    return data, impute

def process(data, strategy = 'LOCF'):
    """
        Reduces the data to one row per patient

        Applies the strategy-specific preparation (see imputation), keeps
        the last value of each patient as returned by groupby('Patient').last()
        and fills what is still missing with the value given by imputation

        Args:
            data: DataFrame that can be grouped by 'Patient'
            strategy: imputation strategy name (default 'LOCF')

        Returns:
            DataFrame with one row per patient
    """
    prepared, fill_value = imputation(data, strategy)
    last_per_patient = prepared.groupby('Patient').last()
    return last_per_patient.fillna(fill_value)

In [None]:
from utils import Experiment

In [None]:
# Hyper-parameter grid given as `hyper_grid` to Experiment.create
# NOTE(review): keys look like scikit-learn LogisticRegression arguments;
# confirm how utils.Experiment consumes them
hyperparams = {
    'penalty': ['elasticnet'],                # single penalty type
    'C': [0.01, 0.1, 1., 10],                 # four candidate values
    'solver': ['saga'],                       # single solver
    'max_iter': [1000],                       # single iteration budget
    'l1_ratio': [0, 0.1, 0.5, 0.9, 1.],       # five candidate values from 0 to 1
    'n_jobs': [-1],                           # single value
}

In [None]:
# Run one Experiment (model 'log') per imputation strategy
for strategy in ('LOCF', 'MICE', 'Individual'):
    print('Imputation strategy: ', strategy)
    last = process(labs, strategy=strategy)

    # Sanity check: the -1 fill value must not appear in the processed data
    # NOTE(review): only the -1 sentinel is detected; NaN that the
    # 'Individual' median fill may leave behind would pass this check
    sentinel_count = (last == -1).sum().sum()
    assert sentinel_count == 0, "Non imputed values"

    se = Experiment.create(
        model='log',
        hyper_grid=hyperparams,
        path=results + '_reg_' + strategy,
    )
    se.train(last, outcomes.Death, training)


# MICE with group

In [None]:
# Attribute used to split patients in two groups: 'gender' or 'ethnicity'
group = 'ethnicity'

In [None]:
# Boolean membership and the matching readable labels for the chosen group;
# any value of `group` other than 'ethnicity' falls back to gender
if group == 'ethnicity':
    groups_binary = outcomes.ETHNICITY == 'White'
    group_labels = {True: 'White', False: 'Non white'}
else:
    groups_binary = outcomes.GENDER == 'M'
    group_labels = {True: 'Male', False: 'Female'}

groups = groups_binary.replace(group_labels)
# Distinct labels actually present in the data
groups_value = groups.unique()

In [None]:
# MICE imputation with the boolean group membership joined to the labs,
# so the group column is part of the data seen by the imputer
labs_with_group = labs.join(groups_binary)
last = process(labs_with_group, strategy='MICE')

specific_path = results + '_reg_{}_specific'.format(group)
se = Experiment.create(model='log', hyper_grid=hyperparams, path=specific_path)
se.train(last, outcomes.Death, training)