# Sepsis Prediction

### Training Notes

Imputation
- Imputed training data with a monotonic piecewise cubic Hermite interpolating polynomial (PCHIP)
- If weight was missing, used average UK weights

Data Splitting
- Split on per-patient basis to avoid correlations

Picking Data
- Excluded the hour before sepsis is recorded, because the patient could already have contracted it before it was detected

Standardization
- Mean 0, SD 1

In [None]:
# For python 2 & 3 compatibility:
# (__future__ imports must come before any other statement)
from __future__ import print_function
# Import future builtins
from builtins import (ascii, bytes, chr, dict, filter, hex, input,
                      int, map, next, oct, open, pow, range, round,
                      str, super, zip)
# Disallow removed builtins like xrange
#from future.builtins.disabled import *
import math
from collections import deque
from copy import deepcopy

from tqdm import tqdm

import numpy as np
import score as sc

# Settings
# Fraction of the patients assigned to the training split; the remainder is
# held out for testing.
TRAINING_SIZE = 0.7

In [None]:
def perc(num):
    """Format a fraction as a percentage string with two decimals.

    Example: ``perc(0.5)`` returns ``'50.00%'``.
    """
    percentage = 100 * num
    return '{:.2f}%'.format(percentage)

In [None]:
def build_score_table(trigger_alert, patient_data, window):
    """Replay every patient's record step by step and log when alarms fire.

    For each patient, ``trigger_alert`` is called on successively longer
    prefixes of the measurements (rows ``0..i``).  The ``'column_onset'``
    entry is removed from what the callback sees so it cannot read the label.
    An alarm is recorded only when the callback fires and no alarm was
    recorded in the preceding ``window_size`` steps.

    Args:
        trigger_alert: callable taking a patient dict (label removed,
            measurements truncated) and returning a truthy value to raise
            an alarm.
        patient_data: dict whose values are patient dicts holding
            ``'measurements'`` (2-D array, one row per time step) and
            ``'column_onset'`` (NaN when the patient never becomes septic,
            otherwise the row index of onset).
        window: two-element sequence; ``window[1] - window[0] + 1`` is the
            number of steps during which a repeated alarm is suppressed.

    Returns:
        ``(patient_table, septic)``: ``patient_table[p]`` is a list of 0/1
        alarm flags, one per scored time step of patient ``p``, and
        ``septic[p]`` is 1 if that patient has an onset, else 0.
    """
    patient_table = []
    septic = []
    window_size = window[1] - window[0] + 1

    # .values() exists on both Python 2 and 3; .itervalues() is Python-2 only.
    for patient in tqdm(patient_data.values(), total=len(patient_data)):
        # a buffer to prevent alarms from being fired at an undesired rate
        alarm_deque = deque(window_size * [False], window_size)

        patient_alarms = []
        measurements = patient['measurements']
        onset = patient['column_onset']

        # if the patient does not contract sepsis, test on all times
        if math.isnan(onset):
            score_length = measurements.shape[0]
            septic.append(0)
        else:
            # +1 b/c we're including the onset time in prediction.
            # int(): a NaN-capable onset is a float, which range() rejects.
            score_length = int(onset) + 1
            septic.append(1)

        # Template of what the predictor is allowed to see.  The measurements
        # are blanked here so the per-step deepcopy below does not duplicate
        # the full array only to have it replaced by a slice.
        visible = dict(patient)
        # no cheating!
        del visible['column_onset']
        visible['measurements'] = None

        for i in range(score_length):
            patient_prefix = deepcopy(visible)
            patient_prefix['measurements'] = measurements[:i+1, :]

            if trigger_alert(patient_prefix) and True not in alarm_deque:
                alarm_deque.append(True)
                patient_alarms.append(1)
            else:
                alarm_deque.append(False)
                patient_alarms.append(0)

        patient_table.append(patient_alarms)

    return patient_table, septic



# Load
This section processes and imputes the original dataset.

In [None]:
from mimic_loader import mimic_interpolated, mimic_carryforward

In [None]:
from sepsis_predictor import sepsis_predictor

# Test

In [None]:
# Split data on a per-patient basis: sample patient keys without replacement
# for training and keep the rest for testing.
# The keys are materialized as a list because np.random.choice cannot sample
# from a dict view (what .keys() returns on Python 3).
patient_keys = list(mimic_interpolated.keys())
training_keys = np.random.choice(patient_keys, int(TRAINING_SIZE*len(patient_keys)), False)
testing_keys = list(set(patient_keys).difference(training_keys))

# Train on the interpolated data; use carryforward imputation for testing
training_patients = {k: mimic_interpolated[k] for k in training_keys}
testing_patients =  {k: mimic_carryforward[k] for k in testing_keys}

#Train
print("Training...")
predictor = sepsis_predictor(lookback = 5, predict_ahead = 1)
predictor.train(training_patients, percent_septic = .5, sepsis_length = 1, pre_sepsis_ignore = 0)
print("done.")

In [None]:
def trigger_alert(patient):
    """Return the trained global ``predictor``'s prediction for ``patient``.

    Adapter that lets ``build_score_table`` call the predictor through a
    plain one-argument callback.
    """
    prediction = predictor.predict(patient)
    return prediction

# Replay the held-out patients through the predictor and score the alarms.
# The same [-5,-1] window is passed to both calls; inside build_score_table
# it yields a 5-step span during which a repeated alarm is suppressed.
print("Testing...")
alarm_table, septic = build_score_table(trigger_alert, testing_patients, [-5,-1])
# NOTE(review): sc.score comes from the external `score` module; its return
# format is not visible here -- confirm before relying on the printed output.
print(sc.score(alarm_table, septic, [-5,-1]))

Balanced at 0.001%:
```
Sensitivity: 27.66%
        PPV: 91.50%
raw:
[226, 817, 226, 247]
```

Balanced at 0.05%:
```
Sensitivity: 32.56%
        PPV: 60.04%
raw:
[293, 900, 293, 488]
```

Balanced at 0.01%:
```
Sensitivity: 39.14%
        PPV: 35.82%
raw:
[326, 833, 326, 910]
```

Balanced at 0.5%:
```
Sensitivity: 73.65%
        PPV: 8.56%
raw:
[640, 869, 640, 7478]
```

Ideal Scores:

<     PPV     < 
< Sensitivity <

In [None]:
# Count the septic test patients for whom at least one alarm fired,
# printing each such patient's alarm sequence along the way.
alarm_and_septic = 0

for i, patient in enumerate(alarm_table):
    # Skip patients with no alarm at all, or who never became septic.
    if sum(patient) <= 0 or not septic[i]:
        continue
    print(patient)
    alarm_and_septic += 1

print(alarm_and_septic)