# Sepsis Prediction

### Training Notes

Imputation
- Imputed training data with a monotonic piecewise cubic interpolant (PCHIP)
- If weight was missing, used average UK weights

Data Splitting
- Split on a per-patient basis, so that correlated measurements from the same patient never appear in both the training and the test set

Picking Data
- Avoided the hour before sepsis is recorded, because the patient could have contracted it before it was detected

Standardization
- Mean 0, SD 1

In [None]:
# For python 2 & 3 compatibility:
# `from __future__` imports must be the first statement of the cell/module,
# otherwise Python raises a SyntaxError.
from __future__ import print_function
# Import future builtins
from builtins import (ascii, bytes, chr, dict, filter, hex, input,
                      int, map, next, oct, open, pow, range, round,
                      str, super, zip)
# Disallow removed builtins like xrange
#from future.builtins.disabled import *
from tqdm import tqdm

import numpy as np

# Settings
TRAINING_SIZE     = 0.7 # proportion of dataset to train on

In [None]:
def perc(num):
    """Render the fraction ``num`` as a percentage string with two decimals."""
    scaled = 100 * num
    return '{:.2f}%'.format(scaled)

In [None]:
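# score() assumes the data structure
# {'patient id': {'info1': 1234, ...,
#                 'column_onset': row index of sepsis onset (NaN if the patient is never septic),
#                 'measurements': numpyarray[time x measurement]}}
# and that trigger_alert(patient, k) returns a truthy/falsy value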

from copy import deepcopy
import math
from collections import deque

def _alert_times(trigger_alert, patient, k, num_steps, refractory):
    """Replay one patient's record step by step and return the alert times.

    At step ``i`` the alarm function is given a copy of the patient that holds
    only rows ``0..i`` of 'measurements' and no 'column_onset' entry. An alert
    is only counted when no alert was counted in the previous ``refractory``
    steps. Returns the list of step indices at which an alert was counted.
    """
    # Everything except the ground-truth label and the measurements. These
    # fields are deep-copied for every call so the alarm function cannot
    # modify the caller's data through them; the (potentially large)
    # measurement array is never copied, only sliced.
    static_fields = {name: value for name, value in patient.items()
                     if name not in ('column_onset', 'measurements')}
    measurements = patient['measurements']

    # Sliding window of the most recent alert decisions.
    recent_alerts = deque(refractory * [False], refractory)
    alert_times = []

    for i in range(num_steps):
        patient_prefix = deepcopy(static_fields)
        patient_prefix['measurements'] = measurements[:i + 1, :]

        # trigger_alert is evaluated at every step, also while alerts are
        # being suppressed by the refractory window.
        if trigger_alert(patient_prefix, k) and True not in recent_alerts:
            recent_alerts.append(True)
            alert_times.append(i)
        else:
            recent_alerts.append(False)

    return alert_times


def score(trigger_alert, patient_data, k, refractory=3):
    """Evaluate an alarm function against labelled patient records.

    Args:
        trigger_alert: callable ``trigger_alert(patient, k)`` returning a
            truthy value when an alert should be raised for the given
            (truncated) patient record.
        patient_data: dict mapping patient id to a patient dict. Each patient
            dict must contain 'measurements' (rows are time steps, columns are
            measurements) and 'column_onset' (row index of sepsis onset, or
            NaN when the patient never becomes septic).
        k: detection window in time steps, inclusive of the onset step.
            Must be >= 1.
        refractory: number of steps after a counted alert during which further
            alerts are suppressed. Must be >= 0. Defaults to 3.

    Non-septic patients are replayed over all of their time steps; septic
    patients are replayed up to and including the onset step.

    Returns:
        A tuple ``(sensitivity, PPV, raw)`` where

        * sensitivity = septic patients with at least one alert / septic patients,
        * PPV = alerts raised within ``k`` steps of onset / all alerts,
        * raw = ``[num_detected_septic, num_septic, num_positive_alerts, num_alerts]``.

        A ratio whose denominator is zero is returned as NaN.
    """
    assert k >= 1
    num_septic          = 0
    num_detected_septic = 0
    num_alerts          = 0
    num_positive_alerts = 0

    # .values() works on both Python 2 and 3 (itervalues() is Python 2 only).
    for patient in tqdm(patient_data.values(), total=len(patient_data)):
        onset = patient['column_onset']

        # if the patient does not contract sepsis, test on all times
        if math.isnan(onset):
            num_steps = patient['measurements'].shape[0]
            num_alerts += len(_alert_times(trigger_alert, patient, k,
                                           num_steps, refractory))
            continue

        # the patient is septic; test up through diagnosis.
        # The onset may be stored as a float (it shares a field with NaN),
        # but range() needs an integer.
        onset = int(onset)
        # +1 b/c we're including the onset time in prediction
        alert_times = _alert_times(trigger_alert, patient, k,
                                   onset + 1, refractory)

        num_alerts += len(alert_times)
        # +1 b/c we include the onset time in the detection window
        num_positive_alerts += sum(1 for i in alert_times if onset - k + 1 <= i)
        num_septic += 1
        num_detected_septic += int(bool(alert_times))

    # The ratios are undefined without septic patients / alerts; report NaN
    # rather than dividing by zero.
    nan = float('nan')
    sensitivity = float(num_detected_septic) / num_septic if num_septic else nan
    ppv = float(num_positive_alerts) / num_alerts if num_alerts else nan

    #return sensitivity, PPV
    return sensitivity, ppv, [num_detected_septic, num_septic, num_positive_alerts, num_alerts]

# Load
This section processes and imputes the original dataset.

In [None]:
from mimic_loader import mimic_interpolated, mimic_carryforward

In [None]:
from sepsis_predictor import sepsis_predictor

# Test

In [None]:
#Split data
# Patients (not individual rows) are sampled without replacement. The keys are
# materialised as a list because on Python 3 dict.keys() is a view, which
# np.random.choice cannot sample from.
# NOTE(review): no random seed is set, so the split differs between runs.
training_keys = np.random.choice(list(mimic_interpolated.keys()), int(TRAINING_SIZE*len(mimic_interpolated.keys())), False)
testing_keys = list(set(mimic_interpolated.keys()).difference(training_keys))

#Use carryforward imputation for testing
training_patients = {k: mimic_interpolated[k] for k in training_keys}
testing_patients =  {k: mimic_carryforward[k] for k in testing_keys}

#Train
print("Training...")
predictor = sepsis_predictor(lookback = 5)
# NOTE(review): the meaning/units of percent_septic are defined by
# sepsis_predictor.train -- confirm there.
predictor.train(training_patients, percent_septic = .001)
print("done.")

In [None]:
def trigger_alert(patient, k):
    """Alarm function for score(): defer the decision to the trained predictor.

    ``k`` is accepted so the signature matches the ``trigger_alert(patient, k)``
    call made by score(); it is not used here.
    """
    prediction = predictor.predict(patient)
    return prediction

# Score the trained predictor on the test patients, using a detection window
# of 4 time steps.
print("Testing...")
sensitivity, PPV, raw = score(trigger_alert, testing_patients, k=4)

# Headline metrics as percentages, followed by the raw counts from score().
print("Sensitivity:", perc(sensitivity))
print("        PPV:", perc(PPV))
print("raw:", raw, sep="\n")

# Ring the terminal bell to signal that the run has finished.
import os
os.system("printf '\a'")

Balanced at 0.001%:
```
Sensitivity: 27.66%
        PPV: 91.50%
raw:
[226, 817, 226, 247]
```

Balanced at 0.05%:
```
Sensitivity: 32.56%
        PPV: 60.04%
raw:
[293, 900, 293, 488]
```

Balanced at 0.01%:
```
Sensitivity: 39.14%
        PPV: 35.82%
raw:
[326, 833, 326, 910]
```

Balanced at 0.5%:
```
Sensitivity: 73.65%
        PPV: 8.56%
raw:
[640, 869, 640, 7478]
```