# Imports

In [None]:
# For python 2 & 3 compatibility.
# NOTE: `from __future__` imports must be the first statement of the cell /
# module (only comments, blank lines and a docstring may precede them);
# placing them after another import raises a SyntaxError.
from __future__ import print_function
# Import future builtins
from builtins import (ascii, bytes, chr, dict, filter, hex, input,
                      int, map, next, oct, open, pow, range, round,
                      str, super, zip)
# Disallow removed builtins like xrange
from future.builtins.disabled import *

import json
import math
import numpy as np
from utils import log_progress as log
from mimic_loader import mimic_nsoa, common_measurements

# Number of consecutive per-visit rows (hours) that make up one model input
# window; the row immediately after the window is the prediction target.
LOOKBACK_WINDOW = 5   # number of hours to use to predict next window
# Fraction of the visits (keys of mimic_nsoa) that is sampled into the
# training split; the remaining visits form the test split.
TRAINING_SIZE   = 0.7 # proportion of dataset to train on

## Split Data by Visits

In [None]:
# Split at the visit level (not the row level) so that all rows of a visit
# end up on the same side of the train/test boundary.
#
# The keys are materialised into a list once: on Python 3 `dict.keys()` is a
# view object, which np.random.choice cannot sample from (it requires a 1-D
# array-like or an int).
visit_keys = list(mimic_nsoa.keys())
n_training = int(TRAINING_SIZE * len(visit_keys))

# Sample without replacement so that no visit is selected twice.
# NOTE(review): no random seed is set, so the split differs between runs.
training_keys = np.random.choice(visit_keys, n_training, replace=False)
testing_keys = list(set(visit_keys).difference(training_keys))

training_dict = {k: mimic_nsoa[k] for k in training_keys}
testing_dict = {k: mimic_nsoa[k] for k in testing_keys}

print("Training on", len(training_dict), "visits")

## Unwind Data into Per-Hour and Scale

In [None]:
def unwind(patient_dict, mmts):
    """Flatten per-patient measurement series into one table row per time step.

    Args:
        patient_dict: mapping of patient/visit id -> record. Each record must
            have a 'measurements' mapping of measurement name -> indexable
            series of values.
        mmts: ordered measurement names to emit as columns.

    Returns:
        A tuple ``(timeseries_table, patient_id_lookup)``:
        ``timeseries_table`` is a numpy array with one row per time step whose
        first column is the integer index of the patient and whose remaining
        columns are the values of ``mmts`` at that step;
        ``patient_id_lookup`` is the sorted list of ids, so that
        ``patient_id_lookup[index]`` recovers the original id.
    """
    # int to id
    patient_id_lookup = sorted(patient_dict.keys())
    # id to int, to speed up the following loop
    patient_int_lookup = {patient_id: n
                          for n, patient_id in enumerate(patient_id_lookup)}

    timeseries_table = []
    # .items() (rather than the Python-2-only .iteritems()) keeps this working
    # on both Python 2 and 3. `log` is the progress helper imported from utils.
    for patient, data in log(patient_dict.items(), 100, size=len(patient_dict)):
        measurements = data['measurements']
        # The number of time steps is read from the first series in the
        # mapping; this assumes all series of a record have the same length.
        # A record without any series contributes no rows.
        n_steps = len(next(iter(measurements.values()), ()))
        patient_index = patient_int_lookup[patient]
        for i in range(n_steps):
            timeseries_table.append(
                [patient_index] + [measurements[mmt][i] for mmt in mmts])

    timeseries_table = np.asarray(timeseries_table)

    return timeseries_table, patient_id_lookup

In [None]:
# Flatten each split into a per-row table restricted to the measurements in
# common_measurements. Column 0 of each table is the integer patient index;
# the matching *_patient_id_lookup list maps that index back to the id.
hourly_training, training_patient_id_lookup = unwind(training_dict, common_measurements)
hourly_testing,  testing_patient_id_lookup  = unwind(testing_dict,  common_measurements)

In [None]:
from sklearn import preprocessing

# Fit the scaler on the training measurement columns only. Column 0 holds the
# integer patient index and must pass through untouched, so it is sliced off
# before scaling and glued back on afterwards.
scaler = preprocessing.StandardScaler()
scaler.fit(hourly_training[:, 1:])

# The testing table is transformed with the scaler fitted on training data.
scaled_hourly_training = np.hstack(
    (hourly_training[:, :1], scaler.transform(hourly_training[:, 1:])))
scaled_hourly_testing = np.hstack(
    (hourly_testing[:, :1], scaler.transform(hourly_testing[:, 1:])))

## Fold back into time-series, map to time window

In [None]:
def fold(hourly_data, id_lookup):
    """Group the rows of a per-row table back into one series per patient.

    Args:
        hourly_data: 2-D array whose first column is the integer patient
            index and whose remaining columns are the per-row values.
        id_lookup: sequence mapping integer patient index -> patient id.

    Returns:
        A dict mapping each id in ``id_lookup`` to the rows of ``hourly_data``
        belonging to that patient (in table order), with the index column
        dropped.
    """
    folded = {}
    for index, patient_id in enumerate(id_lookup):
        # Boolean mask selects this patient's rows; then drop column 0.
        patient_rows = hourly_data[hourly_data[:, 0] == index]
        folded[patient_id] = patient_rows[:, 1:]
    return folded

In [None]:
# Fold the *scaled* tables back into per-visit series. Folding the unscaled
# hourly_training / hourly_testing tables here would leave the fitted scaler
# and the scaled_hourly_* arrays unused, and the model would be trained on raw
# measurement values. Targets are therefore in scaled units as well; use
# scaler.inverse_transform to map predictions back to raw units.
folded_training = fold(scaled_hourly_training, training_patient_id_lookup)
folded_testing  = fold(scaled_hourly_testing,  testing_patient_id_lookup)

In [None]:
def to_data(folded_data):
    """Turn per-patient series into sliding-window (input, target) samples.

    For every patient whose series has more than LOOKBACK_WINDOW rows, each
    run of LOOKBACK_WINDOW consecutive rows becomes one input sample and the
    row immediately following it becomes the corresponding target.

    Args:
        folded_data: mapping of patient id -> 2-D series of rows. Every id
            must also be a key of the module-level ``mimic_nsoa`` mapping,
            from which 'gender', 'weight' and 'age' are read.

    Returns:
        A tuple ``(input_data, output_data)`` of equal-length lists:
        ``input_data[k]`` is ``[gender, weight, age]`` followed by the
        flattened window, and ``output_data[k]`` is the row after the window.
    """
    input_data = []
    output_data = []
    # .items() (rather than the Python-2-only .iteritems()) keeps this working
    # on both Python 2 and 3.
    for patient, series in folded_data.items():
        if len(series) < LOOKBACK_WINDOW + 1:
            # this visit is too short to yield a full window plus a target
            continue

        # The static features do not change between windows of one visit, so
        # look them up once per patient rather than once per window.
        visit = mimic_nsoa[patient]
        static_features = [visit['gender'], visit['weight'], visit['age']]

        for start in range(len(series) - LOOKBACK_WINDOW):
            end = start + LOOKBACK_WINDOW
            row = list(static_features)
            # Flatten the window row by row into a single feature vector.
            row.extend(item for sublist in series[start:end] for item in sublist)
            input_data.append(row)
            output_data.append(series[end])

    return input_data, output_data

In [None]:
# Each result is an (inputs, targets) pair of equal-length lists: an input is
# the visit's gender, weight and age followed by a flattened window of
# LOOKBACK_WINDOW rows, and its target is the row right after that window.
training = to_data(folded_training)
testing = to_data(folded_testing)

## Train

In [None]:
from sklearn.neural_network import MLPRegressor
# Multi-layer perceptron regressor configured with hidden_layer_sizes
# (200, 100) and early stopping enabled; all other hyper-parameters are left
# at the scikit-learn defaults.
clf = MLPRegressor(hidden_layer_sizes=(200,100), early_stopping = True)

In [None]:
# Fit on the training pair: training[0] holds the input rows, training[1]
# the target rows (multi-output regression, one output per measurement).
clf.fit(training[0],training[1])

In [None]:
# Evaluate the fitted model on the held-out visits (see scikit-learn's
# MLPRegressor.score documentation for the metric that is returned).
clf.score(testing[0],testing[1])

In [None]:
# Sanity check: predict the next row for the first test sample. The sample is
# wrapped in a list because predict expects a 2-D batch of input rows.
clf.predict([testing[0][0]])

In [None]:
# Inspect a single test input row. NOTE(review): the index 7000 is hard-coded
# and raises IndexError if the test split produced fewer than 7001 windows.
testing[0][7000]