In [45]:
import os
import pickle
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F 
import pandas as pd

In [46]:
seed = 29
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)

# Define data path
DATA_PATH = "/Users/prashanti.nilayam/Desktop/temp/"
prev_value_map = {}


In [47]:
default_value_map = {"Capillary refill rate": 0.0,
    "Diastolic blood pressure": 59.0,
    "Fraction inspired oxygen": 0.21,
    "Glascow coma scale eye opening": 4,
    "Glascow coma scale motor response": 6,
    "Glascow coma scale total": 15,
    "Glascow coma scale verbal response": 5,
    "Glucose": 128.0,
    "Heart Rate": 86,
    "Height": 170.0,
    "Mean blood pressure": 77.0,
    "Oxygen saturation": 98.0,
    "Respiratory rate": 19,
    "Systolic blood pressure": 118.0,
    "Temperature": 36.6,
    "Weight": 81.0,
    "pH": 7.4}

In [48]:
replacement_map = {
    "Glascow coma scale eye opening":{
        "1 No Response" : 1,
        "No Response" : 1,
        "2 To pain" : 2,
        "To Pain" : 2,
        "3 To speech" : 3,
        "To Speech" : 3,
        "4 Spontaneously" : 4,
        "Spontaneously" : 4,
        "None" : 5
    },
    "Glascow coma scale motor response":{
        "1 No Response": 1,
        "2 Abnorm extensn" : 2,
        "Abnormal extension": 2,
        "3 Abnorm flexion": 3,
        "Abnormal Flexion": 3,
        "4 Flex-withdraws" : 4,
        "Flex-withdraws": 4,
        "5 Localizes Pain": 5,
        "Localizes Pain": 5,
        "6 Obeys Commands": 6,
        "Obeys Commands": 6,
        "No response" : 7,
    },
    "Glascow coma scale verbal response":{
        "1 No Response" :1,
        "No Response":1,
        "2 Incomp sounds": 2,
        "Incomprehensible sounds":2,
        "3 Inapprop words":3,
        "Inappropriate Words":3,
        "4 Confused":4,
        "Confused":4,
        "5 Oriented":5,
        "Oriented":5,
        "No Response-ETT":6,
        "1.0 ET/Trach":7
    }
}

In [49]:
def to_np(elem):
    return np.concatenate([np.array(i) for i in elem])

In [50]:
def cleanup(episode_df):
    episode_df["Glascow coma scale eye opening"] = episode_df["Glascow coma scale eye opening"].apply(lambda x: replacement_map["Glascow coma scale eye opening"][x] if x in replacement_map["Glascow coma scale eye opening"] else x)
    episode_df["Glascow coma scale motor response"] = episode_df["Glascow coma scale motor response"].apply(lambda x: replacement_map["Glascow coma scale motor response"][x] if x in replacement_map["Glascow coma scale motor response"] else x)
    episode_df["Glascow coma scale verbal response"] = episode_df["Glascow coma scale verbal response"].apply(lambda x: replacement_map["Glascow coma scale verbal response"][x] if x in replacement_map["Glascow coma scale verbal response"] else x)


In [51]:
def process_capillary_refill_rate(person_id, value, colname):
    if value is not None and not np.isnan(value):
        prev_value_map[person_id][colname] = value
        return value
    if person_id in prev_value_map and colname in prev_value_map[person_id] and prev_value_map[person_id][colname] is not None:
        prev = prev_value_map[person_id][colname]
    else:
        prev = default_value_map[colname]
    return prev

In [52]:
def fill_missing_values(pateint_id, episode_df):
     prev_value_map[pateint_id] = {}
     episode_df["Capillary refill rate"] = episode_df["Capillary refill rate"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Capillary refill rate"))
     episode_df["Diastolic blood pressure"] = episode_df["Diastolic blood pressure"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Diastolic blood pressure"))
     episode_df["Fraction inspired oxygen"] = episode_df["Fraction inspired oxygen"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Fraction inspired oxygen"))
     episode_df["Glascow coma scale eye opening"] = episode_df["Glascow coma scale eye opening"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Glascow coma scale eye opening"))
     episode_df["Glascow coma scale motor response"] = episode_df["Glascow coma scale motor response"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Glascow coma scale motor response"))
     episode_df["Glascow coma scale total"] = episode_df["Glascow coma scale total"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Glascow coma scale total"))
     episode_df["Glascow coma scale verbal response"] = episode_df["Glascow coma scale verbal response"].apply(lambda x: process_capillary_refill_rate(pateint_id,  x, "Glascow coma scale verbal response"))
     episode_df["Glucose"] = episode_df["Glucose"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Glucose"))
     episode_df["Heart Rate"] = episode_df["Heart Rate"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Heart Rate"))
     episode_df["Mean blood pressure"] = episode_df["Mean blood pressure"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Mean blood pressure"))
     episode_df["Height"] = episode_df["Height"].apply(lambda x: process_capillary_refill_rate(pateint_id,x, "Height"))
     episode_df["Oxygen saturation"] = episode_df["Oxygen saturation"].apply(lambda x: process_capillary_refill_rate(pateint_id,  x, "Oxygen saturation"))
     episode_df["Respiratory rate"] = episode_df["Respiratory rate"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Respiratory rate"))
     episode_df["Systolic blood pressure"] = episode_df["Systolic blood pressure"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Systolic blood pressure"))
     episode_df["Temperature"] = episode_df["Temperature"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Temperature"))
     episode_df["Weight"] = episode_df["Weight"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "Weight"))
     episode_df["pH"] = episode_df["pH"].apply(lambda x: process_capillary_refill_rate(pateint_id, x, "pH"))
     del prev_value_map[pateint_id]

In [53]:
def get_window_indices(data_len):
    i = 0
    indices = []
    while i <= data_len-4:
        indices.append([i, i+1, i+2, i+3])
        i +=1
    return indices

In [54]:
get_window_indices(7)

[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6]]

In [55]:
def preprocess(path):
    x_path = DATA_PATH +'/'+path+'/'
    X = torch.empty(0,17,4)
    Y = torch.empty(0,)
    y_df = pd.read_csv(DATA_PATH + path +'_listfile.csv') 
    data_files = os.listdir(x_path)
    print(data_files)
    for data_file in data_files:
        print(data_file)
        if data_file.endswith(".csv"):
            episode_df = pd.read_csv(x_path + data_file)
            cleanup(episode_df)
            fill_missing_values(data_file, episode_df)
            episode_df["H_IDX"] = episode_df.Hours.apply(np.floor).astype('int32')
            episode_df = episode_df.groupby(by = "H_IDX").mean()
            episode_df = episode_df[episode_df.Hours>=5].reset_index(drop = True)
            temp_y = y_df[y_df.stay == data_file].sort_values(by = "period_length").reset_index(drop = True)
            temp_y = temp_y[["period_length", "y_true"]].set_index("period_length")
            episode_df = episode_df.join(temp_y, how = "inner").drop('Hours', axis=1).reset_index(drop = True)
            if(len(episode_df) >0):
                indices = get_window_indices(len(episode_df))
                windows = []
                y_values = []
                for idx in indices:
                    window = episode_df.loc[idx]
                    y_values.append(window.loc[idx[-1]].y_true)
                    windows.append(window.drop("y_true", axis=1).transpose().values.astype(np.float32))
                t_windows = torch.tensor(windows)
                t_y_values = torch.tensor(y_values)
                X = torch.cat((X, t_windows), 0)
                Y = torch.cat((Y, t_y_values), 0)
    return (X, Y)

In [56]:
from torch.utils.data import Dataset

class EpisodeDataset(Dataset):
    
    def __init__(self, obs, los):
        self.x = obs
        self.y = los
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        return (self.x[index], self.y[index])
        
X_train, Y_train = preprocess('train')
train_dataset = EpisodeDataset(X_train, Y_train)
X_val, Y_val = preprocess('val')
val_dataset = EpisodeDataset(X_val, Y_val)

['.DS_Store', '10021_episode1_timeseries.csv', '10010_episode1_timeseries.csv', '10003_episode1_timeseries.csv', '10014_episode1_timeseries.csv', '10007_episode1_timeseries.csv', '1000_episode1_timeseries.csv', '10017_episode1_timeseries.csv', '10022_episode1_timeseries.csv', '10013_episode1_timeseries.csv']
.DS_Store
10021_episode1_timeseries.csv
10010_episode1_timeseries.csv
10003_episode1_timeseries.csv
10014_episode1_timeseries.csv
10007_episode1_timeseries.csv
1000_episode1_timeseries.csv
10017_episode1_timeseries.csv
10022_episode1_timeseries.csv
10013_episode1_timeseries.csv
['10006_episode1_timeseries.csv', '.DS_Store', '10004_episode2_timeseries.csv', '10004_episode1_timeseries.csv']
10006_episode1_timeseries.csv
.DS_Store
10004_episode2_timeseries.csv
10004_episode1_timeseries.csv


In [57]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32,shuffle=True)                              
val_loader = torch.utils.data.DataLoader(val_dataset,batch_size=32, shuffle=False)                                         

In [58]:
for data in val_dataset:
    print(data[0], data[1])
    break

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [5.0000e+01, 5.7000e+01, 5.7000e+01, 5.1000e+01],
        [2.1000e-01, 2.1000e-01, 2.1000e-01, 2.1000e-01],
        [4.0000e+00, 4.0000e+00, 4.0000e+00, 4.0000e+00],
        [6.0000e+00, 6.0000e+00, 6.0000e+00, 6.0000e+00],
        [1.5000e+01, 1.5000e+01, 1.5000e+01, 1.5000e+01],
        [5.0000e+00, 5.0000e+00, 5.0000e+00, 5.0000e+00],
        [9.3000e+01, 2.3200e+02, 2.3200e+02, 2.3200e+02],
        [7.6000e+01, 7.4000e+01, 7.0000e+01, 7.1000e+01],
        [1.7000e+02, 1.7000e+02, 1.7000e+02, 1.7000e+02],
        [7.2333e+01, 8.0667e+01, 8.0667e+01, 7.4333e+01],
        [1.0000e+02, 1.0000e+02, 1.0000e+02, 9.8000e+01],
        [2.2000e+01, 1.5000e+01, 2.2000e+01, 2.0000e+01],
        [1.1700e+02, 1.2800e+02, 1.2800e+02, 1.2100e+02],
        [3.7556e+01, 3.6444e+01, 3.6444e+01, 3.7389e+01],
        [8.1000e+01, 8.1000e+01, 8.1000e+01, 8.1000e+01],
        [7.4000e+00, 7.4000e+00, 7.4000e+00, 7.4000e+00]]) tensor(31.180

In [59]:

class EpisiodeCNN(nn.Module):
    def __init__(self):
        super(EpisiodeCNN, self).__init__()
        #input shape 1 * 17 * 4
        #output shape 17 * 17 * 4
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=17, kernel_size=3, padding=1, stride = 1)
        #input shape 17 * 17 * 4
        #output shape 68 * 17 * 4
        self.conv2 = nn.Conv2d(in_channels=17, out_channels=34, kernel_size=3, padding=1, stride = 1)
        #input shape 34 * 17 * 4
        #output shape 34 * 8 * 2
        self.pool1 = nn.MaxPool2d(2,2)
        #input shape 34 * 8 * 2
        #output shape 68 * 8 * 2
        self.conv3 = nn.Conv2d(in_channels=34, out_channels=68, kernel_size=3, padding=1, stride = 1)
        self.fc1 = nn.Linear(68*8*2, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        #input is of shape (batch_size=32, 3, 224, 224) if you did the dataloader right
        x = x.unsqueeze(1)
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = self.pool1(x)
        x = F.leaky_relu(self.conv3(x))
        x = x.view(-1, 68 * 8 * 2)
        x = F.leaky_relu(self.fc1(x))
        x = self.dropout(x)
        x = F.leaky_relu(self.fc2(x))
        x = F.leaky_relu(self.fc3(x))
        x = self.fc4(x)
        return x


In [60]:
model = EpisiodeCNN()
learning_rate = 0.00001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr =learning_rate )

In [61]:
from sklearn.metrics import mean_squared_error
def eval_model(model, val_loader):
    model.eval()
    all_y_true = torch.DoubleTensor()
    all_y_pred = torch.DoubleTensor()
    for x, y in val_loader:
        y_hat = model(x)
        all_y_true = torch.cat((all_y_true, y.to('cpu')), dim=0)
        all_y_pred = torch.cat((all_y_pred,  y_hat.to('cpu')), dim=0)
    mse= mean_squared_error(all_y_true.detach().numpy(), all_y_pred.detach().numpy())
    print(f"mse: {mse:.3f}")
    return mse

In [62]:
def train(model, train_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        train_loss = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            y_hat = model(x)
            y_hat = y_hat.view(y_hat.shape[0]).double()
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_loss = train_loss / len(train_loader)
            print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))
        eval_model(model, val_loader)

    
# number of epochs to train the model
n_epochs = 25
train(model, train_loader, n_epochs)

h: 9 	Training Loss: 3921.843105
Epoch: 9 	Training Loss: 6160.921161
Epoch: 9 	Training Loss: 6840.539089
Epoch: 9 	Training Loss: 4948.254384
Epoch: 9 	Training Loss: 5902.978822
Epoch: 9 	Training Loss: 4705.889201
Epoch: 9 	Training Loss: 5391.649305
Epoch: 9 	Training Loss: 5862.548983
mse: 6886.409
Epoch: 10 	Training Loss: 5495.734153
Epoch: 10 	Training Loss: 4494.424250
Epoch: 10 	Training Loss: 6149.102043
Epoch: 10 	Training Loss: 5965.194009
Epoch: 10 	Training Loss: 3729.190124
Epoch: 10 	Training Loss: 4676.429008
Epoch: 10 	Training Loss: 3691.105245
Epoch: 10 	Training Loss: 5978.315370
Epoch: 10 	Training Loss: 5438.181704
Epoch: 10 	Training Loss: 5107.474373
Epoch: 10 	Training Loss: 5915.862023
Epoch: 10 	Training Loss: 4313.433090
Epoch: 10 	Training Loss: 5741.168999
Epoch: 10 	Training Loss: 4515.954368
Epoch: 10 	Training Loss: 3973.846340
Epoch: 10 	Training Loss: 5583.563349
Epoch: 10 	Training Loss: 4037.188831
Epoch: 10 	Training Loss: 5799.311178
Epoch: 10 

In [65]:
torch.save(model.state_dict(), DATA_PATH+"/model.pt")

In [66]:
model_loaded = EpisiodeCNN()
model_loaded.load_state_dict(torch.load(DATA_PATH+"/model.pt"))
eval_model(model_loaded, val_loader)

NameError: name 'momodel_loadeddel' is not defined