In [1]:
"""
This code is adapted from:
https://www.youtube.com/watch?v=PCgrgHgy26c&t=1567s
"""

'\nThis code is adapted from:\nhttps://www.youtube.com/watch?v=PCgrgHgy26c&t=1567s\n'

In [2]:
import os
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split, Dataset
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer, seed_everything
import pytorch_lightning as pl
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.feature_selection import mutual_info_classif, chi2
from sklearn.linear_model import LassoCV
import matplotlib.pyplot as plt
from pytorch_lightning.loggers import TensorBoardLogger
import seaborn as sns
import os
import sys
from torch_explain.models.explainer import Explainer
from torch_explain.logic.metrics import formula_consistency
from torchmetrics.functional import accuracy
# from load_datasets import load_mimic
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTEN
from imblearn.combine import SMOTEENN
from torch.nn.functional import one_hot
from func_timeout import func_set_timeout, func_timeout, FunctionTimedOut

In [3]:
class PatientDataset(Dataset):

    def __init__(self, features):

        self.features = features

def __len__(self):
    return len(self.features)

def __getitem__(self, idx):
    feature, label = self.features[idx]
    return dict(
        feature=torch.Tensor(feature.to_numpy()),
        label=torch.Tensor(label).long()
    )

In [4]:
class PatientDataModule(pl.LightningDataModule):

    def __init__(self, train_sequences, test_sequences, batch_size):
        super().__init__()
        self.train_sequences = train_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_dataset = PatientDataset(self.train_sequences)
        self.test_dataset = PatientDataset(self.test_sequences)

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset, 
            batch_size=self.batch_size, 
            shuffle=True,
            num_workers=4)

    def val_dataloader(self):
        return DataLoader(
            self.test_dataset, 
            batch_size=self.batch_size, 
            shuffle=False,
            num_workers=4)

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset, 
            batch_size=self.batch_size, 
            shuffle=False,
            num_workers=4)

In [5]:
epochs=200
batch_size=32

df = pd.read_csv("cleanedTemporalData.csv").set_index('PatientID')

X = []
y = []

for id, group in df.groupby('PatientID'):
    tempGroup = group.reset_index()
    
    tempGroup = tempGroup.drop(['PatientID', 'Mortality14Days'], axis=1)

    X.append(tempGroup.T.values)
    y.append(group['Mortality14Days'].values[0])


X = np.array(X)
y = np.array(y)

# print(X[0])

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_bundle = np.array(list(zip(x_train, y_train)))

test_bundle = np.array(list(zip(x_test, y_test)))

# display(train_bundle[0])

dataModule = PatientDataModule(train_bundle, test_bundle, batch_size)

  train_bundle = np.array(list(zip(x_train, y_train)))
  test_bundle = np.array(list(zip(x_test, y_test)))


In [6]:
class PatientModel(nn.Module):

    def __init__(self, n_features, n_classes, n_hidden=256, n_layers=3):
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=n_features, 
            hidden_size=n_hidden, 
            num_layers=n_layers, 
            batch_first=True,
            dropout=0.75)

        self.classifier = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        self.lstm.flatten_parameters()
        x, (hidden, _) = self.lstm(x)

        out = hidden[-1]

        return self.classifier(out)

In [7]:
class PatientPredictor(pl.LightningModule):

    def __init__(self, n_features, n_classes):
        super().__init__()

        self.model = PatientModel(n_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):

        output = self.model(x)
        loss = 0

        if labels is not None:
            loss = self.criterion(output, labels)

        return loss, output

    def training_step(self, batch, batch_idx):
        features = batch['feature']
        labels = batch['label']
        loss, outputs = self.forward(features, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, labels)

        self.log("train_loss", loss, prog_bar=True, logger=True)
        self.log("train_accuracy", step_accuracy, prog_bar=True, logger=True)

        return {'loss': loss, 'accuracy': step_accuracy}

    def validation_step(self, batch, batch_idx):
        features = batch['feature']
        labels = batch['label']
        loss, outputs = self.forward(features, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, labels)

        self.log("val_loss", loss, prog_bar=True, logger=True)
        self.log("val_accuracy", step_accuracy, prog_bar=True, logger=True)

        return {'loss': loss, 'accuracy': step_accuracy}

    def test_step(self, batch, batch_idx):
        features = batch['feature']
        labels = batch['label']
        loss, outputs = self.forward(features, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, labels)

        self.log("test_loss", loss, prog_bar=True, logger=True)
        self.log("test_accuracy", step_accuracy, prog_bar=True, logger=True)

        return {'loss': loss, 'accuracy': step_accuracy}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.0001)

In [8]:
x_train.shape

(767, 12, 48)

In [9]:
model = PatientPredictor(n_features=x_train.shape[1], n_classes=2)

In [10]:
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints", 
    filename="best", 
    save_top_k=1, 
    verbose=True, 
    monitor="val_loss", 
    mode="min"
)

logger = TensorBoardLogger("lightning_logs", name="patient_model")

trainer = pl.Trainer(logger=logger, 
                    checkpoint_callback=checkpoint_callback,
                     max_epochs=1, 
                     gpus=1
                    #  progress_bar_refresh_rate=30
                     )

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [11]:
trainer.fit(model, dataModule)

Missing logger folder: lightning_logs\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | PatientModel     | 1.3 M 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.319     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

In [None]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 25972), started 0:00:22 ago. (Use '!kill 25972' to kill it.)