In [1]:
import os
import sys
import random
import pandas as pd
import numpy as np
from scipy.linalg import toeplitz
from copy import copy
import matplotlib.pyplot as plt
%matplotlib inline

# Geniuses that worked on hypertools did not update certain package and thus it produces warnings (they break jupyter lab)
import warnings
warnings.filterwarnings("ignore")

# Comment out if you don't want to see all of the values being printed (i.e. default)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

current_dir = os.getcwd()
# utils_path = os.path.join(current_dir, '..', 'utils')
utils_path = os.path.join(current_dir, '../')
utils_abs_path = os.path.abspath(utils_path)
if utils_abs_path not in sys.path:
    sys.path.append(utils_abs_path)

import utils.get_data as get_data
# from impute_methods import *
from utils.impute_methods import impute_linear_interpolation

DATA_PATH = get_data.get_dataset_abspath()

training_setA_path = DATA_PATH + 'training_setA'
training_setB_path = DATA_PATH + 'training_setB'

In [2]:
import os
import sys
import random
import pandas as pd
import numpy as np
from scipy.linalg import toeplitz
from copy import copy
import matplotlib.pyplot as plt
%matplotlib inline

# Geniuses that worked on hypertools did not update certain package and thus it produces warnings (they break jupyter lab)
import warnings
warnings.filterwarnings("ignore")

# Comment out if you don't want to see all of the values being printed (i.e. default)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

current_dir = os.getcwd()
# utils_path = os.path.join(current_dir, '..', 'utils')
utils_path = os.path.join(current_dir, '../')
utils_abs_path = os.path.abspath(utils_path)
if utils_abs_path not in sys.path:
    sys.path.append(utils_abs_path)

import utils.get_data as get_data
# from impute_methods import *
from utils.impute_methods import impute_linear_interpolation

In [149]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerTimeSeries(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=4, num_layers=2, dropout=0.2):
        super(TransformerTimeSeries, self).__init__()
        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x.transpose(0, 1))
        x = self.decoder(x.transpose(0, 1))
        return x.squeeze(-1)

In [127]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)


class TransformerTimeSeries(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=4, num_layers=2, dropout=0.2):
        super(TransformerTimeSeries, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x.transpose(0, 1))
        x = self.decoder(x)
        return x.squeeze(-1)
        
        


In [4]:
# Load and preprocess data
dataset, patient_id_map = get_data.get_dataset()

   20337
   40337
Dataset loaded into a MultiIndex DataFrame.


In [133]:
dataset.shape

(1552210, 41)

In [6]:
# First lets experiment with only raw data 
# We have to however impute NaN values since Neural Networks can't (natively) handle them

columns_to_linearly_interpolate = [
    'HR', 'O2Sat', 'SBP', 'MAP', 'DBP', 'Resp'
]

# Feel free to omit this (EXPERIMENTAL)
# Normilize the dataset
if True:
    # Check if multiindex_df is indeed a MultiIndex DataFrame
    if isinstance(dataset.index, pd.MultiIndex):
        # Exclude 'SepsisLabel' from normalization
        features_to_normalize = dataset.columns.difference(['SepsisLabel'])

        # Normalize each patient's data
        # This will apply z-score normalization per patient per feature, excluding 'SepsisLabel'
        normalized_data = dataset[features_to_normalize].groupby(level=0).transform(
            lambda x: (x - x.mean()) / x.std())

        # Optionally fill NaN values if they are created by division by zero in cases where std is zero
        normalized_data = normalized_data.fillna(0)

        # Merge normalized data with the 'SepsisLabel' column
        dataset = pd.concat([normalized_data, dataset['SepsisLabel']], axis=1)
    else:
        print("The dataframe does not have a MultiIndex as expected.")

# Linear Interpolation
print("Linearly interpolating:")
for col in columns_to_linearly_interpolate:
    if col != 'SepsisLabel':  # Ensure we do not interpolate 'SepsisLabel'
        dataset = impute_linear_interpolation(dataset, col)
        print(col)
print("Done")

Linearly interpolating:
HR
O2Sat
SBP
MAP
DBP
Resp
Done


In [134]:
dataset.shape

(1552210, 41)

In [15]:
def add_nan_indicators(df):
    for column in df.columns:
        df[column + '_nan'] = df[column].isna().astype(int)
    return df

In [16]:
def downsample(X, y):
    index_0 = np.where(y == 0)[0]
    index_1 = np.where(y == 1)[0]
    print(index_0, index_1)

    if len(index_0) > len(index_1):
        index_0 = np.random.choice(index_0, size=len(index_1), replace=False)

    balanced_indices = np.concatenate([index_0, index_1])
    np.random.shuffle(balanced_indices)

    x_balanced = X.iloc[balanced_indices]
    y_balanced = y.iloc[balanced_indices]

    return x_balanced, y_balanced
    

In [125]:
import torch
from torchsummary import summary
model = TimeSeriesTransformer(input_dim=X_train.shape[1])
summary(model, input_size=(1, 64))

ModuleNotFoundError: No module named 'torchsummary'

In [None]:
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} for training.")

In [148]:
X = dataset.drop('SepsisLabel', axis=1)
X = add_nan_indicators(X)
y = dataset['SepsisLabel']

for patient_id in X.index.get_level_values('patient_id').unique():
    print(patient_id)
    break

a = 0
for patient_id, patient_data in X.groupby(level='patient_id'):
    print(patient_data.shape)
    if a > 10:
        break
    a += 1


1.0
(54, 80)
(23, 80)
(48, 80)
(29, 80)
(48, 80)
(17, 80)
(45, 80)
(40, 80)
(258, 80)
(23, 80)
(34, 80)
(21, 80)


In [130]:

# Prepare data and add missingness indicators
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val = add_nan_indicators(X_train), add_nan_indicators(X_val)

# X_train.fillna(0, inplace=True)
# X_val.fillna(0, inplace=True)

# Convert to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).to(device)

# Create DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
val_data = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_data, batch_size=1, shuffle=False)

# Initialize the model
# model = TimeSeriesTransformer(num_features=X_train.shape[1])  # Assuming features were doubled to account for indicators
model = TimeSeriesTransformer(input_dim=X_train.shape[1])
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# To keep all the data focus on the minority class (sepsis = 1)

# class_weights = compute_class_weight('balanced', classes=[0, 1], y=y_train)
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train.values)

class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

criterion = nn.BCELoss(weight=class_weights[1])  # Focus more on the minority class

TimeSeriesTransformer(
  (embedding): Linear(in_features=120, out_features=512, bias=True)
  (positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=128, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=128, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (output_layer): Linear(in_features=512, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [None]:
# Train the model
num_epochs = 30
best_auroc = 0

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        output = output.squeeze()
        
        # output[:, 0][torch.isnan(output[:, 0])] = 0

        # Forward fill NaN values in the rest of the tensor
        # for i in range(1, output.size(1)):
        #     output[:, i][torch.isnan(output[:, i])] = output[:, i-1][torch.isnan(output[:, i])]


        # print("Output: ", output.shape)
        # print("Target: ", target.shape)
        
        loss = criterion(output, target)
        
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            preds = output.squeeze()
            all_preds.extend(preds.tolist())
            all_targets.extend(target.tolist())

    # Convert prediction probabilities to binary predictions
    threshold = 0.5  # This threshold can be adjusted
    binary_preds = (output > threshold).int()
    # binary_preds = [1 if prob >= threshold else 0 for prob in all_preds]

    # Calculate scores
    auroc = roc_auc_score(all_targets, all_preds)
    precision = precision_score(all_targets, binary_preds)
    recall = recall_score(all_targets, binary_preds)
    f1 = f1_score(all_targets, binary_preds)
    
    if auroc > best_auroc:
        best_auroc = auroc
        # Save model and predictions for the best model based on AUROC
        torch.save(model.state_dict(), 'best_model.pth')

    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Validation AUROC: {auroc}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}')