In [1]:
import os
import sys
import random
import pandas as pd
import numpy as np
from scipy.linalg import toeplitz
from copy import copy
import matplotlib.pyplot as plt
%matplotlib inline

# Geniuses that worked on hypertools did not update certain package and thus it produces warnings (they break jupyter lab)
import warnings
warnings.filterwarnings("ignore")

# Comment out if you don't want to see all of the values being printed (i.e. default)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

current_dir = os.getcwd()
# utils_path = os.path.join(current_dir, '..', 'utils')
utils_path = os.path.join(current_dir, '../')
utils_abs_path = os.path.abspath(utils_path)
if utils_abs_path not in sys.path:
    sys.path.append(utils_abs_path)

import utils.get_data as get_data
# from impute_methods import *
from utils.impute_methods import impute_linear_interpolation

DATA_PATH = get_data.get_dataset_abspath()

training_setA_path = DATA_PATH + 'training_setA'
training_setB_path = DATA_PATH + 'training_setB'

In [2]:
import os
import sys
import random
import pandas as pd
import numpy as np
from scipy.linalg import toeplitz
from copy import copy
import matplotlib.pyplot as plt
%matplotlib inline

# Geniuses that worked on hypertools did not update certain package and thus it produces warnings (they break jupyter lab)
import warnings
warnings.filterwarnings("ignore")

# Comment out if you don't want to see all of the values being printed (i.e. default)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

current_dir = os.getcwd()
# utils_path = os.path.join(current_dir, '..', 'utils')
utils_path = os.path.join(current_dir, '../')
utils_abs_path = os.path.abspath(utils_path)
if utils_abs_path not in sys.path:
    sys.path.append(utils_abs_path)

import utils.get_data as get_data
# from impute_methods import *
from utils.impute_methods import impute_linear_interpolation

In [3]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class TimeSeriesTransformer(nn.Module):
    def __init__(self, num_features, num_blocks=1, d_model=64, nhead=4, dim_feedforward=128, dropout=0.1):
        super(TimeSeriesTransformer, self).__init__()
        self.embedding = nn.Linear(num_features, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, 
            nhead=nhead, 
            dim_feedforward=dim_feedforward, 
            dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_blocks)
        self.output_layer = nn.Linear(d_model, 1)

    def forward(self, src, src_mask=None):
        src = self.embedding(src)
        # Assuming src is now [batch_size, d_model], without a seq_length
        src = src.unsqueeze(1)  # Add a pseudo-sequence dimension
        batch_size, seq_length, embedding_size = src.size()
    
        # Generate positional encodings dynamically based on sequence length and batch size
        positional_encoding = torch.zeros(batch_size, seq_length, embedding_size).to(src.device)
        position = torch.arange(seq_length, dtype=torch.float).unsqueeze(0).unsqueeze(-1).to(src.device)
        div_term = torch.exp(torch.arange(0, embedding_size, 2).float() * (-math.log(10000.0) / embedding_size)).to(src.device)
        positional_encoding[:, :, 0::2] = torch.sin(position * div_term)
        positional_encoding[:, :, 1::2] = torch.cos(position * div_term)
        src += positional_encoding
    
        output = self.transformer_encoder(src, src_key_padding_mask=src_mask)
        output = self.output_layer(output)
        return torch.sigmoid(output)

In [4]:
# Load and preprocess data
dataset, patient_id_map = get_data.get_dataset()

   20337
   40337
Dataset loaded into a MultiIndex DataFrame.


In [5]:
# First lets experiment with only raw data 
# We have to however impute NaN values since Neural Networks can't (natively) handle them

columns_to_linearly_interpolate = [
    'HR', 'O2Sat', 'SBP', 'MAP', 'DBP', 'Resp'
]

# Feel free to omit this (EXPERIMENTAL)
# Normilize the dataset
if True:
    # Check if multiindex_df is indeed a MultiIndex DataFrame
    if isinstance(dataset.index, pd.MultiIndex):
        # Normalize each patient's data
        # This will apply z-score normalization per patient per feature
        normalized_data = dataset.groupby(level=0).transform(lambda x: (x - x.mean()) / x.std())
    
        # Optionally fill NaN values if they are created by division by zero in cases where std is zero
        normalized_data = normalized_data.fillna(0)
    
        # If you need to replace the old DataFrame with the new, normalized one
        dataset.update(normalized_data)
    else:
        print("The dataframe does not have a MultiIndex as expected.")

    
# Linear Interpolation
print("Linearly interpolating:")
for col in columns_to_linearly_interpolate:
    dataset = impute_linear_interpolation(dataset, col)
    print(col)
print("Done")

Linearly interpolating:
HR
O2Sat
SBP
MAP
DBP
Resp
Done


In [6]:
def add_nan_indicators(df):
    """
    This function adds a binary indicator for each feature in the dataframe.
    Each indicator is 1 where the original data was NaN, and 0 otherwise.

    Parameters:
    df (pandas.DataFrame): The original dataframe with possible NaN values.

    Returns:
    pandas.DataFrame: Augmented dataframe with additional NaN indicator features.
    """
    for column in df.columns:
        df[column + '_nan'] = df[column].isna().astype(int)
    return df

In [7]:
def downsample(X, y):
    index_0 = np.where(y == 0)[0]
    index_1 = np.where(y == 1)[0]
    print(index_0, index_1)

    if len(index_0) > len(index_1):
        index_0 = np.random.choice(index_0, size=len(index_1), replace=False)

    balanced_indices = np.concatenate([index_0, index_1])
    np.random.shuffle(balanced_indices)

    x_balanced = X.iloc[balanced_indices]
    y_balanced = y.iloc[balanced_indices]

    return x_balanced, y_balanced
    

In [11]:
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} for training.")

X = dataset.drop('SepsisLabel', axis=1)
y = dataset['SepsisLabel']

X = add_nan_indicators(X)

# Prepare data and add missingness indicators
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val = add_nan_indicators(X_train), add_nan_indicators(X_val)

# X_train.fillna(0, inplace=True)
# X_val.fillna(0, inplace=True)

# Convert to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).to(device)

# Create DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_data = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

# Initialize the model
model = TimeSeriesTransformer(num_features=X_train.shape[1])  # Assuming features were doubled to account for indicators
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# To keep all the data focus on the minority class (sepsis = 1)

# class_weights = compute_class_weight('balanced', classes=[0, 1], y=y_train)
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train.values)

class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

criterion = nn.BCELoss(weight=class_weights[1])  # Focus more on the minority class
criterion = nn.BCELoss()

# Train the model
num_epochs = 30
best_auroc = 0

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.squeeze(), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            preds = output.squeeze()
            all_preds.extend(preds.tolist())
            all_targets.extend(target.tolist())

    # Convert prediction probabilities to binary predictions
    threshold = 0.5  # This threshold can be adjusted
    binary_preds = (output > threshold).int()
    # binary_preds = [1 if prob >= threshold else 0 for prob in all_preds]

    # Calculate scores
    auroc = roc_auc_score(all_targets, all_preds)
    precision = precision_score(all_targets, binary_preds)
    recall = recall_score(all_targets, binary_preds)
    f1 = f1_score(all_targets, binary_preds)
    
    if auroc > best_auroc:
        best_auroc = auroc
        # Save model and predictions for the best model based on AUROC
        torch.save(model.state_dict(), 'best_model.pth')

    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Validation AUROC: {auroc}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}')

Using cpu for training.


TimeSeriesTransformer(
  (embedding): Linear(in_features=120, out_features=64, bias=True)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=128, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=128, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
)

TimeSeriesTransformer(
  (embedding): Linear(in_features=120, out_features=64, bias=True)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=128, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=128, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
)

RuntimeError: all elements of target should be between 0 and 1