Q2.1 Classic Machine Learning Methods

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC 
from sklearn.metrics import roc_auc_score, average_precision_score
from tsfresh.feature_extraction import extract_features, MinimalFCParameters
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh.feature_extraction import MinimalFCParameters
import torch
import torch.nn as nn
import torch.optim as optim

In [7]:
# === PART 1 OF Q2.1 ===
# Load preprocessed datasets
df_a = pd.read_parquet('C:/User/ETH/2024-2025/MA2/ML for human health/Project 1/ML-For-Healthcare-25/ml_ready_data/set-a-scaled.parquet')
df_b = pd.read_parquet('C:/User/ETH/2024-2025/MA2/ML for human health/Project 1/ML-For-Healthcare-25/ml_ready_data/set-b-scaled.parquet')
df_c = pd.read_parquet('C:/User/ETH/2024-2025/MA2/ML for human health/Project 1/ML-For-Healthcare-25/ml_ready_data/set-c-scaled.parquet')  # Test set

df_train = df_a
df_validate = df_b
df_test = df_c  # Test set is set-c

# Define static and dynamic variables
static_vars = ['Age', 'Gender', 'Height', 'Weight']
time_series_vars = [col for col in df_train.columns if col not in static_vars + ['Hour', 'PatientID', 'RecordID', 'In_hospital_death']]

# Feature extraction function
def extract_features(df):
    features = df.groupby('PatientID')[time_series_vars].agg(['mean', 'max', 'last'])
    features.columns = ['_'.join(col) for col in features.columns]
    
    # Add static variables
    static_data = df.groupby('PatientID')[static_vars].last()
    features = features.merge(static_data, left_index=True, right_index=True)
    
    # Add labels
    labels = df.groupby('PatientID')['In_hospital_death'].last()
    features['In_hospital_death'] = labels
    return features.reset_index()

# Extract features
df_train_features = extract_features(df_train)
df_test_features = extract_features(df_test)

# Prepare data for ML
X_train = df_train_features.drop(columns=['PatientID', 'In_hospital_death'])
y_train = df_train_features['In_hospital_death']
X_test = df_test_features.drop(columns=['PatientID', 'In_hospital_death'])
y_test = df_test_features['In_hospital_death']

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train classifiers
logreg = LogisticRegression()
rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='linear', probability=True, random_state=42)

logreg.fit(X_train, y_train)
rf.fit(X_train, y_train)
svm_model.fit(X_train, y_train)

# Evaluate models
def evaluate_model(model, X_test, y_test, name):
    y_pred = model.predict_proba(X_test)[:, 1]
    auroc = roc_auc_score(y_test, y_pred)
    auprc = average_precision_score(y_test, y_pred)
    print(f"{name} - AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")

# Report performance
evaluate_model(logreg, X_test, y_test, 'Logistic Regression')
evaluate_model(rf, X_test, y_test, 'Random Forest')
evaluate_model(svm_model, X_test, y_test, 'SVM')


Logistic Regression - AUROC: 0.8468, AUPRC: 0.5152
Random Forest - AUROC: 0.8346, AUPRC: 0.5078
SVM - AUROC: 0.8309, AUPRC: 0.4873


In [40]:
from tsfresh import extract_relevant_features
from tsfresh.feature_extraction import MinimalFCParameters
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, average_precision_score
from tsfresh.utilities.dataframe_functions import impute

# === PART 2 OF Q2.1 ===

# Extract features with tsfresh
df_train = df_a
df_test = df_c  # Test set is set-c

# Define static and dynamic variables
static_vars = ['Age', 'Gender', 'Height', 'Weight']
time_series_vars = [col for col in df_train.columns if col not in static_vars + ['Hour', 'PatientID', 'RecordID', 'In_hospital_death']]

# Extract relevant features with tsfresh
def extract_relevant_tsfresh_features(df, y, time_series_vars, static_vars):
    # Extract relevant time-series features using tsfresh
    relevant_features = extract_relevant_features(
        df[time_series_vars + static_vars + ['PatientID', 'Hour']],  # Include necessary columns
        y,
        column_id='PatientID',  # ID column for each patient
        column_sort='Hour',  # Time-related sorting column
        default_fc_parameters=MinimalFCParameters(),  # Set of features to extract
        ml_task='classification',  # Specify the task type
        n_jobs=1  # Adjust as needed for parallel processing
    )
    
    # Add static variables (like Age, Gender, Height, Weight) to the features
    static_data = df.groupby('PatientID')[static_vars].last()  # Get last static data for each patient
    relevant_features = relevant_features.merge(static_data, left_index=True, right_index=True)
    
    return relevant_features

# Prepare target variable for training and testing
y_train = df_train.groupby('PatientID')['In_hospital_death'].last()
y_test = df_test.groupby('PatientID')['In_hospital_death'].last()

# Extract relevant features for training set
X_train = extract_relevant_tsfresh_features(df_train, y_train, time_series_vars, static_vars)

# Extract all features for the test set
X_test_all_features = extract_features(
    df_test[time_series_vars + static_vars + ['PatientID', 'Hour']],
    column_id='PatientID',
    column_sort='Hour',
    default_fc_parameters=MinimalFCParameters(),
    n_jobs=1
)

# Impute missing values in the test set
impute(X_test_all_features)

# Add static variables to the test set
static_data_test = df_test.groupby('PatientID')[static_vars].last()
X_test_all_features = X_test_all_features.merge(static_data_test, left_index=True, right_index=True)

# Align test features with training features
X_test = X_test_all_features[X_train.columns]  # Select only the features present in X_train

# Print the relevant features selected by tsfresh BEFORE standardization
print("Relevant features selected by tsfresh:")
print(X_train.columns.tolist())

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train classifiers
logreg = LogisticRegression()
rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='linear', probability=True, random_state=42)

logreg.fit(X_train, y_train)
rf.fit(X_train, y_train)
svm_model.fit(X_train, y_train)

# Evaluate models
def evaluate_model(model, X_test, y_test, name):
    y_pred = model.predict_proba(X_test)[:, 1]
    auroc = roc_auc_score(y_test, y_pred)
    auprc = average_precision_score(y_test, y_pred)
    print(f"{name} - AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")

# Report performance
evaluate_model(logreg, X_test, y_test, 'Logistic Regression')
evaluate_model(rf, X_test, y_test, 'Random Forest')
evaluate_model(svm_model, X_test, y_test, 'SVM')

Feature Extraction: 100%|██████████| 168000/168000 [00:46<00:00, 3601.17it/s]
Feature Extraction: 100%|██████████| 168000/168000 [00:49<00:00, 3364.73it/s]


Relevant features selected by tsfresh:
['GCS__median', 'BUN__median', 'BUN__sum_values', 'BUN__mean', 'GCS__maximum', 'GCS__sum_values', 'GCS__mean', 'BUN__maximum', 'BUN__minimum', 'Urine__mean', 'Urine__sum_values', 'GCS__root_mean_square', 'Urine__median', 'Urine__standard_deviation', 'Urine__variance', 'Urine__maximum', 'Urine__absolute_maximum', 'Creatinine__maximum', 'Urine__root_mean_square', 'Temp__root_mean_square', 'pH__root_mean_square', 'Creatinine__median', 'ALP__maximum', 'Creatinine__sum_values', 'Creatinine__mean', 'Creatinine__absolute_maximum', 'pH__absolute_maximum', 'BUN__absolute_maximum', 'PaCO2__root_mean_square', 'HCO3__minimum', 'Creatinine__root_mean_square', 'Bilirubin__absolute_maximum', 'pH__standard_deviation', 'pH__variance', 'PaCO2__minimum', 'Na__root_mean_square', 'Bilirubin__root_mean_square', 'Bilirubin__maximum', 'AST__root_mean_square', 'AST__absolute_maximum', 'Glucose__absolute_maximum', 'BUN__root_mean_square', 'Albumin__minimum', 'Creatinine__m

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression - AUROC: 0.8245, AUPRC: 0.4745
Random Forest - AUROC: 0.8318, AUPRC: 0.4798
SVM - AUROC: 0.7929, AUPRC: 0.4369


Q2.2 Recurrent Neural Networks

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score
import numpy as np

# === Implementing LSTM ===

# === Prepare Data for LSTM ===
def prepare_lstm_data(df, time_series_vars, static_vars):
    time_series_data = df.groupby('PatientID')[time_series_vars].apply(lambda x: x.values)
    static_data = df.groupby('PatientID')[static_vars].last()
    labels = df.groupby('PatientID')['In_hospital_death'].last()

    # Pad sequences to ensure uniform length
    max_timesteps = max(time_series_data.apply(len))
    time_series_data = np.array([np.pad(x, ((0, max_timesteps - len(x)), (0, 0)), mode='constant') for x in time_series_data])

    return time_series_data, static_data.values, labels.values

# === Prepare Training and Test Data ===
X_train_ts, X_train_static, y_train = prepare_lstm_data(df_a, time_series_vars, static_vars)
X_validate_ts, X_validate_static, y_validate = prepare_lstm_data(df_b, time_series_vars, static_vars)
X_test_ts, X_test_static, y_test = prepare_lstm_data(df_c, time_series_vars, static_vars)

# === Standardize Features ===
scaler_static = StandardScaler()
X_train_static = scaler_static.fit_transform(X_train_static)
X_test_static = scaler_static.transform(X_test_static)

scaler_ts = StandardScaler()
X_train_ts = np.array([scaler_ts.fit_transform(x) for x in X_train_ts])
X_test_ts = np.array([scaler_ts.transform(x) for x in X_test_ts])

# === Convert to PyTorch Tensors ===
X_train_ts_tensor = torch.tensor(X_train_ts, dtype=torch.float32)
X_test_ts_tensor = torch.tensor(X_test_ts, dtype=torch.float32)
X_train_static_tensor = torch.tensor(X_train_static, dtype=torch.float32)
X_test_static_tensor = torch.tensor(X_test_static, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# === Define the LSTM Model ===
class LSTMModel(nn.Module):
    def __init__(self, input_shape_ts, input_shape_static):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=input_shape_ts[1], hidden_size=64, batch_first=True, dropout=0.3)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=32, batch_first=True, dropout=0.3)
        self.fc1 = nn.Linear(32 + input_shape_static, 16)
        self.fc2 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x_ts, x_static):
        lstm_out, (hn, cn) = self.lstm1(x_ts)
        lstm_out, (hn, cn) = self.lstm2(lstm_out)
        last_lstm_output = lstm_out[:, -1, :]  # Use the last time-step output

        # Concatenate static data with LSTM output
        combined_input = torch.cat((last_lstm_output, x_static), dim=1)
        x = torch.relu(self.fc1(combined_input))
        x = self.fc2(x)
        return self.sigmoid(x)

# === Initialize and Train the Model ===
input_shape_ts = (X_train_ts.shape[1], X_train_ts.shape[2])  # (timesteps, features)
input_shape_static = X_train_static.shape[1]  # Number of static features

model = LSTMModel(input_shape_ts, input_shape_static)
criterion = nn.BCELoss()  # Binary Cross-Entropy loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# === Training Loop ===
def train_model(model, X_train_ts, X_train_static, y_train, epochs=20, batch_size=32):
    model.train()
    for epoch in range(epochs):
        # Create batches
        for i in range(0, len(X_train_ts), batch_size):
            batch_ts = X_train_ts[i:i+batch_size]
            batch_static = X_train_static[i:i+batch_size]
            batch_labels = y_train[i:i+batch_size]
            
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(batch_ts, batch_static)
            loss = criterion(outputs.squeeze(), batch_labels)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
        
        # Print training progress
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# === Train the Model ===
train_model(model, X_train_ts_tensor, X_train_static_tensor, y_train_tensor)

# === Evaluate the Model ===
model.eval()
with torch.no_grad():
    y_pred = model(X_test_ts_tensor, X_test_static_tensor).squeeze()
    y_pred = y_pred.numpy()
    
    auroc = roc_auc_score(y_test, y_pred)
    auprc = average_precision_score(y_test, y_pred)

print(f"LSTM Model - Test Set C Performance: AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")




Epoch [1/20], Loss: 0.4042
Epoch [2/20], Loss: 0.3443
Epoch [3/20], Loss: 0.3376
Epoch [4/20], Loss: 0.3208
Epoch [5/20], Loss: 0.3071
Epoch [6/20], Loss: 0.2790
Epoch [7/20], Loss: 0.2689
Epoch [8/20], Loss: 0.2648
Epoch [9/20], Loss: 0.2420
Epoch [10/20], Loss: 0.2425
Epoch [11/20], Loss: 0.1230
Epoch [12/20], Loss: 0.1588
Epoch [13/20], Loss: 0.1638
Epoch [14/20], Loss: 0.1365
Epoch [15/20], Loss: 0.1525
Epoch [16/20], Loss: 0.1138
Epoch [17/20], Loss: 0.0459
Epoch [18/20], Loss: 0.0530
Epoch [19/20], Loss: 0.0760
Epoch [20/20], Loss: 0.0319
LSTM Model - Test Set C Performance: AUROC: 0.6541, AUPRC: 0.2281


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, average_precision_score
from torch.utils.data import DataLoader, TensorDataset

# === Define the Bidirectional LSTM Model in PyTorch ===
class BidirectionalLSTM(nn.Module):
    def __init__(self, input_dim_ts, hidden_dim=64, dropout_rate=0.3):
        super(BidirectionalLSTM, self).__init__()
        self.lstm1 = nn.LSTM(input_dim_ts, hidden_dim, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(hidden_dim*2, hidden_dim, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden_dim*2, 16)
        self.fc2 = nn.Linear(16, 1)  # Binary classification
    
    def forward(self, x):
        # Pass through first bidirectional LSTM
        x, _ = self.lstm1(x)  
        x = self.dropout(x)
        
        # Pass through second bidirectional LSTM
        x, _ = self.lstm2(x)
        x = self.dropout(x)
        
        # Take the output from the last timestep
        x = x[:, -1, :]
        
        # Fully connected layers
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        
        return x  # Sigmoid for binary classification

# === Data Preparation for PyTorch ===
def prepare_lstm_data_for_pytorch(df, time_series_vars, static_vars):
    time_series_data, static_data, labels = prepare_lstm_data(df, time_series_vars, static_vars)
    
    # Convert to PyTorch tensors
    time_series_data = torch.tensor(time_series_data, dtype=torch.float32)
    static_data = torch.tensor(static_data, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.float32)
    
    return time_series_data, static_data, labels

# Prepare training and test data
X_train_ts, X_train_static, y_train = prepare_lstm_data_for_pytorch(df_a, time_series_vars, static_vars)
X_test_ts, X_test_static, y_test = prepare_lstm_data_for_pytorch(df_c, time_series_vars, static_vars)

# Create datasets and dataloaders for training and testing
train_dataset = TensorDataset(X_train_ts, X_train_static, y_train)
test_dataset = TensorDataset(X_test_ts, X_test_static, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# === Define the Training Function ===
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()  # Set the model to training mode
    for epoch in range(epochs):
        epoch_loss = 0
        for batch in train_loader:
            time_series_data, _, labels = batch
            
            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(time_series_data)  # Forward pass
            
            loss = criterion(outputs.squeeze(), labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights
            
            epoch_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

# === Train the Model ===
# Initialize the model, loss function, and optimizer
input_dim_ts = X_train_ts.shape[2]  # Number of time series features
model = BidirectionalLSTM(input_dim_ts)

# Calculate class weights
total_samples = len(df_train)
pos_weight = total_samples / (2 * df_train['In_hospital_death'].sum())

# Define the loss function with class weights
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]))

#criterion = nn.BCELoss()  # Binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, criterion, optimizer, epochs=20)

# === Evaluate the Model ===
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    y_true = []
    y_pred = []
    
    with torch.no_grad():
        for batch in test_loader:
            time_series_data, _, labels = batch
            outputs = model(time_series_data)
            
            y_true.extend(labels.numpy())
            y_pred.extend(outputs.squeeze().numpy())
    
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    auroc = roc_auc_score(y_true, y_pred)
    auprc = average_precision_score(y_true, y_pred)
    
    return auroc, auprc

# Evaluate the model on the test set
auroc, auprc = evaluate_model(model, test_loader)
print(f"Bidirectional LSTM Model - Test Set C Performance: AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")


Epoch 1/20, Loss: 0.8592
Epoch 2/20, Loss: 0.7491
Epoch 3/20, Loss: 0.6799
Epoch 4/20, Loss: 0.6178
Epoch 5/20, Loss: 0.5649
Epoch 6/20, Loss: 0.5254
Epoch 7/20, Loss: 0.4697
Epoch 8/20, Loss: 0.4436
Epoch 9/20, Loss: 0.3925
Epoch 10/20, Loss: 0.3705
Epoch 11/20, Loss: 0.3243
Epoch 12/20, Loss: 0.3049
Epoch 13/20, Loss: 0.2846
Epoch 14/20, Loss: 0.2514
Epoch 15/20, Loss: 0.2284
Epoch 16/20, Loss: 0.2175
Epoch 17/20, Loss: 0.2025
Epoch 18/20, Loss: 0.2172
Epoch 19/20, Loss: 0.1697
Epoch 20/20, Loss: 0.1859
Bidirectional LSTM Model - Test Set C Performance: AUROC: 0.7551, AUPRC: 0.3387


Q2.3a Transformers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score
import numpy as np
import pandas as pd

# Define Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, time_series_input_size, static_input_size, num_heads=4, hidden_dim=64, projected_dim=40):
        super(TransformerModel, self).__init__()
        
        # Project time-series features to an embedding size divisible by num_heads
        self.input_projection = nn.Linear(time_series_input_size, projected_dim)
        
        # Multi-Head Attention Layer
        self.attention_layer = nn.MultiheadAttention(embed_dim=projected_dim, num_heads=num_heads, batch_first=True)
        
        # Fully connected layers after attention
        self.fc1 = nn.Linear(projected_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        
        # Process static features
        self.static_fc = nn.Linear(static_input_size, hidden_dim)
        
        # Output layer
        self.output_fc = nn.Linear(hidden_dim, 1)
        
        # Dropout
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, time_series_input, static_input):
        # Project time-series features
        time_series_input = self.input_projection(time_series_input)
        
        # Apply attention
        attention_output, _ = self.attention_layer(time_series_input, time_series_input, time_series_input)
        
        # Pooling
        x = attention_output.mean(dim=1)  # Global average pooling
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        
        # Process static features
        static_out = self.static_fc(static_input)
        
        # Combine time-series and static features
        combined = x + static_out  # Element-wise addition
        combined = self.dropout(combined)
        
        # Final output
        output = self.output_fc(combined)
        return torch.sigmoid(output)


df_train = df_a
df_validate = df_b
df_test = df_c

# Define static and time-series variables
static_vars = ['Age', 'Gender', 'Height', 'Weight']
time_series_vars = [col for col in df_train.columns if col not in static_vars + ['Hour', 'PatientID', 'RecordID', 'In_hospital_death']]

def prepare_data(df):
    grouped = df.groupby('PatientID')
    
    # Time-series tensor: (num_patients, seq_len, num_features)
    time_series_data = np.array([group[time_series_vars].values for _, group in grouped])
    static_data = grouped[static_vars].last().values  # Static features (num_patients, num_static_features)
    labels = grouped['In_hospital_death'].last().values  # Labels
    
    return time_series_data, static_data, labels

# Prepare training & test sets
X_train_ts, X_train_static, y_train = prepare_data(df_train)
X_test_ts, X_test_static, y_test = prepare_data(df_test)

# Standardization
scaler_ts = StandardScaler()
scaler_static = StandardScaler()

X_train_ts = np.array([scaler_ts.fit_transform(seq) for seq in X_train_ts])
X_test_ts = np.array([scaler_ts.transform(seq) for seq in X_test_ts])

X_train_static = scaler_static.fit_transform(X_train_static)
X_test_static = scaler_static.transform(X_test_static)

# Convert to PyTorch tensors
X_train_ts = torch.tensor(X_train_ts, dtype=torch.float32)
X_train_static = torch.tensor(X_train_static, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_test_ts = torch.tensor(X_test_ts, dtype=torch.float32)
X_test_static = torch.tensor(X_test_static, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create DataLoader
batch_size = 32
train_dataset = TensorDataset(X_train_ts, X_train_static, y_train)
test_dataset = TensorDataset(X_test_ts, X_test_static, y_test)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Model Initialization with Adjustments
model = TransformerModel(
    time_series_input_size=len(time_series_vars),
    static_input_size=len(static_vars),
    num_heads=16,  # Increased number of attention heads
    hidden_dim=256,  # Increased hidden dimensions
    projected_dim=128  # Increased projection dimension
)

# Adjusted Loss Function with Class Weights
pos_weight = torch.tensor([len(y_train) / (2 * sum(y_train))], dtype=torch.float32)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Optimizer with Weight Decay
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

# Learning Rate Scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Training Loop with Validation
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch_ts, batch_static, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_ts, batch_static)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_ts, batch_static, batch_y in test_loader:
            output = model(batch_ts, batch_static)
            loss = criterion(output, batch_y)
            val_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss/len(train_loader):.4f}, Validation Loss: {val_loss/len(test_loader):.4f}")
    scheduler.step()

# Evaluation
model.eval()
y_preds = []
y_true = []
with torch.no_grad():
    for batch_ts, batch_static, batch_y in test_loader:
        output = model(batch_ts, batch_static).cpu().numpy()
        y_preds.extend(output)
        y_true.extend(batch_y.cpu().numpy())

y_preds = np.array(y_preds).flatten()
y_true = np.array(y_true).flatten()

auroc = roc_auc_score(y_true, y_preds)
auprc = average_precision_score(y_true, y_preds)
print(f"Transformer Model - Test Set Performance: AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")

Epoch 1/20, Loss: 0.4404
Epoch 2/20, Loss: 0.3822
Epoch 3/20, Loss: 0.3518
Epoch 4/20, Loss: 0.3364
Epoch 5/20, Loss: 0.3255
Epoch 6/20, Loss: 0.3150
Epoch 7/20, Loss: 0.3106
Epoch 8/20, Loss: 0.3018
Epoch 9/20, Loss: 0.2906
Epoch 10/20, Loss: 0.2827
Epoch 11/20, Loss: 0.2699
Epoch 12/20, Loss: 0.2618
Epoch 13/20, Loss: 0.2485
Epoch 14/20, Loss: 0.2395
Epoch 15/20, Loss: 0.2259
Epoch 16/20, Loss: 0.2148
Epoch 17/20, Loss: 0.2029
Epoch 18/20, Loss: 0.1941
Epoch 19/20, Loss: 0.1822
Epoch 20/20, Loss: 0.1790
Transformer Model - Test Set Performance: AUROC: 0.3366, AUPRC: 0.1091


Q2.3b Tokenizing Time-Series Data and Transformers 

In [None]:
from sklearn.preprocessing import OneHotEncoder, RobustScaler
import numpy as np
import pandas as pd

# Combine time-series and static variables
z_vars = time_series_vars + static_vars

# One-hot encode all variables (z)
enc = OneHotEncoder(sparse_output=False)
z_encoded = enc.fit_transform(np.array(z_vars).reshape(-1, 1))
var_dict = {var: z_encoded[i] for i, var in enumerate(z_vars)}

# Normalize time (t) for each dataset
df_train['t'] = df_train['Hour'] / df_train['Hour'].max()
df_validate['t'] = df_validate['Hour'] / df_train['Hour'].max()
df_test['t'] = df_test['Hour'] / df_train['Hour'].max()

# Fit scalers for all variables
scalers = {var: RobustScaler().fit(df_train[[var]].dropna()) for var in z_vars}

# Function to process patient data
def process_patient_data(patient_df, scalers):
    # Normalize time (t)
    t = patient_df['t'].values

    # Scale values (v) for all variables
    v_scaled = np.column_stack([
        scalers[var].transform(patient_df[[var]].fillna(0))[:, 0] for var in z_vars
    ])

    # Encode variable types (z)
    z_encoded = np.array([var_dict[var] for var in z_vars])

    # Repeat t for each variable and flatten
    t_repeated = np.repeat(t, len(z_vars)).reshape(-1, 1)

    # Tile z_encoded for each time step
    z_tiled = np.tile(z_encoded, (len(t), 1))

    # Flatten v_scaled
    v_flattened = v_scaled.flatten().reshape(-1, 1)

    # Combine into triplets (t, z, v) as a single NumPy array
    triplets = np.hstack((t_repeated, z_tiled, v_flattened))

    return triplets

# Process datasets
patient_groups_train = df_train.groupby('PatientID')
patient_groups_validate = df_validate.groupby('PatientID')
patient_groups_test = df_test.groupby('PatientID')

all_patients_train = [process_patient_data(group, scalers) for _, group in patient_groups_train]
all_patients_validate = [process_patient_data(group, scalers) for _, group in patient_groups_validate]
all_patients_test = [process_patient_data(group, scalers) for _, group in patient_groups_test]


[[ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 ...
 [ 1.          0.          0.         ...  0.          0.
  -1.        ]
 [ 1.          0.          0.         ...  0.          0.
  -0.89602804]
 [ 1.          0.          0.         ...  1.          0.
  -2.71591909]]


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score
import numpy as np
import pandas as pd

# Convert to PyTorch tensors
X_train_ts = [torch.tensor([x[0] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_train]
X_train_z = [torch.tensor([x[1] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_train]
X_train_v = [torch.tensor([x[2] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_train]

X_validate_ts = [torch.tensor([x[0] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_validate]
X_validate_z = [torch.tensor([x[1] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_validate]
X_validate_v = [torch.tensor([x[2] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_validate]

X_test_ts = [torch.tensor([x[0] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_test]
X_test_z = [torch.tensor([x[1] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_test]
X_test_v = [torch.tensor([x[2] for x in seq], dtype=torch.float32).unsqueeze(1) for seq in all_patients_test]

# Dataset preparation (combining t, z, v into a single tensor)
train_data = []
for t, z, v, label in zip(X_train_ts, X_train_z, X_train_v, y_train):
    # Ensure z has the correct shape
    if z.dim() == 2 and z.shape[1] == 1:  # If z is incorrectly shaped, fix it
        z = z.squeeze(1)  # Remove the extra dimension
        z = torch.nn.functional.one_hot(z.to(torch.int64), num_classes=len(var_dict)).float()
    combined = torch.cat((t, z, v), dim=1)  # Concatenate along the feature axis
    train_data.append((combined, torch.tensor(label,dtype=torch.float32)))  # Replace 0.0 with the actual label if available

validate_data = []
for t, z, v, label in zip(X_validate_ts, X_validate_z, X_validate_v, y_validate):
    if z.dim() == 2 and z.shape[1] == 1:
        z = z.squeeze(1)
        z = torch.nn.functional.one_hot(z.to(torch.int64), num_classes=len(var_dict)).float()
    combined = torch.cat((t, z, v), dim=1)
    validate_data.append((combined, torch.tensor(label, dtype=torch.float32)))

test_data = []
for t, z, v,label in zip(X_test_ts, X_test_z, X_test_v, y_test):
    if z.dim() == 2 and z.shape[1] == 1:
        z = z.squeeze(1)
        z = torch.nn.functional.one_hot(z.to(torch.int64), num_classes=len(var_dict)).float()
    combined = torch.cat((t, z, v), dim=1)
    test_data.append((combined, torch.tensor(label, dtype=torch.float32)))
    
# Convert to DataLoader
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
validate_loader = DataLoader(validate_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

class TransformerModel(nn.Module):
    def __init__(self, time_series_input_size, num_heads=4, hidden_dim=64, projected_dim=40):
        super(TransformerModel, self).__init__()
        
        # Project time-series features to an embedding size divisible by num_heads
        self.input_projection = nn.Linear(time_series_input_size, projected_dim)
        
        # Multi-Head Attention Layer
        self.attention_layer = nn.MultiheadAttention(embed_dim=projected_dim, num_heads=num_heads, batch_first=True)
        
        # Fully connected layers after attention
        self.fc1 = nn.Linear(projected_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        
        # Output layer
        self.output_fc = nn.Linear(hidden_dim, 1)
        
        # Dropout
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, time_series_input):
        # Project input to the correct embedding size
        time_series_input = self.input_projection(time_series_input)
        # Apply attention
        attention_output, _ = self.attention_layer(time_series_input, time_series_input, time_series_input)
        
        # Pooling
        x = attention_output.mean(dim=1)  # Global average pooling
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        
        # Final output
        output = self.output_fc(x)
        return self.output_fc(x)  # No sigmoid here

# ===== Model Initialization and Training =====
model = TransformerModel(time_series_input_size=43,
                        num_heads=4,  # Increased number of attention heads
                        hidden_dim=256,  # Increased hidden dimensions
                        projected_dim=128  # Increased projection dimension)
                    )
# Use BCEWithLogitsLoss with class weights for imbalance
pos_weight = torch.tensor([len(y_train) / (2 * sum(y_train))], dtype=torch.float32)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Learning Rate Scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

epochs = 5
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch_ts, batch_y in train_loader:  # Adjust unpacking to match the dataset
        optimizer.zero_grad()
        output = model(batch_ts)  # Pass only the time-series input to the model
        batch_y = batch_y.view(-1, 1)  # Reshape labels to match the output shape
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")
    scheduler.step()
    
# Evaluation
model.eval()
y_preds = []
y_true = []
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output = model(batch_x).cpu().numpy()
        y_preds.extend(output)
        y_true.extend(batch_y.cpu().numpy())

y_preds = np.array(y_preds).flatten()
y_true = np.array(y_true).flatten()

auroc = roc_auc_score(y_true, y_preds)
auprc = average_precision_score(y_true, y_preds)
print(f"Transformer Model - Test Set Performance: AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")

Epoch 1/2, Loss: 0.9093
Epoch 2/2, Loss: 0.8965
Transformer Model - Test Set Performance: AUROC: 0.4974, AUPRC: 0.1457
