In [1]:
# use this to install benchpots in the colab env
# !pip install benchpots

In [2]:
import numpy as np
from benchpots.datasets import preprocess_physionet2012

In [3]:
features=['ALP', 
          'ALT', 
          'AST', 
          'Albumin',
          'BUN',
          'Bilirubin',
          'Cholesterol',
          'Creatinine',
          'FiO2',
          'GCS',
          'Glucose',
          'HCO3',
          'HCT',
          'HR', 
          'K', 
          'Lactate', 
          'Mg', 
          'Na', 
          'PaCO2', 
          'PaO2', 
          'Platelets', 
          'RespRate', 
          'SaO2', 
          'Temp', 
          'TroponinI', 
          'TroponinT', 
          'Urine', 
          'WBC', 
          'Weight', 
          'pH',

          'NISysABP',
          'SysABP',
          'NIMAP',
          'MAP',
          'NIDiasABP',
          'DiasABP',
          ]
data = preprocess_physionet2012('set-a', rate=0, features=features)

for key in ['train_X', 'val_X', 'test_X']:
    a, b = 35, 34
    idx = np.array(np.isnan(data[key][:, :,a]))
    data[key][:, :, a][idx] = data[key][:, :, b][idx]
    a, b = 33, 32
    idx = np.array(np.isnan(data[key][:, :,a]))
    data[key][:, :, a][idx] = data[key][:, :, b][idx]
    a, b = 31, 30
    idx = np.array(np.isnan(data[key][:, :,a]))
    data[key][:, :, a][idx] = data[key][:, :, b][idx]
    data[key] = data[key][:, :, [u for u in range(36) if u not in [30, 32, 34]]]

2024-10-25 16:29:49 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-10-25 16:29:49 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-10-25 16:29:49 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...


2024-10-25 16:29:50 [INFO]: Loaded successfully!
2024-10-25 16:29:59 [INFO]: Total sample number: 3997
2024-10-25 16:29:59 [INFO]: Training set size: 2557 (63.97%)
2024-10-25 16:29:59 [INFO]: Validation set size: 640 (16.01%)
2024-10-25 16:29:59 [INFO]: Test set size: 800 (20.02%)
2024-10-25 16:29:59 [INFO]: Number of steps: 48
2024-10-25 16:29:59 [INFO]: Number of features: 36
2024-10-25 16:29:59 [INFO]: Train set missing rate: 79.54%
2024-10-25 16:29:59 [INFO]: Validating set missing rate: 79.67%
2024-10-25 16:29:59 [INFO]: Test set missing rate: 79.67%


In [4]:
for key in ['train_X', 'val_X', 'test_X']:
    for x, sample in enumerate(data[key]):
        for y in range(sample.shape[-1]):
            replace = 0
            for z, k in enumerate(sample[:, y]):
                if np.isnan(k):
                    data[key][x, z, y] = replace
                else:
                    replace = k

In [5]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, TensorDataset
from sklearn.metrics import roc_auc_score

class gruSimple(nn.Module):
    def __init__(
        self,
        n_features = 33,
        n_classes = 2,
        rnn_hidden_size = 43,
        num_layers = 1,
    ):
        super(gruSimple, self).__init__()
        self.num_layers = num_layers
        self.hidden_dim = rnn_hidden_size
        self.gru = nn.GRU(n_features, rnn_hidden_size, num_layers, batch_first=True)
        # Define the top regressor layer with batch normalization and dropout
        self.fc = nn.Linear(rnn_hidden_size, n_classes)
        self.batch_norm = nn.BatchNorm1d(n_classes)
        self.dropout = nn.Dropout(0.5)
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # Forward propagate through GRU
        out, _ = self.gru(x, h0)
        # Get the last hidden state
        last_hidden_state = out[:, -1, :]
        out = self.fc(last_hidden_state)
        out = self.batch_norm(out)
        out = self.dropout(out)
        out = F.softmax(out, dim=1)
        return out

In [44]:
# Early stopping parameters
early_stopping_patience = 3  # Number of epochs to wait for improvement
best_loss = float('inf')
patience_counter = 0

# Training function with early stopping
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    model.to(device)

    global best_loss, patience_counter
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        running_loss = 0.0
        for _, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(torch.nan_to_num(inputs))

            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/10:.4f}')
        running_loss = 0.0
        
        # Validation phase
        model.eval()  # Set the model to evaluation mode
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(torch.nan_to_num(inputs))
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        
        val_loss /= len(val_loader)
        print(f'Validation Loss after Epoch [{epoch+1}/{num_epochs}]: {val_loss:.4f}')
        
        # Early stopping check
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f'Early stopping at epoch {epoch+1}')
                break
        
        model.train()  # Set back to training mode

    print('Finished Training')

def evaluate_model(val_loader, model, device):
    model.to(device)
    model.eval()
    outputs = torch.tensor([]).to(device)
    lab = torch.tensor([]).to(device)
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            output = model(torch.nan_to_num(inputs))
            outputs = torch.concat([outputs, output])
            lab = torch.concat([lab, labels])
    return outputs, lab

In [32]:
traindata = TensorDataset(torch.tensor(data['train_X'].astype(np.float32)), torch.tensor(data['train_y']))
valdata = TensorDataset(torch.tensor(data['val_X'].astype(np.float32)), torch.tensor(data['val_y']))
testdata = TensorDataset(torch.tensor(data['test_X'].astype(np.float32)), torch.tensor(data['test_y']))

In [33]:
train_loader = DataLoader(traindata, batch_size=64)
val_loader = DataLoader(valdata, batch_size=64)
test_loader = DataLoader(testdata, batch_size=64)

In [40]:
model = gruSimple(33, 2, 64)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [41]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [45]:
# Train the model with early stopping
num_epochs = 50
train(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

Epoch [1/50], Loss: 2.5049
Validation Loss after Epoch [1/50]: 0.6003
Epoch [2/50], Loss: 2.4446
Validation Loss after Epoch [2/50]: 0.5890
Epoch [3/50], Loss: 2.3881
Validation Loss after Epoch [3/50]: 0.5746
Epoch [4/50], Loss: 2.3240
Validation Loss after Epoch [4/50]: 0.5609
Epoch [5/50], Loss: 2.2784
Validation Loss after Epoch [5/50]: 0.5473
Epoch [6/50], Loss: 2.2839
Validation Loss after Epoch [6/50]: 0.5591
Epoch [7/50], Loss: 2.2619
Validation Loss after Epoch [7/50]: 0.5417
Epoch [8/50], Loss: 2.2425
Validation Loss after Epoch [8/50]: 0.5356
Epoch [9/50], Loss: 2.2588
Validation Loss after Epoch [9/50]: 0.5296
Epoch [10/50], Loss: 2.2196
Validation Loss after Epoch [10/50]: 0.5299
Epoch [11/50], Loss: 2.2060
Validation Loss after Epoch [11/50]: 0.5261
Epoch [12/50], Loss: 2.1950
Validation Loss after Epoch [12/50]: 0.5182
Epoch [13/50], Loss: 2.1644
Validation Loss after Epoch [13/50]: 0.5131
Epoch [14/50], Loss: 2.1688
Validation Loss after Epoch [14/50]: 0.5050
Epoch [15/

In [46]:
output, true = evaluate_model(test_loader, model, device)
display(roc_auc_score(true.cpu(), output[:, 1].cpu()))

0.7947214267968985