In [1]:
import numpy as np
from ..GRUD.model import GRUD

In [2]:
!pip install benchpots



In [3]:
from benchpots.datasets import preprocess_physionet2012
# Channels requested from the PhysioNet-2012 loader, in the order given here.
# NOTE(review): later cells address columns by position, so this order is
# load-bearing — confirm that the loader preserves it.
features = [
    'ALP', 'ALT', 'AST', 'Albumin', 'BUN', 'Bilirubin',
    'Cholesterol', 'Creatinine', 'FiO2', 'GCS', 'Glucose', 'HCO3',
    'HCT', 'HR', 'K', 'Lactate', 'Mg', 'Na',
    'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2', 'Temp',
    'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight', 'pH',
    # Positions 30-35: blood-pressure channels listed as consecutive pairs
    # (presumably non-invasive "NI*" reading followed by its invasive counterpart).
    'NISysABP', 'SysABP',
    'NIMAP', 'MAP',
    'NIDiasABP', 'DiasABP',
]

# rate=0 presumably requests no additional artificial missingness — TODO confirm
# against the benchpots documentation.
data = preprocess_physionet2012('set-a', rate=0, features=features)

2024-10-25 16:27:45 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-10-25 16:27:45 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-10-25 16:27:45 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...


2024-10-25 16:27:46 [INFO]: Loaded successfully!
2024-10-25 16:27:55 [INFO]: Total sample number: 3997
2024-10-25 16:27:55 [INFO]: Training set size: 2557 (63.97%)
2024-10-25 16:27:55 [INFO]: Validation set size: 640 (16.01%)
2024-10-25 16:27:55 [INFO]: Test set size: 800 (20.02%)
2024-10-25 16:27:55 [INFO]: Number of steps: 48
2024-10-25 16:27:55 [INFO]: Number of features: 36
2024-10-25 16:27:55 [INFO]: Train set missing rate: 79.50%
2024-10-25 16:27:55 [INFO]: Validating set missing rate: 79.69%
2024-10-25 16:27:55 [INFO]: Test set missing rate: 79.76%


In [4]:
# (source column, target column) pairs in the 36-column layout requested from the
# loader: 30/31 = NISysABP/SysABP, 32/33 = NIMAP/MAP, 34/35 = NIDiasABP/DiasABP.
# NOTE(review): assumes the loader keeps the columns in the order of `features`.
BP_FILL_PAIRS = ((30, 31), (32, 33), (34, 35))
N_RAW_FEATURES = 36


def merge_paired_features(X, pairs=BP_FILL_PAIRS):
    """Fill NaNs in each target column from its source column, then drop the sources.

    Args:
        X: array indexed as [sample, step, feature].
        pairs: iterable of (source, target) feature indices. Wherever the target
            is NaN it takes the source's value at the same sample and step (which
            may itself be NaN). Pairs are expected not to share columns.

    Returns:
        A new array with the source columns removed; the remaining columns keep
        their relative order. The input array is not modified.
    """
    merged = np.array(X)  # work on a copy so the caller's array stays intact
    for src, dst in pairs:
        target = merged[:, :, dst]
        merged[:, :, dst] = np.where(np.isnan(target), merged[:, :, src], target)
    dropped = {src for src, _ in pairs}
    kept = [col for col in range(merged.shape[-1]) if col not in dropped]
    return merged[:, :, kept]


for key in ['train_X', 'val_X', 'test_X']:
    n_cols = data[key].shape[-1]
    if n_cols == N_RAW_FEATURES - len(BP_FILL_PAIRS):
        # Already merged by an earlier run of this cell; re-running is a no-op.
        continue
    if n_cols != N_RAW_FEATURES:
        raise ValueError(
            f"{key} has {n_cols} features; expected {N_RAW_FEATURES} before merging"
        )
    data[key] = merge_paired_features(data[key])

In [5]:
# Wrap each split as a dict with 'X' (inputs) and 'y' (labels); these dicts are
# what the fit / predict / classify calls in the following cells receive.
train_dataloader, val_dataloader, test_dataloader = (
    {'X': data[x_key], 'y': data[y_key]}
    for x_key, y_key in (
        ('train_X', 'train_y'),
        ('val_X', 'val_y'),
        ('test_X', 'test_y'),
    )
)

In [6]:
# Read the sequence length and feature count from the prepared training array
# instead of hard-coding them, so the model stays in sync with the data if the
# feature list or the column-merging step changes.
_, n_steps, n_features = train_dataloader['X'].shape

model = GRUD(
    n_steps=n_steps,
    n_features=n_features,
    n_classes=2,
    rnn_hidden_size=49,
    batch_size=64,
    epochs=50,
    patience=3,
    device='cuda',  # hard-coded: this cell needs a CUDA-capable GPU
)
model.fit(train_dataloader, val_dataloader)  # train the model on the dataset
# NOTE(review): `pred` is not used by the cells visible in this notebook.
pred = model.predict(test_dataloader)

2024-10-25 16:27:55 [INFO]: Using the given device: cuda
2024-10-25 16:28:15 [INFO]: Epoch 001 - training loss: 0.4322, validation loss: 0.3326
2024-10-25 16:28:20 [INFO]: Epoch 002 - training loss: 0.3273, validation loss: 0.3199
2024-10-25 16:28:25 [INFO]: Epoch 003 - training loss: 0.3079, validation loss: 0.3046
2024-10-25 16:28:32 [INFO]: Epoch 004 - training loss: 0.2918, validation loss: 0.3064
2024-10-25 16:28:38 [INFO]: Epoch 005 - training loss: 0.2730, validation loss: 0.3103
2024-10-25 16:28:43 [INFO]: Epoch 006 - training loss: 0.2586, validation loss: 0.3248
2024-10-25 16:28:43 [INFO]: Exceeded the training patience. Terminating the training procedure...
2024-10-25 16:28:43 [INFO]: Finished training. The best model is from epoch#3.


In [7]:
# Run the trained model on the test split. Judging by the output displayed in the
# next cell, the result has one row per sample and two columns that sum to 1
# (presumably per-class probabilities — confirm against GRUD.classify).
cls = model.classify(test_dataloader)

In [8]:
# Display the classification output for a quick visual check.
cls

array([[0.8627367 , 0.13726331],
       [0.21163161, 0.7883684 ],
       [0.88555366, 0.11444639],
       ...,
       [0.97379714, 0.02620286],
       [0.9750063 , 0.02499372],
       [0.99417675, 0.00582324]], dtype=float32)

In [9]:
from sklearn.metrics import roc_auc_score

# ROC AUC on the test split: the test labels are scored against column 1 of the
# classifier output.
test_labels = data['test_y']
class1_scores = cls[:, 1]
display(roc_auc_score(test_labels, class1_scores))

0.8009518952915179