# Import libraries 

In [1]:
import os
import sys
import pypots
import numpy as np
import benchpots
import matplotlib.pyplot as plt
from pypots.optim import Adam
from pypots.imputation import SAITS, BRITS, USGAN, GPVAE, MRNN
from pypots.utils.random import set_random_seed
from functions.toolkits import toolkits
# Put the parent directory on sys.path (only once) so the project-local
# imports that follow this block can be resolved.
# NOTE(review): `from functions.toolkits import toolkits` is imported *before*
# this path tweak, so it presumably lives next to the notebook — confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from MAEModify.error import calc_mae
from pypotsModify.benchpotsMAE.datasets import preprocess_physionet2012 as preprocess_physionet2012


2025-05-12 13:52:08.256436: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747068728.278331  902237 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747068728.284896  902237 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-12 13:52:08.306244: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


[34m
████████╗██╗███╗   ███╗███████╗    ███████╗███████╗██████╗ ██╗███████╗███████╗    █████╗ ██╗
╚══██╔══╝██║████╗ ████║██╔════╝    ██╔════╝██╔════╝██╔══██╗██║██╔════╝██╔════╝   ██╔══██╗██║
   ██║   ██║██╔████╔██║█████╗█████╗███████╗█████╗  ██████╔╝██║█████╗  ███████╗   ███████║██║
   ██║   ██║██║╚██╔╝██║██╔══╝╚════╝╚════██║██╔══╝  ██╔══██╗██║██╔══╝  ╚════██║   ██╔══██║██║
   ██║   ██║██║ ╚═╝ ██║███████╗    ███████║███████╗██║  ██║██║███████╗███████║██╗██║  ██║██║
   ╚═╝   ╚═╝╚═╝     ╚═╝╚══════╝    ╚══════╝╚══════╝╚═╝  ╚═╝╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝
ai4ts v0.0.3 - building AI for unified time-series analysis, https://time-series.ai [0m



# Load Dataset

In [2]:
# Seed the RNGs with the library default (the recorded log reports seed 2022
# for numpy and pytorch) before any data is masked or sampled.
set_random_seed()
# Project-modified PhysioNet-2012 loader. The recorded log shows ~10% of the
# observed val/test values being masked out as ground truth, which presumably
# corresponds to rate=0.1 — TODO confirm. Meaning of normalization=1 is defined
# in pypotsModify and is not visible here.
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1, normalization=1)
# Show which splits / subgroup entries the loader produced.
print(physionet2012_dataset.keys())

2025-05-12 13:52:17 [INFO]: Have set the random seed as 2022 for numpy and pytorch.
2025-05-12 13:52:17 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2025-05-12 13:52:17 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2025-05-12 13:52:17 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2025-05-12 13:52:17 [INFO]: Loaded successfully!
2025-05-12 13:52:34 [INFO]: 68807 values masked out in the val set as ground truth, take 9.97% of the original observed values
2025-05-12 13:52:34 [INFO]: 68807 values masked out in the val set as ground truth, take 9.97% of the original observed values
2025-05-12 13:52:34 [INFO]: 86319 values masked out in the test set as ground truth, take 9.99% of the original observed values
2025-05-12 13:52:34 [INFO]: 86319 valu

dict_keys(['n_classes', 'n_steps', 'n_features', 'scaler', 'train_X', 'train_y', 'train_ICUType', 'val_X', 'val_y', 'val_ICUType', 'test_X', 'test_y', 'test_ICUType', 'female_gender_test_X', 'female_gender_test_y', 'test_ICUType_female_gender', 'male_gender_test_X', 'male_gender_test_y', 'test_ICUType_male_gender', 'undefined_gender_test_X', 'undefined_gender_test_y', 'test_ICUType_undefined_gender', 'more_than_or_equal_to_65_test_X', 'more_than_or_equal_to_65_test_y', 'test_ICUType_more_than_or_equal_to_65', 'less_than_65_test_X', 'less_than_65_test_y', 'test_ICUType_less_than_65', 'classificacao_undefined_test_X', 'classificacao_undefined_test_y', 'test_ICUType_classificacao_undefined', 'classificacao_baixo_peso_test_X', 'classificacao_baixo_peso_test_y', 'test_ICUType_classificao_baixo_peso', 'classificacao_normal_peso_test_X', 'classificacao_normal_peso_test_y', 'test_ICUType_classificacao_normal_peso', 'classificacao_sobrepeso_test_X', 'classificacao_sobrepeso_test_y', 'test_ICUTy

In [3]:
# Split the loaded dataset into the four collections used below: training,
# validation, the test collection with the "_ori" suffix, and the test
# collection that is fed to the models. The exact contents are decided by
# toolkits.separating_dataset (not visible here).
dataset_for_training, dataset_for_validating, dataset_for_testing_ori, dataset_for_testing = toolkits.separating_dataset(physionet2012_dataset)

In [4]:
# Convert both test collections so they can be indexed by position; the later
# cells address subgroups as dataset_for_testing[i].
# NOTE(review): both names are rebound to the converted value, so re-running
# this cell feeds the already-converted objects back into dict_to_list —
# presumably not intended; re-run the previous cell first.
dataset_for_testing_ori= toolkits.dict_to_list(dataset_for_testing_ori)
dataset_for_testing = toolkits.dict_to_list(dataset_for_testing)

In [5]:
# Build, per test subgroup, the two inputs that calc_mae needs besides the
# imputation itself: an indicating mask and the original test values.
# (How they are derived is defined in toolkits.components_mae, not shown here.)
indicating_mask_subgroup, test_X_ori_subgroup = toolkits.components_mae(dataset_for_testing_ori, dataset_for_testing)

# Train/Load Models

## SAITS

In [6]:
# SAITS imputer sized from the dataset's n_steps / n_features.
# No .fit() call appears in this notebook: the next cell restores weights from
# a checkpoint, so the architecture arguments presumably have to match the
# ones that checkpoint was trained with — TODO confirm.
saits = SAITS(
    n_steps=physionet2012_dataset['n_steps'],
    n_features=physionet2012_dataset['n_features'],
    # --- architecture ---
    n_layers=1,
    d_model=256,
    d_ffn=128,
    n_heads=4,
    d_k=64,
    d_v=64,
    dropout=0.1,
    # --- loss weighting ---
    ORT_weight=1,
    MIT_weight=1,
    # --- training / runtime settings ---
    batch_size=32,
    epochs=10,
    patience=3,
    optimizer=Adam(lr=1e-3),
    num_workers=0,
    device=None,  # no device given; the recorded run fell back to "cpu"
    model_saving_strategy="best",
)

2025-05-12 13:52:34 [INFO]: No given device, using default device: cpu
2025-05-12 13:52:34 [INFO]: Using customized MAE as the training loss function.
2025-05-12 13:52:34 [INFO]: Using customized MSE as the validation metric function.
2025-05-12 13:52:34 [INFO]: SAITS initialized with the given hyperparameters, the number of trainable parameters: 720,182


In [7]:
# Restore SAITS weights from a previously saved checkpoint instead of training.
# The path is relative, so it resolves against the notebook's working directory.
saits.load("../mae/tutorial_results/imputation/saits/20250422_T181642/SAITS.pypots")

2025-05-12 13:52:34 [INFO]: Model loaded successfully from ../mae/tutorial_results/imputation/saits/20250422_T181642/SAITS.pypots


## BRITS

In [8]:
# BRITS imputer sized from the dataset's n_steps / n_features.
# Weights are restored from a checkpoint in the next cell (no .fit() here), so
# rnn_hidden_size presumably has to match the trained checkpoint — TODO confirm.
brits = BRITS(
    n_steps=physionet2012_dataset['n_steps'],
    n_features=physionet2012_dataset['n_features'],
    rnn_hidden_size=128,
    # --- training / runtime settings ---
    batch_size=32,
    epochs=10,
    patience=3,
    optimizer=Adam(lr=1e-3),
    num_workers=0,
    device=None,  # no device given; the recorded run fell back to "cpu"
    model_saving_strategy="best",
)

2025-05-12 13:52:34 [INFO]: No given device, using default device: cpu
2025-05-12 13:52:34 [INFO]: Using customized MAE as the training loss function.
2025-05-12 13:52:34 [INFO]: Using customized MSE as the validation metric function.
2025-05-12 13:52:34 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 239,344


In [9]:
# Restore BRITS weights from a previously saved checkpoint instead of training.
# The path is relative, so it resolves against the notebook's working directory.
brits.load("../mae/tutorial_results/imputation/brits/20250422_T181643/BRITS.pypots")

2025-05-12 13:52:34 [INFO]: Model loaded successfully from ../mae/tutorial_results/imputation/brits/20250422_T181643/BRITS.pypots


## USGAN

In [10]:
# US-GAN imputer sized from the dataset's n_steps / n_features.
# Weights are restored from a checkpoint in the next cell (no .fit() here), so
# the architecture arguments presumably have to match it — TODO confirm.
us_gan = USGAN(
    n_steps=physionet2012_dataset['n_steps'],
    n_features=physionet2012_dataset['n_features'],
    rnn_hidden_size=256,
    lambda_mse=1,
    dropout=0.1,
    G_steps=1,
    D_steps=1,
    # --- training / runtime settings ---
    batch_size=32,
    epochs=10,
    patience=3,
    # Two separate optimizer instances are passed, one per G_/D_ argument.
    G_optimizer=Adam(lr=1e-3),
    D_optimizer=Adam(lr=1e-3),
    num_workers=0,
    device=None,  # no device given; the recorded run fell back to "cpu"
    model_saving_strategy="best",
)

2025-05-12 13:52:34 [INFO]: No given device, using default device: cpu
2025-05-12 13:52:34 [INFO]: USGAN initialized with the given hyperparameters, the number of trainable parameters: 1,258,517


In [11]:
# Restore US-GAN weights from a previously saved checkpoint instead of training.
# The path is relative, so it resolves against the notebook's working directory.
us_gan.load("../mae/tutorial_results/imputation/us_gan/20250422_T181643/USGAN.pypots")

2025-05-12 13:52:34 [INFO]: Model loaded successfully from ../mae/tutorial_results/imputation/us_gan/20250422_T181643/USGAN.pypots


## GPVAE

In [12]:
# GP-VAE imputer sized from the dataset's n_steps / n_features.
# Weights are restored from a checkpoint in the next cell (no .fit() here), so
# the architecture arguments presumably have to match it — TODO confirm.
gp_vae = GPVAE(
    n_steps=physionet2012_dataset['n_steps'],
    n_features=physionet2012_dataset['n_features'],
    # 37 and 24 are hard-coded here; later cells reshape with 48*37, so 37
    # presumably equals n_features — verify before reusing on other data.
    latent_size=37,
    encoder_sizes=(128,128),
    decoder_sizes=(256,256),
    kernel="cauchy",
    beta=0.2,
    M=1,
    K=1,
    sigma=1.005,
    length_scale=7.0,
    kernel_scales=1,
    window_size=24,
    # --- training / runtime settings ---
    batch_size=32,
    epochs=10,
    patience=3,
    optimizer=Adam(lr=1e-3),
    num_workers=0,
    device=None,  # no device given; the recorded run fell back to "cpu"
    model_saving_strategy="best",
)

2025-05-12 13:52:34 [INFO]: No given device, using default device: cpu
2025-05-12 13:52:34 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 229,652


In [13]:
# Restore GP-VAE weights from a previously saved checkpoint instead of training.
# The path is relative, so it resolves against the notebook's working directory.
gp_vae.load("../mae/tutorial_results/imputation/gp_vae/20250422_T181643/GPVAE.pypots")

2025-05-12 13:52:34 [INFO]: Model loaded successfully from ../mae/tutorial_results/imputation/gp_vae/20250422_T181643/GPVAE.pypots


## MRNN

In [14]:
# M-RNN imputer sized from the dataset's n_steps / n_features.
# Unlike the other models in this notebook, no batch_size is passed, so the
# library default applies. Weights are restored from a checkpoint in the next
# cell (no .fit() here).
mrnn = MRNN(
    n_steps=physionet2012_dataset['n_steps'],
    n_features=physionet2012_dataset['n_features'],
    rnn_hidden_size=128,
    # --- training / runtime settings ---
    epochs=10,
    patience=3,
    optimizer=Adam(lr=1e-3),
    num_workers=0,
    device=None,  # no device given; the recorded run fell back to "cpu"
    model_saving_strategy="best",
)

2025-05-12 13:52:34 [INFO]: No given device, using default device: cpu
2025-05-12 13:52:34 [INFO]: Using customized RMSE as the training loss function.
2025-05-12 13:52:34 [INFO]: Using customized MSE as the validation metric function.
2025-05-12 13:52:34 [INFO]: MRNN initialized with the given hyperparameters, the number of trainable parameters: 107,951


In [15]:
# Restore M-RNN weights from a previously saved checkpoint instead of training.
# The path is relative, so it resolves against the notebook's working directory.
mrnn.load("../mae/tutorial_results/imputation/mrnn/20250422_T181643/MRNN.pypots")

2025-05-12 13:52:34 [INFO]: Model loaded successfully from ../mae/tutorial_results/imputation/mrnn/20250422_T181643/MRNN.pypots


# Imputation Models

## SAITS

In [16]:
# Run SAITS on every test subgroup; the result keeps one imputed array per
# subgroup, in the same order as dataset_for_testing.
saits_imputation = [
    saits.predict({"X": subgroup_X})['imputation']
    for subgroup_X in dataset_for_testing
]

## BRITS

In [17]:
# Run BRITS on every test subgroup; the result keeps one imputed array per
# subgroup, in the same order as dataset_for_testing.
brits_imputation = [
    brits.predict({"X": subgroup_X})['imputation']
    for subgroup_X in dataset_for_testing
]

## USGAN

In [18]:
# Run US-GAN on every test subgroup; the result keeps one imputed array per
# subgroup, in the same order as dataset_for_testing.
us_gan_imputation = [
    us_gan.predict({"X": subgroup_X})['imputation']
    for subgroup_X in dataset_for_testing
]

## GPVAE

In [19]:
# Run GP-VAE on every test subgroup; the result keeps one imputed array per
# subgroup, in the same order as dataset_for_testing. The raw output is stored
# as-is; the extra axis is squeezed later, when the errors are computed.
gp_vae_imputation = [
    gp_vae.predict({"X": subgroup_X})['imputation']
    for subgroup_X in dataset_for_testing
]

## MRNN

In [20]:
# Run M-RNN on every test subgroup; the result keeps one imputed array per
# subgroup, in the same order as dataset_for_testing.
mrnn_imputation = [
    mrnn.predict({"X": subgroup_X})['imputation']
    for subgroup_X in dataset_for_testing
]

# Absolute Error (AE) and Mean Absolute Error (MAE) per Model

## SAITS

In [21]:
# Per-subgroup SAITS errors: calc_mae returns a pair that is collected into
# saits_mae (first element) and saits_ae (second element), indexed by subgroup.
saits_mae, saits_ae = [], []

for idx, imputed in enumerate(saits_imputation):
    subgroup_mae, subgroup_ae = calc_mae(
        imputed, test_X_ori_subgroup[idx], indicating_mask_subgroup[idx]
    )
    saits_mae.append(subgroup_mae)
    saits_ae.append(subgroup_ae)

## BRITS

In [22]:
# Per-subgroup BRITS errors: calc_mae returns a pair that is collected into
# brits_mae (first element) and brits_ae (second element), indexed by subgroup.
brits_mae, brits_ae = [], []

for idx, imputed in enumerate(brits_imputation):
    subgroup_mae, subgroup_ae = calc_mae(
        imputed, test_X_ori_subgroup[idx], indicating_mask_subgroup[idx]
    )
    brits_mae.append(subgroup_mae)
    brits_ae.append(subgroup_ae)

## USGAN

In [23]:
# Per-subgroup US-GAN errors: calc_mae returns a pair that is collected into
# us_gan_mae (first element) and us_gan_ae (second element), indexed by subgroup.
us_gan_mae, us_gan_ae = [], []

for idx, imputed in enumerate(us_gan_imputation):
    subgroup_mae, subgroup_ae = calc_mae(
        imputed, test_X_ori_subgroup[idx], indicating_mask_subgroup[idx]
    )
    us_gan_mae.append(subgroup_mae)
    us_gan_ae.append(subgroup_ae)

## GPVAE

In [24]:
# Per-subgroup GP-VAE errors, collected into gp_vae_mae / gp_vae_ae.
gp_vae_mae, gp_vae_ae = [], []

for idx, raw_imputation in enumerate(gp_vae_imputation):
    # GP-VAE is the only model here whose output needs axis 1 removed before
    # it is compared with the targets (presumably a size-1 sampling axis —
    # np.squeeze raises if that axis is not of length 1).
    gp_vae_imputation_fix = np.squeeze(raw_imputation, axis=1)
    subgroup_mae, subgroup_ae = calc_mae(
        gp_vae_imputation_fix, test_X_ori_subgroup[idx], indicating_mask_subgroup[idx]
    )
    gp_vae_mae.append(subgroup_mae)
    gp_vae_ae.append(subgroup_ae)

## MRNN

In [25]:
# Per-subgroup M-RNN errors: calc_mae returns a pair that is collected into
# mrnn_mae (first element) and mrnn_ae (second element), indexed by subgroup.
mrnn_mae, mrnn_ae = [], []

for idx, imputed in enumerate(mrnn_imputation):
    subgroup_mae, subgroup_ae = calc_mae(
        imputed, test_X_ori_subgroup[idx], indicating_mask_subgroup[idx]
    )
    mrnn_mae.append(subgroup_mae)
    mrnn_ae.append(subgroup_ae)

# Confidence Intervals (95% bootstrap CI) per Model

In [27]:
# Positions (within the per-subgroup lists) of the subgroups that get a
# bootstrap confidence interval. Indices 0 and 3 are deliberately left out —
# presumably the full test set and a subgroup without usable samples; TODO confirm.
subgroups = [1, 2, 4, 5, 6, 7, 8, 9, 10]

# Flatten each selected mask to 1-D for the bootstrap helper.
# reshape(-1) lets NumPy infer the length instead of hard-coding
# len(mask) * 48 * 37, so the cell no longer depends on fixed n_steps /
# n_features and is safe to re-run (flattening a flat array is a no-op,
# whereas the old expression raised on a second execution).
# NOTE: the masks are overwritten in place; the AE/MAE cells above expect the
# un-flattened masks, so rebuild them (components_mae cell) before re-running
# those cells.
for i in subgroups:
    indicating_mask_subgroup[i] = indicating_mask_subgroup[i].reshape(-1)

## SAITS

In [27]:
# Flatten the SAITS absolute-error array of each selected subgroup to 1-D so it
# lines up element-wise with the flattened indicating mask.
# reshape(-1) replaces the hard-coded len(x) * 48 * 37: same result on the
# first run, no dependence on fixed n_steps / n_features, and re-running the
# cell is a no-op instead of a reshape error.
for i in subgroups:
    saits_ae[i] = saits_ae[i].reshape(-1)

In [28]:
# One bootstrap result per selected subgroup, in the order given by `subgroups`
# (so position k here corresponds to subgroups[k], not to subgroup index k).
# 9000 is the last argument of bootstrap_v2 — presumably the number of
# resamples; confirm in toolkits.
bootstrap_saits = [
    toolkits.bootstrap_v2(saits_ae, indicating_mask_subgroup, subgroup_idx, 9000)
    for subgroup_idx in subgroups
]

In [29]:
# 95% percentile interval of each SAITS bootstrap distribution:
# 2.5th percentile as the lower bound, 97.5th as the upper bound.
lower_bound_saits = [np.percentile(samples, 2.5) for samples in bootstrap_saits]
upper_bound_saits = [np.percentile(samples, 97.5) for samples in bootstrap_saits]

print(lower_bound_saits)
print(upper_bound_saits)

[np.float64(0.23646403701078456), np.float64(0.2420292305185013), np.float64(0.23205884266152668), np.float64(0.2466574759137788), np.float64(0.25010204921876156), np.float64(0.24168280105467854), np.float64(0.2277976157470241), np.float64(0.23019737142052993), np.float64(0.22442615071501457)]
[np.float64(0.24563971014585365), np.float64(0.25093853302932345), np.float64(0.24101081497948487), np.float64(0.25772797406041914), np.float64(0.26157281464085863), np.float64(0.2823500607648644), np.float64(0.24512983485888834), np.float64(0.24627158891297163), np.float64(0.23843190323116042)]


In [30]:
# Midpoint of each SAITS interval, (lower + upper) / 2. Despite the name this
# is the centre of the interval, not the mean of the bootstrap samples.
# Both bound lists are built together, so they always have equal length.
mean_bound_saits = [
    (lower + upper) / 2
    for lower, upper in zip(lower_bound_saits, upper_bound_saits)
]

print(mean_bound_saits)

[np.float64(0.2410518735783191), np.float64(0.2464838817739124), np.float64(0.23653482882050578), np.float64(0.252192724987099), np.float64(0.2558374319298101), np.float64(0.26201643090977145), np.float64(0.23646372530295623), np.float64(0.23823448016675078), np.float64(0.23142902697308748)]


## BRITS

In [28]:
# Flatten the BRITS absolute-error array of each selected subgroup to 1-D so it
# lines up element-wise with the flattened indicating mask.
# reshape(-1) replaces the hard-coded len(x) * 48 * 37: same result on the
# first run, no dependence on fixed n_steps / n_features, and re-running the
# cell is a no-op instead of a reshape error.
for i in subgroups:
    brits_ae[i] = brits_ae[i].reshape(-1)

In [None]:
# One bootstrap result per selected subgroup, in the order given by `subgroups`
# (so position k here corresponds to subgroups[k], not to subgroup index k).
# 9000 is the last argument of bootstrap_v2 — presumably the number of
# resamples; confirm in toolkits.
bootstrap_brits = [
    toolkits.bootstrap_v2(brits_ae, indicating_mask_subgroup, subgroup_idx, 9000)
    for subgroup_idx in subgroups
]

In [None]:
# 95% percentile interval of each BRITS bootstrap distribution:
# 2.5th percentile as the lower bound, 97.5th as the upper bound.
lower_bound_brits = [np.percentile(samples, 2.5) for samples in bootstrap_brits]
upper_bound_brits = [np.percentile(samples, 97.5) for samples in bootstrap_brits]

print(lower_bound_brits)
print(upper_bound_brits)

In [None]:
# Midpoint of each BRITS interval, (lower + upper) / 2. Despite the name this
# is the centre of the interval, not the mean of the bootstrap samples.
# Both bound lists are built together, so they always have equal length.
mean_bound_brits = [
    (lower + upper) / 2
    for lower, upper in zip(lower_bound_brits, upper_bound_brits)
]

print(mean_bound_brits)

## USGAN

In [None]:
# Flatten the US-GAN absolute-error array of each selected subgroup to 1-D so
# it lines up element-wise with the flattened indicating mask.
# reshape(-1) replaces the hard-coded len(x) * 48 * 37: same result on the
# first run, no dependence on fixed n_steps / n_features, and re-running the
# cell is a no-op instead of a reshape error.
for i in subgroups:
    us_gan_ae[i] = us_gan_ae[i].reshape(-1)

In [None]:
# One bootstrap result per selected subgroup, in the order given by `subgroups`
# (so position k here corresponds to subgroups[k], not to subgroup index k).
# 9000 is the last argument of bootstrap_v2 — presumably the number of
# resamples; confirm in toolkits.
bootstrap_usgan = [
    toolkits.bootstrap_v2(us_gan_ae, indicating_mask_subgroup, subgroup_idx, 9000)
    for subgroup_idx in subgroups
]

In [None]:
# 95% percentile interval of each US-GAN bootstrap distribution:
# 2.5th percentile as the lower bound, 97.5th as the upper bound.
lower_bound_usgan = [np.percentile(samples, 2.5) for samples in bootstrap_usgan]
upper_bound_usgan = [np.percentile(samples, 97.5) for samples in bootstrap_usgan]

print(lower_bound_usgan)
print(upper_bound_usgan)

In [None]:
# Midpoint of each US-GAN interval, (lower + upper) / 2. Despite the name this
# is the centre of the interval, not the mean of the bootstrap samples.
# Both bound lists are built together, so they always have equal length.
mean_bound_usgan = [
    (lower + upper) / 2
    for lower, upper in zip(lower_bound_usgan, upper_bound_usgan)
]

print(mean_bound_usgan)

## GPVAE

In [None]:
# Flatten the GP-VAE absolute-error array of each selected subgroup to 1-D so
# it lines up element-wise with the flattened indicating mask.
# reshape(-1) replaces the hard-coded len(x) * 48 * 37: same result on the
# first run, no dependence on fixed n_steps / n_features, and re-running the
# cell is a no-op instead of a reshape error.
for i in subgroups:
    gp_vae_ae[i] = gp_vae_ae[i].reshape(-1)

In [None]:
# One bootstrap result per selected subgroup, in the order given by `subgroups`
# (so position k here corresponds to subgroups[k], not to subgroup index k).
# 9000 is the last argument of bootstrap_v2 — presumably the number of
# resamples; confirm in toolkits.
bootstrap_gpvae = [
    toolkits.bootstrap_v2(gp_vae_ae, indicating_mask_subgroup, subgroup_idx, 9000)
    for subgroup_idx in subgroups
]

In [None]:
# 95% percentile interval of each GP-VAE bootstrap distribution:
# 2.5th percentile as the lower bound, 97.5th as the upper bound.
lower_bound_gpvae = [np.percentile(samples, 2.5) for samples in bootstrap_gpvae]
upper_bound_gpvae = [np.percentile(samples, 97.5) for samples in bootstrap_gpvae]

print(lower_bound_gpvae)
print(upper_bound_gpvae)

In [None]:
# Midpoint of each GP-VAE interval, (lower + upper) / 2. Despite the name this
# is the centre of the interval, not the mean of the bootstrap samples.
# Both bound lists are built together, so they always have equal length.
mean_bound_gpvae = [
    (lower + upper) / 2
    for lower, upper in zip(lower_bound_gpvae, upper_bound_gpvae)
]

print(mean_bound_gpvae)

## MRNN

In [None]:
# Flatten the M-RNN absolute-error array of each selected subgroup to 1-D so it
# lines up element-wise with the flattened indicating mask.
# reshape(-1) replaces the hard-coded len(x) * 48 * 37: same result on the
# first run, no dependence on fixed n_steps / n_features, and re-running the
# cell is a no-op instead of a reshape error.
for i in subgroups:
    mrnn_ae[i] = mrnn_ae[i].reshape(-1)

In [None]:
# One bootstrap result per selected subgroup, in the order given by `subgroups`
# (so position k here corresponds to subgroups[k], not to subgroup index k).
# 9000 is the last argument of bootstrap_v2 — presumably the number of
# resamples; confirm in toolkits.
bootstrap_mrnn = [
    toolkits.bootstrap_v2(mrnn_ae, indicating_mask_subgroup, subgroup_idx, 9000)
    for subgroup_idx in subgroups
]

In [None]:
# 95% percentile interval of each M-RNN bootstrap distribution:
# 2.5th percentile as the lower bound, 97.5th as the upper bound.
lower_bound_mrnn = [np.percentile(samples, 2.5) for samples in bootstrap_mrnn]
upper_bound_mrnn = [np.percentile(samples, 97.5) for samples in bootstrap_mrnn]

print(lower_bound_mrnn)
print(upper_bound_mrnn)

In [None]:
# Midpoint of each M-RNN interval, (lower + upper) / 2. Despite the name this
# is the centre of the interval, not the mean of the bootstrap samples.
# Both bound lists are built together, so they always have equal length.
mean_bound_mrnn = [
    (lower + upper) / 2
    for lower, upper in zip(lower_bound_mrnn, upper_bound_mrnn)
]

print(mean_bound_mrnn)