<h2>Dependency Installation</h2>

In [1]:
! pip install pypots==0.8

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


<h2>Importando bibliotecas</h2>

In [2]:
import pypots
import numpy as np
import benchpots
from pypots.utils.random import set_random_seed
from pypots.optim import Adam
from pypots.imputation import BRITS
from pypots.utils.metrics import calc_mae

2024-12-03 21:36:49.928661: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[34m
████████╗██╗███╗   ███╗███████╗    ███████╗███████╗██████╗ ██╗███████╗███████╗    █████╗ ██╗
╚══██╔══╝██║████╗ ████║██╔════╝    ██╔════╝██╔════╝██╔══██╗██║██╔════╝██╔════╝   ██╔══██╗██║
   ██║   ██║██╔████╔██║█████╗█████╗███████╗█████╗  ██████╔╝██║█████╗  ███████╗   ███████║██║
   ██║   ██║██║╚██╔╝██║██╔══╝╚════╝╚════██║██╔══╝  ██╔══██╗██║██╔══╝  ╚════██║   ██╔══██║██║
   ██║   ██║██║ ╚═╝ ██║███████╗    ███████║███████╗██║  ██║██║███████╗███████║██╗██║  ██║██║
   ╚═╝   ╚═╝╚═╝     ╚═╝╚══════╝    ╚══════╝╚══════╝╚═╝  ╚═╝╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝
ai4ts v0.0.3 - building AI for unified time-series analysis, https://time-series.ai [0m



  @autocast(enabled=False)
  @autocast(enabled=False)


<h2>Carregando base de dados</h2>

In [3]:
# Seed the random number generators before anything stochastic happens, so the
# artificial masking performed during preprocessing is reproducible.
set_random_seed()

# Load and preprocess the PhysioNet-2012 dataset (every subset). `rate=0.1` asks
# benchpots to hold out roughly 10% of the observed values as artificially-missing
# ground truth for evaluation.
physionet2012_dataset = benchpots.datasets.preprocess_physionet2012(
    subset="all",
    rate=0.1,
)

# The returned dict already contains the train/val/test splits, the fitted scaler and
# the "*_X_ori" arrays that keep the held-out values; list its keys for reference.
print(physionet2012_dataset.keys())

2024-12-03 21:36:56 [INFO]: Have set the random seed as 2022 for numpy and pytorch.
2024-12-03 21:36:56 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-12-03 21:36:56 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-12-03 21:36:56 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-12-03 21:36:56 [INFO]: Loaded successfully!
2024-12-03 21:37:16 [INFO]: 68807 values masked out in the val set as ground truth, take 9.97% of the original observed values
2024-12-03 21:37:16 [INFO]: 86319 values masked out in the test set as ground truth, take 9.99% of the original observed values
2024-12-03 21:37:16 [INFO]: Total sample number: 11988
2024-12-03 21:37:16 [INFO]: Training set size: 7671 (63.99%)
2024-12-03 21:37:16 [INFO]: Validation set size: 

dict_keys(['n_classes', 'n_steps', 'n_features', 'scaler', 'train_X', 'train_y', 'train_ICUType', 'val_X', 'val_y', 'val_ICUType', 'test_X', 'test_y', 'test_ICUType', 'val_X_ori', 'test_X_ori'])


<h2>Separando datasets</h2>

In [4]:
# Training set: only the (partially observed) input array is needed.
dataset_for_training = dict(X=physionet2012_dataset["train_X"])

# Validation set: besides the masked input, pass the original values ("X_ori")
# so the held-out entries can serve as ground truth during validation.
dataset_for_validating = dict(
    X=physionet2012_dataset["val_X"],
    X_ori=physionet2012_dataset["val_X_ori"],
)

# Test set: the model only ever sees the masked input.
dataset_for_testing = dict(X=physionet2012_dataset["test_X"])

# Mask of the ground-truth positions: entries that are NaN in exactly one of
# test_X_ori / test_X, i.e. the values that were artificially removed.
# The metric functions use it to score only those positions.
test_X_indicating_mask = np.logical_xor(
    np.isnan(physionet2012_dataset["test_X_ori"]),
    np.isnan(physionet2012_dataset["test_X"]),
)

# Metric functions do not accept inputs containing NaNs, hence fill NaNs with 0.
test_X_ori = np.nan_to_num(physionet2012_dataset["test_X_ori"])

<h2>Carregando modelo</h2>

In [5]:
# Build the BRITS imputation model.
brits = BRITS(
    # --- data shape, taken from the preprocessed dataset ---
    n_steps=physionet2012_dataset["n_steps"],
    n_features=physionet2012_dataset["n_features"],
    # --- architecture ---
    rnn_hidden_size=128,
    # --- training schedule ---
    batch_size=32,
    # epochs=10 keeps the demo quick; set it to 100 or more for better performance.
    epochs=10,
    # Early stopping: stop training if the evaluation loss does not decrease for
    # 3 epochs. Leave it at the default None to disable early stopping.
    patience=3,
    # Unlike torch.optim.Optimizer, a pypots.optim.Optimizer is created without the
    # model's parameters. Leaving this at the default also initializes an Adam
    # optimizer with lr=0.001.
    optimizer=Adam(lr=1e-3),
    # --- runtime ---
    # Forwarded to torch.utils.data.DataLoader: the number of subprocesses used for
    # data loading. 0 (the default) loads data in the main process; increase it if
    # data loading is a bottleneck for training speed.
    num_workers=0,
    # None lets PyPOTS pick the best available device automatically. Use 'cpu' if you
    # have no CUDA devices, 'cuda:0' / 'cuda:1' to choose one, or a list such as
    # ['cuda:0', 'cuda:1'] to run in parallel on several.
    device=None,
    # --- outputs ---
    # Directory for the tensorboard files and the trained model files.
    saving_path="tutorial_results/imputation/brits",
    # "best" saves only the best model after training has finished; "better" saves
    # every model that improves on the previous ones during training.
    model_saving_strategy="best",
)

2024-11-24 22:36:11 [INFO]: No given device, using default device: cuda
2024-11-24 22:36:11 [INFO]: Model files will be saved to tutorial_results/imputation/brits/20241124_T223611
2024-11-24 22:36:11 [INFO]: Tensorboard file will be saved to tutorial_results/imputation/brits/20241124_T223611/tensorboard


2024-11-24 22:36:17 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 239,344


<h2>Treinamento do modelo</h2>

In [6]:
# Fit BRITS on the training split; the validation split is evaluated during training
# and is used to select the best model for the testing step that follows.
brits.fit(
    train_set=dataset_for_training,
    val_set=dataset_for_validating,
)

2024-11-24 15:50:32 [INFO]: Epoch 001 - training loss: 0.9418, validation loss: 6.7859
2024-11-24 15:50:32 [INFO]: Saved the model to tutorial_results/imputation/brits/20241124_T154225/BRITS_epoch1_loss6.7859427854418755.pypots
2024-11-24 15:57:53 [INFO]: Epoch 002 - training loss: 0.7343, validation loss: 6.7421
2024-11-24 15:57:53 [INFO]: Saved the model to tutorial_results/imputation/brits/20241124_T154225/BRITS_epoch2_loss6.742129882673423.pypots
2024-11-24 16:04:54 [INFO]: Epoch 003 - training loss: 0.6835, validation loss: 6.7306
2024-11-24 16:04:54 [INFO]: Saved the model to tutorial_results/imputation/brits/20241124_T154225/BRITS_epoch3_loss6.730608933418989.pypots
2024-11-24 16:11:15 [INFO]: Epoch 004 - training loss: 0.6591, validation loss: 6.7267
2024-11-24 16:11:15 [INFO]: Saved the model to tutorial_results/imputation/brits/20241124_T154225/BRITS_epoch4_loss6.726703131943941.pypots
2024-11-24 16:18:01 [INFO]: Epoch 005 - training loss: 0.6438, validation loss: 6.7250
2024

### Carregando modelo treinado (checkpoint salvo)

In [6]:
# Restore a checkpoint saved by an earlier training run. The path is relative to the
# notebook's working directory and lives under the same `saving_path` that was given to
# BRITS, so the notebook no longer depends on one user's absolute directory layout.
# NOTE(review): this cell runs after `brits.fit(...)`, so it presumably replaces the
# freshly trained weights with the ones stored in this file - confirm this is intended,
# and update the run folder (20241124_T154225) when loading a different training run.
brits.load('tutorial_results/imputation/brits/20241124_T154225/BRITS.pypots')

  loaded_model = torch.load(path, map_location=self.device)
2024-11-24 22:36:29 [INFO]: Model loaded successfully from /data/victor/missingdata/notebooks/tutorial_results/imputation/brits/20241124_T154225/BRITS.pypots


<h2>Imputação</h2>

In [7]:
# Testing stage: run the model on the test split to impute both the originally-missing
# values and the artificially-missing values (the latter are the ones scored later).
brits_results = brits.predict(dataset_for_testing)
# `predict` returns a dict; the imputed data is stored under the "imputation" key.
brits_imputation = brits_results["imputation"]

<h2>Avaliação</h2>

In [8]:
# Mean absolute error between the imputation and the original values, restricted by the
# indicating mask to the ground-truth (artificially-missing) positions.
testing_mae = calc_mae(brits_imputation, test_X_ori, test_X_indicating_mask)
print(f"Testing mean absolute error: {testing_mae:.4f}")

Testing mean absolute error: 0.2669
