# 📑 Tutorials for PyPOTS Imputation Models

## 📀 Preparing the **PhysioNet-2012** dataset for this tutorial

In [4]:
from pypots.data.generating import gene_physionet2012

# Load the PhysioNet-2012 dataset
physionet2012_dataset = gene_physionet2012(artificially_missing=True)

# Take a look at the generated PhysioNet-2012 dataset: everything has already been prepared for you,
# including data splitting, normalization, and additional artificially-missing values for evaluation.
print(physionet2012_dataset.keys())

2023-04-25 09:33:46 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Database)...
2023-04-25 09:33:46 [INFO]: Starting preprocessing physionet_2012...


Dataset physionet_2012 has already been downloaded. Processing directly...
Dataset physionet_2012 has already been cached. Loading from cache directly...
Loaded successfully!
dict_keys(['n_classes', 'n_steps', 'n_features', 'train_X', 'train_y', 'val_X', 'val_y', 'test_X', 'test_y', 'scaler', 'test_X_intact', 'test_X_indicating_mask', 'val_X_intact', 'val_X_indicating_mask'])


In [5]:
# Wrap each data split in the dictionary layout that is passed to the models'
# fit() and impute() calls later in this notebook.

# Training split: only the series "X" is supplied.
dataset_for_training = {"X": physionet2012_dataset["train_X"]}

# Validation split: the series "X" together with its intact copy and the indicating mask
# (presumably the ground truth and the positions of the artificially-missing values, used to score validation).
dataset_for_validating = {
    "X": physionet2012_dataset["val_X"],
    "X_intact": physionet2012_dataset["val_X_intact"],
    "indicating_mask": physionet2012_dataset["val_X_indicating_mask"],
}

# Test split: only the series "X" is supplied; the intact test data is used separately when computing the MAE.
dataset_for_testing = {"X": physionet2012_dataset["test_X"]}

## 🚀 An example of **SAITS** for imputation

In [6]:
from pypots.imputation import SAITS

# Initialize the SAITS model; the input dimensions are taken from the prepared dataset.
saits = SAITS(
    n_steps=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    n_layers=2, 
    d_model=256, 
    d_inner=128, 
    n_head=4, 
    d_k=64, 
    d_v=64, 
    dropout=0.1, 
    epochs=10, # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    patience=3, # here we set patience=3 to stop the training early if the validating loss doesn't decrease for 3 epochs. You can leave it at its default (None) to disable early stopping.
    learning_rate=1e-3,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/imputation/saits", # set the path for saving the trained model and the tensorboard files
)

2023-04-25 09:34:15 [INFO]: No given device, using default device: cpu
2023-04-25 09:34:15 [INFO]: saving_path is set as tutorial_results/imputation/saits, the trained model will be saved to tutorial_results/imputation/saits/20230425_T093415, the tensorboard file will be saved to tutorial_results/imputation/saits/20230425_T093415/tensorboard
2023-04-25 09:34:15 [INFO]: Model initialized successfully with the number of trainable parameters: 1378358


In [7]:
# Train the model on the training set, and validate it on the validating set
# to select the best model for the testing stage in the next step.
saits.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-25 09:34:48 [INFO]: epoch 0: training loss 0.7129, validating loss 0.3242
2023-04-25 09:35:20 [INFO]: epoch 1: training loss 0.5033, validating loss 0.2984
2023-04-25 09:35:53 [INFO]: epoch 2: training loss 0.4429, validating loss 0.2737
2023-04-25 09:36:26 [INFO]: epoch 3: training loss 0.4069, validating loss 0.2594
2023-04-25 09:36:59 [INFO]: epoch 4: training loss 0.3779, validating loss 0.2512
2023-04-25 09:37:30 [INFO]: epoch 5: training loss 0.3593, validating loss 0.2440
2023-04-25 09:38:02 [INFO]: epoch 6: training loss 0.3446, validating loss 0.2384
2023-04-25 09:38:34 [INFO]: epoch 7: training loss 0.3338, validating loss 0.2387
2023-04-25 09:39:06 [INFO]: epoch 8: training loss 0.3262, validating loss 0.2329
2023-04-25 09:39:38 [INFO]: epoch 9: training loss 0.3218, validating loss 0.2309
2023-04-25 09:39:38 [INFO]: Finished training.
2023-04-25 09:39:38 [INFO]: Saved successfully to tutorial_results/imputation/saits/20230425_T093415/SAITS.pypots.


In [8]:
# The testing stage: impute both the originally-missing and the artificially-missing values in the test set.
saits_imputation = saits.impute(dataset_for_testing)

In [9]:
from pypots.utils.metrics import cal_mae

# Score the SAITS imputation: mean absolute error against the ground truth
# (the intact test data, with the indicating mask marking the artificially-missing values).
testing_mae = cal_mae(
    saits_imputation,
    physionet2012_dataset["test_X_intact"],
    physionet2012_dataset["test_X_indicating_mask"],
)
print("Testing mean absolute error: %.4f" % testing_mae)

Testing mean absolute error: 0.2302


## 🚀 An example of **Transformer** for imputation

In [10]:
from pypots.imputation import Transformer

# Initialize the Transformer model; the input dimensions are taken from the prepared dataset.
transformer = Transformer(
    n_steps=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    n_layers=6, 
    d_model=512, 
    d_inner=256, 
    n_head=8, 
    d_k=64, 
    d_v=64, 
    dropout=0.1, 
    epochs=10, # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    patience=3, # here we set patience=3 to stop the training early if the validating loss doesn't decrease for 3 epochs. You can leave it at its default (None) to disable early stopping.
    learning_rate=1e-3,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/imputation/transformer", # set the path for saving the trained model and the tensorboard files
)

2023-04-25 09:39:42 [INFO]: No given device, using default device: cpu
2023-04-25 09:39:42 [INFO]: saving_path is set as tutorial_results/imputation/transformer, the trained model will be saved to tutorial_results/imputation/transformer/20230425_T093942, the tensorboard file will be saved to tutorial_results/imputation/transformer/20230425_T093942/tensorboard
2023-04-25 09:39:42 [INFO]: Model initialized successfully with the number of trainable parameters: 7938597


In [11]:
# Train the model on the training set, and validate it on the validating set
# to select the best model for the testing stage in the next step.
transformer.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-25 09:41:40 [INFO]: epoch 0: training loss 0.9016, validating loss 0.3464
2023-04-25 09:43:38 [INFO]: epoch 1: training loss 0.5438, validating loss 0.3159
2023-04-25 09:45:36 [INFO]: epoch 2: training loss 0.4648, validating loss 0.2850
2023-04-25 09:47:38 [INFO]: epoch 3: training loss 0.4212, validating loss 0.2627
2023-04-25 09:50:35 [INFO]: epoch 4: training loss 0.3916, validating loss 0.2648
2023-04-25 09:53:16 [INFO]: epoch 5: training loss 0.3792, validating loss 0.2509
2023-04-25 09:55:13 [INFO]: epoch 6: training loss 0.3636, validating loss 0.2464
2023-04-25 09:57:14 [INFO]: epoch 7: training loss 0.3556, validating loss 0.2418
2023-04-25 09:59:11 [INFO]: epoch 8: training loss 0.3492, validating loss 0.2416
2023-04-25 10:01:06 [INFO]: epoch 9: training loss 0.3441, validating loss 0.2355
2023-04-25 10:01:06 [INFO]: Finished training.
2023-04-25 10:01:06 [INFO]: Saved successfully to tutorial_results/imputation/transformer/20230425_T093942/Transformer.pypots.


In [12]:
# The testing stage: impute both the originally-missing and the artificially-missing values in the test set.
transformer_imputation = transformer.impute(dataset_for_testing)

In [13]:
from pypots.utils.metrics import cal_mae

# Score the Transformer imputation: mean absolute error against the ground truth
# (the intact test data, with the indicating mask marking the artificially-missing values).
testing_mae = cal_mae(
    transformer_imputation,
    physionet2012_dataset["test_X_intact"],
    physionet2012_dataset["test_X_indicating_mask"],
)
print("Testing mean absolute error: %.4f" % testing_mae)

Testing mean absolute error: 0.2352


## 🚀 An example of **BRITS** for imputation

In [14]:
from pypots.imputation import BRITS

# Initialize the BRITS model; the input dimensions are taken from the prepared dataset.
brits = BRITS(
    n_steps=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    rnn_hidden_size=128, 
    epochs=10, # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    patience=5, # here we set patience=5 to stop the training early if the validating loss doesn't decrease for 5 epochs. You can leave it at its default (None) to disable early stopping.
    learning_rate=1e-3,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/imputation/brits", # set the path for saving the trained model and the tensorboard files
)

2023-04-25 10:01:17 [INFO]: No given device, using default device: cpu
2023-04-25 10:01:17 [INFO]: saving_path is set as tutorial_results/imputation/brits, the trained model will be saved to tutorial_results/imputation/brits/20230425_T100117, the tensorboard file will be saved to tutorial_results/imputation/brits/20230425_T100117/tensorboard
2023-04-25 10:01:17 [INFO]: Model initialized successfully with the number of trainable parameters: 239344


In [15]:
# Train the model on the training set, and validate it on the validating set
# to select the best model for the testing stage in the next step.
brits.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-25 10:02:25 [INFO]: epoch 0: training loss 0.9369, validating loss 0.3463
2023-04-25 10:03:16 [INFO]: epoch 1: training loss 0.7322, validating loss 0.3044
2023-04-25 10:04:06 [INFO]: epoch 2: training loss 0.6803, validating loss 0.2856
2023-04-25 10:04:56 [INFO]: epoch 3: training loss 0.6554, validating loss 0.2778
2023-04-25 10:05:46 [INFO]: epoch 4: training loss 0.6413, validating loss 0.2704
2023-04-25 10:06:37 [INFO]: epoch 5: training loss 0.6301, validating loss 0.2670
2023-04-25 10:07:27 [INFO]: epoch 6: training loss 0.6212, validating loss 0.2633
2023-04-25 10:08:16 [INFO]: epoch 7: training loss 0.6145, validating loss 0.2613
2023-04-25 10:09:06 [INFO]: epoch 8: training loss 0.6085, validating loss 0.2600
2023-04-25 10:09:56 [INFO]: epoch 9: training loss 0.6033, validating loss 0.2575
2023-04-25 10:09:56 [INFO]: Finished training.
2023-04-25 10:09:56 [INFO]: Saved successfully to tutorial_results/imputation/brits/20230425_T100117/BRITS.pypots.


In [16]:
# The testing stage: impute both the originally-missing and the artificially-missing values in the test set.
brits_imputation = brits.impute(dataset_for_testing)

In [17]:
from pypots.utils.metrics import cal_mae

# Score the BRITS imputation: mean absolute error against the ground truth
# (the intact test data, with the indicating mask marking the artificially-missing values).
testing_mae = cal_mae(
    brits_imputation,
    physionet2012_dataset["test_X_intact"],
    physionet2012_dataset["test_X_indicating_mask"],
)
print("Testing mean absolute error: %.4f" % testing_mae)

Testing mean absolute error: 0.2594


## 🚀 An example of **LOCF** for imputation

In [18]:
from pypots.imputation import LOCF

# Initialize the LOCF (last observation carried forward) imputer.
locf = LOCF(
    nan=0 # the value used to fill data missing at the beginning of a sequence, which cannot be imputed with the LOCF mechanism
)

In [19]:
# LOCF doesn't need to be trained; calling the impute() function is enough.
# NOTE(review): fit() is still called below, which contradicts the line above — presumably it is a
# no-op kept so that LOCF follows the same fit()/impute() flow as the other models; confirm, or drop the call.

locf.fit(train_set=dataset_for_training, val_set=dataset_for_validating)



In [20]:
# The testing stage: impute both the originally-missing and the artificially-missing values in the test set.
locf_imputation = locf.impute(dataset_for_testing)

In [21]:
from pypots.utils.metrics import cal_mae

# Score the LOCF imputation: mean absolute error against the ground truth
# (the intact test data, with the indicating mask marking the artificially-missing values).
testing_mae = cal_mae(
    locf_imputation,
    physionet2012_dataset["test_X_intact"],
    physionet2012_dataset["test_X_indicating_mask"],
)
print("Testing mean absolute error: %.4f" % testing_mae)

Testing mean absolute error: 0.4122
