# 📑 Tutorials for PyPOTS Forecasting Models

## 📀 Preparing the **PhysioNet-2012** dataset for this tutorial

In [1]:
from pypots.data.generating import gene_physionet2012
from pypots.utils.random import set_random_seed
from global_config import RANDOM_SEED

# Seed the random number generators before anything else runs
# (the log output reports that this covers numpy and pytorch).
set_random_seed(RANDOM_SEED)

# Load the PhysioNet-2012 dataset; the result is a dictionary.
physionet2012_dataset = gene_physionet2012()

# Print the keys of the generated dataset. Everything has already been prepared for you:
# data splitting (train/val/test), normalization, additional artificially-missing values for evaluation, etc.
print(physionet2012_dataset.keys())


2024-03-17 22:54:50 [INFO]: Have set the random seed as 16 for numpy and pytorch.
2024-03-17 22:54:50 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Data_Beans)...
2024-03-17 22:54:50 [INFO]: Starting preprocessing physionet_2012...
2024-03-17 22:54:50 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-03-17 22:54:50 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-03-17 22:54:50 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-03-17 22:54:50 [INFO]: Loaded successfully!


dict_keys(['n_classes', 'n_steps', 'n_features', 'train_X', 'train_y', 'train_ICUType', 'val_X', 'val_y', 'val_ICUType', 'test_X', 'test_y', 'test_ICUType', 'scaler', 'val_X_ori', 'test_X_ori', 'test_X_indicating_mask'])


In [2]:
# Wrap each data split in the dictionary format that is passed to the model below.

# Training only needs the (incomplete) time series themselves.
dataset_for_training = dict(X=physionet2012_dataset["train_X"])

# Validation carries both the series given to the model and their original counterpart.
# NOTE(review): the key "X_intact" does not follow the "*_ori" naming used by the dataset keys —
# confirm which key the installed PyPOTS version expects for validation sets.
dataset_for_validating = dict(
    X=physionet2012_dataset["val_X"],
    X_intact=physionet2012_dataset["val_X_ori"],
)

# Only the first 36 time steps are given to the model as input;
# the model is asked to forecast the remaining 12 steps.
dataset_for_testing = dict(X=physionet2012_dataset["test_X"][:, :36])


## 🚀 An example of **BTTF** for forecasting

In [3]:
from pypots.forecasting import BTTF
import numpy as np

# Build the BTTF forecaster: it receives 36 observed steps per sample and predicts the next 12.
bttf = BTTF(
    n_steps=36,  # length of the input window, matching the [:, :36] slice of the test set
    n_features=physionet2012_dataset["n_features"],
    pred_step=12,  # number of future steps to forecast
    rank=10,
    # lags 1-3, then 10-12 and 20-22: [1, 2, 3, 10, 11, 12, 20, 21, 22]
    time_lags=[1, 2, 3] + [base + shift for base in (10, 20) for shift in range(3)],
    burn_iter=5,
    gibbs_iter=5,
    multi_step=1,
)


2024-03-17 22:55:07 [INFO]: No given device, using default device: cuda


In [4]:
# Call fit() with the training and validation sets, mirroring the usual train/validate workflow
# that precedes the testing stage in the next step.
bttf.fit(train_set=dataset_for_training, val_set=dataset_for_validating)
# Note: BTTF does not need to run func fit().
# NOTE(review): presumably this call is kept only for consistency with other models — confirm against the PyPOTS docs.




In [5]:
# The testing stage: run the model on the test inputs (the first 36 steps of each test sample).
bttf_results = bttf.predict(dataset_for_testing)
# The forecast is stored under the "forecasting" key of the returned results.
bttf_prediction = bttf_results["forecasting"]

In [6]:
from pypots.utils.metrics import calc_mae

# Ground truth for the forecasting task: every time step after the first 36 input steps.
forecast_target = physionet2012_dataset["test_X"][:, 36:]

# 1 where the ground truth holds a value, 0 where it is NaN, so that the error is
# measured only on positions that actually have a ground-truth value.
observed_mask = (~np.isnan(forecast_target)).astype(int)

# Mean absolute error between the forecast and the ground truth (NaNs replaced via nan_to_num).
testing_mae = calc_mae(
    bttf_prediction,
    np.nan_to_num(forecast_target),
    observed_mask,
)
print(f"Testing mean absolute error: {testing_mae:.4f}")


Testing mean absolute error: 1.2796
