# 📑 Tutorials for PyPOTS Classification Models

## 📀 Preparing the **PhysioNet-2012** dataset for this tutorial

In [1]:
from pypots.data.generating import gene_physionet2012

# Load the PhysioNet-2012 dataset; artificially_missing=False disables the artificially-missing values for evaluation
physionet2012_dataset = gene_physionet2012(artificially_missing=False)
 
# Take a look at the keys of the generated PhysioNet-2012 dataset, you'll find that everything has been prepared for you:
# data splitting (train/val/test sets with their labels), normalization (see the 'scaler' entry), etc.
print(physionet2012_dataset.keys())

2023-04-27 09:09:11 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Database)...
2023-04-27 09:09:11 [INFO]: Starting preprocessing physionet_2012...


Dataset physionet_2012 has already been downloaded. Processing directly...
Dataset physionet_2012 has already been cached. Loading from cache directly...
Loaded successfully!
dict_keys(['n_classes', 'n_steps', 'n_features', 'train_X', 'train_y', 'val_X', 'val_y', 'test_X', 'test_y', 'scaler'])


In [2]:
# Bundle each split into the dict layout that is passed to fit() and classify() below:
# "X" takes the split's *_X entry and "y" takes the matching *_y entry.

dataset_for_training = dict(
    X=physionet2012_dataset["train_X"],
    y=physionet2012_dataset["train_y"],
)

dataset_for_validating = dict(
    X=physionet2012_dataset["val_X"],
    y=physionet2012_dataset["val_y"],
)

dataset_for_testing = dict(
    X=physionet2012_dataset["test_X"],
    y=physionet2012_dataset["test_y"],
)

## 🚀 An example of **Raindrop** for classification

In [3]:
from pypots.classification import Raindrop

# initialize the model
raindrop = Raindrop(
    max_len=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    n_layers=2,
    d_model=physionet2012_dataset["n_features"] * 4,
    d_inner=256,
    n_heads=2,
    dropout=0.3,
    n_classes=physionet2012_dataset["n_classes"],
    d_static=0,
    aggregation="mean",
    sensor_wise_mask=False,
    static=False,
    epochs=10, # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    patience=None, # here we leave patience as None to disable early stopping. You can set it to e.g. 5 to early stop the training if the validating loss doesn't decrease for 5 epochs.
    learning_rate=1e-3,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/classification/raindrop", # set the path for saving the trained model and the tensorboard files
)

2023-04-27 09:09:42 [INFO]: No given device, using default device: cpu
2023-04-27 09:09:42 [INFO]: saving_path is set as tutorial_results/classification/raindrop, the trained model will be saved to tutorial_results/classification/raindrop/20230427_T090942, the tensorboard file will be saved to tutorial_results/classification/raindrop/20230427_T090942/tensorboard
2023-04-27 09:09:42 [INFO]: Model initialized successfully with the number of trainable parameters: 1415006


In [4]:
# Train the model on the training set and validate it on the validating set,
# so that the best model can be selected for testing in the next step.
raindrop.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-27 09:10:57 [INFO]: epoch 0: training loss 0.3843, validating loss 0.3748
2023-04-27 09:12:02 [INFO]: epoch 1: training loss 0.3273, validating loss 0.3460
2023-04-27 09:13:06 [INFO]: epoch 2: training loss 0.3162, validating loss 0.3311
2023-04-27 09:14:12 [INFO]: epoch 3: training loss 0.3064, validating loss 0.3287
2023-04-27 09:15:20 [INFO]: epoch 4: training loss 0.3050, validating loss 0.3198
2023-04-27 09:16:29 [INFO]: epoch 5: training loss 0.2974, validating loss 0.3392
2023-04-27 09:17:58 [INFO]: epoch 6: training loss 0.2864, validating loss 0.3195
2023-04-27 09:20:04 [INFO]: epoch 7: training loss 0.2851, validating loss 0.3172
2023-04-27 09:22:41 [INFO]: epoch 8: training loss 0.2816, validating loss 0.3263
2023-04-27 09:26:10 [INFO]: epoch 9: training loss 0.2756, validating loss 0.3463
2023-04-27 09:26:10 [INFO]: Finished training.
2023-04-27 09:26:10 [INFO]: Saved successfully to tutorial_results/classification/raindrop/20230427_T090942/Raindrop.pypots.


In [5]:
# the testing stage: classify the samples in the test set with the trained model
raindrop_prediction = raindrop.classify(dataset_for_testing)

In [7]:
from pypots.utils.metrics import cal_binary_classification_metrics

# calculate the binary classification metrics (ROC-AUC, PR-AUC, F1, precision, and recall)
# of the Raindrop predictions against the ground-truth labels of the test set
metrics = cal_binary_classification_metrics(raindrop_prediction, dataset_for_testing["y"])
print("Testing classification metrics: \n"
    f'ROC_AUC: {metrics["roc_auc"]}, \n'
    f'PR_AUC: {metrics["pr_auc"]},\n'
    f'F1: {metrics["f1"]},\n'
    f'Precision: {metrics["precision"]},\n'
    f'Recall: {metrics["recall"]},\n'
)

Testing classification metrics: 
ROC_AUC: 0.8370269768822591, 
PR_AUC: 0.4878470339452396,
F1: 0.4178498985801217,
Precision: 0.6130952380952381,
Recall: 0.3169230769230769,



## 🚀 An example of **BRITS** for classification

In [8]:
from pypots.classification import BRITS

# initialize the model
brits = BRITS(
    n_steps=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    n_classes=physionet2012_dataset["n_classes"],
    rnn_hidden_size=256,
    epochs=20, # here we set epochs=20 for a quick demo, you can set it to 100 or more for better performance
    patience=5, # here we set patience=5 to early stop the training if the validating loss doesn't decrease for 5 epochs. You can leave it to default as None to disable early stopping.
    learning_rate=5e-4,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/classification/brits", # set the path for saving the trained model and the tensorboard files
)

2023-04-27 09:27:00 [INFO]: No given device, using default device: cpu
2023-04-27 09:27:00 [INFO]: saving_path is set as tutorial_results/classification/brits, the trained model will be saved to tutorial_results/classification/brits/20230427_T092700, the tensorboard file will be saved to tutorial_results/classification/brits/20230427_T092700/tensorboard
2023-04-27 09:27:00 [INFO]: Model initialized successfully with the number of trainable parameters: 730612


In [9]:
# Train the model on the training set and validate it on the validating set,
# so that the best model can be selected for testing in the next step.
brits.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-27 09:28:18 [INFO]: epoch 0: training loss 0.9414, validating loss 0.8397
2023-04-27 09:29:18 [INFO]: epoch 1: training loss 0.7946, validating loss 0.7832
2023-04-27 09:30:17 [INFO]: epoch 2: training loss 0.7424, validating loss 0.7488
2023-04-27 09:31:20 [INFO]: epoch 3: training loss 0.7138, validating loss 0.7547
2023-04-27 09:32:20 [INFO]: epoch 4: training loss 0.7034, validating loss 0.7444
2023-04-27 09:33:19 [INFO]: epoch 5: training loss 0.6834, validating loss 0.7223
2023-04-27 09:34:30 [INFO]: epoch 6: training loss 0.6700, validating loss 0.7183
2023-04-27 09:35:33 [INFO]: epoch 7: training loss 0.6597, validating loss 0.7094
2023-04-27 09:36:37 [INFO]: epoch 8: training loss 0.6434, validating loss 0.7195
2023-04-27 09:37:39 [INFO]: epoch 9: training loss 0.6345, validating loss 0.7311
2023-04-27 09:38:42 [INFO]: epoch 10: training loss 0.6269, validating loss 0.7379
2023-04-27 09:39:46 [INFO]: epoch 11: training loss 0.6145, validating loss 0.7371
2023-04-27 09:

In [10]:
# the testing stage: classify the samples in the test set with the trained model
brits_prediction = brits.classify(dataset_for_testing)

In [11]:
from pypots.utils.metrics import cal_binary_classification_metrics

# calculate the binary classification metrics (ROC-AUC, PR-AUC, F1, precision, and recall)
# of the BRITS predictions against the ground-truth labels of the test set

metrics = cal_binary_classification_metrics(brits_prediction, dataset_for_testing["y"])
print("Testing classification metrics: \n"
    f'ROC_AUC: {metrics["roc_auc"]}, \n'
    f'PR_AUC: {metrics["pr_auc"]},\n'
    f'F1: {metrics["f1"]},\n'
    f'Precision: {metrics["precision"]},\n'
    f'Recall: {metrics["recall"]},\n'
)

Testing classification metrics: 
ROC_AUC: 0.8205840661991168, 
PR_AUC: 0.45822567906407086,
F1: 0.3167848699763594,
Precision: 0.6836734693877551,
Recall: 0.20615384615384616,



## 🚀 An example of **GRUD** for classification

In [12]:
from pypots.classification import GRUD

# initialize the model
grud = GRUD(
    n_steps=physionet2012_dataset['n_steps'], 
    n_features=physionet2012_dataset['n_features'], 
    n_classes=physionet2012_dataset["n_classes"],
    rnn_hidden_size=256, 
    epochs=10, # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    patience=5, # here we set patience=5 to early stop the training if the validating loss doesn't decrease for 5 epochs. You can leave it to default as None to disable early stopping.
    learning_rate=1e-3,
    # device='cpu', # just leave it to default, PyPOTS will automatically assign the best device for you.
                    # Set it to 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices.
    saving_path="tutorial_results/classification/grud", # set the path for saving the trained model and the tensorboard files
)

2023-04-27 09:41:01 [INFO]: No given device, using default device: cpu
2023-04-27 09:41:01 [INFO]: saving_path is set as tutorial_results/classification/grud, the trained model will be saved to tutorial_results/classification/grud/20230427_T094101, the tensorboard file will be saved to tutorial_results/classification/grud/20230427_T094101/tensorboard
2023-04-27 09:41:01 [INFO]: Model initialized successfully with the number of trainable parameters: 463232


In [13]:
# Train the model on the training set and validate it on the validating set,
# so that the best model can be selected for testing in the next step.
grud.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2023-04-27 09:41:34 [INFO]: epoch 0: training loss 0.3574, validating loss 0.3707
2023-04-27 09:41:56 [INFO]: epoch 1: training loss 0.3221, validating loss 0.3481
2023-04-27 09:42:18 [INFO]: epoch 2: training loss 0.3047, validating loss 0.3362
2023-04-27 09:42:41 [INFO]: epoch 3: training loss 0.2947, validating loss 0.3414
2023-04-27 09:43:04 [INFO]: epoch 4: training loss 0.2832, validating loss 0.3445
2023-04-27 09:43:30 [INFO]: epoch 5: training loss 0.2742, validating loss 0.3366
2023-04-27 09:44:02 [INFO]: epoch 6: training loss 0.2596, validating loss 0.3328
2023-04-27 09:44:32 [INFO]: epoch 7: training loss 0.2439, validating loss 0.3493
2023-04-27 09:45:02 [INFO]: epoch 8: training loss 0.2173, validating loss 0.3713
2023-04-27 09:45:33 [INFO]: epoch 9: training loss 0.1986, validating loss 0.3954
2023-04-27 09:45:33 [INFO]: Finished training.
2023-04-27 09:45:33 [INFO]: Saved successfully to tutorial_results/classification/grud/20230427_T094101/GRUD.pypots.


In [14]:
# the testing stage: classify the samples in the test set with the trained model
grud_prediction = grud.classify(dataset_for_testing)

In [15]:
from pypots.utils.metrics import cal_binary_classification_metrics

# calculate the binary classification metrics (ROC-AUC, PR-AUC, F1, precision, and recall)
# of the GRUD predictions against the ground-truth labels of the test set

metrics = cal_binary_classification_metrics(grud_prediction, dataset_for_testing["y"])
print("Testing classification metrics: \n"
    f'ROC_AUC: {metrics["roc_auc"]}, \n'
    f'PR_AUC: {metrics["pr_auc"]},\n'
    f'F1: {metrics["f1"]},\n'
    f'Precision: {metrics["precision"]},\n'
    f'Recall: {metrics["recall"]},\n'
)

Testing classification metrics: 
ROC_AUC: 0.7564970870904301, 
PR_AUC: 0.312294925131615,
F1: 0.3180212014134276,
Precision: 0.37344398340248963,
Recall: 0.27692307692307694,

