In [11]:
# Environment sanity check: show which protobuf release the kernel resolves.
# NOTE(review): presumably here to confirm a pinned protobuf version required by
# a downstream dependency — confirm, and drop the cell if it is no longer needed.
from google import protobuf
print(protobuf.__version__)

3.20.1


In [12]:
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
#import pypots
from pypots.data import load_specific_dataset, mcar, masked_fill
from pypots.imputation import SAITS
from pypots.utils.metrics import cal_mae

In [13]:
# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being scattered
# through the cell as magic numbers.
SEED = 2022          # fixes the random hold-out mask and the model initialisation
N_STEPS = 48         # rows per RecordID assumed by the reshape below
HOLDOUT_RATE = 0.1   # fraction of observed values hidden and used as ground truth

# Both the artificial masking (NumPy RNG) and training (PyTorch RNG: weight
# init, shuffling, dropout) are stochastic. Seed both so that Restart & Run All
# gives a repeatable MAE as far as the random number generators are concerned.
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Data preprocessing. Tedious, but PyPOTS can help. 🤓 ---------------------
# For datasets in the PyPOTS database, PyPOTS will automatically download and
# extract them.
data = load_specific_dataset('physionet_2012')
records_df = data['X']
num_samples = len(records_df['RecordID'].unique())

# RecordID is an identifier, not a feature: drop it before scaling.
feature_matrix = records_df.drop('RecordID', axis=1).to_numpy()
scaled_matrix = StandardScaler().fit_transform(feature_matrix)

# (rows, features) -> (samples, steps, features). The reshape raises if the row
# count is not exactly num_samples * N_STEPS.
X_full = scaled_matrix.reshape(num_samples, N_STEPS, -1)

# Hold out HOLDOUT_RATE of the observed values as ground truth, then blank every
# position the missing mask marks as unobserved so the model never sees it.
X_intact, X, missing_mask, indicating_mask = mcar(X_full, HOLDOUT_RATE)
X = masked_fill(X, 1 - missing_mask, np.nan)

# --- Model training. This is PyPOTS showtime. 💪 ------------------------------
# n_steps / n_features are taken from the data itself so the model definition
# cannot drift out of sync with the preprocessing above.
saits = SAITS(
    n_steps=X.shape[1],
    n_features=X.shape[2],
    n_layers=2,
    d_model=256,
    d_inner=128,
    n_head=4,
    d_k=64,
    d_v=64,
    dropout=0.1,
    epochs=10,
)

# Train on the whole dataset: the held-out ground truth is not visible to the
# model, so no separate training split is needed for this evaluation.
saits.fit(X)

# Impute both the originally-missing and the artificially-missing values.
imputation = saits.impute(X)

# Mean absolute error, evaluated only on the artificially-missing (held-out)
# positions selected by indicating_mask.
mae = cal_mae(imputation, X_intact, indicating_mask)

Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Database)...
Starting preprocessing physionet_2012...
Start downloading...
Successfully downloaded data to C:\Users\RUSHI\AppData\Local\Temp\tmpeb8w7bd6\set-a.tar.gz.
Successfully extracted data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012
Successfully downloaded data to C:\Users\RUSHI\AppData\Local\Temp\tmpio14ozpb\set-b.tar.gz.
Successfully extracted data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012
Successfully downloaded data to C:\Users\RUSHI\AppData\Local\Temp\tmpo61wvkoy\set-c.tar.gz.
Successfully extracted data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012
Successfully downloaded data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012\Outcomes-a.txt.
Successfully downloaded data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012\Outcomes-b.txt.
Successfully downloaded data to C:\Users\RUSHI\.tsdb_cached_datasets\physionet_2012\Outcomes-c.txt.
Ignore 140501,

  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part, ignore_index=False, sort=False)  # pad
  df_temp = df_temp.append(missing_part,

Model initialized successfully. Number of the trainable parameters: 1378358
epoch 0: training loss 0.6528
epoch 1: training loss 0.4722
epoch 2: training loss 0.4121
epoch 3: training loss 0.3805
epoch 4: training loss 0.3582
epoch 5: training loss 0.3456
epoch 6: training loss 0.3367
epoch 7: training loss 0.3320
epoch 8: training loss 0.3264
epoch 9: training loss 0.3216
Finished training.


In [15]:
# Show the imputation error: `mae` is the mean absolute error computed by
# cal_mae on the artificially held-out values.
print(mae)

0.2613491454733027
