In [1]:
import simulate
import baseline
import dataload
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
#import pypots
from pypots.data import load_specific_dataset, mcar, masked_fill
from pypots.imputation import SAITS, BRITS
from pypots.utils.metrics import cal_mae, cal_rmse, cal_mre

# PAMAPS2

In [2]:
dict = dataload.pamaps2(window_size=40, drop_nan=True)
X = dict["X"]
y = dict["y"]

In [3]:
# MCAR
dict_MCAR = simulate.simulate_nan(X, 0.1, "MCAR")
X_intact_MCAR = dict_MCAR['X_init']
X_missing_MCAR = dict_MCAR['X_incomp']
X_mask_MCAR = dict_MCAR['mask']

# MAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MAR")
X_intact_MAR = dict_MAR['X_init']
X_missing_MAR = dict_MAR['X_incomp']
X_mask_MAR = dict_MAR['mask']

# MNAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MNAR", opt="logistic")
X_intact_MNAR = dict_MAR['X_init']
X_missing_MNAR = dict_MAR['X_incomp']
X_mask_MNAR = dict_MAR['mask']

X_intact = [X_intact_MCAR, X_intact_MAR, X_intact_MNAR]
X_missing = [X_missing_MCAR, X_missing_MAR, X_missing_MNAR]
X_mask = [X_mask_MCAR, X_mask_MAR, X_mask_MNAR]
missingness = ["MCAR", "MAR", "MNAR"]

In [4]:
n_steps_brits = 40
n_features_brits = 52
rnn_hidden_size_brits = 128 # change rnn_hidden_size_brits to smaller number (4/8/16) for faster computation on smaller models
epochs_brits = 100 # change epochs_brits to smaller number (1 to 10) otherwise it takes upwards of 20 hours on good GPUs


n_steps_saits = 40
n_features_saits = 52
n_layers_saits = 2
d_model_saits = 512 # change d_model_saits to smaller number (32/64/128) for faster computation on smaller models
d_inner_saits = 256 # change d_inner_saits to smaller number (16/32/64) for faster computation on smaller models
n_head_saits = 4
d_k_saits = 64
d_v_saits = 64
dropout_saits = 0.1
epochs_saits = 200 # change epochs_saits to smaller number (1 to 10) otherwise it takes upwards of 20 hours on good GPUs

In [5]:
result_pamaps2_imputation = []

In [6]:
# Zero-filling imputation
res = []
for i in range(3):
    X_zero_imputed = baseline.zero_filling_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_zero_imputed, X_mask[i]))

result_pamaps2_imputation.append(res)

In [7]:
# Mean imputation
res = []

for i in range(3):
    X_mean_imputed = baseline.mean_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_mean_imputed, X_mask[i]))

result_pamaps2_imputation.append(res)

In [8]:
# Median imputation
res = []

for i in range(3):
    X_median_imputed = baseline.median_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_median_imputed, X_mask[i]))

result_pamaps2_imputation.append(res)

In [None]:
# BRITS

res = []

for i in range(3):
    brits = BRITS(n_steps=n_steps_brits, n_features=n_features_brits,rnn_hidden_size=rnn_hidden_size_brits, epochs=epochs_brits)
    brits.fit(X_missing[i])
    imputation = brits.impute(X_missing[i])
    res.append(cal_mae(imputation, X_intact[i], X_mask[i]))
    res.append(cal_rmse(imputation, X_intact[i], X_mask[i]))
    res.append(cal_mre(imputation, X_intact[i], X_mask[i]))

result_pamaps2_imputation.append(res)

In [None]:
# SAITS

res = []

for i in range(3):
    saits = SAITS(n_steps=n_steps_saits, n_features=n_features_saits, n_layers=n_layers_saits, d_model=d_model_saits, d_inner=d_inner_saits, n_head=n_head_saits, d_k=d_k_saits, d_v=d_v_saits, dropout=dropout_saits, epochs=epochs_saits)
    saits.fit(X_missing[i])
    imputation = saits.impute(X_missing[i])
    res.append(cal_mae(imputation, X_intact[i], X_mask[i]))
    res.append(cal_rmse(imputation, X_intact[i], X_mask[i]))
    res.append(cal_mre(imputation, X_intact[i], X_mask[i]))

result_pamaps2_imputation.append(res)

In [12]:
errors = ["mae", "rmse", "mre"]
cols = []

for i in missingness:
    for j in errors:
        cols.append(i+" "+j)

methods = ["zero-filling imputation", "mean imputation", "median imputation", "BRITS", "SAITS"]
result_pamaps2_imputation = pd.DataFrame(result_pamaps2_imputation, columns = cols, index=methods)

In [31]:
result_pamaps2_imputation

Unnamed: 0,MCAR mae,MCAR rmse,MCAR mre,MAR mae,MAR rmse,MAR mre,MNAR mae,MNAR rmse,MNAR mre
zero-filling imputation,43.001326,296.209078,1.269585e+18,44.79544,302.214721,1.322842e+18,43.000029,296.078032,1.269733e+18
mean imputation,1827.534261,2135.402027,1.000626,1843.997157,2148.611512,1.000663,1824.700344,2132.157724,1.000621
median imputation,1827.516164,2135.420835,1.000626,1843.981177,2148.63308,1.000663,1824.687548,2132.176473,1.000621
BRITS,30.418686,147.324833,0.402633,31.762073,151.236273,0.3689206,36.282232,150.060937,0.4005351
SAITS,18.222896,61.980596,0.420607,17.388033,53.826534,0.233121,26.891733,70.600211,0.365387
