In [1]:
import simulate
import baseline
import dataload
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
#import pypots
from pypots.data import load_specific_dataset, mcar, masked_fill
from pypots.imputation import SAITS, BRITS
from pypots.utils.metrics import cal_mae, cal_rmse, cal_mre

  from .autonotebook import tqdm as notebook_tqdm


# UCI HAR

In [2]:
dict = dataload.uci()
X = dict["X"]
y = dict["y"]

In [3]:
# MCAR
dict_MCAR = simulate.simulate_nan(X, 0.1, "MCAR")
X_intact_MCAR = dict_MCAR['X_init']
X_missing_MCAR = dict_MCAR['X_incomp']
X_mask_MCAR = dict_MCAR['mask']

# MAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MAR")
X_intact_MAR = dict_MAR['X_init']
X_missing_MAR = dict_MAR['X_incomp']
X_mask_MAR = dict_MAR['mask']

# MNAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MNAR", opt="logistic")
X_intact_MNAR = dict_MAR['X_init']
X_missing_MNAR = dict_MAR['X_incomp']
X_mask_MNAR = dict_MAR['mask']

X_intact = [X_intact_MCAR, X_intact_MAR, X_intact_MNAR]
X_missing = [X_missing_MCAR, X_missing_MAR, X_missing_MNAR]
X_mask = [X_mask_MCAR, X_mask_MAR, X_mask_MNAR]
missingness = ["MCAR", "MAR", "MNAR"]

In [4]:
result_uci_imputation = []

In [5]:
# Zero-filling imputation
res = []
for i in range(3):
    X_zero_imputed = baseline.zero_filling_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_zero_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [6]:
# Mean imputation
res = []

for i in range(3):
    X_mean_imputed = baseline.mean_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_mean_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [7]:
# Median imputation
res = []

for i in range(3):
    X_median_imputed = baseline.median_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_median_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [8]:
# BRITS

res = []

for i in range(3):
    brits = BRITS(n_steps=128, n_features=9,rnn_hidden_size=64, epochs=20)
    brits.fit(X_missing[i])
    imputation = brits.impute(X_missing[i])
    res.append(cal_mae(imputation, X_intact[i], X_mask[i]))
    res.append(cal_rmse(imputation, X_intact[i], X_mask[i]))
    res.append(cal_mre(imputation, X_intact[i], X_mask[i]))

result_uci_imputation.append(res)

Model initialized successfully. Number of the trainable parameters: 46160
epoch 0: training loss 0.2148
epoch 1: training loss 0.1242
epoch 2: training loss 0.1053
epoch 3: training loss 0.0983
epoch 4: training loss 0.0951
epoch 5: training loss 0.0937
epoch 6: training loss 0.0929
epoch 7: training loss 0.0924
epoch 8: training loss 0.0920
epoch 9: training loss 0.0915
epoch 10: training loss 0.0913
epoch 11: training loss 0.0910
epoch 12: training loss 0.0907
epoch 13: training loss 0.0904
epoch 14: training loss 0.0901
epoch 15: training loss 0.0898
epoch 16: training loss 0.0895
epoch 17: training loss 0.0893
epoch 18: training loss 0.0891
epoch 19: training loss 0.0889
Finished training.
Model initialized successfully. Number of the trainable parameters: 46160
epoch 0: training loss 0.1982
epoch 1: training loss 0.1118
epoch 2: training loss 0.0939
epoch 3: training loss 0.0860
epoch 4: training loss 0.0819
epoch 5: training loss 0.0797
epoch 6: training loss 0.0780
epoch 7: trai

In [9]:
# SAITS

res = []

for i in range(3):
    saits = SAITS(n_steps=128, n_features=9, n_layers=2, d_model=256, d_inner=128, n_head=4, d_k=64, d_v=64, dropout=0.1, epochs=20)
    saits.fit(X_missing[i])
    imputation = saits.impute(X_missing[i])
    res.append(cal_mae(imputation, X_intact[i], X_mask[i]))
    res.append(cal_rmse(imputation, X_intact[i], X_mask[i]))
    res.append(cal_mre(imputation, X_intact[i], X_mask[i]))

result_uci_imputation.append(res)

Model initialized successfully. Number of the trainable parameters: 1332038
epoch 0: training loss 0.1846
epoch 1: training loss 0.0855
epoch 2: training loss 0.0677
epoch 3: training loss 0.0608
epoch 4: training loss 0.0557
epoch 5: training loss 0.0517
epoch 6: training loss 0.0487
epoch 7: training loss 0.0465
epoch 8: training loss 0.0455
epoch 9: training loss 0.0444
epoch 10: training loss 0.0435
epoch 11: training loss 0.0430
epoch 12: training loss 0.0425
epoch 13: training loss 0.0422
epoch 14: training loss 0.0416
epoch 15: training loss 0.0416
epoch 16: training loss 0.0412
epoch 17: training loss 0.0407
epoch 18: training loss 0.0404
epoch 19: training loss 0.0404
Finished training.
Model initialized successfully. Number of the trainable parameters: 1332038
epoch 0: training loss 0.1793
epoch 1: training loss 0.1126
epoch 2: training loss 0.1004
epoch 3: training loss 0.0940
epoch 4: training loss 0.0900
epoch 5: training loss 0.0872
epoch 6: training loss 0.0850
epoch 7: 

In [10]:
errors = ["mae", "rmse", "mre"]
cols = []

for i in missingness:
    for j in errors:
        cols.append(i+" "+j)

methods = ["zero-filling imputation", "mean imputation", "median imputation", "BRITS", "SAITS"]
result_uci_imputation = pd.DataFrame(result_uci_imputation, columns = cols, index=methods)

In [11]:
result_uci_imputation

Unnamed: 0,MCAR mae,MCAR rmse,MCAR mre,MAR mae,MAR rmse,MAR mre,MNAR mae,MNAR rmse,MNAR mre
zero-filling imputation,0.11505,0.233926,136917100000000.0,0.160622,0.306553,95633680000000.0,0.136915,0.269877,162067700000000.0
mean imputation,0.115961,0.232973,7.953407,0.161896,0.306161,5.371414,0.138038,0.269118,6.215174
median imputation,0.113834,0.235492,5.513651,0.159096,0.308446,4.446997,0.135557,0.271391,4.838404
BRITS,0.02621,0.068698,0.2278151,0.103265,0.215991,0.6429036,0.090474,0.191687,0.6608023
SAITS,0.031206,0.081772,0.2712413,0.160455,0.3031,0.9989568,0.137143,0.269069,1.001666
