In [1]:
import simulate
import baseline
import dataload
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
#import pypots
from pypots.data import load_specific_dataset, mcar, masked_fill
from pypots.imputation import SAITS, BRITS
from pypots.utils.metrics import cal_mae, cal_rmse, cal_mre

# UCI HAR

In [2]:
dict = dataload.uci()
X = dict["X"]
y = dict["y"]

File Path :  D:/GitHub/ece209as_project/data/UCI_HAR_Dataset/train/Inertial_Signals/
File Path :  D:/GitHub/ece209as_project/data/UCI_HAR_Dataset/test/Inertial_Signals/


In [3]:
# MCAR
dict_MCAR = simulate.simulate_nan(X, 0.1, "MCAR")
X_intact_MCAR = dict_MCAR['X_init']
X_missing_MCAR = dict_MCAR['X_incomp']
X_mask_MCAR = dict_MCAR['mask']

# MAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MAR")
X_intact_MAR = dict_MAR['X_init']
X_missing_MAR = dict_MAR['X_incomp']
X_mask_MAR = dict_MAR['mask']

# MNAR - logistic
dict_MAR = simulate.simulate_nan(X, 0.1, "MNAR", opt="logistic")
X_intact_MNAR = dict_MAR['X_init']
X_missing_MNAR = dict_MAR['X_incomp']
X_mask_MNAR = dict_MAR['mask']

X_intact = [X_intact_MCAR, X_intact_MAR, X_intact_MNAR]
X_missing = [X_missing_MCAR, X_missing_MAR, X_missing_MNAR]
X_mask = [X_mask_MCAR, X_mask_MAR, X_mask_MNAR]
missingness = ["MCAR", "MAR", "MNAR"]

In [4]:
result_uci_imputation = []

In [5]:
# Zero-filling imputation
res = []
for i in range(3):
    X_zero_imputed = baseline.zero_filling_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_zero_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_zero_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [6]:
# Mean imputation
res = []

for i in range(3):
    X_mean_imputed = baseline.mean_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_mean_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_mean_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [7]:
# Median imputation
res = []

for i in range(3):
    X_median_imputed = baseline.median_imputation(X_missing[i])
    res.append(cal_mae(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_rmse(X_intact[i], X_median_imputed, X_mask[i]))
    res.append(cal_mre(X_intact[i], X_median_imputed, X_mask[i]))

result_uci_imputation.append(res)

In [None]:
# BRITS

res = []

for i in range(3):
    brits = BRITS(n_steps=128, n_features=9,rnn_hidden_size=4, epochs=20)
    brits.fit(X_missing[i])
    imputation = brits.impute(X_missing[i])
    res.append(cal_mae(imputation, X_intact[i], X_mask[i]))
    res.append(cal_rmse(imputation, X_intact[i], X_mask[i]))
    res.append(cal_mre(imputation, X_intact[i], X_mask[i]))

result_uci_imputation.append(res)

In [9]:
errors = ["mae", "rmse", "mre"]
cols = []

for i in missingness:
    for j in errors:
        cols.append(i+" "+j)

methods = ["zero-filling imputation", "mean imputation", "median imputation", "BRITS"]
result_uci_imputation = pd.DataFrame(result_uci_imputation, columns = cols, index=methods)

In [10]:
result_uci_imputation

Unnamed: 0,MCAR mae,MCAR rmse,MCAR mre,MAR mae,MAR rmse,MAR mre,MNAR mae,MNAR rmse,MNAR mre
zero-filling imputation,0.242178,0.413517,286973000000000.0,0.241268,0.412009,286515800000000.0,0.241828,0.413479,286530100000000.0
mean imputation,0.777366,0.898209,0.9491571,0.777086,0.897553,0.9488416,0.775993,0.897156,0.9480133
median imputation,0.756448,0.875055,0.951453,0.756012,0.874249,0.9511194,0.75506,0.873975,0.950274
BRITS,0.123852,0.22107,0.5114092,0.128857,0.225485,0.5340805,0.123321,0.217309,0.5099554
