In [2]:
import torch
import numpy as np
import os
from model import *
from utils import *

# Seed NumPy's global RNG so the random draws made inside data_load() are repeatable.
np.random.seed(0)

def data_load(dataset_file='data/V_228.csv', train_rate=0.8, p_miss=0.3):
    """Load a numeric CSV, min-max scale it, and return a masked training split.

    The file is read with a one-line header skipped. Every column is shifted
    by its minimum and divided by its (shifted) maximum, so values land in
    [0, 1). A random 0/1 mask is then drawn per column from NumPy's global
    RNG, and the leading ``train_rate`` fraction of rows is returned together
    with the same rows multiplied by the mask.

    Args:
        dataset_file: Path of the comma-separated file to load.
        train_rate: Fraction of rows (taken from the top of the file, in file
            order) that forms the training split.
        p_miss: Probability that a single cell is masked out.

    Returns:
        (trainX, train_missing): the scaled training rows, and the same rows
        with masked cells set to 0.
    """
    # ndmin=2 keeps a single-row or single-column file two-dimensional.
    Data = np.loadtxt(dataset_file, delimiter=",", skiprows=1, ndmin=2)
    No, Dim = Data.shape

    # Column-wise min-max scaling. The 1e-6 keeps the division finite when a
    # column is constant (its shifted maximum is then 0).
    Data = Data - Data.min(axis=0)
    Data = Data / (Data.max(axis=0) + 1e-6)

    # Missing introducing: 1 marks a kept cell, 0 a masked one. The draws are
    # made one column at a time so the RNG stream is consumed in a fixed order.
    Missing = np.zeros((No, Dim))
    for i in range(Dim):
        Missing[:, i] = np.random.uniform(0., 1., size=No) > p_miss

    # NOTE(review): this permutation is never applied -- the split below takes
    # rows in file order. The draw is kept only so that the global RNG state
    # left behind by this function is unchanged for any later consumer.
    np.random.permutation(No)

    # Train split: the first Train_No rows of the data and of the mask.
    Train_No = int(No * train_rate)
    trainX = Data[:Train_No, :]
    trainM = Missing[:Train_No, :]

    train_missing = trainX * trainM

    return trainX, train_missing

# Build the scaled training matrix and its randomly masked counterpart.
trainX,train_missing = data_load()



In [3]:
import pandas as pd

df_null = pd.DataFrame(train_missing)
df_data = pd.DataFrame(trainX)
# Masked cells are stored as 0 in train_missing; convert them to NaN so the
# imputers see them as missing. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there.
# NOTE(review): a cell that was observed but is exactly 0 (for example a
# column minimum after the min-max scaling in data_load) also becomes NaN
# here, because the mask itself is not available in this cell.
df_null = df_null.replace(0, np.nan)

In [4]:
# moving_window method
# Disabled: the code below is wrapped in a string literal, so nothing in it runs;
# the cell only evaluates (and echoes) the string.
"""
from impyute.imputation.ts import moving_window

np_imputed=  moving_window(df_null, nindex = -1)
moving_window_imputed = pd.DataFrame(np_imputed)

from sklearn.metrics import mean_absolute_error

from sklearn.metrics import mean_squared_error

RMSE = mean_squared_error(df_data, moving_window_imputed)**0.5

print('RMSE: {}'.format(RMSE))

MAE = mean_absolute_error(df_data, moving_window_imputed)
print('MAE: {}'.format(MAE))"""


"\nfrom impyute.imputation.ts import moving_window\n\nnp_imputed=  moving_window(df_null, nindex = -1)\nmoving_window_imputed = pd.DataFrame(np_imputed)\n\nfrom sklearn.metrics import mean_absolute_error\n\nfrom sklearn.metrics import mean_squared_error\n\nRMSE = mean_squared_error(df_data, moving_window_imputed)**0.5\n\nprint('RMSE: {}'.format(RMSE))\n\nMAE = mean_absolute_error(df_data, moving_window_imputed)\nprint('MAE: {}'.format(MAE))"

In [4]:
# mice method

from impyute.imputation.cs import mice
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Run MICE on the raw NaN-masked array and wrap the result in a frame so it
# can be compared with the ground-truth frame.
np_imputed = mice(df_null.values)
mice_imputed = pd.DataFrame(np_imputed)

# Both metrics compare the complete frames (every cell, not only the masked ones).
RMSE = mean_squared_error(df_data, mice_imputed) ** 0.5
print(f'RMSE: {RMSE}')

MAE = mean_absolute_error(df_data, mice_imputed)
print(f'MAE: {MAE}')

RMSE: 0.05769585698210762
MAE: 0.018760873109859667


In [5]:
# knn method
# Disabled: the code below is wrapped in a string literal, so nothing in it runs;
# the cell only evaluates (and echoes) the string.
"""
from impyute.imputation.cs import fast_knn
np_imputed=fast_knn(df_null.values, k=5)# KNN 학습 
knn_imputed = pd.DataFrame(np_imputed)

from sklearn.metrics import mean_squared_error

RMSE = mean_squared_error(df_data, knn_imputed)**0.5

RMSE"""

'\nfrom impyute.imputation.cs import fast_knn\nnp_imputed=fast_knn(df_null.values, k=5)# KNN 학습 \nknn_imputed = pd.DataFrame(np_imputed)\n\nfrom sklearn.metrics import mean_squared_error\n\nRMSE = mean_squared_error(df_data, knn_imputed)**0.5\n\nRMSE'

In [6]:
# mean method
# Disabled: the code below is wrapped in a string literal, so nothing in it runs;
# the cell only evaluates (and echoes) the string.
"""
import impyute as impy
np_imputed=impy.mean(df_null)
mean_imputed = pd.DataFrame(np_imputed)

from sklearn.metrics import mean_squared_error

RMSE = mean_squared_error(df_data, mean_imputed)**0.5

RMSE"""

'\nimport impyute as impy\nnp_imputed=impy.mean(df_null)\nmean_imputed = pd.DataFrame(np_imputed)\n\nfrom sklearn.metrics import mean_squared_error\n\nRMSE = mean_squared_error(df_data, mean_imputed)**0.5\n\nRMSE'

In [7]:
# EM method
# Disabled: the code below is wrapped in a string literal, so nothing in it runs;
# the cell only evaluates (and echoes) the string.
"""
from impyute.imputation.cs import em

np_imputed = em(df_null)
em_imputed = pd.DataFrame(np_imputed)

from sklearn.metrics import mean_squared_error

RMSE = mean_squared_error(df_data, em_imputed)**0.5

RMSE"""

'\nfrom impyute.imputation.cs import em\n\nnp_imputed = em(df_null)\nem_imputed = pd.DataFrame(np_imputed)\n\nfrom sklearn.metrics import mean_squared_error\n\nRMSE = mean_squared_error(df_data, em_imputed)**0.5\n\nRMSE'

In [5]:
# Show the training frame after masking; NaN marks the cells the imputers must fill.
df_null

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,218,219,220,221,222,223,224,225,226,227
0,0.835052,0.809961,,0.888889,0.789713,,0.912869,0.807531,0.930097,,...,0.911150,0.868925,0.835255,,,,0.860530,0.902736,,0.898380
1,0.833763,0.777195,0.797357,0.927203,,0.880721,0.884718,0.762901,,0.465465,...,0.939024,0.944035,0.835255,,0.875530,0.930909,0.860530,0.940729,0.920930,0.896907
2,0.837629,0.823067,0.787078,,0.841150,,0.879357,0.845188,0.883495,,...,0.921603,,,0.921986,0.882603,0.894545,,0.943769,0.893023,0.901325
3,0.845361,0.845347,,0.902299,0.782148,0.847434,0.841823,,0.902913,0.423423,...,,,0.835255,0.941489,,0.881818,,0.907295,0.894574,0.888071
4,0.815722,0.840105,0.775330,0.860153,,0.857143,0.859249,0.828452,0.900971,0.400901,...,,,0.835255,0.939716,,0.896364,,0.936170,,0.889543
5,0.805412,0.854522,0.720999,0.846743,0.794251,0.877947,0.878016,0.845188,0.900971,0.438438,...,,0.914580,,,,,0.860530,,,0.886598
6,0.837629,0.858453,,0.840996,0.788200,,0.809651,,0.846602,0.396396,...,,0.958763,0.835255,0.904255,,0.901818,0.860530,0.896657,0.872868,
7,0.849227,0.884666,0.675477,,0.747352,0.807212,0.778820,0.896792,0.920388,0.310811,...,0.837979,,0.835255,0.907801,0.886846,0.920000,0.860530,0.857143,0.916279,0.883652
8,0.872423,,0.712188,0.846743,,0.825243,0.789544,0.866109,,0.339339,...,,0.812960,0.828666,0.909574,,0.854545,0.854951,0.892097,0.924031,0.886598
9,0.872423,0.917431,0.713656,,0.779123,0.839112,0.841823,0.885635,,0.364865,...,0.893728,,0.818781,0.898936,0.864215,0.840000,0.857741,0.899696,0.824806,0.871870
