In [1]:
import torch
import numpy as np
import os
from model import *
from utils import *

# System Parameters
# 1. Mini batch size
mb_size = 128
# 2. Missing rate
p_miss = 0.2
# 3. Hint rate
p_hint = 0
# 4. Loss Hyperparameters
alpha = 10
# 5. Train Rate
train_rate = 0.8

#% Data
dataset_file = 'data/V_228.csv'
# Data generation
Data = np.loadtxt(dataset_file, delimiter=",",skiprows=1)

# Parameters
No = len(Data)
Dim = len(Data[0,:])

# Hidden state dimensions
H_Dim1 = Dim
H_Dim2 = Dim

# Normalization (0 to 1)
Min_Val = np.zeros(Dim)
Max_Val = np.zeros(Dim)

for i in range(Dim):
    Min_Val[i] = np.min(Data[:,i])
    #print(np.min(Data[:,i]))
    Data[:,i] = Data[:,i] - np.min(Data[:,i])
    Max_Val[i] = np.max(Data[:,i])
    Data[:,i] = Data[:,i] / (np.max(Data[:,i]) + 1e-6)    
    



# Missing introducing
p_miss_vec = p_miss * np.ones((Dim,1)) 
   
Missing = np.zeros((No,Dim))

for i in range(Dim):
    A = np.random.uniform(0., 1., size = [len(Data),])
    B = A > p_miss_vec[i]
    Missing[:,i] = 1.*B

    
# Train Test Division    
   
idx = np.random.permutation(No)

Train_No = int(No * train_rate)
Test_No = No - Train_No
    
# Train / Test Features
trainX = Data[idx[:Train_No],:]
testX = Data[idx[Train_No:],:]

# Train / Test Missing Indicators
trainM = Missing[idx[:Train_No],:]
testM = Missing[idx[Train_No:],:]

netD = NetD()
netG = NetG()


optimD = torch.optim.Adam(netD.parameters(), lr=0.001)
optimG = torch.optim.Adam(netG.parameters(), lr=0.001)


bce_loss = torch.nn.BCEWithLogitsLoss(reduction="elementwise_mean")
mse_loss = torch.nn.MSELoss(reduction="elementwise_mean")




In [2]:
i = 1
# Start Iterations
for it in range(5000): 
    #%% Inputs
    mb_idx = sample_idx(Train_No, mb_size)
    X_mb = trainX[mb_idx,:]  

    Z_mb = sample_Z(mb_size, Dim) 
    M_mb = trainM[mb_idx,:]  
    H_mb1 = sample_M(mb_size, Dim, 1-p_hint)
    H_mb = M_mb * H_mb1 
    
    New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce
    
    X_mb = torch.tensor(X_mb).float()
    New_X_mb = torch.tensor(New_X_mb).float()
    Z_mb = torch.tensor(Z_mb).float()
    M_mb = torch.tensor(M_mb).float()
    H_mb = torch.tensor(H_mb).float()
    
    # Train D
    G_sample = netG(X_mb, New_X_mb, M_mb)
    D_prob = netD(X_mb, M_mb, G_sample, H_mb)
    D_loss = bce_loss(D_prob, M_mb)

    optimD.zero_grad()
    D_loss.backward()
    optimD.step()
    
    
    # Train G
    G_sample = netG(X_mb, New_X_mb, M_mb)
    D_prob = netD(X_mb, M_mb, G_sample, H_mb)
    D_prob.detach_()
    G_loss1 = ((1 - M_mb) * (torch.sigmoid(D_prob)+1e-8).log()).mean()/(1-M_mb).sum()
    G_mse_loss = mse_loss(M_mb*X_mb, M_mb*G_sample) / M_mb.sum()
    G_loss = G_loss1 + alpha*G_mse_loss
    
    G_loss.backward()
    optimG.step()
    optimG.zero_grad()
    
    G_mse_test = mse_loss((1-M_mb)*X_mb, (1-M_mb)*G_sample) / (1-M_mb).sum()


    if it % 100 == 0:
        print('Iter: {}'.format(it),end='\t')
        print('Train_loss: {:.4}'.format(np.sqrt(G_mse_loss.item())),end='\t')
        print('Test_loss: {:.4}'.format(np.sqrt(G_mse_test.item())),end='\t')
        print('G_loss: {:.4}'.format(G_loss),end='\t')
        print('D_loss: {:.4}'.format(D_loss))






Iter: 0	Train_loss: 0.3516	Test_loss: 0.3513	G_loss: 1.236	D_loss: 0.5771
Iter: 100	Train_loss: 0.1074	Test_loss: 0.108	G_loss: 0.1153	D_loss: 0.5152


KeyboardInterrupt: 

In [None]:

G_sample = netG(X_mb, New_X_mb, M_mb)

Z_mb = sample_Z(Test_No, Dim) 
M_mb = testM
X_mb = testX
        
New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce

X_mb = torch.tensor(X_mb).float()
M_mb = torch.tensor(M_mb).float()
New_X_mb = torch.tensor(New_X_mb).float()



imputed_data = M_mb * X_mb + (1-M_mb) * G_sample

# Normalization (0 to 1)
for i in range(Dim):
    imputed_data[:,i] = imputed_data[:,i]* (Max_Val[i]+1e-6)
    imputed_data[:,i] = imputed_data[:,i]+ Min_Val[i]
    
renomal = imputed_data.detach().numpy()

In [None]:
import pandas as pd
renomal_df = pd.DataFrame(renomal)
 
renomal_df.to_csv('sample.csv', index=False)

In [None]:
X_mb_df = X_mb.detach().numpy()
Actual_df = X_mb.detach().numpy()
for i in range(Dim):
    X_mb_df[:,i] = X_mb_df[:,i]* (Max_Val[i]+1e-6)
    X_mb_df[:,i] = X_mb_df[:,i]+ Min_Val[i]

import pandas as pd
X_mb_df = pd.DataFrame(X_mb_df)
 
X_mb_df.to_csv('sample1.csv', index=False)

In [None]:
# mean 방식
Actual_df = testM * testX 
Actual_df = pd.DataFrame(Actual_df)
Actual_df = Actual_df.replace(0, np.NaN)

import impyute as impy
np_imputed=impy.mean(Actual_df)
np_imputed = np_imputed.to_numpy()
for i in range(Dim):
    np_imputed[:,i] = np_imputed[:,i]* (Max_Val[i]+1e-6)
    np_imputed[:,i] = np_imputed[:,i]+ Min_Val[i]

mean_imputed = pd.DataFrame(np_imputed)



In [None]:
import pickle
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
from matplotlib import pyplot as plt 
%matplotlib inline 

import seaborn as sns
X_mb_df = X_mb_df.rolling(window=5).mean()
renomal_df = renomal_df.rolling(window=5).mean()
mean_imputed = mean_imputed.rolling(window=5).mean()



In [None]:
parameters = {'axes.labelsize': 25,'axes.titlesize': 35,'legend.fontsize':25,'xtick.labelsize':25,'ytick.labelsize':25}
plt.rcParams.update(parameters)

In [None]:

plt.figure(figsize=(30,10))
plt.xlabel('Time(5m)')
plt.ylabel('Speed(Km/s)')

plt.plot(X_mb_df[0][:300],label='Actual',color='r',linestyle='-',linewidth=5.0)
plt.plot(renomal_df[0][:300],label='GAIN',color='b',linestyle=':',linewidth=5.0)
plt.plot(mean_imputed[0][:300],label='Our',color='g',linestyle='--',linewidth=5.0)
plt.legend(loc='lower right')

In [None]:
imputed_data