In [19]:
import torch
import numpy as np
import os
from model import *
from utils import *

# System Parameters
# 1. Mini batch size
mb_size = 128
# 2. Missing rate
p_miss = 0.2
# 3. Hint rate
p_hint = 0.9
# 4. Loss Hyperparameters
alpha = 10
# 5. Train Rate
train_rate = 0.8

#% Data
dataset_file = 'data/V_228.csv'
# Data generation
Data = np.loadtxt(dataset_file, delimiter=",",skiprows=1)

# Parameters
No = len(Data)
Dim = len(Data[0,:])

# Hidden state dimensions
H_Dim1 = Dim
H_Dim2 = Dim

# Normalization (0 to 1)
Min_Val = np.zeros(Dim)
Max_Val = np.zeros(Dim)

for i in range(Dim):
    Min_Val[i] = np.min(Data[:,i])
    #print(np.min(Data[:,i]))
    Data[:,i] = Data[:,i] - np.min(Data[:,i])
    Max_Val[i] = np.max(Data[:,i])
    Data[:,i] = Data[:,i] / (np.max(Data[:,i]) + 1e-6)    
    



# Missing introducing
p_miss_vec = p_miss * np.ones((Dim,1)) 
   
Missing = np.zeros((No,Dim))

for i in range(Dim):
    A = np.random.uniform(0., 1., size = [len(Data),])
    B = A > p_miss_vec[i]
    Missing[:,i] = 1.*B

    
# Train Test Division    
   
idx = np.random.permutation(No)

Train_No = int(No * train_rate)
Test_No = No - Train_No
    
# Train / Test Features
trainX = Data[idx[:Train_No],:]
testX = Data[idx[Train_No:],:]

# Train / Test Missing Indicators
trainM = Missing[idx[:Train_No],:]
testM = Missing[idx[Train_No:],:]

netD = NetD()
netG = NetG()


optimD = torch.optim.Adam(netD.parameters(), lr=0.001)
optimG = torch.optim.Adam(netG.parameters(), lr=0.001)


bce_loss = torch.nn.BCEWithLogitsLoss(reduction="elementwise_mean")
mse_loss = torch.nn.MSELoss(reduction="elementwise_mean")


i = 1
# Start Iterations
for it in range(5000): 
    #%% Inputs
    mb_idx = sample_idx(Train_No, mb_size)
    X_mb = trainX[mb_idx,:]  

    Z_mb = sample_Z(mb_size, Dim) 
    M_mb = trainM[mb_idx,:]  
    H_mb1 = sample_M(mb_size, Dim, 1-p_hint)
    H_mb = M_mb * H_mb1 
    
    New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce
    
    X_mb = torch.tensor(X_mb).float()
    New_X_mb = torch.tensor(New_X_mb).float()
    Z_mb = torch.tensor(Z_mb).float()
    M_mb = torch.tensor(M_mb).float()
    H_mb = torch.tensor(H_mb).float()
    
    # Train D
    G_sample = netG(X_mb, New_X_mb, M_mb)
    D_prob = netD(X_mb, M_mb, G_sample, H_mb)
    D_loss = bce_loss(D_prob, M_mb)

    optimD.zero_grad()
    D_loss.backward()
    optimD.step()
    
    
    # Train G
    G_sample = netG(X_mb, New_X_mb, M_mb)
    D_prob = netD(X_mb, M_mb, G_sample, H_mb)
    D_prob.detach_()
    G_loss1 = ((1 - M_mb) * (torch.sigmoid(D_prob)+1e-8).log()).mean()/(1-M_mb).sum()
    G_mse_loss = mse_loss(M_mb*X_mb, M_mb*G_sample) / M_mb.sum()
    G_loss = G_loss1 + alpha*G_mse_loss
    
    G_loss.backward()
    optimG.step()
    optimG.zero_grad()
    
    G_mse_test = mse_loss((1-M_mb)*X_mb, (1-M_mb)*G_sample) / (1-M_mb).sum()


    if it % 100 == 0:
        print('Iter: {}'.format(it),end='\t')
        print('Train_loss: {:.4}'.format(np.sqrt(G_mse_loss.item())),end='\t')
        print('Test_loss: {:.4}'.format(np.sqrt(G_mse_test.item())),end='\t')
        print('G_loss: {:.4}'.format(G_loss),end='\t')
        print('D_loss: {:.4}'.format(D_loss))




Iter: 0	Train_loss: 0.3443	Test_loss: 0.3436	G_loss: 1.185	D_loss: 0.713
Iter: 100	Train_loss: 0.1083	Test_loss: 0.1126	G_loss: 0.1174	D_loss: 0.4858
Iter: 200	Train_loss: 0.1125	Test_loss: 0.1159	G_loss: 0.1265	D_loss: 0.466
Iter: 300	Train_loss: 0.1063	Test_loss: 0.1083	G_loss: 0.1129	D_loss: 0.4512
Iter: 400	Train_loss: 0.09721	Test_loss: 0.1032	G_loss: 0.09449	D_loss: 0.4251
Iter: 500	Train_loss: 0.09417	Test_loss: 0.1023	G_loss: 0.08867	D_loss: 0.4025
Iter: 600	Train_loss: 0.08802	Test_loss: 0.09052	G_loss: 0.07747	D_loss: 0.3964
Iter: 700	Train_loss: 0.08705	Test_loss: 0.09447	G_loss: 0.07576	D_loss: 0.3903
Iter: 800	Train_loss: 0.09043	Test_loss: 0.1003	G_loss: 0.08176	D_loss: 0.3815
Iter: 900	Train_loss: 0.0805	Test_loss: 0.08787	G_loss: 0.06478	D_loss: 0.3657
Iter: 1000	Train_loss: 0.08495	Test_loss: 0.09243	G_loss: 0.07215	D_loss: 0.3677
Iter: 1100	Train_loss: 0.08166	Test_loss: 0.09152	G_loss: 0.06665	D_loss: 0.3603
Iter: 1200	Train_loss: 0.08388	Test_loss: 0.09598	G_loss: 0

KeyboardInterrupt: 

In [16]:
def test_loss(X, M):
    
    
    #%% MSE Performance metric
    MSE_test_loss = torch.mean(((1-M) * X - (1-M)*G_sample)**2) / torch.mean(1-M)
    return MSE_test_loss

G_sample = netG(X_mb, New_X_mb, M_mb)

Z_mb = sample_Z(Test_No, Dim) 
M_mb = testM
X_mb = testX
        
New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce

X_mb = torch.tensor(X_mb).float()
M_mb = torch.tensor(M_mb).float()
New_X_mb = torch.tensor(New_X_mb).float()

MSE_final= test_loss(X=X_mb, M=M_mb)
print('Final Test RMSE: ' + str(np.sqrt(MSE_final.item())))

Final Test RMSE: 0.0931326287300339


In [17]:
imputed_data = M_mb * X_mb + (1-M_mb) * G_sample
print("Imputed test data:")
np.set_printoptions(formatter={'float': lambda x: "{0:0.8f}".format(x)})


print(imputed_data.detach().numpy())

Imputed test data:
[[0.24097937 0.18885008 0.66421413 ... 0.73685551 0.78604650 0.68041235]
 [0.28221649 0.85190040 0.41845506 ... 0.79027355 0.81737083 0.80706918]
 [0.79252577 0.76671034 0.67254037 ... 0.76899695 0.75038761 0.75257730]
 ...
 [0.41559702 0.82568806 0.81204110 ... 0.86322188 0.86821705 0.84388804]
 [0.51935673 0.81127131 0.38842636 ... 0.82749230 0.87678355 0.68232536]
 [0.24097937 0.78505898 0.34508076 ... 0.80168140 0.79844958 0.78497791]]


In [18]:
# Normalization (0 to 1)
renomal = imputed_data 

for i in range(Dim):
    renomal[:,i] = renomal[:,i]* (Max_Val[i]+1e-6)
    renomal[:,i] = renomal[:,i]+ Min_Val[i]
    
print(renomal.cpu().detach().numpy())

[[21.99999809 19.40926170 57.53298187 ... 56.78509521 60.40000153
  54.00000000]
 [25.19999886 70.00000000 40.79679108 ... 60.29999924 62.42041779
  62.59999847]
 [64.80000305 63.50000000 58.09999847 ... 58.90000153 58.10000229
  58.89999771]
 ...
 [35.55032730 68.00000000 67.59999847 ... 65.10000610 65.69999695
  65.09999847]
 [43.60208130 66.90000153 38.75183487 ... 62.74899673 66.25254059
  54.12989426]
 [21.99999809 64.90000153 35.79999924 ... 61.05063629 61.19999695
  61.10000229]]


In [21]:
renomal.size()

torch.Size([2535, 228])