In [7]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from tqdm import tqdm
import math
import random
import numpy as np
import pandas as pd
import torch.optim as optim

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
device = "cuda" if torch.cuda.is_available() else "cpu"

In [8]:
class FeatureRegression(nn.Module):
    def __init__(self, input_size):
        super(FeatureRegression, self).__init__()
        self.build(input_size)

    def build(self, input_size):
        self.W = Parameter(torch.Tensor(input_size, input_size))
        self.b = Parameter(torch.Tensor(input_size))

        m = torch.ones(input_size, input_size).cuda() - torch.eye(input_size, input_size).cuda()
        self.register_buffer('m', m)

        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.W.size(0))
        self.W.data.uniform_(-stdv, stdv)
        if self.b is not None:
            self.b.data.uniform_(-stdv, stdv)

    def forward(self, x):
        z_h = F.linear(x, self.W * Variable(self.m), self.b)
        return z_h

class TemporalDecay(nn.Module):
    def __init__(self, input_size, output_size, diag = False):
        super(TemporalDecay, self).__init__()
        self.diag = diag

        self.build(input_size, output_size)

    def build(self, input_size, output_size):
        self.W = Parameter(torch.Tensor(output_size, input_size)).cuda()
        self.b = Parameter(torch.Tensor(output_size)).cuda()
        self.relu = nn.ReLU(inplace=False)
        if self.diag == True:
            assert(input_size == output_size)
            m = torch.eye(input_size, input_size).cuda()
            self.register_buffer('m', m)

        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.W.size(0))
        self.W.data.uniform_(-stdv, stdv)
        if self.b is not None:
            self.b.data.uniform_(-stdv, stdv)

    def forward(self, d):
        gamma = self.relu(F.linear(d, self.W, self.b))
        gamma = torch.exp(-gamma)
        return gamma

In [9]:
class MGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(MGRU, self).__init__()

        self.temp_decay_h = TemporalDecay(input_size, output_size = hidden_size, diag = False)
        self.temp_decay_x = TemporalDecay(input_size, input_size, diag = True)
        self.temp_decay_r = TemporalDecay(input_size, input_size, diag = True)
        
        self.hidden_size = hidden_size
        self.input_size = input_size

        self.build()

    def build(self):
        self.output_layer = nn.Linear(self.hidden_size, self.input_size, bias=True)
        
        self.z_layer = FeatureRegression(self.input_size)
        self.beta_layer = nn.Linear(self.input_size * 2, self.input_size)
        self.grucell = nn.GRUCell(self.input_size * 2, self.hidden_size)
        self.concat_lyaer = nn.Linear(self.input_size * 2, self.input_size)
        

    def loss(self, hat, y, m):
        return torch.sum(torch.abs((y - hat)) * m) / (torch.sum(m) + 1e-5)

    
    def forward(self, input):
        values = input[:,0,::]
        delta = input[:,1,::]
        masks = input[:,2,::]
        rbfs = input[:,3,::]

        hid = torch.zeros((values.size(0), self.hidden_size)).cuda()

        x_loss = 0.0
        imputations = []
        c_hat_list = []
        for i in range(values.size(1)):

            v = values[:,i,:]
            d = delta[:,i,:]
            m = masks[:,i,:]
            r = rbfs[:,i,:]

            gamma_x = self.temp_decay_x(d)
            gamma_h = self.temp_decay_h(d)
            
            hid = hid * gamma_h

            r_hat = self.temp_decay_r(r)
            
            x_hat = self.output_layer(hid)
            x_loss += torch.sum(torch.abs(v - x_hat) * m) / (torch.sum(m) + 1e-5)

            RG = torch.cat([x_hat, r_hat], dim = 1)
            concat_hat = self.concat_lyaer(RG)
            x_loss += torch.sum(torch.abs(v - concat_hat) * m) / (torch.sum(m) + 1e-5)

            x_c = m * v + (1 - m) * x_hat

            z_hat = self.z_layer(x_c)
            x_loss += torch.sum(torch.abs(v - z_hat) * m) / (torch.sum(m) + 1e-5)

            beta_weight = torch.cat([gamma_x, m], dim = 1)
            beta = torch.sigmoid(self.beta_layer(beta_weight))

            c_hat = beta * z_hat + (1 - beta) * x_hat
            x_loss += torch.sum(torch.abs(v - c_hat) * m) / (torch.sum(m) + 1e-5)

            c_c = m * v + (1 - m) * c_hat

            gru_input = torch.cat([c_c, m], dim = 1)
            imputations.append(c_c.unsqueeze(dim = 1))
            c_hat_list.append(c_hat.unsqueeze(1))
            
            # GRU cell
            hid = self.grucell(gru_input, hid)

        c_hat_list = torch.cat(c_hat_list, dim = 1)
        imputations = torch.cat(imputations, dim = 1)
        return imputations, x_loss, c_hat_list

In [10]:
import torch
import torch.nn as nn

class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Discriminator, self).__init__()
        
        self.hidden_size = hidden_size
        self.input_size = input_size

        self.grucell = nn.GRUCell(input_size, hidden_size)
        
        self.fc = nn.Linear(hidden_size, input_size)

    
    def forward(self, X):
        hid = torch.zeros((X.size(0), self.hidden_size)).cuda()
        D_list = []

        for i in range(X.size(1)):

            gru_input = X[:,i,:]
            Y_hat = torch.sigmoid(self.fc(hid))

            hid = self.grucell(gru_input, hid)
            D_list.append(Y_hat.unsqueeze(1))
        
        D_list = torch.cat(D_list, dim = 1)

        return D_list  


In [15]:
from utils2 import missing_data_rbf2, eval_model2

In [13]:
dfpath = 'pm25_missing.txt'
df = pd.read_csv("./dataset/"+dfpath).drop(["datetime"], axis = 1)
rbfpath = 'air_1000_0.05_time.csv'
batch_size = 64
dataset = missing_data_rbf2(df, rbfpath, batch_size)

In [39]:
def train(G, D, dataset, lr, epochs):

    optimizer_G = torch.optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))
    progress = tqdm(range(epochs))

    G.train()
    D.train()
    G.to(device)
    D.to(device)

    for epoch in progress:
        batch_loss_G = 0.0
        batch_loss_D = 0.0
        for data in dataset:
            
            Mask = data[:,2,::].clone().detach().to(device)
            
            # Generator train
            imputations, x_loss, c_hat_list = G(data.to(device))
            D_prob = D(imputations)

            # Discriminator train
            D_loss = -1 * torch.sum((Mask * torch.log(D_prob + 1e-08)) + (1 - Mask) * torch.log(D_prob + 1e-8))
            optimizer_D.zero_grad()
            D_loss.backward(retain_graph=True)
            optimizer_D.step()

            # Generator train
            G_loss = -1 * torch.sum((1-Mask) * torch.log(D_prob.clone().detach() + 1e-8)) + x_loss

            optimizer_G.zero_grad()
            G_loss.backward()
            optimizer_G.step()

            batch_loss_G += G_loss
            batch_loss_D += D_loss

        progress.set_description("G_loss: {}, D_loss : {}, x_loss: {}".format(batch_loss_G, batch_loss_D, x_loss))

    

In [40]:
G = MGRU(36, 64)
D = Discriminator(36, 16)

train(G,D,dataset, 0.001, 300)
Nonscale_imputataion = eval_model2(G, rbfpath, "pm25_ground.csv", "pm25_missing.csv")

G_loss: 1482.7755126953125, D_loss : 5400.09326171875, x_loss: 40.622379302978516: 100%|██████████| 300/300 [06:03<00:00,  1.21s/it]


Scale MAE : tensor(0.1874)
Scale MRE : tensor(0.2706)
Original MAE : 15.15376421311461
Original MRE : 0.21286257843902506


In [41]:
G = MGRU(36, 64)
D = Discriminator(36, 16)

train(G,D,dataset, 0.001, 300)
Nonscale_imputataion = eval_model2(G, "air_20_8.0_scale.csv", "pm25_ground.csv", "pm25_missing.csv")

G_loss: 1452.34765625, D_loss : 5326.833984375, x_loss: 34.20378494262695: 100%|██████████| 300/300 [06:05<00:00,  1.22s/it]        


Scale MAE : tensor(0.1950)
Scale MRE : tensor(0.2815)
Original MAE : 15.737390787235013
Original MRE : 0.22106069051637206
