In [6]:
%matplotlib inline

import torch
from torch import nn
from d2l import torch as d2l
import matplotlib
import numpy as np
from tqdm import tqdm
import pandas as pd

import optuna
import numpy as np

def generate_mask(data, miss_rate):
    dim = data.shape[1]
    size = data.shape[0]
    A = np.random.uniform(0., 1., size=(size,dim))
    B = A > miss_rate
    mask = 1. * B

    return mask

def generate_hint(mask, hint_rate):
    hint_mask = generate_mask(mask, 1-hint_rate)
    hint = mask * hint_mask 

    return hint


def update_D(batch, mask, hint, Z, net_D, net_G, loss, optimizer_D, alpha):
    batch_size = batch.shape[0]
    
    ones = torch.ones_like(batch)
    zeros = torch.zeros_like (batch)

    #ones = torch.ones(batch_size)
    #zeros = torch.zeros(batch_size)
    
    new_X = mask * batch + (1-mask) * Z
    optimizer_D.zero_grad()
    
    input_D = torch.cat((batch, hint), 1).float()
    input_G = torch.cat((new_X, mask), 1).float()


    real_Y = net_D(input_D)
    #print(real_Y)
    sample_G = net_G(input_G)
    fake_X = new_X * mask + sample_G * (1-mask)
    fake_input_D = torch.cat((fake_X.detach(), hint), 1).float()
    fake_Y = net_D(fake_input_D)
    
    loss_D = (loss(fake_Y.float(), mask.float()) ).mean()
    loss_D.backward()

    optimizer_D.step()

    return loss_D

def update_G(batch, mask, hint, Z, net_D, net_G, loss, optimizer_G, alpha):
    
    loss_mse = nn.MSELoss(reduction = 'none')
    
    batch_size = Z.shape[0]

    ones = torch.ones_like(batch)
    #ones = torch.ones(batch_size)
    
    optimizer_G.zero_grad()

    new_X = mask * batch + (1-mask) * Z 
    input_G = torch.cat((new_X, mask), 1).float()
    sample_G = net_G(input_G)
    fake_X = new_X * mask + sample_G * (1-mask)

    fake_input_D = torch.cat((fake_X, hint), 1).float()
    fake_Y = net_D(fake_input_D)

    loss_G = (loss(fake_Y, ones.reshape(fake_Y.shape).float()) * (1-mask) ).mean() + alpha * ( loss_mse((sample_G*mask).float(), (batch*mask).float()) ).mean() / torch.mean(mask) 
    loss_G.backward()

    optimizer_G.step()

    return loss_G


def train(net_D, net_G, lr_D, lr_G, data_iter, num_epochs, data, hint_rate, alpha):
    dim = data.shape[1]
    size = data.shape[0]
    
    #loss = nn.BCEWithLogitsLoss(reduction = 'sum')
    loss = nn.BCELoss(reduction = 'none')

    loss_D_values = np.zeros(num_epochs)
    loss_G_values = np.zeros(num_epochs)
     
    #for w in net_D.parameters():
    #    nn.init.normal_(w, 0, 0.02)
    #for w in net_G.parameters():
    #    nn.init.normal_(w, 0, 0.02)

    # Initialize weights for net_D
    for name, param in net_D.named_parameters():
        if 'weight' in name:
            nn.init.xavier_uniform_(param)

    # Initialize weights for net_G
    for name, param in net_G.named_parameters():
        if 'weight' in name:
            nn.init.xavier_uniform_(param)


    #optimizer_D = torch.optim.SGD(net_D.parameters(), lr = lr_D)
    #optimizer_G = torch.optim.SGD(net_G.parameters(), lr = lr_G)

    optimizer_D = torch.optim.Adam(net_D.parameters(), lr = lr_D)
    optimizer_G = torch.optim.Adam(net_G.parameters(), lr = lr_G)

    
    pbar = tqdm(range(num_epochs))
    for epoch in pbar:
        
        for batch in data_iter:
            batch_size = batch.shape[0]

            mask = torch.from_numpy(np.where(batch == 0, 0.0, 1.0))
            hint = generate_hint(mask, hint_rate)

            #Z = torch.normal(0, 1, size=(batch_size, dim))
            Z = torch.rand((batch_size, dim)) / 0.01
            loss_D = update_D(batch, mask, hint, Z, net_D, net_G, loss, optimizer_D, alpha)
            loss_G = update_G(batch, mask, hint, Z, net_D, net_G, loss, optimizer_G, alpha)

        if epoch % 500 == 0:
            s = "{:6d}) loss D {:0.3f} loss G {:0.3f}".format(
                epoch,
                loss_D.detach().numpy(),
                loss_G.detach().numpy())
            pbar.clear()
            #logger.info('{}'.format(s))
            pbar.set_description(s)

        loss_D_values[epoch] = loss_D.detach().numpy()
        loss_G_values[epoch] = loss_G.detach().numpy()



        #Z = torch.normal(0,1,(size, dim))
        Z = torch.rand((size, dim)) / 0.01

        mask = torch.from_numpy(np.where(data == 0, 0.0, 1.0))
        hint = generate_hint(mask, hint_rate)

        new_X = mask * data + (1-mask) * Z 
        input_G = torch.cat((new_X, mask), 1).float()
        sample_G = net_G(input_G)

        fake_X = torch.from_numpy(data) * mask + sample_G * (1-mask)
        #fake_X = data * mask + sample_G * (1-mask)

    df = pd.DataFrame(fake_X.detach().numpy())
    #df.to_csv("my_imputed.csv", index = False)
    df.to_csv("imputed_dist2.csv", index = False)

    df_loss_D = pd.DataFrame(loss_D_values)
    df_loss_D.to_csv("loss_D.csv", index = False)
    df_loss_G = pd.DataFrame(loss_G_values)
    df_loss_G.to_csv("loss_G.csv", index = False)



def objective(trial):
    lr_D = trial.suggest_uniform('lr_D', 0.0001, 0.1 )
    lr_G = trial.suggest_uniform('lr_G', 0.0001, 0.1 )

    miss_rate = 0.2
    hint_rate = trial.suggest_uniform('hint_rate', 0.0, 0.9 )
    alpha = trial.suggest_uniform('alpha', 0, 0.5 )

    batch_size = 128
    num_epochs = 20

    size = 1000
    dim = 2

    h_dim1 = dim
    h_dim2 = dim

    dim = data.shape[1]
    size = data.shape[0]
    
    #loss = nn.BCEWithLogitsLoss(reduction = 'sum')
    loss = nn.BCELoss(reduction = 'none')
     
    #for w in net_D.parameters():
    #    nn.init.normal_(w, 0, 0.02)
    #for w in net_G.parameters():
    #    nn.init.normal_(w, 0, 0.02)


    #optimizer_D = torch.optim.SGD(net_D.parameters(), lr = lr_D)
    #optimizer_G = torch.optim.SGD(net_G.parameters(), lr = lr_G)

    optimizer_D = torch.optim.Adam(net_D.parameters(), lr = lr_D)
    optimizer_G = torch.optim.Adam(net_G.parameters(), lr = lr_G)

    
    pbar = tqdm(range(num_epochs))
    for epoch in pbar:
        for batch in data_iter:
            batch_size = batch.shape[0]

            mask = torch.from_numpy(np.where(batch == 0, 0.0, 1.0))
            hint = generate_hint(mask, hint_rate)

            #Z = torch.normal(0, 1, size=(batch_size, dim))
            Z = torch.rand((batch_size, dim)) / 0.01
            loss_D = update_D(batch, mask, hint, Z, net_D, net_G, loss, optimizer_D, alpha)
            loss_G = update_G(batch, mask, hint, Z, net_D, net_G, loss, optimizer_G, alpha)

        if epoch % 500 == 0:
            s = "{:6d}) loss D {:0.3f} loss G {:0.3f}".format(
                epoch,
                loss_D.detach().numpy(),
                loss_G.detach().numpy())
            pbar.clear()
            #logger.info('{}'.format(s))
            pbar.set_description(s)

        #Z = torch.normal(0,1,(size, dim))
        Z = torch.rand((size, dim)) / 0.01

        mask = torch.from_numpy(np.where(data == 0, 0.0, 1.0))
        hint = generate_hint(mask, hint_rate)

        new_X = mask * data + (1-mask) * Z 
        input_G = torch.cat((new_X, mask), 1).float()
        sample_G = net_G(input_G)

        #fake_X = torch.from_numpy(data) * mask + sample_G * (1-mask)
        fake_X = data * mask + sample_G * (1-mask)

        loss_D = update_D(data, mask, hint, Z, net_D, net_G, loss, optimizer_D, alpha)
        loss_G = update_G(data, mask, hint, Z, net_D, net_G, loss, optimizer_G, alpha)

    
    return loss_D, loss_G



df = pd.read_csv("missing_dist.csv")
features = list(df.columns)
data = df.values

mask = np.where(np.isnan(data), 0.0, 1.0)
data = np.where(mask, data, 0.0)
#print(data)

lr_D = 0.001
lr_G = 0.001

dim = data.shape[1]
size = data.shape[0]
miss_rate = 0.2
hint_rate = 0.9
alpha = 0.2
batch_size = 128
num_epochs = 2000

size = 5000
dim = 2

h_dim1 = dim
h_dim2 = dim


#X = torch.normal(0.0, 1, (size, dim))
#A = torch.tensor([[1,2], [-0.1, 0.5]])
#b = torch.tensor([0,0])
#data = torch.matmul(X, A) + b
#mask = generate_mask(data, miss_rate)
#hint = generate_hint(mask, hint_rate)
#data = data*mask

#print(data)

#d2l.set_figsize((5,5))
#d2l.plt.scatter(data[:100, (0)].detach().numpy(), data[:100, (1)].detach().numpy())
#d2l.plt.hist(data)
#print(f'The covariance matrix is\n{torch.matmul(A.T, A)}')

data_iter = torch.utils.data.DataLoader(data, batch_size, shuffle=True)

net_G = nn.Sequential(
    nn.Linear(dim*2, h_dim1), nn.ReLU(),
    nn.Linear(h_dim1, h_dim2), nn.ReLU(),
    nn.Linear(h_dim2, dim), nn.Sigmoid())

net_D = nn.Sequential(
    nn.Linear(dim*2, h_dim1), nn.ReLU(),
    nn.Linear(h_dim1, h_dim2), nn.ReLU(),
    nn.Linear(h_dim2, dim), nn.Sigmoid())

train(net_D, net_G, lr_D, lr_G, data_iter, num_epochs, data, hint_rate, alpha)

#study = optuna.create_study(directions=["minimize", "minimize"])
#study.optimize(objective, n_trials=1000)

#data = torch.where(data == 0, float('nan'), data)
#df = pd.DataFrame(data)
#df.to_csv("my_missing.csv", index=False)
#df.to_csv("missing_dist.csv", index=False)

  1500) loss D 0.177 loss G 0.913: 100%|██████████| 2000/2000 [06:48<00:00,  4.90it/s]


In [16]:
print(f"Number of trials on the Pareto front: {len(study.best_trials)}")

trial_with_highest_accuracy = max(study.best_trials, key=lambda t: t.values[1])
print(f"Trial with highest accuracy: ")
print(f"\tnumber: {trial_with_highest_accuracy.number}")
print(f"\tparams: {trial_with_highest_accuracy.params}")
print(f"\tvalues: {trial_with_highest_accuracy.values}")

optuna.visualization.plot_pareto_front(study, target_names=["D", "G"])
optuna.visualization.plot_param_importances(study, target=lambda t: t.values[0], target_name="lr_D")

Number of trials on the Pareto front: 12
Trial with highest accuracy: 
	number: 13
	params: {'lr_D': 0.06734943837301482, 'lr_G': 0.007092001564598319, 'hint_rate': 0.8574708312231205, 'alpha': 0.4279746567186455}
	values: [0.2199554741382599, 0.9027137371674904]


KeyboardInterrupt: 