In [1]:
import os
import torch
import torch.utils.data
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

import numpy as np
from time import time

CUDA = torch.cuda.is_available()

### VAE Model

In [2]:
RAW_DATA_DIR = "../Data/Raw/"
PROCESSED_DATA_DIR = "../Data/Processed/VAE/"

class CovariateDataset(Dataset):
    def __init__(self, file_name_pattern, file_name_args):
        self.file_name = file_name_pattern.format(*file_name_args)
        self.data = np.loadtxt(RAW_DATA_DIR + self.file_name + ".csv", delimiter=",")[:, 1:] # remove bias
        
    def __getitem__(self, index):
        return (self.data[index].astype(float), 0)

    def __len__(self):
        return self.data.shape[0]
    
    def save_processed_data(self, data):
        name = PROCESSED_DATA_DIR + self.file_name+".csv"
        np.savetxt(name, data, delimiter=",")

In [3]:
# Based on an example from https://github.com/pytorch/examples/blob/master/vae/main.py
# Extended to place a different prior on binary vs normal vars

SEED = 1

ZDIMS = 4 # latent dimensions
INTERMEDIATE_DIMS = 32
FEATURES = 10
DIAG_VAR = True

BINARY = [0, 2, 5, 7, 8]
NORMAL = [1, 3, 5, 6, 9]

torch.manual_seed(SEED)
if CUDA:
    torch.cuda.manual_seed(SEED)

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        
        self.relu = nn.ReLU()
        
        # ENCODER LAYERS
        self.dense1 = nn.Linear(FEATURES, INTERMEDIATE_DIMS)
        self.dense2_1 = nn.Linear(INTERMEDIATE_DIMS, ZDIMS)  # mu layer
        self.dense2_2 = nn.Linear(INTERMEDIATE_DIMS, ZDIMS)  # logvariance layer
        
        # this last layer bottlenecks through ZDIMS connections

        # DECODER LAYERS
        self.dense3 = nn.Linear(ZDIMS, INTERMEDIATE_DIMS)
        self.dense4 = nn.Linear(INTERMEDIATE_DIMS, FEATURES)

    def encode(self, x):
        h1 = self.relu(self.dense1(x))
        return self.dense2_1(h1), self.dense2_2(h1) #mu, logvar

    def reparameterize(self, mu, logvar):
        if self.training:
            # If we sampled directly from the latent distribution
            # we wouldn't be able to backprop the results because
            # there is no clear grad on the distribution

            # This reparam samples from a unit gaussian and then scales
            # by the latent parameters giving a defined route to backprop.

            std = logvar.mul(0.5).exp_() 

            # Sample from a unit gaussian with dimensions matching
            # the latent space.
            eps = Variable(std.data.new(std.size()).normal_())

            return eps.mul(std).add_(mu) # rescale and return
        else:
            return mu

    def decode(self, z):
        h3 = self.relu(self.dense3(z))
        mu_out = self.dense4(h3)# Deleted: self.sigmoid(self.dense4(h3))
        
        return mu_out

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, FEATURES))
        z = self.reparameterize(mu, logvar)
        mu_out = self.decode(z)
        return mu_out, mu, logvar

def loss_function(recon_batch_mu, batch_x, mu_latent, logvar_latent):
    
    # MSE: how good is the reconstruction in terms of
    mse_loss = nn.MSELoss(size_average=False)
    recon_loss = mse_loss(recon_batch_mu, batch_x)
    
    recon_loss /= batch_x.size()[0]
    
    # KLD is Kullback–Leibler divergence. Regularize VAE by
    # penalizing divergence from the prior

    # See Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    KLD = -0.5 * torch.sum(1 + logvar_latent - mu_latent.pow(2) - logvar_latent.exp())
    # Normalise by same number of elements as in reconstruction
    KLD /= batch_x.size()[0] * FEATURES
    
#     print("RL", recon_loss)
#     print("KLD", KLD)
    return recon_loss + KLD

def train(model, optimizer, epoch, data_loader, log_results=False):
    model.train()
    train_loss = 0

    for batch_idx, (data, _) in enumerate(data_loader):
        data = Variable(data)
        data = data.float()
        if CUDA:
            data = data.cuda()
        
        optimizer.zero_grad()

        recon_data, mu_latent, logvar_latent = model(data)
        
        # calculate loss
        loss = loss_function(recon_data, data, mu_latent, logvar_latent)
        train_loss += loss.data[0]
        
        # Find the gradient and descend
        loss.backward()
        optimizer.step()
        
    if log_results:
        print('====> Epoch: {} Average loss: {:.8f}'.format(
              epoch, train_loss / len(data_loader.dataset)))

In [4]:
# Based on an example from https://github.com/pytorch/examples/blob/master/vae/main.py
# Extended to place a different prior on binary vs normal vars

SEED = 1

ZDIMS = 4 # latent dimensions
INTERMEDIATE_DIMS = 32
FEATURES = 10
DIAG_VAR = True

BINARY = [0, 2, 5, 7, 8]
NORMAL = [1, 3, 5, 6, 9]

torch.manual_seed(SEED)
if CUDA:
    torch.cuda.manual_seed(SEED)

class ModifiedVAE(nn.Module):
    def __init__(self):
        super(ModifiedVAE, self).__init__()
        
        self.relu = nn.ReLU()
        
        # ENCODER LAYERS
        self.dense1 = nn.Linear(FEATURES, INTERMEDIATE_DIMS)
        self.dense2_1 = nn.Linear(INTERMEDIATE_DIMS, ZDIMS)  # mu layer
        self.dense2_2 = nn.Linear(INTERMEDIATE_DIMS, ZDIMS)  # logvariance layer
        
        # this last layer bottlenecks through ZDIMS connections

        # DECODER LAYERS
        self.dense3 = nn.Linear(ZDIMS, INTERMEDIATE_DIMS)
        self.dense4 = nn.Linear(INTERMEDIATE_DIMS, len(BINARY))
        self.dense5 = nn.Linear(INTERMEDIATE_DIMS, len(NORMAL))
        
        self.sigmoid = nn.Sigmoid()

    def encode(self, x):
        h1 = self.relu(self.dense1(x))
        return self.dense2_1(h1), self.dense2_2(h1) #mu, logvar

    def reparameterize(self, mu, logvar):
        if self.training:
            # If we sampled directly from the latent distribution
            # we wouldn't be able to backprop the results because
            # there is no clear grad on the distribution

            # This reparam samples from a unit gaussian and then scales
            # by the latent parameters giving a defined route to backprop.

            std = logvar.mul(0.5).exp_() 

            # Sample from a unit gaussian with dimensions matching
            # the latent space.
            eps = Variable(std.data.new(std.size()).normal_())

            return eps.mul(std).add_(mu) # rescale and return
        else:
            return mu

    def decode(self, z):
        h3 = self.relu(self.dense3(z))
        binary_mu_out = self.sigmoid(self.dense4(h3))
        normal_mu_out = self.dense5(h3)
        
        return binary_mu_out, normal_mu_out

    def forward(self, x):
        latent_mu, latent_logvar = self.encode(x.view(-1, FEATURES))
        z = self.reparameterize(latent_mu, latent_logvar)
        binary_mu_out, normal_mu_out = self.decode(z)
        return binary_mu_out, normal_mu_out, latent_mu, latent_logvar

def loss_function(recon_binary_mu, recon_normal_mu, batch_x, mu_latent, logvar_latent):
    
    # MSE: how good is the reconstruction in terms of
    mse_loss = nn.MSELoss(size_average=False)
    normal_recon_loss = mse_loss(recon_normal_mu, batch_x[:, NORMAL])
    normal_recon_loss /= (batch_x.size()[0])
    
    # Cross Entropy:
    BCE = F.binary_cross_entropy(recon_binary_mu, batch_x[:, BINARY], size_average=False)
    BCE /= (batch_x.size()[0])
    
    # KLD is Kullback–Leibler divergence. Regularize VAE by
    # penalizing divergence from the prior

    # See Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    KLD = -0.5 * torch.sum(1 + logvar_latent - mu_latent.pow(2) - logvar_latent.exp())
    # Normalise by same number of elements as in reconstruction
    KLD /= batch_x.size()[0] * FEATURES
    
#     print("RL", normal_recon_loss.data.cpu().numpy()[0])
#     print("BCE", BCE.data.cpu().numpy()[0])
#     print("KLD", KLD.data.cpu().numpy()[0])
    
    return normal_recon_loss + BCE + KLD

def train(model, optimizer, epoch, data_loader, log_results=False):
    model.train()
    train_loss = 0

    for batch_idx, (data, _) in enumerate(data_loader):
        data = Variable(data)
        data = data.float()
        if CUDA:
            data = data.cuda()
        
        optimizer.zero_grad()

        binary_mu_out, normal_mu_out, mu_latent, logvar_latent = model(data)
        
        # calculate loss
        loss = loss_function(binary_mu_out, normal_mu_out, data, mu_latent, logvar_latent)
        train_loss += loss.data[0]
        
        # Find the gradient and descend
        loss.backward()
        optimizer.step()
        
    if log_results:
        print('====> Epoch: {} Average loss: {:.8f}'.format(
              epoch, train_loss / len(data_loader.dataset)))

### Train and Process Utils

In [5]:
def train_model(model_class, dataset, dataset_number, verbose=True):
    model = model_class()
    if CUDA:
        model = model.cuda()

    num_epochs = 10000
    batch_size = 1000
    learning_rate = 1e-2
    lr_sched = False
         
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [int(num_epochs/4), int(3*num_epochs/4)], gamma=0.1)

    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    for epoch in range(1, num_epochs+1):
        checkpoint_interval = int(num_epochs/10)
        
        if lr_sched:
            scheduler.step()

        log = False
        if epoch%checkpoint_interval == 0:
            log = True
            
        train(model, optimizer, epoch, data_loader, log_results=log)
    

    torch.save(model.state_dict(), "../Models/VAE_{}.pth".format(dataset_number))
    
    # Show reconstruction
    model.eval()
    print("Training state: ", model.training)
    
    original_data,_ = next(iter(data_loader))
    original_data = Variable(original_data)
    original_data = original_data.float()
    if CUDA:
        original_data = original_data.cuda()
        
    binary_mu_out, normal_mu_out, mu_latent, logvar_latent = model(original_data)
    
    return model, original_data, binary_mu_out, normal_mu_out, mu_latent, logvar_latent

def encode_data(model, dataset):
    all_data = torch.from_numpy(dataset.data)
    all_data = Variable(all_data)
    all_data = all_data.float()
    
    if CUDA:
        all_data = all_data.cuda()

    latent_mu, latent_var = model.encode(all_data)
    
    if CUDA:
        latent_mu = latent_mu.cpu()
        latent_var = latent_var.cpu()
        
    data = np.hstack([latent_mu.data.numpy(), latent_var.data.numpy()])
    dataset.save_processed_data(data)

In [72]:
dataset = CovariateDataset("n_{}_model_{}_v_{}_covar_data", [1000, "A_add_lin", 1])
trained_model, original_data, binary_mu_out, normal_mu_out, mu_latent, logvar_latent = \
    train_model(ModifiedVAE, dataset, 1,verbose=True)

encode_data(trained_model, dataset)

====> Epoch: 10 Average loss: 0.00720191
====> Epoch: 20 Average loss: 0.00628152
====> Epoch: 30 Average loss: 0.00559646
====> Epoch: 40 Average loss: 0.00534063
====> Epoch: 50 Average loss: 0.00487277
====> Epoch: 60 Average loss: 0.00463829
====> Epoch: 70 Average loss: 0.00453654
====> Epoch: 80 Average loss: 0.00445161
====> Epoch: 90 Average loss: 0.00436299
====> Epoch: 100 Average loss: 0.00429253
Training state:  False


In [61]:
mu_out = torch.Tensor(1000, 10)

for index in BINARY:
    mu_out[:, index] = binary_mu_out[:, BINARY.index(index)].data.cpu()
    
for index in NORMAL:
    mu_out[:, index] = normal_mu_out[:, NORMAL.index(index)].data.cpu()
    
for i in np.random.choice(list(range(1000)), size=5, ):
    print("Orginal:", list(np.round(original_data[i].data.cpu().numpy(), 2)))
    print("Mu out:", list(np.round(mu_out[i].numpy(), 2)))
    print("Mu Latent:", list(np.round(mu_latent[i].data.cpu().numpy(), 2)))
    print("Std latent:", list(np.round(logvar_latent[i].mul(0.5).exp().data.cpu().numpy(), 2)))
    print()

Orginal: [1.0, 1.0, 1.0, -1.32, 0.02, 1.0, -0.35, 0.0, 1.0, -0.93]
Mu out: [1.0, 1.15, 1.0, -1.01, 0.0, 0.98, -0.18, 0.12, 1.0, -1.18]
Mu Latent: [1.1, -0.14, 1.1, -0.44]
Std latent: [0.06, 0.03, 0.06, 0.07]

Orginal: [1.0, -1.1, 0.0, -0.12, -0.33, 0.0, -2.12, 0.0, 0.0, -1.77]
Mu out: [0.36, -1.32, 0.02, 0.53, 0.0, 0.01, -2.24, 0.0, 0.0, -1.25]
Mu Latent: [-1.4, -0.21, 1.64, 1.06]
Std latent: [0.09, 0.03, 0.07, 0.1]

Orginal: [0.0, -2.11, 0.0, 0.21, 0.58, 0.0, 0.38, 0.0, 0.0, 1.91]
Mu out: [0.02, -1.66, 0.01, 0.41, 50774.44, 0.08, 0.46, 0.0, 0.0, 1.06]
Mu Latent: [-1.34, 0.32, 0.84, 1.66]
Std latent: [0.06, 0.03, 0.04, 0.07]

Orginal: [1.0, 1.29, 0.0, 0.74, 0.57, 1.0, -1.03, 1.0, 0.0, -1.44]
Mu out: [1.0, 1.09, 0.0, 0.78, 0.0, 0.97, -0.86, 1.0, 0.0, -1.25]
Mu Latent: [0.34, -2.01, 0.03, -1.48]
Std latent: [0.05, 0.19, 0.03, 0.16]

Orginal: [1.0, -1.29, 1.0, 0.23, 1.92, 1.0, 1.66, 0.0, 1.0, 1.65]
Mu out: [0.98, -1.53, 1.0, 0.19, 0.0, 0.97, 1.58, 0.0, 1.0, 1.81]
Mu Latent: [-1.46, 0.14, 

In [None]:
assignment_model_names = ['A_add_lin', 'B_add_mild_nlin', 'C_add_mod_nlin', 'D_mild_nadd_lin',
                     'E_mild_nadd_mild_nlin', 'F_mod_nadd_lin', 'G_mod_nadd_mod_nlin']

for dataset_number in range(25, 100):
    print("Starting run for Dataset {}".format(dataset_number))
    
    for model_name in assignment_model_names:
        print("-- Running for model name: ", model_name)
        
        start = time()

        dataset = CovariateDataset("n_{}_model_{}_v_{}_covar_data", [1000, model_name, dataset_number])

        trained_model, original_data, binary_mu_out, normal_mu_out, mu_latent, logvar_latent = \
            train_model(ModifiedVAE, dataset, dataset_number,verbose=True)

        encode_data(trained_model, dataset)

        print("---- Done in ", time() - start, " seconds\n")
                
    print("================\n\n")

Starting run for Dataset 25
-- Running for model name:  A_add_lin
====> Epoch: 1000 Average loss: 0.00266409
====> Epoch: 2000 Average loss: 0.00196114
====> Epoch: 3000 Average loss: 0.00178170
====> Epoch: 4000 Average loss: 0.00167729
====> Epoch: 5000 Average loss: 0.00164007
====> Epoch: 6000 Average loss: 0.00160345
====> Epoch: 7000 Average loss: 0.00160483
====> Epoch: 8000 Average loss: 0.00157590
====> Epoch: 9000 Average loss: 0.00156361
====> Epoch: 10000 Average loss: 0.00153984
Training state:  False
---- Done in  176.25343918800354  seconds

-- Running for model name:  B_add_mild_nlin
====> Epoch: 1000 Average loss: 0.00287171
====> Epoch: 2000 Average loss: 0.00261413
====> Epoch: 3000 Average loss: 0.00252240
====> Epoch: 4000 Average loss: 0.00245967
====> Epoch: 5000 Average loss: 0.00229948
====> Epoch: 6000 Average loss: 0.00223907
====> Epoch: 7000 Average loss: 0.00218793
====> Epoch: 8000 Average loss: 0.00215036
====> Epoch: 9000 Average loss: 0.00208439
====> 

====> Epoch: 1000 Average loss: 0.00323027
====> Epoch: 2000 Average loss: 0.00309351
====> Epoch: 3000 Average loss: 0.00292347
====> Epoch: 4000 Average loss: 0.00281765
====> Epoch: 5000 Average loss: 0.00269726
====> Epoch: 6000 Average loss: 0.00265869
====> Epoch: 7000 Average loss: 0.00262306
====> Epoch: 8000 Average loss: 0.00255308
====> Epoch: 9000 Average loss: 0.00252183
====> Epoch: 10000 Average loss: 0.00252009
Training state:  False
---- Done in  196.0692789554596  seconds

-- Running for model name:  C_add_mod_nlin
====> Epoch: 1000 Average loss: 0.00322431
====> Epoch: 2000 Average loss: 0.00272926
====> Epoch: 3000 Average loss: 0.00250364
====> Epoch: 4000 Average loss: 0.00243513
====> Epoch: 5000 Average loss: 0.00241009
====> Epoch: 6000 Average loss: 0.00233923
====> Epoch: 7000 Average loss: 0.00231074
====> Epoch: 8000 Average loss: 0.00227529
====> Epoch: 9000 Average loss: 0.00227535
====> Epoch: 10000 Average loss: 0.00223168
Training state:  False
---- Do

====> Epoch: 1000 Average loss: 0.00307291
====> Epoch: 2000 Average loss: 0.00255749
====> Epoch: 3000 Average loss: 0.00243355
====> Epoch: 4000 Average loss: 0.00230022
====> Epoch: 5000 Average loss: 0.00226357
====> Epoch: 6000 Average loss: 0.00219988
====> Epoch: 7000 Average loss: 0.00212450
====> Epoch: 8000 Average loss: 0.00206406
====> Epoch: 9000 Average loss: 0.00204018
====> Epoch: 10000 Average loss: 0.00218212
Training state:  False
---- Done in  196.34409093856812  seconds

-- Running for model name:  D_mild_nadd_lin
====> Epoch: 1000 Average loss: 0.00266906
====> Epoch: 2000 Average loss: 0.00242558
====> Epoch: 3000 Average loss: 0.00228332
====> Epoch: 4000 Average loss: 0.00217508
====> Epoch: 5000 Average loss: 0.00216319
====> Epoch: 6000 Average loss: 0.00213803
====> Epoch: 7000 Average loss: 0.00210938
====> Epoch: 8000 Average loss: 0.00207522
====> Epoch: 9000 Average loss: 0.00205739
====> Epoch: 10000 Average loss: 0.00207796
Training state:  False
---- 

In [8]:
models_to_rerun = [('A_add_lin', 12, 'sparsity'), ('G_mod_nadd_mod_nlin', 40, 'sparsity')]

for model_name, dataset_number, loss_type in models_to_rerun:
    dataset = CovariateDataset("n_{}_model_{}_v_{}_covar_data", [1000, model_name, dataset_number])
    trained_model, final_loss = train_model(
                                        autoencoder,
                                        dataset,
                                        loss=loss_type,
                                        verbose=True)
    encode_data(trained_model, dataset, loss=loss_type)

epoch [1000/10000], loss:0.4875
epoch [2000/10000], loss:0.4456
epoch [3000/10000], loss:0.4104
epoch [4000/10000], loss:0.3195
epoch [5000/10000], loss:0.3145
epoch [6000/10000], loss:0.3127
epoch [7000/10000], loss:0.3118
epoch [8000/10000], loss:0.3107
epoch [9000/10000], loss:0.3096
epoch [10000/10000], loss:0.3084
Final loss: loss:0.3084
epoch [1000/10000], loss:0.5869
epoch [2000/10000], loss:0.5415
epoch [3000/10000], loss:0.5173
epoch [4000/10000], loss:0.4197
epoch [5000/10000], loss:0.4216
epoch [6000/10000], loss:0.4136
epoch [7000/10000], loss:0.4132
epoch [8000/10000], loss:0.4127
epoch [9000/10000], loss:0.4123
epoch [10000/10000], loss:0.4118
Final loss: loss:0.4118
