<a href="https://colab.research.google.com/github/GiovaniValdrighi/inferencia_causal/blob/master/vae.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip3 install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl
!pip3 install torchvision
!pip3 install pyro-ppl

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyro
import pyro.distributions
import pyro.infer
import pyro.optim
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import networkx as nx

In [31]:
assert pyro.__version__.startswith('1.0.0')
pyro.enable_validation(True)
pyro.distributions.enable_validation(False)
pyro.set_rng_seed(0)

AssertionError: ignored

Estudo de variational autoencoder através do texto: https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73

Implementação de variational autoencoder com Pyro através do texto: https://pyro.ai/examples/vae.html

In [7]:
from google.colab import files
uploaded = files.upload()

Saving dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz to dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz


As propriedades latentes das imagens são: shape, scale, size, position X e position Y. Vou criar um DAG para inserir relações causais entre as variáveis e em seguida criar um SCM para este DAG.

In [0]:
dataset_zip = np.load('dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz', allow_pickle = True, encoding = 'latin1')

In [33]:
print('Keys in the dataset:', dataset_zip.files)
imgs = dataset_zip['imgs']
latents_values = dataset_zip['latents_values']
latents_classes = dataset_zip['latents_classes']
metadata = dataset_zip['metadata']

print('Metadata: \n', metadata)
imgs_array = np.copy(dataset_zip['imgs'])
imgs_array = imgs_array.reshape(737280, 4096)
index = np.random.permutation(imgs_array.shape[0])
imgs_array = imgs_array[index]
latents_classes = latents_classes[index]

Keys in the dataset: ['metadata', 'imgs', 'latents_classes', 'latents_values']
Metadata: 
 {'date': 'April 2017', 'description': 'Disentanglement test Sprites dataset.Procedurally generated 2D shapes, from 6 disentangled latent factors.This dataset uses 6 latents, controlling the color, shape, scale, rotation and position of a sprite. All possible variations of the latents are present. Ordering along dimension 1 is fixed and can be mapped back to the exact latent values that generated that image.We made sure that the pixel outputs are different. No noise added.', 'version': 1, 'latents_names': ('color', 'shape', 'scale', 'orientation', 'posX', 'posY'), 'latents_possible_values': {'orientation': array([0.        , 0.16110732, 0.32221463, 0.48332195, 0.64442926,
       0.80553658, 0.96664389, 1.12775121, 1.28885852, 1.44996584,
       1.61107316, 1.77218047, 1.93328779, 2.0943951 , 2.25550242,
       2.41660973, 2.57771705, 2.73882436, 2.89993168, 3.061039  ,
       3.22214631, 3.3832536

In [0]:
#function that pick images from the dataset and return the batchs
#with test and training data
def setup_data_loader(batch_size = 128):
  #create the dataframes for training and test from the dataset_zip['imgs]
  #test with 1/5 of the data
  train_df = torch.from_numpy(imgs_array[0:int(737280*4/5)])
  test_df = torch.from_numpy(imgs_array[int(737280*4/5):737280])
  #creates a iterable dataset to train and test, each iteration have batch_size rows of data
  train_loader = torch.utils.data.DataLoader(train_df, batch_size, shuffle = True)
  test_loader = torch.utils.data.DataLoader(test_df, batch_size, shuffle = False)
  return train_loader, test_loader

In [0]:
class Encoder(nn.Module):
  '''This class receive the images data as vectors 1x4096
  and the labels of the figure in the image, as a 1x5 vector 
  and should encode it to the latent space, in the latent space
  our variables are the mean and variance of a gaussian distribution, 
  a vector 1x2

  :param img_dim: dimension of image vector
  :param label_dim: dimension of label vector
  :param latent_dim: dimension of latent space, output
  '''
  def __init__(self, img_dim = 4096, label_dim = 5, latent_dim = 200):
    super(Encoder, self).__init__()
    self.img_dim = img_dim
    self.label_dim = label_dim
    self.latent_dim = latent_dim 
    #linear transformations used
    self.fc1 = nn.Linear(img_dim + label_dim, 1000)
    self.fc21 = nn.Linear(1000, latent_dim)
    self.fc22 = nn.Linear(1000, latent_dim)
    #non-linear transformation used
    self.softplus = nn.Softplus()

  def forward(self, img, label):
    #use the transformation to get the hidden variable
    data = torch.cat((img, label))
    hidden = self.softplus(self.fc1(data))
    #use the transformation to get the mean and the variance
    mean_z = self.fc21(hidden)
    cov_z = torch.exp(self.fc22(hidden))
    return mean_z, cov_z


class Decoder(nn.Module):
  '''This class receive a sample of the latent variable
  and return the image as a data vector 1x4096 and
  the latent classes as a vector 1x5

  :param img_dim: dimension of image vector
  :param label_dim: dimension of label vector
  :param latent_dim: dimension of latent space, output
  '''
  def __init__(self, img_dim = 4096, label_dim = 5, latent_dim = 200):
    super(Decoder, self).__init__()
    self.img_dim = img_dim
    self.label_dim = label_dim
    self.latent_dim = latent_dim
    #linear transformations used
    self.fc1 = nn.Linear(latent_dim+label_dim, 1000)
    self.fc2 = nn.Linear(1000, img_dim)
    #non-linear transformations used
    self.softplus = nn.Softplus()
    self.sigmoid = nn.Sigmoid()

  def forward(self, latent, label):
    #use the transformation to get the hidden variable
    data = torch.cat((latent, label))
    hidden = self.softplus(self.fc1(data))
    #use the transformation to get the image
    image = self.sigmoid(self.fc2(hidden))
    return image



In [0]:
class VAE(nn.Module):
  '''
  This class define the p(z|x) and the p(x|z)
  and use the scm model to call the encoder and
  decoder

  
  :param img_dim: dimension of image vector
  :param label_dim: dimension of label vector
  :param latent_dim: dimension of latent space, output
  '''
  def __init__(self, img_dim = 4096, label_dim = 5, latent_dim = 200):
    super(VAE, self).__init__()
    #creating networks
    self.encoder = Encoder(img_dim, label_dim, latent_dim)
    self.decoder = Decoder(img_dim, label_dim, latent_dim)
    self.img_dim = img_dim
    self.label_dim = label_dim
    self.latent_dim = latent_dim

  def model(self, img, label):
    '''
    Function in the VAE that defines
    p(x|z)
    '''
    data = torch.cat((img, label))
    pyro.module("decoder", self.decoder)
    with pyro.plate("data", data.shape[0]):
      z_mean = data.new_zeros(torch.Size((data.shape[0], self.latent_dim)))
      z_variance = data.new_ones(torch.Size((data.shape[0], self.latent_dim)))
      z_sample = pyro.sample("latent", pyro.distributions.Normal(z_mean, z_variance).to_event(1))
      img = self.decoder.forward(z_sample)
      pyro.sample("obs", pyro.distributions.Bernoulli(img).to_event(1), obs = data)


  def guide(self, img, label):
    '''
    Function that is the guide to the model
    shape, scale, orientation, posX, posY = g(img)
    the p(z|x) use on the encoder
    '''
    data = torch.cat((img, label))
    pyro.module("encoder", self.encoder)
    with pyro.plate("data", data.shape[0]):
      z_mean, z_variance = self.encoder.forward(data)
      pyro.sample("latent", pyro.distributions.Normal(z_mean, z_variance).to_event(1))

In [46]:
#the training routine
train_loader, test_loader = setup_data_loader()
vae = VAE()

#optimizer
optimizer = pyro.optim.Adam({"lr" : 1.0e-3})

#inference algorithm
elbo = pyro.infer.Trace_ELBO()
svi = pyro.infer.SVI(vae.model, vae.guide, optimizer, elbo)

train_elbo = []
test_elbo = []
num_epochs = 5
test_freq = 1
for epoch in range(num_epochs):
  epoch_loss = 0
  for x in train_loader:
    epoch_loss += svi.step(x)
  total_epoch_loss_train = epoch_loss/len(train_loader)
  train_elbo.append(total_epoch_loss_train)

  if epoch % test_freq == 0:
    test_loss = 0
    for x in test_loader:
      test_loss += svi.evaluate_loss(x)
    total_epoch_loss_test  = epoch_loss/len(test_loader)
    test_elbo.append(total_epoch_loss_test)


TypeError: ignored