<a href="https://colab.research.google.com/github/EVA4-RS-Group/Phase2/blob/master/S7_Variational_AutoEncoders/VariationalAutoEncoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
!wget -q https://github.com/EVA4-RS-Group/Phase2/releases/download/S6/data.zip
!unzip data.zip
!rm -rf data.zip
!mkdir models
!mkdir samples

Archive:  data.zip
   creating: data/
   creating: data/test/
   creating: data/test/data_test/
  inflating: data/test/data_test/desktop.ini  
  inflating: data/test/data_test/img_002.jpg  
  inflating: data/test/data_test/img_005.jpg  
  inflating: data/test/data_test/img_012.jpg  
  inflating: data/test/data_test/img_015.jpg  
  inflating: data/test/data_test/img_022.jpg  
  inflating: data/test/data_test/img_025.jpg  
  inflating: data/test/data_test/img_032.jpg  
  inflating: data/test/data_test/img_035.jpg  
  inflating: data/test/data_test/img_042.jpg  
  inflating: data/test/data_test/img_045.jpg  
  inflating: data/test/data_test/img_052.jpg  
  inflating: data/test/data_test/img_055.jpg  
  inflating: data/test/data_test/img_062.jpg  
  inflating: data/test/data_test/img_065.jpg  
  inflating: data/test/data_test/img_072.jpg  
  inflating: data/test/data_test/img_075.jpg  
  inflating: data/test/data_test/img_082.jpg  
  inflating: data/test/data_test/img_085.jpg  
  inflating

In [27]:
#!mkdir models
!mkdir samples

In [2]:
# Mount Google Drive into the Colab VM at /content/drive.
# NOTE(review): nothing else in the visible notebook reads from or writes to
# the mounted drive -- confirm whether this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [22]:
import os
import torch
import time
import numpy as np
import torch.utils.data
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.distributions.normal import Normal
from torch.distributions import kl_divergence


In [7]:
# Global run configuration.
# CUDA: when True, the CUDA RNG is seeded and the model is moved to the GPU.
CUDA = True
# SEED: value passed to torch.manual_seed / torch.cuda.manual_seed.
SEED = 2
# BATCH_SIZE: batch size used when the DataLoaders are built (in file order).
BATCH_SIZE = 128
# The constants below are only referenced by the commented-out MNIST code.
# LOG_INTERVAL = 10
# EPOCHS = 10
# ZDIMS = 36

In [8]:
# Seed PyTorch's RNG with the shared SEED.
torch.manual_seed(SEED)
if CUDA:
    # Also seed the CUDA generator when the GPU switch is on.
    torch.cuda.manual_seed(SEED)

In [20]:


# # I do this so that the MNIST dataset is downloaded where I want it
# #os.chdir("/home/cpbotha/Downloads/pytorch-vae")



# # DataLoader instances will load tensors directly into GPU memory
# kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}

# # Download or load downloaded MNIST dataset
# # shuffle data at every epoch
# train_loader = torch.utils.data.DataLoader(
#     datasets.MNIST('data', train=True, download=True,
#                    transform=transforms.ToTensor()),
#     batch_size=BATCH_SIZE, shuffle=True, **kwargs)

# # Same for test data
# test_loader = torch.utils.data.DataLoader(
#     datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
#     batch_size=BATCH_SIZE, shuffle=True, **kwargs)


In [9]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
# For ToTensor's [0, 1] output this gives values in [-1, 1], the same range as
# the decoder's Tanh output; the image-saving helpers undo it with (x + 1) / 2.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                std=(0.5, 0.5, 0.5))
                               ])

In [38]:
# Held-out images, used for the validation loss and the reconstruction previews.
# drop_last=False so that every test image contributes to validation and the
# loader still yields a batch when the test set is smaller than BATCH_SIZE.
# Shuffling is kept so the preview grid shows a varied set of images.
dataset_test = datasets.ImageFolder('data/test/', transform=transform)
test_loader = DataLoader(dataset=dataset_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=False, num_workers=4, pin_memory=True)

# Training images: reshuffled every epoch; the incomplete final batch is dropped
# so every optimisation step sees a full batch.
dataset_train = datasets.ImageFolder('data/train/', transform=transform)
train_loader = DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=4, pin_memory=True)

In [15]:
def weights_init(m):
    """Initialise a convolutional layer: Xavier-uniform weights, zero bias.

    Intended to be passed to ``nn.Module.apply``, which calls it once for every
    sub-module. Only modules whose class name contains ``'Conv'`` (for example
    ``Conv2d`` and ``ConvTranspose2d``) are touched; all other modules are left
    exactly as constructed.

    Args:
        m: the module to (possibly) initialise.
    """
    classname = m.__class__.__name__
    if 'Conv' not in classname:
        return

    weight = getattr(m, 'weight', None)
    if weight is None:
        # A "Conv*"-named module that holds no weight tensor of its own.
        print("Skipping initialization of ", classname)
        return

    nn.init.xavier_uniform_(weight.data)

    # Layers built with bias=False have ``bias is None``; that is not a skipped
    # initialisation (the weight was set above), so no message is printed.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        bias.data.fill_(0)


In [16]:
class VAE(nn.Module):
    """Fully convolutional variational auto-encoder with a unit-Gaussian prior.

    The encoder ends in ``2 * z_dim`` channels that are split along the channel
    axis into the posterior mean and log-variance. The decoder maps a latent
    tensor with ``z_dim`` channels back to ``input_dim`` channels and finishes
    with ``Tanh``, so outputs lie in [-1, 1].

    Spatial sizes follow from the kernel/stride/padding triples: the two
    4/2/1 encoder convolutions each halve an even spatial size, the 5/1/0 and
    3/1/0 convolutions shrink it by 4 and 2 pixels; the decoder applies the
    transposed versions in reverse order.

    Args:
        input_dim: number of channels of the input (and reconstructed) images.
        dim: number of channels of the hidden convolutional layers.
        z_dim: number of latent channels.
    """

    def __init__(self, input_dim, dim, z_dim):
        super().__init__()

        def down(c_in, c_out, kernel, stride, pad):
            # Conv -> BatchNorm -> in-place ReLU
            return [
                nn.Conv2d(c_in, c_out, kernel, stride, pad),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        def up(c_in, c_out, kernel, stride, pad):
            # Transposed conv -> BatchNorm -> in-place ReLU
            return [
                nn.ConvTranspose2d(c_in, c_out, kernel, stride, pad),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        encoder_layers = []
        encoder_layers += down(input_dim, dim, 4, 2, 1)
        encoder_layers += down(dim, dim, 4, 2, 1)
        encoder_layers += down(dim, dim, 5, 1, 0)
        # Final projection to the posterior statistics (mean and log-variance).
        encoder_layers += [
            nn.Conv2d(dim, z_dim * 2, 3, 1, 0),
            nn.BatchNorm2d(z_dim * 2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = []
        decoder_layers += up(z_dim, dim, 3, 1, 0)
        decoder_layers += up(dim, dim, 5, 1, 0)
        decoder_layers += up(dim, dim, 4, 2, 1)
        # Output layer: back to image channels, squashed into [-1, 1].
        decoder_layers += [
            nn.ConvTranspose2d(dim, input_dim, 4, 2, 1),
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

        # Re-initialise every sub-module with the notebook's weights_init.
        self.apply(weights_init)

    def forward(self, x):
        """Encode ``x``, sample a latent and decode it.

        Args:
            x: batch of images with ``input_dim`` channels.

        Returns:
            Tuple ``(x_tilde, kl_div)``: the reconstruction decoded from a
            reparameterised sample of the posterior, and the KL divergence
            between the posterior and a unit Gaussian, summed over the channel
            axis and averaged over all remaining axes.
        """
        stats = self.encoder(x)
        mu, logvar = torch.chunk(stats, 2, dim=1)

        # Standard deviation from the log-variance: exp(logvar / 2).
        std = (logvar * .5).exp()
        posterior = Normal(mu, std)
        prior = Normal(torch.zeros_like(mu), torch.ones_like(logvar))

        per_element_kl = kl_divergence(posterior, prior)
        kl_div = per_element_kl.sum(1).mean()

        latent = posterior.rsample()
        x_tilde = self.decoder(latent)
        return x_tilde, kl_div


In [10]:
# # Not used: earlier fully connected MNIST VAE, kept for reference only.
# class VAE(nn.Module):
#     def __init__(self):
#         super(VAE, self).__init__()

#         # ENCODER
#         # 28 x 28 pixels = 784 input pixels, 400 outputs
#         self.fc1 = nn.Linear(784, 400)
#         # rectified linear unit layer from 400 to 400
#         # max(0, x)
#         self.relu = nn.ReLU()
#         self.fc21 = nn.Linear(400, ZDIMS)  # mu layer
#         self.fc22 = nn.Linear(400, ZDIMS)  # logvariance layer
#         # this last layer bottlenecks through ZDIMS connections

#         # DECODER
#         # from bottleneck to hidden 400
#         self.fc3 = nn.Linear(ZDIMS, 400)
#         # from hidden 400 to 784 outputs
#         self.fc4 = nn.Linear(400, 784)
#         self.sigmoid = nn.Sigmoid()

#     def encode(self, x: Variable) -> (Variable, Variable):
#         """Input vector x -> fully connected 1 -> ReLU -> (fully connected
#         21, fully connected 22)

#         Parameters
#         ----------
#         x : [128, 784] matrix; 128 digits of 28x28 pixels each

#         Returns
#         -------

#         (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS
#             variance units one for each latent dimension

#         """

#         # h1 is [128, 400]
#         h1 = self.relu(self.fc1(x))  # type: Variable
#         return self.fc21(h1), self.fc22(h1)

#     def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
#         """THE REPARAMETERIZATION IDEA:

#         For each training sample (we get 128 batched at a time)

#         - take the current learned mu, stddev for each of the ZDIMS
#           dimensions and draw a random sample from that distribution
#         - the whole network is trained so that these randomly drawn
#           samples decode to output that looks like the input
#         - which will mean that the std, mu will be learned
#           *distributions* that correctly encode the inputs
#         - due to the additional KLD term (see loss_function() below)
#           the distribution will tend to unit Gaussians

#         Parameters
#         ----------
#         mu : [128, ZDIMS] mean matrix
#         logvar : [128, ZDIMS] variance matrix

#         Returns
#         -------

#         During training random sample from the learned ZDIMS-dimensional
#         normal distribution; during inference its mean.
#         """

#         if self.training:
#             # multiply log variance with 0.5, then in-place exponent
#             # yielding the standard deviation
#             std = logvar.mul(0.5).exp_()  # type: Variable
#             # - std.data is the [128,ZDIMS] tensor that is wrapped by std
#             # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
#             #   and stddev 1 normal distribution that is 128 samples
#             #   of random ZDIMS-float vectors
#             eps = Variable(std.data.new(std.size()).normal_())
#             # - sample from a normal distribution with standard
#             #   deviation = std and mean = mu by multiplying mean 0
#             #   stddev 1 sample with desired std and mu, see
#             #   https://stats.stackexchange.com/a/16338
#             # - so we have 128 sets (the batch) of random ZDIMS-float
#             #   vectors sampled from normal distribution with learned
#             #   std and mu for the current input
#             return eps.mul(std).add_(mu)

#         else:
#             # During inference, we simply spit out the mean of the
#             # learned distribution for the current input.  We could
#             # use a random sample from the distribution, but mu of
#             # course has the highest probability.
#             return mu

#     def decode(self, z: Variable) -> Variable:
#         h3 = self.relu(self.fc3(z))
#         return self.sigmoid(self.fc4(h3))

#     def forward(self, x: Variable) -> (Variable, Variable, Variable):
#         mu, logvar = self.encode(x.view(-1, 784))
#         z = self.reparameterize(mu, logvar)
#         return self.decode(z), mu, logvar

In [None]:
# Model hyper-parameters, passed positionally to VAE(input_dim, dim, z_dim).
# INPUT_DIM: channels of the input images (and of the decoder output).
INPUT_DIM = 3
# DIM: channel width of the hidden convolutional layers.
DIM = 256
# Z_DIM: latent channels; also the channel count of the noise fed to the
# decoder when sampling.
Z_DIM = 128

In [17]:
#model = VAE()
# Build the convolutional VAE and, when the CUDA switch is on, move it to the GPU.
model = VAE(INPUT_DIM, DIM, Z_DIM)
if CUDA:
    model.cuda()

In [39]:
# Training-run settings.
# NOTE(review): in file order the DataLoaders are created before this cell,
# using the earlier BATCH_SIZE value, so this reassignment does not change
# their batch size unless the loader cell is re-run afterwards -- confirm intent.
BATCH_SIZE = 32
# Epoch count used by the training loop to bound its epoch range.
N_EPOCHS = 150
# train() prints a progress line every PRINT_INTERVAL batches.
PRINT_INTERVAL = 500
# Tag embedded in the checkpoint and sample image file names.
DATASET = 'DATATSET' #'FashionMNIST'  # CIFAR10 | MNIST | FashionMNIST
# NOTE(review): not referenced in the visible cells; the DataLoaders pass
# num_workers=4 literally.
NUM_WORKERS = 4

In [None]:
# Adam optimiser (AMSGrad variant enabled) over all model parameters, lr = 1e-3.
opt = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)

In [None]:

def train():
    """Run one training epoch over ``train_loader``.

    For each batch the loss is the squared reconstruction error summed over all
    elements and divided by the batch size, plus the KL term returned by the
    model; it is minimised with ``opt``. Every ``PRINT_INTERVAL`` batches a
    progress line is printed with the mean of ``[log_px, loss]`` over the last
    ``PRINT_INTERVAL`` batches and the wall time of the current batch.

    Reads the notebook-level ``model``, ``opt``, ``train_loader`` and
    ``PRINT_INTERVAL``. Returns nothing.
    """
    train_loss = []
    model.train()
    # Send batches to wherever the model lives, so the loop follows the CUDA
    # switch instead of requiring a GPU unconditionally.
    device = next(model.parameters()).device
    n_batches = len(train_loader)

    for batch_idx, (x, _) in enumerate(train_loader):
        start_time = time.time()
        x = x.to(device)

        x_tilde, kl_d = model(x)
        # reduction='sum' is the supported spelling of the deprecated
        # size_average=False.
        loss_recons = F.mse_loss(x_tilde, x, reduction='sum') / x.size(0)
        loss = loss_recons + kl_d

        # Monitoring-only quantity: unit-variance Gaussian NLL (mean over
        # elements) plus the KL term, converted to base 2. It never receives
        # gradients, so it is computed without building an autograd graph.
        # NOTE(review): the np.log(128) offset is a hard-coded constant whose
        # origin is not visible in this notebook -- confirm.
        with torch.no_grad():
            nll = -Normal(x_tilde, torch.ones_like(x_tilde)).log_prob(x)
            log_px = nll.mean().item() - np.log(128) + kl_d.item()
            log_px /= np.log(2)

        opt.zero_grad()
        loss.backward()
        opt.step()

        train_loss.append([log_px, loss.item()])

        if (batch_idx + 1) % PRINT_INTERVAL == 0:
            print('\tIter [{}/{} ({:.0f}%)]\tLoss: {} Time: {:5.3f} ms/batch'.format(
                batch_idx * len(x), len(train_loader.dataset),
                # Percentage of the epoch completed (scale by 100, not by the
                # print interval).
                100. * batch_idx / n_batches,
                np.asarray(train_loss)[-PRINT_INTERVAL:].mean(0),
                1000 * (time.time() - start_time)
            ))


def test():
    """Evaluate the model on ``test_loader`` and return the mean loss.

    The per-batch loss is computed exactly as in training (summed squared
    reconstruction error divided by the batch size, plus the KL term). Batches
    are weighted by their size, so the result is a per-sample mean even when the
    last batch is smaller; with equally sized batches this equals the plain mean
    of the per-batch losses.

    Reads the notebook-level ``model`` and ``test_loader``.

    Returns:
        The mean validation loss as a ``numpy.float64`` (NaN when the loader
        yields no batches).
    """
    start_time = time.time()
    model.eval()
    # Follow the model's device rather than assuming a GPU.
    device = next(model.parameters()).device

    total_loss = 0.0
    n_seen = 0
    with torch.no_grad():
        for x, _ in test_loader:
            x = x.to(device)
            x_tilde, kl_d = model(x)
            # reduction='sum' is the supported spelling of the deprecated
            # size_average=False.
            loss_recons = F.mse_loss(x_tilde, x, reduction='sum') / x.size(0)
            loss = loss_recons + kl_d
            total_loss += loss.item() * x.size(0)
            n_seen += x.size(0)

    mean_loss = np.float64(total_loss / n_seen) if n_seen else np.float64('nan')

    print('\nValidation Completed!\tLoss: {:5.4f} Time: {:5.3f} s'.format(
        mean_loss,
        time.time() - start_time
    ))
    return mean_loss


In [40]:
def generate_reconstructions():
    """Save a grid of test images followed by their VAE reconstructions.

    Takes up to 32 images from one batch of ``test_loader``, reconstructs them
    with ``model`` and writes originals and reconstructions, mapped from
    [-1, 1] back to [0, 1], to
    ``samples/vae_reconstructions_<DATASET>.png`` with 8 images per row.

    Reads the notebook-level ``model``, ``test_loader`` and ``DATASET``.
    """
    model.eval()
    # Follow the model's device rather than assuming a GPU.
    device = next(model.parameters()).device

    # Use the built-in next(): the Python-2 style ``.next()`` alias is not
    # available on DataLoader iterators in recent PyTorch releases.
    x, _ = next(iter(test_loader))
    x = x[:32].to(device)

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        x_tilde, _ = model(x)

    x_cat = torch.cat([x, x_tilde], 0)
    # Undo the (x - 0.5) / 0.5 normalisation before saving.
    images = (x_cat.cpu() + 1) / 2

    save_image(
        images,
        'samples/vae_reconstructions_{}.png'.format(DATASET),
        nrow=8
    )


def generate_samples():
    """Decode 64 latents drawn from the unit-Gaussian prior and save them.

    Draws noise of shape ``(64, Z_DIM, 1, 1)``, runs it through
    ``model.decoder`` and writes the result, mapped from [-1, 1] to [0, 1], to
    ``samples/vae_samples_<DATASET>.png`` with 8 images per row.

    Reads the notebook-level ``model``, ``Z_DIM`` and ``DATASET``.
    """
    model.eval()
    # Follow the model's device rather than assuming a GPU.
    device = next(model.parameters()).device

    # The noise is drawn on the CPU and then moved, as before, so the random
    # stream is unchanged; no autograd graph is needed for sampling.
    with torch.no_grad():
        z_e_x = torch.randn(64, Z_DIM, 1, 1).to(device)
        x_tilde = model.decoder(z_e_x)

    # Undo the (x - 0.5) / 0.5 normalisation before saving.
    images = (x_tilde.cpu() + 1) / 2

    save_image(
        images,
        'samples/vae_samples_{}.png'.format(DATASET),
        nrow=8
    )




Epoch 1:





Validation Completed!	Loss: 2777.6169 Time: 0.861 s
Saving model!
Epoch 2:

Validation Completed!	Loss: 1126.2717 Time: 0.885 s
Saving model!
Epoch 3:

Validation Completed!	Loss: 972.3497 Time: 0.929 s
Saving model!
Epoch 4:

Validation Completed!	Loss: 1135.3329 Time: 0.927 s
Not saving model! Last saved: 3
Epoch 5:

Validation Completed!	Loss: 790.3290 Time: 0.910 s
Saving model!
Epoch 6:

Validation Completed!	Loss: 755.8874 Time: 0.915 s
Saving model!
Epoch 7:

Validation Completed!	Loss: 717.1599 Time: 0.932 s
Saving model!
Epoch 8:

Validation Completed!	Loss: 684.0746 Time: 0.912 s
Saving model!
Epoch 9:

Validation Completed!	Loss: 708.1324 Time: 0.939 s
Not saving model! Last saved: 8
Epoch 10:

Validation Completed!	Loss: 709.3853 Time: 0.914 s
Not saving model! Last saved: 8
Epoch 11:

Validation Completed!	Loss: 659.0143 Time: 0.935 s
Saving model!
Epoch 12:

Validation Completed!	Loss: 744.4080 Time: 0.916 s
Not saving model! Last saved: 11
Epoch 13:

Validation Complete

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


Epoch 51:

Validation Completed!	Loss: 470.0304 Time: 0.925 s
Not saving model! Last saved: 46
Epoch 52:

Validation Completed!	Loss: 528.2658 Time: 0.893 s
Not saving model! Last saved: 46
Epoch 53:

Validation Completed!	Loss: 492.2996 Time: 0.923 s
Not saving model! Last saved: 46
Epoch 54:

Validation Completed!	Loss: 443.7544 Time: 0.885 s
Saving model!
Epoch 55:

Validation Completed!	Loss: 425.8155 Time: 0.894 s
Saving model!
Epoch 56:

Validation Completed!	Loss: 440.9887 Time: 0.917 s
Not saving model! Last saved: 55
Epoch 57:

Validation Completed!	Loss: 447.4692 Time: 0.919 s
Not saving model! Last saved: 55
Epoch 58:

Validation Completed!	Loss: 501.8758 Time: 0.895 s
Not saving model! Last saved: 55
Epoch 59:

Validation Completed!	Loss: 580.7373 Time: 0.904 s
Not saving model! Last saved: 55
Epoch 60:

Validation Completed!	Loss: 528.8986 Time: 0.931 s
Not saving model! Last saved: 55
Epoch 61:

Validation Completed!	Loss: 533.1783 Time: 0.905 s
Not saving model! Last sav

In [None]:
# Main loop: train, validate, checkpoint on improvement, then save preview images.

# The checkpoint and the preview images are written into these folders; create
# them here so the loop does not depend on an earlier shell cell having run.
os.makedirs('models', exist_ok=True)
os.makedirs('samples', exist_ok=True)

# Start from +inf so the first finite validation loss always counts as an
# improvement, whatever its magnitude.
BEST_LOSS = float('inf')
LAST_SAVED = -1
# Epochs are numbered from 1; the +1 makes the loop run exactly N_EPOCHS epochs.
for epoch in range(1, N_EPOCHS + 1):
    print("Epoch {}:".format(epoch))
    train()
    cur_loss = test()

    if cur_loss <= BEST_LOSS:
        # New best validation loss: overwrite the single best-model checkpoint.
        BEST_LOSS = cur_loss
        LAST_SAVED = epoch
        print("Saving model!")
        torch.save(model.state_dict(), 'models/{}_vae.pt'.format(DATASET))
    else:
        print("Not saving model! Last saved: {}".format(LAST_SAVED))

    # Refresh the preview images after every epoch (each call overwrites the
    # previous file).
    generate_reconstructions()
    generate_samples()

In [42]:
# def loss_function(recon_x, x, mu, logvar) -> Variable:
#     # how well do input x and output recon_x agree?
#     BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784))

#     # KLD is Kullback–Leibler divergence -- how much does one learned
#     # distribution deviate from another, in this specific case the
#     # learned distribution from the unit Gaussian

#     # see Appendix B from VAE paper:
#     # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
#     # https://arxiv.org/abs/1312.6114
#     # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
#     # note the negative D_{KL} in appendix B of the paper
#     KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
#     # Normalise by same number of elements as in reconstruction
#     KLD /= BATCH_SIZE * 784

#     # BCE tries to make our reconstruction as accurate as possible
#     # KLD tries to push the distributions as close as possible to unit Gaussian
#     return BCE + KLD

# # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
# optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [43]:
# def train(epoch):
#     # toggle model to train mode
#     model.train()
#     train_loss = 0
#     # in the case of MNIST, len(train_loader.dataset) is 60000
#     # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
#     for batch_idx, (data, _) in enumerate(train_loader):
#         data = Variable(data)
#         if CUDA:
#             data = data.cuda()
#         optimizer.zero_grad()

#         # push whole batch of data through VAE.forward() to get recon_loss
#         recon_batch, mu, logvar = model(data)
#         # calculate scalar loss
#         loss = loss_function(recon_batch, data, mu, logvar)
#         # calculate the gradient of the loss w.r.t. the graph leaves
#         # i.e. input variables -- by the power of pytorch!
#         loss.backward()
#         train_loss += loss.data#[0]
#         optimizer.step()
#         if batch_idx % LOG_INTERVAL == 0:
#             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#                 epoch, batch_idx * len(data), len(train_loader.dataset),
#                 100. * batch_idx / len(train_loader),
#                 loss.data/ len(data)))

#     print('====> Epoch: {} Average loss: {:.4f}'.format(
#           epoch, train_loss / len(train_loader.dataset)))


In [44]:
# def test(epoch):
#     # toggle model to test / inference mode
#     model.eval()
#     test_loss = 0

#     # each data is of BATCH_SIZE (default 128) samples
#     for i, (data, _) in enumerate(test_loader):
#         if CUDA:
#             # make sure this lives on the GPU
#             data = data.cuda()

#         # we're only going to infer, so no autograd at all required: volatile=True
#         data = Variable(data, volatile=True)
#         recon_batch, mu, logvar = model(data)
#         test_loss += loss_function(recon_batch, data, mu, logvar).data#[0]
#         if i == 0:
#           n = min(data.size(0), 8)
#           # for the first 128 batch of the epoch, show the first 8 input digits
#           # with right below them the reconstructed output digits
#           comparison = torch.cat([data[:n],
#                                   recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]])
#           save_image(comparison.data.cpu(),
#                      'results/reconstruction_' + str(epoch) + '.png', nrow=n)

#     test_loss /= len(test_loader.dataset)
#     print('====> Test set loss: {:.4f}'.format(test_loss))




In [49]:
# Start from an empty results/ folder.
# NOTE(review): only the commented-out MNIST code writes to results/; the active
# code saves into samples/ and models/ -- confirm this cell is still needed.
!rm -rf results
!mkdir results

In [None]:
# for epoch in range(1, EPOCHS + 1):
#     train(epoch)
#     test(epoch)

#     # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST
#     # digits in latent space
#     sample = Variable(torch.randn(64, ZDIMS))
#     if CUDA:
#         sample = sample.cuda()
#     sample = model.decode(sample).cpu()

#     # save out as an 8x8 matrix of MNIST digits
#     # this will give you a visual idea of how well latent space can generate things
#     # that look like digits
#     save_image(sample.data.view(64, 1, 28, 28),'results/sample_' + str(epoch) + '.png')