* Aladdin Persson:  Variational Autoencoder from scratch in PyTorch

Link: https://www.youtube.com/watch?v=VELQT1-hILo

* Implement Deep Autoencoder in PyTorch for Image Reconstruction

Link: https://www.geeksforgeeks.org/implement-deep-autoencoder-in-pytorch-for-image-reconstruction/

* How to Generate Images using Autoencoders

Link: https://towardsdatascience.com/how-to-generate-images-using-autoencoders-acfbc6c3555e


* Aladdin Persson: VAE source code (Machine-Learning-Collection repository on GitHub)

Link: https://github.com/aladdinpersson/Machine-Learning-Collection/tree/master/ML/Pytorch/more_advanced/VAE


* Variational Auto-Encoder (VAE) PyTorch tutorial (Medium)

Link: https://medium.com/@rekalantar/variational-auto-encoder-vae-pytorch-tutorial-dce2d2fe0f5f

In [21]:
import torch
from torch import nn
from torchsummary import summary
import torchvision.datasets as datasets
import torchvision.transforms as transforms


In [22]:
# Root folder handed to the MNIST dataset loader (the data is downloaded here);
# a relative path, so it is resolved against the current working directory.
other_data_dir = "other_data"

In [23]:
# DataLoader settings.
batch_size = 32
workers = 4     # number of DataLoader worker processes
shuffle = True  # reshuffle the training set every epoch

# Only ToTensor() is applied, so pixel values stay in [0, 1]. The VAE decoder
# in this notebook ends in a sigmoid, so its reconstructions are also confined
# to [0, 1]; normalising the inputs to [-1, 1] would hand the model targets it
# can never reproduce (and invalid targets for a binary-cross-entropy
# reconstruction loss).
dataset = datasets.MNIST(
    root=other_data_dir,
    train=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
    ]),
    download=True,  # fetched into `other_data_dir` if not already present
)

dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=workers,
)

In [24]:
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model dimensions.
input_dim = 28*28  # a 28x28 image flattened into a vector of 784 values
h_dim = 200        # hidden-layer width
z_dim = 20         # latent-vector size

# NOTE: VariationalAutoEncoder is defined in a cell further down this notebook;
# that cell has to be executed before this one on a fresh kernel.
# The model is moved to DEVICE so that the device selected above is actually
# used; inputs fed to the model must live on the same device.
model = VariationalAutoEncoder(input_dim, h_dim, z_dim).to(DEVICE)

In [25]:
# Bare expression: the notebook renders the module's repr, listing its registered sub-layers.
model

VariationalAutoEncoder(
  (img_2hid): Linear(in_features=784, out_features=200, bias=True)
  (hid_2mu): Linear(in_features=200, out_features=20, bias=True)
  (hid_2sigma): Linear(in_features=200, out_features=20, bias=True)
  (z_2hid): Linear(in_features=20, out_features=200, bias=True)
  (hid_2img): Linear(in_features=200, out_features=784, bias=True)
  (relu): ReLU()
)

In [26]:
# torchsummary builds its dummy input on `device`, which defaults to "cuda".
# Pass the device the model's parameters actually live on; otherwise the call
# fails with a device mismatch whenever CUDA is available but the model is on
# the CPU. Input size (1, 784) is one flattened 28x28 image per sample.
summary(model, (1, 784), device=next(model.parameters()).device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 200]         157,000
              ReLU-2               [-1, 1, 200]               0
            Linear-3                [-1, 1, 20]           4,020
            Linear-4                [-1, 1, 20]           4,020
            Linear-5               [-1, 1, 200]           4,200
              ReLU-6               [-1, 1, 200]               0
            Linear-7               [-1, 1, 784]         157,584
Total params: 326,824
Trainable params: 326,824
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 1.25
Estimated Total Size (MB): 1.26
----------------------------------------------------------------


In [15]:
# Training hyper-parameters (presumably consumed by a training loop elsewhere
# in the notebook).
n_epochs = 10
LR_RATE = 0.0003  # i.e. 3e-4, the so-called "Karpathy constant"

In [1]:
import torch
from torch import nn


class VariationalAutoEncoder(nn.Module):
    """Fully connected variational autoencoder with one hidden layer per side.

    Encoder: input -> hidden (ReLU) -> two parallel linear heads (mu, sigma).
    Decoder: latent -> hidden (ReLU) -> input-sized output squashed by a sigmoid.

    Args:
        input_dim: Size of the last dimension of the input (and of the output).
        h_dim: Width of the hidden layer used by both encoder and decoder.
        z_dim: Size of the latent vector.
    """

    def __init__(self, input_dim, h_dim=200, z_dim=20):
        super().__init__()

        # --- encoder: input -> hidden -> (mu, sigma) ---
        self.img_2hid = nn.Linear(in_features=input_dim, out_features=h_dim)
        self.hid_2mu = nn.Linear(in_features=h_dim, out_features=z_dim)
        self.hid_2sigma = nn.Linear(in_features=h_dim, out_features=z_dim)

        # --- decoder: latent -> hidden -> reconstruction ---
        self.z_2hid = nn.Linear(in_features=z_dim, out_features=h_dim)
        self.hid_2img = nn.Linear(in_features=h_dim, out_features=input_dim)

        # Single ReLU module shared by the encoder and the decoder.
        self.relu = nn.ReLU()

    def encode(self, x):
        """Map an input to the two latent heads.

        Returns:
            Tuple ``(mu, sigma)``: the raw outputs of the two linear heads
            applied to the ReLU-activated hidden representation of ``x``.
        """
        hidden = self.img_2hid(x)
        hidden = self.relu(hidden)
        mu = self.hid_2mu(hidden)
        sigma = self.hid_2sigma(hidden)
        return mu, sigma

    def decode(self, z):
        """Map a latent vector back to input space.

        Returns:
            Tensor with ``input_dim`` features, each in (0, 1) because of the
            final sigmoid.
        """
        hidden = self.z_2hid(z)
        hidden = self.relu(hidden)
        logits = self.hid_2img(hidden)
        return torch.sigmoid(logits)

    def forward(self, x):
        """Encode ``x``, sample a latent vector, and decode it.

        The latent sample is ``mu + sigma * noise`` with ``noise`` drawn from a
        standard normal of the same shape as ``sigma``, so the sampling step is
        a differentiable function of ``mu`` and ``sigma``.

        Returns:
            Tuple ``(x_reconstructed, mu, sigma)``.
        """
        mu, sigma = self.encode(x)
        noise = torch.randn_like(sigma)
        latent = mu + sigma * noise
        return self.decode(latent), mu, sigma