# This is a notebook

In [1]:
import torch
from torch import cuda
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils import data
from torchvision.utils import save_image

In [2]:
data_dir = '../../../Data/'
batch_size = 64

torch.manual_seed(22)
device = torch.device("cuda" if cuda.is_available() else "cpu")
print(device)


cuda


In [3]:
dataset = datasets.ImageFolder(data_dir, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
]))

loader = data.DataLoader(dataset,
                         batch_size=batch_size,
                         shuffle=True,
                         pin_memory=cuda.is_available(),
                         num_workers= 1 if cuda.is_available() else 4)

In [8]:
## VAE - Existing Work
First the VAE that was already developed was migrated from Tensorflow to Pytorch
before starting to work on the VSC

(96, 96, 3)

class VAE(nn.Module):
    def __init__(self, latent_dim):
        super(VAE, self).__init__()
        self.latent_dim = latent_dim

        channels = 'placeholder'
        # Encoder
        # in_channel = same as image, filters=128, kernel=3, strides=1, padding=same, activation=relu
        # TODO Calculate padding size
        self.encoder_conv1 = self.getConvolutionLayer(3, 128)
        self.encoder_conv2 = self.getConvolutionLayer(128, 64)
        self.encoder_conv3 = self.getConvolutionLayer(64, 32)

        self.encoder_fc1 = nn.Linear(204800, self.latent_dim)
        self.encoder_fc2 = nn.Linear(204800, self.latent_dim)

    def getConvolutionLayer(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )


    def encode(self, x):
        print(x.shape)
        x = self.encoder_conv1(x)
        print(x.shape)
        x = self.encoder_conv2(x)
        print(x.shape)
        x = self.encoder_conv3(x)
        print(x.shape)
        x = torch.flatten(x)
        print(x.shape)
        mu = self.encoder_fc1(x)
        sigma = self.encoder_fc1(x)

        return mu, sigma

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 784))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

In [None]:
vae = VAE(256).to(device)
vae

In [None]:
vae.eval()
images, _ = next(iter(loader))
images = images.to(device)
images[0].shape

In [None]:
vae.encode(images)

In [22]:
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD

VAE(
  (encoder_conv1): Sequential(
    (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (encoder_conv2): Sequential(
    (0): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (encoder_conv3): Sequential(
    (0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (encoder_fc1): Linear(in_features=204800, out_features=256, bias=True)
  (encoder_fc2): Linear(in_features=204800, out_features=256, bias=True)
)

In [23]:
vae.eval()
images, _ = next(iter(loader))
images = images.to(device)
images[0].shape

torch.Size([3, 96, 96])

In [24]:
vae.encode(images)

torch.Size([64, 3, 96, 96])
torch.Size([64, 128, 47, 47])
torch.Size([64, 64, 22, 22])
torch.Size([64, 32, 10, 10])
torch.Size([204800])


(tensor([-0.0317,  0.1145,  0.0514, -0.2172, -0.0066,  0.1483,  0.0683,  0.0021,
          0.0288, -0.0779, -0.0307,  0.0071, -0.0198, -0.0838,  0.0385,  0.0360,
         -0.1263, -0.0085, -0.1800,  0.0750, -0.0464,  0.0898,  0.0709,  0.0111,
         -0.0580,  0.0093,  0.0509, -0.0154, -0.1185,  0.0053, -0.0628,  0.0523,
         -0.0328, -0.0397, -0.0276,  0.0601,  0.0437,  0.0265,  0.0677,  0.0509,
         -0.0590, -0.0324, -0.1205, -0.0275, -0.0789, -0.0169, -0.0592,  0.0883,
          0.0240, -0.0626, -0.0341, -0.0232,  0.0354,  0.0673,  0.1285,  0.0425,
         -0.0344,  0.1079, -0.0058,  0.0493, -0.1381,  0.0340, -0.1397,  0.0521,
          0.0432,  0.0355,  0.0382,  0.0197, -0.0688,  0.0234,  0.1086, -0.1126,
         -0.0594,  0.0768, -0.0139,  0.0596,  0.0729,  0.0511, -0.0322,  0.0178,
          0.0599, -0.0466,  0.0675,  0.0657,  0.0285,  0.0120, -0.1209,  0.0134,
          0.0505,  0.1134, -0.0223,  0.1140,  0.0841,  0.0985, -0.1348, -0.0560,
         -0.0098,  0.0150, -

In [None]:
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD