In [1]:
import pandas as pd
import numpy as np
import torch
import torchvision 
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

These are convenience functions that build the layers with their hyperparameters already set, so the hyperparameters do not have to be repeated every time a layer is used.

In [2]:
def conv(channels_in, channels_out):
    """Build a bias-free 3x3 convolution that keeps the spatial size.

    Args:
        channels_in: number of input channels.
        channels_out: number of output channels.

    Returns:
        An ``nn.Conv2d`` with kernel size 3, stride 1, 'same' padding and no bias.
    """
    return nn.Conv2d(
        in_channels=channels_in,
        out_channels=channels_out,
        kernel_size=3,
        stride=1,
        padding='same',
        bias=False,
    )

def pool():
    """Build a 2x2 max-pooling layer with stride 2.

    Returns:
        An ``nn.MaxPool2d`` with kernel size 2 and stride 2.
    """
    window = 2
    return nn.MaxPool2d(kernel_size=window, stride=window)

def conv1x1(channels_in, channels_out):
    """Build a 1x1 convolution (with bias) that only changes the channel count.

    Args:
        channels_in: number of input channels.
        channels_out: number of output channels.

    Returns:
        An ``nn.Conv2d`` with kernel size 1, stride 1 and 'same' padding.
    """
    return nn.Conv2d(
        in_channels=channels_in,
        out_channels=channels_out,
        kernel_size=1,
        stride=1,
        padding='same',
    )

def bn(channels_in):
    """Build a 2D batch-normalisation layer.

    Args:
        channels_in: number of channels of the tensor being normalised.

    Returns:
        An ``nn.BatchNorm2d`` over ``channels_in`` features with default settings.
    """
    return nn.BatchNorm2d(num_features=channels_in)

def relu():
    """Build an in-place ReLU activation.

    Returns:
        An ``nn.ReLU`` that overwrites its input tensor (``inplace=True``).
    """
    activation = nn.ReLU(inplace=True)
    return activation

def up():
    """Build an upsampling layer that doubles the spatial size.

    Returns:
        An ``nn.Upsample`` with ``scale_factor=2`` and the default interpolation mode.
    """
    upsampler = nn.Upsample(scale_factor=2)
    return upsampler

def convUp(channels_in, channels_out):
    """Build the bias-free 3x3 convolution used in the decoder blocks.

    Args:
        channels_in: number of input channels.
        channels_out: number of output channels.

    Returns:
        An ``nn.Conv2d`` with kernel size 3, stride 1, 'same' padding and no bias.
    """
    return nn.Conv2d(
        in_channels=channels_in,
        out_channels=channels_out,
        kernel_size=3,
        stride=1,
        padding='same',
        bias=False,
    )

This is a Sequential module for a single block in the encoder.

In [3]:
class down_Sample_Block(nn.Sequential):
    """One encoder stage: four 3x3 convolutions, batch norm, ReLU, then max-pooling.

    The sub-modules are registered as ``conv1`` .. ``conv4``, ``norm``, ``relu``
    and ``pool``, in that order.

    Args:
        channels_in: channels of the incoming tensor.
        channels_out: channels produced by every convolution in the block.
    """

    def __init__(self, channels_in, channels_out):
        super().__init__()

        # Only the first convolution changes the channel count; the other
        # three map channels_out -> channels_out.
        conv_inputs = (channels_in, channels_out, channels_out, channels_out)
        for index, width_in in enumerate(conv_inputs, start=1):
            self.add_module(f'conv{index}', conv(width_in, channels_out))

        self.add_module('norm', bn(channels_out))
        self.add_module('relu', relu())
        # The 2x2 / stride-2 pooling halves the height and width.
        self.add_module('pool', pool())

This is a Sequential module for a single block in the decoder.

In [4]:
class up_Sample_Block(nn.Sequential):
    """One decoder stage: upsampling, four 3x3 convolutions, batch norm, then ReLU.

    The sub-modules are registered as ``upSample``, ``conv1`` .. ``conv4``,
    ``norm`` and ``relu``, in that order.

    Args:
        channels_in: channels of the incoming tensor.
        channels_out: channels produced by every convolution in the block.
    """

    def __init__(self, channels_in, channels_out):
        super().__init__()

        # Upsampling comes first, so the convolutions run at the doubled resolution.
        self.add_module('upSample', up())

        # Only the first convolution changes the channel count.
        conv_inputs = (channels_in, channels_out, channels_out, channels_out)
        for index, width_in in enumerate(conv_inputs, start=1):
            self.add_module(f'conv{index}', convUp(width_in, channels_out))

        self.add_module('norm', bn(channels_out))
        self.add_module('relu', relu())

Sequential module for the encoder as a whole. Each block halves the height and width of the image while the number of channels grows (doubling in most blocks), until the tensor reaches (batch, channels, 1, 1).

It is then passed through a conv_1x1 to reduce channels to 128, then it is flattened to remove the height and width dimensions (last two).

In [5]:
class Encoder(nn.Sequential):
    """Stack of seven down-sampling blocks followed by a 1x1 convolution and a flatten.

    The blocks are registered as ``layer_1`` .. ``layer_7``, then ``conv1x1`` and
    ``flatten``. Every block halves the height and width, so a 128x128 input
    goes 64, 32, 16, 8, 4, 2 and finally 1.

    Args:
        encoder_output_length: number of channels produced by the final 1x1
            convolution (and, for a 1x1 feature map, the flattened vector length).
    """

    def __init__(self, encoder_output_length = 128):
        super().__init__()

        # Channel count entering the first block, then leaving each block in turn.
        channel_plan = (3, 8, 16, 32, 64, 128, 256, 256)
        stages = zip(channel_plan[:-1], channel_plan[1:])
        for stage_number, (width_in, width_out) in enumerate(stages, start=1):
            self.add_module(f'layer_{stage_number}', down_Sample_Block(width_in, width_out))

        self.add_module('conv1x1', conv1x1(channel_plan[-1], encoder_output_length))
        self.add_module('flatten', nn.Flatten())

Module for the bottleneck block. This is where the mean and standard deviation layers live, and where the reparameterization takes place.

The output of the reconstruction has dimensions (batch_size, channels), so in order to make it suitable for the decoder it is unsqueezed at dimensions 2 and 3 (the height and width dimensions are added back, just as they were removed at the end of the encoder).

In [6]:
class Bottleneck(nn.Module):
    """Latent bottleneck: mean / standard-deviation heads plus the reparameterization.

    Args:
        latent_vec_len: size of the latent vector.
        encoder_output_length: size of the feature vector received from the encoder.
        decoder_input_length: number of channels handed to the decoder.
    """

    def __init__(
        self,
        latent_vec_len = 32,
        encoder_output_length = 128,
        decoder_input_length = 256
    ):
        super().__init__()

        # Registration order is kept: mean, standard deviation, output projection.
        self.mean_layer = nn.Linear(encoder_output_length, latent_vec_len)
        self.standard_deviation_layer = nn.Linear(encoder_output_length, latent_vec_len)
        self.output_linear_layer = nn.Linear(latent_vec_len, decoder_input_length)

    def forward(self, x):
        """Sample a latent vector from ``x`` and project it for the decoder.

        Args:
            x: feature tensor whose last dimension is ``encoder_output_length``.

        Returns:
            A tuple ``(x_reconstructed, mean, standard_deviation)``, where
            ``x_reconstructed`` has singleton dimensions inserted at positions 2 and 3.
        """
        mean = self.mean_layer(x)
        standard_deviation = self.standard_deviation_layer(x)

        # Reparameterization: z = mean + std * eps, with eps drawn from N(0, 1).
        noise = torch.randn_like(standard_deviation)
        latent = mean + standard_deviation * noise

        # Re-insert the height and width axes that the encoder flattened away.
        projected = self.output_linear_layer(latent)
        x_reconstructed = projected.unsqueeze(2).unsqueeze(3)

        return x_reconstructed, mean, standard_deviation

Sequential module for the decoder block as a whole, works in opposite fashion to the encoder.

It is designed to take a tensor of dimensions (batch, channels, 1, 1) and double its height and width, and halve the number of channels, until it reaches (batch, 3, 128, 128).

In [7]:
class Decoder(nn.Sequential):
    """Stack of seven up-sampling blocks that mirrors the encoder.

    The blocks are registered as ``layer_1`` .. ``layer_7``. Every block doubles
    the height and width, so a 1x1 input goes 2, 4, 8, 16, 32, 64 and finally
    128, while the channels go from 256 down to 3.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the first block, then leaving each block in turn.
        channel_plan = (256, 256, 128, 64, 32, 16, 8, 3)
        stages = zip(channel_plan[:-1], channel_plan[1:])
        for stage_number, (width_in, width_out) in enumerate(stages, start=1):
            self.add_module(f'layer_{stage_number}', up_Sample_Block(width_in, width_out))

Module for the VAE itself.

In [8]:
class VAE(nn.Module):
    """Variational auto-encoder assembled from ``Encoder``, ``Bottleneck`` and ``Decoder``.

    The three parts are registered as ``encoder``, ``bottleneck`` and ``decoder``,
    each built with its default arguments.
    """

    def __init__(self):
        super().__init__()
        self.add_module('encoder', Encoder())
        self.add_module('bottleneck', Bottleneck())
        self.add_module('decoder', Decoder())

    def forward(self, x):
        """Encode ``x``, sample through the bottleneck, and decode the sample.

        Args:
            x: batch of input images.

        Returns:
            A tuple ``(reconstruction, mean, standard_deviation)``; the last two
            are the bottleneck outputs.
        """
        x = self.encoder(x)
        x, mean, standard_deviation = self.bottleneck(x)
        x = self.decoder(x)
        return x, mean, standard_deviation

    def generate(self, device, num_samples = 1):
        """Decode random noise into new images, without tracking gradients.

        The noise stands in for the encoder's output: it is pushed through the
        bottleneck and then through the decoder. The model must already be on
        ``device``. The decoder contains batch-norm layers, so call ``eval()``
        on the model first if running statistics should be used.

        Args:
            device: device on which the noise tensor is placed.
            num_samples: how many images to generate in one batch (default 1).

        Returns:
            The decoder output for ``num_samples`` noise vectors.
        """
        # Take the noise width from the bottleneck's input layer rather than a
        # hard-coded 128, so the two cannot drift apart.
        feature_length = self.bottleneck.mean_layer.in_features
        with torch.no_grad():
            x = torch.randn(num_samples, feature_length)
            x = x.to(device)
            x, _, _ = self.bottleneck(x)
            x = self.decoder(x)
        return x

In [10]:
model = VAE()

# Please change the path to that of the weights on your machine.

PATH = "E:\\College\\FCAI-4th Year\\First Term\\Generative Adversarial Networks\\Assginments\\Assignment 4\\VAE_weights.pth"

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# machine without CUDA; the model itself is still on the CPU at this point.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you trust.
model.load_state_dict(torch.load(PATH, map_location="cpu"))

# Inference mode: batch-norm layers use their running statistics.
model.eval()

VAE(
  (encoder): Encoder(
    (layer_1): down_Sample_Block(
      (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv3): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv4): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (layer_2): down_Sample_Block(
      (conv1): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
      (conv4): Conv2d(16, 16, kernel_size=(3, 3), stride

In [11]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [12]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device)

Generate images from random noise (no test data is involved: `model.generate` draws a random vector and decodes it)

In [17]:
import torchvision.transforms as T

# Decode one random sample; generate() returns a batch holding a single image.
new_image = model.generate(device)

# Drop the batch dimension and clamp to [0, 1]. ToPILImage scales float
# tensors by 255 and casts them to 8-bit, and the decoder ends in a ReLU
# (unbounded above), so out-of-range values would otherwise corrupt pixels.
new_image = torch.squeeze(new_image, 0).clamp(0, 1)

transform = T.ToPILImage()
img = transform(new_image)
img.show()

In [41]:
import tkinter as tk
import torchvision.transforms as T

# Small Tk window: a slider picks how many images to generate (1 to 5) and a
# button triggers the generation.
root = tk.Tk()
root.title('VAE Slider')
root.geometry("400x400")


def generate():
    """Generate and show as many images as the slider currently selects."""
    number_of_images = slider.get()
    transform = T.ToPILImage()
    for _ in range(number_of_images):
        new_image = model.generate(device)
        # Drop the batch dimension and clamp to [0, 1]: ToPILImage scales
        # float tensors by 255 and casts them to 8-bit, so out-of-range
        # values would otherwise corrupt pixels.
        new_image = torch.squeeze(new_image, 0).clamp(0, 1)
        img = transform(new_image)
        img.show()


slider = tk.Scale(root, from_=1, to=5, orient=tk.HORIZONTAL)
slider.pack()

button = tk.Button(root, text="Generate", command=generate)
button.pack()

# Blocks this cell until the window is closed.
root.mainloop()