<a href="https://colab.research.google.com/github/SydanJainen/4EU-DeepLearningCourse/blob/main/Assignments/AssignementVAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from matplotlib import cm
import torch; torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions
import numpy as np
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import random_split

In [None]:
# Number of samples per mini-batch, shared by all three loaders below.
batch_size = 32

# MNIST training split, downloaded into ./data if needed; ToTensor is the
# only preprocessing step.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# MNIST test split. No download flag is passed here.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
# The validation loader iterates over the same test split as test_loader.
val_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

# Flatten every raw training image to a 784-vector and report the dataset
# size together with the pixel value range.
train_images = train_loader.dataset.data.detach().numpy().reshape(-1, 28 * 28)
print(f'Num training images: {len(train_images)},\tmin val: {train_images.min():.3f},\tmax val: {train_images.max():.3f}')

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

Try to implement a **denoising autoencoder** (DAE); you can use the function below to generate noisy input samples with Gaussian noise.

1. Modify the function to also allow adding salt-and-pepper noise to the images.
2. Implement a DAE; you can use different numbers of layers, activation functions, and latent-space dimensions.
3. Make a plot with the original image, the noisy image and the reconstructed image for several examples of input images.
4. Compare how different types (Gaussian / salt-and-pepper) and levels of noise affect the performance of the DAE.
5. Compare different DAE architectures with respect to their denoising performance.


In [None]:
# Raw pixel arrays of both splits, read from each dataset's `.data` attribute
# rather than by iterating over the loaders.
train_images, test_images = (
    loader.dataset.data.detach().numpy()
    for loader in (train_loader, test_loader)
)

In [None]:
def add_gaussian_noise(img,mean=10,var=30):
    """Return a copy of ``img`` with additive Gaussian noise.

    Args:
        img: Image as a NumPy array or any array-like (nested lists work
            too), of any shape. It is converted to float32 before the noise
            is added; the input itself is never modified.
        mean: Mean of the noise, in the same units as the pixel values.
        var: Variance of the noise; the standard deviation used for
            sampling is ``var ** 0.5``.

    Returns:
        A new NumPy array with the same shape as ``img``. The result is not
        clipped, so values may fall outside the original pixel range.
    """
    img = np.asarray(img, dtype=np.float32)

    sigma = var ** 0.5
    # Noise is drawn from NumPy's global random state, so np.random.seed
    # makes the result reproducible.
    noise = np.random.normal(mean, sigma, img.shape)
    return img + noise



In [None]:
# Noisy float32 copy of the training images, filled one image at a time.
noised_train = np.zeros((len(train_images), 28, 28), dtype='float32')
for idx, clean_img in enumerate(train_images):
    # Assigning into the float32 buffer already casts the noisy image.
    noised_train[idx] = add_gaussian_noise(clean_img).reshape(28, 28)

# Same procedure for the test images.
noised_test = np.zeros((len(test_images), 28, 28), dtype='float32')
for idx, clean_img in enumerate(test_images):
    noised_test[idx] = add_gaussian_noise(clean_img).reshape(28, 28)

In [None]:
class noisedDataset():
    """Pairs each noisy sample with its clean counterpart.

    Indexing returns ``(noisy, clean)`` for the same position in both
    collections, with the optional transform applied to each of them.
    """

    def __init__(self,datasetnoised,datasetclean,transform):
        """Store the two aligned collections and the transform.

        Args:
            datasetnoised: Indexable collection of noisy samples; its length
                defines the length of the dataset.
            datasetclean: Indexable collection of clean samples, indexed with
                the same positions as ``datasetnoised``.
            transform: Callable applied to both samples of a pair, or
                ``None`` to return the samples unchanged.
        """
        self.noise = datasetnoised
        self.clean = datasetclean
        self.transform = transform

    def __len__(self):
        """Return the number of noisy samples."""
        return len(self.noise)

    def __getitem__(self,idx):
        """Return the ``(noisy, clean)`` pair stored at position ``idx``."""
        xNoise = self.noise[idx]
        xClean = self.clean[idx]

        # Identity check: a transform object that overrides comparison
        # operators must not be mistaken for "no transform".
        if self.transform is not None:
            xNoise = self.transform(xNoise)
            xClean = self.transform(xClean)

        return (xNoise, xClean)


# Single-step pipeline that turns each array into a tensor.
tsfms = transforms.Compose([transforms.ToTensor()])

# Dataset of (noisy, clean) training pairs.
# NOTE(review): ToTensor rescales uint8 arrays to [0, 1] but leaves float
# arrays unscaled, so the noisy (float32) and clean images may end up on
# different value ranges - confirm the dtype of train_images.
data = noisedDataset(
    datasetnoised=noised_train,
    datasetclean=train_images,
    transform=tsfms,
)

In [None]:
class VariationalEncoder(nn.Module):
    """Encoder of a VAE for square, single-channel images.

    A hidden layer of 512 units feeds two linear heads: one produces the
    mean and the other the log standard deviation of a diagonal Gaussian
    over the latent space.

    Attributes set by ``forward``:
        kl: KL divergence between that Gaussian and a standard normal,
            summed over the batch and over the latent dimensions.
    """

    def __init__(self, latent_dims,input_dim):
        """Build the layers.

        Args:
            latent_dims: Size of the latent space.
            input_dim: Side length of the images; each input is flattened to
                ``input_dim ** 2`` features.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_dim**2, 512)
        self.linear2 = nn.Linear(512, latent_dims)  # mean head
        self.linear3 = nn.Linear(512, latent_dims)  # log-sigma head
        self.kl = 0

    def forward(self, x):
        """Sample a latent code for each input and update ``self.kl``.

        Args:
            x: Batch of inputs; everything after the first dimension is
                flattened and must total ``input_dim ** 2`` values.

        Returns:
            Tensor of shape ``(batch, latent_dims)`` sampled as
            ``mu + sigma * eps`` with ``eps`` drawn from N(0, 1).
        """
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.linear1(x))
        mu = self.linear2(x)
        log_sigma = self.linear3(x)
        sigma = torch.exp(log_sigma)
        # randn_like draws the noise on mu's device and dtype, so the module
        # needs no global device and keeps working after .to(...).
        z = mu + sigma * torch.randn_like(mu)
        # Use log_sigma directly instead of log(exp(.)): when sigma
        # underflows to 0 the logarithm would otherwise be -inf.
        self.kl = (0.5*sigma**2 + 0.5*mu**2 - log_sigma - 0.5).sum()
        return z

In [None]:
class Decoder(nn.Module):
    """Decoder mapping latent codes back to square, single-channel images."""

    def __init__(self, latent_dims,input_dim):
        """Build the layers.

        Args:
            latent_dims: Size of the latent space.
            input_dim: Side length of the generated images.
        """
        super().__init__()
        # Remember the image size so forward() does not depend on a global
        # variable that happens to be called input_dim.
        self.input_dim = input_dim
        self.linear1 = nn.Linear(latent_dims, 512)
        self.linear2 = nn.Linear(512, input_dim**2)

    def forward(self, z):
        """Decode a batch of latent codes.

        Args:
            z: Tensor of shape ``(batch, latent_dims)``.

        Returns:
            Tensor of shape ``(batch, 1, input_dim, input_dim)`` with values
            in (0, 1) produced by the final sigmoid.
        """
        z = F.relu(self.linear1(z))
        z = torch.sigmoid(self.linear2(z))
        return z.reshape((-1, 1, self.input_dim, self.input_dim))

In [None]:
class VariationalAutoencoder(nn.Module):
    """Variational autoencoder built from a VariationalEncoder and a Decoder."""

    def __init__(self, latent_dims,ipnut_dim):
        """Build the encoder and the decoder.

        Args:
            latent_dims: Size of the latent space shared by both halves.
            ipnut_dim: Side length of the square input images. The spelling
                is kept as is so that the signature stays unchanged.
        """
        super().__init__()
        # Pass the constructor argument on; previously it was ignored and a
        # global named input_dim was read instead.
        self.encoder = VariationalEncoder(latent_dims, ipnut_dim)
        self.decoder = Decoder(latent_dims, ipnut_dim)

    def forward(self, x):
        """Encode ``x`` to a sampled latent code and return its decoding."""
        z = self.encoder(x)
        return self.decoder(z)

In [None]:
def train(autoencoder, data, epochs=20,l=5):
    """Train a VAE with Adam on a sum-of-squares reconstruction loss plus KL.

    Args:
        autoencoder: Module whose forward pass reconstructs its input and
            whose ``encoder.kl`` attribute holds the KL term of the most
            recent forward pass.
        data: Iterable of ``(x, y)`` batches; only ``x`` is used.
        epochs: Number of passes over ``data``.
        l: Weight of the KL term in the total loss.

    Returns:
        Tuple ``(autoencoder, losses_recon, losses_kl)``. The two arrays
        hold one value per epoch: the reconstruction loss and the
        unweighted KL term, each summed over all batches of that epoch.
    """
    opt = torch.optim.Adam(autoencoder.parameters())
    # Send batches to wherever the model lives instead of relying on a
    # module-level device variable.
    model_device = next(autoencoder.parameters()).device
    losses_recon = []
    losses_kl = []
    for epoch in range(epochs):
        print("epoch: ",epoch)
        loss_recon_e = 0.0
        loss_kl_e = 0.0
        for x, _ in data:
            x = x.to(model_device)
            opt.zero_grad()
            x_hat = autoencoder(x)
            loss_recon = ((x - x_hat)**2).sum()
            # Must be read after the forward pass, which refreshes it.
            loss_kl = autoencoder.encoder.kl
            loss = loss_recon + l*loss_kl
            loss.backward()
            opt.step()
            loss_recon_e += loss_recon.item()
            loss_kl_e += loss_kl.item()
        losses_recon.append(loss_recon_e)
        losses_kl.append(loss_kl_e)
    return autoencoder, np.array(losses_recon, dtype=float), np.array(losses_kl, dtype=float)

In [None]:
# Shuffled loader over MNIST (train flag left at its default) in batches of
# 128; this rebinds `data`, which earlier held the noisy/clean dataset.
data = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        root='./data',
        download=True,
        transform=torchvision.transforms.ToTensor(),
    ),
    shuffle=True,
    batch_size=128,
)

In [None]:
# Two-dimensional latent space for 28x28 inputs.
latent_dims = 2
input_dim = 28

# Build the model on the selected device, then train it for 40 epochs with
# a KL weight of 1.
vae = VariationalAutoencoder(latent_dims, input_dim).to(device)  # GPU
vae, loss_recon, loss_kl = train(autoencoder=vae, data=data, epochs=40, l=1)

epoch:  0
epoch:  1
epoch:  2
epoch:  3
epoch:  4
epoch:  5
epoch:  6
epoch:  7
epoch:  8
epoch:  9
epoch:  10
epoch:  11
epoch:  12
epoch:  13
epoch:  14
epoch:  15
epoch:  16
epoch:  17
epoch:  18
epoch:  19
epoch:  20
epoch:  21
epoch:  22
epoch:  23
epoch:  24
epoch:  25
epoch:  26
epoch:  27
epoch:  28
epoch:  29
epoch:  30
epoch:  31
epoch:  32
epoch:  33
epoch:  34
epoch:  35
epoch:  36
epoch:  37
epoch:  38
epoch:  39
