

**AutoEncoder**

Autoencoders are artificial neural networks capable of learning dense representations of the input data, called latent representations or codings, without any supervision. This neural network architecture is divided into the encoder structure, the decoder structure, and the latent space between them, also known as the “bottleneck”.

# Setup

In [None]:
import torch
import torch.nn as nn
from torch import optim
from torchvision import datasets
from torchvision import transforms


In [None]:
import matplotlib.pyplot as plt

# Shared matplotlib defaults for every figure in this notebook:
# size 14 for body text, axis labels/titles and legends, size 10 for tick labels.
for _group, _options in (
    ("font", {"size": 14}),
    ("axes", {"labelsize": 14, "titlesize": 14}),
    ("legend", {"fontsize": 14}),
    ("xtick", {"labelsize": 10}),
    ("ytick", {"labelsize": 10}),
):
    plt.rc(_group, **_options)

In [None]:
import sys

# Choose the compute device once; models and batches are moved to it later.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cpu":
    # Training still works on the CPU, so this is only a warning.
    print("No GPU was detected. Neural nets can be very slow without a GPU.")
    if "google.colab" in sys.modules:
        print("Go to Runtime > Change runtime and select a GPU hardware "
              "accelerator.")

In [None]:
# Convert every dataset sample to a tensor as it is loaded.
tensor_transform = transforms.ToTensor()

# MNIST training split, stored under ./data (downloaded if it is not there yet).
train_ds = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=tensor_transform,
)

# Shuffled mini-batches of 32 samples.
train_dl = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=32,
    shuffle=True,
)

# Stacked Autoencoders

In [None]:
def _dense_stack(widths):
    """Return Linear layers joining consecutive `widths`, with a ReLU between them.

    No activation follows the last Linear layer; the caller decides what
    (if anything) comes after it.
    """
    layers = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        if layers:
            layers.append(nn.ReLU())
        layers.append(nn.Linear(fan_in, fan_out))
    return layers


# Encoder: 784 -> 128 -> 64 -> 36 -> 18 -> 9 (the 9-value bottleneck has no activation).
stacked_encoder = nn.Sequential(*_dense_stack([28 * 28, 128, 64, 36, 18, 9]))

# Decoder mirrors the encoder; the final Sigmoid keeps outputs in (0, 1).
stacked_decoder = nn.Sequential(
    *_dense_stack([9, 18, 36, 64, 128, 28 * 28]),
    nn.Sigmoid(),
)

# Full autoencoder: encode to the bottleneck, then decode back to 784 values.
stacked_ae = nn.Sequential(stacked_encoder, stacked_decoder)

In [None]:
# Move the model's parameters to the selected device (`Module.to` returns the
# same module); the bare expression makes the notebook display its layers.
stacked_ae = stacked_ae.to(device)
stacked_ae

In [None]:
# Step size shared by the optimizer below.
learning_rate = 0.001

# Mean squared error between the reconstruction and the input pixels.
criterion = nn.MSELoss()
# Pass the named constant (not a second literal) so that editing
# `learning_rate` actually changes the optimizer's step size.
optimizer = optim.Adam(stacked_ae.parameters(), lr=learning_rate)

In [None]:
from tqdm import tqdm

epochs = 20

# One (epoch index, input batch, reconstruction) snapshot per epoch.
outputs = []


for epoch in range(epochs):

    train_loss = 0.0
    for (image, _) in tqdm(train_dl):

        # Flatten every image to a 784-vector for the dense layers.
        image = image.reshape(-1, 28*28)
        image = image.to(device)
        reconstructed = stacked_ae(image)
        loss = criterion(reconstructed, image)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Record the current epoch index (not the total `epochs`) together with the
    # last batch of the epoch. The tensors are detached and copied to the CPU so
    # the list does not keep autograd graphs or device memory alive; storing
    # every batch would grow memory without bound.
    outputs.append((epoch, image.detach().cpu(), reconstructed.detach().cpu()))
    print(f"Epoch {epoch+1}/{epochs}: train loss: {train_loss/len(train_dl)}")


In [None]:
import numpy as np

@torch.no_grad()
def plot_reconstructions(model, images, n_images=5):
    """Draw input images (top row) above the model's reconstructions (bottom row).

    Args:
        model: Callable applied to the image batch. It must return either the
            reconstructions or a tuple whose first element is the
            reconstructions (as the VAE in this file does), with 28 * 28
            values per image.
        images: Batch tensor in the layout `model` expects. It is passed to
            `model` as-is, so it must already be on the model's device.
        n_images: Maximum number of image/reconstruction pairs to draw; capped
            at the number of images in the batch.

    The figure is created on the current matplotlib state; call `plt.show()`
    afterwards to display it.
    """
    # Never index past the end of the batch.
    n_images = min(n_images, len(images))
    if n_images <= 0:
        return

    images = images[:n_images]
    reconstructions = model(images)
    if isinstance(reconstructions, tuple):
        # Models such as the VAE return (reconstruction, mean, logvar).
        reconstructions = reconstructions[0]

    # Bring everything to the CPU as 28x28 arrays for imshow.
    images = images.to("cpu").reshape(-1, 28, 28)
    reconstructions = reconstructions.to("cpu").reshape(-1, 28, 28)

    plt.figure(figsize=(n_images * 1.5, 3))
    for image_index in range(n_images):
        plt.subplot(2, n_images, 1 + image_index)
        plt.imshow(images[image_index], cmap="binary")
        plt.axis("off")
        plt.subplot(2, n_images, 1 + n_images + image_index)
        plt.imshow(reconstructions[image_index], cmap="binary")
        plt.axis("off")

# `image` is the last batch left over from the training loop (already flattened
# and on `device`); show its first 10 digits with their reconstructions.
plot_reconstructions(stacked_ae, image, 10)
plt.show()

# Convolutional AutoEncoder

In [None]:
# Shape comments below are channels x height x width for a 1 x 28 x 28 input.
conv_encoder = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1),   # 16 x 28 x 28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                                 # 16 x 14 x 14
    nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),  # 32 x 14 x 14
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                                 # 32 x 7 x 7
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),  # 64 x 7 x 7
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                                 # 64 x 3 x 3
    nn.AdaptiveAvgPool2d(output_size=1),                                   # 64 x 1 x 1
    nn.Flatten(),                                                          # 64
)

conv_decoder = nn.Sequential(
    nn.Linear(in_features=64, out_features=3 * 3 * 64),
    nn.Unflatten(dim=1, unflattened_size=(64, 3, 3)),                      # 64 x 3 x 3
    nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=3,
                       stride=2),                                          # 32 x 7 x 7
    nn.ReLU(),
    nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=3,
                       stride=2, padding=1),                               # 16 x 13 x 13
    nn.ReLU(),
    nn.ConvTranspose2d(in_channels=16, out_channels=1, kernel_size=3,
                       stride=2, output_padding=1),                        # 1 x 28 x 28
    nn.Sigmoid(),
)

# Full convolutional autoencoder: image -> 64-value code -> image.
conv_ae = nn.Sequential(conv_encoder, conv_decoder)

In [None]:
# Move the convolutional autoencoder's parameters to the selected device.
conv_ae = conv_ae.to(device)

In [None]:
from tqdm import tqdm

epochs = 20

# The optimizer created for the stacked autoencoder only holds that model's
# parameters, so stepping it would never update `conv_ae`. Build a loss and an
# optimizer that belong to this model.
criterion = nn.MSELoss()
optimizer = optim.Adam(conv_ae.parameters(), lr=learning_rate)

# One (epoch index, input batch, reconstruction) snapshot per epoch.
outputs = []


for epoch in range(epochs):

    train_loss = 0.0
    for (image, _) in tqdm(train_dl):

        # The convolutional model consumes the images in their loaded layout,
        # so no flattening is needed here.
        image = image.to(device)
        reconstructed = conv_ae(image)
        # Input and reconstruction have the same shape; MSE averages over all
        # elements, so flattening first would give the same value.
        loss = criterion(reconstructed, image)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Record the current epoch index (not the total `epochs`) with the last
    # batch of the epoch, detached and on the CPU so the list does not keep
    # autograd graphs or device memory alive.
    outputs.append((epoch, image.detach().cpu(), reconstructed.detach().cpu()))
    print(f"Epoch {epoch+1}/{epochs}: train loss: {train_loss/len(train_dl)}")

# VAE

In [None]:
class VAE(nn.Module):
    """Fully connected variational autoencoder.

    The encoder maps each flattened input to the mean and log-variance of a
    diagonal Gaussian over the latent space. A latent vector is sampled from
    that Gaussian and decoded back to `input_size` values in (0, 1).

    Args:
        input_size: Number of values per sample after flattening.
        latent_size: Dimensionality of the latent space.
        device: Kept for backward compatibility and stored as `self.device`.
            Sampling follows the device of the encoder output, so the model
            also works when it lives on a different device than this value.
    """

    def __init__(self, input_size, latent_size, device="cuda"):
        super().__init__()
        self.device = device
        self.encoder = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, 300),
            nn.ReLU(),
            nn.Linear(300, 100),
            nn.ReLU()
        )

        # Two heads on the shared encoder features: mean and log-variance.
        self.mean_layer = nn.Linear(100, latent_size)
        self.logvar_layer = nn.Linear(100, latent_size)

        self.decoder = nn.Sequential(
            nn.Linear(latent_size, 100),
            nn.ReLU(),
            nn.Linear(100, 300),
            nn.ReLU(),
            nn.Linear(300, input_size),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode `x`, sample a latent vector, and decode it.

        Returns:
            A tuple `(reconstruction, mean, logvar)` where `reconstruction`
            has shape (batch, input_size) and `mean` / `logvar` have shape
            (batch, latent_size).
        """
        enc_out = self.encoder(x)
        mean, logvar = self.mean_layer(enc_out), self.logvar_layer(enc_out)

        # Reparameterization trick: z = mean + std * eps with eps ~ N(0, I).
        # The head predicts log(variance), so std = exp(0.5 * logvar);
        # multiplying by `logvar` itself would give a wrong (even negative) scale.
        std = torch.exp(0.5 * logvar)
        # randn_like already matches the device and dtype of `std`.
        eps = torch.randn_like(std)
        z = mean + eps * std
        return self.decoder(z), mean, logvar


In [None]:
class VAELoss(nn.Module):
    """VAE training loss: reconstruction error plus KL divergence, per sample.

    Both terms are summed within each sample and then averaged over the batch.
    Reducing them the same way keeps their relative weight independent of the
    batch size; mixing a mean-reduced reconstruction term with a sum-reduced
    KL term lets the KL term dominate.
    """

    def forward(self, reconstruction, x, mean, logvar):
        """Return the scalar loss for one batch.

        Args:
            reconstruction: Decoder output, shape (batch, features).
            x: Target inputs with the same number of elements as
                `reconstruction`; reshaped to match it.
            mean: Latent means, shape (batch, latent).
            logvar: Latent log-variances, shape (batch, latent).
        """
        # Guard the division for an empty batch.
        batch_size = max(reconstruction.shape[0], 1)
        x = x.reshape(reconstruction.shape)

        # Squared error summed over every element of the batch.
        loss_reconstruction = nn.MSELoss(reduction="sum")(reconstruction, x)
        # KL(N(mean, exp(logvar)) || N(0, I)), summed over batch and latent dims.
        loss_kld = -0.5 * torch.sum(1 + logvar - logvar.exp() - mean.pow(2))
        return (loss_reconstruction + loss_kld) / batch_size

In [None]:
# Step size for the VAE's optimizer.
learning_rate = 0.001

# 784 input values per image and a 50-dimensional latent space.
model = VAE(input_size=784, latent_size=50, device=device)
model = model.to(device)

criterion = VAELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
from tqdm import tqdm

epochs = 20

for epoch in range(epochs):

    train_loss = 0.0
    for image, _ in tqdm(train_dl):
        image = image.to(device)
        # The model flattens its own input; only the loss target has to be
        # flattened to line up with the (batch, 784) reconstruction.
        target = image.view(-1, 784)

        optimizer.zero_grad()
        reconstructed, mean, logvar = model(image)
        loss = criterion(reconstructed, target, mean, logvar)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss; the epoch summary reports the
        # average per batch.
        train_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}: train loss: {train_loss/len(train_dl)}")