# AutoEncoders

Autoencoder is an unsupervised learning method that uses neural networks for representation learning tasks. The key to autoencoders is to build a neural network that places a bottleneck in the network to create a compressed knowledge representation of the original input data. Compression and subsequent reconstruction will be very difficult if the characteristics of the input data are independent of each other. However, if there is a correlation between input features, it can be learned and used when input through the network bottleneck. In other words, the bottleneck serves to limit the amount of information that can pass through the entire network, leading to learned compression of the input data.

They are a type of an unsupervised learning methods. In detail, they are trained by supervised learning methods, referred to as self-supervised.

Dataset: MNIST

In [9]:
import os

import torch
import torch.nn as nn
from torchvision import datasets

import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from torchvision.utils import save_image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
batch_size = 100

img_shape = (28, 28) # 28*28 for MNIST
input_dim  = 784  # 28*28 for MNIST
hidden_dim = 128
latent_dim = 3

lr = 1e-3

n_epochs = 10


In [11]:
data_path = './data'

In [12]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

loader_kwargs = {'num_workers': os.cpu_count()//2, 'pin_memory': True} 

train_data = datasets.MNIST(root=data_path,  train=True,download=True, transform=transform)
test_data = datasets.MNIST(root=data_path, train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, **loader_kwargs)


0: "T-Shirt",
1: "Trouser",
2: "Pullover",
3: "Dress",
4: "Coat",
5: "Sandal",
6: "Shirt",
7: "Sneaker",
8: "Bag",
9: "Ankle Boot",


In [13]:
# Defining Model

class AutoEncoder(nn.Module):

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(AutoEncoder,self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(True), 
            nn.Linear(hidden_dim, latent_dim))

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(True), 
            nn.Linear(hidden_dim, input_dim), nn.Tanh())

    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

model = AutoEncoder(input_dim, hidden_dim, latent_dim).to(device)

def tanh_to_img(x): 
    x = (x + 1)*0.5
    x = x.view(batch_size, 1, 28, 28)
    return x

In [14]:
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr = lr)

In [15]:
saved_dir = 'ae_images'
os.makedirs(saved_dir, exist_ok= True)

In [16]:
for epoch in range(n_epochs):
    for batch_idx, data in enumerate(train_loader):
        img, label = data
        img = img.view(batch_size, -1)
        img = img.to(device)

        output = model(img)
        loss = criterion(output, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 1 == 0:
        output = tanh_to_img(output.data)
        save_image(output[:25], f"{saved_dir}/{epoch+1}.png", nrow=5, normalize=True)
            
    # ===== log =====
    print(f'epoch: {epoch+1}/{n_epochs}, loss:{loss.item() :.4f}')

epoch: 1/10, loss:0.2148
epoch: 2/10, loss:0.2129
epoch: 3/10, loss:0.2109
epoch: 4/10, loss:0.2096
epoch: 5/10, loss:0.2088
epoch: 6/10, loss:0.2081
epoch: 7/10, loss:0.2079
epoch: 8/10, loss:0.2076
epoch: 9/10, loss:0.2072
epoch: 10/10, loss:0.2070
