In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [None]:
#DATA LOADER - 1
# ToTensor converts each image to a float tensor; pixel values end up in [0, 1].
transform = transforms.ToTensor()

# Training split of MNIST, downloaded into ./data if it is not already there.
mnist_data = datasets.MNIST(root = './data',train = True, download = True, transform = transform)

# Fix: the keyword is `batch_size` (it was misspelled `bathc_size`, which makes
# DataLoader raise a TypeError for an unexpected keyword argument).
data_loader = torch.utils.data.DataLoader(dataset = mnist_data, batch_size = 64, shuffle = True)

Data Loader 2 with Normalize


In [None]:
#DATA LOADER - 2
# NOTE: this cell rebinds `transform`, `mnist_data` and `data_loader` from
# DATA LOADER - 1, so whichever loader cell ran last is the one used for training.
transform = transforms.Compose([
    transforms.ToTensor(),
    # Mean and SD are given as 1-tuples (one entry per channel; MNIST has one channel).
    # (x - 0.5) / 0.5 maps pixel values from [0, 1] to [-1, 1].
    transforms.Normalize((0.5,),(0.5,))  #((Mean,), (SD,))
])

# Training split of MNIST, downloaded into ./data if it is not already there.
mnist_data = datasets.MNIST(root = './data',train = True, download = True, transform = transform)

# Fix: the keyword is `batch_size` (it was misspelled `bathc_size`, which makes
# DataLoader raise a TypeError for an unexpected keyword argument).
data_loader = torch.utils.data.DataLoader(dataset = mnist_data, batch_size = 64, shuffle = True)

In [None]:
#Visualizing the first set of images or batch of images using iter.

dataiter = iter(data_loader)
# Fix: use the built-in next(); the iterator's `.next()` method is not available
# in recent PyTorch releases, while next(iterator) works in every version.
images, labels = next(dataiter)
# Sanity check of the pixel value range in the first batch.
print(torch.min(images),torch.max(images))

In [None]:
from turtle import forward


class Autoencoder(nn.Module):
    """Fully-connected autoencoder that squeezes flattened images into a small code.

    Args:
        input_dim: Number of features per flattened sample. Defaults to
            28*28 = 784, the pixel count of the images used in this notebook.
        latent_dim: Size of the bottleneck code produced by the encoder
            (default 3).
        output_activation: Module applied to the decoder output. When ``None``
            (default) ``nn.Sigmoid()`` is used, which suits inputs in [0, 1].
            Pass ``nn.Tanh()`` when the inputs are normalized to [-1, 1].
    """

    def __init__(self, input_dim=28*28, latent_dim=3, output_activation=None):
        super().__init__()
        if output_activation is None:
            # Sigmoid keeps reconstructions in (0, 1), matching un-normalized pixels.
            output_activation = nn.Sigmoid()

        #The goal is to repeatedly reduce size.
        #Initial size --> N, 784 (N --> Batch size, 28*28 -> Pixels of images in the dataset)
        self.encoder = nn.Sequential(
            nn.Linear(input_dim,128), #Reduce size by N,784 --> N,128
            nn.ReLU(),
            nn.Linear(128,64),
            nn.ReLU(),
            nn.Linear(64,12),
            nn.ReLU(),
            nn.Linear(12,latent_dim), # -> N,3
        )

        # Mirror image of the encoder: expand the code back to the input size.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim,12),
            nn.ReLU(),
            nn.Linear(12,64),
            nn.ReLU(),
            nn.Linear(64,128),
            nn.ReLU(),
            nn.Linear(128,input_dim), # -> Increase size from N,3 --> N,784
            output_activation
        )

    def forward(self,x):
        """Encode ``x`` and return its reconstruction.

        Args:
            x: Tensor whose last dimension has ``input_dim`` entries.

        Returns:
            Tensor with the same shape as ``x``, after the output activation.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
#Note : If the inputs are normalised to [-1, 1] (as in the data loader 2 section above), use nn.Tanh instead of nn.Sigmoid as the final decoder activation, because Sigmoid can only output values in (0, 1).


In [None]:
# Dense autoencoder, mean-squared-error reconstruction loss, and an Adam
# optimizer (learning rate 0.001, weight decay 0.00001) over all model weights.
model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(
    params = model.parameters(),
    lr = 0.001,
    weight_decay = 0.00001,
)

In [None]:
#Training
num_epochs = 10
# One (epoch, last input batch, last reconstruction) tuple is stored per epoch.
outputs = []

for epoch in range(num_epochs):
    for (img,_) in data_loader:
        # Flatten every image to a 784-vector for the fully-connected model.
        img = img.reshape(-1,28*28)
        recon = model(img)   #reconstructed image
        loss = criterion(recon,img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The printed loss belongs to the final batch of the epoch, not an epoch average.
    print(f'Epoch : {epoch+1}, Loss : {loss.item():.4f}')
    # Fix: the list method is `append` (was `appen`, an AttributeError).
    # The reconstruction is detached so the stored tensor does not keep the
    # autograd graph of the last batch alive.
    outputs.append((epoch, img, recon.detach()))

In [None]:
#Plot images to check

# For every 4th stored epoch draw a 2 x 9 grid:
# top row = input images, bottom row = their reconstructions.
cols = 9
for k in range(0, num_epochs, 4):
    plt.figure(figsize=(cols, 2))
    plt.gray()
    imgs = outputs[k][1].detach().numpy()
    recon = outputs[k][2].detach().numpy()

    for row, batch in enumerate((imgs, recon)):
        # Only the first `cols` samples of the batch are shown.
        for col, flat in enumerate(batch[:cols]):
            plt.subplot(2, cols, row * cols + col + 1)  # row offset + column + 1
            # Stored samples are flat; reshape to 1,28,28 and show the single channel.
            plt.imshow(flat.reshape(-1, 28, 28)[0])


Autoencoder with a convolutional neural network (CNN)

In [None]:
from turtle import forward


class Autoencoder_cnn(nn.Module):
    """Convolutional autoencoder for single-channel 28x28 images.

    The encoder shrinks the spatial size while growing the channel count
    (N,1,28,28 -> N,64,1,1); the decoder mirrors it with transposed
    convolutions back to N,1,28,28.

    Args:
        output_activation: Module applied to the decoder output. When ``None``
            (default) ``nn.Sigmoid()`` is used, which suits inputs in [0, 1].
            Pass ``nn.Tanh()`` when the inputs are normalized to [-1, 1].
    """

    def __init__(self, output_activation=None):
        super().__init__()
        if output_activation is None:
            # Sigmoid keeps reconstructions in (0, 1), matching un-normalized pixels.
            output_activation = nn.Sigmoid()

        #The goal is to repeatedly reduce size.
        #Initial size --> N,1, 28, 28 (N --> Batch size, 28*28 -> Pixels of images in the dataset)
        self.encoder = nn.Sequential(
            nn.Conv2d(1,16,3,stride =2, padding =1), # 1--> Input channel , 16 --> Output channels, 3 --> Kernel size,   Layer output size = N, 16, 14, 14
            nn.ReLU(),
            nn.Conv2d(16,32,3,stride =2, padding =1), # N, 32 , 7, 7
            nn.ReLU(),
            nn.Conv2d(32, 64 , 7)  # N, 64, 1 , 1
            # Above we increase the channels and decrease the size of the image.
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 7), # N, 32, 7 , 7
            nn.ReLU(),
            nn.ConvTranspose2d(32,16,3,stride =2, padding =1,output_padding = 1), # N, 16, 14, 14. output_padding :  Add zeroes to get 14,14 else 13,13
            nn.ReLU(),
            # Fix: a comma was missing after this layer, which made the whole class a SyntaxError.
            nn.ConvTranspose2d(16,1,3,stride =2, padding =1,output_padding = 1),  # N, 1, 27,27 after output_padding --> N,1,28,28
            output_activation
        )

    def forward(self,x):
        """Encode ``x`` and return its reconstruction.

        Args:
            x: Image batch of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 1, 28, 28), after the output activation.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
#Note : If the inputs are normalised to [-1, 1] (as in the data loader 2 section above), use nn.Tanh instead of nn.Sigmoid as the final decoder activation, because Sigmoid can only output values in (0, 1).
#If you use nn.MaxPool2d then use nn.MaxUnpool2d in Decoder block.

In [None]:
# Convolutional autoencoder, mean-squared-error reconstruction loss, and an Adam
# optimizer (learning rate 0.001, weight decay 0.00001) over all model weights.
model = Autoencoder_cnn()
criterion = nn.MSELoss()
optimizer = optim.Adam(
    params = model.parameters(),
    lr = 0.001,
    weight_decay = 0.00001,
)

In [None]:
#Training
num_epochs = 10
# One (epoch, last input batch, last reconstruction) tuple is stored per epoch.
outputs = []

for epoch in range(num_epochs):
    for (img,_) in data_loader:
        # Fix: the batch is NOT flattened here. The convolutional layers need the
        # channel and spatial dimensions (N, 1, 28, 28); the reshape to (N, 784)
        # copied from the dense model's loop makes Conv2d fail on the input shape.
        recon = model(img)   #reconstructed image
        loss = criterion(recon,img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The printed loss belongs to the final batch of the epoch, not an epoch average.
    print(f'Epoch : {epoch+1}, Loss : {loss.item():.4f}')
    # Fix: the list method is `append` (was `appen`, an AttributeError).
    # The reconstruction is detached so the stored tensor does not keep the
    # autograd graph of the last batch alive.
    outputs.append((epoch, img, recon.detach()))

In [None]:
#Plot images to check

# For every 4th stored epoch draw a 2 x 9 grid:
# top row = input images, bottom row = their reconstructions.
cols = 9
for k in range(0, num_epochs, 4):
    plt.figure(figsize=(cols, 2))
    plt.gray()
    imgs = outputs[k][1].detach().numpy()
    recon = outputs[k][2].detach().numpy()

    for row, batch in enumerate((imgs, recon)):
        # Only the first `cols` samples of the batch are shown.
        for col, sample in enumerate(batch[:cols]):
            plt.subplot(2, cols, row * cols + col + 1)  # row offset + column + 1
            #sample : 1,28,28 -> index 0 selects the single channel
            plt.imshow(sample[0])
