In [2]:
from plot_lib import plot_data, plot_model, set_default

In [3]:
set_default()

In [4]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import FashionMNIST
import matplotlib.pyplot as plt
import numpy
from torchsummary import summary
from torchvision.utils import save_image
import os

In [5]:
# Creating the folder "pictures" where I will save the reconstructed images
path = './pictures'
if not os.path.exists(path):
    os.makedirs(path)

In [6]:
# function to count number of parameters
def get_n_params(model):
    np=0
    for p in list(model.parameters()):
        np += p.nelement()
    return np

In [7]:
# Convert vector to image

def to_img(x):
    x = 0.5 * (x + 1)
    x = x.view(x.size(0), 28, 28)
    return x

In [8]:
# Displaying routine

def display_images(in_, out, n=1):
    for N in range(n):
        if in_ is not None:
            in_pic = to_img(in_.cpu().data)
            plt.figure(figsize=(18, 6))
            for i in range(4):
                plt.subplot(1,4,i+1)
                plt.imshow(in_pic[i+4*N])
                plt.axis('off')
        out_pic = to_img(out.cpu().data)
        plt.figure(figsize=(18, 6))
        for i in range(4):
            plt.subplot(1,4,i+1)
            plt.imshow(out_pic[i+4*N])
            plt.axis('off')

In [10]:
# Define data loading step

batch_size =  128 #256

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = FashionMNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
# Define model architecture and reconstruction loss

class Autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential( nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=3,  padding=1),
           nn.LeakyReLU(), nn.AvgPool2d( kernel_size=2,stride=2,  padding=0 ), 
        nn.Conv2d(16, 8, kernel_size=3, stride=2,  padding=1) , nn.LeakyReLU(),
            nn.AvgPool2d( kernel_size=2, stride=1,  padding=0 )
        )
        self.decoder = nn.Sequential( nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2,  padding=0), 
            nn.LeakyReLU(), nn.ConvTranspose2d(16, 8, kernel_size=5, stride=3,  padding=1), 
            nn.LeakyReLU(), nn.ConvTranspose2d( 8, 1, kernel_size=2, stride=2,  padding=1), nn.Tanh()
        )
        
        #self.fc =  nn.Linear(8*2*2, 10)
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
model = Autoencoder().to(device)
criterion = nn.MSELoss()

In [13]:
#Print the parameters size of the model
summary(model, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 10, 10]             160
         LeakyReLU-2           [-1, 16, 10, 10]               0
         AvgPool2d-3             [-1, 16, 5, 5]               0
            Conv2d-4              [-1, 8, 3, 3]           1,160
         LeakyReLU-5              [-1, 8, 3, 3]               0
         AvgPool2d-6              [-1, 8, 2, 2]               0
   ConvTranspose2d-7             [-1, 16, 5, 5]           1,168
         LeakyReLU-8             [-1, 16, 5, 5]               0
   ConvTranspose2d-9            [-1, 8, 15, 15]           3,208
        LeakyReLU-10            [-1, 8, 15, 15]               0
  ConvTranspose2d-11            [-1, 1, 28, 28]              33
             Tanh-12            [-1, 1, 28, 28]               0
Total params: 5,729
Trainable params: 5,729
Non-trainable params: 0
-----------------------------------

In [14]:
print(model.parameters)

<bound method Module.parameters of Autoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): AvgPool2d(kernel_size=2, stride=1, padding=0)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(8, 16, kernel_size=(3, 3), stride=(2, 2))
    (1): LeakyReLU(negative_slope=0.01)
    (2): ConvTranspose2d(16, 8, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.01)
    (4): ConvTranspose2d(8, 1, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
    (5): Tanh()
  )
)>


In [15]:
# Configure the optimiser

learning_rate = 1e-3
L2_regularization = 1e-5
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate, weight_decay = L2_regularization
)

In [16]:
# Train standard or denoising autoencoder (AE)

num_epochs = 20
# do = nn.Dropout()  # comment out for standard AE
for epoch in range(num_epochs):
    for data in dataloader:
        img, label = data
        img.requires_grad_()
        #img = img.view(img.size(0), -1)
        img = img.view(-1,1,28,28)
#         img_bad = do(img).to(device)  # comment out for standard AE
        # ===================forward=====================
        output = model(img)  # feed <img> (for std AE) or <img_bad> (for denoising AE)
        #output = model(label)
        #output = output.view(-1,1,28,28)
        loss = criterion(output, img.data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print('epoch[{}/{}], loss : {}'.format(epoch + 1, num_epochs, loss.item()))
    
    #Saving the images at each epoch
    save_image(output, "./pictures/{}.png".format(epoch+1))
    
    #displaying the images
    display_images(None, output)  # pass (None, output) for std AE, (img_bad, output) for denoising AE

epoch[1/20], loss : 0.15426026284694672
epoch[2/20], loss : 0.10638400167226791
epoch[3/20], loss : 0.1068296954035759
epoch[4/20], loss : 0.1015646681189537
epoch[5/20], loss : 0.09650999307632446
epoch[6/20], loss : 0.08459876477718353
epoch[7/20], loss : 0.08838369697332382
epoch[8/20], loss : 0.09778234362602234
epoch[9/20], loss : 0.08739129453897476
epoch[10/20], loss : 0.08597636967897415
epoch[11/20], loss : 0.0928310677409172
epoch[12/20], loss : 0.07788118720054626
epoch[13/20], loss : 0.08396041393280029
epoch[14/20], loss : 0.09564739465713501
epoch[15/20], loss : 0.07875486463308334
epoch[16/20], loss : 0.08217321336269379
epoch[17/20], loss : 0.07887201011180878
epoch[18/20], loss : 0.081071637570858
epoch[19/20], loss : 0.0847182422876358
epoch[20/20], loss : 0.0804489329457283
