Hello students! The following is the second task in the **Generative Computer Vision Models** module.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In the code block below, which defines a simple autoencoder, add an upsampling layer that matches the shape of the previous layer's output (14×14) so that the image is reconstructed at its original 28×28 size.

In [2]:
# Training configuration (tune these values to experiment)
num_epochs = 25        # number of passes over the training data
batch_size = 64        # number of images per mini-batch
learning_rate = 1e-3   # optimizer step size

# Define the autoencoder architecture
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a fixed 2x upsampling stage.

    The encoder compresses a flattened 28*28 input down to 32 features. The
    decoder expands those features to 14*14 values, which are reshaped into a
    single-channel 14x14 map and then upsampled by a factor of two so that the
    reconstruction has the same 28x28 spatial size as the input.

    Args:
        upsample_mode: Interpolation algorithm passed to ``nn.Upsample``.
            Use ``'nearest'`` (default), ``'bilinear'`` or ``'bicubic'``.
    """

    def __init__(self, upsample_mode='nearest'):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 14*14),
            nn.ReLU()  # No sigmoid here, we'll use Mean Squared Error loss
        )
        # `align_corners` is only accepted by the interpolating modes; for
        # 'nearest' it has to stay None or nn.Upsample raises at call time.
        align_corners = False if upsample_mode in ('bilinear', 'bicubic') else None
        # Parameter-free layer that doubles height and width: 14x14 -> 28x28.
        self.upsample = nn.Upsample(
            scale_factor=2, mode=upsample_mode, align_corners=align_corners
        )

    def forward(self, x):
        """Reconstruct a batch of flattened images.

        Args:
            x: Tensor whose last dimension is 28*28 (one flattened image per row).

        Returns:
            Tensor of shape (batch, 1, 28, 28) holding the reconstructions.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        x = x.view(x.size(0), 1, 14, 14)  # Reshape to a 1-channel 14x14 map
        # The result must be assigned; calling the layer without using its
        # return value would leave x at 14x14.
        x = self.upsample(x)
        return x


Load the dataset and train the autoencoder.

In [None]:
# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Pick the device once so that the model and every batch live on the same one.
# Moving only the batches to the GPU (and not the model) raises a device
# mismatch error on machines where CUDA is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the autoencoder model
autoencoder = Autoencoder().to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=learning_rate)

# Mean training loss of each epoch, kept so the loss curve can be plotted later
loss_history = []

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for img, _ in train_loader:  # labels are not needed for reconstruction
        img = img.view(img.size(0), -1).to(device)  # Flatten the images

        # Forward pass
        output = autoencoder(img)
        # The flattened output must have as many values per image as `img`
        # (28*28) for the element-wise MSE to be computable.
        output = output.view(img.size(0), -1)
        loss = criterion(output, img)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size: the last batch may be smaller than the others
        running_loss += loss.item() * img.size(0)

    # Report the epoch average rather than the (noisy) loss of the last batch
    epoch_loss = running_loss / len(train_dataset)
    loss_history.append(epoch_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


# Save the trained model
torch.save(autoencoder.state_dict(), 'model.pth') # change the name of the trained model according to the upsample layer used

## Exercise 1
1. Design the upsampling layer using nearest-neighbour interpolation, train the autoencoder above, and plot the loss curve.
2. Design the upsampling layer using bilinear interpolation, train the autoencoder above, and plot the loss curve.
3. Design the upsampling layer using bicubic interpolation, train the autoencoder above, and plot the loss curve.

## Exercise 2 
1. Sample an image from the test set and compute the Fourier transform of both the input image and its reconstruction from each trained autoencoder (one per upsampling layer).
2. Analyze the differences between the Fourier transforms of the reconstructions obtained in the previous step.