# Model-1: Standard Autoencoder (AE)

You are required to build a standard Autoencoder (AE) to learn meaningful representations
from the CIFAR-10 dataset.
1. The encoder should consist of convolutional layers for feature extraction.
2. The decoder should use transposed convolutions (`ConvTranspose2d`) to reconstruct
images.
3. Train the model using Mean Squared Error (MSE) loss.
Task:
1. Train the AE on CIFAR-10 and evaluate reconstruction quality on the test data (using
SSIM, PSNR, MAE, and MSE).
2. Visualize latent space representations using t-SNE or PCA.
Presentation of Results:
● Visualize original vs. reconstructed images.
● Plot average error (y-axis) vs. epochs (x-axis).
Date: 01/04/2025

● Compare latent space structure for clean vs. noisy input images.
● Discuss the effectiveness of autoencoders in feature learning.

In [4]:
import torch

# Report whether PyTorch can see a CUDA device and, if it can, which one.
cuda_ok = torch.cuda.is_available()
print("CUDA Available:", cuda_ok)

# Device index 0 is queried only when CUDA is actually usable.
if cuda_ok:
    print("GPU Name:", torch.cuda.get_device_name(0))


CUDA Available: True
GPU Name: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import matplotlib.pyplot as plt
import numpy as np


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import numpy as np
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [7]:
# ------------------ Config ------------------
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 128
EPOCHS = 20
LATENT_DIM = 128

# Seed the global RNGs so that weight initialisation and data-loader shuffling
# start from the same state on every "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic; seeding alone
# does not guarantee bit-identical results across runs or machines.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)


In [8]:
# ------------------ Data ------------------
# The only preprocessing step is the PIL-image -> tensor conversion;
# no normalisation or augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])


def _load_cifar10(train):
    """Return the CIFAR-10 split selected by ``train``, downloading it into ./data if needed."""
    return torchvision.datasets.CIFAR10(
        root='./data',
        train=train,
        download=True,
        transform=transform,
    )


trainset = _load_cifar10(train=True)
testset = _load_cifar10(train=False)

# Training batches are reshuffled every epoch; test batches keep dataset order.
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)


100%|██████████| 170M/170M [17:34<00:00, 162kB/s]  


In [9]:
# ------------------ Autoencoder ------------------
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    The encoder is a stack of three stride-2 3x3 convolutions (3 -> 32 -> 64 ->
    128 channels), each followed by ReLU.  The decoder mirrors it with three
    stride-2 3x3 transposed convolutions (128 -> 64 -> 32 -> 3 channels) and
    ends in a sigmoid, so reconstructed values lie in [0, 1].
    """

    def __init__(self):
        super().__init__()

        # Encoder: every stride-2 convolution halves the spatial resolution
        # (for even input sizes) while widening the channel dimension.
        encoder_layers = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: output_padding=1 makes each transposed convolution exactly
        # double the spatial resolution, undoing the encoder's down-sampling.
        decoder_layers = [
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=3, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            tuple: ``(recon, latent)`` where ``recon`` is the decoder output and
            ``latent`` is the (un-flattened) encoder feature map.
        """
        latent = self.encoder(x)
        return self.decoder(latent), latent

# Build the network and move it to the configured device, then create the
# reconstruction loss (MSE) and the Adam optimizer over all model parameters.
model = Autoencoder()
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [10]:
# ------------------ Training ------------------
# Train the autoencoder to reproduce its own input and record one average
# reconstruction loss per epoch in `train_errors`.
train_errors = []
for epoch in range(EPOCHS):
    # Re-assert training mode every epoch so re-running this cell after an
    # evaluation cell (which calls model.eval()) still trains correctly.
    model.train()

    running_loss = 0.0  # sum of per-sample losses seen this epoch
    samples_seen = 0
    for inputs, _ in trainloader:
        inputs = inputs.to(device)
        outputs, _ = model(inputs)
        loss = criterion(outputs, inputs)  # the input is its own target

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean; weight it by the batch size so a smaller
        # final batch does not count as much as a full one in the epoch average.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    avg_loss = running_loss / samples_seen
    train_errors.append(avg_loss)
    print(f"Epoch [{epoch+1}/{EPOCHS}] Loss: {avg_loss:.4f}")




Epoch [1/20] Loss: 0.0163
Epoch [2/20] Loss: 0.0064
Epoch [3/20] Loss: 0.0050
Epoch [4/20] Loss: 0.0040
Epoch [5/20] Loss: 0.0034
Epoch [6/20] Loss: 0.0030
Epoch [7/20] Loss: 0.0027
Epoch [8/20] Loss: 0.0025
Epoch [9/20] Loss: 0.0023
Epoch [10/20] Loss: 0.0022
Epoch [11/20] Loss: 0.0020
Epoch [12/20] Loss: 0.0019
Epoch [13/20] Loss: 0.0018
Epoch [14/20] Loss: 0.0017
Epoch [15/20] Loss: 0.0017
Epoch [16/20] Loss: 0.0016
Epoch [17/20] Loss: 0.0015
Epoch [18/20] Loss: 0.0015
Epoch [19/20] Loss: 0.0014
Epoch [20/20] Loss: 0.0014


In [11]:
# Plot training error
# Use an explicit figure so the curve never lands on axes left over from
# another cell, and give the line a label so the legend has something to show.
fig, ax = plt.subplots()
epoch_numbers = range(1, len(train_errors) + 1)  # epochs are reported 1-based
ax.plot(epoch_numbers, train_errors, label='Training MSE')
ax.set_xlabel('Epochs')
ax.set_ylabel('Training Error')
ax.set_title('Error vs. Epochs')
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig('mse_loss_curve.png')
plt.show()

  plt.legend()
  plt.show()


In [12]:
# Run the trained model over the whole test loader in evaluation mode and keep
# inputs, reconstructions, flattened latent codes and class labels on the CPU.
model.eval()
orig_batches, recon_batches, latent_batches, label_batches = [], [], [], []

with torch.no_grad():  # no gradients are needed for evaluation
    for batch_images, batch_labels in testloader:
        batch_images = batch_images.to(device)
        batch_recon, batch_latent = model(batch_images)

        orig_batches.append(batch_images.cpu())
        recon_batches.append(batch_recon.cpu())
        # Collapse every dimension after the batch axis into one vector per image.
        latent_batches.append(torch.flatten(batch_latent, start_dim=1).cpu())
        label_batches.append(batch_labels)

# Stitch the per-batch pieces together along the batch axis.
originals = torch.cat(orig_batches)
reconstructions = torch.cat(recon_batches)
latent_vectors = torch.cat(latent_batches)
labels = torch.cat(label_batches)

# The per-batch lists are no longer needed once concatenated.
del orig_batches, recon_batches, latent_batches, label_batches


In [13]:
# SSIM, PSNR, MAE, MSE
# Each metric is computed per test image and then averaged over all images.


def _to_channels_last(image):
    """Move the leading channel axis of an image tensor to the end and return a NumPy array."""
    return image.permute(1, 2, 0).numpy()


ssim_vals, psnr_vals, mae_vals, mse_vals = [], [], [], []

for reference, estimate in zip(originals, reconstructions):
    ref_img = _to_channels_last(reference)
    est_img = _to_channels_last(estimate)
    ref_flat = ref_img.flatten()
    est_flat = est_img.flatten()

    # data_range=1.0 tells both image metrics that pixel values span one unit.
    ssim_vals.append(ssim(ref_img, est_img, win_size=7, channel_axis=-1, data_range=1.0))
    psnr_vals.append(psnr(ref_img, est_img, data_range=1.0))
    mae_vals.append(mean_absolute_error(ref_flat, est_flat))
    mse_vals.append(mean_squared_error(ref_flat, est_flat))

print(f"Average SSIM: {np.mean(ssim_vals):.4f}")
print(f"Average PSNR: {np.mean(psnr_vals):.4f}")
print(f"Average MAE: {np.mean(mae_vals):.4f}")
print(f"Average MSE: {np.mean(mse_vals):.4f}")


Average SSIM: 0.9322
Average PSNR: 28.5033
Average MAE: 0.0293
Average MSE: 0.0016


In [18]:
# ------------------ Visualize Reconstructions ------------------

def imshow_tensor(tensor):
    """Return ``tensor`` as a NumPy image ready for ``imshow``.

    The three axes are reordered from (0, 1, 2) to (1, 2, 0), i.e. the leading
    axis becomes the last one, and values are clamped to the range [0, 1].
    """
    channels_last = np.transpose(tensor.numpy(), (1, 2, 0))
    return channels_last.clip(0, 1)

# Show the first five test images (left column) beside their
# reconstructions (right column) and save the grid to disk.
fig, axes = plt.subplots(5, 2, figsize=(8, 12), dpi=300)
columns = (("Original", originals), ("Reconstructed", reconstructions))

for row in range(5):
    for col, (caption, images) in enumerate(columns):
        panel = axes[row, col]
        panel.imshow(imshow_tensor(images[row]))
        panel.set_title(caption, fontsize=10)
        panel.axis('off')

fig.tight_layout()
fig.savefig("original_vs_reconstructed.png", dpi=300)
plt.show()


  plt.show()


In [15]:
# ------------------ Latent Space Visualization ------------------

# PCA
# Project the flattened latent vectors onto their first two principal
# components and colour each point by its class label.
pca = PCA(n_components=2)
latent_pca = pca.fit_transform(latent_vectors)

# pyplot draws on whichever figure is current; create one explicitly so this
# scatter cannot end up on top of axes left behind by an earlier cell.
# NOTE(review): the notebook selects the non-interactive 'Agg' backend, so
# figures are presumably not closed automatically after each cell.
fig, ax = plt.subplots()
scatter = ax.scatter(latent_pca[:, 0], latent_pca[:, 1], c=labels, cmap='tab10', s=10)
ax.set_title("Latent Space (PCA)")
ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
# Tie the colour bar to this scatter explicitly instead of the implicit "current image".
fig.colorbar(scatter, ax=ax, label="Class label")
fig.savefig('latent_pca.png')
plt.show()


  plt.show()


In [19]:
from sklearn.decomposition import PCA

# High-resolution PCA view of the latent space: fit a 2-component PCA on the
# flattened latent vectors and colour every point by its class label.
pca = PCA(n_components=2)
latent_pca = pca.fit_transform(latent_vectors)

fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
scatter = ax.scatter(latent_pca[:, 0], latent_pca[:, 1], c=labels, cmap='tab10', s=15)
ax.set_title("Latent Space (PCA)", fontsize=12)
fig.colorbar(scatter, ax=ax)
fig.tight_layout()
fig.savefig("latent_space_pca.png", dpi=300)
plt.show()


  plt.show()


In [16]:
# t-SNE
# Embed the flattened latent vectors in 2-D with t-SNE (fixed random_state for
# a repeatable layout) and colour each point by its class label.
tsne = TSNE(n_components=2, random_state=42, perplexity=30)
latent_tsne = tsne.fit_transform(latent_vectors)

# pyplot draws on whichever figure is current; create one explicitly so this
# scatter cannot end up on top of axes left behind by an earlier cell.
# NOTE(review): the notebook selects the non-interactive 'Agg' backend, so
# figures are presumably not closed automatically after each cell.
fig, ax = plt.subplots()
scatter = ax.scatter(latent_tsne[:, 0], latent_tsne[:, 1], c=labels, cmap='tab10', s=10)
ax.set_title("Latent Space (t-SNE)")
ax.set_xlabel("t-SNE dimension 1")
ax.set_ylabel("t-SNE dimension 2")
# Tie the colour bar to this scatter explicitly instead of the implicit "current image".
fig.colorbar(scatter, ax=ax, label="Class label")
fig.savefig('latent_tsne.png')
plt.show()


  plt.show()


In [20]:
from sklearn.manifold import TSNE

# High-resolution t-SNE view of the latent space: compute a 2-D embedding of
# the flattened latent vectors and colour every point by its class label.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
latent_tsne = tsne.fit_transform(latent_vectors)

fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
scatter = ax.scatter(latent_tsne[:, 0], latent_tsne[:, 1], c=labels, cmap='tab10', s=15)
ax.set_title("Latent Space (t-SNE)", fontsize=12)
fig.colorbar(scatter, ax=ax)
fig.tight_layout()
fig.savefig("latent_space_tsne.png", dpi=300)
plt.show()


  plt.show()
