# AI6103 Final Project
<br>

## Team Details:
### 1. Shanthakumar Karan (G2403054G) karan013@e.ntu.edu.sg
### 2. Ramakrishnan Shivaraman (G2402819B) shivaram003@e.ntu.edu.sg
### 3. Shrikant Ameya Sunil (G2403333J) ameyasun001@e.ntu.edu.sg

In [None]:
import os
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from torchvision.utils import save_image
from tqdm import tqdm
from math import log10
from torch.cuda.amp import GradScaler, autocast
import matplotlib.pyplot as plt

import torch.backends.cudnn as cudnn
# Ask cuDNN to benchmark candidate convolution algorithms and reuse the
# fastest one; this is most useful when input shapes stay constant.
cudnn.benchmark = True

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# ---- Data pipeline configuration ------------------------------------------
IMG_SIZE, BATCH_SIZE = 64, 8
DATASET_DIR = "/kaggle/input/celeba-dataset/img_align_celeba"
subset_size = 5000  # number of randomly chosen images kept for training

# Centre-crop to a 178x178 square, downscale to IMG_SIZE, convert to a tensor
# and map pixel values from [0, 1] to [-1, 1] (matches the model's tanh output).
transform = transforms.Compose(
    [
        transforms.CenterCrop(178),
        transforms.Resize(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Load every image under DATASET_DIR, then keep a random subset of it.
dataset = datasets.ImageFolder(DATASET_DIR, transform=transform)
indices = torch.randperm(len(dataset))[:subset_size]
dataset = Subset(dataset, indices)

# Batches are reshuffled at the start of every epoch.
dataloader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE)

# Define U-Net
class UNet(nn.Module):
    """Convolutional encoder-decoder that maps an RGB image to an RGB image.

    The network downsamples four times with stride-2 convolutions (three in
    ``encoder``, one in ``middle``) and upsamples four times with stride-2
    transposed convolutions (one in ``middle``, three in ``decoder``). The
    final ``Tanh`` keeps outputs in ``[-1, 1]``.

    Note: despite the class name, ``forward`` is purely sequential -- there
    are no skip connections between encoder and decoder stages.

    The output has the same height/width as the input when both are
    multiples of 16; ``forward`` raises ``ValueError`` otherwise.
    """

    def __init__(self):
        super().__init__()

        # Encoder: 3 -> 64 -> 128 -> 256 channels, halving H and W each step.
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )

        # Middle layers: bottleneck down to 512 channels and back up to 256.
        self.middle = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
        )

        # Decoder: 256 -> 128 -> 64 -> 3 channels, doubling H and W each step.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Reconstruct ``x``.

        Args:
            x: tensor whose last two dimensions are height and width and
                whose channel dimension has size 3.

        Returns:
            Tensor with the same spatial size as ``x`` and values in [-1, 1].

        Raises:
            ValueError: if the output spatial size differs from the input's
                (happens when height or width is not a multiple of 16).
        """
        input_hw = x.shape[-2:]
        x = self.encoder(x)
        x = self.middle(x)
        x = self.decoder(x)
        # Compare against the actual input instead of a module-level constant
        # so the model works for any compatible image size.
        if x.shape[-2:] != input_hw:
            raise ValueError(
                f"Output shape mismatch: {x.shape} "
                f"(input spatial size was {tuple(input_hw)})"
            )
        return x

In [None]:
def calculate_psnr(original, generated, max_val=1.0):
    """Return the peak signal-to-noise ratio (in dB) between two tensors.

    Args:
        original: reference tensor.
        generated: tensor to compare; must broadcast against ``original``.
        max_val: peak signal value used in the PSNR formula. The default of
            1.0 preserves the historical behaviour of this function. For
            images normalised to [-1, 1] the peak-to-peak range is 2, so pass
            ``max_val=2.0`` to get the conventional PSNR (about 6.02 dB
            higher than with the default).

    Returns:
        ``10 * log10(max_val**2 / mse)`` as a Python float, or ``100.0`` when
        the two tensors are identical (mse == 0).
    """
    # The metric is reporting-only: skip autograd bookkeeping and accumulate
    # in float32 so half-precision (autocast) outputs do not lose accuracy.
    with torch.no_grad():
        mse = torch.mean((original.float() - generated.float()) ** 2).item()
    if mse == 0:
        return 100.0
    return 10 * log10(max_val ** 2 / mse)

def pixel_accuracy(original, generated, threshold=0.1):
    """Return the fraction of elements reconstructed within ``threshold``.

    An element counts as correct when the absolute difference between
    ``original`` and ``generated`` is strictly below ``threshold``. The
    result is a Python float between 0 and 1.
    """
    abs_error = (original - generated).abs()
    within_tolerance = abs_error < threshold
    return within_tolerance.float().mean().item()

def plot_results(loss_history, psnr_history, accuracy_history=None):
    """Plot per-epoch training curves side by side.

    Args:
        loss_history: sequence of average training losses, one per epoch.
        psnr_history: sequence of average PSNR values (dB), one per epoch.
        accuracy_history: optional sequence of pixel accuracies expressed as
            fractions in [0, 1]. When omitted, a module-level variable named
            ``accuracy_history`` is used if one exists (this keeps existing
            two-argument calls working); if none exists the accuracy panel is
            skipped.

    All panels share a single one-row grid, so they no longer overlap.
    """
    if accuracy_history is None:
        # Backward compatibility: earlier versions read this name implicitly
        # from the module namespace.
        accuracy_history = globals().get("accuracy_history")

    panel_count = 2 if accuracy_history is None else 3
    plt.figure(figsize=(5 * panel_count, 5))

    plt.subplot(1, panel_count, 1)
    plt.plot(loss_history, label='Training Loss')
    plt.title("Training Loss Over Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    plt.subplot(1, panel_count, 2)
    plt.plot(psnr_history, label='PSNR (dB)', color='orange')
    plt.title("PSNR Over Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("PSNR (dB)")
    plt.legend()

    if accuracy_history is not None:
        # The axis is labelled in percent, so convert the stored fractions.
        accuracy_percent = [value * 100 for value in accuracy_history]
        plt.subplot(1, panel_count, 3)
        plt.plot(accuracy_percent, label="Pixel Accuracy (%)", color="green")
        plt.title("Pixel Accuracy Over Epochs")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy (%)")
        plt.legend()

    plt.tight_layout()
    plt.show()

### Generate Sample Images

In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np

def denormalize(image_tensor):
    """Map values from the [-1, 1] range back to [0, 1] for display."""
    shifted = image_tensor + 1
    return shifted / 2

def plot_generated_faces(images, num_images=8):
    """Show up to ``num_images`` images from a batch in a square-ish grid.

    Args:
        images: batch tensor with values in [-1, 1]. If its second dimension
            has size 3 it is treated as channels-first and moved to
            channels-last for matplotlib.
        num_images: maximum number of images to draw; capped at the batch
            size. Nothing is drawn when the effective count is below 1.

    Prints the tensor's shape, dtype and value range as a quick sanity check
    before plotting.
    """
    print("Tensor shape:", images.shape)
    print("Tensor dtype:", images.dtype)

    num_images = min(num_images, len(images))
    if num_images < 1:
        # An empty grid would otherwise fail with a division by zero below.
        return

    print("Tensor min/max:", images.min().item(), images.max().item())

    if images.shape[1] == 3:
        images = images.permute(0, 2, 3, 1)

    # Convert once, in float32: mixed-precision outputs arrive as float16,
    # which is poorly supported for CPU arithmetic and by imshow.
    shown = images[:num_images].detach().float().cpu()
    shown = denormalize(shown).clip(0, 1).numpy()

    cols = int(num_images ** 0.5)
    rows = (num_images // cols) + (num_images % cols > 0)
    # squeeze=False always yields a 2-D array of Axes, so the single-image
    # case (1x1 grid) works the same as every other grid size.
    _, axes = plt.subplots(
        rows, cols, figsize=(cols * 3, rows * 3), squeeze=False
    )

    for i, ax in enumerate(axes.flat):
        if i < num_images:
            ax.imshow(shown[i])
        # Hide ticks/frames on both used and unused cells.
        ax.axis('off')

    plt.tight_layout()
    plt.show()

### Learning Rates

In [None]:
# Learning-rate sweep: train a fresh model for EPOCHS epochs at each rate on a
# denoising task (noisy input -> clean target) and plot the resulting curves.
criterion_mse = nn.MSELoss()
EPOCHS = 15

rates = [1e-2, 1e-3, 1e-4]

for rate in rates:
    print(f"Learning rate: {rate}")
    loss_history = []
    psnr_history = []
    accuracy_history = []
    model = UNet().to(device)
    optimizer = Adam(model.parameters(), lr=rate)
    # Each run gets its own scaler so the dynamic loss scale adapted during
    # one learning rate does not carry over into the next experiment.
    scaler = GradScaler()
    # scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        psnr_total = 0.0
        accuracy_total = 0.0

        for images, _ in tqdm(dataloader):
            images = images.to(device)

            # Adding noise (randn_like already allocates on images' device),
            # then clamp back into the normalised [-1, 1] range.
            noise = torch.randn_like(images) * 0.1
            noisy_images = torch.clip(images + noise, -1, 1)

            with autocast():
                outputs = model(noisy_images)

                mse_loss = criterion_mse(outputs, images)
                total_loss = mse_loss

            optimizer.zero_grad()
            scaler.scale(total_loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += total_loss.item()

            # Metrics are for reporting only; detach so no graph is tracked.
            psnr_total += calculate_psnr(images, outputs.detach())
            accuracy_total += pixel_accuracy(images, outputs.detach())

        # Release cached blocks once per epoch instead of after every step;
        # per-step calls only slow training down.
        torch.cuda.empty_cache()

        avg_loss = running_loss / len(dataloader)
        avg_psnr = psnr_total / len(dataloader)
        avg_accuracy = accuracy_total / len(dataloader)

        loss_history.append(avg_loss)
        psnr_history.append(avg_psnr)
        accuracy_history.append(avg_accuracy)

        # scheduler.step()
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}, PSNR: {avg_psnr:.2f} dB, Accuracy: {avg_accuracy * 100:.2f}%")
    plot_results(loss_history, psnr_history)

## Without any Regularization Techniques

### Image Size set to 64. Without Noise.

In [None]:
# Baseline run: plain reconstruction (clean input -> clean target) with Adam,
# no weight decay, no LR schedule and no noise injection.
criterion_mse = nn.MSELoss()
scaler = GradScaler()
EPOCHS = 100

RATE = 1e-4

loss_history = []
psnr_history = []
accuracy_history = []
base_model = UNet().to(device)
optimizer = Adam(base_model.parameters(), lr=RATE)

for epoch in range(EPOCHS):
    base_model.train()
    running_loss = 0.0
    psnr_total = 0.0
    accuracy_total = 0.0

    for images, _ in tqdm(dataloader):
        images = images.to(device)

        with autocast():
            base_outputs = base_model(images)

            mse_loss = criterion_mse(base_outputs, images)
            total_loss = mse_loss

        optimizer.zero_grad()
        scaler.scale(total_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += total_loss.item()

        # Metrics are for reporting only; detach so no graph is tracked.
        psnr_total += calculate_psnr(images, base_outputs.detach())
        accuracy_total += pixel_accuracy(images, base_outputs.detach())

    # Release cached blocks once per epoch instead of after every step;
    # per-step calls only slow training down.
    torch.cuda.empty_cache()

    # Store average metrics
    avg_loss = running_loss / len(dataloader)
    avg_psnr = psnr_total / len(dataloader)
    avg_accuracy = accuracy_total / len(dataloader)

    loss_history.append(avg_loss)
    psnr_history.append(avg_psnr)
    accuracy_history.append(avg_accuracy)

    print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}, PSNR: {avg_psnr:.2f} dB, Accuracy: {avg_accuracy * 100:.2f}%")
plot_results(loss_history, psnr_history)
# Visualise the reconstructions of the last batch of the final epoch.
plot_generated_faces(base_outputs, num_images=8)

### 1. STEP LR WITH WEIGHT DECAY

### StepLR Learning-Rate Schedule with Weight Decay

In [None]:
# Regularised run 1: Adam with weight decay plus a StepLR schedule that
# multiplies the learning rate by 0.5 every 25 epochs. No noise injection.
criterion_mse = nn.MSELoss()
scaler = GradScaler()
EPOCHS = 100

RATE = 1e-4

loss_history = []
psnr_history = []
accuracy_history = []
model = UNet().to(device)
optimizer = Adam(model.parameters(), lr=RATE, weight_decay=2e-6)
scheduler = StepLR(optimizer, step_size=25, gamma=0.5)

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    psnr_total = 0.0
    accuracy_total = 0.0

    for images, _ in tqdm(dataloader):
        images = images.to(device)

        with autocast():
            outputs = model(images)

            mse_loss = criterion_mse(outputs, images)
            total_loss = mse_loss

        optimizer.zero_grad()
        scaler.scale(total_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += total_loss.item()

        # Metrics are for reporting only; detach so no graph is tracked.
        psnr_total += calculate_psnr(images, outputs.detach())
        accuracy_total += pixel_accuracy(images, outputs.detach())

    # Release cached blocks once per epoch instead of after every step;
    # per-step calls only slow training down.
    torch.cuda.empty_cache()

    avg_loss = running_loss / len(dataloader)
    avg_psnr = psnr_total / len(dataloader)
    avg_accuracy = accuracy_total / len(dataloader)

    loss_history.append(avg_loss)
    psnr_history.append(avg_psnr)
    accuracy_history.append(avg_accuracy)

    # Advance the schedule once per epoch, after that epoch's optimizer steps.
    scheduler.step()
    print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}, PSNR: {avg_psnr:.2f} dB, Accuracy: {avg_accuracy * 100:.2f}%")
plot_results(loss_history, psnr_history)
# Visualise the reconstructions of the last batch of the final epoch.
plot_generated_faces(outputs, num_images=8)

### 2. NOISE INJECTION

In [None]:
# Regularised run 2: inject small Gaussian noise into the inputs and train the
# model to reconstruct the clean images (Adam with weight decay, no schedule).
criterion_mse = nn.MSELoss()
scaler = GradScaler()
EPOCHS = 100

RATE = 1e-4

loss_history = []
psnr_history = []
accuracy_history = []
model = UNet().to(device)
optimizer = Adam(model.parameters(), lr=RATE, weight_decay=2e-6)
# scheduler = StepLR(optimizer, step_size=25, gamma=1e-6)

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    psnr_total = 0.0
    accuracy_total = 0.0

    for images, _ in tqdm(dataloader):
        images = images.to(device)

        # randn_like allocates on the same device as `images`, so no extra
        # device transfers are needed. Clamp back into the [-1, 1] range.
        noise = torch.randn_like(images) * 0.001
        noisy_images = torch.clip(images + noise, -1, 1)

        with autocast():
            outputs = model(noisy_images)

            mse_loss = criterion_mse(outputs, images)
            total_loss = mse_loss

        optimizer.zero_grad()
        scaler.scale(total_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += total_loss.item()

        # Metrics are for reporting only; detach so no graph is tracked.
        psnr_total += calculate_psnr(images, outputs.detach())
        accuracy_total += pixel_accuracy(images, outputs.detach())

    # Release cached blocks once per epoch instead of after every step;
    # per-step calls only slow training down.
    torch.cuda.empty_cache()

    avg_loss = running_loss / len(dataloader)
    avg_psnr = psnr_total / len(dataloader)
    avg_accuracy = accuracy_total / len(dataloader)

    loss_history.append(avg_loss)
    psnr_history.append(avg_psnr)
    accuracy_history.append(avg_accuracy)

    # scheduler.step()
    print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}, PSNR: {avg_psnr:.2f} dB, Accuracy: {avg_accuracy * 100:.2f}%")
plot_results(loss_history, psnr_history)
# Visualise the reconstructions of the last batch of the final epoch.
plot_generated_faces(outputs, num_images=8)