# **Assignment 3**




## Fine-tuning a Pretrained ResNet Model for Image Classification

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import time
import os

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training pipeline: random crop/flip augmentation, then tensor conversion and
# per-channel normalization.
transform_train = transforms.Compose([
    # Produces a grayscale image replicated over 3 channels so the tensor has the
    # 3-channel shape the network expects.
    # NOTE(review): for RGB inputs this discards colour information - confirm intended.
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: deterministic resize + centre crop (no augmentation),
# followed by the same tensor conversion and normalization as training.
transform_test = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # same 3-channel grayscale step as training
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

<style>
blue {
  color: skyblue;
}

red {
  color: red;
}

green {
  color: lightgreen;
}
</style>

1) The cell below loads the <blue>**CIFAR-10**</blue> dataset for <green>**image classification**</green>.
2) The <blue>**train_loader**</blue> and <blue>**test_loader**</blue> are used to load the datasets in batches of 32, with shuffling applied only to the training set.

Note: Any **"None"** placeholders left in the code must be replaced with the correct dataset-loading code before the data loaders can be created.

In [None]:
# Load CIFAR-10 through torchvision's built-in dataset class, which downloads the
# archive into `root` on first use. (datasets.ImageFolder only reads an existing
# directory tree and cannot download anything.)
# NOTE(review): "./data" is an assumed download location - adjust if needed.
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

# Batches of 32; only the training set is shuffled so evaluation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

In [None]:
# Load a ResNet-18 initialised with pretrained weights
model = models.resnet18(pretrained=True)

# Freeze every pretrained weight so that only the new classification head is trained
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a 10-way classifier (10 classes in
# Cifar-10). The new layer is created after the freeze loop, so its parameters keep
# the default requires_grad=True and remain trainable.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

In [None]:
# Move the model's parameters and buffers to the selected device
model = model.to(device)

# Define loss function and optimizer
# Cross-entropy is the standard criterion for single-label multi-class classification.
criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that are still trainable, so frozen
# weights are never tracked or updated. Learning rate 0.001 as required.
optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad], lr=0.001)

In [None]:
# Train and evaluate the model
def train_model(model, criterion, optimizer, num_epochs=10, dataloaders=None, device=None):
    """Train ``model`` and evaluate it on the test split after every epoch.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the trainable parameters of ``model``.
        num_epochs: Number of passes over the training data.
        dataloaders: Optional mapping with ``'train'`` and ``'test'`` loaders.
            When omitted, the module-level ``train_loader`` and ``test_loader``
            are used.
        device: Optional device the batches are moved to. When omitted, the
            device of the model's first parameter is used (CPU if the model has
            no parameters).

    Returns:
        The same ``model`` object, left in evaluation mode by the final test phase.
    """
    if dataloaders is None:
        dataloaders = {'train': train_loader, 'test': test_loader}
    if device is None:
        # Follow the model so the inputs always land where the weights live.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and testing phase
        for phase in ['train', 'test']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluation mode
            dataloader = dataloaders[phase]

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Track gradients only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Statistics: the criterion returns a batch mean, so weight it by
                # the batch size to get a correct per-sample average at the end.
                running_loss += loss.item() * inputs.size(0)
                # .item() keeps the counter a plain Python int on every device.
                running_corrects += torch.sum(preds == labels).item()

            num_samples = len(dataloader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    return model


# Run fine-tuning for five epochs; train_model hands back the model it was given
fine_tuned_model = train_model(
    model,
    criterion,
    optimizer,
    num_epochs=5,
)

In [None]:
# Evaluate the performance of the model before and after fine-tuning
# Function to visualize images and their predictions
def visualize_predictions(model, dataloader, num_images=2):
    """Plot the first ``num_images`` samples of ``dataloader`` with predicted class indices.

    Args:
        model: Classifier used for the predictions; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches; the labels
            are not used.
        num_images: Number of images to draw. Nothing is drawn when it is not
            positive.

    Returns:
        None. The images are drawn into a new matplotlib figure, two per row.
    """
    if num_images <= 0:
        return

    model.eval()  # Set the model to evaluation mode

    # Send each batch to wherever the model's weights live.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Two plots per row, so only ceil(num_images / 2) rows are needed.
    num_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)

            # Forward pass; the arg-max over the class dimension is the prediction
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            # Bring the batch back to the CPU once for plotting
            batch_images = inputs.cpu()

            for image, pred in zip(batch_images, preds):
                images_so_far += 1
                ax = plt.subplot(num_rows, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'Predicted: {pred.item()}')
                # Undo the input normalization so the picture shows natural
                # intensities instead of clipped, normalized values.
                ax.imshow(denormalize(image))

                if images_so_far == num_images:
                    return

# Function to denormalize the image for visualization
def denormalize(image_tensor):
    """Convert a normalized CHW image tensor into a displayable HWC array.

    Args:
        image_tensor: Tensor whose first dimension holds 3 channels, normalized
            with the per-channel mean/std listed below.

    Returns:
        A NumPy array in height x width x channel layout with values clipped
        to the range [0, 1].
    """
    # Means and stds used for normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # detach() and cpu() make the NumPy conversion work for tensors that track
    # gradients or live on an accelerator; both are no-ops for plain CPU tensors.
    image = image_tensor.detach().cpu().permute(1, 2, 0).numpy()  # Convert tensor to HWC
    image = std * image + mean  # Denormalize (channels are last, so this broadcasts)
    image = np.clip(image, 0, 1)  # Clip values to be in [0, 1] range
    return image


def make_map_classes(path):
    """Build a mapping from numeric class index to class name.

    Every entry of ``path`` is expected to be named ``NNN<sep>name``: the first
    three characters are parsed as the integer index, the fourth character is a
    separator, and the remainder is the class name. Entries whose first three
    characters are not an integer (for example hidden files or notebook
    checkpoint folders) are skipped instead of aborting the scan.

    Args:
        path: Directory whose entries are named as described above.

    Returns:
        A dict mapping each parsed integer index to its class name.
    """
    classes_dict = {}
    for entry in sorted(os.listdir(path)):
        try:
            index = int(entry[:3])
        except ValueError:
            # Not a class entry (e.g. ".DS_Store"); ignore it.
            continue
        classes_dict[index] = entry[4:]

    return classes_dict

In [None]:
# Function to compare predictions of non-finetuned and finetuned models
def compare_predictions(pretrained_model, finetuned_model, dataloader, path_dir, num_images=5):
    """Show the two models' predictions side by side for the first ``num_images`` samples.

    One figure is opened per image: the left panel is titled with the
    pre-trained model's prediction and the right panel with the fine-tuned
    model's prediction.

    Args:
        pretrained_model: Baseline model; it is switched to eval mode.
        finetuned_model: Fine-tuned model; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches; the labels
            are not used.
        path_dir: Directory handed to ``make_map_classes`` to obtain the
            index-to-name mapping used in the titles.
        num_images: Number of images to show. Nothing is shown when it is not
            positive.

    Returns:
        None.
    """
    if num_images <= 0:
        return

    pretrained_model.eval()
    finetuned_model.eval()

    images_shown = 0
    classes_dict = make_map_classes(path_dir)

    def model_device(model):
        # Device of the model's weights; CPU when the model has no parameters.
        first_param = next(model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def describe(pred):
        # Predictions are 0-based while the directory prefixes parsed by
        # make_map_classes are looked up with a +1 shift, so apply the same
        # shift for both models.
        index = pred.item()
        return f'{index} : {classes_dict[index + 1]}'

    pre_device = model_device(pretrained_model)
    fin_device = model_device(finetuned_model)

    with torch.no_grad():
        for inputs, _ in dataloader:
            # Predictions from pre-trained model (without fine-tuning)
            pre_outputs = pretrained_model(inputs.to(pre_device))
            _, pre_preds = torch.max(pre_outputs, 1)

            # Predictions from fine-tuned model
            fin_outputs = finetuned_model(inputs.to(fin_device))
            _, fin_preds = torch.max(fin_outputs, 1)

            # Plotting needs the images on the CPU
            batch_images = inputs.cpu()

            for i in range(batch_images.size(0)):
                if images_shown == num_images:
                    return  # Stop after showing num_images
                images_shown += 1

                img = denormalize(batch_images[i])

                plt.figure(figsize=(10, 4))

                # Show pre-trained model's prediction
                plt.subplot(1, 2, 1)
                plt.imshow(img)
                plt.title(f'Pre-trained Prediction: {describe(pre_preds[i])}')
                plt.axis('off')

                # Show fine-tuned model's prediction
                plt.subplot(1, 2, 2)
                plt.imshow(img)
                plt.title(f'Fine-tuned Prediction: {describe(fin_preds[i])}')
                plt.axis('off')

                plt.show()

In [None]:
# Evaluate and visualize predictions
print("Comparing predictions of pre-trained and fine-tuned models on example images...")

# Baseline for the comparison: a pretrained ResNet-18 whose classifier head is
# replaced by a fresh, untrained 10-way linear layer, so its output size matches
# the fine-tuned model. No layers are frozen here; the model is only used for inference.
pretrained_model = models.resnet18(pretrained=True)
pretrained_model.fc = nn.Linear(in_features=pretrained_model.fc.in_features, out_features=10)
pretrained_model = pretrained_model.to(device)

# Compare both models on the first ten test images
# TODO: Fill in the path of the 256_ObjectCategories directory in the path variable below.
path_to256_ObjectCategories_dir = None
compare_predictions(
    pretrained_model,
    fine_tuned_model,
    test_loader,
    path_to256_ObjectCategories_dir,
    num_images=10,
)
