## Q1
1. Perform classification on FashionMNIST, fashion apparels dataset, using a pre-
trained model which is trained on MNIST handwritten digit classification dataset.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Define the CNN Classifier
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Download MNIST dataset
trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Data loaders
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Instantiate the model
model = CNNClassifier().to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [3]:
# Train the model
for epoch in range(5):  # Loop over the dataset multiple times
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            running_loss = 0.0

    print(f"[Epoch: {epoch + 1}] loss: {running_loss / 100:.3f}")

# Save the model
torch.save(model.state_dict(), "./ModelFiles/model.pt")

print("Finished Training and Saved Model")

[Epoch: 1] loss: 0.035
[Epoch: 2] loss: 0.025
[Epoch: 3] loss: 0.014
[Epoch: 4] loss: 0.010
[Epoch: 5] loss: 0.005
Finished Training and Saved Model


### Using the Pretrained Model for Inference on Fashion MNIST

In [4]:
# Define the CNN Classifier (same architecture as the MNIST model)
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [5]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Download Fashion-MNIST dataset (we will train on the test set as an example)
mnist_testset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(mnist_testset, batch_size=64, shuffle=False)

# Instantiate the model and load the pre-trained weights
model = CNNClassifier()
model.load_state_dict(torch.load("./ModelFiles/model.pt"))
model.to(device)

# Freeze the convolutional layers, so they don't get updated during training
for param in model.conv1.parameters():
    param.requires_grad = False
for param in model.conv2.parameters():
    param.requires_grad = False

# We will train the fully connected layers (fc1, fc2)
for param in model.fc1.parameters():
    param.requires_grad = True
for param in model.fc2.parameters():
    param.requires_grad = True

# Print model's state_dict (parameter sizes)
print("Model's state_dict:")
for param_tensor in model.state_dict().keys():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
print()

# Set the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.9)

Model's state_dict:
conv1.weight 	 torch.Size([32, 1, 3, 3])
conv1.bias 	 torch.Size([32])
conv2.weight 	 torch.Size([64, 32, 3, 3])
conv2.bias 	 torch.Size([64])
fc1.weight 	 torch.Size([128, 1600])
fc1.bias 	 torch.Size([128])
fc2.weight 	 torch.Size([10, 128])
fc2.bias 	 torch.Size([10])



  model.load_state_dict(torch.load("./ModelFiles/model.pt"))


In [6]:
# Fine-tuning the model
model.train()
for epoch in range(5):  # Train for a few epochs
    running_loss = 0.0
    for i, data in enumerate(test_loader, 0):  # Use test_loader for this example
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"[Epoch {epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

[Epoch 1, 100] loss: 0.996
[Epoch 2, 100] loss: 0.520
[Epoch 3, 100] loss: 0.457
[Epoch 4, 100] loss: 0.418
[Epoch 5, 100] loss: 0.389


In [7]:
# After training, evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for i, vdata in enumerate(test_loader):
        tinputs, tlabels = vdata
        tinputs, tlabels = tinputs.to(device), tlabels.to(device)
        
        # Forward pass
        toutputs = model(tinputs)
        
        # Get predicted class
        _, predicted = torch.max(toutputs, 1)
        
        # Print the true and predicted labels
        print(f"True label: {tlabels}")
        print(f"Predicted: {predicted}")
        
        total += tlabels.size(0)
        correct += (predicted == tlabels).sum().item()

# Calculate accuracy
accuracy = 100.0 * correct / total
print(f"\nThe overall accuracy is {accuracy:.2f}%")

True label: tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5], device='cuda:0')
Predicted: tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 5, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 5,
        1, 2, 6, 0, 9, 4, 8, 8, 3, 6, 8, 0, 7, 5, 7, 9, 0, 1, 0, 9, 4, 9, 2, 1,
        4, 6, 4, 2, 5, 8, 2, 2, 8, 6, 8, 0, 7, 7, 8, 5], device='cuda:0')
True label: tensor([1, 1, 2, 3, 9, 8, 7, 0, 2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2,
        0, 6, 5, 3, 6, 7, 1, 8, 0, 1, 4, 2, 3, 6, 7, 2, 7, 8, 5, 9, 9, 4, 2, 5,
        7, 0, 5, 2, 8, 6, 7, 8, 0, 0, 9, 9, 3, 0, 8, 4], device='cuda:0')
Predicted: tensor([1, 1, 3, 4, 7, 8, 7, 0, 2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2,
        0, 2, 5, 3, 6, 7, 1, 8, 0, 1, 2, 2, 3, 6, 7, 6, 7, 8, 5, 9, 9, 4, 2, 5,
        7, 0, 5, 2, 8, 6, 7, 8, 0, 0, 9, 9, 3, 0, 8, 4], device='cuda:0')
True label: tensor

## Q2 
Learn the AlexNet architecture and apply transfer learning to perform the classification
task. Using the pre-trained AlexNet, classify images from the cats_and_dogs_filtered
dataset downloaded from the below link. Finetune the classifier given in AlexNet as a two-
class classifier. Perform pre-processing of images as per the requirement.

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import os
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [11]:
transform = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),    # Resize and crop images to 224x224
        transforms.RandomHorizontalFlip(),    # Randomly flip the image horizontally for data augmentation
        transforms.ToTensor(),                # Convert image to tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize based on ImageNet stats
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),               # Resize image while maintaining aspect ratio
        transforms.CenterCrop(224),           # Crop the image to 224x224 from the center
        transforms.ToTensor(),                # Convert image to tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize based on ImageNet stats
    ])
}

# Load the dataset (train and validation sets)
data_dir = './cats_and_dogs_filtered'  # Path to the dataset
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')

train_dataset = datasets.ImageFolder(train_dir, transform['train'])
val_dataset = datasets.ImageFolder(val_dir, transform['val'])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Check the class labels in the dataset
print("Classes: ", train_dataset.classes)  # Should print ['cats', 'dogs']

Classes:  ['cats', 'dogs']


In [13]:
# Load pre-trained AlexNet model
model = models.alexnet(pretrained=True)

# Freeze the convolutional layers (feature extractor part of the model)
for param in model.parameters():
    param.requires_grad = False

# Modify the final fully connected layer to classify only 2 classes (cats and dogs)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)

# Move the model to the GPU (if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Print the modified model to see the change
print(model)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [14]:
# Loss function (for binary classification)
criterion = nn.CrossEntropyLoss()

# Optimizer (only for the final classifier layers)
optimizer = optim.SGD(model.classifier.parameters(), lr=0.001, momentum=0.9)

In [16]:
# Train the model
num_epochs = 10  # Define the number of epochs for fine-tuning

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    print("-" * 10)

    # Training phase
    model.train()
    running_loss = 0.0
    running_corrects = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Track statistics
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)

    print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

    # Validation phase
    model.eval()
    running_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels.data)

    val_acc = running_corrects.double() / len(val_dataset)
    print(f"Validation Accuracy: {val_acc:.4f}")

    print()


Epoch 1/10
----------
Train Loss: 0.2695 Acc: 0.8815
Validation Accuracy: 0.9620

Epoch 2/10
----------
Train Loss: 0.2137 Acc: 0.9060
Validation Accuracy: 0.9590

Epoch 3/10
----------
Train Loss: 0.1786 Acc: 0.9175
Validation Accuracy: 0.9660

Epoch 4/10
----------
Train Loss: 0.1762 Acc: 0.9255
Validation Accuracy: 0.9710

Epoch 5/10
----------
Train Loss: 0.1852 Acc: 0.9220
Validation Accuracy: 0.9670

Epoch 6/10
----------
Train Loss: 0.1681 Acc: 0.9290
Validation Accuracy: 0.9750

Epoch 7/10
----------
Train Loss: 0.1650 Acc: 0.9340
Validation Accuracy: 0.9720

Epoch 8/10
----------
Train Loss: 0.1497 Acc: 0.9290
Validation Accuracy: 0.9760

Epoch 9/10
----------
Train Loss: 0.1613 Acc: 0.9335
Validation Accuracy: 0.9720

Epoch 10/10
----------
Train Loss: 0.1538 Acc: 0.9305
Validation Accuracy: 0.9640



In [23]:
# Save the fine-tuned model
torch.save(model.state_dict(), "fine_tuned_alexnet.pth")
print("Model saved as fine_tuned_alexnet.pth")

Model saved as fine_tuned_alexnet.pth


In [24]:
# Load the trained model (if needed)
model.load_state_dict(torch.load("fine_tuned_alexnet.pth"))
model.eval()  # Set the model to evaluation mode

# You can now run predictions using the trained model on new data.


  model.load_state_dict(torch.load("fine_tuned_alexnet.pth"))


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 