# Assignment 2
Implementing an Image classification model using CNN in PyTorch

## Importing libraries and loading the dataset
I'm using MNIST dataset which contains handwritten numbers from 0 to 9

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
# Transform to convert images to tensors and normalize to range [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize grayscale channel
])

# Download and load the MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Making the Model
The next two cells are me trying to change the number of channels to make the model better but the third one (which I used) worked the best.

In [20]:
# import torch.nn as nn
# import torch.nn.functional as F

# class MNIST_CNN(nn.Module):
#     def __init__(self):
#         super(MNIST_CNN, self).__init__()
#         self.conv1 = nn.Conv2d(1, 128, kernel_size=3, padding=1)  # Input: 1 channel, Output: 32 channels
#         self.conv2 = nn.Conv2d(128, 64, kernel_size=3, padding=1)  # Input: 32 channels, Output: 64 channels
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling
#         self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Fully connected layer
#         self.fc2 = nn.Linear(128, 10)  # Output layer for 10 classes

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))  # Conv1 -> ReLU -> Pool
#         x = self.pool(F.relu(self.conv2(x)))  # Conv2 -> ReLU -> Pool
#         x = x.view(-1, 64 * 7 * 7)  # Flatten feature maps
#         x = F.relu(self.fc1(x))  # Fully connected layer -> ReLU
#         x = self.fc2(x)  # Output layer
#         return x

In [22]:
# import torch.nn as nn
# import torch.nn.functional as F

# class MNIST_CNN(nn.Module):
#     def __init__(self):
#         super(MNIST_CNN, self).__init__()
#         self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)  # Input: 1 channel, Output: 32 channels
#         self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)  # Input: 32 channels, Output: 64 channels
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling
#         self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Fully connected layer
#         self.fc2 = nn.Linear(128, 10)  # Output layer for 10 classes

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))  # Conv1 -> ReLU -> Pool
#         x = self.pool(F.relu(self.conv2(x)))  # Conv2 -> ReLU -> Pool
#         x = x.view(-1, 64 * 7 * 7)  # Flatten feature maps
#         x = F.relu(self.fc1(x))  # Fully connected layer -> ReLU
#         x = self.fc2(x)  # Output layer
#         return x

### This is the model I used

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class MNIST_CNN(nn.Module):
    def __init__(self):
        super(MNIST_CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # Input: 1 channel, Output: 32 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # Input: 32 channels, Output: 64 channels
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Fully connected layer
        self.fc2 = nn.Linear(128, 10)  # Output layer for 10 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Conv1 -> ReLU -> Pool
        x = self.pool(F.relu(self.conv2(x)))  # Conv2 -> ReLU -> Pool
        x = x.view(-1, 64 * 7 * 7)  # Flatten feature maps
        x = F.relu(self.fc1(x))  # Fully connected layer -> ReLU
        x = self.fc2(x)  # Output layer
        return x


# Training the model
I have only trained the model for 10 epochs, I tried training it for more like 15 and 20, that didn't change the accuracy much so I'm sticking to 10.

In [4]:
import torch.optim as optim

# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, loss function, and optimizer
model = MNIST_CNN().to(device)
# using cross entropy loss function
criterion = nn.CrossEntropyLoss()
# using the adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # Number of epochs
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.1641
Epoch 2, Loss: 0.0464
Epoch 3, Loss: 0.0337
Epoch 4, Loss: 0.0243
Epoch 5, Loss: 0.0178
Epoch 6, Loss: 0.0151
Epoch 7, Loss: 0.0113
Epoch 8, Loss: 0.0098
Epoch 9, Loss: 0.0086
Epoch 10, Loss: 0.0067


# Evaluating the model

In [5]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 99.05%


# Using a pretrained model
I used RESNET18 from torchvision model zoo, I only had one convolution layer and the final fully connected layer for this model

In [6]:
from torchvision import models

# Load a pretrained ResNet
pretrained_model = models.resnet18(pretrained=True)

# Modify the first convolution layer and final fully connected layer
pretrained_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3)  # Adjust for 1 input channel
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)  # Adjust for 10 output classes

pretrained_model = pretrained_model.to(device)

# Use the same training and evaluation process for the pretrained model




# Fine tuning RESNET18
I tuned this model for 5 epochs because its a pretrained model and only requires fine tuning

In [7]:
import torch.optim as optim

# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a pretrained ResNet model
from torchvision import models
pretrained_model = models.resnet18(pretrained=True)

# Modify the first convolutional layer and the final fully connected layer
pretrained_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3)  # Adjust for grayscale input
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)  # Adjust for 10 output classes
pretrained_model = pretrained_model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(pretrained_model.parameters(), lr=0.0001)  # Using a smaller learning rate for fine-tuning

# Training loop
for epoch in range(5):  # Fine-tune for fewer epochs
    pretrained_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradients
        outputs = pretrained_model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.2919
Epoch 2, Loss: 0.0822
Epoch 3, Loss: 0.0549
Epoch 4, Loss: 0.0417
Epoch 5, Loss: 0.0348


# Evaluating the pretrained model

In [8]:
# Evaluation mode
pretrained_model.eval()

correct = 0
total = 0

# Disable gradient calculation for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_model(images)  # Forward pass
        _, predicted = torch.max(outputs, 1)  # Get the class with the highest score
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Print test accuracy
print(f"Pretrained Model Test Accuracy: {100 * correct / total:.2f}%")


Pretrained Model Test Accuracy: 98.86%
