In [None]:

# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the cells visible in this notebook read from or write
# to the mounted drive (the datasets are stored under ./data) — confirm this
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1. Multilayer Perceptron (MLP):
* Develop a Multilayer Perceptron (MLP) model with an appropriate architecture (number
of layers, hidden units, etc.).
* Train the MLP model on the CIFAR-10 dataset.
* Evaluate its performance on the test set using metrics like accuracy and loss. Analyze
the results

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Preprocessing: convert each PIL image to a float tensor in [0, 1], then
# shift/scale every RGB channel with mean 0.5 and std 0.5 (giving [-1, 1]).
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Settings shared by the train and test loaders.
loader_kwargs = dict(batch_size=128, num_workers=2)

# CIFAR-10 training split (downloaded to ./data on first use), reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

# CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Define MLP model
class MLP(nn.Module):
    """Fully connected classifier: 3072 -> 512 -> 256 -> 128 -> 10.

    The input is reshaped to rows of 32 * 32 * 3 = 3072 features, passed
    through three ReLU-activated hidden layers, and projected to 10 raw
    class scores (logits; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that parameter
        # names and seeded initialisation are unchanged.
        self.fc1 = nn.Linear(32 * 32 * 3, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        """Return a (rows, 10) tensor of logits for the given batch."""
        # Flatten to rows of 3072 features (fc1.in_features == 32 * 32 * 3).
        hidden = x.view(-1, self.fc1.in_features)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        # The output layer has no activation.
        return self.fc4(hidden)

model = MLP()

# Run on the GPU when one is available, otherwise on the CPU. The model and
# every batch must live on the same device, so both are moved explicitly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 20
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:  # Print every 200 mini-batches
            # Mean loss over the last 200 mini-batches.
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')

# Test the model
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Two decimals instead of %d, which silently truncated the percentage.
print('Accuracy of the network on the 10000 test images: %.2f %%' % (
    100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[1,   200] loss: 1.746
[2,   200] loss: 1.440
[3,   200] loss: 1.302
[4,   200] loss: 1.202
[5,   200] loss: 1.108
[6,   200] loss: 1.027
[7,   200] loss: 0.953
[8,   200] loss: 0.872
[9,   200] loss: 0.795
[10,   200] loss: 0.717
[11,   200] loss: 0.671
[12,   200] loss: 0.591
[13,   200] loss: 0.526
[14,   200] loss: 0.480
[15,   200] loss: 0.431
[16,   200] loss: 0.382
[17,   200] loss: 0.369
[18,   200] loss: 0.335
[19,   200] loss: 0.305
[20,   200] loss: 0.308
Finished Training
Accuracy of the network on the 10000 test images: 53 %


# Observation:
* An MLP flattens each image into a single vector, so it cannot effectively capture the spatial dependencies and local patterns present in images, and it handles the high-dimensional input inefficiently. As a result, the test accuracy was on the lower side (53%).

# Convolutional Neural Network
* Design a simple Convolutional Neural Network (CNN) architecture with convolutional
layers, pooling layers, and fully connected layers.
* Train the CNN model on the CIFAR-10 dataset.
* Evaluate its performance on the test set using the same metrics as the MLP. Critically
compare CNN's performance to the MLP's.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Preprocessing: convert each PIL image to a float tensor in [0, 1], then
# shift/scale every RGB channel with mean 0.5 and std 0.5 (giving [-1, 1]).
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Settings shared by the train and test loaders.
loader_kwargs = dict(batch_size=128, num_workers=2)

# CIFAR-10 training split (downloaded to ./data on first use), reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

# CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Define CNN model
class CNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a two-layer classifier.

    Every 3x3 convolution uses padding=1 and stride 1, so it keeps the
    spatial size: (W - F + 2P) / S + 1 = (32 - 3 + 2) / 1 + 1 = 32.
    Each 2x2 max-pool then halves it, so a 3x32x32 input shrinks
    32 -> 16 -> 8 -> 4 and reaches the classifier as 128 * 4 * 4 features.
    The output is 10 raw class scores (logits; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Channel progression: 3 -> 32 -> 64 -> 128.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # One parameter-free pooling layer is reused after every conv stage.
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return a (rows, 10) tensor of logits for the given batch."""
        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(torch.relu(conv(features)))
        # Flatten to rows of 2048 features (fc1.in_features == 128 * 4 * 4).
        flat = features.view(-1, self.fc1.in_features)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

model = CNN()

# Cross-entropy on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 20
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            # Report the mean loss of the last 200 mini-batches, then reset.
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')

# Test the model
model.eval()  # put the network in inference mode before measuring accuracy
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Two decimals instead of %d, which silently truncated the percentage.
print('Accuracy of the network on the 10000 test images: %.2f %%' % (
    100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[1,   200] loss: 1.620
[2,   200] loss: 1.055
[3,   200] loss: 0.828
[4,   200] loss: 0.677
[5,   200] loss: 0.555
[6,   200] loss: 0.459
[7,   200] loss: 0.356
[8,   200] loss: 0.264
[9,   200] loss: 0.199
[10,   200] loss: 0.136
[11,   200] loss: 0.099
[12,   200] loss: 0.073
[13,   200] loss: 0.070
[14,   200] loss: 0.048
[15,   200] loss: 0.053
[16,   200] loss: 0.044
[17,   200] loss: 0.052
[18,   200] loss: 0.060
[19,   200] loss: 0.034
[20,   200] loss: 0.032
Finished Training
Accuracy of the network on the 10000 test images: 76 %


# Observation and Analysis
* CNNs are able to capture spatial hierarchies and local patterns through their convolutional layers, and weight sharing reduces the number of parameters, which makes them better suited to image data.
* As a result, the CNN performed better than the plain MLP (76% vs. 53% test accuracy).

# Transfer Learning with VGG
* Utilize a pre-trained VGG model (such as VGG16 or VGG19) available in common deep
learning libraries.
* Adapt the pre-trained VGG model for the CIFAR-10 or CIFAR-100 classification task.
* Evaluate its performance on the test set. Analyze the benefits of transfer learning
compared to your MLP and CNN models trained from scratch.

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from tqdm import tqdm

# Define transforms
# Training transform: upscale the 32x32 CIFAR images to the 224x224 input size
# used with VGG and apply a random horizontal flip as augmentation.
# NOTE(review): the pretrained ImageNet weights were trained with ImageNet
# mean/std normalisation; 0.5/0.5 is kept here to preserve the existing setup.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),  # to reduce overfitting
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Evaluation transform: identical preprocessing but WITHOUT random
# augmentation, so the reported test accuracy is deterministic and is measured
# on the unmodified test images.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Load pre-trained VGG model
vgg_model = torch.hub.load("pytorch/vision", "vgg16", weights="IMAGENET1K_V1")

# Modify the last layer for CIFAR-10 classification.
# torchvision's VGG has no `fc` attribute (that is the ResNet naming): its head
# is the `classifier` Sequential, whose last entry is the Linear layer that
# produces the 1000 ImageNet scores. Replace that entry in place so the
# forward pass actually uses the new 10-way layer.
num_features = vgg_model.classifier[-1].in_features
vgg_model.classifier[-1] = nn.Linear(num_features, 10)  # CIFAR-10 has 10 classes

# Cross-entropy on the raw logits; SGD with momentum updates every parameter
# of the network (no layers are frozen).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg_model.parameters(), lr=0.001, momentum=0.9)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg_model.to(device)

num_epochs = 5
for epoch in range(num_epochs):
    vgg_model.train()
    running_loss = 0.0
    progress = tqdm(trainloader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for inputs, labels in progress:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = vgg_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that dividing by the
        # dataset size below yields a per-sample average for the epoch.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(trainset)
    print(f'Training Loss: {epoch_loss:.4f}')

Files already downloaded and verified
Files already downloaded and verified


Using cache found in /root/.cache/torch/hub/pytorch_vision_main
Epoch 1/5: 100%|██████████| 782/782 [12:40<00:00,  1.03it/s]


Training Loss: 0.4749


Epoch 2/5: 100%|██████████| 782/782 [12:39<00:00,  1.03it/s]


Training Loss: 0.2551


Epoch 3/5: 100%|██████████| 782/782 [12:37<00:00,  1.03it/s]


Training Loss: 0.1916


Epoch 4/5: 100%|██████████| 782/782 [12:36<00:00,  1.03it/s]


Training Loss: 0.1538


Epoch 5/5: 100%|██████████| 782/782 [12:37<00:00,  1.03it/s]


Training Loss: 0.1225


Testing: 100%|██████████| 157/157 [01:04<00:00,  2.42it/s]

Test Accuracy: 93.74





In [None]:
# Measure top-1 accuracy of the fine-tuned model on the test loader.
vgg_model.eval()
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in tqdm(testloader, desc='Testing'):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = vgg_model(inputs)
        # Index of the largest score in each row is the predicted class.
        predicted = torch.max(outputs, 1).indices
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.size(0)

accuracy = correct / total
print('Test Accuracy:',accuracy*100,"%")

Testing: 100%|██████████| 157/157 [01:05<00:00,  2.40it/s]

Test Accuracy: 93.57 %





# Observation and analysis:
* Using a VGG model for transfer learning improved performance by leveraging pre-trained weights learned from a large-scale image dataset. The features learned by the VGG model transfer to the new task, which accelerates convergence, reduces the need for large amounts of labeled data, and results in higher accuracy than training from scratch.