#### Program 3:
##### Objective:
Write a program using the PyTorch framework to highlight the use of Batch Normalization and Dropout Regularization techniques in CNNs on the CIFAR-10 image dataset.

Perform the following steps:
- Preprocess data
- Define two CNN architectures: one without and one with Batch Normalization and Dropout
- Define model train function
- Train both CNNs using a suitable criterion and optimizer

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
import numpy as np

In [2]:
class SimpleCNN(nn.Module):
    """Baseline CNN with no Batch Normalization and no Dropout.

    Two conv -> ReLU -> 2x2 max-pool stages (3 -> 32 -> 64 channels) feed a
    512-unit hidden layer and a 10-way output layer. The first dense layer
    takes ``64 * 8 * 8`` features, i.e. the network is sized for 3x32x32
    inputs (each pooling stage halves the spatial size).
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # 3x3 convolution with padding=1 keeps the spatial size;
            # the pooling layer then halves it.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            )

        # Layers are created in the same order as they are used so that
        # seeded weight initialisation is reproducible.
        self.conv_block1 = conv_stage(3, 32)
        self.conv_block2 = conv_stage(32, 64)
        self.dense1 = nn.Linear(64 * 8 * 8, 512)
        self.dense2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        features = self.conv_block2(self.conv_block1(x))
        hidden = self.relu(self.dense1(self.flatten(features)))
        return self.dense2(hidden)

class CNNWithBNDropout(nn.Module):
    """CNN with Batch Normalization in the conv stages and Dropout on the hidden layer.

    Same layout as the baseline network (3 -> 32 -> 64 channels, 512 hidden
    units, 10 outputs, sized for 3x32x32 inputs), with two additions:

    * ``BatchNorm2d`` after each convolution. The convolutions are created
      with ``bias=False`` because the following batch-norm layer has its own
      learnable shift.
    * ``Dropout`` on the 512-unit hidden activation, i.e. *before* the output
      layer. Dropout must not be applied to the output scores themselves:
      doing so randomly zeroes class logits during training and corrupts the
      loss signal.

    Args:
        dropout_p: Probability of dropping a hidden unit during training.
            Defaults to 0.5.
    """

    def __init__(self, dropout_p=0.5):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # Two 2x2 poolings reduce 32x32 inputs to 8x8 feature maps.
        self.dense1 = nn.Linear(64 * 8 * 8, 512)
        self.dense2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(dropout_p)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.relu(x)
        # Regularise the hidden representation, not the logits.
        x = self.dropout(x)
        x = self.dense2(x)
        return x

# Data preprocessing and loading
# The only preprocessing step is converting each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 train / test splits, downloaded into ./data when not already present.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Keep the demo small: the first 200 training images and the first 50 test images.
train_subset = Subset(train_dataset, range(200))
test_subset = Subset(test_dataset, range(50))

# Mini-batches of 10; only the training data is reshuffled.
train_loader = DataLoader(train_subset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=10, shuffle=False)

# Functions to train and evaluate a model
def _run_epoch(model, criterion, batches, optimizer=None):
    """Run one pass over ``batches`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given, each batch is followed by a backward pass and
    an optimizer step; otherwise the pass is evaluation-only and gradient
    tracking is disabled. The caller is responsible for switching the model
    between ``train()`` and ``eval()`` mode.

    The loss is averaged over batches (not samples). Either value is ``nan``
    when there is nothing to average over.
    """
    is_training = optimizer is not None
    loss_sum = 0.0
    num_batches = 0
    num_correct = 0
    num_samples = 0
    with torch.set_grad_enabled(is_training):
        for data, target in batches:
            output = model(data)
            loss = criterion(output, target)
            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            num_batches += 1
            # detach(): the prediction is bookkeeping only and must not be
            # tracked by autograd.
            predicted = torch.argmax(output.detach(), dim=1)
            num_samples += target.size(0)
            num_correct += (predicted == target).sum().item()
    mean_loss = loss_sum / num_batches if num_batches else float('nan')
    accuracy = 100 * num_correct / num_samples if num_samples else float('nan')
    return mean_loss, accuracy


def train(model, optimizer, criterion, num_epochs, train_batches=None, test_batches=None):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after every epoch.

    After each epoch one summary line is printed with the mean per-batch loss
    and the accuracy (in percent) on both the training and the test data.

    Args:
        model: The ``nn.Module`` to optimise; updated in place.
        optimizer: Optimizer constructed over ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss.
        num_epochs: Number of passes over the training data.
        train_batches: Iterable of ``(data, target)`` batches used for
            training. Defaults to the module-level ``train_loader``.
        test_batches: Iterable of ``(data, target)`` batches used for
            evaluation. Defaults to the module-level ``test_loader``.

    Returns:
        None. The model is left in evaluation mode.
    """
    if train_batches is None:
        train_batches = train_loader
    if test_batches is None:
        test_batches = test_loader

    for epoch in range(num_epochs):
        model.train()
        avg_train_loss, train_acc = _run_epoch(model, criterion, train_batches, optimizer)

        # eval() switches Dropout / BatchNorm layers to inference behaviour.
        model.eval()
        avg_test_loss, test_acc = _run_epoch(model, criterion, test_batches)

        print(f'Epoch: [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_acc:.4f}%, Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_acc:.4f}%')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 48740501.89it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# Build the two networks to compare: the plain baseline and the variant with
# Batch Normalization and Dropout.
model1 = SimpleCNN()
model2 = CNNWithBNDropout()
# Both models share the same loss and use identically configured (but
# separate) Adam optimizers, so the architecture is the only difference.
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

# Train each model for 10 epochs; per-epoch metrics are printed by train().
train(model1, optimizer1, criterion, 10)
train(model2, optimizer2, criterion, 10)

Epoch: [1/10], Train Loss: 2.3141, Train Accuracy: 13.5000%, Test Loss: 2.3018, Test Accuracy: 6.0000%
Epoch: [2/10], Train Loss: 2.2600, Train Accuracy: 14.0000%, Test Loss: 2.2744, Test Accuracy: 6.0000%
Epoch: [3/10], Train Loss: 2.1210, Train Accuracy: 23.0000%, Test Loss: 2.2544, Test Accuracy: 10.0000%
Epoch: [4/10], Train Loss: 1.9539, Train Accuracy: 27.0000%, Test Loss: 2.1527, Test Accuracy: 18.0000%
Epoch: [5/10], Train Loss: 1.7908, Train Accuracy: 41.5000%, Test Loss: 1.9042, Test Accuracy: 36.0000%
Epoch: [6/10], Train Loss: 1.4651, Train Accuracy: 51.0000%, Test Loss: 1.8328, Test Accuracy: 30.0000%
Epoch: [7/10], Train Loss: 1.1462, Train Accuracy: 63.5000%, Test Loss: 2.2093, Test Accuracy: 24.0000%
Epoch: [8/10], Train Loss: 0.9314, Train Accuracy: 70.0000%, Test Loss: 2.3066, Test Accuracy: 26.0000%
Epoch: [9/10], Train Loss: 0.6946, Train Accuracy: 73.0000%, Test Loss: 2.1470, Test Accuracy: 32.0000%
Epoch: [10/10], Train Loss: 0.4464, Train Accuracy: 89.0000%, Test