# CNN Model

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

class CustomCNN(nn.Module):
    """Configurable CNN whose classifier sees the pooled output of every stage.

    Each entry of ``conv_layers`` produces one stage: ``Conv2d -> ReLU ->
    MaxPool2d``. The pooled activations of *all* stages (not only the last
    one) are flattened, concatenated and passed through two fully connected
    layers.

    Args:
        input_size: Side length of the (square) input images.
        conv_layers: Non-empty sequence of 6-tuples
            ``(out_channels, kernel_size, stride, padding,
            pooling_kernel, pooling_stride)``, one per stage. All entries are
            treated as plain integers (square kernels).
        output_size: Number of output units of the final linear layer.
        in_channels: Number of channels of the input images. Defaults to 1,
            which was previously hard-coded.
        hidden_size: Width of the hidden fully connected layer. Defaults to
            512, which was previously hard-coded.

    Raises:
        ValueError: If ``conv_layers`` is empty, or if a stage would shrink
            the feature map to less than one pixel.
    """

    # Every stage registers exactly three modules in ``self.conv_layers``
    # (convolution, activation, pooling); ``forward`` relies on this layout.
    _MODULES_PER_STAGE = 3

    def __init__(self, input_size, conv_layers, output_size, in_channels=1, hidden_size=512):
        super(CustomCNN, self).__init__()

        conv_layers = list(conv_layers)
        if not conv_layers:
            # Without any stage there is nothing to concatenate in forward().
            raise ValueError("conv_layers must contain at least one layer specification")

        self.conv_layers = nn.ModuleList()
        # Not used by forward(); kept so code referencing the attribute keeps working.
        self.flatten_layers = nn.ModuleList()

        self.current_size = input_size
        self.intermediate_sizes = []

        for index, spec in enumerate(conv_layers):
            out_channels, kernel_size, stride, padding, pooling_kernel, pooling_stride = spec

            self.conv_layers.append(
                nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
            )
            self.conv_layers.append(nn.ReLU())
            self.conv_layers.append(nn.MaxPool2d(kernel_size=pooling_kernel, stride=pooling_stride))

            # Track the spatial side length so the first linear layer can be sized
            # without running a dummy forward pass.
            self.current_size = (self.current_size - kernel_size + 2 * padding) // stride + 1
            if self.current_size < 1:
                raise ValueError(
                    f"conv layer {index} reduces the feature map to a non-positive size "
                    f"({self.current_size})"
                )
            self.current_size = (self.current_size - pooling_kernel) // pooling_stride + 1
            if self.current_size < 1:
                raise ValueError(
                    f"pooling layer {index} reduces the feature map to a non-positive size "
                    f"({self.current_size})"
                )

            self.intermediate_sizes.append((out_channels, self.current_size, self.current_size))
            in_channels = out_channels

        # The classifier input is the concatenation of every stage's flattened output.
        self.flatten_size = sum(
            channels * height * width for (channels, height, width) in self.intermediate_sizes
        )

        self.fc1 = nn.Linear(self.flatten_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network and return the raw output of the last linear layer.

        Args:
            x: Input batch; the layers built in ``__init__`` expect shape
                ``(batch, in_channels, input_size, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``; no softmax is applied.
        """
        intermediate_outputs = []

        for i, layer in enumerate(self.conv_layers):
            x = layer(x)
            # The last module of each stage is the pooling layer: keep its output.
            if i % self._MODULES_PER_STAGE == self._MODULES_PER_STAGE - 1:
                intermediate_outputs.append(torch.flatten(x, 1))

        concatenated = torch.cat(intermediate_outputs, dim=1)
        x = F.relu(self.fc1(concatenated))
        x = self.fc2(x)
        return x

# Dataset loading and hyperparameters

In [2]:
# Seed PyTorch's global RNG before anything random happens (weight
# initialisation, training-set shuffling) so a fresh "Restart & Run All"
# is comparable from run to run.
seed = 42
torch.manual_seed(seed)

# --- Hyperparameters -------------------------------------------------------
input_size = 28  # side length of the square input images (MNIST digits are 28x28)

# One tuple per convolution stage:
#   (out_channels, kernel_size, stride, padding, pooling_kernel, pooling_stride)
# len(conv_layers) == number of convolution stages
conv_layers = [
    (16, 3, 1, 1, 2, 2),
    (32, 3, 1, 1, 2, 2),
]
output_size = 10  # one output per digit class
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# --- Data ------------------------------------------------------------------
# Convert images to tensors, then normalise the single channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Downloads MNIST into ./data on first use.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Shuffle only the training data; evaluation order does not matter.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# --- Model, loss function, and optimizer -----------------------------------
model = CustomCNN(input_size, conv_layers, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training

In [3]:
# Train for `num_epochs` full passes over `train_loader` and print the
# per-sample average loss after every pass.
num_train_samples = len(train_loader.dataset)

for epoch in range(num_epochs):
    model.train()  # put the model in training mode at the start of each pass
    running_loss = 0.0

    for images, labels in train_loader:
        # Clear gradients left over from the previous optimisation step.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the epoch average.
        samples_in_batch = images.size(0)
        running_loss += samples_in_batch * loss.item()

    epoch_loss = running_loss / num_train_samples
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

Epoch 1/5, Loss: 0.1937
Epoch 2/5, Loss: 0.0512
Epoch 3/5, Loss: 0.0367
Epoch 4/5, Loss: 0.0259
Epoch 5/5, Loss: 0.0185


# Evaluation

In [4]:
# Measure classification accuracy on `test_loader`.
model.eval()  # switch the model to evaluation mode
correct, total = 0, 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # The predicted class is the index of the largest output per sample.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy:.2f}%')

Accuracy of the network on the test images: 99.13%
