In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [None]:
# MNIST preprocessing: turn each image into a tensor, then normalise its
# single channel using mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

# Both splits share the same root directory and transform; only the `train`
# flag differs. `download=True` asks torchvision to fetch the files when needed.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)



In [None]:
# Define the MLP model
class MLP(nn.Module):
    """Fully connected classifier: in_features -> 128 -> 32 -> 10.

    ``forward`` returns raw class scores (logits), not probabilities. The
    notebook trains with ``nn.CrossEntropyLoss``, which applies log-softmax
    itself; passing already-softmaxed outputs to it normalises twice and
    flattens the gradients. Use ``predict_proba`` when probabilities are
    needed.

    Args:
        in_features: Number of values per sample once the input is flattened.
            Defaults to ``28 * 28`` (one 28x28 single-channel image).
    """

    def __init__(self, in_features=28 * 28):
        super().__init__()
        self.in_features = in_features
        self.fc1 = nn.Linear(in_features, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 10)
        # Not applied in forward(); only used by predict_proba().
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return logits of shape (batch, 10) for a batched input ``x``.

        Everything after the first (batch) dimension is flattened, so a
        per-sample size that differs from ``in_features`` raises a shape error
        in ``fc1`` instead of silently redistributing values across samples.
        """
        x = x.flatten(start_dim=1)
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)

    def predict_proba(self, x):
        """Return per-class probabilities (softmax over the logits)."""
        return self.softmax(self.forward(x))


In [None]:
# Hyperparameter grid: candidate values for the learning rate, the number of
# training epochs, and the mini-batch size.
learning_rates = [1e-4, 1e-3, 1e-2]
num_epochs_list = [1, 5, 10, 20]
batch_sizes = [2 ** exponent for exponent in range(3, 6)]  # 8, 16, 32


In [None]:
# Trackers for the best result: the highest accuracy seen so far and the
# hyperparameter combination that produced it (empty until a run is recorded).
best_hyperparameters = dict()
best_accuracy = 0


In [None]:
def train_model(model, loader, criterion, optimizer, num_epochs):
    """Run ``num_epochs`` full passes of gradient updates over ``loader``."""
    model.train()
    for _ in range(num_epochs):
        for images, labels in loader:
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()


def evaluate_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` whose arg-max prediction equals the label."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total


SEED = 0  # same seed for every run so each configuration is reproducible

# Reset the trackers here so that re-running this cell starts a fresh search
# instead of comparing against results left over from an earlier execution.
best_accuracy = 0
best_hyperparameters = {}

# NOTE(review): the best configuration is selected by accuracy on test_dataset,
# so the reported "best accuracy" is optimistically biased; consider holding
# out a validation split of train_dataset for model selection.
for batch_size in batch_sizes:
    # The loaders depend only on the batch size, so build them once per size.
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    for num_epochs in num_epochs_list:
        for learning_rate in learning_rates:
            # Seed before building the model so initial weights and shuffle
            # order are repeatable for this configuration.
            torch.manual_seed(SEED)

            model = MLP()
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            train_model(model, train_loader, criterion, optimizer, num_epochs)
            accuracy = evaluate_accuracy(model, test_loader)

            print(f'Batch Size: {batch_size}, Epochs: {num_epochs}, Learning Rate: {learning_rate}, Accuracy: {accuracy * 100:.2f}%')

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_hyperparameters = {'Batch Size': batch_size, 'Epochs': num_epochs, 'Learning Rate': learning_rate}

print("\nBest Performing Hyperparameters:")
print(best_hyperparameters)
print("Best Accuracy:", best_accuracy * 100)

Batch Size: 8, Epochs: 1, Learning Rate: 0.0001, Accuracy: 90.55%
Batch Size: 8, Epochs: 1, Learning Rate: 0.001, Accuracy: 90.09%
Batch Size: 8, Epochs: 1, Learning Rate: 0.01, Accuracy: 11.35%
Batch Size: 8, Epochs: 5, Learning Rate: 0.0001, Accuracy: 93.07%
Batch Size: 8, Epochs: 5, Learning Rate: 0.001, Accuracy: 93.66%
Batch Size: 8, Epochs: 5, Learning Rate: 0.01, Accuracy: 11.35%
Batch Size: 8, Epochs: 10, Learning Rate: 0.0001, Accuracy: 95.46%
Batch Size: 8, Epochs: 10, Learning Rate: 0.001, Accuracy: 92.83%
Batch Size: 8, Epochs: 10, Learning Rate: 0.01, Accuracy: 9.82%
Batch Size: 8, Epochs: 20, Learning Rate: 0.0001, Accuracy: 96.78%


b. Add Convolutional and Pooling Layers:

In [None]:
import torch.nn.functional as F

# Update MLP class to include convolution and pooling layers
class ConvolutionalMLP(nn.Module):
    """One convolution + max-pool stage followed by a fully connected head.

    The head's input width is derived from the actual conv/pool output, so it
    always matches the feature map. For the default 28x28 input that is
    1 x 13 x 13 = 169 values per sample, not the 784 a raw image has.
    ``forward`` returns raw logits of shape (batch, 10), suitable for
    ``nn.CrossEntropyLoss``.

    Args:
        input_size: (height, width) of the single-channel input images.
    """

    def __init__(self, input_size=(28, 28)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 1, kernel_size=2, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Probe the feature extractor once to learn how many values each
        # sample has after convolution and pooling.
        with torch.no_grad():
            probe = torch.zeros(1, 1, *input_size)
            n_features = self._extract_features(probe).numel()

        # Same 128 -> 32 -> 10 layout as the plain MLP, but sized to the
        # pooled feature map and without a trailing softmax.
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_features, 128),
            nn.ReLU(),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        )

    def _extract_features(self, x):
        """Apply the convolution and pooling stage."""
        return self.pool(self.conv1(x))

    def forward(self, x):
        """Return logits of shape (batch, 10) for images of shape (batch, 1, H, W)."""
        return self.mlp(self._extract_features(x))

# Set up the pieces needed to train the single-convolution model: the loss,
# the network itself, and an Adam optimizer bound to the network's parameters.
criterion = torch.nn.CrossEntropyLoss()
conv_mlp_model = ConvolutionalMLP()
optimizer = torch.optim.Adam(params=conv_mlp_model.parameters(), lr=1e-3)

# TODO: the training and evaluation loop for this model is not written in this
# cell; it should follow the same pattern as the loop used for the plain MLP,
# with data loaders and input sizes that match this architecture.


c. Extend with Second Convolution-Pooling Layer:

In [None]:
class DoubleConvolutionalMLP(nn.Module):
    """Two convolution + max-pool stages followed by a fully connected head.

    The head's input width is derived from the actual output of the second
    pooling layer, so it always matches the feature map. For the default 28x28
    input that is 10 x 6 x 6 = 360 values per sample.
    ``forward`` returns raw logits of shape (batch, 10), suitable for
    ``nn.CrossEntropyLoss``.

    Args:
        input_size: (height, width) of the single-channel input images.
    """

    def __init__(self, input_size=(28, 28)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=2, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(10, 10, kernel_size=2, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Probe the feature extractor once to learn how many values each
        # sample has after both conv/pool stages.
        with torch.no_grad():
            probe = torch.zeros(1, 1, *input_size)
            n_features = self._extract_features(probe).numel()

        # Same 128 -> 32 -> 10 layout as the plain MLP, but sized to the
        # pooled feature map and without a trailing softmax.
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_features, 128),
            nn.ReLU(),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        )

    def _extract_features(self, x):
        """Apply both convolution and pooling stages in order."""
        x = self.pool1(self.conv1(x))
        return self.pool2(self.conv2(x))

    def forward(self, x):
        """Return logits of shape (batch, 10) for images of shape (batch, 1, H, W)."""
        return self.mlp(self._extract_features(x))

# Set up the pieces needed to train the two-stage convolutional model: the
# loss, the network itself, and an Adam optimizer bound to its parameters.
criterion = torch.nn.CrossEntropyLoss()
double_conv_mlp_model = DoubleConvolutionalMLP()
optimizer = torch.optim.Adam(params=double_conv_mlp_model.parameters(), lr=1e-3)

# TODO: the training and evaluation loop for this model is not written in this
# cell; it should follow the same pattern as the loop used for the plain MLP,
# with data loaders and input sizes that match this architecture.
