In [27]:
import warnings

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [28]:
class AlexNetSmall(nn.Module):
    """AlexNet-style CNN scaled down for small images.

    The feature extractor is five 3x3 convolutions (stride 1, padding 1), each
    followed by ReLU, with three 2x2 max-pools, so every spatial dimension is
    halved three times (rounding down).  The classifier is a three-layer MLP
    with dropout in front of the first two linear layers.

    Args:
        input_channels: Number of channels of the input images.
        num_classes: Number of logits produced per sample.
        input_size: Spatial size of the inputs, either a single int for
            square images or a ``(height, width)`` pair.
        hidden_dim: Width of the two hidden linear layers.

    ``forward`` takes a batch shaped ``(N, input_channels, H, W)`` whose
    ``H``/``W`` match ``input_size`` and returns ``(N, num_classes)`` logits.
    """

    def __init__(self, input_channels=3, num_classes=10, input_size=32, hidden_dim=4096):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Accept either a scalar (square input) or a (height, width) pair.
        try:
            height, width = input_size
        except TypeError:
            height = width = input_size

        # Dynamically compute the flattened feature size with a throw-away
        # forward pass.  no_grad avoids building an autograd graph, and a
        # zeros tensor keeps the probe from consuming global RNG state.
        with torch.no_grad():
            probe = torch.zeros(1, input_channels, height, width)
            feature_map_size = torch.flatten(self.features(probe), 1).size(1)

        # Define the classifier
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(feature_map_size, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        # flatten (unlike view) also works when the feature maps are not
        # contiguous, e.g. for channels_last inputs.
        x = torch.flatten(x, 1)
        return self.classifier(x)


In [29]:
# Data preparation
# Each pipeline converts images to tensors and then normalises every channel
# with mean 0.5 and std 0.5.
transform_mnist = transforms.Compose(
    [
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

transform_cifar10 = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Load datasets: the train split first, then the test split, for each corpus.
mnist_train, mnist_test = (
    datasets.MNIST(root='./data', train=is_train, transform=transform_mnist, download=True)
    for is_train in (True, False)
)

cifar10_train, cifar10_test = (
    datasets.CIFAR10(root='./data', train=is_train, transform=transform_cifar10, download=True)
    for is_train in (True, False)
)

# Wrap the splits in loaders; only the training loaders shuffle.
mnist_loader, mnist_test_loader = (
    DataLoader(split, batch_size=64, shuffle=reshuffle)
    for split, reshuffle in ((mnist_train, True), (mnist_test, False))
)

cifar10_loader, cifar10_test_loader = (
    DataLoader(split, batch_size=64, shuffle=reshuffle)
    for split, reshuffle in ((cifar10_train, True), (cifar10_test, False))
)


Files already downloaded and verified
Files already downloaded and verified


In [30]:
# Training function
def train_model(model, dataloader, criterion, optimizer, epochs=10, device='cuda'):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Module to optimise; it is moved to ``device``.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``dataloader``.
        device: Target device. If a CUDA device is requested but CUDA is not
            available, training falls back to the CPU with a warning.

    Returns:
        list[float]: The mean batch loss of every epoch, in order.

    Raises:
        ValueError: If ``dataloader`` yields no batches during an epoch.
    """
    # The default is 'cuda'; degrade gracefully on CPU-only machines instead
    # of failing inside model.to().
    if torch.device(device).type == 'cuda' and not torch.cuda.is_available():
        warnings.warn("CUDA is not available; training on CPU instead.", RuntimeWarning)
        device = 'cpu'

    model.to(device)
    epoch_losses = []
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0  # counted by hand so loaders without __len__ also work
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; nothing to train on")
        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
    return epoch_losses

# Evaluation function
def evaluate_model(model, dataloader, device='cuda'):
    """Return the top-1 accuracy of ``model`` on ``dataloader`` as a percentage.

    Args:
        model: Module to evaluate; it is moved to ``device`` and left in
            eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Target device. If a CUDA device is requested but CUDA is not
            available, evaluation falls back to the CPU with a warning.

    Returns:
        float: ``100 * correct / total`` over all samples seen.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if torch.device(device).type == 'cuda' and not torch.cuda.is_available():
        warnings.warn("CUDA is not available; evaluating on CPU instead.", RuntimeWarning)
        device = 'cpu'

    # Move the model explicitly so evaluation does not depend on a previous
    # train_model() call having placed it on the right device.
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError("dataloader yielded no samples; accuracy is undefined")
    return 100 * correct / total



In [31]:
# Seed PyTorch's global RNG before anything random happens in this cell so
# that weight initialisation (and, presumably, the shuffling of loaders that
# draw from the global RNG) repeats across "Restart Kernel -> Run All".
# NOTE: GPU kernels may still introduce small run-to-run differences.
SEED = 42
EPOCHS = 10
torch.manual_seed(SEED)

# Instantiate models for MNIST and CIFAR-10
model_mnist = AlexNetSmall(input_channels=1, num_classes=10, input_size=28)  # MNIST: single-channel, 28x28
model_cifar10 = AlexNetSmall(input_channels=3, num_classes=10, input_size=32)  # CIFAR-10: RGB, 32x32

# Loss function and optimizers (one optimizer per model; the stateless loss is shared)
criterion = nn.CrossEntropyLoss()
optimizer_mnist = optim.Adam(model_mnist.parameters(), lr=0.001)
optimizer_cifar10 = optim.Adam(model_cifar10.parameters(), lr=0.001)

# Train models
print("Training on MNIST...")
train_model(model_mnist, mnist_loader, criterion, optimizer_mnist, epochs=EPOCHS)

print("Training on CIFAR-10...")
train_model(model_cifar10, cifar10_loader, criterion, optimizer_cifar10, epochs=EPOCHS)

Training on MNIST...
Epoch 1, Loss: 0.3120521061866582
Epoch 2, Loss: 0.0829659236697441
Epoch 3, Loss: 0.06493796481250928
Epoch 4, Loss: 0.05780306165435176
Epoch 5, Loss: 0.04678346093722305
Epoch 6, Loss: 0.03934474174501157
Epoch 7, Loss: 0.036584022263451345
Epoch 8, Loss: 0.03449093912196088
Epoch 9, Loss: 0.03415327072666233
Epoch 10, Loss: 0.030134496451542682
Training on CIFAR-10...
Epoch 1, Loss: 1.5988977322797946
Epoch 2, Loss: 1.2088941129882012
Epoch 3, Loss: 1.0180883635492886
Epoch 4, Loss: 0.9056638935414116
Epoch 5, Loss: 0.8158742270963576
Epoch 6, Loss: 0.7373091511409301
Epoch 7, Loss: 0.6894703542484957
Epoch 8, Loss: 0.6450552409109862
Epoch 9, Loss: 0.6057050711549151
Epoch 10, Loss: 0.5622022246841885


In [32]:
# Evaluate models
# Score each trained network on its held-out test loader and report the
# accuracy as a percentage.
print("Evaluating MNIST model...")
mnist_accuracy = evaluate_model(
    model=model_mnist,
    dataloader=mnist_test_loader,
)
print(f"MNIST Accuracy: {mnist_accuracy}%")

print("Evaluating CIFAR-10 model...")
cifar10_accuracy = evaluate_model(
    model=model_cifar10,
    dataloader=cifar10_test_loader,
)
print(f"CIFAR-10 Accuracy: {cifar10_accuracy}%")


Evaluating MNIST model...
MNIST Accuracy: 98.8%
Evaluating CIFAR-10 model...
CIFAR-10 Accuracy: 75.63%
