In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

## Hyperparameters

In [7]:
# Seed PyTorch's global RNG so that weight initialisation, dropout masks and
# DataLoader shuffling start from the same state on every "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic; this only
# fixes the random number streams.
seed = 42
torch.manual_seed(seed)

# Training hyperparameters, collected in one place so they are easy to tune.
num_epochs = 10        # number of full passes over the training set
batch_size = 100       # samples per mini-batch (used by both data loaders)
learning_rate = 0.001  # step size passed to the Adam optimizer

## Preparation

In [8]:
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Preprocessing applied to every MNIST image before it reaches the network:
#   1. upscale to 224 pixels to fit the AlexNet input size,
#   2. convert the PIL image to a tensor,
#   3. standardise with the MNIST mean / std.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split (downloaded into ./data if it is not there yet).
train_dataset = torchvision.datasets.MNIST(
    './data',
    train=True,
    transform=transform,
    download=True,
)
# Held-out test split; no download is requested for this one.
test_dataset = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=transform,
)

# Mini-batch iterators: the training data is reshuffled each epoch, the test
# data is read in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## AlexNet

In [9]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolution stages followed by three linear layers.

    The defaults reproduce the original single-channel, 10-class configuration.
    Layer order inside ``features`` and ``classifier`` is unchanged, so the
    ``state_dict`` keys (``features.0.weight`` ... ``classifier.6.bias``) stay
    the same.

    Args:
        num_classes: Size of the output layer (number of logits per sample).
        in_channels: Number of channels of the input images (1 for grayscale).
        dropout: Drop probability used by both dropout layers in the classifier.
    """

    def __init__(self, num_classes=10, in_channels=1, dropout=0.5):
        super().__init__()
        self.features = nn.Sequential(
            # C1: 224x224xC -> 55x55x64 -> 27x27x64
            nn.Conv2d(in_channels, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # C2: 27x27x64 -> 27x27x192 -> 13x13x192
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # C3: 13x13x192 -> 13x13x384
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # C4: 13x13x384 -> 13x13x256
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # C5: 13x13x256 -> 13x13x256 -> 6x6x256
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pins the spatial size to 6x6 so the 256 * 6 * 6 input of the first
        # linear layer also holds for inputs that are not exactly 224x224
        # (for 224x224 inputs the feature map is already 6x6).
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            # FC6
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            # FC7
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            # output: raw class scores (no softmax is applied here)
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Map a batch of images ``(N, in_channels, H, W)`` to ``(N, num_classes)`` logits."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


# Loss: cross-entropy computed on the network's raw class scores.
criterion = nn.CrossEntropyLoss()

# Network with its default arguments, moved to the selected device.
model = AlexNet()
model.to(device)

# Adam optimizer over all of the model's parameters.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

## Training

In [10]:
# Train the model with mini-batch updates.
total_step = len(train_loader)  # number of mini-batches per epoch
log_interval = 100              # report the loss every this many steps

# Explicitly switch to training mode. The evaluation cell calls model.eval(),
# so without this a re-run of the training cell would silently train with
# dropout disabled.
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the device the model lives on.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize (gradients are cleared first because
        # backward() accumulates into .grad)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % log_interval == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/10], Step [100/600], Loss: 0.2689
Epoch [1/10], Step [200/600], Loss: 0.0576
Epoch [1/10], Step [300/600], Loss: 0.0627
Epoch [1/10], Step [400/600], Loss: 0.0233
Epoch [1/10], Step [500/600], Loss: 0.1149
Epoch [1/10], Step [600/600], Loss: 0.0373
Epoch [2/10], Step [100/600], Loss: 0.0185
Epoch [2/10], Step [200/600], Loss: 0.1872
Epoch [2/10], Step [300/600], Loss: 0.0211
Epoch [2/10], Step [400/600], Loss: 0.0865
Epoch [2/10], Step [500/600], Loss: 0.0593
Epoch [2/10], Step [600/600], Loss: 0.0266
Epoch [3/10], Step [100/600], Loss: 0.0468
Epoch [3/10], Step [200/600], Loss: 0.0646
Epoch [3/10], Step [300/600], Loss: 0.6289
Epoch [3/10], Step [400/600], Loss: 0.0530
Epoch [3/10], Step [500/600], Loss: 0.0719
Epoch [3/10], Step [600/600], Loss: 0.1739
Epoch [4/10], Step [100/600], Loss: 0.1019
Epoch [4/10], Step [200/600], Loss: 0.0013
Epoch [4/10], Step [300/600], Loss: 0.0179
Epoch [4/10], Step [400/600], Loss: 0.0641
Epoch [4/10], Step [500/600], Loss: 0.1198
Epoch [4/10

## Test the model

In [11]:
# Evaluate classification accuracy on the test loader.
model.eval()  # inference mode: dropout layers are disabled

correct = 0  # number of samples whose predicted class matches the label
total = 0    # number of samples seen
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score along the class axis.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the sample count that was actually evaluated instead of a hard-coded number.
print('Test Accuracy of the model on the {} test images: {} %'.format(total, 100 * correct / total))

Test Accuracy of the model on the 10000 test images: 99.18 %


In [12]:
# Write the model's state dict (its parameters and buffers) to 'model.ckpt'
# in the current working directory.
torch.save(obj=model.state_dict(), f='model.ckpt')
