In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from importlib import reload
import util 
reload(util)
from util import get_CIFAR10_loaders, train_model, plot_accuracy, plot_training_loss

# CNN: AlexNet

![AlexNet architecture](img/AlexNet-architecture.png)
*Source: Original paper available at: https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf*

In [None]:
class AlexNet(torch.nn.Module):
    """AlexNet-style convolutional classifier.

    The network is organised as three attributes:

    * ``features``   -- five convolutions (each followed by an in-place ReLU)
      interleaved with three 3x3/stride-2 max-pooling layers; expects a
      3-channel input and emits 256 channels.
    * ``avgpool``    -- adaptive average pooling that always produces a 6x6
      spatial map, so the classifier input size is fixed at 256 * 6 * 6.
    * ``classifier`` -- three fully connected layers (with dropout before the
      first two) ending in ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        nn = torch.nn  # local shorthand only; no attribute is created

        # Layers are created in the same order as they are applied, so the
        # random initialisation sequence (and the Sequential indices used in
        # state_dict keys) stay fixed.
        conv_stack = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))

        dense_stack = [
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch ``x``.

        The pooled feature map is reshaped to ``(x.size(0), 256 * 6 * 6)``
        before being passed to the classifier; no softmax is applied.
        """
        flat_dim = 256 * 6 * 6
        pooled = self.avgpool(self.features(x))
        flattened = pooled.view(pooled.size(0), flat_dim)
        return self.classifier(flattened)

## AlexNet on CIFAR-10

In [None]:
# --- Model configuration ---
num_classes = 10  # size of the network's output layer

# --- Training hyperparameters ---
learning_rate = 1e-3
batch_size = 256
num_epochs = 15
random_seed = 0

# --- Preprocessing pipelines ---
# Both pipelines resize to 70x70 and then cut out a 64x64 window; the training
# pipeline picks the window at random, the evaluation pipeline takes the centre.
train_transforms = transforms.Compose(
    [
        transforms.Resize((70, 70)),
        transforms.RandomCrop((64, 64)),
        transforms.ToTensor(),
    ]
)

test_transforms = transforms.Compose(
    [
        transforms.Resize((70, 70)),
        transforms.CenterCrop((64, 64)),
        transforms.ToTensor(),
    ]
)

# --- Data loaders (helper defined in util) ---
train_loader, valid_loader, test_loader = get_CIFAR10_loaders(
    batch_size=batch_size,
    train_transform=train_transforms,
    test_transform=test_transforms,
    num_workers=2,
)

# GPU setup
# Run on the first CUDA device when one is available, otherwise on the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda:0"
    # Turn off cuDNN's benchmark mode and request deterministic algorithms so
    # that runs with the same seed are repeatable. The flags live under
    # torch.backends.cudnn.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [11]:
import time

# Seed the generator before the model is built so its initial weights are
# the same on every run.
torch.manual_seed(random_seed)
model = AlexNet(num_classes).to(DEVICE)

# Cross-entropy objective optimised with Adam.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train_model (from util) returns the minibatch loss history together with
# the training and validation accuracy histories used by the plots below.
minibatch_loss, train_acc, valid_acc = train_model(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
    num_epochs=num_epochs,
    batch_size=batch_size,
    device=DEVICE,
)

Epoch: 001/015 | Batch 050/256 | Cost: 2.3005


In [None]:
# Visualise the training and validation accuracy histories returned by
# train_model (plot_accuracy is a helper imported from util).
plot_accuracy(train_acc, valid_acc)

In [None]:
# Visualise the minibatch loss history returned by train_model
# (plot_training_loss is a helper imported from util).
plot_training_loss(minibatch_loss)