In [1]:
import torch
from torch import nn
import torchvision
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import v2

In [2]:
# TensorBoard summary writer, created with default arguments. The training
# loop uses it to log per-epoch loss and accuracy scalars.
writer = SummaryWriter()

**Use Imagenette (a 10-class subset of ImageNet) instead of the full ImageNet dataset**

In [3]:
# Preprocessing pipeline applied to every image, listed in application order:
# resize to 256x256, take a random 224x224 crop, mirror horizontally with
# probability 0.5, then convert the image to a tensor.
_img_transform_steps = [
    v2.Resize(size=(256, 256)),
    v2.RandomCrop(size=(224, 224)),
    v2.RandomHorizontalFlip(p=0.5),
    ToTensor(),
]
img_transforms = v2.Compose(_img_transform_steps)

In [4]:
# Validation images get a deterministic pipeline (same resize, but a centre
# crop and no flip). Evaluating on randomly augmented images makes the
# validation metrics noisy and not comparable from epoch to epoch.
eval_transforms = v2.Compose([
    v2.Resize((256, 256)),
    v2.CenterCrop((224, 224)),
    ToTensor(),
])

# NOTE(review): some torchvision releases reportedly raise when download=True
# and the dataset directory already exists - confirm against the installed
# version if a re-run of this cell fails.
imagenette_train = torchvision.datasets.Imagenette(root='data', split='train', download=True, transform=img_transforms)
imagenette_test = torchvision.datasets.Imagenette(root='data', split='val', download=True, transform=eval_transforms)

# Number of samples in each split.
len(imagenette_train), len(imagenette_test)

(9469, 3925)

In [5]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    Five convolutional layers (with three max-pooling stages) followed by
    three fully-connected layers with dropout between them. The first
    fully-connected layer expects 6400 flattened features, i.e. a
    256 x 5 x 5 feature map, which is what a 3 x 224 x 224 input produces
    with the strides and paddings used here.

    Args:
        num_classes: Number of output logits produced by the final linear
            layer. Defaults to 10, the value previously hard-coded.
    """

    # Positions inside ``self.model`` of conv layers 2, 4 and 5, whose biases
    # are initialised to 1 instead of 0.
    _UNIT_BIAS_CONV_INDICES = (3, 8, 10)

    def __init__(self, num_classes=10):
        super().__init__()
        # The layer order (and therefore the Sequential indices and the
        # state_dict keys) is kept exactly as before.
        self.model = nn.Sequential(
            nn.Conv2d(3, 96, 11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(96, 256, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(256, 384, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.Flatten(),
            nn.Linear(6400, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        self._init_parameters()

    def _init_parameters(self):
        """Initialise weights and biases following the AlexNet paper scheme."""
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                # All weights: zero-mean Gaussian with std 0.01.
                nn.init.normal_(m.weight, std=0.01)
                # Fully-connected biases start at 1, conv biases at 0; the
                # exceptions are patched below.
                nn.init.constant_(m.bias, 1.0 if isinstance(m, nn.Linear) else 0.0)

        # Conv layers 2, 4 and 5 also start with bias 1.
        for idx in self._UNIT_BIAS_CONV_INDICES:
            nn.init.constant_(self.model[idx].bias, 1.0)

        # The output layer is not a hidden layer, so its bias goes back to 0.
        nn.init.constant_(self.model[-1].bias, 0.0)

    def forward(self, x):
        """Run ``x`` through the network and return the raw class logits."""
        return self.model(x)

In [6]:
# Seed torch's global RNG before the first random operation (the Gaussian
# weight initialisation inside AlexNet) so that a Restart & Run All starts
# from the same weights every time. Later consumers of the global torch RNG
# are presumably affected as well - verify if exact reproducibility matters.
SEED = 42
torch.manual_seed(SEED)

model = AlexNet()
model

AlexNet(
  (model): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (13): Flatten(start_dim=1, end_dim=-1)
    (14): Linear(in_features=6400, out_features=4096, bias=True)
    (15): ReLU()
    (16): Dropout(p=0.5, inplace=False)
    (17): Linear(in_features=4096, out_features=4096, bias=True)
    (18): ReLU()
    (19): Dropout(p

In [7]:
# Training hyperparameters.
batch_size = 128       # samples per mini-batch
learning_rate = 1e-2   # step size handed to the optimizer
epochs = 100           # number of full passes over the training set

In [8]:
# Training batches are reshuffled every epoch. The validation loader keeps a
# fixed order: shuffling gives no benefit during evaluation and only makes
# the evaluation order non-deterministic.
dataloader_train = DataLoader(imagenette_train, batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader(imagenette_test, batch_size=batch_size, shuffle=False)

# Batches per epoch for each split (the last batch may be smaller).
num_train_batches = len(dataloader_train)
num_test_batches = len(dataloader_test)

# Cross-entropy on raw logits, optimised with SGD + momentum and weight decay.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005)
num_train_batches, num_test_batches

(74, 31)

In [None]:
# Train for `epochs` epochs, evaluating on the validation split after each
# one and logging loss/accuracy for both splits to TensorBoard.
#
# Epoch metrics are accumulated per *sample* rather than averaged over
# batches: the final batch of each split is smaller than the rest, so a mean
# of per-batch means would over-weight its samples.

# Batches are moved to wherever the model's parameters live, so the loop also
# works if the model has been moved to an accelerator (a no-op on CPU).
device = next(model.parameters()).device

for epoch in range(epochs):
    # ---- training pass ----
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    model.train()
    for train_features, train_labels in dataloader_train:
        train_features = train_features.to(device)
        train_labels = train_labels.to(device)

        optimizer.zero_grad()

        predictions = model(train_features)
        train_batch_loss = loss(predictions, train_labels)
        train_batch_loss.backward()

        optimizer.step()

        batch_count = train_labels.shape[0]
        # `loss` is assumed to return the batch mean (the default reduction of
        # nn.CrossEntropyLoss); multiply by the batch size to get a sum.
        train_loss_sum += train_batch_loss.item() * batch_count
        train_correct += (predictions.argmax(dim=1) == train_labels).sum().item()
        train_seen += batch_count

    # ---- validation pass ----
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0

    model.eval()
    with torch.no_grad():
        for test_features, test_labels in dataloader_test:
            test_features = test_features.to(device)
            test_labels = test_labels.to(device)

            predictions = model(test_features)
            test_batch_loss = loss(predictions, test_labels)

            batch_count = test_labels.shape[0]
            val_loss_sum += test_batch_loss.item() * batch_count
            val_correct += (predictions.argmax(dim=1) == test_labels).sum().item()
            val_seen += batch_count

    # Per-sample averages over the whole epoch.
    train_loss = train_loss_sum / train_seen
    train_acc = train_correct / train_seen

    val_loss = val_loss_sum / val_seen
    val_acc = val_correct / val_seen

    writer.add_scalar("Loss/train", train_loss, epoch)
    writer.add_scalar('Accuracy/train', train_acc, epoch)

    writer.add_scalar("Loss/val", val_loss, epoch)
    writer.add_scalar('Accuracy/val', val_acc, epoch)

In [None]:
# Flush the summary writer so the scalars logged during training are written out.
writer.flush()