<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_PyTorch/blob/main/16AlexNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.utils import data
from torchvision import transforms
from torchvision import datasets

In [2]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier scaled down for small images.

    The stack is five convolutions (interleaved with three max-pools)
    followed by two 4096-unit fully connected layers with dropout and a
    final linear layer producing ``num_classes`` logits. All conv/linear
    layers are lazy, so their input sizes are inferred from the first
    batch passed through ``forward``.

    Args:
        lr: Learning-rate value kept on the instance as ``self.lr``. The
            network itself never reads it; it is stored only so callers
            that pass it can retrieve it later (e.g. when building an
            optimizer).
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, lr=0.1, num_classes=10):
        super().__init__()
        # The argument used to be accepted and then dropped on the floor;
        # keep it so the value a caller supplied is not silently lost.
        self.lr = lr
        self.net = nn.Sequential(
            # Original AlexNet uses kernel_size=11, stride=4 for 224x224 images.
            # For 28x28, we need less aggressive downsampling.
            nn.LazyConv2d(96, kernel_size=5, stride=1, padding=2), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Second convolutional layer
            nn.LazyConv2d(256, kernel_size=3, stride=1, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three back-to-back 3x3 convolutions that keep the spatial size.
            nn.LazyConv2d(384, kernel_size=3, stride=1, padding=1), nn.ReLU(),
            nn.LazyConv2d(384, kernel_size=3, stride=1, padding=1), nn.ReLU(),
            nn.LazyConv2d(256, kernel_size=3, stride=1, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Classifier head: flatten, two dropout-regularised dense layers,
            # then the logits layer.
            nn.Flatten(),
            nn.LazyLinear(4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.LazyLinear(4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.LazyLinear(num_classes))

    def forward(self, X):
        """Run ``X`` through the whole stack and return the raw logits.

        Args:
            X: Image batch; presumably shaped (batch, channels, height,
                width) as the 2-D convolutions expect.

        Returns:
            The output of the final linear layer (one row of
            ``num_classes`` logits per input after flattening).
        """
        return self.net(X)

In [3]:
# Build the network with the constructor's default arguments
# (lr=0.1, num_classes=10). No forward pass has run yet at this point.
model = AlexNet()

In [4]:
# Training configuration shared by the loaders and the training loop.
batch_size = 256
max_epochs = 3

# Fashion-MNIST train/validation splits; each image is converted to a tensor.
# The files are downloaded into ../data when they are not already present.
transformer = transforms.ToTensor()
mnist_train = datasets.FashionMNIST(
    root="../data",
    train=True,
    transform=transformer,
    download=True,
)
mnist_val = datasets.FashionMNIST(
    root="../data",
    train=False,
    transform=transformer,
    download=True,
)

# Shuffle only the training split; validation order stays fixed.
train_iter = data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
val_iter = data.DataLoader(
    mnist_val,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

100%|██████████| 26.4M/26.4M [00:02<00:00, 11.7MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 211kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.92MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 30.1MB/s]


In [5]:
# Cross-entropy objective applied to the model's output logits.
loss_fn = nn.CrossEntropyLoss()
# Adam over every model parameter with a fixed step size of 1e-3.
# NOTE(review): the model's lazy layers have not seen any input yet at this
# point; PyTorch's lazy-module docs suggest a dry-run forward before
# attaching an optimizer — confirm this ordering is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train for max_epochs epochs; after each epoch, measure accuracy on the
# validation loader and print a one-line summary.
for epoch in range(max_epochs):
  # ---- training pass ----
  model.train()
  train_loss_sum, train_accuracy_sum, n = 0.0, 0.0, 0
  for x, y in train_iter:
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    batch_n = y.numel()
    # loss_fn yields the *mean* over the batch, so weight it by the batch
    # size; dividing the running sum by n then gives a true per-sample
    # average. .item() converts to a Python float so the running total does
    # not keep every iteration's autograd history alive.
    train_loss_sum += loss.item() * batch_n
    predicted_labels = torch.argmax(y_pred, dim=1)
    # Count of correct predictions in this batch, as a plain Python number.
    train_accuracy_sum += (predicted_labels == y).sum().item()
    n += batch_n

  # ---- validation pass (no gradient tracking, dropout disabled) ----
  model.eval()
  test_accuracy_sum, test_n = 0.0, 0
  with torch.no_grad():
    for x, y in val_iter:
      y_pred = model(x)
      predicted_labels = torch.argmax(y_pred, dim=1)
      test_accuracy_sum += (predicted_labels == y).sum().item()
      test_n += y.numel()
  test_accuracy = test_accuracy_sum / test_n
  print(f'Epoch {epoch + 1}, Loss: {train_loss_sum / n:.4f}, Train Accuracy: {train_accuracy_sum / n:.4f}, Validation Accuracy: {test_accuracy:.4f}')