## Imports

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

## Define a LeNet-5-style model (20/50 conv filters, 500 hidden units) using ReLU

In [4]:
class LeNet5(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    Both convolutions use an unpadded 5x5 kernel and each is followed by a
    2x2 max-pool with stride 2. ``fc1`` is sized for 50 * 4 * 4 = 800
    flattened features, which is what those stages yield for a
    single-channel 28x28 input (28 -> 24 -> 12 -> 8 -> 4).

    Args:
        num_classes: Number of output logits produced by ``fc2``.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor: 1 -> 20 -> 50 channels, 5x5 kernels, no padding.
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, padding=0)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5, padding=0)

        # Classifier head: 800 -> 500 -> num_classes.
        self.fc1 = nn.Linear(50 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, num_classes)

    def forward(self, x):
        """Run the network on ``x`` and return the output of ``fc2``.

        No softmax is applied here; the returned tensor holds raw scores.
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2, stride=2)

        # Collapse every dimension except the batch dimension.
        flat = x.flatten(start_dim=1)

        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Seed PyTorch's global RNG before the model is built so the initial weights
# are the same on every fresh run of this cell.
SEED = 42
torch.manual_seed(SEED)

# Instantiate the model on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5(num_classes=10).to(device)

# Define loss and optimizer
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's raw outputs
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)  # Adam optimizer

# Model summary
print(model)

LeNet5(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


## Training on MNIST

In [None]:
# Define data transforms: force a single channel, convert to a tensor, then
# normalize with mean 0.5 and std 0.5 (i.e. (x - 0.5) / 0.5).
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    # Both mean and std are one-element tuples (one entry per channel).
    # The original std was written "(0.5)", which is a bare float, not a tuple.
    transforms.Normalize((0.5,), (0.5,)),
])

# Load MNIST dataset (downloaded into ./data on first use)
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# One batch size shared by both loaders; only the training data is shuffled.
BATCH_SIZE = 128
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # put the module in training mode for this epoch
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    num_samples = 0     # number of samples seen so far this epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion (nn.CrossEntropyLoss with default settings) yields the
        # mean loss of the batch. Weight it by the batch size so a smaller
        # final batch does not count as much as a full one in the epoch average.
        batch_n = labels.size(0)
        running_loss += loss.item() * batch_n
        num_samples += batch_n
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/max(num_samples, 1): .4f}')
print('Training complete')

Epoch 1/10, Loss:  0.1738
Epoch 2/10, Loss:  0.0447
Epoch 3/10, Loss:  0.0324
Epoch 4/10, Loss:  0.0228
Epoch 5/10, Loss:  0.0190
Epoch 6/10, Loss:  0.0134
Epoch 7/10, Loss:  0.0131
Epoch 8/10, Loss:  0.0102
Epoch 9/10, Loss:  0.0079
Epoch 10/10, Loss:  0.0101
Training complete
