In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

# LeNet Architecture

In [2]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5, 5), stride=(1, 1), padding=(0, 0))
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5, 5), stride=(1, 1), padding=(0, 0))
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=(4, 4), stride=(1, 1), padding=(0, 0))
        self.linear1 = nn.Linear(120, 84)
        self.linear2 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.linear1(x))
        x = self.linear2(x)

        return x

# Define the transform to normalize the data

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalizes the grayscale values to range [-1, 1]
])

# Download and load the training data

In [4]:
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

# Download and load the test data

In [5]:
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

# Initialize the network

In [6]:
net = LeNet()

# Define the loss function and optimizer

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop

In [8]:
for epoch in range(10):  # Number of epochs

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')

[1, 100] loss: 2.306
[1, 200] loss: 2.299
[1, 300] loss: 2.292
[1, 400] loss: 2.280
[1, 500] loss: 2.259
[1, 600] loss: 2.219
[1, 700] loss: 2.097
[1, 800] loss: 1.705
[1, 900] loss: 1.151
[1, 1000] loss: 0.828
[1, 1100] loss: 0.592
[1, 1200] loss: 0.448
[1, 1300] loss: 0.362
[1, 1400] loss: 0.356
[1, 1500] loss: 0.309
[1, 1600] loss: 0.247
[1, 1700] loss: 0.251
[1, 1800] loss: 0.246
[2, 100] loss: 0.219
[2, 200] loss: 0.195
[2, 300] loss: 0.179
[2, 400] loss: 0.180
[2, 500] loss: 0.182
[2, 600] loss: 0.173
[2, 700] loss: 0.175
[2, 800] loss: 0.166
[2, 900] loss: 0.156
[2, 1000] loss: 0.150
[2, 1100] loss: 0.132
[2, 1200] loss: 0.143
[2, 1300] loss: 0.137
[2, 1400] loss: 0.125
[2, 1500] loss: 0.151
[2, 1600] loss: 0.146
[2, 1700] loss: 0.137
[2, 1800] loss: 0.125
[3, 100] loss: 0.101
[3, 200] loss: 0.118
[3, 300] loss: 0.112
[3, 400] loss: 0.107
[3, 500] loss: 0.114
[3, 600] loss: 0.099
[3, 700] loss: 0.117
[3, 800] loss: 0.088
[3, 900] loss: 0.108
[3, 1000] loss: 0.087
[3, 1100] loss:

# Evaluate

In [9]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

Accuracy of the network on the 10000 test images: 98.73%
