This notebook implements a convolutional neural network (CNN) that classifies handwritten digits from the MNIST dataset using PyTorch.

In [None]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
from  torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

1. Import the relevant training and testing data.

The MNIST dataset consists of handwritten digits (0-9) and is commonly used for training various image processing systems. Each image is a 28x28 pixel grayscale image. The dataset was created from samples of handwritten digits by high school students and employees of the United States Census Bureau.

In [None]:
# Load both MNIST splits from ./data (download=True asks torchvision to fetch
# them); every image goes through the ToTensor() transform.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor(), download=True)

# Dataset summaries, each followed by a blank line.
print(train_data, "\n")
print(test_data, "\n")

# Shapes of the underlying image tensors and of the training labels.
print(f"Training Data Shape: {train_data.data.shape}")
print(f"Test Data Shape: {test_data.data.shape}")
print(f"Training Data Labels Shape: {train_data.targets.shape}")
print(f"Training Data Labels: {train_data.targets}")

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor() 

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor() 

Training Data Shape: torch.Size([60000, 28, 28])
Test Data Shape: torch.Size([10000, 28, 28])
Training Data Labels Shape: torch.Size([60000])
Training Data Labels: tensor([5, 0, 4,  ..., 5, 6, 8])


2. Create a DataLoader for the training and testing data to facilitate batch processing and shuffling of the data during training.

In [None]:
# Serve both splits in batches of 64; only the training split is shuffled.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

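3. Define the CNN architecture: three 3x3 convolutions (8, 16, and 32 output channels), each followed by ReLU and 2x2 max pooling, then a single fully connected layer with 10 outputs. The padded convolutions keep the spatial size, and each pooling step halves it (28x28 -> 14x14 -> 7x7 -> 3x3), which is why the fully connected layer takes 32 * 3 * 3 = 288 input features.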

In [None]:
class DigitRecognitionCNN(nn.Module):
  """Small CNN for single-channel images with 10 output classes.

  Three conv -> ReLU -> 2x2 max-pool stages (8, 16, 32 channels) feed one
  linear layer. The linear layer takes 32 * 3 * 3 features, which is what a
  28x28 input is reduced to after the three poolings (28 -> 14 -> 7 -> 3).
  """

  def __init__(self):
    super().__init__()
    # All convolutions are 3x3 with stride 1 and padding 1, so they keep the
    # spatial size; only the pooling in forward() shrinks it.
    conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
    self.conv1 = nn.Conv2d(1, 8, **conv_kwargs)
    self.conv2 = nn.Conv2d(8, 16, **conv_kwargs)
    self.conv3 = nn.Conv2d(16, 32, **conv_kwargs)
    self.fully_connected1 = nn.Linear(32 * 3 * 3, 10)
    self.flatten = nn.Flatten()

  def forward(self, x):
    """Return log-softmax scores over the 10 classes, one row per input sample."""
    for conv in (self.conv1, self.conv2, self.conv3):
      x = F.max_pool2d(F.relu(conv(x)), 2)
    scores = self.fully_connected1(self.flatten(x))
    return F.log_softmax(scores, dim=1)


4. Initialize the model, define the optimizer, loss function, and hyperparameters.

In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DigitRecognitionCNN().to(device)

# Hyperparameters.
learning_rate = 0.01
batch_size = 64  # should match the batch_size passed to the DataLoaders
num_epochs = 50
momentum = 0.9

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# DigitRecognitionCNN.forward() already ends in F.log_softmax, so the matching
# criterion is NLLLoss. CrossEntropyLoss expects raw logits and would apply
# log_softmax a second time.
loss_fn = nn.NLLLoss()

5. Define the training loop to train the model over multiple epochs.

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
  """Train `model` for one pass over `dataloader`, logging the loss every 20 batches.

  Args:
    dataloader: Iterable of (inputs, targets) batches; must expose `.dataset`
      so the total number of samples can be reported.
    model: The `nn.Module` being optimised; it is put into training mode.
    loss_fn: Callable mapping (predictions, targets) to a scalar loss tensor.
    optimizer: Optimizer whose `step()` / `zero_grad()` update the model.
  """
  size = len(dataloader.dataset)
  # Send batches to wherever the model's parameters live instead of relying on
  # a notebook-level `device` variable.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')
  model.train()

  # Running count of samples processed; correct for any batch size, including
  # a smaller final batch.
  seen = 0
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(target_device), y.to(target_device)

    pred = model(X)
    loss = loss_fn(pred, y)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    seen += len(X)
    if batch % 20 == 0:
      print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


6. Define the test loop to evaluate the model's performance on the test dataset.

In [None]:
def test_loop(dataloader, model, loss_fn):
  model.eval()
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss, correct = 0, 0

  with torch.no_grad():
        for X, y in dataloader:
          X, y = X.to(device), y.to(device)
          pred = model(X)

          test_loss += loss_fn(pred, y).item()
          correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

7. Train the model and evaluate its performance on the test dataset after each epoch.

In [None]:
# Each epoch is one full training pass followed by one evaluation pass.
for epoch in range(num_epochs):
    # Epochs are reported 1-based.
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loop(dataloader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test_loop(dataloader=test_loader, model=model, loss_fn=loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.300274  [   64/60000]
loss: 2.294496  [ 1344/60000]
loss: 2.285552  [ 2624/60000]
loss: 2.201403  [ 3904/60000]
loss: 1.825495  [ 5184/60000]
loss: 0.806883  [ 6464/60000]
loss: 0.740524  [ 7744/60000]
loss: 0.393999  [ 9024/60000]
loss: 0.462359  [10304/60000]
loss: 0.247856  [11584/60000]
loss: 0.276822  [12864/60000]
loss: 0.164345  [14144/60000]
loss: 0.168202  [15424/60000]
loss: 0.170900  [16704/60000]
loss: 0.135937  [17984/60000]
loss: 0.159248  [19264/60000]
loss: 0.130405  [20544/60000]
loss: 0.059370  [21824/60000]
loss: 0.236946  [23104/60000]
loss: 0.130011  [24384/60000]
loss: 0.119459  [25664/60000]
loss: 0.144647  [26944/60000]
loss: 0.130797  [28224/60000]
loss: 0.195089  [29504/60000]
loss: 0.152732  [30784/60000]
loss: 0.293827  [32064/60000]
loss: 0.173267  [33344/60000]
loss: 0.075566  [34624/60000]
loss: 0.099846  [35904/60000]
loss: 0.147054  [37184/60000]
loss: 0.265110  [38464/60000]
loss: 0.256566  [39744/60000]


8. Save the trained model to a file for future use.

In [None]:
# Write the model's state_dict (not the model object itself) to disk.
torch.save(obj=model.state_dict(), f="digit_recog_cnn.pt")