# Digit Recognition Using a Classification CNN


Importing Data

In [23]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np

In [24]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing pipeline: convert each image to a tensor, then apply
# (value - 0.5) / 0.5 to its single channel.
# The statistics are written as one-element tuples; `(0.5)` without the
# trailing comma is just the float 0.5, not a per-channel sequence.
normalize_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [25]:
# Build the training split first, then the test split. Both are stored under
# the same root directory, downloaded when missing, and passed through the
# same preprocessing pipeline; only the `train` flag differs.
train_data, test_data = (
    torchvision.datasets.MNIST(
        'data',
        train=is_train,
        transform=normalize_transform,
        download=True,
    )
    for is_train in (True, False)
)

Hyperparameters

In [52]:
# Number of full passes over the training loader.
num_epochs = 5
# Number of samples per batch, shared by the training and test loaders.
batch_size = 100
# Step size handed to the optimizer.
learning_rate = 1e-3

Generating Data Loaders

In [27]:
# Batch iterators over the two splits. Only the training split is shuffled;
# the test split keeps the loader's default fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [33]:
# Walk the whole training loader once and record every target label so the
# set of distinct classes can be read off below.
classes = []
for x, y in train_loader:
    classes.extend(y.tolist())
# x and y intentionally stay bound to the last batch; a later cell inspects
# their shapes.
labels = np.unique(classes)
labels

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
# Shapes of the last (images, labels) batch left bound by the loop over train_loader.
x.shape, y.shape

(torch.Size([100, 1, 28, 28]), torch.Size([100]))

In [53]:
class CNN(torch.nn.Module):
    """Small convolutional classifier for 28 x 28 images.

    Two conv -> ReLU -> max-pool stages are followed by a two-layer fully
    connected head that emits one raw score (logit) per class. The size of
    the first linear layer (64 * 7 * 7) is tied to a 28 x 28 spatial input.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of scores produced per image.
    """

    def __init__(self, *, in_channels: int = 1, num_classes: int = 10) -> None:
        super().__init__()
        self.model = torch.nn.Sequential(
            # Input = in_channels x 28 x 28, Output = 32 x 28 x 28
            # (3 x 3 kernel with padding 1 keeps the spatial size)
            torch.nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            # Input = 32 x 28 x 28, Output = 32 x 14 x 14
            torch.nn.MaxPool2d(kernel_size=2),

            # Input = 32 x 14 x 14, Output = 64 x 14 x 14
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            # Input = 64 x 14 x 14, Output = 64 x 7 x 7
            torch.nn.MaxPool2d(kernel_size=2),

            # Input = 64 x 7 x 7, Output = 3136
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 7 * 7, 1000),
            torch.nn.ReLU(),
            # One logit per class; no softmax is applied here
            torch.nn.Linear(1000, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images.

        Args:
            x: Batch of shape (batch, in_channels, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        return self.model(x)

In [54]:
# Instantiate the network and move its parameters to the selected device.
model = CNN().to(device)

# Cross-entropy loss on the model's raw outputs, optimised with Adam.
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):

    # Put the model in training mode before each pass over the training set
    model.train()
    for images, labels in train_loader:

        # Move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and cross-entropy loss for this batch
        outputs = model(images)
        loss = loss_func(outputs, labels)

        # Clear gradients left over from the previous step, backpropagate,
        # then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [55]:
# Running count of correctly classified test samples.
test_acc = 0
model.eval()

# Gradients are not needed for evaluation.
with torch.no_grad():
    # Iterating over the test dataset in batches
    for images, labels in test_loader:

        images = images.to(device)
        y_true = labels.to(device)

        # Calculating outputs for the batch being iterated
        outputs = model(images)

        # Predicted label = index of the highest score in each row
        y_pred = outputs.argmax(dim=1)

        # Comparing predicted and true labels
        test_acc += (y_pred == y_true).sum().item()

    print(f"Test set accuracy = {100 * test_acc / len(test_data)} %")

Test set accuracy = 98.97 %
