In [34]:
# importing essential libraries
import torch # core library for PyTorch providing tensor operations
import torchvision # contains dataset(MNIST)
from torchvision import datasets,transforms # transformations(converting images to tensors(thier pixel values))
import torch.nn as nn # provides modules (neural network building blocks) like Conv2d and ReLU for creating models
import torch.optim as optim # contains optimization algos(provide optimizers like adam for updating model weigths during training
from torch.utils.data import DataLoader

Download and load the MNIST dataset, which consists of grayscale images of handwritten digits (0-9), each 28x28 pixels.

In [36]:
# preprocessing pipeline applied to every image as it is read from the dataset
transform = transforms.Compose(
    [
        # image -> tensor; for 8-bit images ToTensor also rescales pixels to [0, 1]
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 on the single grey channel, i.e. [0, 1] -> [-1, 1]
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [37]:
# build the train split first, then the test split; both share the same
# root directory and preprocessing and are downloaded if not already present
train_data, test_data = (
    datasets.MNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

In [38]:
# mini-batches of 64: training batches are reshuffled every epoch,
# test batches keep the dataset order (DataLoader's default is shuffle=False)
train_load = DataLoader(train_data, batch_size=64, shuffle=True)
test_load = DataLoader(test_data, batch_size=64)

CNN architecture: two convolution + max-pooling stages followed by two fully connected layers

In [40]:
class CNN(nn.Module):
    """Small convolutional classifier for square images.

    Layout: conv 5x5 (32 maps) -> ReLU -> 2x2 max-pool -> conv 5x5 (64 maps)
    -> ReLU -> 2x2 max-pool -> flatten -> linear (1024) -> ReLU -> linear.

    With the default arguments this is the network for 1-channel 28x28
    inputs and 10 classes, and its parameter names (conv1, conv2, fc1, fc2)
    and shapes are the same as before the arguments were introduced.

    Args:
        num_classes: number of output logits.
        in_channels: number of channels in the input images.
        image_size: height/width of the (square) input images.

    Raises:
        ValueError: if ``image_size`` is smaller than 4, because two 2x2
            poolings would leave no feature map to flatten.
    """

    def __init__(self, num_classes=10, in_channels=1, image_size=28):
        super().__init__()
        if image_size < 4:
            raise ValueError(f"image_size must be at least 4, got {image_size}")

        # convolutional layers ("same" padding keeps height/width unchanged)
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding="same")
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # halves height/width (floor)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding="same")

        # the pooling layer is applied twice, so each side shrinks to image_size // 4
        # (28 -> 14 -> 7 for the default size)
        pooled_size = image_size // 4

        # fully connected layers (flatten, dense(hidden) & output)
        self.flatten = nn.Flatten() # flattens the 64 feature maps into a single vector per sample
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 1024) # 64*7*7 inputs for the default size
        self.fc2 = nn.Linear(1024, num_classes) # one logit per class

        # activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: tensor of shape (batch, in_channels, image_size, image_size).

        Returns:
            Tensor of shape (batch, num_classes) holding raw, un-normalized
            scores (no softmax is applied here).
        """
        x = self.pool(self.relu(self.conv1(x))) # first conv + relu + max pooling
        x = self.pool(self.relu(self.conv2(x))) # second conv + relu + max pooling
        x = self.flatten(x) # flatten layer
        x = self.relu(self.fc1(x)) # dense(hidden) layer with relu
        return self.fc2(x) # output layer (logits)

In [41]:
# initiating model, loss func & optimizer
# seed PyTorch's global RNG first so the randomly initialised weights are the
# same on every Restart & Run All; anything else that draws from the global
# RNG afterwards becomes repeatable as well
SEED = 42
torch.manual_seed(SEED)

model = CNN()
criterion = nn.CrossEntropyLoss() # takes raw logits + integer class labels; default reduction averages over the batch
optimizer = optim.Adam(model.parameters()) # adam optimizer with its default hyperparameters

In [42]:
# training model
num_epochs = 5
# batches are moved to whichever device holds the model's parameters, so the
# loop keeps working unchanged if the model is later moved to a GPU
device = next(model.parameters()).device
for epoch in range(num_epochs):
    model.train() # sets model to training mode
    running_loss = 0.0 # sum of per-sample losses seen this epoch
    correct = 0
    total = 0
    for inputs, labels in train_load:
        inputs, labels = inputs.to(device), labels.to(device)
        n_samples = labels.size(0) # the last batch of an epoch can be smaller than the rest

        # zeroing param gradients i.e. clearing previous gradients to avoid accumulation
        optimizer.zero_grad()

        # forward propagation
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # backward propagation
        loss.backward() # computes gradients
        optimizer.step() # updates model weights based on gradients

        # criterion returns the mean loss of the batch, so weight it by the
        # batch size; otherwise a short final batch counts as much as a full one
        running_loss += loss.item() * n_samples

        # calculating accuracy (argmax avoids assigning to `_`, which in a
        # notebook would shadow IPython's last-output variable)
        predicted = outputs.argmax(dim=1) # index of the largest logit = predicted class
        total += n_samples
        correct += (predicted == labels).sum().item() # correct predictions in batch

    epoch_loss = running_loss/total # avg loss per sample over the epoch
    epoch_acc = correct/total*100 # accuracy in %
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

Epoch 1/5, Loss: 0.1377, Accuracy: 95.72%
Epoch 2/5, Loss: 0.0410, Accuracy: 98.69%
Epoch 3/5, Loss: 0.0273, Accuracy: 99.12%
Epoch 4/5, Loss: 0.0199, Accuracy: 99.37%
Epoch 5/5, Loss: 0.0165, Accuracy: 99.48%


In [43]:
# evaluating the model
model.eval() # sets model to evaluation mode
# evaluate on whichever device holds the model's parameters
device = next(model.parameters()).device
correct = 0
total = 0
with torch.no_grad(): # disabling gradient computation for faster evaluation and saving memory
    for inputs, labels in test_load:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # index of the largest logit = predicted class (the outputs are raw
        # scores, not probabilities); argmax also avoids assigning to `_`,
        # which in a notebook would shadow IPython's last-output variable
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100*correct/total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 98.89%


In [56]:
# write only the learned parameters (the state_dict) to disk, then confirm
torch.save(obj=model.state_dict(), f="mnist_cnn.pth")
print("model saved successfully")

model saved successfully
