In [1]:
# Handwritten Digit Recognition using PyTorch (MNIST Dataset)
import torch                      # import PyTorch library
import torch.nn as nn               # import neural network module
import torch.optim as optim          # import optimization algorithms
import torch.nn.functional as F             # import functional API for activations
from torchvision import datasets, transforms          # for datasets and preprocessing
from torch.utils.data import DataLoader           # for batching data

In [4]:
# 1. SET DEVICE AND RANDOM SEED

# Fixed seed so that weight initialisation and batch shuffling repeat from run to run.
# (Bit-exact results on a GPU are still not guaranteed.)
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [5]:
# 2. LOAD AND TRANSFORM DATA

# Preprocessing applied to every image:
#   1. ToTensor   - turn the image into a tensor
#   2. Normalize  - shift/scale with mean 0.5 and std 0.5, giving values in [-1, 1]
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(preprocessing_steps)


In [6]:
# Training split of MNIST, stored under ./data (downloaded on request) and
# preprocessed with `transform`
train_dataset = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)

# Batch the training set: 64 samples per batch, reshuffled by the loader
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)


100%|██████████| 9.91M/9.91M [00:00<00:00, 35.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.12MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 8.14MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.49MB/s]


In [7]:
# Test split of MNIST, stored under ./data (downloaded on request) and
# preprocessed with the same `transform` as the training data
test_dataset = datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Batch the test set: 64 samples per batch, kept in dataset order (no shuffling)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)


In [8]:
# 3. BUILD NEURAL NETWORK MODEL

class DigitClassifier(nn.Module):
    """Fully connected classifier for flattened images (28x28 digits by default).

    The network is three linear layers with a ReLU after each of the first
    two. The last layer returns raw logits (no softmax).

    Args:
        input_size: Number of features per flattened sample (default 28 * 28 = 784).
        hidden1: Width of the first hidden layer (default 128).
        hidden2: Width of the second hidden layer (default 64).
        num_classes: Number of output classes (default 10, the digits 0-9).
    """

    def __init__(self, input_size: int = 28 * 28, hidden1: int = 128,
                 hidden2: int = 64, num_classes: int = 10) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1)    # input -> first hidden layer
        self.fc2 = nn.Linear(hidden1, hidden2)       # first -> second hidden layer
        self.fc3 = nn.Linear(hidden2, num_classes)   # second hidden layer -> class logits

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` into rows of ``input_size`` features and return the logits.

        Args:
            x: Tensor whose element count is a multiple of ``input_size``,
                e.g. ``(batch, 1, 28, 28)`` with the default sizes.

        Returns:
            Tensor of shape ``(rows, num_classes)`` holding unnormalised scores.
        """
        # reshape (rather than view) also accepts non-contiguous input such as a
        # transposed batch; for contiguous input it is the same zero-copy view.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Build the network, then move its parameters onto the selected device (GPU or CPU)
model = DigitClassifier()
model = model.to(device)


In [9]:
# 4. LOSS FUNCTION AND OPTIMIZER

# Cross-entropy loss: compares the model's raw logits with the integer class labels
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [10]:
# 5. TRAINING LOOP
epochs = 15  # number of full passes over the training set

# Put the model in training mode explicitly. Later cells call model.eval(), so
# without this a re-run of this cell would train a model left in eval mode.
model.train()

for epoch in range(epochs):
    running_loss = 0.0  # sum of the per-batch losses for this epoch
    for images, labels in train_loader:
        # Inputs and labels must be on the same device as the model
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()              # clear gradients from the previous step
        outputs = model(images)            # forward pass -> logits
        loss = criterion(outputs, labels)  # loss for this batch
        loss.backward()                    # backpropagation
        optimizer.step()                   # update weights

        running_loss += loss.item()

    # Report the average of the batch losses for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")



Epoch [1/15], Loss: 0.3934
Epoch [2/15], Loss: 0.1873
Epoch [3/15], Loss: 0.1357
Epoch [4/15], Loss: 0.1095
Epoch [5/15], Loss: 0.0915
Epoch [6/15], Loss: 0.0810
Epoch [7/15], Loss: 0.0730
Epoch [8/15], Loss: 0.0657
Epoch [9/15], Loss: 0.0586
Epoch [10/15], Loss: 0.0523
Epoch [11/15], Loss: 0.0502
Epoch [12/15], Loss: 0.0455
Epoch [13/15], Loss: 0.0398
Epoch [14/15], Loss: 0.0414
Epoch [15/15], Loss: 0.0382


In [11]:
# 6. TESTING LOOP (Accuracy)
correct = 0  # number of correctly classified test images
total = 0    # number of test images seen

# Switch to evaluation mode before measuring accuracy, so any train-only layer
# behaviour (dropout, batch-norm statistics) is disabled if such layers are added.
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)            # logits, one row per image
        predicted = outputs.argmax(dim=1)  # index of the highest score = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"\nTest Accuracy: {100 * correct / total:.2f}%")



Test Accuracy: 97.67%


In [12]:
# 7. PREDICTION EXAMPLE
model.eval()  # evaluation mode for inference

# First (image, label) pair of the test set
sample_image, sample_label = test_dataset[0]

# The model works on batches, so add a leading batch dimension of size 1
batch_of_one = sample_image.unsqueeze(0).to(device)

with torch.no_grad():  # inference only, no gradients
    output = model(batch_of_one)
predicted_class = output.argmax(dim=1).item()  # class with the highest score

print(f"\nPredicted Digit: {predicted_class}")
print(f"Actual Digit: {sample_label}")


Predicted Digit: 7
Actual Digit: 7


In [15]:
# Upload an image from the local machine. `google.colab` is provided by the
# Google Colab runtime, so this cell is expected to run only there.
from google.colab import files         # Colab helper for uploading files to the notebook session
# Opens a file upload dialog; the prediction cell below opens the first key of
# `uploaded` as a file name.
uploaded = files.upload()            # opens a file upload dialog


Saving digit (1).png to digit (1).png


In [16]:
from PIL import Image      # to open and process image files
# `torch` and `transforms` are already imported in the first cell of the notebook.

# Use the first uploaded file; fail with a clear message if nothing was uploaded
if not uploaded:
    raise ValueError("No file was uploaded - run the upload cell again and choose an image.")
uploaded_name = next(iter(uploaded))

# Load the image as single-channel grayscale. The context manager closes the
# underlying file handle; convert() returns an independent in-memory image.
with Image.open(uploaded_name) as pil_image:
    gray_image = pil_image.convert('L')

# NOTE(review): MNIST digits are light strokes on a dark background. If the
# uploaded picture is a dark digit on a light background it probably has to be
# inverted (e.g. PIL.ImageOps.invert) before prediction - confirm per upload.

# Same tensor conversion and normalization as the dataset pipeline, preceded by
# a resize to the 28x28 input the model expects. A separate name is used so the
# dataset `transform` defined earlier is not overwritten.
upload_transform = transforms.Compose([
    transforms.Resize((28, 28)),          # resize to 28x28
    transforms.ToTensor(),                # convert to tensor
    transforms.Normalize((0.5,), (0.5,))  # normalize with mean 0.5, std 0.5
])

image = upload_transform(gray_image).unsqueeze(0)  # add batch dimension -> (1, 1, 28, 28)

# Predict
model.eval()
with torch.no_grad():
    output = model(image.to(device))
    predicted_class = output.argmax(dim=1).item()

print(f"Predicted Digit: {predicted_class}")


Predicted Digit: 9
