# Image Classification

In this tutorial, you will build your own image classifier with PyTorch. You will work with the MNIST dataset, which contains grayscale images of handwritten digits. It has 60,000 training images and 10,000 test images. Each image is 28 by 28 pixels in size.


## Importing Packages

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision                                 # PyTorch's computer vision library
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2
from torch.utils.data import DataLoader            # Provides inbuilt datasets and sampler

## Data Ingestion & Preprocessing

In [None]:
# Preprocessing pipeline applied to every image: tensor conversion, float scaling, then normalization
preprocessing_steps = [
    # NOTE: `transforms.ToTensor()` is not recommended in transforms.v2; `ToImage` + `ToDtype` replace it
    transforms_v2.ToImage(),                             # Converts the loaded (PIL) image to an image tensor
    transforms_v2.ToDtype(torch.float32, scale=True),    # Casts to float32 and rescales values, as normalization expects float input
    transforms_v2.Normalize(mean=(0.1307,), std=(0.3081,)),    # 0.1307 and 0.3081 are mean and std. dev. of the entire training set
]
transform = transforms_v2.Compose(preprocessing_steps)

In [45]:
# Loads the MNIST datasets

# Both splits share one root directory, so it is spelled in a single place
datasets_root = "./datasets"

train_dataset = torchvision.datasets.MNIST(
    root=datasets_root,    # Root directory to keep dataset files
    train=True,            # If 'True', loads train set, otherwise test set
    transform=transform,   # Applies automatic transformation (defined in above cell in this case)
    download=True          # If 'True', downloads the dataset if it is not already downloaded
)

test_dataset = torchvision.datasets.MNIST(
    root=datasets_root,    # Same root directory as the train set
    train=False,           # Loads the test set
    transform=transform,   # Test images get exactly the same preprocessing as train images
    download=True
)

In [46]:
# Creates data loaders

train_loader = DataLoader(
    train_dataset,    # Dataset to load the data from
    batch_size=64,    # Samples (images) per batch to load
    shuffle=True      # If 'True', shuffles the samples at every epoch for model to see samples in random order
)

test_loader = DataLoader(
    test_dataset,       # Dataset to load the data from (the comma after it was missing, which was a syntax error)
    batch_size=1000,    # Larger batch size as no need for gradient calculation for testing
    shuffle=False       # Shuffling is not required for testing
)

## Modeling

In [7]:
# Defines a neural network for modeling
class MNISTClassifier(nn.Module):    # Inheriting from `nn.Module` gives all of PyTorch's neural network functionality
    """
    Fully connected classifier with one hidden layer.

    Args:
        input_size: Number of values per sample after flattening (default: 784 = 28 x 28 pixels).
        hidden_size: Number of units in the hidden layer (default: 128).
        num_classes: Number of output scores, one per class (default: 10, one for each digit).
    """

    def __init__(self, input_size=784, hidden_size=128, num_classes=10):
        super().__init__()

        # Defines the layers
        self.flatten = nn.Flatten()     # Flattens every dimension except the batch one, as the following linear layers expect flat vectors
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),    # Inputs: <batch_size> x input_size, outputs: <batch_size> x hidden_size
            nn.ReLU(),                             # Activation function that zeroes negative values and keeps positive ones
            nn.Linear(hidden_size, num_classes)    # Inputs: <batch_size> x hidden_size, outputs: <batch_size> x num_classes [one score per class]
        )

    def forward(self, x):
        """Returns the raw class scores (no softmax applied) for a batch of images `x`."""
        x = self.flatten(x)
        x = self.layers(x)
        return x


# Instead of a custom class, the same layers can also be organized directly with `nn.Sequential`.
# model = nn.Sequential(
#     nn.Flatten(),
#     nn.Linear(784, 128),
#     nn.ReLU(),
#     nn.Linear(128, 10)
# )

## Model Training

In [9]:
# Picks the GPU when CUDA is available and falls back to the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device}")

Using cpu


In [47]:
# Loss function: cross entropy between the model's class scores and the true labels
loss_function = nn.CrossEntropyLoss()

# Model: created first, then moved to the selected device
model = MNISTClassifier()
model = model.to(device)

# Optimizer: Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [25]:
def train_epoch(model, train_loader, loss_function, optimizer, device, log_interval=100):
    """
    Trains a model using data loader over one epoch

    Prints the average loss and the accuracy of every window of
    `log_interval` batches and returns the same metrics for the whole epoch.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable yielding `(data, targets)` batches.
        loss_function: Callable computing the loss from `(outputs, targets)`.
        optimizer: Optimizer that updates the parameters of `model`.
        device: Device every batch is moved to before the forward pass.
        log_interval: Number of batches per progress report (default: 100).

    Returns:
        Tuple `(avg_loss, accuracy)`: mean of the per-batch losses and
        percentage of correctly classified samples over the whole epoch.
        Both are 0.0 when the loader yields nothing.

    Raises:
        ValueError: If `log_interval` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError("log_interval must be at least 1")

    model.train()  # Sets the mode into training

    # Metrics of the current reporting window (reset after every report)
    running_loss = 0.0
    true_predictions = 0
    sample_count = 0

    # Metrics of the whole epoch (returned to the caller)
    epoch_loss = 0.0
    epoch_true_predictions = 0
    epoch_sample_count = 0
    batch_count = 0

    # Training model over batches; counting from 1 makes every window hold exactly `log_interval` batches
    for batch_count, (data, targets) in enumerate(train_loader, start=1):

        # Moves both data and targets to the device
        data, targets = data.to(device), targets.to(device)

        optimizer.zero_grad()    # Resets gradients before processing next batch
        outputs = model(data)    # Model processes already transformed data that arrives in batches
        loss = loss_function(outputs, targets)    # Computes the loss of this batch
        loss.backward()          # Computes gradients of the loss
        optimizer.step()         # Updates the parameters using the gradients

        # Tracks improvement
        batch_loss = loss.item()
        _, predicted_class = outputs.max(dim=1)    # Index of the highest score along dim 1
        batch_true_predictions = predicted_class.eq(targets).sum().item()
        batch_samples = targets.size(0)

        running_loss += batch_loss
        true_predictions += batch_true_predictions
        sample_count += batch_samples

        epoch_loss += batch_loss
        epoch_true_predictions += batch_true_predictions
        epoch_sample_count += batch_samples

        # Shows progress with improvement
        if batch_count % log_interval == 0:
            avg_loss = running_loss / log_interval
            accuracy = true_predictions / sample_count * 100.0
            # The number shown is the count of samples processed so far in this epoch
            print(f"[Batch #: {epoch_sample_count}] Avg. Loss: {avg_loss:.3f}, Accuracy: {accuracy:.1f}%")
            running_loss = 0.0                  # Resets window variables
            true_predictions = 0
            sample_count = 0

    if batch_count == 0 or epoch_sample_count == 0:
        return 0.0, 0.0    # Nothing was processed, so there is nothing to average

    return epoch_loss / batch_count, epoch_true_predictions / epoch_sample_count * 100.0

**Training Loop**

In [48]:
num_epochs = 10    # Number of full passes over the training set

# Runs one training pass per epoch, announcing the 1-based epoch number first
for epoch in range(num_epochs):
    print(f"\nEpoch: {epoch + 1}")
    train_epoch(
        model=model,
        train_loader=train_loader,
        loss_function=loss_function,
        optimizer=optimizer,
        device=device,
    )


Epoch: 1
[Batch #: 6400] Avg. Loss: 0.627, Accuracy: 82.8%
[Batch #: 12800] Avg. Loss: 0.350, Accuracy: 89.6%
[Batch #: 19200] Avg. Loss: 0.272, Accuracy: 91.7%
[Batch #: 25600] Avg. Loss: 0.225, Accuracy: 93.4%
[Batch #: 32000] Avg. Loss: 0.210, Accuracy: 93.8%
[Batch #: 38400] Avg. Loss: 0.194, Accuracy: 94.3%
[Batch #: 44800] Avg. Loss: 0.160, Accuracy: 95.3%
[Batch #: 51200] Avg. Loss: 0.154, Accuracy: 95.1%
[Batch #: 57600] Avg. Loss: 0.147, Accuracy: 95.9%

Epoch: 2
[Batch #: 6400] Avg. Loss: 0.128, Accuracy: 96.4%
[Batch #: 12800] Avg. Loss: 0.107, Accuracy: 96.8%
[Batch #: 19200] Avg. Loss: 0.114, Accuracy: 96.3%
[Batch #: 25600] Avg. Loss: 0.111, Accuracy: 96.8%
[Batch #: 32000] Avg. Loss: 0.109, Accuracy: 96.8%
[Batch #: 38400] Avg. Loss: 0.112, Accuracy: 96.8%
[Batch #: 44800] Avg. Loss: 0.113, Accuracy: 96.5%
[Batch #: 51200] Avg. Loss: 0.100, Accuracy: 96.8%
[Batch #: 57600] Avg. Loss: 0.102, Accuracy: 97.0%

Epoch: 3
[Batch #: 6400] Avg. Loss: 0.071, Accuracy: 97.8%
[Bat

## Model Evaluation

In [31]:
def evaluate(model, test_loader, device):
    """
    Evaluates a trained model's prediction performance against a data loader and device

    Args:
        model: Network to evaluate; it is switched to evaluation mode and left in it.
        test_loader: Iterable yielding `(data, targets)` batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Accuracy as a percentage (0.0 to 100.0) of correctly classified
        samples, or 0.0 when the loader yields no samples.
    """
    model.eval()                                 # Sets evaluation mode (this is not what disables gradients)
    true_predictions = 0
    sample_count = 0

    with torch.no_grad():                        # Disables gradient tracking
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted_class = torch.max(outputs, 1) # Gets the class with highest score
            sample_count += targets.size(0)
            true_predictions += (predicted_class == targets).sum().item()

    if sample_count == 0:
        return 0.0                               # Avoids dividing by zero on an empty loader

    return 100.0 * true_predictions / sample_count

In [49]:
# Measures how well the trained model classifies the held-out test set
accuracy = evaluate(model=model, test_loader=test_loader, device=device)
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 97.72%
