# Importing Libraries

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [7]:
# Pin PyTorch's global random seed so repeated runs of the notebook start
# from the same random state.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x10e3f2710>

In [8]:
# Preprocessing pipeline applied to every image as it is read from the dataset.
transform = transforms.Compose(
    [
        # ToTensor converts the PIL image to a tensor with pixel values in [0, 1].
        transforms.ToTensor(),
        # Standardising with the MNIST mean/std is optional and left disabled:
        # transforms.Normalize((0.1307,), (0.3081,))
    ]
)

# MNIST splits: train=True is used for training, train=False for validation.
# download=True fetches the files into `root` when they are not already there.
mnist_train = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=transform,
)
mnist_val = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=transform,
)

# Wrap both splits in DataLoaders that yield mini-batches of 64 samples.
# Only the training data is shuffled; validation is read in a fixed order.
train_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=64,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    mnist_val,
    batch_size=64,
    shuffle=False,
)


# Defining a Simple Artificial Neural Network (ANN)

In [9]:
class MNISTClassifier(nn.Module):
    """Two-layer fully connected classifier for flattened images.

    Architecture: ``input_size -> hidden_size`` (ReLU) ``-> num_classes``.
    With the default arguments this is the 784 -> 128 -> 10 network for
    28 x 28 MNIST digits.

    Args:
        input_size: Number of features per sample once the image is flattened.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, input_size=28*28, hidden_size=128, num_classes=10):
        super().__init__()

        # First fully connected layer: input_size inputs -> hidden_size hidden units
        self.layer1 = nn.Linear(input_size, hidden_size)
        # Second fully connected layer: hidden_size hidden units -> num_classes outputs
        self.layer2 = nn.Linear(hidden_size, num_classes)

    def forward(self, img):
        """Compute class logits for a batch of images.

        Args:
            img: Tensor whose total element count is a multiple of the first
                layer's input size, e.g. ``(N, 1, 28, 28)``, ``(N, 28, 28)``
                or a single ``(28, 28)`` image with the default sizes.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding raw, un-normalised
            logits (no softmax is applied here).
        """
        # Flatten every sample into a 1D feature vector. `reshape` is used
        # instead of `view` because `view` raises a RuntimeError for
        # non-contiguous inputs (e.g. a transposed or permuted batch);
        # `reshape` returns a view when possible and copies only when needed.
        x = img.reshape(-1, self.layer1.in_features)
        # Hidden layer followed by the ReLU non-linearity
        x = F.relu(self.layer1(x))
        # Output layer: one logit per class
        x = self.layer2(x)

        return x


# Define Loss Function and Optimizer

In [10]:
# Build the classifier that will be trained.
model = MNISTClassifier()

# Cross-entropy loss, applied to the logits the model returns.
criterion = nn.CrossEntropyLoss()

# Stochastic gradient descent with momentum over all of the model's parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)


# Training and Validation Functions

In [None]:
def train_one_epoch(model, data_loader, optimizer, criterion):
    """Run one optimisation pass over ``data_loader`` and report the mean loss.

    Args:
        model: Module to train; it is switched to training mode.
        data_loader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.

    Returns:
        The sum of the per-batch loss values divided by ``len(data_loader)``.
    """
    model.train()  # training mode
    epoch_loss_sum = 0.0

    for batch in data_loader:
        inputs, targets = batch

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss for this batch
        batch_loss = criterion(model(inputs), targets)

        # Backpropagate, then apply the parameter update
        batch_loss.backward()
        optimizer.step()

        epoch_loss_sum = epoch_loss_sum + batch_loss.item()

    mean_batch_loss = epoch_loss_sum / len(data_loader)
    return mean_batch_loss

def validate(model, data_loader, criterion):
    """Evaluate ``model`` on ``data_loader`` without updating any weights.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the sum of per-batch losses divided
        by ``len(data_loader)``, and the fraction of samples whose highest
        logit matches the label.
    """
    model.eval()  # eval mode (e.g. disables dropout if the model has any)
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for inputs, targets in data_loader:
            logits = model(inputs)
            loss_sum += criterion(logits, targets).item()

            # Predicted class = index of the largest logit in each row
            predictions = logits.max(dim=1).indices
            num_seen += targets.size(0)
            num_correct += (predictions == targets).sum().item()

    accuracy = num_correct / num_seen
    mean_batch_loss = loss_sum / len(data_loader)
    return mean_batch_loss, accuracy


Epoch [1/5], Train Loss: 0.2319, Val Loss: 0.1878, Val Accuracy: 94.56%
Epoch [2/5], Train Loss: 0.1737, Val Loss: 0.1538, Val Accuracy: 95.51%
Epoch [3/5], Train Loss: 0.1389, Val Loss: 0.1316, Val Accuracy: 96.05%
Epoch [4/5], Train Loss: 0.1157, Val Loss: 0.1201, Val Accuracy: 96.62%
Epoch [5/5], Train Loss: 0.0995, Val Loss: 0.1046, Val Accuracy: 97.00%


# Training

In [None]:
# Train for a fixed number of epochs, validating after each one.
num_epochs = 5
for epoch in range(num_epochs):
    # One optimisation pass over the training batches
    train_loss = train_one_epoch(
        model=model,
        data_loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
    )
    # Loss and accuracy on the validation batches
    val_loss, val_accuracy = validate(
        model=model,
        data_loader=val_loader,
        criterion=criterion,
    )

    # Report progress with a 1-based epoch counter and accuracy as a percentage
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, "
          f"Val Loss: {val_loss:.4f}, "
          f"Val Accuracy: {val_accuracy*100:.2f}%")