In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.optim import lr_scheduler

In [2]:
import random
import numpy as np
import os # To set environment variables, useful for some libraries

def set_seed(seed):
    """
    Seed every random number generator this notebook touches.

    Covers Python's `random` module, NumPy's global generator and PyTorch
    (CPU, plus CUDA when available), puts cuDNN into deterministic mode and
    exports PYTHONHASHSEED. Prints a confirmation message when done.
    """
    # Host-side generators: Python built-in, NumPy, PyTorch (CPU)
    for seed_generator in (random.seed, np.random.seed, torch.manual_seed):
        seed_generator(seed)

    # Device-side generators: the current GPU first, then every visible GPU
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Give up cuDNN's auto-tuner in exchange for repeatable kernels
    # (may cost some training speed).
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Python fixes its hash seed at interpreter start-up, so this export is
    # picked up by processes launched afterwards, not by the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)

    print(f"Random seed set to {seed} for all relevant libraries.")

# Single seed value used for the whole notebook; edit it here to get a different run.
MY_RANDOM_SEED = 42
set_seed(seed=MY_RANDOM_SEED)

Random seed set to 42 for all relevant libraries.


In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using {device} device")

Using cuda device


In [4]:
# Both splits live under the same root and are downloaded on demand;
# only the `train` flag tells them apart.
mnist_options = {"root": "data", "download": True}

training_data = datasets.MNIST(train=True, transform=ToTensor(), **mnist_options)
test_data = datasets.MNIST(train=False, transform=ToTensor(), **mnist_options)

In [10]:
batch_size = 128

# The training split is reshuffled on every pass; the test split keeps a fixed order
training_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# Sanity check: pull a single training batch and show its tensor shapes
X, y = next(iter(training_dataloader))
print(X.shape)
print(y.shape)

torch.Size([128, 1, 28, 28])
torch.Size([128])


In [11]:
class MNIST_CNN(nn.Module):
    """
    Single-convolution image classifier.

    Pipeline: Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d -> Dropout -> ReLU
    -> Flatten -> Linear.

    Args:
        input_channels: number of channels of the input images.
        hidden_channels: number of feature maps produced by the convolution.
        output_size: number of logits returned per image.
        images_size: (height, width) of the input images.
        dropout_prob: dropout probability applied after pooling.
    """
    def __init__(self, input_channels, hidden_channels, output_size, images_size, dropout_prob):
        super().__init__()
        # The 3x3 / stride 1 / padding 1 convolution keeps height and width,
        # and the 2x2 / stride 2 pooling floors each of them to half. Each
        # dimension must be halved on its own: the unparenthesised form
        # `c * h//2 * w//2` is evaluated left to right and gives a wrong
        # feature count whenever a dimension is odd.
        pooled_height = images_size[0] // 2
        pooled_width = images_size[1] // 2
        flattened_features = hidden_channels * pooled_height * pooled_width

        self.cnn = nn.Sequential(
            nn.Conv2d(input_channels,hidden_channels, kernel_size=(3,3), stride=(1,1), padding=(1,1) ),
            nn.BatchNorm2d(hidden_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(dropout_prob),
            # This second ReLU sees only non-negative activations, so it does
            # not change them; it is kept so the Sequential indices (and with
            # them the state_dict keys) stay the same.
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flattened_features, output_size)
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images `x`."""
        logits = self.cnn(x)
        return logits

In [12]:
# Architecture hyperparameters
input_channels = 1
hidden_channels = 32
output_size = 10
images_size = (28, 28)
dropout_prob = 0.3

model_CNN = MNIST_CNN(
    input_channels=input_channels,
    hidden_channels=hidden_channels,
    output_size=output_size,
    images_size=images_size,
    dropout_prob=dropout_prob,
).to(device)

# Cross-entropy on the raw logits, since this is a classification problem
criterion_CNN = nn.CrossEntropyLoss()
optimizer_CNN = torch.optim.AdamW(params=model_CNN.parameters(), lr=1e-4, weight_decay=1e-4)

In [15]:
def training_loop(dataloader, model, criterion, optimizer, device=None):
    """
    Train `model` for one pass over `dataloader`, logging the loss every 100 batches.

    Args:
        dataloader: iterable of (inputs, targets) batches that exposes `.dataset`.
        model: module to train; it is switched to training mode.
        criterion: loss, called as `criterion(predictions, targets)`.
        optimizer: optimizer that updates the model's parameters.
        device: device every batch is moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a notebook-level
            `device` variable.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    total_samples_processed_in_epoch = 0  # running count of samples seen in this pass
    model.train()

    # Discard gradients left over from an earlier, interrupted run so they
    # cannot leak into the first optimizer step.
    optimizer.zero_grad()

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = criterion(pred, y)

        loss.backward()        # backpropagation
        optimizer.step()       # update of weights and biases
        optimizer.zero_grad()  # gradient reset

        # Accumulate the number of samples processed in the current batch
        total_samples_processed_in_epoch += len(X)

        if batch % 100 == 0:
            loss_val = loss.item()
            print(f"loss: {loss_val:>7f}  [{total_samples_processed_in_epoch:>5d}/{size:>5d}]")

def test_loop(dataloader, model, criterion):
    size = len(dataloader.dataset)
    elements_per_batch = len(dataloader)
    model.eval()
    sum_loss_per_batch, correct = 0, 0
    with torch.no_grad():
        for X,y in dataloader:
            X,y = X.to(device), y.to(device)

            pred = model(X)
            sum_loss_per_batch+=criterion(pred,y).item()
            correct += (pred.argmax(1)==y).type(torch.float).sum().item()
    sum_loss_per_batch/=elements_per_batch
    correct/=size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {sum_loss_per_batch:>8f} \n")

In [16]:
epochs = 5

# Cosine decay of the learning rate across the run, with eta_min as the floor
scheduler_CNN = lr_scheduler.CosineAnnealingLR(optimizer=optimizer_CNN, T_max=epochs, eta_min=1e-5)

for iteration in range(epochs):
    print(f"Epoch {iteration+1}\n-------------------------------")
    training_loop(
        dataloader=training_dataloader,
        model=model_CNN,
        criterion=criterion_CNN,
        optimizer=optimizer_CNN,
    )
    test_loop(dataloader=test_dataloader, model=model_CNN, criterion=criterion_CNN)

    # One scheduler update per epoch, then report the rate it left on the optimizer
    scheduler_CNN.step()
    current_lr = optimizer_CNN.param_groups[0]['lr']
    print(f"Current Learning Rate: {current_lr:.6f}")
print("Done!")

Epoch 1
-------------------------------
loss: 0.082898  [  128/60000]
loss: 0.130018  [12928/60000]
loss: 0.126484  [25728/60000]
loss: 0.083000  [38528/60000]
loss: 0.161801  [51328/60000]
Test Error: 
 Accuracy: 96.9%, Avg loss: 0.106428 

Current Learning Rate: 0.000091
Epoch 2
-------------------------------
loss: 0.130855  [  128/60000]
loss: 0.123612  [12928/60000]
loss: 0.046557  [25728/60000]
loss: 0.092584  [38528/60000]
loss: 0.078742  [51328/60000]
Test Error: 
 Accuracy: 97.2%, Avg loss: 0.094638 

Current Learning Rate: 0.000068
Epoch 3
-------------------------------
loss: 0.066172  [  128/60000]
loss: 0.145414  [12928/60000]
loss: 0.140044  [25728/60000]
loss: 0.109267  [38528/60000]
loss: 0.072652  [51328/60000]
Test Error: 
 Accuracy: 97.4%, Avg loss: 0.085248 

Current Learning Rate: 0.000041
Epoch 4
-------------------------------
loss: 0.035366  [  128/60000]
loss: 0.047261  [12928/60000]
loss: 0.045005  [25728/60000]
loss: 0.090875  [38528/60000]
loss: 0.054712  [5