In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

from src.cnn_model import Cnn
from src.train_utils import train, test, get_device

### Data Preparation

In [3]:
# --- HYPERPARAMETERS & CONSTANTS ---
BATCH_SIZE = 64        # samples per mini-batch, used by both loaders below
NUM_EPOCHS = 10        # number of train/test cycles run by the training loop
LEARNING_RATE = 0.01   # step size handed to the optimizer
SEED = 42              # fixed seed so a fresh-kernel "Run All" is repeatable

# Seed PyTorch's default random number generator before anything random
# happens. The training DataLoader below shuffles without an explicit
# generator, so its ordering depends on this global seed.
# NOTE(review): some accelerator kernels can still be non-deterministic;
# the seed makes runs repeatable, not necessarily bit-identical.
torch.manual_seed(SEED)

# MNIST Mean and Standard Deviation for Z-Score Normalization
MNIST_MEAN = 0.1307
MNIST_STD = 0.3081

# Define the transformation pipeline (applied to every image on access)
transform = transforms.Compose([
    transforms.ToTensor(), # Converts image to Tensor and scales pixels to [0, 1]
    transforms.Normalize((MNIST_MEAN,), (MNIST_STD,)) # Z-Score standardization
])

# --- DATASETS ---
# Load the train and test splits with the same transform.
# download=True fetches the files into ./data when they are not already there.
train_dataset = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST("./data", train=False, download=True, transform=transform)

# --- DATA LOADERS ---
# Batch-wise iteration over the datasets.
# Training data is reshuffled every epoch (important for SGD); the test set
# keeps a fixed order since ordering does not matter for evaluation.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# --- DEVICE SETUP ---
# get_device() comes from src.train_utils (imported in the first cell); the
# captured output shows it selecting "mps" on this machine.
device = get_device()
print(f"Using device: {device}")

100.0%
100.0%
100.0%
100.0%

🎉 Successfully found Apple Silicon GPU (MPS) for acceleration.
Using device: mps





### Model Instantiation and Learning Setup

In [5]:
# Build the network (Cnn is imported from src/cnn_model.py) and move its
# parameters onto the device chosen in the data-preparation cell.
# Module.to() returns the module itself, so the two steps can be chained.
model = Cnn().to(device)

# Loss: cross-entropy, the usual criterion for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Optimizer: plain stochastic gradient descent over every trainable
# parameter of the model, stepping by LEARNING_RATE.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

### Execution Block

In [6]:
# Announce the run configuration before the (long) loop starts.
print(
    f"Starting Training on device {device} for {NUM_EPOCHS} epochs "
    f"with LR={LEARNING_RATE} and Batch={BATCH_SIZE}."
)

# One iteration per epoch, numbered from 1 so the logged epoch index is
# human-friendly.
for epoch in range(1, 1 + NUM_EPOCHS):
    # Training pass over train_loader; the epoch number is passed through
    # for the progress log lines.
    train(model, device, train_loader, optimizer, criterion, epoch)

    # Evaluation pass over the held-out test_loader after each epoch.
    test(model, device, test_loader, criterion)

print("\n--- Training Completed ---")

Starting Training on device mps for 10 epochs with LR=0.01 and Batch=64.
Train Epoch: 1 [0/60000] Loss: 2.302527
Train Epoch: 1 [6400/60000] Loss: 2.173326
Train Epoch: 1 [12800/60000] Loss: 1.292525
Train Epoch: 1 [19200/60000] Loss: 0.476414
Train Epoch: 1 [25600/60000] Loss: 0.488684
Train Epoch: 1 [32000/60000] Loss: 0.371889
Train Epoch: 1 [38400/60000] Loss: 0.287345
Train Epoch: 1 [44800/60000] Loss: 0.245444
Train Epoch: 1 [51200/60000] Loss: 0.168701
Train Epoch: 1 [57600/60000] Loss: 0.100424

Test Run:
  Avg. Loss: 0.1914, Accuracy: 9471/10000 (94.7%)
Train Epoch: 2 [0/60000] Loss: 0.179342
Train Epoch: 2 [6400/60000] Loss: 0.162103
Train Epoch: 2 [12800/60000] Loss: 0.192224
Train Epoch: 2 [19200/60000] Loss: 0.153971
Train Epoch: 2 [25600/60000] Loss: 0.253597
Train Epoch: 2 [32000/60000] Loss: 0.169866
Train Epoch: 2 [38400/60000] Loss: 0.209518
Train Epoch: 2 [44800/60000] Loss: 0.153994
Train Epoch: 2 [51200/60000] Loss: 0.200347
Train Epoch: 2 [57600/60000] Loss: 0.206