In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

In [13]:
# =================================================================================
# 1. TWEAKABLE HYPERPARAMETERS
# =================================================================================
# Every value a reader might want to tune lives in this cell.

# Number of full passes over the training dataset.
NUM_EPOCHS = 10
# Number of images processed in a single batch.
BATCH_SIZE = 64
# Step size handed to the optimizer. Smaller values learn more slowly but are
# usually more stable.
LEARNING_RATE = 0.001
# Seed for PyTorch's global random number generator.
SEED = 42

# Seed before any model or data loader is created so that weight initialisation
# and the shuffle order of the training data are repeatable from run to run.
# NOTE(review): some GPU kernels may still be nondeterministic; see PyTorch's
# reproducibility notes if bit-exact results are required.
# The trailing semicolon keeps Jupyter from echoing the returned Generator.
torch.manual_seed(SEED);

In [14]:
# =================================================================================
# 2. DEVICE CONFIGURATION
# =================================================================================
# Pick the compute device once; later cells move the model and each batch to it.
# A CUDA GPU is used when PyTorch can see one, otherwise we fall back to the CPU.

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Using device: {device}")

Using device: cuda


In [15]:
# =================================================================================
# 3. DATA PREPARATION
# =================================================================================
# CIFAR-10 images are preprocessed in two steps before they reach the model:
#   a) ToTensor converts each image into a PyTorch tensor.
#   b) Normalize shifts and scales every colour channel, which tends to make
#      training more stable.

# Per-channel (R, G, B) mean and standard deviation handed to Normalize.
# NOTE: 0.5 / 0.5 is a generic choice, not the measured CIFAR-10 statistics.
# Since ToTensor yields values in [0, 1], it rescales them to [-1, 1].
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [16]:
# CIFAR-10 training split; downloaded into ./data if it is not already there.
train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                             train=True,
                                             download=True,
                                             transform=transform)

# CIFAR-10 test split; same location and same preprocessing.
test_dataset = torchvision.datasets.CIFAR10(root='./data',
                                            train=False,
                                            download=True,
                                            transform=transform)

# Settings shared by both data loaders.
#   - num_workers: number of worker processes used to load batches.
#   - pin_memory: page-locked host memory speeds up host-to-GPU copies. It is
#     only useful when a GPU is present, so it is enabled only when CUDA is
#     available; this avoids a pointless warning on CPU-only machines.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

# Training batches are reshuffled every epoch so the model cannot pick up on
# the order of the samples.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           shuffle=True,
                                           **loader_kwargs)

# Evaluation does not depend on sample order, so the test set is left unshuffled.
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          shuffle=False,
                                          **loader_kwargs)

# Human-readable names of the 10 CIFAR-10 classes, for reference.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

In [17]:
# =================================================================================
# 4. MODEL DEFINITION (MINI-VGG)
# =================================================================================
# This is our neural network architecture. It's a "mini" version of the famous
# VGG network. It consists of two blocks of convolutional layers followed by
# a classifier (fully connected layers).

class MiniVGG(nn.Module):
    """Small VGG-style CNN: two conv blocks followed by a two-layer classifier.

    Args:
        num_classes: Number of output logits (one per class).
        in_channels: Number of channels in the input images (3 for RGB).
        image_size: Height/width of the (square) input images. The two 2x2
            max-pool layers shrink this by a factor of 4, which determines the
            input width of the first fully connected layer.

    Raises:
        ValueError: If ``image_size`` is smaller than 4, because nothing would
            be left after the two pooling stages.

    With the default arguments the network is identical to the original
    hard-coded 3-channel, 32x32 version, including layer order and
    ``state_dict`` keys.
    """

    def __init__(self, num_classes=10, in_channels=3, image_size=32):
        super().__init__()

        if image_size < 4:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}"
            )

        # --- Convolutional Blocks ---
        # These layers detect features such as edges, textures and shapes.
        # Unpacking both blocks into a single Sequential keeps the layer
        # indices (0-9) stable, so previously saved checkpoints still load.
        self.features = nn.Sequential(
            *self._conv_block(in_channels, 32),  # Block 1
            *self._conv_block(32, 64),           # Block 2
        )

        # Each MaxPool2d(2, 2) floors the spatial size to half.
        pooled_size = image_size // 2 // 2

        # --- Classifier Block ---
        # Takes the extracted feature maps and maps them to class logits.
        self.classifier = nn.Sequential(
            nn.Flatten(),  # (N, 64, H, W) -> (N, 64 * H * W)
            nn.Linear(64 * pooled_size * pooled_size, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),  # Dropout helps prevent overfitting
            nn.Linear(512, num_classes),  # One output neuron per class
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one VGG block: two 3x3 conv+ReLU pairs, then a 2x2 max-pool."""
        return [
            # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Halves height and width
        ]

    def forward(self, x):
        """Run a batch of images through the conv blocks and the classifier; returns logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [19]:
# Build the network with 10 output classes and move its parameters to the
# device selected earlier.
model = MiniVGG(num_classes=10)
model = model.to(device)

# Show the layer-by-layer structure.
print("\nModel Architecture:", model, sep="\n")


Model Architecture:
MiniVGG(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4096, out_features=512, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [20]:
# =================================================================================
# 5. LOSS FUNCTION AND OPTIMIZER
# =================================================================================
# Cross-entropy scores how far the predicted class logits are from the true
# labels; Adam then nudges the model's parameters to reduce that score.

criterion = nn.CrossEntropyLoss()

# Adam updates every trainable parameter of `model`, using the configured
# learning rate.
trainable_params = model.parameters()
optimizer = optim.Adam(trainable_params, lr=LEARNING_RATE)

In [21]:
# =================================================================================
# 6. TRAINING LOOP
# =================================================================================
# We loop over the training set NUM_EPOCHS times. For each batch of images:
#   1. Forward pass: get the model's predictions.
#   2. Calculate loss: measure how wrong the predictions are.
#   3. Backward pass: compute gradients of the loss w.r.t. the parameters.
#   4. Optimization step: update the model's weights and biases.
# After every epoch the model is scored on the test set.

print("\nStarting Training...")
start_time = time.time()

for epoch in range(NUM_EPOCHS):
    model.train() # Training mode (enables dropout)
    running_loss = 0.0  # Sum of per-sample losses seen this epoch
    samples_seen = 0

    for images, labels in train_loader:
        # Move the batch to the configured device. non_blocking lets the copy
        # overlap with other work when the loader provides pinned memory; it
        # is a no-op otherwise.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # 1. Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # 2. Backward pass and optimization
        optimizer.zero_grad() # Clear the gradients from the previous batch
        loss.backward()       # Calculate the gradients
        optimizer.step()      # Update the model's parameters

        # nn.CrossEntropyLoss with its default reduction returns the batch
        # mean, so weight it by the batch size. Otherwise a smaller final
        # batch would count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # --- Print training status ---
    epoch_loss = running_loss / samples_seen
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {epoch_loss:.4f}")

    # --- Evaluate on the test set after each epoch ---
    model.eval() # Evaluation mode (disables dropout)
    correct = 0
    total = 0
    with torch.no_grad(): # No gradients are needed during evaluation
        for images, labels in test_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            # The predicted class is the index of the largest logit.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    # Report the number of images actually evaluated rather than a fixed count.
    print(f'Accuracy on the {total} test images: {accuracy:.2f} %')


end_time = time.time()
print("\nFinished Training.")
# Note: this wall-clock figure also includes the per-epoch test evaluation.
print(f"Total training time: {((end_time - start_time) / 60):.2f} minutes")


Starting Training...
Epoch [1/10], Loss: 1.4645
Accuracy on the 10000 test images: 60.99 %
Epoch [2/10], Loss: 1.0245
Accuracy on the 10000 test images: 68.83 %
Epoch [3/10], Loss: 0.8509
Accuracy on the 10000 test images: 71.66 %
Epoch [4/10], Loss: 0.7351
Accuracy on the 10000 test images: 74.85 %
Epoch [5/10], Loss: 0.6386
Accuracy on the 10000 test images: 75.90 %
Epoch [6/10], Loss: 0.5585
Accuracy on the 10000 test images: 76.95 %
Epoch [7/10], Loss: 0.4961
Accuracy on the 10000 test images: 76.72 %
Epoch [8/10], Loss: 0.4300
Accuracy on the 10000 test images: 76.93 %
Epoch [9/10], Loss: 0.3853
Accuracy on the 10000 test images: 77.64 %
Epoch [10/10], Loss: 0.3386
Accuracy on the 10000 test images: 77.36 %

Finished Training.
Total training time: 2.84 minutes


In [10]:
# =================================================================================
# 7. SAVE THE MODEL (Optional)
# =================================================================================
# Persist the trained weights so they can be reloaded later. Only the
# state_dict is written, so MiniVGG must be re-instantiated before loading
# the weights back.
checkpoint_path = 'mini_vgg_cifar10.pth'
torch.save(model.state_dict(), checkpoint_path)
print(f"Model saved to {checkpoint_path}")

Model saved to mini_vgg_cifar10.pth
