In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

In [12]:
# =================================================================================
# 1. TWEAKABLE HYPERPARAMETERS
# =================================================================================
# Feel free to change these values to experiment with the model.

# The number of times the model will see the entire training dataset.
NUM_EPOCHS = 20
# The number of images processed in a single batch.
BATCH_SIZE = 64
# How fast the model learns. A smaller value means slower, potentially more stable learning.
LEARNING_RATE = 0.001

In [3]:
# =================================================================================
# 2. DEVICE CONFIGURATION
# =================================================================================
# This sets up the device to use a GPU (like NVIDIA's CUDA) if available.
# Training on a GPU is significantly faster than on a CPU.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [4]:
# =================================================================================
# 3. DATA PREPARATION
# =================================================================================
# We need to prepare the CIFAR-10 dataset for our model.
# This involves two main steps:
#   a) Transforming the data into a format the model can use (tensors).
#   b) Normalizing the data to help the model train more effectively.

# The transform pipeline first converts images to PyTorch tensors,
# then normalizes their pixel values. Normalization helps stabilize training.
# The mean and standard deviation values are standard for the CIFAR-10 dataset.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [5]:
# Download the training dataset if not already present.
train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                             train=True,
                                             download=True,
                                             transform=transform)

# Download the test dataset if not already present.
test_dataset = torchvision.datasets.CIFAR10(root='./data',
                                            train=False,
                                            download=True,
                                            transform=transform)

# Create data loaders. These are utilities that feed the data to the model
# in shuffled batches. Shuffling the training data is important to prevent
# the model from learning the order of the data.
# Using pin_memory=True can speed up data transfer to the GPU.
# num_workers allows for multi-process data loading.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=2,
                                           pin_memory=True)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          num_workers=2,
                                          pin_memory=True)

# Define the 10 classes in the CIFAR-10 dataset for reference.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

100%|██████████| 170M/170M [00:04<00:00, 40.5MB/s]


In [7]:
# =================================================================================
# 4. MODEL DEFINITION (MINI-VGG)
# =================================================================================
# This is our neural network architecture. It's a "mini" version of the famous
# VGG network. It consists of two blocks of convolutional layers followed by
# a classifier (fully connected layers).

class MiniVGG(nn.Module):
    def __init__(self, num_classes=10):
        super(MiniVGG, self).__init__()

        # --- Convolutional Blocks ---
        # These layers are responsible for detecting features like edges,
        # textures, and shapes in the images.
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),# 3 input channels (RGB), 32 output channels
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2), # Reduces spatial dimensions by half

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2) # Reduces dimensions again
        )

        # --- Classifier Block ---
        # These layers take the features detected by the convolutional blocks
        # and classify the image into one of the 10 categories.
        self.classifier = nn.Sequential(
            nn.Flatten(), # Flattens the 2D feature map into a 1D vector
            nn.Linear(64 * 8 * 8, 512), # The input size depends on the output of the last maxpool layer
            nn.ReLU(inplace=True),
            nn.Dropout(0.5), # Dropout helps prevent overfitting
            nn.Linear(512, num_classes) # The final output layer has 10 neurons, one for each class
        )

    def forward(self, x):
        # Defines the forward pass: how data flows through the network.
        x = self.features(x)
        x = self.classifier(x)
        return x

In [8]:
model = MiniVGG(num_classes=10).to(device)
print("\nModel Architecture:")
print(model)


Model Architecture:
MiniVGG(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4096, out_features=512, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0

In [10]:
# =================================================================================
# 5. LOSS FUNCTION AND OPTIMIZER
# =================================================================================
# The loss function measures how wrong the model's predictions are.
# The optimizer adjusts the model's parameters to reduce this loss.

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

In [13]:
# =================================================================================
# 6. TRAINING LOOP (Final Version with ReduceLROnPlateau)
# =================================================================================
print("\nStarting Training...")
start_time = time.time()

# Initialize the learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

for epoch in range(NUM_EPOCHS):
    model.train()  # Set model to training mode
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        # Move data to device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # --- Training statistics ---
    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Training Loss: {epoch_loss:.4f}")

    # --- Validation Phase ---
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(test_loader)
    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}% | Validation Loss: {val_loss:.4f}")

    # Step the scheduler with validation loss
    scheduler.step(val_loss)

end_time = time.time()
print("\nFinished Training.")
print(f"Total training time: {((end_time - start_time) / 60):.2f} minutes")



Starting Training...
Epoch [1/20], Training Loss: 0.1747
Validation Accuracy: 79.30% | Validation Loss: 0.7812
Epoch [2/20], Training Loss: 0.1449
Validation Accuracy: 79.17% | Validation Loss: 0.8012
Epoch [3/20], Training Loss: 0.1255
Validation Accuracy: 78.90% | Validation Loss: 0.8309
Epoch [4/20], Training Loss: 0.1166
Validation Accuracy: 79.36% | Validation Loss: 0.8343
Epoch [5/20], Training Loss: 0.1023
Validation Accuracy: 79.32% | Validation Loss: 0.8380
Epoch [6/20], Training Loss: 0.1017
Validation Accuracy: 79.28% | Validation Loss: 0.8481
Epoch [7/20], Training Loss: 0.0995
Validation Accuracy: 79.27% | Validation Loss: 0.8544
Epoch [8/20], Training Loss: 0.0977
Validation Accuracy: 79.38% | Validation Loss: 0.8477
Epoch [9/20], Training Loss: 0.0974
Validation Accuracy: 79.28% | Validation Loss: 0.8483
Epoch [10/20], Training Loss: 0.0957
Validation Accuracy: 79.33% | Validation Loss: 0.8500
Epoch [11/20], Training Loss: 0.0989
Validation Accuracy: 79.30% | Validation

In [None]:
# =================================================================================
# 7. SAVE THE MODEL (Optional)
# =================================================================================
# You can save the trained model's state for later use.
torch.save(model.state_dict(), 'mini_vgg_cifar10.pth')
print("Model saved to mini_vgg_cifar10.pth")

Model saved to mini_vgg_cifar10.pth
