<a href="https://colab.research.google.com/github/Prianka-Mukhopadhyay/pytorch-cifar10-classification/blob/main/cifar10_image_classification_pytorch2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

# Transformations for training set (augmentation)
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),       # Randomly crop 32x32 with padding
    transforms.RandomHorizontalFlip(),          # Randomly flip images horizontally
    transforms.ToTensor(),                      # Convert image to tensor
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))  # Normalize with CIFAR-10 stats
])

# Transformations for test set (no augmentation, just normalization)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

# Load training data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

# Load test data
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                         shuffle=False, num_workers=2)

print("✅ Data loaded with augmentation for training set.")

100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


✅ Data loaded with augmentation for training set.


In [2]:
# ============================
# Step 2: Improved CNN Model
# Why? Deeper architecture with BatchNorm & Dropout improves learning and reduces overfitting.
# ============================

import torch.nn as nn
import torch.nn.functional as F

class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)

        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)

        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)  # 10 classes for CIFAR-10

    def forward(self, x):
        # Block 1
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)

        # Block 2
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2)

        # Flatten
        x = x.view(x.size(0), -1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Create model instance
model = ImprovedCNN()

print(model)


ImprovedCNN(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
)


In [3]:
# ============================
# Step 3: Training Setup
# Why? Define how the model learns (loss), how parameters are updated (optimizer), and use GPU for speed.
# ============================

import torch.optim as optim

# Device configuration (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move model to GPU if available
model = model.to(device)

# Loss function: CrossEntropy for classification
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam for faster convergence
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of training epochs
num_epochs = 30


Using device: cuda


In [5]:
# ============================
# Step 4: Training Loop
# Why? Actually trains the model over multiple epochs and tracks accuracy.
# ============================

for epoch in range(num_epochs):  # loop over the dataset multiple times
    model.train()  # set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in trainloader:
        # Move data to device
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Stats
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Print stats for this epoch
    train_acc = 100. * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Loss: {running_loss/len(trainloader):.4f} "
          f"Train Acc: {train_acc:.2f}%")

print("✅ Training complete.")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x16384 and 4096x512)