Import the dependencies

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

Load CIFAR-10 dataset

In [3]:
# CIFAR-10 input pipeline.
# Each image is converted to a tensor and then every RGB channel is normalised
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split: downloaded into ./data if missing, served in shuffled batches of 64.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
)


100%|██████████| 170M/170M [00:03<00:00, 49.1MB/s]


VGG-like Architecture

In [4]:
class VGGLike(nn.Module):
    """VGG-style convolutional classifier.

    The feature extractor stacks three blocks, each made of two 3x3
    convolutions (padding 1, so spatial size is preserved) with ReLU, followed
    by a 2x2 max-pool that halves the spatial size. Channel width grows
    3 -> 64 -> 128 -> 256 across the blocks. The classifier is a three-layer
    MLP whose first layer expects ``256 * 4 * 4`` features, i.e. a 4x4 feature
    map after the three poolings (a 32x32 input: 32 -> 16 -> 8 -> 4).

    Args:
        num_classes: Number of output logits. Defaults to 10, so
            ``VGGLike()`` builds the same network as before.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.features = nn.Sequential( # features extraction network
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True), # to save memory
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # with decreasing spatial size, we'll increase channels to not lose much info

            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential( # Classification network
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        x = self.features(x)
        # Flatten everything except the batch dimension. Unlike ``.view``,
        # ``torch.flatten`` also works when the feature map is not contiguous
        # (for example with channels_last inputs).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

Model Training

In [5]:
# Fix the global RNG seed so that weight initialisation and the shuffled batch
# order are repeatable across "Restart Kernel -> Run All". Some CUDA kernels
# may still be nondeterministic, so GPU runs can differ slightly.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGGLike().to(device)

# Cross-entropy loss on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
# Train for a fixed number of epochs and report the mean per-batch loss after each one.
num_epochs = 15
for epoch in range(num_epochs):
    # Select training mode explicitly: the evaluation cell switches the model to
    # eval mode, so re-running this cell afterwards would otherwise keep
    # training in eval mode (this matters once dropout or batch norm is added).
    model.train()
    running_loss = 0.0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average of the per-batch losses over the epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader):.4f}")

Epoch 1, Loss: 1.4906
Epoch 2, Loss: 0.9484
Epoch 3, Loss: 0.7117
Epoch 4, Loss: 0.5708
Epoch 5, Loss: 0.4571
Epoch 6, Loss: 0.3686
Epoch 7, Loss: 0.2966
Epoch 8, Loss: 0.2420
Epoch 9, Loss: 0.1989
Epoch 10, Loss: 0.1662
Epoch 11, Loss: 0.1450
Epoch 12, Loss: 0.1319
Epoch 13, Loss: 0.1217
Epoch 14, Loss: 0.1122
Epoch 15, Loss: 0.1086


Evaluation

In [7]:
# Measure top-1 accuracy over the whole test loader.
correct, total = 0, 0
model.eval()  # evaluation behaviour for mode-dependent layers

with torch.no_grad():  # gradients are not needed for scoring
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit. Using argmax avoids
        # binding the unused max values to `_`, which would shadow IPython's
        # last-output variable in the notebook namespace.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 77.54%


This VGG-like network improves on the first simple CNN by 5%, thanks to being deeper (more conv layers and blocks) and increasing the channel depth from block to block, which allows it to learn richer, hierarchical features from images.

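The gap between the low final training loss (about 0.11) and the 77.54% test accuracy suggests the model is overfitting. Further improvements can be made using data augmentation, batch normalization, dropout, longer training, and tuned learning rates to boost generalization and accuracy.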
