In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [5]:
class BasicCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class CNNWithNormalization(nn.Module):
    def __init__(self, num_classes=10, dropout_p=0.5):
        super().__init__()
    
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, num_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )
    
    def forward(self, x):
        identity = self.shortcut(x)
        
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity  # Residual connection
        out = F.relu(out)
        return out

class CNNWithResiduals(nn.Module):
    def __init__(self, num_classes=10, dropout_p=0.3):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.layer1 = ResidualBlock(32, 64, stride=2)
        self.layer2 = ResidualBlock(64, 128, stride=2)
        self.layer3 = ResidualBlock(128, 256, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_p)
        self.fc = nn.Linear(256, num_classes)
        
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

In [6]:
def train_model(net, trainloader, criterion, optimizer, epochs=4, device='cpu'):
    net = net.to(device)
    
    for epoch in range(epochs):
        running_loss = 0.0
        
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            if i % 2000 == 1999:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0
                
    print('Finished training')

def evaluate(model, testloader, device='cpu'):
    model = model.to(device)
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Accuracy: {100 * correct / total:.2f}%')
    return 100 * correct / total

def load_data(batch_size=4):
    transform = transforms.Compose([
        transforms.ToTensor(), 
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )
    
    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=2
    )
    
    return trainloader, testloader


In [7]:
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    trainloader, testloader = load_data(batch_size=128)
    
    print("\n" + "="*50)
    print("VERSION 1: Basic CNN (Chapter 7)")
    print("="*50)
    basic_cnn = BasicCNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(basic_cnn.parameters(), lr=0.001, momentum=0.9)
    
    train_model(basic_cnn, trainloader, criterion, optimizer, epochs=3, device=device)
    print("Basic CNN performance:")
    evaluate(basic_cnn, testloader, device=device)
    
    print("\n" + "="*50)
    print("VERSION 2: CNN with BatchNorm & Dropout")
    print("="*50)
    norm_cnn = CNNWithNormalization()
    optimizer = optim.SGD(norm_cnn.parameters(), lr=0.01, momentum=0.9)
    
    train_model(norm_cnn, trainloader, criterion, optimizer, epochs=3, device=device)
    print("CNN with Normalization performance:")
    evaluate(norm_cnn, testloader, device=device)
    
    print("\n" + "="*50)
    print("VERSION 3: CNN with Residual Connections")
    print("="*50)
    res_cnn = CNNWithResiduals()
    optimizer = optim.Adam(res_cnn.parameters(), lr=0.001)
    
    train_model(res_cnn, trainloader, criterion, optimizer, epochs=3, device=device)
    print("CNN with Residuals performance:")
    evaluate(res_cnn, testloader, device=device)
    
    print("\n" + "="*50)
    print("Architecture Summary")
    print("="*50)
    x = torch.randn(4, 3, 32, 32)
    
    models = [
        ("Basic CNN", BasicCNN()),
        ("CNN with Normalization", CNNWithNormalization()),
        ("CNN with Residuals", CNNWithResiduals())
    ]
    
    for name, model in models:
        out = model(x)
        params = sum(p.numel() for p in model.parameters())
        print(f"\n{name}:")
        print(f"  Input: {x.shape} -> Output: {out.shape}")
        print(f"  Parameters: {params:,}")

Using device: cpu


100%|███████████████████████████████████████████████████████████████████████████████| 170M/170M [01:16<00:00, 2.24MB/s]



VERSION 1: Basic CNN (Chapter 7)
Finished training
Basic CNN performance:
Accuracy: 18.25%

VERSION 2: CNN with BatchNorm & Dropout
Finished training
CNN with Normalization performance:
Accuracy: 69.69%

VERSION 3: CNN with Residual Connections
Finished training
CNN with Residuals performance:
Accuracy: 74.96%

Architecture Summary

Basic CNN:
  Input: torch.Size([4, 3, 32, 32]) -> Output: torch.Size([4, 10])
  Parameters: 61,844

CNN with Normalization:
  Input: torch.Size([4, 3, 32, 32]) -> Output: torch.Size([4, 10])
  Parameters: 620,810

CNN with Residuals:
  Input: torch.Size([4, 3, 32, 32]) -> Output: torch.Size([4, 10])
  Parameters: 1,211,786
