In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import time
from pathlib import Path

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


# Summary

This notebook benchmarks the training time of a single epoch on the CIFAR-10 and CIFAR-100 datasets using a medium-complexity CNN and, for comparison, an ImageNet-pretrained ResNet-18 with a replaced classification head.

**Model Architecture:**
- 3 Convolutional blocks with BatchNorm and MaxPooling
- Channel progression: 3 → 64 → 128 → 256
- 2 Fully connected layers (4096 → 512 → num_classes)
- Total parameters: ~2.5M (2,474,506 for CIFAR-10; 2,520,676 for CIFAR-100)

**Training Configuration:**
- Batch size: 128
- Optimizer: SGD with momentum (0.9) and weight decay (5e-4)
- Learning rate: 0.01
- Data augmentation: Random crop and horizontal flip

In [None]:
# Define a medium-complexity CNN model
class MediumCNN(nn.Module):
    """Three-block convolutional classifier sized for 32x32 RGB images.

    Each convolutional block is ``Conv2d(3x3, padding=1) -> BatchNorm2d ->
    ReLU -> MaxPool2d(2)``, so every block keeps the spatial size through the
    convolution and then halves it in the pooling step:

        3x32x32 -> 64x16x16 -> 128x8x8 -> 256x4x4

    The 256x4x4 feature map is flattened to 4096 features and passed through
    ``Linear(4096, 512) -> ReLU -> Dropout(0.5) -> Linear(512, num_classes)``.

    Args:
        num_classes: Width of the final linear layer (number of logits).

    Note:
        The first linear layer is hard-wired to 4 * 4 * 256 inputs, so inputs
        whose pooled feature map is not 4x4 will fail in ``fc_layers``.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Channel progression of the three conv blocks: 3 -> 64 -> 128 -> 256.
        # Modules are appended in the same order as a hand-written Sequential,
        # so the indices (and therefore the state_dict keys) are 0..11.
        channels = (3, 64, 128, 256)
        conv_modules = []
        for in_channels, out_channels in zip(channels[:-1], channels[1:]):
            conv_modules += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),  # halves H and W
            ]
        self.conv_layers = nn.Sequential(*conv_modules)

        # After 3 pooling layers: 32 -> 16 -> 8 -> 4
        # Flattened size: 4 * 4 * 256 = 4096
        self.fc_layers = nn.Sequential(
            nn.Linear(4 * 4 * 256, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``[batch_size, num_classes]``."""
        x = self.conv_layers(x)
        # torch.flatten copies only when it has to, so it also works when the
        # activations are not contiguous in NCHW order (e.g. channels_last
        # inputs), where Tensor.view would raise.
        x = torch.flatten(x, 1)  # [batch_size, 4*4*256]
        x = self.fc_layers(x)
        return x

# Sanity check: push one random CIFAR-sized image through a 10-class model
# and report the logits shape together with the parameter count.
model_test = MediumCNN(num_classes=10)
test_input = torch.randn(1, 3, 32, 32)
test_output = model_test(test_input)
print(f"Model output shape: {test_output.shape}")
print(
    f"Total parameters: "
    f"{sum(weights.numel() for weights in model_test.parameters()):,}"
)

Model output shape: torch.Size([1, 10])
Total parameters: 2,474,506


In [3]:
# CIFAR-10 training data: on-disk location and mini-batch size.
# data_root, batch_size and transform_train are reused by later cells.
data_root = Path.home() / 'data'
batch_size = 128

# Training-time augmentation (4-pixel padded random crop, random mirror),
# then tensor conversion and per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

print("Loading CIFAR-10 dataset...")
# download=False: no download is attempted, the data must already be in data_root.
cifar10_trainset = torchvision.datasets.CIFAR10(
    root=str(data_root), train=True, download=False, transform=transform_train
)
cifar10_trainloader = DataLoader(
    cifar10_trainset, batch_size=batch_size, shuffle=True, num_workers=2
)

print(f"CIFAR-10 training samples: {len(cifar10_trainset)}")
print(f"Number of batches: {len(cifar10_trainloader)}")

Loading CIFAR-10 dataset...
CIFAR-10 training samples: 50000
Number of batches: 391


In [4]:
# CIFAR-100 training data. Reuses data_root, batch_size and transform_train
# from the CIFAR-10 loading cell, so that cell must have been run first.
print("\nLoading CIFAR-100 dataset...")
# download=False: no download is attempted, the data must already be in data_root.
cifar100_trainset = torchvision.datasets.CIFAR100(
    root=str(data_root), train=True, download=False, transform=transform_train
)
cifar100_trainloader = DataLoader(
    cifar100_trainset, batch_size=batch_size, shuffle=True, num_workers=2
)

print(f"CIFAR-100 training samples: {len(cifar100_trainset)}")
print(f"Number of batches: {len(cifar100_trainloader)}")


Loading CIFAR-100 dataset...
CIFAR-100 training samples: 50000
Number of batches: 391


In [5]:
# Function to train one epoch and measure time
def train_one_epoch(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader`` and time the pass.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device (``torch.device`` or string) the batches are moved to.
            ``model`` is expected to already live on this device.

    Returns:
        Tuple ``(elapsed_time, avg_loss, accuracy)``: wall-clock seconds for
        the whole loop (including data loading), the mean of the per-batch
        losses, and the top-1 training accuracy in percent. If the loader
        yields no batches, ``avg_loss`` and ``accuracy`` are both ``0.0``.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # CUDA kernels are launched asynchronously. Synchronising before starting
    # and before stopping the clock keeps work queued by earlier cells out of
    # the measurement and guarantees this epoch's work is fully included.
    on_cuda = torch.device(device).type == 'cuda'
    if on_cuda:
        torch.cuda.synchronize(device)
    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics
        num_batches = batch_idx + 1
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        if num_batches % 100 == 0:
            print(f'  Batch [{batch_idx + 1}/{len(train_loader)}], '
                  f'Loss: {running_loss / (batch_idx + 1):.3f}, '
                  f'Acc: {100. * correct / total:.2f}%')

    if on_cuda:
        torch.cuda.synchronize(device)
    elapsed_time = time.perf_counter() - start_time

    # Average over the batches actually consumed; an empty loader (or one
    # yielding only empty batches) reports 0.0 instead of dividing by zero.
    avg_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100. * correct / total if total else 0.0

    return elapsed_time, avg_loss, accuracy

In [6]:
# One timed training epoch of MediumCNN on CIFAR-100.
# Relies on MediumCNN, train_one_epoch, cifar100_trainloader and device
# from the cells above.
print("\n" + "="*60)
print("Training on CIFAR-100 for ONE EPOCH")
print("="*60)

# Freshly initialised network with a 100-way output layer
model_cifar100 = MediumCNN(num_classes=100).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model_cifar100.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)

elapsed_time, avg_loss, accuracy = train_one_epoch(
    model=model_cifar100,
    train_loader=cifar100_trainloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

print("\n" + "-"*60)
print("CIFAR-100 Results:")
print(f"  Time for 1 epoch: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
print(f"  Average Loss: {avg_loss:.4f}")
print(f"  Training Accuracy: {accuracy:.2f}%")
print("-"*60)


Training on CIFAR-100 for ONE EPOCH
  Batch [100/391], Loss: 4.326, Acc: 4.31%
  Batch [200/391], Loss: 4.128, Acc: 6.64%
  Batch [300/391], Loss: 4.017, Acc: 8.05%

------------------------------------------------------------
CIFAR-100 Results:
  Time for 1 epoch: 6.38 seconds (0.11 minutes)
  Average Loss: 3.9314
  Training Accuracy: 9.32%
------------------------------------------------------------


In [7]:
# One timed training epoch of MediumCNN on CIFAR-10.
# Relies on MediumCNN, train_one_epoch, cifar10_trainloader and device
# from the cells above.
print("\n" + "="*60)
print("Training on CIFAR-10 for ONE EPOCH")
print("="*60)

# Freshly initialised network with a 10-way output layer
model_cifar10 = MediumCNN(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model_cifar10.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)

elapsed_time, avg_loss, accuracy = train_one_epoch(
    model=model_cifar10,
    train_loader=cifar10_trainloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

print("\n" + "-"*60)
print("CIFAR-10 Results:")
print(f"  Time for 1 epoch: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
print(f"  Average Loss: {avg_loss:.4f}")
print(f"  Training Accuracy: {accuracy:.2f}%")
print("-"*60)


Training on CIFAR-10 for ONE EPOCH
  Batch [100/391], Loss: 1.835, Acc: 31.87%
  Batch [200/391], Loss: 1.705, Acc: 37.01%
  Batch [300/391], Loss: 1.623, Acc: 40.10%

------------------------------------------------------------
CIFAR-10 Results:
  Time for 1 epoch: 5.40 seconds (0.09 minutes)
  Average Loss: 1.5741
  Training Accuracy: 42.07%
------------------------------------------------------------


In [None]:
# Repeat the one-epoch timing with an ImageNet-pretrained ResNet-18 whose
# final fully connected layer is swapped for a dataset-specific head.
# Relies on train_one_epoch, MediumCNN, both train loaders and device
# from the cells above.
import os
# Directory used by torch.hub for the downloaded checkpoint
os.environ['TORCH_HOME'] = str(Path.home() / '.cache')

from torchvision.models import resnet18, ResNet18_Weights

print("\n" + "="*60)
print("Testing with Pretrained ResNet-18")
print("="*60)

# ---- CIFAR-10 ----
print("\n--- CIFAR-10 with ResNet-18 ---")
resnet_cifar10 = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# Replace the ImageNet classifier with a 10-way linear layer
resnet_cifar10.fc = nn.Linear(resnet_cifar10.fc.in_features, 10)
resnet_cifar10 = resnet_cifar10.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    resnet_cifar10.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)

elapsed_time, avg_loss, accuracy = train_one_epoch(
    model=resnet_cifar10,
    train_loader=cifar10_trainloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

print("\n" + "-"*60)
print("ResNet-18 on CIFAR-10 Results:")
print(f"  Time for 1 epoch: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
print(f"  Average Loss: {avg_loss:.4f}")
print(f"  Training Accuracy: {accuracy:.2f}%")
print("-"*60)

# ---- CIFAR-100 ----
print("\n--- CIFAR-100 with ResNet-18 ---")
resnet_cifar100 = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# Replace the ImageNet classifier with a 100-way linear layer
resnet_cifar100.fc = nn.Linear(resnet_cifar100.fc.in_features, 100)
resnet_cifar100 = resnet_cifar100.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    resnet_cifar100.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)

elapsed_time, avg_loss, accuracy = train_one_epoch(
    model=resnet_cifar100,
    train_loader=cifar100_trainloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

print("\n" + "-"*60)
print("ResNet-18 on CIFAR-100 Results:")
print(f"  Time for 1 epoch: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
print(f"  Average Loss: {avg_loss:.4f}")
print(f"  Training Accuracy: {accuracy:.2f}%")
print("-"*60)

# ---- Parameter counts of the two 10-class models ----
print("\n" + "="*60)
print("Model Comparison:")
print(
    f"  Custom MediumCNN parameters: "
    f"{sum(weights.numel() for weights in MediumCNN(10).parameters()):,}"
)
print(
    f"  ResNet-18 parameters: "
    f"{sum(weights.numel() for weights in resnet_cifar10.parameters()):,}"
)
print("="*60)


Testing with Pretrained ResNet-18

--- CIFAR-10 with ResNet-18 ---
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/hli54/.cache/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 290MB/s]


  Batch [100/391], Loss: 1.432, Acc: 49.01%
  Batch [200/391], Loss: 1.217, Acc: 57.48%
  Batch [300/391], Loss: 1.105, Acc: 61.65%

------------------------------------------------------------
ResNet-18 on CIFAR-10 Results:
  Time for 1 epoch: 5.70 seconds (0.10 minutes)
  Average Loss: 1.0350
  Training Accuracy: 64.19%
------------------------------------------------------------

--- CIFAR-100 with ResNet-18 ---
  Batch [100/391], Loss: 3.619, Acc: 16.06%
  Batch [200/391], Loss: 3.214, Acc: 22.45%
  Batch [300/391], Loss: 2.987, Acc: 26.41%

------------------------------------------------------------
ResNet-18 on CIFAR-100 Results:
  Time for 1 epoch: 5.54 seconds (0.09 minutes)
  Average Loss: 2.8452
  Training Accuracy: 29.09%
------------------------------------------------------------

Model Comparison:
  Custom MediumCNN parameters: 2,474,506
  ResNet-18 parameters: 11,181,642
