In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
def load_cifar10(batch_size=128):
    """Create the CIFAR-10 training and test DataLoaders.

    Images are resized to 224 and normalized per channel; the training
    pipeline additionally applies a random horizontal flip. The dataset is
    downloaded into ``./data`` when it is not already there.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple. The training loader shuffles
        its samples, the test loader does not.
    """
    # Channel statistics conventionally paired with ImageNet-pretrained
    # torchvision models, so inputs match what the backbone was trained on.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def build_pipeline(augment):
        # Resize -> (optional flip) -> tensor -> normalize.
        steps = [transforms.Resize(224)]
        if augment:
            steps.append(transforms.RandomHorizontalFlip())
        steps.extend([
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        return transforms.Compose(steps)

    def build_loader(is_train, pipeline):
        # Only the training split is shuffled.
        dataset = torchvision.datasets.CIFAR10(
            root='./data', train=is_train, download=True, transform=pipeline
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=is_train, num_workers=2)

    train_transform = build_pipeline(augment=True)
    test_transform = build_pipeline(augment=False)

    trainloader = build_loader(True, train_transform)
    testloader = build_loader(False, test_transform)

    return trainloader, testloader

def create_feature_extractor(num_classes=10):
    """Build a ResNet-18 whose pretrained layers are frozen.

    The network is loaded with the ``IMAGENET1K_V1`` weights, every existing
    parameter has ``requires_grad`` turned off, and the final ``fc`` layer is
    then replaced by a new ``nn.Linear`` with ``num_classes`` outputs. A short
    summary of trainable versus total parameter counts is printed.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.

    Returns:
        The modified ResNet-18 model.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Freeze first, swap the head second: the replacement layer is created
    # afterwards and therefore is not touched by the freeze.
    for weight in backbone.parameters():
        weight.requires_grad = False

    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)

    # Single pass over the parameters to tally both counts.
    total = 0
    trainable = 0
    for weight in backbone.parameters():
        size = weight.numel()
        total += size
        if weight.requires_grad:
            trainable += size

    print(f"Feature extractor created:")
    print(f"  Trainable parameters: {trainable:,} / {total:,}")
    print(f"  Training only: Final FC layer")

    return backbone

def create_finetuned_model(num_classes=10):
    """Build a ResNet-18 in which every layer remains trainable.

    The network is loaded with the ``IMAGENET1K_V1`` weights and its final
    ``fc`` layer is replaced by a new ``nn.Linear`` with ``num_classes``
    outputs. No parameter is frozen. The total parameter count is printed.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.

    Returns:
        The modified ResNet-18 model.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Swap the classification head for one sized to the new task.
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)

    params = 0
    for tensor in net.parameters():
        params += tensor.numel()

    print(f"Fine-tuning model created:")
    print(f"  Total parameters: {params:,}")
    print(f"  Training: ALL layers")

    return net

In [3]:
def train_epoch(model, trainloader, criterion, optimizer, device):
    """Run one optimization pass over ``trainloader``.

    The model is switched to training mode. For every batch the gradients
    are cleared, the loss is back-propagated and the optimizer takes a step.
    A progress line is printed after every 50th batch.

    Args:
        model: Network being trained; called as ``model(inputs)``.
        trainloader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer used for ``zero_grad()`` and ``step()``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` where ``mean_loss`` is the sum of the
        per-batch losses divided by ``len(trainloader)`` and ``accuracy`` is
        the percentage of correctly predicted samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    # Counting from 1 lets the step number double as "batches processed so far".
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # Index of the largest output along dim 1 is the predicted class.
        predicted = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        if step % 50 == 0:
            print(f'  Batch {step}/{len(trainloader)}: Loss: {loss_sum/step:.3f}, Acc: {100.*n_correct/n_seen:.2f}%')

    return loss_sum / len(trainloader), 100. * n_correct / n_seen

def evaluate(model, testloader, device):
    """Measure classification accuracy of ``model`` on ``testloader``.

    The model is switched to evaluation mode and left in that mode. The
    loop runs under ``torch.no_grad()``, so no gradients are recorded.

    Args:
        model: Network to evaluate; called as ``model(inputs)``.
        testloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Percentage of samples whose predicted class equals the label.
    """
    model.eval()
    hits = 0
    seen = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Index of the largest output along dim 1 is the predicted class.
            predicted = model(inputs).max(1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    return 100. * hits / seen

def quick_train(model, trainloader, testloader, epochs, device, lr=0.001, optimizer=None):
    """Train ``model`` for ``epochs`` epochs, evaluating after each one.

    Each epoch calls ``train_epoch`` on ``trainloader`` and then ``evaluate``
    on ``testloader``, printing the training loss/accuracy and the test
    accuracy. The loss is ``nn.CrossEntropyLoss``.

    Args:
        model: Network to train; it is updated in place.
        trainloader: Batches passed to ``train_epoch``.
        testloader: Batches passed to ``evaluate``.
        epochs: Number of passes over ``trainloader``.
        device: Device the batches are moved to.
        lr: Learning rate of the default Adam optimizer. Ignored when
            ``optimizer`` is supplied.
        optimizer: Optional pre-built optimizer, e.g. one with per-group
            learning rates. When ``None`` (the default) an Adam optimizer
            over the model's trainable parameters is created.

    Returns:
        The same ``model`` instance that was passed in.

    Raises:
        ValueError: If no ``optimizer`` is given and every parameter of
            ``model`` has ``requires_grad=False``.
    """
    criterion = nn.CrossEntropyLoss()

    if optimizer is None:
        # Frozen parameters never receive gradients, so only the trainable
        # ones belong in the optimizer. Checking here also surfaces a fully
        # frozen model immediately instead of failing inside backward().
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        if not trainable_params:
            raise ValueError(
                "quick_train: model has no trainable parameters "
                "(every parameter has requires_grad=False)"
            )
        optimizer = optim.Adam(trainable_params, lr=lr)

    print(f"\nTraining for {epochs} epochs...")
    for epoch in range(epochs):
        print(f'\nEpoch {epoch+1}/{epochs}:')
        train_loss, train_acc = train_epoch(model, trainloader, criterion, optimizer, device)
        test_acc = evaluate(model, testloader, device)

        print(f'  Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.2f}%')
        print(f'  Test Acc: {test_acc:.2f}%')

    return model

In [4]:
if __name__ == "__main__":
    # Entry point: train a frozen-backbone ResNet-18 on CIFAR-10, optionally
    # fine-tune a second ResNet-18 with every layer trainable, then compare the
    # feature-extraction result against a fixed baseline figure.

    # Tunable values gathered in one place instead of being repeated inline.
    BASELINE_ACC = 63      # Chapter 7 baseline accuracy (%) used in the comparison
    EPOCHS = 3             # epochs for both approaches
    HEAD_LR = 0.001        # learning rate for the new fc layer
    BACKBONE_LR = 0.0001   # smaller learning rate for the pretrained layers
    RULE = "=" * 60

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}\n")

    print("Loading CIFAR-10")
    trainloader, testloader = load_cifar10(batch_size=64)
    print(f"Training batches: {len(trainloader)}, Test batches: {len(testloader)}\n")
    print(RULE)
    print("APPROACH 1: Feature Extraction (Recommended to start)")
    print(RULE)

    model_fe = create_feature_extractor(num_classes=10).to(device)

    model_fe = quick_train(model_fe, trainloader, testloader, epochs=EPOCHS, device=device, lr=HEAD_LR)

    # Evaluate once and reuse the number below. The test pipeline has no random
    # transforms and the loader is not shuffled, so repeated passes would only
    # recompute the same value at the cost of a full sweep over the test set.
    fe_test_acc = evaluate(model_fe, testloader, device)

    print(f"\n{RULE}")
    print(f"Feature Extraction Final Test Accuracy: {fe_test_acc:.2f}%")
    print(f"{RULE}\n")

    response = input("\nDo you want to try fine-tuning ALL layers? (slower, takes 10-30 min) [y/N]: ")

    # Tolerate surrounding whitespace and the spelled-out answer.
    if response.strip().lower() in ('y', 'yes'):
        print("\n" + RULE)
        print("APPROACH 2: Fine-tuning (Training all layers)")
        print(RULE)

        model_ft = create_finetuned_model(num_classes=10).to(device)

        # Split parameters by identity rather than by a substring of their
        # name, so the grouping cannot be fooled by an unrelated layer whose
        # name happens to contain "fc".
        head_params = list(model_ft.fc.parameters())
        head_ids = {id(p) for p in head_params}
        backbone_params = [p for p in model_ft.parameters() if id(p) not in head_ids]

        optimizer = optim.Adam([
            {'params': head_params, 'lr': HEAD_LR},
            {'params': backbone_params, 'lr': BACKBONE_LR}
        ])

        criterion = nn.CrossEntropyLoss()

        print(f"\nTraining for {EPOCHS} epochs...")
        for epoch in range(EPOCHS):
            print(f'\nEpoch {epoch+1}/{EPOCHS}:')
            train_loss, train_acc = train_epoch(model_ft, trainloader, criterion, optimizer, device)
            test_acc = evaluate(model_ft, testloader, device)

            print(f'  Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.2f}%')
            print(f'  Test Acc: {test_acc:.2f}%')

        # test_acc from the last epoch already is the final accuracy; the model
        # has not changed since, so there is no need for another pass.
        print(f"\n{RULE}")
        print(f"Fine-tuning Final Test Accuracy: {test_acc:.2f}%")
        print(f"{RULE}\n")
    else:
        print("\nSkipping fine-tuning. Feature extraction is usually sufficient!")

    print("\n" + RULE)
    print("Comparing with chap 7")
    print(RULE)
    print(f"Your baseline (Chapter 7):        ~{BASELINE_ACC}%")
    print(f"Transfer Learning (Feature Ext):  {fe_test_acc:.2f}%")
    print(f"\nImprovement: ~{fe_test_acc - BASELINE_ACC:.1f}%")
    print(RULE)
    

Using device: cpu

Loading CIFAR-10
Training batches: 782, Test batches: 157

APPROACH 1: Feature Extraction (Recommended to start)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Rohin's Laptop/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|█████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [00:05<00:00, 7.91MB/s]


Feature extractor created:
  Trainable parameters: 5,130 / 11,181,642
  Training only: Final FC layer

Training for 3 epochs...

Epoch 1/3:
  Batch 50/782: Loss: 1.778, Acc: 42.41%
  Batch 100/782: Loss: 1.453, Acc: 55.42%
  Batch 150/782: Loss: 1.290, Acc: 60.68%
  Batch 200/782: Loss: 1.176, Acc: 64.24%
  Batch 250/782: Loss: 1.097, Acc: 66.62%
  Batch 300/782: Loss: 1.040, Acc: 67.95%
  Batch 350/782: Loss: 1.000, Acc: 68.98%
  Batch 400/782: Loss: 0.965, Acc: 69.88%
  Batch 450/782: Loss: 0.936, Acc: 70.60%
  Batch 500/782: Loss: 0.910, Acc: 71.18%
  Batch 550/782: Loss: 0.891, Acc: 71.71%
  Batch 600/782: Loss: 0.872, Acc: 72.14%
  Batch 650/782: Loss: 0.857, Acc: 72.52%
  Batch 700/782: Loss: 0.842, Acc: 72.94%
  Batch 750/782: Loss: 0.830, Acc: 73.25%
  Train Loss: 0.824 | Train Acc: 73.36%
  Test Acc: 78.50%

Epoch 2/3:
  Batch 50/782: Loss: 0.683, Acc: 77.59%
  Batch 100/782: Loss: 0.660, Acc: 77.42%
  Batch 150/782: Loss: 0.656, Acc: 77.75%
  Batch 200/782: Loss: 0.643, Acc: 


Do you want to try fine-tuning ALL layers? (slower, takes 10-30 min) [y/N]:  N



Skipping fine-tuning. Feature extraction is usually sufficient!

Comparing with chap 7
Your baseline (Chapter 7):        ~63%
Transfer Learning (Feature Ext):  79.83%

Improvement: ~16.8%
