# Lab 0. AlexNet

In [9]:
# Import the packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.models as models
from torchvision.models import AlexNet_Weights

###  0.2 Task

AlexNet was originally designed and pretrained on ImageNet images, which are typically resized to 224×224. Since CIFAR-10 images are only 32×32, they need to be resized to match the expected input dimensions of AlexNet.

In [10]:
# Fetch CIFAR-10 (downloading it if it is not on disk yet) and wrap the
# train and test splits as datasets that yield tensors.

transform = transforms.ToTensor()

# Build the two splits in order: train first, then test.
train_dataset, test_dataset = (
    CIFAR10(root='./cifar-10-batches-py', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

print("Train samples:", len(train_dataset), "Test samples:", len(test_dataset))

Files already downloaded and verified
Files already downloaded and verified
Train samples: 50000 Test samples: 10000


In [11]:
# Device configuration: use the GPU when one is available, otherwise fall back
# to the CPU so the notebook still runs everywhere.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
# Transformations for CIFAR-10: resize to 224 and normalize as for ImageNet
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Rebuild the datasets with the transform above. The datasets created earlier
# were constructed with a plain ToTensor() transform, and rebinding the name
# `transform` does not change them; without this step the loaders would serve
# raw 32x32, unnormalized tensors and the resize/normalize pipeline would be
# silently ignored.
train_dataset = CIFAR10(root='./cifar-10-batches-py', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./cifar-10-batches-py', train=False, download=True, transform=transform)

# Shuffle only the training split; keep test order fixed for evaluation.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

#### Experiment 1: Fine‑Tuning AlexNet (All Layers Trainable)

#### model_ft

Fine-tuning. In this experiment, the entire pretrained AlexNet model (including the earlier layers) is updated during training on CIFAR‑10. This allows the model to adjust its weights to better suit the new task.

In [14]:
# Experiment 1: start from the pretrained AlexNet and swap its last classifier
# layer for a fresh 10-output linear layer.
model_ft = models.alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
num_features = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_features, 10)
model_ft = model_ft.to(device)

# Fine-tuning: every parameter of the network is handed to the optimizer.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

num_epochs = 10
for epoch in range(num_epochs):
    model_ft.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Standard optimisation step: clear grads, forward, backward, update.
        optimizer_ft.zero_grad()
        outputs = model_ft(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_ft.step()

        # Weight each batch loss by its size so the epoch figure is a
        # sample-weighted average of the batch losses.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    print(f"Fine-tuning Epoch {epoch+1}/{num_epochs}: Loss {epoch_loss:.4f}, Acc {epoch_acc:.2f}%")

# Evaluate on test set (eval mode, no gradient tracking)
model_ft.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_ft(images)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_acc_ft = 100 * correct / total
print(f"\nTest Accuracy (Fine-tuning): {test_acc_ft:.2f}%")

Fine-tuning Epoch 1/10: Loss 1.6959, Acc 37.58%
Fine-tuning Epoch 2/10: Loss 1.2121, Acc 56.87%
Fine-tuning Epoch 3/10: Loss 1.0359, Acc 63.54%
Fine-tuning Epoch 4/10: Loss 0.9304, Acc 67.78%
Fine-tuning Epoch 5/10: Loss 0.8711, Acc 69.95%
Fine-tuning Epoch 6/10: Loss 0.8247, Acc 71.82%
Fine-tuning Epoch 7/10: Loss 0.7741, Acc 73.67%
Fine-tuning Epoch 8/10: Loss 0.7461, Acc 74.65%
Fine-tuning Epoch 9/10: Loss 0.7167, Acc 75.85%
Fine-tuning Epoch 10/10: Loss 0.7105, Acc 76.24%

Test Accuracy (Fine-tuning): 75.52%


#### Experiment 2: Feature Extraction with AlexNet (Freeze Pretrained Layers)

#### model_fe

Feature extraction. In this experiment, the pretrained layers of AlexNet are frozen (i.e., not updated during training), and only the new final fully connected layer (which outputs 10 classes) is trained.

In [15]:
# Experiment 2: load the pretrained AlexNet and use it as a fixed feature extractor.
model_fe = models.alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)

# Freeze every pretrained parameter so no gradients are computed for them.
for param in model_fe.parameters():
    param.requires_grad = False

# Swap in a new 10-output final layer. It is created after the freeze loop,
# so its parameters keep the default requires_grad=True.
num_features = model_fe.classifier[6].in_features
model_fe.classifier[6] = nn.Linear(num_features, 10)
model_fe = model_fe.to(device)

# The optimizer only sees the parameters of the new final layer.
optimizer_fe = optim.Adam(model_fe.classifier[6].parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

num_epochs = 10
for epoch in range(num_epochs):
    model_fe.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Standard optimisation step: clear grads, forward, backward, update.
        optimizer_fe.zero_grad()
        outputs = model_fe(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_fe.step()

        # Weight each batch loss by its size so the epoch figure is a
        # sample-weighted average of the batch losses.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    print(f"Feature Extraction Epoch {epoch+1}/{num_epochs}: Loss {epoch_loss:.4f}, Acc {epoch_acc:.2f}%")

# Evaluate on test set (eval mode, no gradient tracking)
model_fe.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_fe(images)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_acc_fe = 100 * correct / total
print(f"\nTest Accuracy (Feature Extraction): {test_acc_fe:.2f}%")

Feature Extraction Epoch 1/10: Loss 0.7257, Acc 74.87%
Feature Extraction Epoch 2/10: Loss 0.6325, Acc 78.17%
Feature Extraction Epoch 3/10: Loss 0.6081, Acc 79.01%
Feature Extraction Epoch 4/10: Loss 0.6026, Acc 79.43%
Feature Extraction Epoch 5/10: Loss 0.5891, Acc 79.71%
Feature Extraction Epoch 6/10: Loss 0.5856, Acc 80.14%
Feature Extraction Epoch 7/10: Loss 0.5852, Acc 80.26%
Feature Extraction Epoch 8/10: Loss 0.5815, Acc 80.27%
Feature Extraction Epoch 9/10: Loss 0.5850, Acc 80.38%
Feature Extraction Epoch 10/10: Loss 0.5798, Acc 80.45%

Test Accuracy (Feature Extraction): 81.76%


Fine-Tuning vs. Feature Extraction:

Both strategies benefit from the pretrained AlexNet features, but they adapt to the new CIFAR‑10 task in different ways:

Fine‑Tuning:
In this approach, all layers of AlexNet are updated during training. The model gradually improved from a low starting training accuracy (37.6% in Epoch 1) to about 76.2% training accuracy after 10 epochs, with 75.5% accuracy on the test set. This shows that while the network can adjust its internal representations for CIFAR‑10, it may need more time or more careful tuning (for example, a learning rate smaller than the 0.001 used here), since updating every layer increases the learning complexity.

Feature Extraction:
Here, the pretrained layers are frozen, and only the final classification layer is retrained. This approach leverages the robust, general features already learned from ImageNet without modifying them. As a result, the model starts with a higher baseline (around 74.9% training accuracy in Epoch 1) and reaches about 80.5% training accuracy after 10 epochs, with 81.8% accuracy on the test set. The quicker improvement suggests that the pretrained features are already highly relevant to CIFAR‑10, and training only the final layer simplifies the optimization task.

In summary, while fine‑tuning offers more flexibility by updating the entire network, feature extraction can yield better performance in this case by efficiently leveraging the robust features learned from a large dataset like ImageNet.