# Question 2: Finetuning a pre-trained network and the optimizers

In [1]:
import os
import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import top_k_accuracy_score
from torchvision.models import ResNet101_Weights

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
train_dataset = datasets.STL10(root='./data', split='train', download=True, transform=transform)
test_dataset = datasets.STL10(root='./data', split='test', download=True, transform=transform)
num_classes = 10

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [05:17<00:00, 8320906.09it/s] 


Extracting ./data/stl10_binary.tar.gz to ./data
Files already downloaded and verified


In [4]:
model = torchvision.models.resnet101(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 159MB/s]


In [5]:
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)

In [6]:
def initialize_model():
    weights = ResNet101_Weights.DEFAULT
    model = torchvision.models.resnet101(weights=weights)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    return model

In [7]:
num_epochs = 10
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
model = model.to(device)

In [9]:
def train(optimizer, model, criterion, train_loader, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100. * correct / total

    return train_loss, train_acc

In [10]:
def test(model, criterion, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_predictions.extend(outputs.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

    test_acc = 100. * correct / total

    return test_acc, torch.tensor(all_predictions), torch.tensor(all_targets)

Optimizer 1: Adam; Optimizer 2: Adagrad; Optimizer 3: Adadelta

In [11]:
optimizers = [
    lambda params: optim.Adam(params, lr=0.001),
    lambda params: optim.Adagrad(params, lr=0.01),
    lambda params: optim.Adadelta(params, lr=1.0),
]

In [12]:
for i, optimizer_fn in enumerate(optimizers):
    model = initialize_model()
    model = model.to(device)

    optimizer = optimizer_fn(model.parameters())

    for epoch in range(num_epochs):
        train_loss, train_acc = train(optimizer, model, criterion, train_loader, device)
        test_acc, test_predictions, test_targets = test(model, criterion, test_loader, device)
        top_5_accuracy = top_k_accuracy_score(test_targets, test_predictions, k=5)

        print(f"Optimizer {i+1}, Epoch [{epoch+1}/{num_epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
              f"Test Acc: {test_acc:.2f}%")

Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth
100%|██████████| 171M/171M [00:01<00:00, 163MB/s]
  return test_acc, torch.tensor(all_predictions), torch.tensor(all_targets)


Optimizer 1, Epoch [1/10], Train Loss: 0.7274, Train Acc: 76.90%, Test Acc: 81.66%
Optimizer 1, Epoch [2/10], Train Loss: 0.3967, Train Acc: 86.86%, Test Acc: 78.79%
Optimizer 1, Epoch [3/10], Train Loss: 0.3223, Train Acc: 89.46%, Test Acc: 81.22%
Optimizer 1, Epoch [4/10], Train Loss: 0.2351, Train Acc: 92.10%, Test Acc: 85.49%
Optimizer 1, Epoch [5/10], Train Loss: 0.1292, Train Acc: 95.38%, Test Acc: 83.54%
Optimizer 1, Epoch [6/10], Train Loss: 0.1287, Train Acc: 95.62%, Test Acc: 83.06%
Optimizer 1, Epoch [7/10], Train Loss: 0.1164, Train Acc: 96.22%, Test Acc: 82.21%
Optimizer 1, Epoch [8/10], Train Loss: 0.1135, Train Acc: 96.30%, Test Acc: 78.34%
Optimizer 1, Epoch [9/10], Train Loss: 0.1030, Train Acc: 96.76%, Test Acc: 84.47%
Optimizer 1, Epoch [10/10], Train Loss: 0.1250, Train Acc: 95.68%, Test Acc: 80.78%
Optimizer 2, Epoch [1/10], Train Loss: 1.9960, Train Acc: 22.86%, Test Acc: 24.82%
Optimizer 2, Epoch [2/10], Train Loss: 1.5612, Train Acc: 37.84%, Test Acc: 35.54%
Opt