In [32]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
# from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
import numpy as np

In [36]:
def train_one_epoch(model, loader, loss_func, optimizer, device, set_train_mode=True):
    """Train ``model`` for one pass over ``loader`` and return the mean loss.

    Args:
        model: Module being optimised; invoked as ``model(inputs)``.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_func: Callable ``loss_func(outputs, labels)`` returning a scalar
            loss tensor. Each batch loss is weighted by its batch size, so the
            returned average is exact when the loss is a per-batch mean.
        optimizer: Optimizer whose ``step()`` is called once per batch.
        device: Device that both tensors of every batch are moved to.
        set_train_mode: If True (default, the original behaviour), call
            ``model.train()`` before iterating. Pass False to keep whatever
            per-module train/eval configuration the caller has already set,
            e.g. a backbone held in ``eval()`` while only a head is trained.

    Returns:
        float: Sum of ``loss * batch_size`` over all batches divided by the
        total number of samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if set_train_mode:
        # Recursively switches every sub-module to training mode, which
        # overrides any eval()/train() mix configured by the caller.
        model.train()
    loss_sum = 0.0
    total = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        bs = inputs.size(0)
        loss_sum += loss.item() * bs
        total += bs

    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("train_one_epoch: loader yielded no samples")

    return loss_sum / total


@torch.no_grad()
def evaluate(model, loader, loss_func, device):
    """Evaluate ``model`` on ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Runs with gradient tracking disabled and leaves ``model`` in eval mode.

    Args:
        model: Module to evaluate; invoked as ``model(inputs)``. Its output is
            arg-maxed over dimension 1 to obtain predicted class indices.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_func: Callable ``loss_func(outputs, labels)`` returning a scalar
            loss tensor. Each batch loss is weighted by its batch size, so the
            returned average is exact when the loss is a per-batch mean.
        device: Device that both tensors of every batch are moved to.

    Returns:
        tuple[float, float]: Sample-weighted mean loss, and accuracy expressed
        as a percentage in the range 0-100.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = loss_func(outputs, labels)

        # Weight by batch size so a smaller final batch does not skew the mean.
        bs = inputs.size(0)
        loss_sum += loss.item() * bs
        total += bs

        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("evaluate: loader yielded no samples")

    avg_loss = loss_sum / total
    acc = 100.0 * correct / total
    return avg_loss, acc

In [34]:
# Use the GPU when one is present; otherwise fall back to CPU so the cell
# still runs instead of failing on the first .to(device).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# transforms
# Training pipeline: random 32x32 crop after 4-pixel padding plus horizontal
# flip for augmentation, then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
])

# Test pipeline: no augmentation, same normalisation constants as training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
])

# dataset (downloaded into ./data on first use)
trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=train_transform)
testset  = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=test_transform)

# loader
# Pinned host memory only helps host-to-GPU copies, so enable it on CUDA only.
use_pinned_memory = device.type == "cuda"
train_loader = DataLoader(trainset, batch_size=256, shuffle=True,  num_workers=2, pin_memory=use_pinned_memory)
test_loader  = DataLoader(testset,  batch_size=512, shuffle=False, num_workers=2, pin_memory=use_pinned_memory)

# pretrained model
model = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)

# adapt the stem: set conv1's stride to 1 and replace the max-pool with a
# no-op (the conv1 weights themselves are left as loaded)
model.conv1.stride = (1, 1)
model.maxpool = nn.Identity()

# classifier head: new linear layer with 10 outputs
model.fc = nn.Linear(model.fc.in_features, 10)

model = model.to(device)

loss_func = nn.CrossEntropyLoss()

print("Ready:", next(model.parameters()).device)

Ready: cuda:0


In [41]:
# Use the GPU when one is present; otherwise fall back to CPU so the cell
# still runs instead of failing on the first .to(device).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model: pretrained ResNet-34 with conv1 stride set to 1, the max-pool
# replaced by a no-op, and a fresh 10-output linear head
model = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)
model.conv1.stride = (1, 1)
model.maxpool = nn.Identity()
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

loss_func = nn.CrossEntropyLoss()

# Phase 1: train the classifier head only (everything else frozen)
for p in model.parameters():
    p.requires_grad = False
for p in model.fc.parameters():
    p.requires_grad = True

optimizer = optim.SGD(model.fc.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

head_epochs = 5
for e in range(head_epochs):
    # NOTE(review): train_one_epoch() as defined in this notebook calls
    # model.train() on entry, so the mode set by the next two lines does not
    # survive into the training pass; the backbone's BatchNorm layers still
    # run in training mode there. Keeping them frozen requires
    # train_one_epoch to stop forcing model.train().
    model.eval()
    model.fc.train()
    train_loss = train_one_epoch(model, train_loader, loss_func, optimizer, device)
    test_loss, test_acc = evaluate(model, test_loader, loss_func, device)
    print(f"[Head] {e+1}/{head_epochs} | train_loss={train_loss:.4f} | test_loss={test_loss:.4f} | test_acc={test_acc:.2f}%")

# Phase 2: fine-tune all layers
for p in model.parameters():
    p.requires_grad = True

# NOTE(review): in the logged run, test accuracy falls from about 50% to about
# 13% on the first fine-tune epoch; lr=0.1 may be too aggressive for
# pretrained weights. Confirm before reusing this setting.
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

total_epochs = 80
finetune_epochs = total_epochs - head_epochs

# milestones schedule, shifted by head_epochs so the LR drops land at overall
# epochs 40 and 65
milestones = [40 - head_epochs, 65 - head_epochs]
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

for e in range(finetune_epochs):
    # Read the LR *before* training: after scheduler.step() the scheduler
    # already reports the next epoch's LR, which mislabels the log line at
    # each milestone.
    lr = optimizer.param_groups[0]["lr"]
    train_loss = train_one_epoch(model, train_loader, loss_func, optimizer, device)
    scheduler.step()
    test_loss, test_acc = evaluate(model, test_loader, loss_func, device)
    print(f"[FT]   {e+1}/{finetune_epochs} | lr={lr:.4g} | train_loss={train_loss:.4f} | test_loss={test_loss:.4f} | test_acc={test_acc:.2f}%")

[Head] 1/5 | train_loss=6.5935 | test_loss=2.6020 | test_acc=50.87%
[Head] 2/5 | train_loss=3.0049 | test_loss=2.1875 | test_acc=53.74%
[Head] 3/5 | train_loss=2.8567 | test_loss=2.7578 | test_acc=48.91%
[Head] 4/5 | train_loss=2.6379 | test_loss=2.1570 | test_acc=53.78%
[Head] 5/5 | train_loss=3.1626 | test_loss=2.9874 | test_acc=49.58%
[FT]   1/75 | lr=0.1 | train_loss=3.1936 | test_loss=2.3743 | test_acc=13.42%
[FT]   2/75 | lr=0.1 | train_loss=1.9656 | test_loss=1.8105 | test_acc=31.05%
[FT]   3/75 | lr=0.1 | train_loss=1.7758 | test_loss=1.7405 | test_acc=36.54%
[FT]   4/75 | lr=0.1 | train_loss=1.6521 | test_loss=1.7743 | test_acc=34.62%
[FT]   5/75 | lr=0.1 | train_loss=1.5569 | test_loss=1.6759 | test_acc=39.31%
[FT]   6/75 | lr=0.1 | train_loss=1.4546 | test_loss=1.4209 | test_acc=46.87%
[FT]   7/75 | lr=0.1 | train_loss=1.3646 | test_loss=1.3325 | test_acc=51.54%
[FT]   8/75 | lr=0.1 | train_loss=1.2532 | test_loss=1.2369 | test_acc=55.48%
[FT]   9/75 | lr=0.1 | train_loss=1.