In [9]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from pathlib import Path
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt

In [5]:
DATA_DIR = Path("Data") / "asl_alphabet_train"

assert DATA_DIR.exists(), f"Path does not exist: {DATA_DIR}"
DATA_DIR


WindowsPath('Data/asl_alphabet_train')

In [None]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

full_train = datasets.ImageFolder(DATA_DIR, transform=transform)

print("Total images:", len(full_train))
print("Classes:", full_train.classes)


Total images: 87000
Classes: ['asl_alphabet_train']


In [7]:
indices = np.arange(len(full_train))
labels = np.array(full_train.targets)

train_idx, val_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=labels,
    random_state=429
)

train_ds = Subset(full_train, train_idx)
val_ds   = Subset(full_train, val_idx)

len(train_ds), len(val_ds)


(69600, 17400)

In [8]:
BATCH_SIZE = 64

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

len(train_loader), len(val_loader)

(1088, 272)

In [10]:
num_classes = len(full_train.classes)   # 29 or 27 depending on dataset
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_resnet18_TA(num_classes):
    # Load pretrained ResNet-18
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Replace classifier head
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)

    # Freeze all params
    for p in model.parameters():
        p.requires_grad = False

    # Unfreeze only the head
    for p in model.fc.parameters():
        p.requires_grad = True

    # Keep BatchNorm layers in eval mode when frozen
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            for p in m.parameters():
                p.requires_grad = False

    return model.to(device)

model_TA = get_resnet18_TA(num_classes)
model_TA


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\georg/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:04<00:00, 9.60MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
num_classes = len(full_train.classes)   # 29 or 27 depending on dataset

def get_resnet18_TA(num_classes):
    # Load pretrained ResNet-18
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Replace classifier head
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)

    # Freeze all params
    for p in model.parameters():
        p.requires_grad = False

    # Unfreeze only the head
    for p in model.fc.parameters():
        p.requires_grad = True

    # Keep BatchNorm layers in eval mode when frozen
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            for p in m.parameters():
                p.requires_grad = False

    return model.to(device)

model_TA = get_resnet18_TA(num_classes)
model_TA


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
criterion = nn.CrossEntropyLoss()

# Only train parameters that require_grad=True
optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model_TA.parameters()),
    lr=1e-3
)


def run_epoch(model, loader, optimizer, criterion, train: bool):
    model.train(train)
    # re-freeze BN if needed
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d) and not any(p.requires_grad for p in m.parameters()):
            m.eval()

    all_preds = []
    all_targets = []
    total_loss = 0.0

    if train:
        torch.set_grad_enabled(True)
    else:
        torch.set_grad_enabled(False)

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        if train:
            optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        if train:
            loss.backward()
            optimizer.step()

        total_loss += loss.item() * inputs.size(0)

        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average="macro")

    return avg_loss, acc, macro_f1


In [13]:
EPOCHS = 10  

history_TA = {
    "train_loss": [],
    "val_loss": [],
    "train_acc": [],
    "val_acc": [],
    "train_f1": [],
    "val_f1": []
}

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc, train_f1 = run_epoch(
        model_TA, train_loader, optimizer, criterion, train=True
    )
    val_loss, val_acc, val_f1 = run_epoch(
        model_TA, val_loader, optimizer, criterion, train=False
    )

    history_TA["train_loss"].append(train_loss)
    history_TA["val_loss"].append(val_loss)
    history_TA["train_acc"].append(train_acc)
    history_TA["val_acc"].append(val_acc)
    history_TA["train_f1"].append(train_f1)
    history_TA["val_f1"].append(val_f1)

    print(
        f"Epoch {epoch:02d}/{EPOCHS} | "
        f"Train loss {train_loss:.4f}, acc {train_acc:.3f}, F1 {train_f1:.3f} | "
        f"Val loss {val_loss:.4f}, acc {val_acc:.3f}, F1 {val_f1:.3f}"
    )

torch.save(model_TA.state_dict(), "resnet18_TA_head_only.pt")


KeyboardInterrupt: 