# Setup

## Download Data

In [None]:
# Fetch data
!mkdir data
!wget -P data/ https://s3.amazonaws.com/video.udacity-data.com/topher/2018/May/5aea1b91_train-test-data/train-test-data.zip
!unzip -q -n data/train-test-data.zip -d data

## Setup Environment

Download the model code.

In [None]:
# FIXME: Necessary but not nice. Do imports better!
!git clone https://github.com/KoniHD/hw2.git
!mv hw2/src/* .
!rm -rf hw2 __init__.py hw2.ipynb

Add necessary dependencies.

In [None]:
# No need if you are using colab

# !pip install matplotlib~=3.5.2
# !pip install torch~=1.8.1
# !pip install torchvision~=0.9.1
# !pip install numpy~=1.21.6
# !pip install pillow~=9.1.1
# !pip install tqdm~=4.64.0
# !pip install jupyter==1.0.0
# !pip install opencv-python==4.6.0.66
# !pip install pandas==1.3.5
!pip install -q lightning

Import required libraries and configure the environment.

In [None]:
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim import lr_scheduler
from tqdm import tqdm

import logging


from lightning.pytorch import Trainer, seed_everything
seed_everything(42, workers=True)   # Try to create deterministic results

# the transforms we defined in Notebook 1 are in the helper file `custom_transforms.py`
from custom_transforms import (
    Rescale,
    RandomCrop,
    Normalize,
    ToTensor,
)

# the dataset we created in Notebook 1
from facial_keypoints_dataset import FacialKeypointsDataset

from model import Simple_CNN
from keypoint_task import KeypointDetection

## Visualize the Data

In [None]:
# Preprocessing pipeline built with transforms.Compose.
# Order matters: rescaling has to happen before the (smaller) crop.
data_transform = transforms.Compose(
    [Rescale(250), RandomCrop(224), Normalize(), ToTensor()]
)

# Locations of the keypoint annotations and the image folders inside `data/`.
training_keypoints_csv_path = os.path.join("data", "training_frames_keypoints.csv")
training_data_dir = os.path.join("data", "training")
test_keypoints_csv_path = os.path.join("data", "test_frames_keypoints.csv")
test_data_dir = os.path.join("data", "test")


# Training split with the transform pipeline applied per sample.
transformed_dataset = FacialKeypointsDataset(
    csv_file=training_keypoints_csv_path,
    root_dir=training_data_dir,
    transform=data_transform,
)

# One batch size shared by both loaders.
batch_size = 16

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    transformed_dataset, batch_size=batch_size, shuffle=True, num_workers=4
)

# Test split.
# NOTE(review): it reuses the training transform, so RandomCrop is also applied
# to evaluation images — confirm whether a deterministic crop is wanted here.
test_dataset = FacialKeypointsDataset(
    csv_file=test_keypoints_csv_path,
    root_dir=test_data_dir,
    transform=data_transform
)

# Evaluation batches keep the dataset order: shuffling adds nothing to a
# loss that is averaged over the whole split.
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=4
)

# Preview the first sample of one test batch together with its keypoints.
sample = next(iter(test_loader))
image = sample['image'][0]
keypoints = sample['keypoints'][0]
_, h, w = image.shape

# plot the image black and white (channel axis moved last for matplotlib)
plt.imshow(image.numpy().transpose(1, 2, 0), cmap='gray')
# Keypoints are mapped to pixel coordinates with x * (w/2) + (w/2), i.e. the
# stored values are treated as offsets from the image centre in half-image units.
plt.scatter(keypoints[:, 0]*(w/2)+(w/2), keypoints[:, 1]*(h/2)+(h/2), c='r', s=20)
plt.show()
print(f"Image min/max:   {image.min():.4f} / {image.max():.4f}")

# Data Exploration

In [None]:
# Sanity-check the value ranges of one training batch.
print("\n\n===Metrics of first batch===")
batch = next(iter(train_loader))
images, keypoints = batch['image'], batch['keypoints']

image_min, image_max = images.min(), images.max()
keypoint_min, keypoint_max = keypoints.min(), keypoints.max()

# Bounds are compared after rounding to one decimal to tolerate small overshoots.
# The image check tests the range [0, 1]; the label now says so (it previously
# claimed [-1, 1] while checking a lower bound of 0).
images_in_range = bool(0 <= image_min.round(decimals=1) and image_max.round(decimals=1) <= 1)
keypoints_in_range = bool(-1 <= keypoint_min.round(decimals=1) and keypoint_max.round(decimals=1) <= 1)

print(f"Image shape:\t\t{images.shape}")
print(f"Image min/max:\t\t{image_min:.4f} / {image_max:.4f}\t\twithin [0, 1]: {images_in_range}")
print(f"Keypoints min/max:\t{keypoint_min:.4f} / {keypoint_max:.4f}\twithin [-1, 1]: {keypoints_in_range}")

# Training
This is a conventional training loop. Below I am attempting to wrap training and inference in PyTorch Lightning.

## Testing overfitting

In [None]:
# Prefer CUDA, then Apple MPS, and fall back to the CPU.
device = torch.device(
    "cuda:0" if torch.cuda.is_available() else
    "mps" if torch.backends.mps.is_available() else
    "cpu"
)
print(f"Running on device={device}")

# Simple train setup
model = Simple_CNN(out_dim=136, activation=nn.ReLU)
model.to(device)
model = torch.compile(model)
model.train()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Overfitting loop: repeatedly fit ONE fixed batch. If the model and loss are
# wired correctly the loss should drop towards zero.
batch = next(iter(train_loader))
# The batch never changes, so move it to the device and flatten the targets
# once instead of on every iteration.
images, keypoints = batch['image'].to(device), batch['keypoints'].to(device)
targets = keypoints.view(keypoints.shape[0], -1)

num_epochs = 200
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}\t|\tLoss: {loss.item():.4f}")

Visualize overfitting results

In [None]:
# Predict on the batch the model was overfitted on (no gradients needed).
model.eval()
with torch.no_grad():
    outputs = model(images)

# Reshape the flat predictions to (batch, 68 keypoints, x/y) and bring
# everything to the CPU once for plotting.
outputs = outputs.view(-1, 68, 2).cpu()
images_cpu = images.cpu()
keypoints_cpu = keypoints.cpu()

fig, axes = plt.subplots(2, 4, figsize=(16, 8))
for ax in axes.flat:
    ax.axis('off')

# zip stops at the shorter sequence, so a batch with fewer samples than
# panels leaves the remaining axes blank instead of raising an IndexError.
for ax, img, predicted, truth in zip(axes.flat, images_cpu, outputs, keypoints_cpu):
    _, h, w = img.shape
    ax.imshow(img.numpy().transpose(1, 2, 0), cmap='gray')
    # Same centre-relative -> pixel mapping for predictions and ground truth.
    ax.scatter(predicted[:, 0] * (w/2) + (w/2), predicted[:, 1] * (h/2) + (h/2), c='r', s=10)
    ax.scatter(truth[:, 0] * (w/2) + (w/2), truth[:, 1] * (h/2) + (h/2), c='g', s=10)

fig.suptitle("Red=Predicted, Green=Ground Truth")
plt.show()

## Real training loop

In [None]:
# Device selection: CUDA when present, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device_name = "cuda:0"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"
device = torch.device(device_name)
print(f"Running on device={device}")

# Fresh, compiled network for the full training run, placed on the chosen device.
model = torch.compile(Simple_CNN(out_dim=136, activation=nn.ReLU))
model.to(device)

# Mean-squared-error objective optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, train_loader, optimizer, criterion, device, epoch: int):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable of batches, each a mapping with an ``'image'``
            and a ``'keypoints'`` tensor.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(prediction, target)``; assumed to
            return a per-batch mean (as ``nn.MSELoss`` does by default).
        device: Device the batch tensors are moved to.
        epoch: Epoch number, only used to label the progress bar.

    Returns:
        float: Loss averaged over all samples. Each batch loss is weighted by
        its batch size so that a smaller final batch is not over-weighted.
        ``nan`` if the loader yields no samples.
    """
    model.train()
    weighted_loss_sum = 0.0
    num_samples = 0
    pbar = tqdm(train_loader, desc=f"Train Epoch: {epoch}")
    for batch in pbar:
        images = batch['image'].to(device)
        keypoints = batch['keypoints'].to(device)
        batch_len = keypoints.size(0)

        optimizer.zero_grad()
        predicted_keypoints = model(images)
        # Flatten the targets to (batch, -1) to match the model output.
        loss = criterion(predicted_keypoints, keypoints.view(batch_len, -1))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        weighted_loss_sum += batch_loss * batch_len
        num_samples += batch_len
        pbar.set_postfix(loss=f"{batch_loss:.4f}")

    if num_samples == 0:
        # Nothing was processed; avoid a ZeroDivisionError.
        return float("nan")
    return weighted_loss_sum / num_samples

def validate(model, val_loader, criterion, device, epoch: int):
    """Evaluate ``model`` on ``val_loader`` without gradients and return the mean loss.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        val_loader: Iterable of batches, each a mapping with an ``'image'``
            and a ``'keypoints'`` tensor.
        criterion: Loss called as ``criterion(prediction, target)``; assumed to
            return a per-batch mean (as ``nn.MSELoss`` does by default).
        device: Device the batch tensors are moved to.
        epoch: Epoch number, only used to label the progress bar.

    Returns:
        float: Loss averaged over all samples. Each batch loss is weighted by
        its batch size so that a smaller final batch is not over-weighted.
        ``nan`` if the loader yields no samples.
    """
    model.eval()
    weighted_loss_sum = 0.0
    num_samples = 0
    with torch.no_grad():
        pbar = tqdm(val_loader, desc=f"Val Epoch: {epoch}")
        for batch in pbar:
            images = batch['image'].to(device)
            keypoints = batch['keypoints'].to(device)
            batch_len = keypoints.size(0)

            predicted_keypoints = model(images)
            # Flatten the targets to (batch, -1) to match the model output.
            loss = criterion(predicted_keypoints, keypoints.view(batch_len, -1))

            batch_loss = loss.item()
            weighted_loss_sum += batch_loss * batch_len
            num_samples += batch_len
            pbar.set_postfix(loss=f"{batch_loss:.4f}")

    if num_samples == 0:
        # Nothing was processed; avoid a ZeroDivisionError.
        return float("nan")
    return weighted_loss_sum / num_samples

# Full training run. The test split is used as the validation set here.
num_epochs = 10
# Count epochs from 1 so the progress-bar labels and the summary line agree
# (previously the bars showed a 0-based epoch, the summary a 1-based one).
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device, epoch)
    val_loss = validate(model, test_loader, criterion, device, epoch)
    print(f'Epoch {epoch}/{num_epochs} | Train: {train_loss:.4f} | Val: {val_loss:.4f}')

## Training using PyTorch Lightning

In [None]:
# Start from an untrained network and wrap it in the Lightning task.
model = Simple_CNN(out_dim=136, activation=nn.ReLU)
keypoint_task = KeypointDetection(model, criterion=nn.MSELoss())

# Trainer configuration; run artefacts go below ./exp in the working directory.
experiment_root = os.path.join(os.getcwd(), 'exp')
trainer_options = dict(
    max_epochs=10,
    accelerator='auto',
    deterministic='warn',
    default_root_dir=experiment_root,
)
trainer = Trainer(**trainer_options)

# Fit on the training loader; the test loader serves as validation data.
trainer.fit(
    keypoint_task,
    train_dataloaders=train_loader,
    val_dataloaders=test_loader,
)

In [None]:
# Load the TensorBoard notebook extension and open the dashboard inline.
# The log directory sits below 'exp', the Trainer's default_root_dir —
# presumably where Lightning writes its run logs; verify after training.
%load_ext tensorboard
%tensorboard --logdir exp/lightning_logs

## Part 1: Direct Coordinate Regression

In [None]:
# TODO: Training a simple CNN

In [None]:
# TODO: Visualization of results

## Part 2: Transfer Learning for Keypoint Detection

In [None]:
# TODO: Pretrained ResNet backbone

In [None]:
# TODO: Advanced pretrained models (DINO, MAE, ...)

## Part 3: Heatmap-based Keypoint Detection

In [None]:
# TODO: Heatmap synthesis and training

In [None]:
# TODO: Visualization of heatmap prediction