In [8]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import json
import cv2
import numpy as np

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [10]:
class KeypointsDataset(Dataset):
    def __init__(self, img_dir, data_file):
        self.img_dir = img_dir
        with open(data_file, "r") as f:
            self.data = json.load(f)
        
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        img = cv2.imread(f"{self.img_dir}/{item['id']}.png")
        h,w = img.shape[:2]

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)
        kps = np.array(item['kps']).flatten()
        kps = kps.astype(np.float32)

        kps[::2] *= 224.0 / w 
        kps[1::2] *= 224.0 / h 

        return img, kps

In [11]:
train_dataset = KeypointsDataset("data/images","data/data_train.json")
val_dataset = KeypointsDataset("data/images","data/data_val.json")

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=True)

In [12]:
model = models.resnet101(pretrained=True)
model.fc =  torch.nn.Linear(model.fc.in_features, 14*2) # Replaces the last layer

In [13]:
model = model.to(device)

In [14]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [15]:
best_val_loss = float('inf')

epochs=40
for epoch in range(epochs):
    for i, (imgs,kps) in enumerate(train_loader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        if i % 500 == 0:
            print(f"Epoch {epoch}, iter {i}, loss: {loss.item()}")
    
    if epoch % 2 == 0:
        # validate after each epoch
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for imgs, kps in val_loader:
                imgs = imgs.to(device)
                kps = kps.to(device)

                outputs = model(imgs)
                loss = criterion(outputs, kps)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch}, Validation Loss: {val_loss}")

        # save best model if it has best val loss so far
        if val_loss < best_val_loss:
            print(f"New best validaiton loss: {val_loss}. Saving model")
            best_val_loss = val_loss
            torch.save(model.state_dict(), "resnet101_kps_best_model.pth")

# save final model to
torch.save(model.state_dict(), "resnet101_kps_last_model.pth")

Epoch 0, iter 0, loss: 13681.513671875
Epoch 0, iter 500, loss: 1901.9072265625
Epoch 0, iter 1000, loss: 97.95955657958984
Epoch 0, iter 1500, loss: 105.36778259277344
Epoch 0, Validation Loss: 80.8640834746266
New best validaiton loss: 80.8640834746266. Saving model
Epoch 1, iter 0, loss: 50.92432403564453
Epoch 1, iter 500, loss: 61.074039459228516
Epoch 1, iter 1000, loss: 50.48830795288086
Epoch 1, iter 1500, loss: 31.21729278564453
Epoch 2, iter 0, loss: 32.932708740234375
Epoch 2, iter 500, loss: 57.42014694213867
Epoch 2, iter 1000, loss: 19.705820083618164
Epoch 2, iter 1500, loss: 15.811482429504395
Epoch 2, Validation Loss: 45.91313001217075
New best validaiton loss: 45.91313001217075. Saving model
Epoch 3, iter 0, loss: 16.91200828552246
Epoch 3, iter 500, loss: 47.6907844543457
Epoch 3, iter 1000, loss: 16.879322052001953
Epoch 3, iter 1500, loss: 81.18817138671875
Epoch 4, iter 0, loss: 11.851243019104004
Epoch 4, iter 500, loss: 16.249515533447266
Epoch 4, iter 1000, los