In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import json
import cv2
import numpy as np

In [2]:
# Select the compute device once: CUDA when torch reports it as available, CPU otherwise.
devic = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Create Torch Dataset

In [3]:
class KeypointsDataset(Dataset):
    """Dataset pairing images with flattened keypoint coordinates.

    The JSON annotation file is loaded as a sequence of entries; each entry
    is read through its ``id`` key (file stem of ``<id>.png`` inside
    ``img_dir``) and its ``kps`` key (keypoint coordinates, treated as
    alternating x/y values in pixels of the original image once flattened).

    Images are resized to ``img_size`` x ``img_size`` and normalized; the
    keypoints are rescaled by the same factors so they refer to the resized
    image.

    Args:
        img_dir: Directory that contains the ``<id>.png`` images.
        data_file: Path to the JSON annotation file.
        img_size: Side length (pixels) of the square image returned by the
            dataset. Defaults to 224, the value previously hard-coded.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, "r") as f:
            self.data = json.load(f)

        # ndarray (H, W, C) -> PIL -> resized -> float tensor -> normalized.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, keypoints)`` for the sample at ``idx``.

        Returns:
            A tuple of the transformed image and a 1-D ``float32`` numpy
            array of keypoint coordinates scaled to the resized image.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        # cv2.imread does not raise on failure; it returns None. Fail loudly
        # with the offending path instead of crashing later on `img.shape`.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        h, w = img.shape[:2]

        # OpenCV loads images as BGR; the transform pipeline expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)

        kps = np.asarray(item['kps'], dtype=np.float32).flatten()
        kps[::2] *= self.img_size / w   # even indices: x coordinates
        kps[1::2] *= self.img_size / h  # odd indices: y coordinates

        return img, kps

In [4]:
# Root of the dataset on disk. Edit this single value to run the notebook on
# another machine instead of touching every path below.
DATA_DIR = "/Users/huanjingheng/CS302-ComputerVision/Tennis/data"
BATCH_SIZE = 8

train_dataset = KeypointsDataset(f"{DATA_DIR}/images", f"{DATA_DIR}/data_train.json")
val_dataset = KeypointsDataset(f"{DATA_DIR}/images", f"{DATA_DIR}/data_val.json")

# Only the training data is shuffled; the validation loader keeps a fixed
# order so evaluation runs visit samples deterministically.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create the Model

In [5]:
# Start from a pretrained ResNet-50 and replace its final fully connected
# layer with a fresh linear head producing 14 * 2 regression outputs.
# NOTE(review): newer torchvision releases prefer `weights=` over
# `pretrained=` — confirm the installed version before switching.
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=14 * 2)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/huanjingheng/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:11<00:00, 8.79MB/s]


In [6]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device=devic)

# Train the Model

In [7]:
# Mean-squared-error loss between predicted and target keypoint coordinates
# ("mean" is MSELoss's default reduction, spelled out here for clarity).
criterion = torch.nn.MSELoss(reduction="mean")
# Adam over every model parameter (backbone and new head alike).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [8]:
epochs = 20
for epoch in range(epochs):
    # Enter training mode explicitly. In a notebook this cell may be re-run
    # after some other cell switched the model to eval(); without this call
    # BatchNorm layers would then silently keep using their running stats.
    model.train()

    for i, (imgs, kps) in enumerate(train_loader):
        # Batches are produced on the CPU; move them next to the model.
        imgs = imgs.to(devic)
        kps = kps.to(devic)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 10 iterations.
        if i % 10 == 0:
            print(f"Epoch {epoch}, iter {i}, loss {loss.item()}")

Epoch 0, iter 0, loss 14401.0888671875
Epoch 0, iter 10, loss 14187.2607421875
Epoch 0, iter 20, loss 14585.845703125
Epoch 0, iter 30, loss 14048.771484375
Epoch 0, iter 40, loss 13591.0615234375
Epoch 0, iter 50, loss 12636.3564453125
Epoch 0, iter 60, loss 12650.1669921875
Epoch 0, iter 70, loss 12848.7373046875
Epoch 0, iter 80, loss 11770.9853515625
Epoch 0, iter 90, loss 11932.658203125
Epoch 0, iter 100, loss 11506.1943359375
Epoch 0, iter 110, loss 10727.5
Epoch 0, iter 120, loss 10320.935546875
Epoch 0, iter 130, loss 9962.798828125
Epoch 0, iter 140, loss 9530.7548828125
Epoch 0, iter 150, loss 9589.591796875
Epoch 0, iter 160, loss 9145.9931640625
Epoch 0, iter 170, loss 8534.787109375
Epoch 0, iter 180, loss 8362.966796875
Epoch 0, iter 190, loss 8057.16943359375
Epoch 0, iter 200, loss 8347.7021484375
Epoch 0, iter 210, loss 7823.50244140625
Epoch 0, iter 220, loss 6975.08154296875
Epoch 0, iter 230, loss 6699.19189453125
Epoch 0, iter 240, loss 6424.6748046875
Epoch 0, it

In [None]:
# Write the model's state_dict to "keypoints_model.pth" in the working directory.
torch.save(obj=model.state_dict(), f="keypoints_model.pth")