In [198]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from pathlib import Path
import json
from tqdm import tqdm

In [102]:
# Load annotations for training
train_ann_path = Path('./Data/coco2017/annotations/person_keypoints_train2017.json')
with open(train_ann_path, 'r') as f:
	data = json.load(f)

# Only keep keypoint annotations
annotations = data['annotations']

In [103]:
keypoints = []
for i in range(len(annotations)):
	img_keypoints = annotations[i]["keypoints"] # Keypoints for 1 image

	# Each person has 17 keypoints, 3 values for each
	# (x, y, visibility_flag)
	keypoints.append([])
	for j in range(0, len(img_keypoints), 3):
		keypoints[i].append(img_keypoints[j:j+3])

torch.tensor(keypoints).shape

torch.Size([262465, 17, 3])

In [104]:
train_inputs = []
train_targets = []

# Only keep shoulders, hips, and feet
# Indices for inputs
input_indices = [5, 6, 11, 12] # Left Shoulder, Right Shoulder, Left Hip, Right Hip
# Indices for targets
target_indices = [15, 16] # Left Foot (Ankle), Right Foot (Ankle)

for i in range(len(keypoints)):
	points = np.array(keypoints[i])[input_indices]
	if points[0][2] == 0 or points[1][2] == 0 or points[2][2] == 0 or points[3][2] == 0:
		continue

	label = np.array(keypoints[i])[target_indices]
	if label[0][2] == 0 or label[1][2] == 0:
		continue

	train_inputs.append(points)
	train_targets.append(label)

torch.tensor(train_inputs).shape, torch.tensor(train_targets).shape

(torch.Size([61720, 4, 3]), torch.Size([61720, 2, 3]))

In [106]:
train_inputs[0], train_targets[0]

(array([[198, 193,   2],
        [243, 196,   2],
        [197, 298,   2],
        [228, 297,   2]]),
 array([[205, 475,   2],
        [215, 453,   2]]))

In [107]:
# Load keypoints for testing
test_ann_path = Path('./Data/coco2017/annotations/person_keypoints_val2017.json')
with open(test_ann_path, 'r') as f:
	data = json.load(f)

test_annotations = data['annotations']
keypoints = []
for i in range(len(annotations)):
	img_keypoints = annotations[i]["keypoints"] # Keypoints for 1 image

	# Each person has 17 keypoints, 3 values for each
	# (x, y, visibility_flag)
	keypoints.append([])
	for j in range(0, len(img_keypoints), 3):
		keypoints[i].append(img_keypoints[j:j+3])

test_inputs = []
test_targets = []

# Only keep shoulders, hips, and feet
# Indices for inputs
input_indices = [5, 6, 11, 12] # Left Shoulder, Right Shoulder, Left Hip, Right Hip
# Indices for targets
target_indices = [15, 16] # Left Foot (Ankle), Right Foot (Ankle)

for i in range(len(keypoints)):
	points = np.array(keypoints[i])[input_indices]
	if points[0][2] == 0 or points[1][2] == 0 or points[2][2] == 0 or points[3][2] == 0:
		continue

	label = np.array(keypoints[i])[target_indices]
	if label[0][2] == 0 or label[1][2] == 0:
		continue

	test_inputs.append(points)
	test_targets.append(label)

torch.tensor(test_inputs).shape, torch.tensor(test_targets).shape

(torch.Size([61720, 4, 3]), torch.Size([61720, 2, 3]))

In [108]:
test_inputs[0], test_targets[0]

(array([[198, 193,   2],
        [243, 196,   2],
        [197, 298,   2],
        [228, 297,   2]]),
 array([[205, 475,   2],
        [215, 453,   2]]))

In [160]:
# Dataset
class CocoKeypoints(Dataset):
	def __init__(self, inputs, labels):
		super(CocoKeypoints, self).__init__()
		self.inputs = torch.tensor(inputs, dtype=torch.float32)
		self.labels = torch.tensor(labels, dtype=torch.float32)

	def __len__(self):
		return len(self.labels)

	def __getitem__(self, index):
		# Remove visibility from keypoints
		return self.inputs[index].T[:2].T.flatten(), self.labels[index].T[:2].T.flatten()

In [161]:
BATCH_SIZE = 8
NUM_WORKERS = 0

# Data loader
train = CocoKeypoints(train_inputs, train_targets)
test = CocoKeypoints(test_inputs, test_targets)

train_loader = DataLoader(train, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=NUM_WORKERS)
test_loader = DataLoader(test, batch_size=BATCH_SIZE,
                         shuffle=True, num_workers=NUM_WORKERS)

In [162]:
train[0]

(tensor([198., 193., 243., 196., 197., 298., 228., 297.]),
 tensor([205., 475., 215., 453.]))

In [204]:
class KeypointPredictor(nn.Module):
	def __init__(self):
		super(KeypointPredictor, self).__init__()
		self.seq = nn.Sequential(
			nn.Linear(8, 64),
			nn.ReLU(),
			nn.Linear(64, 128),
			nn.ReLU(),
			nn.Linear(128, 256),
			nn.ReLU(),
			nn.Linear(256, 128),
			nn.ReLU(),
			nn.Linear(128, 64),
			nn.ReLU(),
			nn.Linear(64, 4)
		)

	def forward(self, x):
		return self.seq(x)

In [205]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = KeypointPredictor().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.HuberLoss()

In [206]:
def train(epoch):
    model.train()
    loop = tqdm(train_loader, leave=False, desc=f"Epoch {epoch}")
    for data, target in loop:
        data, target = data.to(device), target.to(device)
        results = model(data)  # Forward pass
        optimizer.zero_grad()
        loss = loss_fn(results, target)
        loss.backward()  # Back propogation
        optimizer.step()
        loop.set_postfix(loss=loss.item())

def test():
    model.eval()
    test_loss = 0
    with torch.inference_mode():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            results = model(data)
            test_loss += loss_fn(results, target).item()

    test_loss /= len(test_loader.dataset)
    print(f"Test Loss {test_loss}")

In [208]:
epochs = 10
for epoch in range(1, epochs + 1):
	train(epoch)
	test()

                                                                        

Test Loss 2.4509217321718193


                                                                        

Test Loss 2.5224648056135344


                                                                        

Test Loss 2.573404291683563


                                                                        

Test Loss 2.5247613332214405


                                                                        

Test Loss 2.556638284944051


                                                                        

Test Loss 2.5144094517226354


                                                                        

Test Loss 2.665778928481366


                                                                        

Test Loss 2.483441400033502


                                                                        

Test Loss 2.7290008650842768


                                                                         

Test Loss 2.550735821951077


In [210]:
torch.save(model.state_dict(), "FeetPredict.pt")