In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from torch.optim import lr_scheduler
import numpy as np
from torchvision import datasets,models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import cv2
import PIL
import re


# Switch matplotlib to interactive mode.
plt.ion()

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# TODO: Display 5 images of keypoints working with humans
# TODO: Display 5 images of wolves with ground truth keypoints
# TODO: Display 5 images of wolves with ground truth bounding boxes

In [None]:
# Run the pretrained keypoint_rcnn on a picture of a person and draw the
# detected keypoints on top of it.
model = models.detection.keypointrcnn_resnet50_fpn(pretrained=True)

model.eval()

image_path = 'images/player2.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image: {}".format(image_path))

# OpenCV decodes images as BGR, while the torchvision detection models expect
# RGB tensors scaled to [0, 1]; convert the channel order for the model input.
# The original BGR array is kept for drawing and display with OpenCV.
image_tensor = transforms.functional.to_tensor(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model([image_tensor])

for instance in output[0]["keypoints"]:
    for kp in instance:
        # kp holds (x, y, v); the third value scales the circle radius.
        cv2.circle(image, (int(kp[0]), int(kp[1])), int(3*kp[2]), (127, 200, 127), 2, cv2.LINE_AA)

cv2.imshow("img", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [2]:
# Load and process dataset for input to keypoint_rcnn model
class LoadQuadrupedDataset(torch.utils.data.Dataset):
    """Images of a quadruped paired with bone-position annotations.

    Each item is ``(img, targets)``: the RGB image at ``index`` (after the
    optional transform) and a list of three single-key dicts, in the order
    ``'labels'``, ``'keypoints'``, ``'boxes'``, each holding a list of
    tensors parsed from the annotation file.

    NOTE(review): the targets are built from every line of the annotation
    file, so they do not depend on ``index``. torchvision's detection models
    appear to expect one dict per image holding stacked tensors for that
    image only -- confirm, and select the row of the requested frame.
    """

    def __init__(self, root, cus_transforms=None,
                 annotation_path='Datasets/Synthetic_Wolf_1__2019_07_18/Annotation/BonePositions.txt'):
        """
        Args:
            root: directory that contains the image files.
            cus_transforms: optional callable applied to every loaded image.
            annotation_path: text file with the bone positions. Defaults to
                the location that used to be hard-coded in ``__getitem__``.
        """
        self.root = root
        self.transforms = cus_transforms
        self.annotation_path = annotation_path
        # os.listdir returns entries in arbitrary order; sort them so that an
        # index always refers to the same image.
        self.images = sorted(os.listdir(root))

    def __len__(self):
        """Return the number of entries found in ``root``."""
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(img, targets)`` for the image at position ``index``."""
        # `import PIL` by itself does not load the PIL.Image submodule, so
        # import it explicitly instead of relying on another library having
        # done so already.
        from PIL import Image

        # [Images]
        img_path = os.path.join(self.root, self.images[index])
        img = Image.open(img_path).convert("RGB")

        # [Targets]
        labels_data, keypoints_data, boxes_data = self._read_annotations()
        targets = [
            {'labels': labels_data},
            {'keypoints': keypoints_data},
            {'boxes': boxes_data},
        ]

        if self.transforms is not None:
            img = self.transforms(img)

        return img, targets

    def _read_annotations(self):
        """Parse the annotation file into label, keypoint and box lists.

        Returns:
            A ``(labels, keypoints, boxes)`` tuple of lists:
            one ``tensor(1)`` per header token after the first; one
            ``[[x, y, 1]]`` float tensor per coordinate pair; and one
            ``[xmin, ymin, xmax, ymax]`` float tensor per data line that
            contains at least one coordinate pair.
        """
        labels_data = []
        keypoints_data = []
        boxes_data = []

        # The context manager releases the file handle even if parsing fails.
        with open(self.annotation_path, "r") as annotation_file:
            for i, line in enumerate(annotation_file):
                if i == 0:
                    # Header line: space-separated names. Every token after
                    # the first contributes one label.
                    # TODO (from the original author): labels should be the
                    # class label of each ground-truth box, e.g.
                    # torch.ones((num_objs,), dtype=torch.int64).
                    for _ in line.split(' ')[1:]:
                        labels_data.append(torch.as_tensor(1))
                    # The header carries no coordinates.
                    continue

                # Data line: tab-separated, frame number first, followed by
                # alternating x and y values.
                fields = re.split(r'\t+', line.split(' ')[0])

                xs = []
                ys = []
                # Index 0 is the frame number; x values sit at the odd
                # indices with the matching y right after. An x without a y
                # raises IndexError, as before.
                for j in range(1, len(fields), 2):
                    x = float(fields[j])
                    y = float(fields[j + 1])
                    keypoints_data.append(torch.FloatTensor([[x, y, 1]]))
                    xs.append(x)
                    ys.append(y)

                # One box per annotation line, enclosing all of its
                # keypoints, stored as [xmin, ymin, xmax, ymax].
                if xs:
                    boxes_data.append(torch.FloatTensor([min(xs), min(ys), max(xs), max(ys)]))

        return labels_data, keypoints_data, boxes_data

In [5]:
# Fetch the first sample to inspect the (image, targets) structure the dataset returns.
LoadQuadrupedDataset('Datasets/Synthetic_Wolf_1__2019_07_18/Images/1')[0]

(<PIL.Image.Image image mode=RGB size=256x256 at 0x251947E2E10>,
 [{'labels': [tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1),
    tensor(1)]},
  {'keypoints': [tensor([[111.2445, 169.4638,   1.0000]]),
    tensor([[110.2057, 157.4300,   1.0000]]),
    tensor([[109.3295, 148.9941,   1.0000]]),
    tensor([[110.3106, 143.4206,   1.0000]]),
    tensor([[116.3418, 141.8787,   1.0000]]),
    tensor([[115.8450, 154.1305,   1.0000]]),
    tensor([[122.6154, 157.9207,   1.0000]]),
    tensor([[129.8520, 177.2709,   1.0000]]),
    tensor([[120.1190, 181.8350,   1.0000]]),
    tensor([[112.5925, 187.8032,   1.0000]]),
    tensor([[103.7195, 154.1634,   1.0000]]),
    tensor([[ 

In [6]:
def get_transform(train):
    """Build the preprocessing pipeline applied to each image.

    Args:
        train: accepted but not consulted; the same pipeline is returned
            regardless of its value.

    Returns:
        A ``transforms.Compose`` whose only step is ``transforms.ToTensor()``.
    """
    return transforms.Compose([transforms.ToTensor()])

In [7]:
# Training using new method
from engine import train_one_epoch, evaluate

dataset = LoadQuadrupedDataset('Datasets/Synthetic_Wolf_1__2019_07_18/Images/1', get_transform(train=True))
# NOTE(review): no collate_fn is given, so samples go through the default
# collation -- confirm that train_one_epoch accepts batches in that form.
dataLoader = torch.utils.data.DataLoader(dataset, batch_size=20, shuffle=False, num_workers=0)

# Only the backbone starts from pretrained weights; the detection heads are
# created for 2 classes and 17 keypoints.
# NOTE(review): confirm that num_keypoints matches the number of keypoints
# annotated per animal in the dataset.
keypoint_rcnn_model = models.detection.keypointrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=2, num_keypoints=17, pretrained_backbone=True)
# train_one_epoch is handed `device`, presumably to place the batches there;
# the model has to live on the same device or the forward pass fails when
# CUDA is available.
keypoint_rcnn_model.to(device)

# NOTE(review): criterion is not referenced in the visible cells -- confirm
# whether it is still needed.
criterion = nn.CrossEntropyLoss()

# Optimise only the parameters that require gradients.
params = [p for p in keypoint_rcnn_model.parameters() if p.requires_grad]
optimizer_ft = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

In [None]:
num_epochs = 1

for epoch in range(num_epochs):
    train_one_epoch(keypoint_rcnn_model, optimizer_ft, dataLoader, device, epoch, print_freq=10)
    # Advance the StepLR instance created for optimizer_ft. The bare name
    # `lr_scheduler` is the torch.optim.lr_scheduler module, which has no
    # step() and would raise AttributeError.
    exp_lr_scheduler.step()
    #evaluate(keypoint_rcnn_model)