In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from torch.optim import lr_scheduler
import numpy as np
from torchvision import datasets,models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import cv2
import PIL
import re


plt.ion()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
# Check to see if we can view images of wolves
image = cv2.imread('Datasets/Synthetic_Wolf_1__2019_07_18/Images/1/00100.png')
cv2.imshow("img", image)
cv2.waitKey(0) 
cv2.destroyAllWindows()

In [None]:
# TODO: Display 5 images of wolves with ground truth keypoints
# TODO: Display 5 images of wolves with ground truth bounding boxes

In [None]:
# Implement pretrained keypoint_rcnn for keypoint detection on human picture
model = models.detection.keypointrcnn_resnet50_fpn(pretrained=True)

model.eval()

image = cv2.imread('images/player2.jpg')
image_tensor = transforms.functional.to_tensor(image)

output = model([image_tensor])

for instance in output[0]["keypoints"]:
    for kp in instance:
        cv2.circle(image, (int(kp[0]), int(kp[1])), int(3*kp[2]), (127, 200, 127), 2, cv2.LINE_AA)
    
cv2.imshow("img", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [165]:
# Load and process dataset for input to keypoint_rcnn model
class LoadQuadrupedDataset(torch.utils.data.Dataset):
    def __init__(self, root, cus_transforms=None):
        self.root = root
        self.transforms = cus_transforms
        #self.images =  datasets.ImageFolder(os.path.join(self.root), transform=transforms.ToTensor())
        self.images = list(os.listdir(os.path.join(root)))
        #self.train_images_loader = torch.utils.data.DataLoader(self.images, batch_size=20, shuffle=False, num_workers=0)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.root, self.images[index])
        img = PIL.Image.open(img_path).convert("RGB")
        # [Images]
        # The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each 
        # image, and should be in ``0-1`` range. Different images can have different sizes.
        #imageDirectory = 'Datasets/Synthetic_Wolf_1__2019_07_18/Images/'
        #imageDataset = datasets.ImageFolder(os.path.join(self.root), transform=transforms.ToTensor())
        
        #self.images = torch.utils.data.DataLoader(self.images, batch_size=20, shuffle=False, num_workers=0)

        #mean = 0.
        #std = 0.
        print('loading images...')
        #for images, _ in dataLoader:
        #    batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
        #    images = images.view(batch_samples, images.size(1), -1)
        #    mean += images.mean(2).sum(0)
        #    std += images.std(2).sum(0)
            # print("Processing Images: " + str(int(i*100/len(dataLoader))) + "%", end='\r')


        #mean /= len(dataLoader.dataset)
        #std /= len(dataLoader.dataset)

        # Normalisation only for training
        # if required to augmented, the postition annotation of the the image must be augmented as well
        # Normalisation only for validation
        data_transforms = {
            'train': transforms.Compose([
                #No need to crop as images as 256 x 256 else uncomment below
                # transforms.Resize(256)
                transforms.ToTensor(),
                #transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                #No need to crop as images as 256 x 256 else uncomment below
                       transforms.ToTensor(),
                #transforms.Normalize(mean, std)
            ])
        }
        
        #images = datasets.ImageFolder(os.path.join(self.root), transform=data_transforms['train'])
        print("Images obtained")
        #dataLoader = torch.utils.data.DataLoader(imageDataset, batch_size=20, shuffle=False, num_workers=4)

        # [Annotation]
        # keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format.
        # During training, the model expects both the input tensors, as well as a targets (list of dictionary),
        #    containing:
        #        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values
        #          between 0 and H and 0 and W
        #        - labels (Int64Tensor[N]): the class label for each ground-truth box
        #        - keypoints (FloatTensor[N, K, 3]): the K keypoints location for each of the N instances, in the format [x, y, visibility], where visibility=0 means that the keypoint is not visible.
        annotation_path = 'Datasets/Synthetic_Wolf_1__2019_07_18/Annotation/BonePositions.txt'
        file = open(annotation_path, "r")

        targets = {}
        labels = []
        keypoints = []
        boxes = []
        count = 0

        for i, line in enumerate(file):
            # Extracting labels, Labels of keypoints are stored in the first index: index 0
            if i == 0:
                for j, keypoint_label in enumerate(line.split(' '),1):
                    if j is not 0:
                        labels.append(keypoint_label)
            # Exracting keypoints, keypoints are stored after the first line of the file
            individual_cordinates = line.split(' ')[0]
            individual_cordinates = re.split(r'\t+', individual_cordinates)
            #print("Processing Metadata: " + str(int(i*100/34301)) + "%", end='\r')
            for j, cordinate in enumerate(individual_cordinates):
                count += 1        
                # Skip even iterations as including it duplicates (x,y) cordinates entry
                if j%2 == 0:
                    continue
                # Skip the value in the first index of the list as this is the frame number    
                if j is not 0:
                    keypoints.append([i, [float(cordinate), float(individual_cordinates[j + 1]), 1]])
                    if j is 1:
                        # Obtain the boxes of each samples, theese are: xmin, xmax, ymin, ymax
                        xmin = float(cordinate)
                        xmax = float(cordinate)
                        ymin = float(individual_cordinates[j + 1])
                        ymax = float(individual_cordinates[j + 1])
                        boxes.append([xmin, ymin, xmax, ymax])
                    else:
                        xmin = xmin if (xmin < float(cordinate)) else float(cordinate)
                        xmax = xmax if (xmax > float(cordinate)) else float(cordinate)
                        ymin = ymin if (ymin < float(individual_cordinates[j + 1])) else float(individual_cordinates[j + 1])
                        ymax = ymax if (ymax > float(individual_cordinates[j + 1])) else float(individual_cordinates[j + 1])
                        boxes.append([xmin, ymin, xmax, ymax])

        targets['labels'] = labels
        targets['keypoints'] = keypoints
        targets['boxes'] = boxes
        
        print('Completed processing input data')
        #targets =  torch.from_numpy(targets)
        if self.transforms is not None:
            img = self.transforms(img)
            #targets = self.transforms(targets)

        #return {'images': images, 'targets': targets}
        return img, targets

In [None]:
dataset = LoadQuadrupedDataset('Datasets/Synthetic_Wolf_1__2019_07_18/Images/')
dataLoader = torch.utils.data.DataLoader(dataset, batch_size=20, shuffle=False, num_workers=4)

In [None]:
# Generic function to train a model
# Save the best model and schdule the learning rate

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train() # Set model to training mode
            else:
                model.eval() # Set model to evaluation mode
            
            running_loss = 0.0
            running_corrects = 0
            
            # Iterate over data
            for images, targets in dataset:
                #inputs = inputs.to(device)
                #labels = labels.to(device)
                
                # zero the parameter gradients
                optimizer.zero_grad()
                print(1)
                # Forward
                # Track the history if in training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images, targets)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)
                    
                    # Backward + optimize if in training phase
                    if phase ==  'train':
                        print(2)
                        loss.backward()
                        optimizer.step()
                
                #statistics 
                running_loss += loss.item() * inputs.size(0)
                #running_corrects += torch.sum(preds  == labels.data)
            print(3)
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.dbouble() / dataset_sizes[phase]
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
        print()
    
    
    
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model              

In [None]:
# Retraining the keypoint_rcnn model
keypoint_rcnn_model = models.detection.keypointrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=2, num_keypoints=20, pretrained_backbone=True)
#print(keypoint_rcnn_model)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(keypoint_rcnn_model.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


In [None]:
# Train new keypoint_rcnn model
keypoint_rcnn_model = train_model(keypoint_rcnn_model, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)

In [166]:
def get_transform(train):
    transforms_container = []
    transforms_container.append(transforms.ToTensor())
    return transforms.Compose(transforms_container)

In [167]:
# Training using new method
from engine import train_one_epoch, evaluate

dataset = LoadQuadrupedDataset('Datasets/Synthetic_Wolf_1__2019_07_18/Images/1', get_transform(train=True))
dataLoader = torch.utils.data.DataLoader(dataset, batch_size=20, shuffle=False, num_workers=0)

keypoint_rcnn_model = models.detection.keypointrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=2, num_keypoints=17, pretrained_backbone=True)
criterion = nn.CrossEntropyLoss()

params = [p for p in keypoint_rcnn_model.parameters() if p.requires_grad]
optimizer_ft = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

#optimizer_ft = optim.SGD(keypoint_rcnn_model.parameters(), lr=0.001, momentum=0.9)
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
num_epochs = 1

for epoch in range(num_epochs):
    train_one_epoch(keypoint_rcnn_model, optimizer_ft, dataLoader, device, epoch, print_freq=10)
    lr_scheduler.step()
    #evaluate(keypoint_rcnn_model)

loading images...
Images obtained
['Frame', 'Hips', 'Spine1', 'Neck', 'Head', 'HeadSite', 'LeftShoulder', 'LeftArm', 'LeftForeArm', 'LeftHand', 'LeftHandSite', 'RightShoulder', 'RightArm', 'RightForeArm', 'RightHand', 'RightHandSite', 'LeftUpLeg', 'LeftLeg', 'LeftFoot', 'LeftFootSite', 'RightUpLeg', 'RightLeg', 'RightFoot', 'RightFootSite', 'Tail', 'Tail1', 'Tail1Site\n']
27
Completed processing input data
loading images...
Images obtained
['Frame', 'Hips', 'Spine1', 'Neck', 'Head', 'HeadSite', 'LeftShoulder', 'LeftArm', 'LeftForeArm', 'LeftHand', 'LeftHandSite', 'RightShoulder', 'RightArm', 'RightForeArm', 'RightHand', 'RightHandSite', 'LeftUpLeg', 'LeftLeg', 'LeftFoot', 'LeftFootSite', 'RightUpLeg', 'RightLeg', 'RightFoot', 'RightFootSite', 'Tail', 'Tail1', 'Tail1Site\n']
27
Completed processing input data
loading images...
Images obtained
['Frame', 'Hips', 'Spine1', 'Neck', 'Head', 'HeadSite', 'LeftShoulder', 'LeftArm', 'LeftForeArm', 'LeftHand', 'LeftHandSite', 'RightShoulder', 'Ri