In [23]:
import cv2
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from torchvision import transforms
import util
from torchsummary import summary
from body import Body
import import_ipynb
from train_model import BodyposeBackbone, MainModel
from coco_dataset import COCODataset, transform_image
from copy import deepcopy
import random
import pandas as pd
from tqdm import tqdm
import sys
import os
from train_utils import chunk_data

  from scipy.ndimage.filters import gaussian_filter


Define number of keypoints of new neural network

In [24]:
NO_OF_KEYPOINTS = 17

In [25]:
path_to_coco_dataset = '/media/jakub/One Touch/coco_pose/coco2017labels-pose/coco-pose'

In [26]:
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
            [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
            [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 255]]

Create backbone model

In [27]:
backbone_model_path = '../model/body_pose_model.pth'

In [28]:
backbone_model = BodyposeBackbone()

Load backbone params from file

In [29]:
backbone_params = torch.load(backbone_model_path)

In [30]:
backbone_dict = util.transfer(backbone_model, backbone_params)

In [31]:
backbone_model.load_state_dict(backbone_dict)

<All keys matched successfully>

In [32]:
main_model = MainModel(backbone_model, NO_OF_KEYPOINTS)

Create dataset from COCO .json file

In [33]:
train_data = COCODataset(path_to_coco_dataset, transforms=transform_image(train=True))

loading annotations into memory...
Done (t=0.12s)
creating index...
index created!


In [34]:
# Skeleton defines which joints are connected. Because of numbering from 1 I subtract it
skeleton = train_data.coco.cats[1]['skeleton']
skeleton = [[connection[0]-1, connection[1]-1] for connection in skeleton]

Show example image

In [35]:
index = random.randint(0, 100)
# load image
torch_image, image, target= train_data[index]

reference_image = image.copy()
model_output_image = image.copy()
# draw keypoints
for keypoint in target:
    for point in keypoint:
        x = point[0]
        y = point[1]
        if x != 0 and y != 0:
            cv2.circle(reference_image, (x, y), 3, (0, 0, 255), -1)
cv2.imshow('Example image', reference_image)
cv2.waitKey(10_000)
cv2.destroyAllWindows()

In [36]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [37]:
device = torch.device("cpu")

In [38]:
# image as torch tensor with shape as (CxHxW)
torch_image.size()

torch.Size([3, 480, 640])

In [17]:
output = main_model(torch_image)
part_affinity_field_maps, confidence = output
part_affinity_field_maps = part_affinity_field_maps.detach().numpy()

(34,)

In [18]:
part_affinity_field_maps = np.average(part_affinity_field_maps, axis=1)
part_affinity_field_maps = np.average(part_affinity_field_maps, axis=1)

In [19]:
# chunk the data
part_affinity_field_maps_chunk = chunk_data(part_affinity_field_maps, 2)

for point in part_affinity_field_maps_chunk:
    x = point[0]
    y = point[1]
    if x != 0 and y != 0:
        cv2.circle(model_output_image, (int(x), int(y)), 3, (255, 0, 0), -1)
cv2.imshow('Example image', model_output_image)
cv2.waitKey(10_000)
cv2.destroyAllWindows()

In [20]:
loss = torch.nn.MSELoss()
params = [p for p in main_model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params)

In [72]:
def train(main_model, loss, optimizer, inputs, outputs):
    optimizer.zero_grad()

    main_model.train()

    logits, confidence = main_model(inputs)
    logits = logits.detach().numpy()
    logits = np.average(logits, axis=1)
    logits = np.average(logits, axis=1)
    logits = torch.FloatTensor(logits)
    output = loss.forward(logits, outputs)
    output.backward()
    optimizer.step()

    # return logits
    return output.item()

In [92]:
epochs = 1

for epoch in range(epochs):
    for id in train_data.ids:
        torch_image, image, targets = train_data[id]
        targets = torch.FloatTensor(targets)
        if targets.size()[0] > 1:
            for target in targets:
                target = target[:,:-1].flatten()
                output = train(main_model=main_model, loss=loss, optimizer=optimizer, inputs=torch_image, outputs=target)

        else:
            targeta = targets[:,:,:-1].flatten()
            output = train(main_model=main_model, loss=loss, optimizer=optimizer, inputs=torch_image, outputs=targets)
        print(output)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [64]:
torch_target = torch.FloatTensor(target)

In [86]:
for target in torch_target:
    print(target.size())

torch.Size([17, 3])


In [88]:
torch_target.size()

torch.Size([1, 17, 3])