In [1]:
import cv2
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from torchvision import transforms
import util
from torchsummary import summary
from body import Body
import import_ipynb
from train_model import BodyposeBackbone, MainModel
from coco_dataset import COCODataset, transform_image
from copy import deepcopy
import random
import pandas as pd
from tqdm import tqdm
import sys
import os

  from scipy.ndimage.filters import gaussian_filter


importing Jupyter notebook from train_model.ipynb
importing Jupyter notebook from coco_dataset.ipynb


  from pandas.core.computation.check import NUMEXPR_INSTALLED


Define the number of keypoints for the new neural network

In [2]:
# Number of keypoints for the new network; passed to MainModel when the model is built.
# NOTE(review): 17 presumably matches the COCO person-keypoint layout — confirm.
NO_OF_KEYPOINTS = 17

In [3]:
# Root directory of the COCO pose dataset.
# Set the COCO_POSE_DATASET_DIR environment variable to point at a local copy;
# when it is unset the original hard-coded location is used, so existing setups keep working.
path_to_coco_dataset = os.environ.get(
    'COCO_POSE_DATASET_DIR',
    '/media/jakub/One Touch/coco_pose/coco2017labels-pose/coco-pose',
)

In [4]:
# Fixed palette of 19 three-channel colours (each channel in 0-255).
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0],
    [170, 255, 0], [85, 255, 0], [0, 255, 0],
    [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255],
    [0, 85, 255], [0, 0, 255], [85, 0, 255],
    [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
    [255, 255, 255],
]

Create backbone model

In [5]:
# Checkpoint file holding the backbone parameters (path is relative to the working directory).
backbone_model_path = '../model/body_pose_model.pth'

In [6]:
# Instantiate the backbone network class imported from train_model.
backbone_model = BodyposeBackbone()

Load backbone params from file

In [7]:
# Read the backbone checkpoint from disk.
# map_location='cpu' lets a checkpoint that was saved from a GPU be opened on a
# CPU-only machine; the tensors are copied into the model by load_state_dict anyway.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
backbone_params = torch.load(backbone_model_path, map_location='cpu')

In [8]:
# Build the state dict that is loaded into backbone_model from the checkpoint contents.
# NOTE(review): util.transfer is not visible here — presumably it renames the
# checkpoint keys to match the model's parameter names; confirm.
backbone_dict = util.transfer(backbone_model, backbone_params)

In [9]:
# Copy the prepared weights into the backbone; the returned key-matching report is shown as the cell output.
backbone_model.load_state_dict(backbone_dict)

<All keys matched successfully>

In [10]:
# Build the main model from the loaded backbone and the configured keypoint count.
main_model = MainModel(backbone_model, NO_OF_KEYPOINTS)

Create dataset from COCO .json file

In [11]:
# Create the training dataset from the COCO pose directory, using the image transform built with train=True.
train_data = COCODataset(path_to_coco_dataset, transforms=transform_image(train=True))

loading annotations into memory...
Done (t=0.24s)
creating index...
index created!


In [12]:
# The skeleton lists which joints are connected. COCO numbers joints from 1,
# so each endpoint is shifted down by one to obtain 0-based indices.
skeleton = [
    [connection[0] - 1, connection[1] - 1]
    for connection in train_data.coco.cats[1]['skeleton']
]

In [13]:
def random_index(max_index=100, dataset=None):
    """Return the position of a random image that shows at least one labelled person.

    The returned value indexes into ``dataset.coco.getImgIds()``. The candidate
    positions ``0 .. max_index`` are tried in random order, so every qualifying
    image is equally likely and the search always terminates.

    Args:
        max_index: Largest position (inclusive) that may be returned. It is
            clamped to the number of images actually available.
        dataset: Object exposing a pycocotools ``COCO`` instance as ``.coco``.
            Defaults to the notebook-level ``train_data``.

    Returns:
        int: Position of an image with at least one labelled keypoint.

    Raises:
        ValueError: If none of the candidate images has a labelled keypoint.
    """
    coco = (train_data if dataset is None else dataset).coco
    img_ids = coco.getImgIds()  # fetched once instead of once per annotation

    # Clamp so that small datasets cannot produce an out-of-range position.
    last_position = min(max_index, len(img_ids) - 1)
    candidates = list(range(last_position + 1))
    random.shuffle(candidates)

    for index in candidates:
        # Ask the COCO index for this image's annotations only.
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_ids[index]))
        for ann in anns:
            # Keypoints are stored flat as [x1, y1, v1, x2, y2, v2, ...]; only the
            # visibility flags (every third value) tell whether a point is labelled.
            # Looking at the whole list would mistake a coordinate of 1 or 2 for a flag.
            visibility_flags = ann.get('keypoints', ())[2::3]
            if any(flag in (1, 2) for flag in visibility_flags):
                return index

    raise ValueError(
        'no image with labelled keypoints among the first '
        f'{last_position + 1} images'
    )

In [14]:
# bboxes = [ann['bbox'] for ann in anns]

# # For all bboxes change all element to int
# bboxes = [[int(element) for element in box] for box in bboxes]

In [15]:
# # Create keypoints and split them into chunks. 
# # Each chunk represents (x, y, v) where v stands for visible. For more info look into notes
# keypoints = [ann['keypoints'] for ann in anns]
# new_keypoints = []
# for keypoint in keypoints:
#     new_keypoints.append([[keypoint[i], keypoint[i+1], keypoint[i+2]] for i in range(0, len(keypoint), 3)])
# keypoints = new_keypoints

Show example image

In [16]:
# # load image
# image, target = train_data._load_image(train_data.coco.getImgIds()[index])

# # draw bboxes
# for bbox in bboxes:
#     cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 0, 255))

# # draw connections
# for i, connection in enumerate(skeleton):
#     for keypoint in keypoints:
#         point1 = keypoint[connection[0]]
#         point2 = keypoint[connection[1]]
#         if point1[2] != 0 and point2[2] != 0:
#             cv2.line(image, point1[:-1], point2[:-1], color=colors[i], thickness=3)
# cv2.imshow('Example image', image)
# cv2.waitKey(5_000)
# cv2.destroyAllWindows()

In [23]:
# Pick a random image that has labelled people and fetch it with its keypoints
index = random_index()
image_id = train_data.coco.getImgIds()[index]
image, target = train_data._load_image(image_id)

# Mark each keypoint with a filled dot of radius 3; points with a zero x or y are skipped
for keypoint in target:
    for point in keypoint:
        x, y = point[0], point[1]
        if x == 0 or y == 0:
            continue
        cv2.circle(image, (x, y), 3, (0, 0, 255), -1)

# Show the annotated image for up to 10 seconds (or until a key is pressed), then close the window
cv2.imshow('Example image', image)
cv2.waitKey(10_000)
cv2.destroyAllWindows()

In [None]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; an empty batch
    yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Target device for computation.
# NOTE(review): in the cells visible here nothing is moved to this device yet.
device = torch.device("cpu")

In [None]:
# Load the image for the first image id together with the target returned alongside it.
images, keypoints = train_data._load_image(train_data.coco.getImgIds()[0])

In [None]:
# image shape in (HxWxC)
images.shape

(480, 640, 3)

In [None]:
# Convert the HxWxC image array into a CxHxW torch tensor.
torch_image = transforms.ToTensor()(images)

In [None]:
# image as torch tensor with shape as (CxHxW)
torch_image.size()

torch.Size([3, 480, 640])

In [None]:
# Run the model once on the single image tensor.
# NOTE(review): the tensor is (3, 480, 640) with no batch dimension — confirm
# MainModel accepts unbatched input.
output = main_model(torch_image)

In [None]:
# The model output is a pair, unpacked here into its two parts.
# NOTE(review): the order (part affinity fields first, confidence second) is
# taken from these names only — confirm against MainModel.forward.
part_affinity_field_maps, confidence = output

In [None]:
# Mean-squared-error criterion
loss = torch.nn.MSELoss()
# Only the parameters that require gradients are handed to the optimizer
params = list(filter(lambda p: p.requires_grad, main_model.parameters()))
# Adam with its default settings
optimizer = torch.optim.Adam(params)

In [None]:
def train(main_model, loss, optimizer, inputs, outputs):
    """Run a single optimisation step and return the loss as a Python number.

    Args:
        main_model: Model being trained; it is switched to training mode.
        loss: Loss module called as ``loss(prediction, target)``.
        optimizer: Optimizer whose gradients are cleared before the step and
            which is stepped after the backward pass.
        inputs: Batch fed to the notebook-level ``backbone_model``.
        outputs: Target passed to the loss together with the prediction.

    Returns:
        The value of ``.item()`` on the computed loss.
    """
    optimizer.zero_grad()

    main_model.train()

    # NOTE(review): this uses the notebook-level backbone_model and then feeds its
    # result to main_model, which was itself constructed from that backbone and is
    # called directly on an image elsewhere in this notebook — confirm the backbone
    # is not being applied twice.
    out1 = backbone_model(inputs)
    logits = main_model(out1)
    # Call the module itself rather than .forward() so registered hooks are run.
    output = loss(logits, outputs)
    output.backward()
    optimizer.step()

    return output.item()

In [None]:
# Number of passes over the dataset; 0 leaves the loop below disabled.
epochs = 0

# Placeholder loop: the annotations and the image are loaded for every image id,
# but the results are not used yet.
for epoch in range(epochs):
    # `image_id` instead of `id`, so the builtin id() is not shadowed at notebook level
    for image_id in train_data.ids:
        train_data.coco.loadAnns(train_data.coco.getAnnIds(imgIds=image_id))
        train_data._load_image(image_id)

KeyboardInterrupt: 