In [1]:
import argparse

import cv2
import numpy as np
import torch
import time
from core.cv.pose_estimation_modules_2.with_mobilenet import PoseEstimationWithMobileNet
from core.cv.pose_estimation_modules_2.keypoints import extract_keypoints, group_keypoints, BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
from core.cv.pose_estimation_modules_2.load_state import load_state
import math



def normalize(img, img_mean, img_scale):
    """Shift and scale pixel values.

    The input is first copied into a float32 array, then ``img_mean`` is
    subtracted and the difference is multiplied by ``img_scale``.

    Args:
        img: Array-like of pixel values.
        img_mean: Scalar or broadcastable sequence subtracted from every pixel.
        img_scale: Multiplier applied after the mean has been subtracted.

    Returns:
        A new NumPy array holding ``(img - img_mean) * img_scale``.
    """
    pixels = np.array(img, dtype=np.float32)
    centered = pixels - img_mean
    return centered * img_scale


def pad_width(img, stride, pad_value, min_dims):
    """Pad ``img`` with a constant border up to stride-aligned minimum dimensions.

    The target height is ``min_dims[0]`` rounded up to a multiple of
    ``stride``; the target width is ``max(min_dims[1], image width)`` rounded
    up to a multiple of ``stride``. The missing rows/columns are split between
    the two sides, with any odd remainder going to the bottom/right.

    Unlike the previous implementation, the caller's ``min_dims`` list is left
    untouched, so it can safely be reused across calls.

    Args:
        img: Image array whose ``shape`` unpacks to ``(height, width, channels)``.
        stride: Alignment, in pixels, applied to the target dimensions.
        pad_value: Border value forwarded to ``cv2.copyMakeBorder``.
        min_dims: Two-element sequence ``[min_height, min_width]``.

    Returns:
        ``(padded_img, pad)`` where ``pad`` is ``[top, left, bottom, right]``.
    """
    h, w, _ = img.shape
    # The height used for the padding arithmetic is clamped to min_dims[0];
    # an image taller than min_dims[0] is therefore padded as if it were
    # exactly min_dims[0] rows high.
    h = min(min_dims[0], h)

    # Work on locals instead of writing the rounded sizes back into min_dims.
    target_h = math.ceil(min_dims[0] / float(stride)) * stride
    target_w = math.ceil(max(min_dims[1], w) / float(stride)) * stride

    top = int(math.floor((target_h - h) / 2.0))
    left = int(math.floor((target_w - w) / 2.0))
    bottom = int(target_h - h - top)
    right = int(target_w - w - left)
    pad = [top, left, bottom, right]

    padded_img = cv2.copyMakeBorder(img, top, bottom, left, right,
                                    cv2.BORDER_CONSTANT, value=pad_value)
    return padded_img, pad


In [2]:
class pose_estimation_python(object):
    """2D pose estimator built around ``PoseEstimationWithMobileNet``.

    The constructor loads a checkpoint into the network, switches it to eval
    mode and moves it to the requested device. ``predict`` runs a single image
    through the network and groups the detected keypoints into poses.

    Attributes:
        net: The network instance (``None`` on the class until constructed).
        device: Device the network and input tensors are placed on.
        stride: Alignment passed to ``pad_width`` when padding the input.
        upsample_ratio: Factor by which network outputs are resized.
        height_size: Default network input height used by ``predict``.
    """

    net = None

    # Number of heatmap channels scanned for keypoints (the 19th is background).
    NUM_KEYPOINTS = 18

    def __init__(self, device='cpu', checkpoint_path='models/pose_estimation_2d.tar'):
        """Create the network and load its weights.

        Args:
            device: Torch device specifier (e.g. ``'cpu'``, ``'cuda'``,
                ``'cuda:1'``). Also used as ``map_location`` for the checkpoint.
            checkpoint_path: Path of the checkpoint file to load.
        """
        self.device = device

        self.net = PoseEstimationWithMobileNet()
        # NOTE(review): torch.load unpickles the file — only load checkpoints
        # that come from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        load_state(self.net, checkpoint)
        self.net = self.net.eval()
        self.stride = 8
        self.upsample_ratio = 4
        self.height_size = 256

        # Honour the exact device requested (previously any non-'cpu' value
        # fell through to .cuda(), i.e. the current default CUDA device).
        self.net = self.net.to(self.device)

    def _to_upsampled_map(self, stage_output):
        """Convert one network output tensor to an upsampled NumPy array.

        The tensor is squeezed, moved to the CPU, transposed with axes
        ``(1, 2, 0)`` and resized by ``self.upsample_ratio`` in both directions.
        """
        as_array = np.transpose(stage_output.squeeze().detach().cpu().numpy(), (1, 2, 0))
        return cv2.resize(as_array, (0, 0), fx=self.upsample_ratio, fy=self.upsample_ratio,
                          interpolation=cv2.INTER_CUBIC)

    def infer_fast(self, img, net_input_height_size,
                   pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1 / 256):
        """Run the network on one image and group keypoints into poses.

        Args:
            img: Image array whose ``shape`` unpacks to three values
                (height first).
            net_input_height_size: Height the image is rescaled to before
                being fed to the network.
            pad_value: Border value used when padding the rescaled image.
            img_mean: Value(s) subtracted from the pixels during normalization.
            img_scale: Multiplier applied after mean subtraction.

        Returns:
            ``(pose_entries, all_keypoints, scale, pad, heatmaps, pafs)``:
            the outputs of ``group_keypoints``, the resize factor applied to
            the image, the padding from ``pad_width``, and the upsampled
            heatmap and PAF arrays.

        Raises:
            ValueError: If ``img`` is ``None`` (e.g. an image that failed to load).
        """
        if img is None:
            raise ValueError('img is None; the image could not be loaded')

        height, _, _ = img.shape
        scale = net_input_height_size / height

        scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        scaled_img = normalize(scaled_img, img_mean, img_scale)
        min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)]
        padded_img, pad = pad_width(scaled_img, self.stride, pad_value, min_dims)

        tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
        # Keep the input on the same device as the network.
        tensor_img = tensor_img.to(self.device)

        stages_output = self.net(tensor_img)

        # The last two outputs are used as heatmaps and PAFs respectively.
        heatmaps = self._to_upsampled_map(stages_output[-2])
        pafs = self._to_upsampled_map(stages_output[-1])

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(self.NUM_KEYPOINTS):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)

        return pose_entries, all_keypoints, scale, pad, heatmaps, pafs

    def predict(self, img, height_size=None):
        """Run ``infer_fast`` with gradient tracking disabled.

        Args:
            img: Image array to process.
            height_size: Network input height; defaults to ``self.height_size``
                when omitted.

        Returns:
            Whatever ``infer_fast`` returns for ``img``.
        """
        if height_size is None:
            height_size = self.height_size
        with torch.no_grad():
            return self.infer_fast(img, height_size)
        
# Build one estimator for the notebook using the constructor defaults
# (device='cpu'); construction loads the model checkpoint from disk.
pose_estimator = pose_estimation_python()

In [3]:
# Input image for the demo run.
img_path = '../data/1.jpg'
# img_path = '1.png'

stride = 8
upsample_ratio = 4
color = [0, 224, 255]

image = cv2.imread(img_path, cv2.IMREAD_COLOR)
# cv2.imread signals failure by returning None instead of raising, so fail
# here with a clear message rather than deep inside the estimator.
if image is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))

# NOTE(review): 265 is passed as the network input height while the estimator's
# own default height_size is 256 — confirm this is not a typo.
pose_entries, all_keypoints, scale, pad, heatmaps, pafs = pose_estimator.predict(image, 265)

In [4]:
from core.cv.pose_estimation_modules.parse_poses import parse_poses


In [5]:
from pose_extractor import extract_poses

# Reference person height; presumably in centimetres — TODO confirm.
AVG_PERSON_HEIGHT = 180

# pelvis (body center) is missing, id == 2
map_id_to_panoptic = [
    1, 0, 9, 10, 11, 3,
    4, 5, 12, 13, 14, 6,
    7, 8, 15, 16, 17, 18,
]

# Six triples of keypoint indices; presumably each triple lists the joints
# along one limb chain — verify against the code that consumes `limbs`.
limbs = [
    [18, 17, 1],
    [16, 15, 1],
    [5, 4, 3],
    [8, 7, 6],
    [11, 10, 9],
    [14, 13, 12],
]


def get_root_relative_poses(inference_results):
    """Extract poses from a ``(features, heatmap, paf_map)`` triple.

    ``inference_results`` must unpack into exactly three items. The heatmap
    is passed to ``extract_poses`` without its last entry along the first
    axis, together with the PAF map and an upsample ratio of 4; the first
    element of the result is kept as the found poses.

    NOTE(review): the function is unfinished — the found poses are computed
    but nothing is returned yet, so callers currently receive ``None``. An
    earlier variant took element ``[0]`` of each of the three inputs; confirm
    which input layout is intended.
    """
    _features, heatmap, paf_map = inference_results

    extraction = extract_poses(heatmap[:-1], paf_map, 4)
    found_poses = extraction[0]

In [16]:
# NOTE(review): `heatmaps` and `pafs` are the arrays returned by
# pose_estimator.predict(), which infer_fast has already transposed with axes
# (1, 2, 0) and resized by its own upsample_ratio; `heatmaps` is also passed
# whole here. get_root_relative_poses instead passes heatmap[0:-1] to
# extract_poses. Confirm which layout extract_poses expects — TODO.
upsample_ratio = 4
found_poses = extract_poses(heatmaps, pafs, upsample_ratio)[0]