In [52]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json
import copy

In [53]:
def show_images(images, titles):
    n = len(images)
    plt.figure(figsize=(10,10))
    for i in range(n):
        plt.subplot(1,n,i+1)
        plt.imshow(images[i], cmap='gray')
        plt.title(titles[i])
        plt.axis('off')
    plt.show()

In [54]:
coco = COCO('../coco2014/annotations/person_keypoints_train2014.json')


loading annotations into memory...
Done (t=4.02s)
creating index...
index created!


In [55]:
movenet = hub.load("https://www.kaggle.com/models/google/movenet/TensorFlow2/singlepose-lightning/4")

In [56]:
image_dir = "../coco2014/images/train2014"

human_images_anns = []
blacklist = ["../coco2014/images/train2014/COCO_train2014_000000524291.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000262191.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000262171.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000524317.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000524325.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000000049.jpg",
             "../coco2014/images/train2014/COCO_train2014_000000000061.jpg",

             ] #images that only include part of a person and should be excluded 
human_image_ids = set()
for ann in coco.anns.values():
    if ann['category_id'] == 1 and ann['iscrowd'] == 0: #human, not a crowd
        human_image_ids.add(ann['image_id'])
        human_images_anns.append(ann)
count = 0
human_image_paths = []
for img_id in human_image_ids:
    if(count < 20):
        img_info = coco.loadImgs(img_id)[0]
        img_path = f"{image_dir}/{img_info['file_name']}"
        if(img_path not in blacklist):
            human_image_paths.append(img_path)
            count+=1

print(len(human_image_paths))

20


In [57]:
keypoint_names = ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder',
                  'left_elbow', 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
                  'left_knee', 'right_knee', 'left_ankle', 'right_ankle']

connections = [(0, 1), (0, 2), (1, 3), (2, 4), (0, 5), (0, 6), (5, 7), (7, 9), (6, 8), (8, 10),
               (5, 6), (5, 11), (6, 12), (11, 12), (11, 13), (13, 15), (12, 14), (14, 16)]

def detect_pose_static(image_path):
    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    v = cv2.equalizeHist(v)
    hsv_image = cv2.merge([h, s, v])
    final_image = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

    image_resized = tf.image.resize_with_pad(tf.expand_dims(final_image, axis=0), 192, 192) #192 for lightning
    image_np = image_resized.numpy().astype(np.int32)
    outputs = movenet.signatures["serving_default"](tf.constant(image_np))
    keypoints = outputs['output_0'].numpy()

    original_image_resized = tf.image.resize_with_pad(tf.expand_dims(image, axis=0), 192, 192)
    original_image_np = original_image_resized.numpy().astype(np.int32)
    outputs = movenet.signatures["serving_default"](tf.constant(original_image_np))
    original_keypoints = outputs['output_0'].numpy()

    return keypoints,original_keypoints

def visualize_pose_static(image_path, keypoints, original_keypoints):
    image = cv2.imread(image_path)
    image_original = cv2.imread(image_path)
    keypoints = np.array(keypoints)
    original_keypoints = np.array(original_keypoints)
    if keypoints.shape == (1, 1, 17, 3):
        keypoints = keypoints[0, 0]
        for kp in keypoints:
            x = int(kp[1] * image.shape[1])
            y = int(kp[0] * image.shape[0])
            cv2.circle(image, (x, y), 12, (255, 0, 0), -1)  # Increase thickness and change color to blue
        for connection in connections:
            start_point = (int(keypoints[connection[0], 1] * image.shape[1]),
                           int(keypoints[connection[0], 0] * image.shape[0]))
            end_point = (int(keypoints[connection[1], 1] * image.shape[1]),
                         int(keypoints[connection[1], 0] * image.shape[0]))
            cv2.line(image, start_point, end_point, (0, 0, 255), 8)  # Increase thickness and change color to red
        
        original_keypoints = original_keypoints[0, 0]
        for kp in original_keypoints:
            x = int(kp[1] * image_original.shape[1])
            y = int(kp[0] * image_original.shape[0])
            cv2.circle(image_original, (x, y), 12, (255, 0, 0), -1)  # Increase thickness and change color to blue
        for connection in connections:
            start_point = (int(original_keypoints[connection[0], 1] * image_original.shape[1]),
                           int(original_keypoints[connection[0], 0] * image_original.shape[0]))
            end_point = (int(original_keypoints[connection[1], 1] * image_original.shape[1]),
                         int(original_keypoints[connection[1], 0] * image_original.shape[0]))
            cv2.line(image_original, start_point, end_point, (0, 0, 255), 8)  # Increase thickness and change color to red
        show_images([image_original,image],["Predicted Annotations without Processing","Predicted Annotations after Processing"])
    else:
        print("Unexpected shape of keypoints array:", keypoints.shape)

In [58]:
for static_image_path in human_image_paths:
    print(static_image_path)
    static_keypoints,original_static_keypoints = detect_pose_static(static_image_path)
    #visualize_pose_static(static_image_path, static_keypoints, original_static_keypoints)

../coco2014/images/train2014/COCO_train2014_000000262145.jpg
../coco2014/images/train2014/COCO_train2014_000000262146.jpg
../coco2014/images/train2014/COCO_train2014_000000393223.jpg
../coco2014/images/train2014/COCO_train2014_000000393224.jpg
../coco2014/images/train2014/COCO_train2014_000000524297.jpg
../coco2014/images/train2014/COCO_train2014_000000393227.jpg
../coco2014/images/train2014/COCO_train2014_000000131084.jpg
../coco2014/images/train2014/COCO_train2014_000000393230.jpg
../coco2014/images/train2014/COCO_train2014_000000524311.jpg
../coco2014/images/train2014/COCO_train2014_000000393241.jpg
../coco2014/images/train2014/COCO_train2014_000000524314.jpg
../coco2014/images/train2014/COCO_train2014_000000131101.jpg
../coco2014/images/train2014/COCO_train2014_000000524320.jpg
../coco2014/images/train2014/COCO_train2014_000000393251.jpg
../coco2014/images/train2014/COCO_train2014_000000000036.jpg
../coco2014/images/train2014/COCO_train2014_000000524338.jpg
../coco2014/images/train

In [61]:
def format_coco_results(human_image_paths, predictions, coco):
    results = []
    for image_path, pred in zip(human_image_paths, predictions):
        img_id = int(image_path.split("_")[-1].split(".")[0])  # Extract COCO image ID
        keypoints = pred[0, 0, :, :]  # Shape: [17, 3]
        
        # Convert normalized keypoints to absolute image coordinates
        image = cv2.imread(image_path)
        h, w, _ = image.shape
        formatted_keypoints = []
        for kp in keypoints:
            x, y, confidence = float(kp[1] * w), float(kp[0] * h), float(kp[2])
            formatted_keypoints.extend([x, y, confidence])  # Use confidence threshold
        
        results.append({
            "image_id": img_id,
            "category_id": 1,  # Category for 'person'
            "keypoints": formatted_keypoints,
            "num_keypoints": 17,
            "score": float(keypoints[:, 2].mean())
        })
    return results

for ann in coco.anns.values():
    if 'num_keypoints' not in ann:
        ann['num_keypoints'] = 17
    if 'keypoints' not in ann: 
        ann['keypoints'] = []

# Detect poses and format results
predictions = []
for static_image_path in human_image_paths:
    static_keypoints, _ = detect_pose_static(static_image_path)
    predictions.append(static_keypoints)

coco_results = format_coco_results(human_image_paths, predictions, coco)
results_path = "pose_results_equalized.json"
with open(results_path, 'w') as f:
    json.dump(coco_results, f, indent=2)

# Load results and evaluate



predictions = []
for static_image_path in human_image_paths:
    _, static_keypoints = detect_pose_static(static_image_path)
    predictions.append(static_keypoints)

coco_results = format_coco_results(human_image_paths, predictions, coco)
results_path = "pose_results_unequalized.json"
with open(results_path, 'w') as f:
    json.dump(coco_results, f, indent=2)

print("Evaluation for Unequalized Images")
coco_dt = coco.loadRes(results_path)
coco_eval = COCOeval(coco, coco_dt, "keypoints")
coco_eval.evaluate()
print("Evaluation for Unequalized Images:")
coco_eval.accumulate()
coco_eval.summarize()

Evaluation for Unequalized Images
Loading and preparing results...
DONE (t=0.03s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=4.92s).
Evaluation for Unequalized Images:
Accumulating evaluation results...
DONE (t=0.51s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.004
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.010
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.005
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.000
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 