# Human Pose Estimation with MoveNet

## Visualization libraries & Imports

In [None]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs


In [None]:
!pip install tensorflow==2.12.0
!pip install tensorflow-hub==0.13.0
!pip install matplotlib


In [None]:
import os

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import cv2

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches


# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display

In [None]:

# Bone table for the 17-joint h36m layout.
# Key: (start_joint_index, end_joint_index); value: the matplotlib single-letter
# colour used when that bone is drawn.
# (This table used to be defined twice, verbatim; one definition is enough.)
KEYPOINT_EDGE_INDS_TO_COLOR_36 = {
    # chain 0 -> 1 -> 2 -> 3
    (0, 1): 'm',
    (1, 2): 'm',
    (2, 3): 'm',

    # chain 0 -> 4 -> 5 -> 6
    (0, 4): 'c',
    (4, 5): 'c',
    (5, 6): 'c',

    # chain 0 -> 7 -> 8 -> 9 -> 10
    (0, 7): 'y',
    (7, 8): 'y',
    (8, 9): 'y',
    (9, 10): 'y',

    # chain 8 -> 11 -> 12 -> 13
    # NOTE(review): (11, 12) is 'g' while both neighbours on this chain are
    # 'b' -- confirm whether the colour break is intentional.
    (8, 11): 'b',
    (11, 12): 'g',
    (12, 13): 'b',

    # chain 8 -> 14 -> 15 -> 16
    (8, 14): 'r',
    (14, 15): 'r',
    (15, 16): 'r',

}

# Bone table for the 17-keypoint layout that is used for every `type` other
# than "h36m". Same key/value convention as the table above.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(type, keypoints_with_scores, height, width, keypoint_threshold=0.11):
  """Turn scored keypoints into pixel-space points and bone segments.

  Args:
    type: Skeleton layout selector. "h36m" selects
      KEYPOINT_EDGE_INDS_TO_COLOR_36; any other value selects
      KEYPOINT_EDGE_INDS_TO_COLOR. (The name shadows the builtin but is kept
      because callers pass it by keyword.)
    keypoints_with_scores: 4-D array indexed as [0, instance, keypoint, :],
      whose last axis is read as (y, x, score). Only batch element 0 is read.
    height: Factor the y coordinates are multiplied by (image height).
    width: Factor the x coordinates are multiplied by (image width).
    keypoint_threshold: A keypoint is kept only if its score is strictly
      greater than this value; a bone needs both of its end points kept.

  Returns:
    A tuple (keypoints_xy, edges_xy, edge_colors):
      * keypoints_xy: array of shape (N, 2) with the (x, y) position of every
        kept keypoint, or shape (0, 2) when there is nothing to show.
      * edges_xy: array of shape (M, 2, 2) with [[x0, y0], [x1, y1]] per
        bone, or shape (0, 2, 2) when no bone qualifies.
      * edge_colors: list of M colour codes, aligned with edges_xy.
  """
  if type == "h36m":
    key_joints = KEYPOINT_EDGE_INDS_TO_COLOR_36
  else:
    key_joints = KEYPOINT_EDGE_INDS_TO_COLOR

  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []

  # Instances live on axis 1 (the data is indexed as [0, idx, ...] below), so
  # that is the axis whose length must drive the loop -- not the batch axis.
  _, num_instances, _, _ = keypoints_with_scores.shape

  for idx in range(num_instances):
    kpts_y = np.asarray(keypoints_with_scores[0, idx, :, 0])
    kpts_x = np.asarray(keypoints_with_scores[0, idx, :, 1])
    kpts_scores = np.asarray(keypoints_with_scores[0, idx, :, 2])

    # Scale the coordinates to pixels, stored as (x, y) columns.
    kpts_absolute_xy = np.stack([width * kpts_x, height * kpts_y], axis=-1)
    is_confident = kpts_scores > keypoint_threshold
    keypoints_all.append(kpts_absolute_xy[is_confident, :])

    for (start, end), color in key_joints.items():
      if is_confident[start] and is_confident[end]:
        # Rows [start, end] give [[x_start, y_start], [x_end, y_end]].
        keypoint_edges_all.append(kpts_absolute_xy[[start, end], :])
        edge_colors.append(color)

  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    # Without this fallback the return below raised UnboundLocalError.
    keypoints_xy = np.zeros((0, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))

  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(image, keypoints_with_scores, type):
  """Draw the predicted skeleton over `image` and return the rendered figure.

  Args:
    image: Array whose first two axes are (height, width); it is shown with
      `imshow` as the background.
    keypoints_with_scores: Keypoint array forwarded unchanged to
      `_keypoints_and_edges_for_display`.
    type: Skeleton layout selector forwarded to
      `_keypoints_and_edges_for_display` ("h36m" or anything else).

  Returns:
    A uint8 array of shape (rows, cols, 3) holding the RGB pixels of the
    rendered matplotlib figure.
  """
  height, width = image.shape[:2]
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))

  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  # Turn off tick labels and the axis frame
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  ax.axis('off')

  ax.imshow(image)
  # Empty artists first; they are filled only if there is something to draw.
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  keypoint_locs, keypoint_edges, edge_colors = _keypoints_and_edges_for_display(type, keypoints_with_scores, height, width)

  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  fig.canvas.draw()
  # `tostring_rgb()` is gone from current matplotlib releases. `buffer_rgba()`
  # exposes the rendered canvas as (rows, cols, 4); drop alpha and copy so the
  # result no longer references the figure's buffer once the figure is closed.
  image_from_plot = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
  plt.close(fig)

  return image_from_plot


## Load Model from TF hub

In [None]:
!wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite

In [None]:
# Build the TFLite interpreter from the downloaded file and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

# Tensor metadata as reported by the interpreter.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Only the first input tensor and the first output tensor are used.
first_input, first_output = input_details[0], output_details[0]
INPUT_IDX = first_input['index']
OUTPUT_IDX = first_output['index']
INPUT_DTYPE = first_input['dtype']  # dtype the interpreter reports for its input
input_size = 192  # side length the image is resized/padded to before inference


def movenet(input_image):
    """Run the TFLite MoveNet interpreter on one pre-sized image batch.

    Args:
        input_image: tf.Tensor or np.ndarray of shape [1, 192, 192, 3] with
            RGB pixel values in the 0-255 range. Any numeric dtype is
            accepted; it is cast to the dtype the interpreter reports.

    Returns:
        The interpreter's output tensor as a numpy array -- [1, 1, 17, 3]
        for this model, holding keypoints and scores.
    """
    # Per the MoveNet model card the input carries raw pixel values in
    # [0, 255] for both the integer and the float variants, so the image is
    # only cast to the interpreter's input dtype and never rescaled to [0, 1].
    tensor = tf.cast(input_image, INPUT_DTYPE).numpy()

    interpreter.set_tensor(INPUT_IDX, tensor)
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(OUTPUT_IDX)  # [1,1,17,3]
    return keypoints_with_scores

## Single Image

This section demonstrates a minimum working example of running the model on a **single image** to predict the 17 human keypoints.

### Load Input Image

In [None]:
# Load the input image.
image_path = 'messi2.jpeg'
image_bytes = tf.io.read_file(image_path)
# channels=3 always yields an RGB tensor, even for a grayscale JPEG, so the
# result matches the 3-channel input the model is fed with.
image = tf.image.decode_jpeg(image_bytes, channels=3)

### Run Inference

In [None]:
# Add a batch axis, then resize with padding so the aspect ratio is kept and
# the result has the square size the model is fed with.
input_image = tf.image.resize_with_pad(
    tf.expand_dims(image, axis=0), input_size, input_size)

# Run model inference.
keypoints_with_scores = movenet(input_image)

# Build a larger, identically padded copy of the image to draw on.
display_image = tf.image.resize_with_pad(
    tf.expand_dims(image, axis=0), 1280, 1280)
display_image = tf.cast(display_image, dtype=tf.int32)

# Visualize the predictions on top of the image (batch axis removed first).
output_overlay = draw_prediction_on_image(
    np.squeeze(display_image.numpy(), axis=0),
    keypoints_with_scores,
    type="coco")

plt.figure(figsize=(5, 5))
plt.imshow(output_overlay)
_ = plt.axis('off')

In [None]:
# Work on a separate copy so the raw model output stays untouched.
new_keypoints = np.copy(keypoints_with_scores, order='C')
new_keypoints.shape

In [None]:
# Drop the trailing score of every keypoint of the first instance and flatten
# the remaining coordinate pairs into one 1-D float64 array. A single slice
# replaces growing the array with np.append once per keypoint.
kp_without_scores = new_keypoints[0, 0, :, :-1].astype(np.float64).ravel()

len(kp_without_scores), kp_without_scores

In [None]:
def coco_h36m(keypoints):
    """Re-order 17 COCO keypoints per frame into the 17-joint h36m layout.

    Thirteen h36m joints are straight copies of COCO keypoints; head, thorax,
    pelvis and spine are synthesised from combinations of COCO keypoints, and
    three joints receive a small positional correction afterwards.

    Adapted from
    https://github.com/Vegetebird/StridedTransformer-Pose3D/blob/26161031d0f6cd29df6c56c52f9fd401301e6efd/demo/lib/preprocess.py#L15

    NOTE(review): the head formula and two of the corrections treat column 0
    and column 1 differently -- confirm the caller's coordinate order matches
    what the reference implementation expects.

    Args:
        keypoints: array of shape (frames, 17, 2).

    Returns:
        A new array with the same shape as `keypoints`; the input is not
        modified.
    """
    num_frames = keypoints.shape[0]
    h36m = np.zeros(shape=(keypoints.shape))

    # Rows of `derived`, in order: head, thorax, pelvis, spine.
    head, thorax, pelvis, spine = range(4)
    derived = np.zeros((num_frames, 4, 2), dtype=np.float32)

    first_kp = keypoints[:, 0, :]
    mid_5_6 = np.mean(keypoints[:, 5:7, :], axis=1, dtype=np.float32)

    # Head: column 0 is the mean over keypoints 1..4; column 1 is the sum
    # over keypoints 1..2 minus keypoint 0.
    derived[:, head, 0] = np.mean(keypoints[:, 1:5, 0], axis=1, dtype=np.float32)
    derived[:, head, 1] = np.sum(keypoints[:, 1:3, 1], axis=1, dtype=np.float32) - first_kp[:, 1]

    # Thorax: midpoint of keypoints 5 and 6, moved one third of the way
    # towards keypoint 0.
    derived[:, thorax, :] = mid_5_6
    derived[:, thorax, :] += (first_kp - derived[:, thorax, :]) / 3

    # Pelvis: midpoint of keypoints 11 and 12. Spine: mean of 5, 6, 11, 12.
    derived[:, pelvis, :] = np.mean(keypoints[:, 11:13, :], axis=1, dtype=np.float32)
    derived[:, spine, :] = np.mean(keypoints[:, [5, 6, 11, 12], :], axis=1, dtype=np.float32)

    h36m[:, [10, 8, 0, 7], :] = derived

    # Direct copies: COCO keypoint coco_src[i] becomes h36m joint h36m_dst[i].
    coco_src = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    h36m_dst = [9, 11, 14, 12, 15, 13, 16, 4, 1, 5, 2, 6, 3]
    h36m[:, h36m_dst, :] = keypoints[:, coco_src, :]

    # Corrections: joint 9 moves a quarter of the way towards the 5/6
    # midpoint; joint 7 (column 0) is pushed away from the mean of joints 0
    # and 8; joint 8 (column 1) is shifted by two thirds of an offset taken
    # from keypoints 0..2.
    h36m[:, 9, :] -= (h36m[:, 9, :] - mid_5_6) / 4
    h36m[:, 7, 0] += 2 * (h36m[:, 7, 0] - np.mean(h36m[:, [0, 8], 0], axis=1, dtype=np.float32))
    h36m[:, 8, 1] -= (np.mean(keypoints[:, 1:3, 1], axis=1, dtype=np.float32) - first_kp[:, 1]) * 2 / 3

    return h36m

In [None]:
# coco_h36m indexes its input as (frame, keypoint, coordinate), so add a
# leading axis of length 1.
kp_without_scores = np.reshape(kp_without_scores, (1, 17, 2))

# Convert the COCO keypoints to the h36m layout, then drop the frame axis.
# NOTE(review): the two coordinate columns are only swapped in a later cell,
# after this conversion -- confirm coco_h36m is meant to run before the swap.
h36m_without_scores = coco_h36m(kp_without_scores)
h36m_without_scores = np.reshape(h36m_without_scores, (17, 2))

# Copy scaled by 1000; this is helpful for running the keypoints in pose2mesh.
h36m_without_scores_for_mesh = 1000 * h36m_without_scores

display(h36m_without_scores)

Swap the axes

In [None]:
# Exchange the two coordinate columns of every keypoint: (c0, c1) -> (c1, c0).
# Reversing the last axis and copying gives a fresh (17, 2) float64 array.
h36m_without_scores_for_mesh_swap = np.array(
    h36m_without_scores_for_mesh[:, ::-1], dtype=np.float64, order='C')

h36m_without_scores_for_mesh_swap

Save the keypoints to a `.npy` file

In [None]:
# Name the output after the image file, without directory or extension.
# splitext/basename (instead of split(".")) keep names that contain extra
# dots intact and ignore any directory part of image_path.
image_name = os.path.splitext(os.path.basename(image_path))[0]
keypoints_dir = 'keyPoints/{image_name}.npy'.format(image_name=image_name)
# np.save does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(keypoints_dir), exist_ok=True)
np.save(keypoints_dir, h36m_without_scores_for_mesh_swap)

Load the keypoints

In [None]:
# Read the saved array back from disk and show it together with its shape.
saved_path = "keyPoints/{image_name}.npy".format(image_name=image_name)
a = np.load(saved_path)
a, a.shape

Visualize the Keypoints 

In [None]:
import matplotlib.pyplot as plt

import numpy as np

# One plot call per keypoint, so every marker takes the next colour of the
# cycle. Column 1 goes on the horizontal axis, column 0 on the vertical axis.
for point in a:
    plt.plot(point[1], point[0], 'o')

plt.show()

In [None]:
# Append to every converted keypoint the model score stored at the same index.
# NOTE(review): scores are paired by position, not re-ordered the way
# coco_h36m re-orders the coordinates -- confirm this is intended.
scores = keypoints_with_scores[0][0]
lst = [coords + [scores[i][2]]
       for i, coords in enumerate(h36m_without_scores.tolist())]

In [None]:
# Pack the 17 [c0, c1, score] rows into an array and add the two leading axes
# of length 1 that the drawing helper indexes into.
h36m_with_scores = np.asarray(lst, dtype=np.float64).reshape(1, 1, 17, 3)
h36m_with_scores

In [None]:
# Draw the converted skeleton on the same padded display image, this time
# with the h36m bone table.
display_frame = np.squeeze(display_image.numpy(), axis=0)
output_overlay = draw_prediction_on_image(
    display_frame, h36m_with_scores, type="h36m")

plt.figure(figsize=(5, 5))
plt.imshow(output_overlay)
_ = plt.axis('off')

In [None]:
# Shape check: after the earlier reshape this should be (1, 1, 17, 3).
h36m_with_scores.shape