# Human Pose Estimation with MoveNet

## Visualization libraries & Imports

In [1]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

[0m

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import pandas as pd

import cv2

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display


import shutil # Shutil module offers high-level operation on a file


In [3]:
#@title Helper functions for visualization

# Dictionary that maps from joint names to keypoint indices.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Maps bones to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the coordinates of all keypoints of all detected entities;
      * the coordinates of all skeleton edges of all detected entities;
      * the colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  num_instances, _, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    keypoints_xy = np.zeros((0, 17, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary that defines the coordinates of the bounding box
      of the crop region in normalized coordinates (see the init_crop_region
      function below for more detail). If provided, this function will also
      draw the bounding box on the image.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def to_gif(images, fps):
  """Converts image sequence (4D numpy array) to gif."""
  imageio.mimsave('./animation.gif', images, fps=fps)
  return embed.embed_file('./animation.gif')

def progress(value, max=100):
  return HTML("""
      <progress
          value='{value}'
          max='{max}',
          style='width: 100%'
      >
          {value}
      </progress>
  """.format(value=value, max=max))

## Load Model from TensorFlow Hub (TF hub)

In [4]:
model_name = "movenet_thunder" #@param ["movenet_lightning", "movenet_thunder", "movenet_lightning_f16.tflite", "movenet_thunder_f16.tflite", "movenet_lightning_int8.tflite", "movenet_thunder_int8.tflite"]

if "tflite" in model_name:
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
    
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

2022-11-26 20:06:36.788835: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


## Method to extract keypoints (Interface)

In [None]:
def extract_keypoint(image_name_list):
    for i, filename in enumerate(image_name_list): 
        # read file and decode
        image_path = 'images/images/'+ filename
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image)

        
        # Run Inference

        # Resize and pad the image to keep the aspect ratio and fit the expected size.
        input_image = tf.expand_dims(image, axis=0)
        input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

        # Run model inference.
        keypoints_with_scores = movenet(input_image)

        # Visualize the predictions with image.
        display_image = tf.expand_dims(image, axis=0)
        display_image = tf.cast(tf.image.resize_with_pad(display_image, 1280, 1280), dtype=tf.int32)
        output_overlay = draw_prediction_on_image(np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

        # convert BGR TO RGB  & Save
        im_rgb = output_overlay[:, :, [2, 1, 0]]
        cv2.imwrite('extracted_images/'+filename, im_rgb) #location to save the images with extracted keypoints
        
        # Output
        print(i, filename)
#         plt.figure(figsize=(5, 5))
#         plt.imshow(output_overlay)
#         _ = plt.axis('off')

# Implementing MoveNet - Thunder to extract keypoints

## Fetch dataset by cloning GitHub repo

In [5]:
!git clone https://github.com/Muhammad-Taufiq-Khan/Pose-Detection-ML-Intern.git
%cd Pose-Detection-ML-Intern/

csv_file = pd.read_csv("labels.csv",names=['image','label'])

Cloning into 'Pose-Detection-ML-Intern'...
remote: Enumerating objects: 1236, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 1236 (delta 6), reused 18 (delta 6), pack-reused 1218[K
Receiving objects: 100% (1236/1236), 44.38 MiB | 44.86 MiB/s, done.
Resolving deltas: 100% (6/6), done.
/kaggle/working/Pose-Detection-ML-Intern


In [7]:
# create a new directory to save images with extracted keypoints
shutil.rmtree('extracted_images', ignore_errors=True) # remove directory if already exists
!mkdir extracted_images

# call interface to extract keypoints
extract_keypoint(csv_file['image'])

2022-11-26 20:06:56.050820: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


0 Image_1.jpg
1 Image_4.jpg
2 Image_6.jpg
3 Image_15.jpg
4 Image_33.jpg
5 Image_37.jpg
6 Image_45.jpg
7 Image_48.jpg
8 Image_62.jpg
9 Image_65.jpg
10 Image_75.jpg
11 Image_82.jpg
12 Image_109.jpg
13 Image_116.jpg
14 Image_122.jpg
15 Image_124.jpg
16 Image_157.jpg
17 Image_177.jpg
18 Image_179.jpg
19 Image_182.jpg
20 Image_183.jpg
21 Image_195.jpg
22 Image_201.jpg
23 Image_211.jpg
24 Image_218.jpg
25 Image_222.jpg
26 Image_228.jpg
27 Image_235.jpg
28 Image_238.jpg
29 Image_239.jpg
30 Image_263.jpg
31 Image_280.jpg
32 Image_295.jpg
33 Image_306.jpg
34 Image_330.jpg
35 Image_335.jpg
36 Image_339.jpg
37 Image_346.jpg
38 Image_348.jpg
39 Image_361.jpg
40 Image_362.jpg
41 Image_367.jpg
42 Image_368.jpg
43 Image_380.jpg
44 Image_385.jpg
45 Image_410.jpg
46 Image_411.jpg
47 Image_420.jpg
48 Image_452.jpg
49 Image_453.jpg
50 Image_462.jpg
51 Image_483.jpg
52 Image_494.jpg
53 Image_497.jpg
54 Image_503.jpg
55 Image_506.jpg
56 Image_521.jpg
57 Image_537.jpg
58 Image_542.jpg
59 Image_555.jpg
60 Im

- Extracted images has been saved to the "extracted_images" directory. Convert the directory into zip file to make it downloadable

In [13]:
# Create a zip file
!zip -r folder.zip extracted_images

# Then downlod that zip file (following snippet is only for colab):
# from google.colab import files
# files.download("/content/folder.zip")

  adding: extracted_images/ (stored 0%)
  adding: extracted_images/Image_8652.jpg (deflated 7%)
  adding: extracted_images/Image_33.jpg (deflated 11%)
  adding: extracted_images/Image_12085.jpg (deflated 10%)
  adding: extracted_images/Image_218.jpg (deflated 11%)
  adding: extracted_images/Image_5980.jpg (deflated 10%)
  adding: extracted_images/Image_10012.jpg (deflated 11%)
  adding: extracted_images/Image_608.jpg (deflated 8%)
  adding: extracted_images/Image_3299.jpg (deflated 9%)
  adding: extracted_images/Image_6754.jpg (deflated 10%)
  adding: extracted_images/Image_1317.jpg (deflated 7%)
  adding: extracted_images/Image_12457.jpg (deflated 11%)
  adding: extracted_images/Image_7511.jpg (deflated 13%)
  adding: extracted_images/Image_11117.jpg (deflated 8%)
  adding: extracted_images/Image_3255.jpg (deflated 11%)
  adding: extracted_images/Image_62.jpg (deflated 8%)
  adding: extracted_images/Image_5018.jpg (deflated 15%)
  adding: extracted_images/Image_8584.jpg (deflated 10%)