# Extracting keypoints from videos

We will use the following models to extract keypoints:

* MoveNet (2D)
* MediaPipe Pose Landmarker (3D)

In [1]:
import tensorflow as tf
import tensorflow_hub as tf_hub
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import pandas as pd
import imageio
import matplotlib.pyplot as plt
from pathlib import Path

2026-02-06 02:14:06.419080: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-02-06 02:14:06.695821: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-06 02:14:08.392519: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
  from pkg_resources import parse_version


## MoveNet (2D)

In [2]:
# Helper functions

KEYPOINT_DICT = {
  'nose': 0,
  'left_eye': 1,
  'right_eye': 2,
  'left_ear': 3,
  'right_ear': 4,
  'left_shoulder': 5,
  'right_shoulder': 6,
  'left_elbow': 7,
  'right_elbow': 8,
  'left_wrist': 9,
  'right_wrist': 10,
  'left_hip': 11,
  'right_hip': 12,
  'left_knee': 13,
  'right_knee': 14,
  'left_ankle': 15,
  'right_ankle': 16
}

# Cropping algorithm to improve detection accuracy (source: https://www.tensorflow.org/hub/tutorials/movenet)

MIN_CROP_KEYPOINT_SCORE = 0.2

def init_crop_region(image_height, image_width):
  if image_width > image_height:
    box_height = image_width / image_height
    box_width = 1.0
    y_min = (image_height / 2 - image_width / 2) / image_height
    x_min = 0.0
  else:
    box_height = 1.0
    box_width = image_height / image_width
    y_min = 0.0
    x_min = (image_width / 2 - image_height / 2) / image_width

  return {
    'y_min': y_min,
    'x_min': x_min,
    'y_max': y_min + box_height,
    'x_max': x_min + box_width,
    'height': box_height,
    'width': box_width
  }

def torso_visible(keypoints):
  return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE) and
          (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE))

def determine_torso_and_body_range(keypoints, target_keypoints, center_y, center_x):
  torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']
  max_torso_yrange = 0.0
  max_torso_xrange = 0.0
  for joint in torso_joints:
    dist_y = abs(center_y - target_keypoints[joint][0])
    dist_x = abs(center_x - target_keypoints[joint][1])
    if dist_y > max_torso_yrange:
      max_torso_yrange = dist_y
    if dist_x > max_torso_xrange:
      max_torso_xrange = dist_x

  max_body_yrange = 0.0
  max_body_xrange = 0.0
  for joint in KEYPOINT_DICT.keys():
    if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:
      continue
    dist_y = abs(center_y - target_keypoints[joint][0]);
    dist_x = abs(center_x - target_keypoints[joint][1]);
    if dist_y > max_body_yrange:
      max_body_yrange = dist_y

    if dist_x > max_body_xrange:
      max_body_xrange = dist_x

  return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]

def determine_crop_region(keypoints, image_height, image_width):
  target_keypoints = {}
  for joint in KEYPOINT_DICT.keys():
    target_keypoints[joint] = [
      keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,
      keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width
    ]

  if torso_visible(keypoints):
    center_y = (target_keypoints['left_hip'][0] +
                target_keypoints['right_hip'][0]) / 2;
    center_x = (target_keypoints['left_hip'][1] +
                target_keypoints['right_hip'][1]) / 2;

    (max_torso_yrange, max_torso_xrange,
      max_body_yrange, max_body_xrange) = determine_torso_and_body_range(
          keypoints, target_keypoints, center_y, center_x)

    crop_length_half = np.amax(
        [max_torso_xrange * 1.9, max_torso_yrange * 1.9,
          max_body_yrange * 1.2, max_body_xrange * 1.2])

    tmp = np.array(
        [center_x, image_width - center_x, center_y, image_height - center_y])
    crop_length_half = np.amin(
        [crop_length_half, np.amax(tmp)]);

    crop_corner = [center_y - crop_length_half, center_x - crop_length_half];

    if crop_length_half > max(image_width, image_height) / 2:
      return init_crop_region(image_height, image_width)
    else:
      crop_length = crop_length_half * 2;
      return {
        'y_min': crop_corner[0] / image_height,
        'x_min': crop_corner[1] / image_width,
        'y_max': (crop_corner[0] + crop_length) / image_height,
        'x_max': (crop_corner[1] + crop_length) / image_width,
        'height': (crop_corner[0] + crop_length) / image_height -
            crop_corner[0] / image_height,
        'width': (crop_corner[1] + crop_length) / image_width -
            crop_corner[1] / image_width
      }
  else:
    return init_crop_region(image_height, image_width)

def crop_and_resize(image, crop_region, crop_size):
  boxes=[[crop_region['y_min'], crop_region['x_min'],
          crop_region['y_max'], crop_region['x_max']]]
  output_image = tf.image.crop_and_resize(
      image, box_indices=[0], boxes=boxes, crop_size=crop_size)
  return output_image

def run_inference(movenet, image, crop_region, crop_size):
  image_height, image_width, _ = image.shape
  input_image = crop_and_resize(
    tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)
  input_image = tf.cast(input_image, dtype=tf.int32)
  
  keypoints_with_scores = movenet(input_image)
  keypoints_with_scores = keypoints_with_scores['output_0'].numpy()

  for idx in range(17):
    keypoints_with_scores[0, 0, idx, 0] = (
        crop_region['y_min'] * image_height +
        crop_region['height'] * image_height *
        keypoints_with_scores[0, 0, idx, 0]) / image_height
    keypoints_with_scores[0, 0, idx, 1] = (
        crop_region['x_min'] * image_width +
        crop_region['width'] * image_width *
        keypoints_with_scores[0, 0, idx, 1]) / image_width
  return keypoints_with_scores


def load_video_data(video_path):
  reader = imageio.get_reader(video_path)
  fps = reader.get_meta_data()['fps']

  frames = []
  timestamps = []

  for frame_index, frame in enumerate(reader):
    frames.append(frame)
    timestamps.append(int((frame_index / fps) * 1000))
  
  return frames, timestamps

def compute_video_keypoints(frames, model):
  frame_height, frame_width, _ = frames[0].shape
  input_size = 256
  crop_region = init_crop_region(frame_height, frame_width)

  keypoint_list = []
  for frame in frames:
    outputs = run_inference(model, frame, crop_region, crop_size=[input_size, input_size])
    keypoint_list.append(outputs)

    crop_region = determine_crop_region(outputs, frame_height, frame_width)
  
  return keypoint_list

def restructure_keypoints(keypoint_list, timestamps):
  stacked = np.concatenate(keypoint_list, axis=0)[:, 0, :, :]
  yx = stacked[:, 5:, :] # remove face keypoints

  joint_names = [joint for joint, idx in sorted(KEYPOINT_DICT.items(), key=lambda x: x[1]) if idx > 4]
  coordinates = {}
  for i, joint in enumerate(joint_names):
    coordinates[f'{joint}_x'] = yx[:, i, 1]
    coordinates[f'{joint}_y'] = yx[:, i, 0]
    coordinates[f'{joint}_confidence'] = yx[:, i, 2]
  
  frames = np.arange(len(keypoint_list), dtype=int)
  times = np.asarray(timestamps, dtype=int)

  df = pd.DataFrame({'frame': frames, 'time': times, **coordinates})
  return df

In [3]:
VIDEO_DIR = Path('../data/Utvalda filminspelningar för IRAF analys/Dec 2025 sit-stå och stå-sitt')
VIDEO_PATH = VIDEO_DIR / 'DJI_20250425092743_0028_D.MP4'

frames, timestamps_ms = load_video_data(VIDEO_PATH)
timestamps_sec = [ts // 1000 for ts in timestamps_ms]

In [4]:
model = tf_hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
model = model.signatures['serving_default']

keypoint_list = compute_video_keypoints(frames, model)
structured_keypoints = restructure_keypoints(keypoint_list, timestamps_sec)
structured_keypoints

E0000 00:00:1770340464.634197   77029 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1770340464.639733   77029 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Unnamed: 0,frame,time,left_shoulder_x,left_shoulder_y,left_shoulder_confidence,right_shoulder_x,right_shoulder_y,right_shoulder_confidence,left_elbow_x,left_elbow_y,...,left_knee_confidence,right_knee_x,right_knee_y,right_knee_confidence,left_ankle_x,left_ankle_y,left_ankle_confidence,right_ankle_x,right_ankle_y,right_ankle_confidence
0,0,0,0.521092,0.519910,0.518108,0.487056,0.516722,0.474878,0.510511,0.588453,...,0.385356,0.433174,0.683465,0.330330,0.453917,0.836186,0.308937,0.436931,0.815531,0.253571
1,1,0,0.527349,0.528619,0.950304,0.494427,0.521837,0.817154,0.486307,0.586665,...,0.860634,0.410921,0.702312,0.876248,0.449585,0.847421,0.892166,0.428270,0.827499,0.912544
2,2,0,0.528537,0.527519,0.724062,0.490452,0.520796,0.901601,0.484892,0.584482,...,0.812152,0.410696,0.703564,0.843845,0.450351,0.850582,0.899944,0.428882,0.826985,0.885123
3,3,0,0.528805,0.527451,0.703769,0.491186,0.521737,0.856841,0.485584,0.585750,...,0.784059,0.410876,0.702036,0.817702,0.450794,0.848726,0.874410,0.429225,0.825832,0.820706
4,4,0,0.528499,0.527151,0.716405,0.490723,0.520593,0.914595,0.485261,0.585545,...,0.820067,0.410952,0.703862,0.842280,0.450559,0.850310,0.907743,0.429210,0.826805,0.881726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1392,1392,27,0.497387,0.529327,0.781885,0.460888,0.528069,0.806294,0.503026,0.620907,...,0.355629,0.417497,0.703355,0.788210,0.450186,0.850523,0.818536,0.428047,0.828777,0.761928
1393,1393,27,0.497320,0.528276,0.726723,0.460820,0.527198,0.730001,0.503591,0.621395,...,0.432041,0.416851,0.703323,0.847398,0.450049,0.850857,0.904617,0.428049,0.829433,0.822648
1394,1394,27,0.497783,0.528964,0.785727,0.460696,0.527867,0.803017,0.503322,0.620881,...,0.508949,0.417347,0.702260,0.790500,0.449447,0.851026,0.835349,0.427453,0.829263,0.897273
1395,1395,27,0.497340,0.528751,0.730734,0.460983,0.527778,0.757211,0.503100,0.620441,...,0.369285,0.417176,0.702867,0.769770,0.450016,0.851413,0.877545,0.427749,0.829164,0.814743


## MediaPipe Pose Landmarker (3D)

In [5]:
MEDIAPIPE_JOINTS = [
  ('left_shoulder', 11),
  ('right_shoulder', 12),
  ('left_elbow', 13),
  ('right_elbow', 14),
  ('left_wrist', 15),
  ('right_wrist', 16),
  ('left_hip', 23),
  ('right_hip', 24),
  ('left_knee', 25),
  ('right_knee', 26),
  ('left_ankle', 27),
  ('right_ankle', 28)
]


def restructure_mediapipe_joints(landmarker_results, timestamps, world:bool):
  if world:
    landmarker_results = [landmarker_result.pose_world_landmarks[0] for landmarker_result in landmarker_results]
  else:
    landmarker_results = [landmarker_result.pose_landmarks[0] for landmarker_result in landmarker_results]
  
  stacked = np.stack(landmarker_results)
  coordinates = {}
  for joint_name, joint_index in MEDIAPIPE_JOINTS:
    coordinates[f'{joint_name}_x'] = np.vectorize(lambda joint: joint.x)(stacked[:, joint_index])
    coordinates[f'{joint_name}_y'] = np.vectorize(lambda joint: joint.y)(stacked[:, joint_index])
    coordinates[f'{joint_name}_z'] = np.vectorize(lambda joint: joint.z)(stacked[:, joint_index])
    coordinates[f'{joint_name}_visibility'] = np.vectorize(lambda joint: joint.visibility)(stacked[:, joint_index])
    coordinates[f'{joint_name}_presence'] = np.vectorize(lambda joint: joint.presence)(stacked[:, joint_index])
  frames = np.arange(len(landmarker_results), dtype=int)
  times = np.asarray(timestamps, dtype=int)

  df = pd.DataFrame({'frame': frames, 'time': times, **coordinates})
  return df

In [6]:
MODEL_PATH = '../models/pose_landmarker_heavy.task'

base_options = mp.tasks.BaseOptions(model_asset_path=MODEL_PATH)
running_mode = mp.tasks.vision.RunningMode.VIDEO
options = mp.tasks.vision.PoseLandmarkerOptions(base_options=base_options, running_mode=running_mode)

landmarker_results = []
with mp.tasks.vision.PoseLandmarker.create_from_options(options) as landmarker:
  for i in range(len(frames)):
    mp_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=frames[i])
    timestamp = timestamps_ms[i]
    landmarker_result = landmarker.detect_for_video(mp_frame, timestamp)
    landmarker_results.append(landmarker_result)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1770340504.175858   77353 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1770340504.242887   77352 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1770340504.316494   77354 landmark_projection_calculator.cc:78] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [7]:
norm_joints = restructure_mediapipe_joints(landmarker_results, timestamps_sec, world=False)
norm_joints

Unnamed: 0,frame,time,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_visibility,left_shoulder_presence,right_shoulder_x,right_shoulder_y,right_shoulder_z,...,left_ankle_x,left_ankle_y,left_ankle_z,left_ankle_visibility,left_ankle_presence,right_ankle_x,right_ankle_y,right_ankle_z,right_ankle_visibility,right_ankle_presence
0,0,0,0.526308,0.533846,-0.045157,0.999912,0.999968,0.494425,0.524130,0.082677,...,0.449518,0.844357,-0.071455,0.951086,0.999784,0.428877,0.827237,0.028045,0.717678,0.999967
1,1,0,0.526414,0.533680,-0.048651,0.999917,0.999990,0.494480,0.524139,0.086429,...,0.449508,0.844363,-0.050186,0.947307,0.999604,0.429309,0.826037,0.025505,0.716324,0.999956
2,2,0,0.526499,0.533749,-0.047206,0.999917,0.999979,0.494498,0.523852,0.097816,...,0.449486,0.844370,-0.053422,0.944385,0.999737,0.429307,0.825180,0.025801,0.707172,0.999967
3,3,0,0.526582,0.533866,-0.049631,0.999920,0.999987,0.494386,0.523696,0.095028,...,0.449483,0.844534,-0.040793,0.944963,0.999898,0.429305,0.823555,0.025979,0.711971,0.999987
4,4,0,0.526712,0.533983,-0.050224,0.999923,0.999988,0.494385,0.523696,0.094459,...,0.449351,0.844860,-0.040335,0.944032,0.999826,0.429319,0.823615,0.022961,0.715814,0.999983
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1392,1392,27,0.500659,0.524765,-0.142126,0.999997,0.999999,0.460545,0.525534,0.019055,...,0.448326,0.845339,-0.103045,0.992004,0.999971,0.427757,0.824357,-0.021244,0.978743,0.999993
1393,1393,27,0.500658,0.524775,-0.141058,0.999997,0.999999,0.460555,0.525549,0.020752,...,0.448292,0.845339,-0.105252,0.992137,0.999971,0.427730,0.824425,-0.021524,0.978932,0.999993
1394,1394,27,0.500655,0.524854,-0.140134,0.999997,0.999999,0.460554,0.525584,0.021784,...,0.448284,0.845524,-0.108306,0.992393,0.999976,0.427729,0.824423,-0.022007,0.979381,0.999994
1395,1395,27,0.500652,0.524908,-0.140566,0.999997,0.999999,0.460555,0.525619,0.021643,...,0.448283,0.845675,-0.108036,0.992469,0.999973,0.427714,0.824512,-0.022302,0.979556,0.999993


In [8]:
world_joints = restructure_mediapipe_joints(landmarker_results, timestamps_sec, world=True)
world_joints

Unnamed: 0,frame,time,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_visibility,left_shoulder_presence,right_shoulder_x,right_shoulder_y,right_shoulder_z,...,left_ankle_x,left_ankle_y,left_ankle_z,left_ankle_visibility,left_ankle_presence,right_ankle_x,right_ankle_y,right_ankle_z,right_ankle_visibility,right_ankle_presence
0,0,0,0.210281,-0.374487,-0.080654,0.999912,0.999968,0.067446,-0.482596,0.159119,...,-0.206292,0.574583,-0.085083,0.951086,0.999784,-0.328149,0.516032,0.093391,0.717678,0.999967
1,1,0,0.215366,-0.373880,-0.081082,0.999917,0.999990,0.079011,-0.481406,0.161747,...,-0.197136,0.576344,-0.049167,0.947307,0.999604,-0.323196,0.514280,0.087199,0.716324,0.999956
2,2,0,0.214531,-0.373799,-0.078405,0.999917,0.999979,0.082024,-0.480034,0.173934,...,-0.194963,0.574965,-0.048163,0.944385,0.999737,-0.324346,0.507706,0.087756,0.707172,0.999967
3,3,0,0.214477,-0.374903,-0.079350,0.999920,0.999987,0.082385,-0.480506,0.173881,...,-0.187702,0.575450,-0.033644,0.944963,0.999898,-0.324416,0.500273,0.087804,0.711971,0.999987
4,4,0,0.215554,-0.375311,-0.079758,0.999923,0.999988,0.083609,-0.480593,0.173857,...,-0.184286,0.573695,-0.026581,0.944032,0.999826,-0.321039,0.493932,0.084320,0.715814,0.999983
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1392,1392,27,0.166843,-0.391119,-0.211584,0.999997,0.999999,-0.065915,-0.480173,0.063136,...,-0.098225,0.560027,-0.147090,0.992004,0.999971,-0.267639,0.500255,-0.028005,0.978743,0.999993
1393,1393,27,0.166329,-0.391185,-0.210803,0.999997,0.999999,-0.065106,-0.480172,0.065848,...,-0.100579,0.560551,-0.150070,0.992137,0.999971,-0.268174,0.500234,-0.028324,0.978932,0.999993
1394,1394,27,0.165948,-0.391529,-0.210229,0.999997,0.999999,-0.065134,-0.480390,0.066528,...,-0.100530,0.560339,-0.153728,0.992393,0.999976,-0.267703,0.499804,-0.028357,0.979381,0.999994
1395,1395,27,0.165990,-0.391717,-0.209810,0.999997,0.999999,-0.064095,-0.480388,0.067170,...,-0.100321,0.560558,-0.155101,0.992469,0.999973,-0.267694,0.499787,-0.029080,0.979556,0.999993


In [9]:
def draw_keypoints_on_image(image, structured_keypoints, joint_names):
  height, width, _ = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12*aspect_ratio,12))
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  x = np.array([structured_keypoints[f"{j}_x"] for j in joint_names]) * width
  y = np.array([structured_keypoints[f"{j}_y"] for j in joint_names]) * height
  ax.imshow(image)
  ax.scatter(x, y, c="#00ff00")
  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(fig.canvas.get_width_height()[::-1] + (4,))
  image_from_plot = image_from_plot[:, :, 1:4]
  plt.close(fig)

  return image_from_plot

def draw_keypoints_on_video(frames, structured_keypoints, joint_names, outpath):
  images = []  
  for i in range(len(frames)):
    image = draw_keypoints_on_image(frames[i], structured_keypoints.iloc[i].to_dict(), joint_names)
    images.append(image)
  
  imageio.mimsave(Path(outpath), images, fps=50)

POSE_EDGES_12 = [
  (11,13),(13,15),    # left arm
  (12,14),(14,16),    # right arm
  (11,12),            # shoulders
  (11,23),(12,24),    # torso
  (23,24),            # hips
  (23,25),(25,27),    # left leg
  (24,26),(26,28),    # right leg
]

def render_world_frame(world_row):
  xs, depth, up = [], [], []
  for name, _ in MEDIAPIPE_JOINTS:
    xs.append(world_row[f"{name}_x"])
    depth.append(world_row[f"{name}_z"])
    up.append(world_row[f"{name}_y"])

  fig = plt.figure(figsize=(4, 4))
  ax = fig.add_subplot(111, projection="3d")

  ax.scatter(xs, depth, up, s=25)

  for a, b in POSE_EDGES_12:
    ia = [i for i, (_, idx) in enumerate(MEDIAPIPE_JOINTS) if idx == a][0]
    ib = [i for i, (_, idx) in enumerate(MEDIAPIPE_JOINTS) if idx == b][0]
    ax.plot(
      [xs[ia], xs[ib]],
      [depth[ia], depth[ib]],
      [up[ia], up[ib]],
      linewidth=2
    )

  ax.set_xlim(-0.5, 0.5)
  ax.set_ylim(-0.5, 0.5)
  ax.set_zlim(-1.0, 1.0)

  ax.set_xlabel("X")
  ax.set_ylabel("Z")
  ax.set_zlabel("Y") # z and y flipped in matplotlib
  ax.invert_zaxis()
  ax.set_aspect('equal')

  ax.view_init(elev=20, azim=225)

  fig.canvas.draw()
  w, h = fig.canvas.get_width_height()
  buf = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)
  buf = buf.reshape((h, w, 4))
  img = buf[:, :, 1:4]

  plt.close(fig)
  return img


def draw_world_preview_video(world_df, outpath, fps=50):
  images = []
  for i in range(len(world_df)):
    images.append(render_world_frame(world_df.iloc[i]))
  imageio.mimsave(outpath, images, fps=fps)

In [10]:
joint_names = [name for name, index in MEDIAPIPE_JOINTS]

start_frame = 550
end_frame = 650

draw_keypoints_on_video(frames[start_frame:end_frame], structured_keypoints.iloc[start_frame:end_frame], joint_names, '../data/processed/keypoints_on_video/movenet.mp4')
draw_keypoints_on_video(frames[start_frame:end_frame], norm_joints.iloc[start_frame:end_frame], joint_names, '../data/processed/keypoints_on_video/mediapipe_norm.mp4')
draw_world_preview_video(world_joints.iloc[start_frame:end_frame], "../data/processed/keypoints_on_video/mediapipe_world.mp4")

