In [4]:
import os


# Directory that receives every artefact written by this notebook
# (pose files, rendered videos, pickles).
OUT_DIR = ".temp"

# exist_ok=True makes the cell idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(OUT_DIR, exist_ok=True)

In [19]:
import cv2
from pose_format.utils.holistic import load_holistic


# Directory holding one image file per video frame.
DATA_DIR = "/mnt/data3/slt-datasets-3/GSL/GSL_continuous/health1_signer1_rep1_sentences/sentences0000"

# Extra keyword arguments forwarded to the holistic model through load_holistic.
holistic_config = {
	"model_complexity": 2,
	"refine_face_landmarks": True,
	"min_detection_confidence": 0.2,
	"min_tracking_confidence": 0.2,
}

# Sorted so that frames are processed in lexicographic file-name order.
files = sorted(os.listdir(DATA_DIR))
frames = [cv2.imread(os.path.join(DATA_DIR, file)) for file in files]

# cv2.imread signals a failed read by returning None instead of raising, so an
# unreadable or non-image file would otherwise only surface later as an obscure
# AttributeError/IndexError. Fail early with the offending file names instead.
unreadable = [file for file, frame in zip(files, frames) if frame is None]
if not frames or unreadable:
	raise ValueError(f"Could not load frames from {DATA_DIR!r}; unreadable files: {unreadable}")

# NOTE(review): cv2.imread yields BGR channel order. Confirm whether load_holistic
# converts to RGB internally; if not, convert the frames with cv2.cvtColor first.
frame_height, frame_width = frames[0].shape[:2]
# 30 is passed as the fps of the sequence.
pose = load_holistic(frames, 30, frame_width, frame_height, progress=True, additional_holistic_config=holistic_config)
with open(f"{OUT_DIR}/example.pose", "wb") as f:
	pose.write(f)

I0000 00:00:1713266500.309229 3984003 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1713266500.322471 3984353 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 550.54.14), renderer: NVIDIA TITAN X (Pascal)/PCIe/SSE2
100%|██████████| 111/111 [00:15<00:00,  7.22it/s]


In [14]:
from pose_format.pose import Pose

# Read the serialized pose back from disk. The context manager guarantees the
# file handle is closed, which the bare open(...).read() did not.
with open(f"{OUT_DIR}/example.pose", "rb") as f:
	data_buffer = f.read()
pose = Pose.read(data_buffer)

In [8]:
# Show the name of every component stored in the pose header.
[component.name for component in pose.header.components]

['POSE_LANDMARKS',
 'FACE_LANDMARKS',
 'LEFT_HAND_LANDMARKS',
 'RIGHT_HAND_LANDMARKS',
 'POSE_WORLD_LANDMARKS']

In [9]:
from pose_format.utils.pose_converter import convert_pose

# Pass every header component through convert_pose unchanged in selection
# (a filter that excluded "POSE_LANDMARKS" was tried here and is disabled).
components = list(pose.header.components)
pose = convert_pose(pose, components)

In [10]:
# Show the component names again, now for the pose returned by convert_pose.
[component.name for component in pose.header.components]

['POSE_LANDMARKS',
 'FACE_LANDMARKS',
 'LEFT_HAND_LANDMARKS',
 'RIGHT_HAND_LANDMARKS',
 'POSE_WORLD_LANDMARKS']

In [10]:
from numpy.typing import NDArray
import numpy as np
import os
import cv2
from skvideo.io import vwrite


def draw_keypoints(
		frame,
        frame_keypoints,
        size: int = 5,
        threshold: float = 0.5,
        color: int | None = None):
	'''Draw keypoints in the frame'''
	fr = frame.copy()
	for i, (x, y, z) in enumerate(frame_keypoints):
		# check if x or y are masked
		if np.ma.is_masked(x) or np.ma.is_masked(y) or np.isnan(x) or np.isnan(y):
			# print(x.data, y.data)
			continue
		else:
			fr[int(y)-size:int(y)+size:,int(x)-size:int(x)+size] = color if color is not None else 255-fr[int(y)-size:int(y)+size:,int(x)-size:int(x)+size]
	return fr

def store_video(video: NDArray[np.uint8], name: str, dir: str = '.temp'):
	'''Write `video` to "<dir>/<name>.mp4" using skvideo's vwrite.

	Args:
		video: frames to encode, passed to vwrite unchanged.
		name: file name without extension; ".mp4" is appended here.
		dir: target directory, created (including parents) when missing.
	'''
	# exist_ok=True avoids the race between a separate exists() check and makedirs().
	os.makedirs(dir, exist_ok=True)
	vwrite(f"{dir}/{name}.mp4", video)


In [74]:
keypoints = pose.body.data
# Canvas size in pixels (rows, columns) on which the keypoints are drawn.
h, w = 480, 848
video = np.zeros((keypoints.shape[0], h, w, 3), np.dtype('uint8'))
print(keypoints.shape)
for i_frame in range(keypoints.shape[0]):
	# Index 0 selects the first entry of the second axis (presumably the only
	# person in the clip; verify against the pose_format body layout).
	video[i_frame] = draw_keypoints(video[i_frame], keypoints[i_frame][0], size=2)
# store_video takes (video, name, dir) and appends ".mp4" itself. The previous
# call passed 30 as the name and "example_adhoc.mp4" as the directory, which
# wrote the video to "example_adhoc.mp4/30.mp4".
store_video(video, "example_adhoc")

(111, 1, 586, 3)


In [20]:
from pose_format.pose_visualizer import PoseVisualizer


# Render the pose with pose_format's PoseVisualizer and save the drawn frames
# as an MP4 file inside OUT_DIR.
v = PoseVisualizer(pose)
v.save_video(f"{OUT_DIR}/example_b.mp4", v.draw())
# Alternative GIF export (disabled):
# v.save_gif(f"{OUT_DIR}/example.gif", v.draw())

111it [00:07, 15.42it/s]


# With MediaPipe only (without pose_format)

In [26]:
import os
import numpy as np

from skimage.io import imread
from numpy.typing import NDArray
from mediapipe import solutions


# dtype used for every stored landmark coordinate.
JOINTS_SIZE = np.float16

def process_keys(frame_keypoints) -> NDArray[JOINTS_SIZE]:
	'''Flatten one holistic result into a single (n_landmarks, 3) array.

	Rows are ordered pose, face, right hand, left hand; each row is (x, y, z).
	A landmark group that is None on `frame_keypoints` is replaced by NaN rows
	(33 for pose, 468 for face, 21 per hand), so a result with no detections
	still yields 543 rows. The array is cast to JOINTS_SIZE.
	'''
	# (attribute on the result object, number of NaN rows used when it is None)
	landmark_groups = (
		("pose_landmarks", 33),
		("face_landmarks", 468),
		("right_hand_landmarks", 21),
		("left_hand_landmarks", 21),
	)
	rows = []
	for attribute, placeholder_count in landmark_groups:
		detected = getattr(frame_keypoints, attribute)
		if detected is None:
			rows.extend([[np.nan] * 3] * placeholder_count)
		else:
			rows.extend([point.x, point.y, point.z] for point in detected.landmark)
	return np.stack(rows).astype(JOINTS_SIZE)

def run_holistic(frames: NDArray[np.uint8]) -> list:
	'''Run MediaPipe Holistic on every frame and collect the raw results.

	Args:
		frames: iterable of image arrays, processed in order.
			NOTE(review): MediaPipe documents RGB input; confirm the frames
			passed in are RGB rather than BGR.

	Returns:
		A list with one `holistic.process` result per frame, in input order.
		These are unconverted result objects, not a numeric array.
	'''
	# A single Holistic instance is reused for all frames and closed on exit.
	with solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: # type: ignore
		return [holistic.process(frame) for frame in frames]


# Directory holding one image file per video frame.
DATA_DIR = "/mnt/data3/slt-datasets-3/GSL/GSL_continuous/health1_signer1_rep1_sentences/sentences0000"

# Sorted so that frames are stacked in lexicographic file-name order.
files = sorted(os.listdir(DATA_DIR))
video = np.stack([imread(os.path.join(DATA_DIR, frame_name)) for frame_name in files])

print(f"Loaded {len(video)} frames")

# One raw holistic result per loaded frame.
keypoints = run_holistic(video)

AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [18]:
# Convert each per-frame holistic result with process_keys and stack the
# resulting arrays along a new leading (frame) axis.
processed_keys = np.array([process_keys(frame_result) for frame_result in keypoints])
processed_keys.shape

(111, 543, 3)

In [19]:
def draw_keypoints(
		frame,
        frame_keypoints,
		h: int,
		w: int,
        size: int = 5,
        color: int | None = None):
	'''Draw keypoints in the frame'''
	fr = frame.copy()
	for x, y, z in frame_keypoints:
		# check if x or y are masked
		if np.ma.is_masked(x) or np.ma.is_masked(y) or np.isnan(x) or np.isnan(y):
			# print(x.data, y.data)
			continue
		else:
			x = int(x*w)
			y = int(y*h)
			fr[y-size:y+size:,x-size:x+size] = color if color is not None else 255-fr[y-size:y+size:,x-size:x+size]
	return fr

In [27]:
# Read processed keypoints from test.npy (resolved against the working directory).
# NOTE(review): this rebinds `processed_keys`, replacing the array built from
# `keypoints` in an earlier cell, and no visible cell writes test.npy.
# Confirm where that file comes from.
processed_keys = np.load("test.npy")

In [28]:
# Canvas size in pixels (rows, columns); keypoint x/y are scaled by w/h when drawn.
# NOTE(review): the pose_format section draws on a 480x848 canvas; confirm
# which size matches the source frames.
h, w = 480, 720
video = np.zeros((len(processed_keys), h, w, 3), dtype=np.uint8)
print(processed_keys.shape)
for i_frame, frame_points in enumerate(processed_keys):
	video[i_frame] = draw_keypoints(video[i_frame], frame_points, h, w, size=2)
store_video(video, "example_mp")

(111, 543, 3)


In [82]:
# Inspect the first pose landmark of the first entry in `keypoints`.
keypoints[0].pose_landmarks.landmark[0]

x: 0.4614348
y: 0.24736053
z: -0.44210353
visibility: 0.99998236

In [76]:
# pickle store the keypoints
import pickle

# Serialise the `keypoints` object to example_mp.pickle inside OUT_DIR.
# Pickle files can execute code when loaded, so only unpickle this file from a trusted source.
with open(f"{OUT_DIR}/example_mp.pickle", "wb") as f:
	pickle.dump(keypoints, f)