In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# from pathlib import Path
# from typing import cast

# import cv2 as cv
# import matplotlib.pyplot as plt
# import mediapipe as mp
# from mediapipe import solutions
# from mediapipe.framework.formats import landmark_pb2
# import mediapipe.python.solutions.drawing_styles as mp_drawing_styles
# import mediapipe.python.solutions.drawing_utils as mp_drawing_utils
# import mediapipe.python.solutions.hands as mp_hands
# from mediapipe.tasks import python
# from mediapipe.tasks.python import vision
# from mediapipe.tasks.python.components.containers.category import Category
# from mediapipe.tasks.python.components.containers.landmark import (
#     Landmark,
#     NormalizedLandmark,
# )
# from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
#     VisionTaskRunningMode as VisionRunningMode,
# )
# import numpy as np

# from holo_table.landmark.compute import HandLandmarkerFrame
# from holo_table.landmark.compute import get_landmarks_from_result
# from holo_table.utils.cv import cv_imshow
# from holo_table.utils.data import get_resource
# from holo_table.utils.mediapipe import (
#     HAND_LANDMARK_MAP,
#     HAND_LANDMARK_NAMES,
#     get_default_hand_connections,
# )
# from holo_table.utils.plt import show_frame
# from holo_table.video.frame import Frame
# from holo_table.video.load import list_video_frames, iterate_video_frames

from pathlib import Path
from typing import cast

import cv2 as cv
import matplotlib.pyplot as plt
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe.python.solutions.drawing_styles as mp_drawing_styles
import mediapipe.python.solutions.drawing_utils as mp_drawing_utils
import mediapipe.python.solutions.hands as mp_hands
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.components.containers.category import Category
from mediapipe.tasks.python.components.containers.landmark import (
    Landmark,
    NormalizedLandmark,
)
from mediapipe.tasks.python.core.base_options import BaseOptions
from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
    VisionTaskRunningMode as VisionRunningMode,
)
from mediapipe.tasks.python.vision.hand_landmarker import (
    HandLandmarker,
    HandLandmarkerOptions,
    HandLandmarkerResult,
)
import numpy as np

from holo_table.landmark.compute import HandLandmarkerFrame
from holo_table.utils.cv import cv_imshow
from holo_table.utils.data import get_resource
from holo_table.utils.mediapipe import (
    HAND_LANDMARK_MAP,
    HAND_LANDMARK_NAMES,
    get_default_hand_connections,
    get_landmarks_from_result,
)
from holo_table.utils.plt import show_frame
from holo_table.video.frame import Frame
from holo_table.video.load import iterate_video_frames, list_video_frames


In [None]:
# Build the hand landmarker wrapper from the model file resolved by get_resource.
hand_landmark_model_path = get_resource("hand_landmarker.task")
hlf = HandLandmarkerFrame(
    hand_landmark_model_path=hand_landmark_model_path,
    # VIDEO running mode is used here; VisionRunningMode.IMAGE is the other
    # mode that was tried in this notebook.
    hand_landmarker_kwargs=dict(
        running_mode=VisionRunningMode.VIDEO,
        num_hands=2,
    ),
)


In [None]:
# Locate the clip to analyse inside the "hand_fol" resource folder.
video_name = "pinch_02.mp4"
data_fol = get_resource("hand_fol")
video_path = data_fol / video_name
# last expression of the cell: echo the resolved path
video_path


In [None]:
# Read the frames of the clip into memory.
# While debugging, pass e.g. max_frame_count=4 to load only a few frames,
# and inspect one of them with show_frame(vfs[2]).
vfs = list_video_frames(video_path, keep_every_nth_frame=1)
print(f"{len(vfs)=}")


In [None]:
# detection_result = hlf.detect(vfs[2])
# one_hand_world_landmarks = get_landmarks_from_result(detection_result, "world")
# one_hand_world_landmarks[HAND_LANDMARK_MAP["WRIST"]]


In [None]:
def compute_landmark_dist(
    one_hand_world_landmarks: list[Landmark],
    landmark_name1: str,
    landmark_name2: str,
) -> float:
    """Return the Euclidean distance between two named landmarks of one hand.

    Args:
        one_hand_world_landmarks: Landmarks of a single hand, indexable by the
            integer positions stored in ``HAND_LANDMARK_MAP``.
        landmark_name1: Key of the first landmark in ``HAND_LANDMARK_MAP``.
        landmark_name2: Key of the second landmark in ``HAND_LANDMARK_MAP``.

    Returns:
        The norm of the difference between the ``(x, y, z)`` coordinates of
        the two landmarks, as a numpy float scalar.
    """
    # resolve both names to landmark objects (first name, then second)
    first, second = (
        one_hand_world_landmarks[HAND_LANDMARK_MAP[name]]
        for name in (landmark_name1, landmark_name2)
    )
    point_a = np.array([first.x, first.y, first.z])
    point_b = np.array([second.x, second.y, second.z])
    offset = point_a - point_b
    return np.linalg.norm(offset).astype(float)


# compute distance between thumb and index tips
# compute_landmark_dist(one_hand_world_landmarks, "THUMB_TIP", "INDEX_FINGER_TIP")


In [None]:
def compute_pinch_norm(
    one_hand_world_landmarks: list[Landmark],
) -> float:
    """Return the thumb-to-index pinch opening, normalized by a hand-size proxy.

    The numerator is the distance between ``THUMB_TIP`` and
    ``INDEX_FINGER_TIP``; the denominator is the distance between ``WRIST``
    and ``INDEX_FINGER_MCP``.

    Args:
        one_hand_world_landmarks: Landmarks of a single hand, as accepted by
            ``compute_landmark_dist``.

    Returns:
        The ratio of the two distances.
    """
    pinch_opening = compute_landmark_dist(
        one_hand_world_landmarks, "THUMB_TIP", "INDEX_FINGER_TIP"
    )
    # wrist -> index knuckle, used as the reference length
    reference_length = compute_landmark_dist(
        one_hand_world_landmarks, "WRIST", "INDEX_FINGER_MCP"
    )
    ratio = pinch_opening / reference_length
    return ratio


# compute_pinch_norm(one_hand_world_landmarks)


In [None]:
from IPython.display import display, clear_output


def plot_frame(
    fig,
    ax,
    frame: Frame,
    dist: float,
):
    """Redraw ``frame`` on ``ax`` and refresh the figure shown in the notebook.

    Args:
        fig: Figure that owns ``ax``; it is passed to ``display``.
        ax: Axes that is cleared and then drawn on by ``show_frame``.
        frame: Video frame to draw.
        dist: Value appended to the title, formatted with three decimals.
    """
    # wipe whatever the previous call drew on these axes
    ax.cla()
    suffix = f": {dist:.3f}"
    show_frame(frame, ax=ax, do_show=False, do_resize=True, title_suffix=suffix)
    # show the figure, then mark the output for replacement by the next display
    display(fig)
    clear_output(wait=True)


In [None]:
# process the video and compute all the pinch sizes

from tqdm import tqdm


all_dist_ls = []
all_msec_ls = []

for frame in tqdm(vfs):
    detection_result = hlf.detect(frame)
    one_hand_world_landmarks = get_landmarks_from_result(detection_result, "world")
    if one_hand_world_landmarks is None:
        # No landmarks for this frame: skip it entirely. The timestamp must be
        # skipped as well, otherwise all_msec and all_dist get different
        # lengths and can no longer be plotted against each other.
        continue
    dist = compute_pinch_norm(one_hand_world_landmarks)
    # record timestamp and pinch value together so both lists stay aligned
    all_msec_ls.append(frame.msec)
    all_dist_ls.append(dist)
    # to inspect single frames while debugging:
    # show_frame(frame, title_suffix=f"{dist=:.5f}")

all_msec = np.array(all_msec_ls)
all_dist = np.array(all_dist_ls)

# one timestamp per pinch value
assert all_msec.shape == all_dist.shape


In [None]:
def diff_pad(x) -> np.ndarray:
    """Return the first difference of ``x``, padded to the length of ``x``.

    The first element of ``x`` is prepended before differencing, so the first
    output value is always zero and ``out[i] == x[i] - x[i - 1]`` for ``i > 0``.

    Args:
        x: One-dimensional signal (array or sequence of numbers).

    Returns:
        Array with the same length as ``x``; an empty array for empty input.
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to difference; x[0] below would raise IndexError
        return x.copy()
    return np.diff(x, prepend=x[0])


In [None]:
# Raw pinch signal together with its (unsmoothed) first difference.
all_dist_d = diff_pad(all_dist)
# both curves share the time axis; plot() accepts several x/y pairs at once
plt.plot(all_msec, all_dist, all_msec, all_dist_d)
plt.grid()


In [None]:
# create a moving average filter
# left_triangle = np.arange(1, 5+1) 
# left_triangle = left_triangle / left_triangle.sum()
# plt.plot(left_triangle)

def create_left_triangle_filter(
    window_size: int,
) -> np.ndarray:
    """Build a ramp-shaped smoothing kernel whose weights sum to one.

    The weights grow linearly from ``1`` to ``window_size`` and are then
    divided by their sum, so the last tap carries the largest weight.

    Args:
        window_size: Number of taps in the kernel.

    Returns:
        One-dimensional array of ``window_size`` normalized weights.
    """
    ramp = np.arange(window_size) + 1
    return ramp / ramp.sum()


# kernel shared by the smoothing cells below
left_triangle = create_left_triangle_filter(5)


In [None]:
# apply the filter
def convolve_pad(x, kernel):
    """Convolve ``x`` with ``kernel`` using numpy's ``"same"`` mode.

    Args:
        x: One-dimensional signal.
        kernel: One-dimensional filter weights.

    Returns:
        The result of ``np.convolve`` in ``"same"`` mode, i.e. an array as
        long as the longer of the two inputs.
    """
    filtered = np.convolve(x, kernel, "same")
    return filtered

In [None]:
# Offline processing: filter the whole pinch signal in one go.
all_dist_smooth = convolve_pad(all_dist, left_triangle)
# print input and output shapes for comparison
print(all_dist.shape, all_dist_smooth.shape, sep="\n")

# first difference of the smoothed signal, smoothed once more
all_dist_smooth_d = diff_pad(all_dist_smooth)
all_dist_smooth_d_smooth = convolve_pad(all_dist_smooth_d, left_triangle)

# smoothed pinch on the left axis, its smoothed derivative on a twin right axis
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(all_msec, all_dist_smooth)
axt = ax.twinx()
axt.plot(all_msec, all_dist_smooth_d_smooth, color="C1")
axt.grid()


In [None]:
# The offline version above sees the whole signal at once. In practice the
# samples arrive one by one, so the same quantities are recomputed here in an
# online fashion, using only the last `filter_size` values.

# create the weighting kernel
filter_size = 5
left_triangle = create_left_triangle_filter(filter_size)
# Rolling buffers, ordered oldest -> newest and initialised with zeros.
# hist_dist holds the raw pinch values, hist_dist_smooth the filtered ones.
hist_dist = np.zeros(filter_size, dtype=float)
hist_dist_smooth = np.zeros(filter_size, dtype=float)

all_dist_sds_ls = []

for dist in all_dist:
    # shift the raw history left by one and store the newest sample last
    hist_dist = np.roll(hist_dist, -1)
    hist_dist[-1] = dist

    # weighted average of the raw history; the newest sample lines up with
    # the last (largest) kernel weight
    dist_smooth = np.dot(hist_dist, left_triangle)

    # same rolling update for the history of smoothed values
    hist_dist_smooth = np.roll(hist_dist_smooth, -1)
    hist_dist_smooth[-1] = dist_smooth

    # first difference over the smoothed history (first entry is zero)
    dist_smooth_d = diff_pad(hist_dist_smooth)

    # weighted average of the differences -> smoothed derivative for this step
    dist_smooth_d_smooth = np.dot(dist_smooth_d, left_triangle)
    all_dist_sds_ls.append(dist_smooth_d_smooth)


all_dist_sds = np.array(all_dist_sds_ls)

plt.plot(all_msec, all_dist_sds)
plt.grid()


In [None]:
# Overlay the online result and the offline smoothed derivative on one plot.
plt.plot(all_msec, all_dist_sds, all_msec, all_dist_smooth_d_smooth)
plt.grid()