In [29]:
import cv2 as cv
import matplotlib.pyplot as plt
import os
import time
import mediapipe as mp
import numpy as np

In [30]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_holistic = mp.solutions.holistic  # Holistic model class plus pose/hand connection sets
mp_drawing = mp.solutions.drawing_utils  # draw_landmarks and DrawingSpec helpers
mp_face_mesh = mp.solutions.face_mesh  # FACEMESH_TESSELATION connection set

In [31]:
def mediapipe_detection(image, model):
    """Run ``model`` on one frame and hand back the frame with the detections.

    Args:
        image: Frame to analyse; it is converted BGR -> RGB before processing.
        model: Object exposing ``process(image)`` (here a Holistic instance).

    Returns:
        A ``(image, results)`` tuple: the frame converted back RGB -> BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    # Keep the array read-only for the duration of the process() call only.
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv.cvtColor(rgb_frame, cv.COLOR_RGB2BGR)
    return bgr_frame, detections

In [32]:
def draw_landmarks(image, result):
    """Draw every landmark group present in ``result`` onto ``image``.

    Face, pose, left-hand and right-hand landmarks are drawn in that order
    with the default drawing style; groups that are falsy are skipped.
    """
    face = result.face_landmarks
    if face:
        mp_drawing.draw_landmarks(image, face, mp_face_mesh.FACEMESH_TESSELATION)

    body = result.pose_landmarks
    if body:
        mp_drawing.draw_landmarks(image, body, mp_holistic.POSE_CONNECTIONS)

    # Both hands share the same connection set, so handle them together.
    for hand in (result.left_hand_landmarks, result.right_hand_landmarks):
        if hand:
            mp_drawing.draw_landmarks(image, hand, mp_holistic.HAND_CONNECTIONS)


In [33]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with custom colours.

    Each landmark group of ``results`` is passed straight to
    ``mp_drawing.draw_landmarks`` (no presence check is done here) together
    with one DrawingSpec for the landmark points and one for the connections.

    Args:
        image: Frame that is drawn on in place.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec

    # (landmarks, connections, landmark spec, connection spec), in draw order.
    styled_groups = (
        # Face connections. The connection colour previously used 256 for one
        # channel, which is outside the 0-255 range of an 8-bit channel.
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose connections.
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand connections.
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand connections.
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_spec, connection_spec in styled_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_spec, connection_spec)


In [34]:
# Play 'sign.mp4' frame by frame, run the Holistic model on every frame and
# show the annotated result until the video ends or 'q' is pressed.
# `frame`, `image` and `results` stay defined afterwards for the cells below.
cap = cv.VideoCapture('sign.mp4')
if not cap.isOpened():
    # Without this the loop below is skipped silently and later cells fail
    # on an undefined `results`.
    print("Could not open video source 'sign.mp4'.")

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            ret, frame = cap.read()
            if not ret:
                print("Video ended or cannot read the frame.")
                break

            image, results = mediapipe_detection(frame, holistic)

            draw_styled_landmarks(image, results)

            cv.imshow('OpenCV feed', image)
            if cv.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the capture and close the preview window, even when a
    # frame fails to process or the cell is interrupted.
    cap.release()
    cv.destroyAllWindows()

I0000 00:00:1753077907.296779   21041 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1753077907.341256   24343 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 570.172.08), renderer: NVIDIA GeForce RTX 4070 SUPER/PCIe/SSE2
W0000 00:00:1753077907.371718   24315 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1753077907.389427   24319 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1753077907.390321   24315 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1753077907.390320   24318 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000

Video ended or cannot read the frame.


In [35]:
# Inspect the right-hand landmarks left in `results` by the capture loop.
# No output is recorded for this cell.
results.right_hand_landmarks

In [36]:
# Inspect the value from the final cap.read(); no output is recorded here.
# NOTE(review): presumably None because the loop ended on a failed read — confirm.
frame

## Extracting Keypoint Values

In [38]:
# First pose landmark of `results`: a record with x, y, z and visibility.
results.pose_landmarks.landmark[0]

x: 0.208067343
y: 0.308392137
z: -1.54519868
visibility: 0.999556541

In [39]:
# x field of the first pose landmark.
results.pose_landmarks.landmark[0].x

0.20806734263896942

In [40]:
# y field of the first pose landmark.
results.pose_landmarks.landmark[0].y

0.30839213728904724

In [41]:
# z field of the first pose landmark.
results.pose_landmarks.landmark[0].z

-1.545198678970337

In [42]:
# Collect one [x, y, z, visibility] array per pose landmark of `results`.
# A comprehension replaces the manual append loop and avoids leaving the
# throwaway loop variables behind in the notebook namespace.
pose = [
    np.array([res.x, res.y, res.z, res.visibility])
    for res in results.pose_landmarks.landmark
]

In [43]:
# Display the list of per-landmark [x, y, z, visibility] arrays.
pose

[array([ 0.20806734,  0.30839214, -1.54519868,  0.99955654]),
 array([ 0.25050735,  0.26525313, -1.48343861,  0.99952579]),
 array([ 0.28829548,  0.26402819, -1.48437369,  0.99950022]),
 array([ 0.32623079,  0.26317799, -1.48611891,  0.99966347]),
 array([ 0.16714066,  0.27334788, -1.39754558,  0.99929464]),
 array([ 0.14025199,  0.27824914, -1.39638591,  0.99898279]),
 array([ 0.11178094,  0.28311449, -1.39694214,  0.99892175]),
 array([ 0.41755825,  0.29371953, -0.98805618,  0.99972099]),
 array([ 0.11090349,  0.31942326, -0.54129785,  0.99861974]),
 array([ 0.28736043,  0.36194232, -1.36281073,  0.99981803]),
 array([ 0.18105596,  0.36720055, -1.23716307,  0.99953759]),
 array([ 0.74084538,  0.57214075, -0.94858307,  0.99902934]),
 array([0.05834119, 0.60413313, 0.00761558, 0.99067634]),
 array([ 1.05765486,  0.82694376, -1.20036244,  0.77121145]),
 array([0.01450908, 0.90410393, 0.12235822, 0.15506069]),
 array([ 1.33288968,  1.07438779, -1.59983516,  0.25128546]),
 array([-0.03859

In [44]:
# Stack the per-landmark arrays into one 2-D array (rebinds `pose`).
pose = np.array(pose)

In [45]:
# Shape check: one row per landmark, four values per row.
pose.shape

(33, 4)

In [46]:
# Preview the length of the flattened vector without rebinding `pose`.
pose.flatten().shape

(132,)

In [47]:
# Replace `pose` with its flattened 1-D form.
pose = pose.flatten()

In [48]:
# Confirm `pose` is now a flat vector.
pose.shape

(132,)

In [49]:
# Flatten each landmark group of `results` into a 1-D vector; a group that is
# absent is replaced by a fixed-size vector of zeros.
if results.pose_landmarks:
    pose = np.array(
        [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
    ).flatten()
else:
    pose = np.zeros(132)

if results.face_landmarks:
    face = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.face_landmarks.landmark]
    ).flatten()
else:
    face = np.zeros(1404)

if results.left_hand_landmarks:
    lh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
    ).flatten()
else:
    lh = np.zeros(21*3)

if results.right_hand_landmarks:
    rh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
    ).flatten()
else:
    rh = np.zeros(21*3)

In [50]:
# Face landmarks as a flat [x, y, z, x, y, z, ...] vector, or zeros when the
# face group is absent. This repeats the face extraction of the previous cell.
if results.face_landmarks:
    face_rows = [[pt.x, pt.y, pt.z] for pt in results.face_landmarks.landmark]
    face = np.array(face_rows).flatten()
else:
    face = np.zeros(1404)

In [51]:
def extract_keypoints(results):
    """Flatten all landmark groups of ``results`` into one 1-D array.

    The output is the concatenation, in this order, of:
      * pose: x, y, z, visibility per landmark (zeros(33*4) if absent)
      * face: x, y, z per landmark (zeros(468*3) if absent)
      * left hand: x, y, z per landmark (zeros(21*3) if absent)
      * right hand: x, y, z per landmark (zeros(21*3) if absent)

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable.

    Returns:
        A 1-D NumPy array with the four segments back to back.
    """
    xyz = ("x", "y", "z")

    def flatten_or_zeros(group, fields, fallback_count):
        # A falsy group yields a zero vector sized for `fallback_count` landmarks.
        if not group:
            return np.zeros(fallback_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    segments = [
        flatten_or_zeros(results.pose_landmarks, xyz + ("visibility",), 33),
        flatten_or_zeros(results.face_landmarks, xyz, 468),
        flatten_or_zeros(results.left_hand_landmarks, xyz, 21),
        flatten_or_zeros(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(segments)

In [53]:
# Sanity check of the feature length: 33*4 + 468*3 + 21*3 + 21*3 = 1662.
extract_keypoints(results).shape

(1662,)