In [4]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp 

In [23]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on an OpenCV frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``, e.g. a MediaPipe
            ``Hands`` instance.

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR
        (a new array, not the input object) and whatever ``model.process``
        returned.
    """
    # The model is fed RGB, so convert from OpenCV's BGR first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Lock the buffer for the duration of inference, then unlock it again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [26]:
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Live preview: read webcam frames, overlay detected hand landmarks and show
# them in a window until the stream ends or 'z' is pressed.
cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(
        max_num_hands=2,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7
    ) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unplugged / stream ended).
                break

            frame, res = mediapipe_detection(frame, hands)
            if res.multi_hand_landmarks:
                for hand_landmarks in res.multi_hand_landmarks:
                    mp_draw.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS)

            cv2.imshow('Feed', frame)
            if cv2.waitKey(15) & 0xFF == ord('z'):
                break
finally:
    # Always free the camera and close the window, even when the loop is
    # interrupted or raises; otherwise the device stays locked until the
    # kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()

In [22]:
# Dump the landmarks of the most recent detection result (`res`), one hand at
# a time, with coordinates rounded to three decimals.
if res.multi_hand_landmarks:
    for hand_no, hand in enumerate(res.multi_hand_landmarks):
        # Handedness entries are index-aligned with the landmark lists.
        side = res.multi_handedness[hand_no].classification[0].label  # 'Left' or 'Right'
        print(f"\n{side} hand landmarks:")

        for point_no, point in enumerate(hand.landmark):
            print(f"Landmark {point_no}: x={point.x:.3f}, y={point.y:.3f}, z={point.z:.3f}")


Left hand landmarks:
Landmark 0: x=0.252, y=0.888, z=0.000
Landmark 1: x=0.379, y=0.814, z=-0.049
Landmark 2: x=0.478, y=0.675, z=-0.064
Landmark 3: x=0.525, y=0.541, z=-0.075
Landmark 4: x=0.560, y=0.433, z=-0.085
Landmark 5: x=0.401, y=0.471, z=-0.023
Landmark 6: x=0.457, y=0.321, z=-0.057
Landmark 7: x=0.491, y=0.227, z=-0.089
Landmark 8: x=0.522, y=0.140, z=-0.112
Landmark 9: x=0.328, y=0.437, z=-0.026
Landmark 10: x=0.363, y=0.264, z=-0.059
Landmark 11: x=0.396, y=0.144, z=-0.091
Landmark 12: x=0.425, y=0.040, z=-0.115
Landmark 13: x=0.254, y=0.445, z=-0.038
Landmark 14: x=0.249, y=0.269, z=-0.078
Landmark 15: x=0.264, y=0.151, z=-0.113
Landmark 16: x=0.287, y=0.047, z=-0.136
Landmark 17: x=0.181, y=0.485, z=-0.055
Landmark 18: x=0.144, y=0.345, z=-0.098
Landmark 19: x=0.128, y=0.245, z=-0.126
Landmark 20: x=0.126, y=0.144, z=-0.144


In [15]:
def extract_keypoints(results):
    """Flatten detected hand landmarks into one fixed-length vector.

    Args:
        results: Detection result exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (both falsy when nothing was detected).

    Returns:
        1-D array: the left hand's ``x, y, z`` values followed by the right
        hand's. A hand that was not detected contributes ``21 * 3`` zeros.
        If several detections share a label, the last one wins.
    """
    # Zero-filled placeholders keep the output length constant.
    by_side = {'Left': np.zeros(21 * 3), 'Right': np.zeros(21 * 3)}

    if not (results.multi_hand_landmarks and results.multi_handedness):
        return np.concatenate([by_side['Left'], by_side['Right']])

    for position, hand in enumerate(results.multi_hand_landmarks):
        # Handedness entries are index-aligned with the landmark lists.
        side = results.multi_handedness[position].classification[0].label
        coords = np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

        # Labels other than 'Left' / 'Right' are ignored.
        if side in by_side:
            by_side[side] = coords

    return np.concatenate([by_side['Left'], by_side['Right']])


In [None]:
# Root folder for the recorded keypoint data. The name is spelled DATH_PATH
# (sic) because other cells reference it under this name.
DATH_PATH = os.path.join('ASL_Data')
# Labels of the signs to record; fill this in before collecting.
actions = np.array([])

n_sequences = 20  # recordings ("videos") per action
s_length = 30     # frames per recording


def _highest_sequence_index(action_dir):
    """Return the largest integer-named entry in ``action_dir``, or -1.

    A missing folder, an empty folder, or one holding only non-numeric
    entries (e.g. ``.DS_Store``) all yield -1, so numbering starts at 0.
    """
    if not os.path.isdir(action_dir):
        return -1
    indices = [int(name) for name in os.listdir(action_dir) if name.isdecimal()]
    return max(indices, default=-1)


def create_sequence_dirs(data_path, action, count):
    """Create ``count`` new numbered sequence folders for ``action``.

    New folders continue after the highest existing number, so existing
    recordings are never reused.

    Args:
        data_path: Root data folder.
        action: Action label; used as the sub-folder name.
        count: Number of new sequence folders to create.

    Returns:
        List of the folder paths, in creation order.
    """
    action_dir = os.path.join(data_path, action)
    dirmax = _highest_sequence_index(action_dir)
    created = []
    for sequence in range(1, count + 1):
        sequence_dir = os.path.join(action_dir, str(dirmax + sequence))
        # exist_ok avoids masking real failures (permissions, bad path) the
        # way a bare ``except: pass`` would.
        os.makedirs(sequence_dir, exist_ok=True)
        created.append(sequence_dir)
    return created


for action in actions:
    create_sequence_dirs(DATH_PATH, action, n_sequences)

In [None]:
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Record training data: for every action, capture `n_sequences` recordings of
# `s_length` webcam frames each and save the extracted hand keypoints of each
# frame to DATH_PATH/<action>/<sequence>/<frame_num>.npy.
# Press 'q' to abort the whole collection run.
cap = cv2.VideoCapture(0)
stop_collection = False
try:
    with mp_hands.Hands(
        max_num_hands=2,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7
    ) as hands:

        for action in actions:
            for sequence in range(n_sequences):
                # Create the target folder here so saving does not depend on
                # a folder-setup cell having produced exactly these names.
                sequence_dir = os.path.join(DATH_PATH, action, str(sequence))
                os.makedirs(sequence_dir, exist_ok=True)
                status_text = 'Collecting frames for {} Video Number {}'.format(action, sequence)

                for frame_num in range(s_length):
                    ret, frame = cap.read()
                    if not ret:
                        # Camera stopped delivering frames: abort everything
                        # rather than skipping to the next sequence.
                        stop_collection = True
                        break

                    image, res = mediapipe_detection(frame, hands)
                    if res.multi_hand_landmarks:
                        for hand_landmarks in res.multi_hand_landmarks:
                            # Draw on `image`, the array that is displayed;
                            # drawing on `frame` would never be visible.
                            mp_draw.draw_landmarks(
                                image,
                                hand_landmarks,
                                mp_hands.HAND_CONNECTIONS)

                    is_first_frame = frame_num == 0
                    if is_first_frame:
                        cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, status_text, (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('Feed', image)
                    if is_first_frame:
                        # Short pause so the user can get into position.
                        cv2.waitKey(500)

                    # Export keypoints (np.save appends the .npy extension).
                    keypoints = extract_keypoints(res)
                    npy_path = os.path.join(sequence_dir, str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully: leave all three loops, not just this one.
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_collection = True
                        break

                if stop_collection:
                    break
            if stop_collection:
                break
finally:
    # Always free the camera and close the window, even on error/interrupt.
    cap.release()
    cv2.destroyAllWindows()