In [4]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp 

In [23]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on an OpenCV frame and hand the frame back.

    Args:
        image: Frame in BGR channel order (as converted below with
            ``cv2.COLOR_BGR2RGB``).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        ``(image, results)`` where ``image`` is the frame converted back to
        BGR and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call
    # (presumably so the model can use the buffer without copying it).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [15]:
def extract_keypoints(results):
    """Flatten detected hand landmarks into one fixed-length vector.

    Args:
        results: Detection output exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (parallel sequences, one entry per hand).

    Returns:
        1-D array of length 126: 63 values (21 landmarks x (x, y, z)) for the
        hand labelled 'Left' followed by 63 for the hand labelled 'Right'.
        A hand that was not reported stays all zeros.
    """
    values_per_hand = 21 * 3
    left = np.zeros(values_per_hand)
    right = np.zeros(values_per_hand)

    # Nothing detected (or no handedness info): return the zero vector.
    if not (results.multi_hand_landmarks and results.multi_handedness):
        return np.concatenate([left, right])

    for hand_idx, hand in enumerate(results.multi_hand_landmarks):
        # Handedness entry at the same index carries the 'Left' / 'Right' label.
        side = results.multi_handedness[hand_idx].classification[0].label
        coords = np.array([[point.x, point.y, point.z] for point in hand.landmark]).flatten()

        if side == 'Left':
            left = coords
        elif side == 'Right':
            right = coords

    return np.concatenate([left, right])


In [38]:
# Root folder of the dataset; layout is ASL_Data/<action>/<sequence>/<frame>.npy.
# NOTE: the name looks like a typo for DATA_PATH but is kept because other
# cells reference DATH_PATH.
DATH_PATH = os.path.join('ASL_Data')
# Full alphabet, currently disabled in favour of the three-letter subset below.
# actions = np.array(['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'])
actions = np.array(['a','b','c'])
n_sequences = 20   # sequence folders to add per action on each run of this cell
s_length = 30      # frames recorded per sequence


def next_sequence_index(action_path):
    """Return the first unused sequence number inside ``action_path``.

    Only entries whose names consist solely of digits count as sequence
    folders, so stray files such as ``.DS_Store`` or ``.ipynb_checkpoints``
    no longer break the scan. Returns 0 when no numbered entry exists.
    """
    taken = [int(name) for name in os.listdir(action_path) if name.isdigit()]
    return max(taken, default=-1) + 1


for action in actions:
    action_path = os.path.join(DATH_PATH, action)
    os.makedirs(action_path, exist_ok=True)

    # Highest existing sequence number (-1 when the action folder is empty);
    # new folders are appended after it so earlier recordings are kept.
    dirmax = next_sequence_index(action_path) - 1
    for sequence in range(n_sequences):
        # exist_ok tolerates a pre-existing folder without hiding real
        # failures (permissions, full disk) the way a bare except did.
        os.makedirs(os.path.join(action_path, str(dirmax + 1 + sequence)), exist_ok=True)

In [39]:
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils


def latest_sequence_ids(action_dir, count):
    """Return the ``count`` highest-numbered sequence folders of ``action_dir``.

    The ids are returned in ascending order. Only sub-directories whose names
    are purely numeric are considered. An empty list is returned when
    ``count`` is not positive or ``action_dir`` does not exist.
    """
    if count <= 0 or not os.path.isdir(action_dir):
        return []
    ids = sorted(
        int(name) for name in os.listdir(action_dir)
        if name.isdigit() and os.path.isdir(os.path.join(action_dir, name))
    )
    return ids[-count:]


def collect_keypoint_sequences(cap, hands):
    """Record ``s_length`` keypoint frames for every sequence of every action.

    Frames are written as ``<DATH_PATH>/<action>/<sequence>/<frame_num>.npy``.
    The sequences targeted are the ``n_sequences`` highest-numbered folders of
    each action, i.e. the ones most recently appended by the setup cell, so a
    second collection run does not overwrite folders 0..n_sequences-1. If an
    action has no numbered folder yet, folders 0..n_sequences-1 are used.

    Args:
        cap: Opened ``cv2.VideoCapture``.
        hands: MediaPipe ``Hands`` instance passed to ``mediapipe_detection``.

    Returns:
        True if every frame was recorded; False if the camera stopped
        delivering frames or the user pressed 'z'.
    """
    for action in actions:
        action_dir = os.path.join(DATH_PATH, action)
        sequence_ids = latest_sequence_ids(action_dir, n_sequences) or range(n_sequences)

        for sequence in sequence_ids:
            sequence_dir = os.path.join(action_dir, str(sequence))
            os.makedirs(sequence_dir, exist_ok=True)

            for frame_num in range(s_length):
                ret, frame = cap.read()
                if not ret:
                    # Returning (not break) stops every loop level; a plain
                    # break would just move on to the next sequence.
                    print('Camera stopped delivering frames; collection aborted.')
                    return False

                image, res = mediapipe_detection(frame, hands)
                if res.multi_hand_landmarks:
                    for hand_landmarks in res.multi_hand_landmarks:
                        mp_draw.draw_landmarks(
                            image,
                            hand_landmarks,
                            mp_hands.HAND_CONNECTIONS)

                first_frame = frame_num == 0
                if first_frame:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                cv2.imshow('Feed', image)
                if first_frame:
                    # Short pause at the start of each sequence so the
                    # banner is visible before recording continues.
                    cv2.waitKey(250)

                keypoints = extract_keypoints(res)
                np.save(os.path.join(sequence_dir, str(frame_num)), keypoints)

                # 'z' aborts the whole collection run.
                if cv2.waitKey(15) & 0xFF == ord('z'):
                    return False
    return True


cap = cv2.VideoCapture(0)
try:
    if cap.isOpened():
        with mp_hands.Hands(
            max_num_hands=2,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        ) as hands:
            collect_keypoint_sequences(cap, hands)
    else:
        print('Could not open webcam 0; no data was collected.')
finally:
    # Always free the camera and close the preview window, even when the
    # loop raises, so the device is not left locked by the kernel.
    cap.release()
    cv2.destroyAllWindows()