In [4]:
from __future__ import unicode_literals
import cv2
import numpy as np
import os
import mediapipe as mp
import time
from datetime import datetime

# Initialize MediaPipe: short module-level aliases for the hand-tracking
# solution and its drawing helpers, referenced by the functions below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection_hands(image, model):
    """Mirror a BGR frame, run the hand model on it, and hand both back.

    Args:
        image: BGR frame as read from the camera.
        model: object exposing ``process(rgb_image)`` (here a MediaPipe
            ``Hands`` instance).

    Returns:
        A ``(frame, results)`` tuple: the mirrored frame converted back to
        BGR, and whatever ``model.process`` returned for it.
    """
    mirrored = cv2.flip(image, 1)
    rgb_frame = cv2.cvtColor(mirrored, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only for the duration of inference and made
    # writeable again afterwards (presumably so the model can avoid a copy).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_styled_landmarks(image, results):
    """Draw every detected hand onto ``image`` in place, colored by handedness.

    Hands labelled 'Right' use one color pair and all other hands use a
    second pair. Nothing is drawn when ``results`` carries no landmarks or
    no handedness information.

    Args:
        image: frame to draw on (modified in place).
        results: detection results exposing ``multi_hand_landmarks`` and
            ``multi_handedness``.
    """
    detected_hands = results.multi_hand_landmarks
    if not (detected_hands and results.multi_handedness):
        return

    for idx, hand_landmarks in enumerate(detected_hands):
        label = results.multi_handedness[idx].classification[0].label
        if label == 'Right':
            landmark_color, connection_color = (121, 22, 76), (121, 44, 250)
        else:
            landmark_color, connection_color = (245, 117, 66), (245, 66, 230)

        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=landmark_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=connection_color, thickness=2, circle_radius=2),
        )

def extract_keypoints(results):
    """Flatten the detected hand landmarks into one fixed-layout vector.

    The output is the left-hand block followed by the right-hand block. Each
    block holds the ``x, y, z`` of every landmark in order, and defaults to
    63 zeros (21 * 3) when that hand was not detected. A hand labelled
    'Right' fills the right block; any other label fills the left block.

    Args:
        results: detection results exposing ``multi_hand_landmarks`` and
            ``multi_handedness``.

    Returns:
        1-D NumPy array: left-hand values concatenated with right-hand values.
    """
    values_per_hand = 21 * 3
    left_vec = np.zeros(values_per_hand)
    right_vec = np.zeros(values_per_hand)

    detected_hands = results.multi_hand_landmarks
    if detected_hands:
        for idx, hand in enumerate(detected_hands):
            label = results.multi_handedness[idx].classification[0].label
            coords = np.array(
                [value for point in hand.landmark
                 for value in (point.x, point.y, point.z)]
            )
            if label == 'Right':
                right_vec = coords
            else:
                left_vec = coords

    return np.concatenate([left_vec, right_vec])

def validate_keypoints(keypoints, min_visible=0.3):
    """Tell whether at least one hand has enough non-zero keypoint values.

    The first 63 entries are treated as the left hand and the rest as the
    right hand. A hand counts as visible when the fraction of its entries
    that differ from zero is strictly greater than ``min_visible``.

    Args:
        keypoints: flat keypoint vector (left-hand block, then right-hand).
        min_visible: minimum fraction of non-zero entries, exclusive.

    Returns:
        Truthy if either hand passes the threshold, falsy otherwise.
    """
    split_at = 63
    left_block = keypoints[:split_at]
    right_block = keypoints[split_at:]

    left_fraction = np.mean(left_block != 0)
    right_fraction = np.mean(right_block != 0)

    left_ok = left_fraction > min_visible
    return left_ok or right_fraction > min_visible

def _record_sequence(cap, hands, action, seq, num_sequences, num_frames,
                     max_read_failures=100):
    """Record one sequence of validated keypoint frames from the camera.

    Shows a live preview, waits for SPACE before collecting, then appends one
    keypoint vector per frame that contains a sufficiently visible hand until
    ``num_frames`` vectors have been gathered.

    Args:
        cap: opened ``cv2.VideoCapture``.
        hands: MediaPipe ``Hands`` model used for detection.
        action: sign/action name shown in the overlay.
        seq: zero-based index of the sequence being recorded.
        num_sequences: total number of sequences (for the overlay).
        num_frames: number of valid frames to collect.
        max_read_failures: consecutive failed camera reads tolerated before
            giving up.

    Returns:
        The list of keypoint arrays, or ``None`` if the user pressed 'q' or
        the camera stopped delivering frames.
    """
    keypoints_list = []
    frame_count = 0
    collecting = False
    start_time = time.time()
    read_failures = 0

    while frame_count < num_frames:
        ret, frame = cap.read()
        if not ret:
            # A dead camera would otherwise spin here forever with no way
            # to quit, so retries are bounded and paced.
            read_failures += 1
            if read_failures >= max_read_failures:
                print("Error: Camera stopped delivering frames.")
                return None
            time.sleep(0.01)
            continue
        read_failures = 0

        image, results = mediapipe_detection_hands(frame, hands)
        draw_styled_landmarks(image, results)

        if not collecting:
            cv2.putText(image, f"Sign: {action}", (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            cv2.putText(image, f"Sequence {seq+1}/{num_sequences}", (20, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
            cv2.putText(image, "Press SPACE to start", (20, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 255, 255), 2)
            cv2.imshow("Sign Recorder", image)
            # Mask to the low byte, as the collecting branch does: some
            # platforms set high bits in the waitKey return value.
            key = cv2.waitKey(10) & 0xFF
            if key == ord(' '):
                collecting = True
                start_time = time.time()
            elif key == ord('q'):
                return None
            continue

        elapsed = time.time() - start_time
        fps = (frame_count + 1) / elapsed if elapsed > 0 else 1
        remaining_time = max(0, (num_frames - frame_count) / max(fps, 1))

        cv2.putText(image, f"Collecting {frame_count}/{num_frames}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(image, f"Est. time left: {remaining_time:.1f}s", (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        if results.multi_hand_landmarks:
            keypoints = extract_keypoints(results)
            if validate_keypoints(keypoints):
                keypoints_list.append(keypoints)
                frame_count += 1
                # Green dot signals that this frame was accepted.
                cv2.circle(image, (30, 120), 15, (0, 255, 0), -1)

        cv2.imshow("Sign Recorder", image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            return None

    return keypoints_list


def main():
    """Interactively record hand-keypoint sequences for one sign.

    Prompts for the dataset folder, sign name, sequence count, frames per
    sequence and camera index, then records each sequence and saves it as a
    ``.npy`` file under ``<data_path>/<action>/``. Pressing 'q' in the
    preview window aborts the session; the camera and windows are released
    on every exit path, including exceptions.
    """
    data_path = input("Enter dataset folder path: ").strip()
    action = input("Enter sign/action name: ").strip()
    num_sequences = int(input("Enter number of sequences to record: "))
    num_frames = int(input("Enter number of frames per sequence: "))
    camera_index = int(input("Enter camera index (usually 0): "))

    os.makedirs(os.path.join(data_path, action), exist_ok=True)

    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        cap.release()
        return

    try:
        with mp_hands.Hands(max_num_hands=2,
                            min_detection_confidence=0.8,
                            min_tracking_confidence=0.7) as hands:
            for seq in range(num_sequences):
                keypoints_list = _record_sequence(
                    cap, hands, action, seq, num_sequences, num_frames)
                if keypoints_list is None:
                    # User quit or the camera failed: stop the session.
                    return

                if keypoints_list:
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    file_path = os.path.join(data_path, action, f"{seq+1}_{timestamp}")
                    # np.save appends the ".npy" extension to file_path.
                    np.save(file_path, np.array(keypoints_list))
                    print(f"Saved sequence {seq+1} with {len(keypoints_list)} frames.")
                else:
                    print(f"Warning: Sequence {seq+1} has no valid frames.")
    finally:
        # Single cleanup point so an exception or Ctrl-C cannot leave the
        # camera locked or the preview window open.
        cap.release()
        cv2.destroyAllWindows()

# Start the interactive recorder only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Enter dataset folder path:  dataset
Enter sign/action name:  خد صمبوصه
Enter number of sequences to record:  ١٠
Enter number of frames per sequence:  ٣٠
Enter camera index (usually 0):  ٠


I0000 00:00:1749032742.657563 3080073 gl_context.cc:369] GL version: 2.1 (2.1 ATI-7.0.3), renderer: AMD Radeon Pro 555X OpenGL Engine
W0000 00:00:1749032742.688537 3109023 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749032742.710741 3109023 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Saved sequence 1 with 30 frames.
Saved sequence 2 with 30 frames.
Saved sequence 3 with 30 frames.
Saved sequence 4 with 30 frames.
Saved sequence 5 with 30 frames.
Saved sequence 6 with 30 frames.
Saved sequence 7 with 30 frames.
Saved sequence 8 with 30 frames.
Saved sequence 9 with 30 frames.
Saved sequence 10 with 30 frames.
