In [3]:
import cv2
import os
import mediapipe as mp
import numpy as np
import time

# MediaPipe solution modules and the detector instances used by the functions below.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
# Track at most two hands; both detectors use a minimum detection confidence of 0.7.
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7)
pose = mp_pose.Pose(min_detection_confidence=0.7)
# Edit this list so it contains only the signs you want to capture. Keep a note of
# which words were signed, and keep the reference videos so the signs can be learned.
SIGNS = [
    'One'
]
# Each recording is FRAMES_PER_VIDEO frames long, and NUM_VIDEOS recordings are made
# for every sign. NOTE(review): an earlier version of this note mentioned 50 frames
# per recording, but the value below is 30 -- confirm which one is intended.
DATASET_PATH = 'dataset'
FRAMES_PER_VIDEO = 30
NUM_VIDEOS = 30

def extract_landmarks(image):
    """Convert one BGR frame into a fixed-length hand + pose landmark vector.

    Layout of the returned vector (225 values):

    * ``[0:63]``    first detected hand  (21 landmarks x (x, y, z))
    * ``[63:126]``  second detected hand (21 landmarks x (x, y, z))
    * ``[126:225]`` body pose            (33 landmarks x (x, y, z))

    Hands are stored in the order the detector reports them. Any slot whose
    hand or pose was not detected stays at zero.

    The result is always ``float64``. Previously the zero padding was made of
    Python ints, so a frame with no detections produced an integer array while
    every other frame produced a float array.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before
            being passed to the detectors).

    Returns:
        ``np.ndarray`` of shape ``(225,)`` and dtype ``float64``.
    """
    coords_per_landmark = 3
    hand_landmark_count = 21
    pose_landmark_count = 33
    max_hands = 2
    hand_block = hand_landmark_count * coords_per_landmark            # 63
    pose_offset = max_hands * hand_block                              # 126
    expected_landmarks = pose_offset + pose_landmark_count * coords_per_landmark  # 225

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    hand_results = hands.process(image_rgb)
    pose_results = pose.process(image_rgb)

    # Pre-allocating the output makes the "missing detection" padding implicit
    # and guarantees both the length and the dtype of the result.
    features = np.zeros(expected_landmarks, dtype=np.float64)

    # multi_hand_landmarks is falsy when no hand was found.
    detected_hands = hand_results.multi_hand_landmarks or []
    for hand_idx, hand_landmarks in enumerate(detected_hands[:max_hands]):
        points = list(hand_landmarks.landmark)[:hand_landmark_count]
        flat = [value for lm in points for value in (lm.x, lm.y, lm.z)]
        # Each hand is written at a fixed offset so a short landmark list can
        # never shift the values that follow it.
        start = hand_idx * hand_block
        features[start:start + len(flat)] = flat

    if pose_results.pose_landmarks:
        points = list(pose_results.pose_landmarks.landmark)[:pose_landmark_count]
        flat = [value for lm in points for value in (lm.x, lm.y, lm.z)]
        features[pose_offset:pose_offset + len(flat)] = flat

    return features

def _wait_for_start(cap, sign, video_idx):
    """Show the live preview until the user presses 's' or 'q'.

    Args:
        cap: Opened ``cv2.VideoCapture`` to read preview frames from.
        sign: Name of the sign shown in the on-screen prompt.
        video_idx: Zero-based index of the recording about to start.

    Returns:
        ``True`` when 's' was pressed (start recording); ``False`` when 'q'
        was pressed or the webcam failed to deliver a frame.
    """
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame")
            return False
        cv2.putText(frame, f"Sign: {sign}, Video: {video_idx + 1}, Press 's'",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Data Collection', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('s'):
            return True
        if key == ord('q'):
            return False


def _record_video(cap, sign, video_path):
    """Capture FRAMES_PER_VIDEO frames into ``video_path`` as ``frame_<idx>.jpg``.

    Args:
        cap: Opened ``cv2.VideoCapture`` to read frames from.
        sign: Name of the sign shown in the on-screen progress text.
        video_path: Existing directory the JPEG frames are written to.

    Returns:
        ``True`` when every frame was captured; ``False`` when a webcam read
        failed and the recording is therefore incomplete.
    """
    for frame_idx in range(FRAMES_PER_VIDEO):
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame")
            return False

        # NOTE(review): the landmark vector is computed but never persisted.
        # The call is kept so per-frame timing stays the same as in recordings
        # made earlier; decide whether to save the result or drop the call.
        extract_landmarks(frame)

        frame_path = os.path.join(video_path, f"frame_{frame_idx}.jpg")
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(frame_path, frame):
            print(f"Warning: Could not write {frame_path}")

        # The overlay is drawn only after saving, so the stored image stays clean.
        cv2.putText(frame, f"Sign: {sign}, Frame: {frame_idx + 1}/{FRAMES_PER_VIDEO}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Data Collection', frame)
        cv2.waitKey(33)  # ~30 fps
    return True


def collect_data(signs_to_collect=SIGNS):
    """Interactively record webcam clips for each sign into DATASET_PATH.

    For every sign, NUM_VIDEOS recordings of FRAMES_PER_VIDEO frames are
    written to ``DATASET_PATH/<sign>/video_<n>/frame_<k>.jpg``. Before each
    recording a preview is shown; 's' starts the recording and 'q' stops the
    whole collection.

    Collection also stops as soon as the webcam fails to deliver a frame,
    instead of carrying on with a partial recording. The webcam, the preview
    window and the module-level ``hands`` / ``pose`` detectors are released in
    a ``finally`` block, so they are cleaned up even when an exception is
    raised. NOTE(review): because the module-level detectors are closed here,
    they presumably have to be re-created (re-run the setup cell) before this
    function is called again -- confirm.

    Args:
        signs_to_collect: Iterable of sign names to record. The default is the
            ``SIGNS`` list object that existed when this function was defined;
            rebinding ``SIGNS`` later does not change it, so pass the new list
            explicitly.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam")
        return

    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        for sign in signs_to_collect:
            sign_path = os.path.join(DATASET_PATH, sign)
            os.makedirs(sign_path, exist_ok=True)

            print(f"Collecting data for sign: {sign}")
            for video_idx in range(NUM_VIDEOS):
                video_path = os.path.join(sign_path, f"video_{video_idx}")
                os.makedirs(video_path, exist_ok=True)

                print(f"  Video {video_idx + 1}/{NUM_VIDEOS}. Press 's' to start, 'q' to quit.")
                if not _wait_for_start(cap, sign, video_idx):
                    return

                print("  Recording...")
                if not _record_video(cap, sign, video_path):
                    print(f"  Recording aborted; {video_path} is incomplete.")
                    return

                print("  Done recording video.")
                time.sleep(1)
    finally:
        # Runs on normal completion, on 'q', on capture failure and on errors.
        cap.release()
        cv2.destroyAllWindows()
        hands.close()
        pose.close()

    # Only reached when every sign and recording completed.
    print("Data collection complete.")

# Start collection when this cell/script is executed directly. collect_data() is
# called without arguments, so its default sign list is used.
if __name__ == "__main__":
    collect_data()

Collecting data for sign: One
  Video 1/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 2/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 3/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 4/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 5/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 6/30. Press 's' to start, 'q' to quit.
  Recording...
  Done recording video.
  Video 7/30. Press 's' to start, 'q' to quit.


In [None]:
# Backup of the full 50-word sign vocabulary.
# NOTE(review): running this cell rebinds SIGNS, but the default argument of
# collect_data() was bound when that function was defined, so pass this list
# explicitly (collect_data(SIGNS)) to record these signs.
SIGNS = [
    'Hello', 'Great', 'Sunny', 'Brave', 'Kind', 'Happy', 'Strong', 'Wise', 'Gentle', 'Bright',
    'Calm', 'Quick', 'Warm', 'Clear', 'Bold', 'Sweet', 'Pure', 'Vivid', 'Steady', 'Lively',
    'Soft', 'True', 'Deep', 'Light', 'Fast', 'Cool', 'High', 'Smooth', 'Rich', 'Quiet',
    'Firm', 'New', 'Old', 'Young', 'Safe', 'Wild', 'Hot', 'Cold', 'Dry', 'Wet',
    'Early', 'Late', 'Near', 'Far', 'Full', 'Empty', 'Sharp', 'Dull', 'Loud', 'I'
]

In [1]:
# Environment check: print the installed MediaPipe version.
import mediapipe as mp
print(mp.__version__)


0.10.20


In [2]:
# Environment check: print the Python interpreter version and build string.
import sys
print(sys.version)


3.12.8 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:48:34) [MSC v.1929 64 bit (AMD64)]
