In [1]:
import cv2
import os
import mediapipe as mp
import json
import pandas as pd
from datetime import datetime
import keyboard
import numpy as np

In [2]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh

In [3]:
# Create directories if they don't exist
output_dir = 'sign_language_dataset'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)


In [4]:
def extract_landmarks_mediapipe(frame):
    with mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5) as hands:
        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            with mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
                # Convert BGR to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                # Process hand landmarks
                hands_results = hands.process(frame_rgb)
                left_hand_landmarks, right_hand_landmarks = [],[]
                if hands_results.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(hands_results.multi_hand_landmarks, hands_results.multi_handedness):
                        if handedness.classification[0].label == 'Left':
                            left_hand_landmarks = hand_landmarks
                        elif handedness.classification[0].label == 'Right':
                            right_hand_landmarks = hand_landmarks

                # Process pose landmarks
                pose_results = pose.process(frame_rgb)
                pose_landmarks = pose_results.pose_landmarks


                # Process face landmarks
                face_results = face_mesh.process(frame_rgb)
                face_landmarks = face_results.multi_face_landmarks
                print(face_landmarks)

                

    return left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks

In [5]:
max_face_index = 467
max_left_hand_index = 20
max_right_hand_index = 20
max_pose_index = 32

face_columns = [f"face_{i}" for i in range(max_face_index + 1)]
left_hand_columns = [f"left_hand_{i}" for i in range(max_left_hand_index + 1)]
right_hand_columns = [f"right_hand_{i}" for i in range(max_right_hand_index + 1)]
pose_columns = [f"pose_{i}" for i in range(max_pose_index + 1)]

header =      [f"{col}_{coord}" for col in face_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in left_hand_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in right_hand_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in pose_columns for coord in ['x', 'y']] 

In [6]:


def landmarks_to_df(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks, header):
    # Initialize dictionaries to store landmark data
    landmarks_data = {}

    # Process face landmarks
    if face_landmarks:
        for i, landmark_list in enumerate(face_landmarks):
            for j, lm in enumerate(landmark_list.landmark):
                landmarks_data[f"face_{j}_x"] = lm.x
                landmarks_data[f"face_{j}_y"] = lm.y
            # Fill missing face landmarks with zeros
            for j in range(len(landmark_list.landmark), max_face_index + 1):
                landmarks_data[f"face_{j}_x"] = 0.0
                landmarks_data[f"face_{j}_y"] = 0.0
    else:
        # Fill all face landmarks with zeros if face_landmarks is None
        for j in range(max_face_index + 1):
            landmarks_data[f"face_{j}_x"] = 0.0
            landmarks_data[f"face_{j}_y"] = 0.0

    # Process left hand landmarks
    if left_hand_landmarks:
        for i, lm in enumerate(left_hand_landmarks.landmark):
            landmarks_data[f"left_hand_{i}_x"] = lm.x
            landmarks_data[f"left_hand_{i}_y"] = lm.y
        # Fill missing left hand landmarks with zeros
        for i in range(len(left_hand_landmarks.landmark), max_left_hand_index + 1):
            landmarks_data[f"left_hand_{i}_x"] = 0.0
            landmarks_data[f"left_hand_{i}_y"] = 0.0
    else:
        # Fill all left hand landmarks with zeros if left_hand_landmarks is None
        for i in range(max_left_hand_index + 1):
            landmarks_data[f"left_hand_{i}_x"] = 0.0
            landmarks_data[f"left_hand_{i}_y"] = 0.0

    # Process right hand landmarks
    if right_hand_landmarks:
        for i, lm in enumerate(right_hand_landmarks.landmark):
            landmarks_data[f"right_hand_{i}_x"] = lm.x
            landmarks_data[f"right_hand_{i}_y"] = lm.y
        # Fill missing right hand landmarks with zeros
        for i in range(len(right_hand_landmarks.landmark), max_right_hand_index + 1):
            landmarks_data[f"right_hand_{i}_x"] = 0.0
            landmarks_data[f"right_hand_{i}_y"] = 0.0
    else:
        # Fill all right hand landmarks with zeros if right_hand_landmarks is None
        for i in range(max_right_hand_index + 1):
            landmarks_data[f"right_hand_{i}_x"] = 0.0
            landmarks_data[f"right_hand_{i}_y"] = 0.0

    # Process pose landmarks
    if pose_landmarks:
        for i, lm in enumerate(pose_landmarks.landmark):
            landmarks_data[f"pose_{i}_x"] = lm.x
            landmarks_data[f"pose_{i}_y"] = lm.y
        # Fill missing pose landmarks with zeros
        for i in range(len(pose_landmarks.landmark), max_pose_index + 1):
            landmarks_data[f"pose_{i}_x"] = 0.0
            landmarks_data[f"pose_{i}_y"] = 0.0
    else:
        # Fill all pose landmarks with zeros if pose_landmarks is None
        for i in range(max_pose_index + 1):
            landmarks_data[f"pose_{i}_x"] = 0.0
            landmarks_data[f"pose_{i}_y"] = 0.0

    # Create DataFrame from extracted landmark data
    # df = pd.DataFrame([landmarks_data], columns=header)


    return landmarks_data


In [7]:
# Initialize webcam capture
cap = cv2.VideoCapture(0)

# Initialize variables
output_dir = "gestures_data"
csv_filename = "keypoints_data.csv"

df = pd.DataFrame(columns=header)
frame_count = 0
data=[]

os.makedirs(output_dir, exist_ok=True)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Failed to capture frame from webcam.")
        break
    
    frame = cv2.flip(frame, 1)

    # Print input resolution
    # print(f"Input resolution: {frame.shape[1]}x{frame.shape[0]}")
    
    # Display instructions
    cv2.putText(frame, "Press 'r' to capture frame, 'q' to quit", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    
    # Display the frame
    cv2.imshow('Frame', frame)
    
    # Capture frame on 'r' press
    if keyboard.is_pressed('r'):
        label = input("Enter gesture label (A-Z): ")
        left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks = extract_landmarks_mediapipe(frame)
        
        # Process landmarks if any are detected
        if left_hand_landmarks or right_hand_landmarks or pose_landmarks or face_landmarks:
            # Preprocess landmarks
            landmarks_data = landmarks_to_df(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks, header)
            
            # Add frame number and label
            landmarks_data['label'] = label
            
            # Append landmarks data to DataFrame
            data.append(landmarks_data)
            
            # Increment frame count
            frame_count += 1
            print(f"Captured and processed frame {frame_count}")
    
    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

if data:
    df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)

# Save dataframe to CSV
df.to_csv(os.path.join(output_dir, csv_filename), index=False)


[landmark {
  x: 0.47671914
  y: 0.622017
  z: -0.030240754
}
landmark {
  x: 0.47612837
  y: 0.57641876
  z: -0.05445109
}
landmark {
  x: 0.47640356
  y: 0.5911953
  z: -0.028956477
}
landmark {
  x: 0.46699846
  y: 0.5345484
  z: -0.039912257
}
landmark {
  x: 0.47589183
  y: 0.56286204
  z: -0.057653084
}
landmark {
  x: 0.47567734
  y: 0.5458085
  z: -0.053338546
}
landmark {
  x: 0.47536957
  y: 0.506366
  z: -0.025539372
}
landmark {
  x: 0.4045838
  y: 0.50735706
  z: 0.015810303
}
landmark {
  x: 0.47516495
  y: 0.48033333
  z: -0.01898959
}
landmark {
  x: 0.47505683
  y: 0.46373242
  z: -0.021022346
}
landmark {
  x: 0.47466037
  y: 0.39875156
  z: -0.008126928
}
landmark {
  x: 0.47679067
  y: 0.6287679
  z: -0.029034019
}
landmark {
  x: 0.4768988
  y: 0.63385624
  z: -0.025666116
}
landmark {
  x: 0.4769829
  y: 0.63599956
  z: -0.021143008
}
landmark {
  x: 0.47742814
  y: 0.6406619
  z: -0.019819954
}
landmark {
  x: 0.4775252
  y: 0.646043
  z: -0.021554062
}
landmark 

In [None]:
df.to_csv(os.path.join(output_dir, csv_filename), index=False)