In [1]:
import cv2
import os
import mediapipe as mp
import json
import pandas as pd
from datetime import datetime
import keyboard
import numpy as np

In [2]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh

In [3]:
# Create directories if they don't exist
output_dir = 'sign_language_dataset'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)


In [4]:
def extract_landmarks_mediapipe(frame):
    with mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5) as hands:
        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            with mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
                # Convert BGR to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                # Process hand landmarks
                hands_results = hands.process(frame_rgb)
                left_hand_landmarks, right_hand_landmarks = [],[]
                if hands_results.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(hands_results.multi_hand_landmarks, hands_results.multi_handedness):
                        if handedness.classification[0].label == 'Left':
                            left_hand_landmarks = hand_landmarks
                        elif handedness.classification[0].label == 'Right':
                            right_hand_landmarks = hand_landmarks

                # Process pose landmarks
                pose_results = pose.process(frame_rgb)
                pose_landmarks = pose_results.pose_landmarks


                # Process face landmarks
                face_results = face_mesh.process(frame_rgb)
                face_landmarks = face_results.multi_face_landmarks

                

    return left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks

In [5]:
max_face_index = 467
max_left_hand_index = 20
max_right_hand_index = 20
max_pose_index = 32

face_columns = [f"face_{i}" for i in range(max_face_index + 1)]
left_hand_columns = [f"left_hand_{i}" for i in range(max_left_hand_index + 1)]
right_hand_columns = [f"right_hand_{i}" for i in range(max_right_hand_index + 1)]
pose_columns = [f"pose_{i}" for i in range(max_pose_index + 1)]

header =      [f"{col}_{coord}" for col in face_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in left_hand_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in right_hand_columns for coord in ['x', 'y']] + \
              [f"{col}_{coord}" for col in pose_columns for coord in ['x', 'y']] 

In [6]:


def landmarks_to_df(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks, header):
    # Initialize dictionaries to store landmark data
    landmarks_data = {}

    # Process face landmarks
    if face_landmarks:
        for i, landmark_list in enumerate(face_landmarks):
            for j, lm in enumerate(landmark_list.landmark):
                landmarks_data[f"face_{j}_x"] = lm.x
                landmarks_data[f"face_{j}_y"] = lm.y
            # Fill missing face landmarks with zeros
            for j in range(len(landmark_list.landmark), max_face_index + 1):
                landmarks_data[f"face_{j}_x"] = 0.0
                landmarks_data[f"face_{j}_y"] = 0.0
    else:
        # Fill all face landmarks with zeros if face_landmarks is None
        for j in range(max_face_index + 1):
            landmarks_data[f"face_{j}_x"] = 0.0
            landmarks_data[f"face_{j}_y"] = 0.0

    # Process left hand landmarks
    if left_hand_landmarks:
        for i, lm in enumerate(left_hand_landmarks.landmark):
            landmarks_data[f"left_hand_{i}_x"] = lm.x
            landmarks_data[f"left_hand_{i}_y"] = lm.y
        # Fill missing left hand landmarks with zeros
        for i in range(len(left_hand_landmarks.landmark), max_left_hand_index + 1):
            landmarks_data[f"left_hand_{i}_x"] = 0.0
            landmarks_data[f"left_hand_{i}_y"] = 0.0
    else:
        # Fill all left hand landmarks with zeros if left_hand_landmarks is None
        for i in range(max_left_hand_index + 1):
            landmarks_data[f"left_hand_{i}_x"] = 0.0
            landmarks_data[f"left_hand_{i}_y"] = 0.0

    # Process right hand landmarks
    if right_hand_landmarks:
        for i, lm in enumerate(right_hand_landmarks.landmark):
            landmarks_data[f"right_hand_{i}_x"] = lm.x
            landmarks_data[f"right_hand_{i}_y"] = lm.y
        # Fill missing right hand landmarks with zeros
        for i in range(len(right_hand_landmarks.landmark), max_right_hand_index + 1):
            landmarks_data[f"right_hand_{i}_x"] = 0.0
            landmarks_data[f"right_hand_{i}_y"] = 0.0
    else:
        # Fill all right hand landmarks with zeros if right_hand_landmarks is None
        for i in range(max_right_hand_index + 1):
            landmarks_data[f"right_hand_{i}_x"] = 0.0
            landmarks_data[f"right_hand_{i}_y"] = 0.0

    # Process pose landmarks
    if pose_landmarks:
        for i, lm in enumerate(pose_landmarks.landmark):
            landmarks_data[f"pose_{i}_x"] = lm.x
            landmarks_data[f"pose_{i}_y"] = lm.y
        # Fill missing pose landmarks with zeros
        for i in range(len(pose_landmarks.landmark), max_pose_index + 1):
            landmarks_data[f"pose_{i}_x"] = 0.0
            landmarks_data[f"pose_{i}_y"] = 0.0
    else:
        # Fill all pose landmarks with zeros if pose_landmarks is None
        for i in range(max_pose_index + 1):
            landmarks_data[f"pose_{i}_x"] = 0.0
            landmarks_data[f"pose_{i}_y"] = 0.0

    # Create DataFrame from extracted landmark data
    # df = pd.DataFrame([landmarks_data], columns=header)


    return landmarks_data


In [7]:

# Initialize webcam capture
cap = cv2.VideoCapture(0)

# Initialize variables
output_dir = "Dataset_CSVs"
csv_filename = "ASL_letters_data.csv"



df = pd.DataFrame(columns=header)
frame_count = 0
data=[]

os.makedirs(output_dir, exist_ok=True)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Failed to capture frame from webcam.")
        break
    
    frame = cv2.flip(frame, 1)
    
    # Display instructions
    cv2.putText(frame, "Press 'r' to capture frame, 'q' to quit", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    
    # Display the frame
    cv2.imshow('Frame', frame)
    
    # Capture frame on 'r' press
    if keyboard.is_pressed('r'):
        label = input("Enter gesture label (A-Z): ")
        left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks = extract_landmarks_mediapipe(frame)
        
        # Process landmarks if any are detected
        if left_hand_landmarks or right_hand_landmarks or pose_landmarks or face_landmarks:
            # Preprocess landmarks
            landmarks_data = landmarks_to_df(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks, header)
            
            # Add frame number and label
            
            landmarks_data['label'] = label
            
            # Append landmarks data to DataFrame
            data.append(landmarks_data)
            
            # Increment frame count
            frame_count += 1
            print(f"Captured and processed frame {frame_count}")
    
    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

if data:
    df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)

# Save dataframe to CSV
df.to_csv(os.path.join(output_dir, csv_filename), index=False)


Captured and processed frame 1
Captured and processed frame 2
Captured and processed frame 3
Captured and processed frame 4
Captured and processed frame 5
Captured and processed frame 6
Captured and processed frame 7
Captured and processed frame 8
Captured and processed frame 9
Captured and processed frame 10
Captured and processed frame 11
Captured and processed frame 12
Captured and processed frame 13
Captured and processed frame 14
Captured and processed frame 15
Captured and processed frame 16
Captured and processed frame 17
Captured and processed frame 18
Captured and processed frame 19
Captured and processed frame 20
Captured and processed frame 21
Captured and processed frame 22
Captured and processed frame 23
Captured and processed frame 24
Captured and processed frame 25
Captured and processed frame 26
Captured and processed frame 27
Captured and processed frame 28
Captured and processed frame 29
Captured and processed frame 30
Captured and processed frame 31
Captured and proc

  df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)


In [8]:
df

Unnamed: 0,face_0_x,face_0_y,face_1_x,face_1_y,face_2_x,face_2_y,face_3_x,face_3_y,face_4_x,face_4_y,...,pose_28_y,pose_29_x,pose_29_y,pose_30_x,pose_30_y,pose_31_x,pose_31_y,pose_32_x,pose_32_y,label
0,0.480987,0.600960,0.486844,0.549152,0.483475,0.566798,0.479284,0.495447,0.488247,0.532376,...,3.211675,0.699006,3.337574,0.386315,3.310355,0.652768,3.440469,0.438355,3.428981,A
1,0.515435,0.612552,0.521321,0.560277,0.517003,0.578228,0.511478,0.506666,0.522563,0.543482,...,3.176484,0.687595,3.314011,0.347050,3.274199,0.615969,3.411350,0.400396,3.408218,A
2,0.499053,0.592653,0.503803,0.538680,0.500637,0.558055,0.494142,0.487191,0.504793,0.522018,...,3.123261,0.685968,3.249211,0.402161,3.224098,0.647203,3.359931,0.446049,3.337926,A
3,0.437985,0.589782,0.439237,0.538740,0.438284,0.556461,0.429700,0.488853,0.439693,0.522769,...,3.069936,0.615897,3.201243,0.283825,3.157427,0.545426,3.312458,0.328650,3.307702,A
4,0.420540,0.584587,0.419919,0.535113,0.419445,0.551029,0.408256,0.484223,0.419772,0.519081,...,3.183950,0.663719,3.308161,0.319413,3.274222,0.582834,3.420210,0.364315,3.421397,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,0.520614,0.352501,0.513362,0.303441,0.517297,0.319465,0.502038,0.254613,0.511856,0.287691,...,2.688287,0.740219,2.780487,0.428859,2.766555,0.681142,2.880365,0.485734,2.899997,Z
171,0.541340,0.346211,0.541528,0.295426,0.541615,0.311955,0.533266,0.245822,0.541795,0.279791,...,2.770550,0.731708,2.850057,0.419065,2.855127,0.676299,2.961555,0.473893,2.987009,Z
172,0.554323,0.346718,0.557969,0.292852,0.555463,0.311045,0.549424,0.242032,0.558957,0.276686,...,2.755517,0.751304,2.836892,0.456692,2.837764,0.707007,2.940717,0.496167,2.962595,Z
173,0.560705,0.348462,0.565981,0.296934,0.562740,0.313857,0.557426,0.245758,0.567199,0.281118,...,2.794259,0.716861,2.894729,0.419733,2.877630,0.668587,3.002590,0.458183,3.011346,Z


In [9]:
df.to_csv(os.path.join(output_dir, csv_filename), index=False)