Getting hand sign data

In [73]:
import mediapipe as mp
import cv2

# Initialize MediaPipe Hands.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Drawing helper (not required, but can be useful).
mp_drawing = mp.solutions.drawing_utils
# Start capturing video from the webcam.
cap = cv2.VideoCapture(0)

sign_count = 0  # Counter for the number of signs collected.
entry_count = 0  # Counter for the number of entries collected for each sign.

# Array to store hand landmarks.
hand_landmarks_array = []

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the image to RGB.
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Process the image.
    results = hands.process(image_rgb)

    # Get hand landmarks for each detected hand.
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Display landmarks on the frame.
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Check if 'g' key is pressed to capture hand landmarks.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('g'):
                # Collect hand landmarks relative to the wrist.
                hand_landmarks_data = []
                wrist_landmark = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
                for landmark in hand_landmarks.landmark:
                    # Calculate relative coordinates.
                    rel_x = landmark.x - wrist_landmark.x
                    rel_y = landmark.y - wrist_landmark.y
                    rel_z = landmark.z - wrist_landmark.z
                    hand_landmarks_data.append((rel_x, rel_y, rel_z))
                # Save hand landmarks in the array.
                hand_landmarks_array.append(hand_landmarks_data)
                print("Landmark Collected")

    # Display the frame.
    cv2.imshow('Hand Sign Recognition', frame)

    # Break the loop if the user presses 'q'.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Close MediaPipe Hands and release the video capture.
hands.close()
cap.release()
cv2.destroyAllWindows()

# Print the collected hand landmarks.
print(hand_landmarks_array)


[]


In [16]:
len(hand_landmarks_array)
#hand_landmarks_array

10

In [19]:
sign_love = hand_landmarks_array

In [17]:
sign_call_me = hand_landmarks_array

In [14]:
sign_bad = hand_landmarks_array

In [11]:
sign_nice = hand_landmarks_array

In [8]:
sign_hi = hand_landmarks_array

In [43]:
labels = []
for i in range(0,10):
    labels.append("Nice")

len(labels)

10

In [44]:
labels

['Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice',
 'Nice']

In [59]:
import numpy as np
sign_love_np = np.array(sign_love)
labels = np.array(labels)

In [53]:
sign_bad_np.shape,labels.shape

((10, 21, 3), (10,))

In [33]:
import numpy as np
import pandas as pd


# Assuming sign_hi_np is your array of hand landmarks with shape (10, 21, 3)
# and labels is a list of corresponding labels with length 10

# Reshape the sign_hi_np array to have 27 coordinates per row
sign_hi_reshaped = sign_hi_np.reshape((sign_hi_np.shape[0], -1))

# Create a DataFrame with the reshaped array and labels
df = pd.DataFrame(sign_hi_reshaped)

# Add a column for the labels
df['label'] = labels

# Save the DataFrame to a CSV file
df.to_csv('hand_landmarks.csv', index=False)


In [30]:
df

Unnamed: 0,coord_0,coord_1,coord_2,coord_3,coord_4,coord_5,coord_6,coord_7,coord_8,coord_9,...,coord_54,coord_55,coord_56,coord_57,coord_58,coord_59,coord_60,coord_61,coord_62,label
0,0.0,0.0,0.0,0.073137,-0.025343,-0.037451,0.134113,-0.084433,-0.058367,0.178948,...,0.003178,-0.302305,-0.076415,0.013736,-0.356407,-0.089474,0.023282,-0.410957,-0.098307,Hi
1,0.0,0.0,0.0,0.071008,-0.021649,-0.02847,0.130841,-0.075665,-0.042977,0.175764,...,0.008543,-0.285614,-0.063568,0.018333,-0.335083,-0.073951,0.029184,-0.384049,-0.080803,Hi
2,0.0,0.0,0.0,0.078078,-0.007072,-0.026003,0.15062,-0.05762,-0.039892,0.204652,...,0.055591,-0.294158,-0.075411,0.072369,-0.345609,-0.087633,0.0883,-0.396734,-0.095709,Hi
3,0.0,0.0,0.0,0.072026,-0.02237,-0.048197,0.133499,-0.077011,-0.072713,0.172613,...,-0.009395,-0.301949,-0.078616,0.000176,-0.35571,-0.094753,0.011252,-0.409657,-0.105822,Hi
4,0.0,0.0,0.0,0.068221,-0.01535,-0.042036,0.12723,-0.062907,-0.063739,0.165996,...,-0.003511,-0.277552,-0.075653,0.006619,-0.328111,-0.090485,0.018132,-0.378739,-0.100681,Hi
5,0.0,0.0,0.0,0.086603,-0.031191,-0.045316,0.155572,-0.111125,-0.062982,0.178836,...,-0.02236,-0.349011,-0.073719,-0.014304,-0.411549,-0.089009,-0.005182,-0.471926,-0.099103,Hi
6,0.0,0.0,0.0,0.074577,-0.032057,-0.034833,0.130883,-0.10788,-0.046358,0.14657,...,-0.02027,-0.299718,-0.055325,-0.013987,-0.353861,-0.068326,-0.007752,-0.407364,-0.076802,Hi
7,0.0,0.0,0.0,-0.064734,-0.007996,-0.02995,-0.119244,-0.055033,-0.047128,-0.149924,...,-0.016184,-0.265139,-0.062409,-0.029677,-0.309556,-0.069212,-0.045052,-0.349986,-0.074054,Hi
8,0.0,0.0,0.0,-0.063895,-0.008453,-0.027382,-0.117485,-0.058601,-0.041686,-0.147337,...,-0.015972,-0.26212,-0.053687,-0.028389,-0.304176,-0.059547,-0.042666,-0.343008,-0.06416,Hi
9,0.0,0.0,0.0,-0.062991,-0.004677,-0.026957,-0.116729,-0.047309,-0.0448,-0.153165,...,-0.027105,-0.231175,-0.074567,-0.04346,-0.27392,-0.081813,-0.060344,-0.313625,-0.086578,Hi


In [61]:

import pandas as pd
import numpy as np

# Assuming sign_nice_np is a NumPy array containing the hand landmarks
# Reshape the array so that each row contains all the coordinates for one sign
sign_call_me_reshaped = sign_love_np.reshape((sign_love_np.shape[0], -1))

# Create a DataFrame with the reshaped array and new labels
new_df = pd.DataFrame(sign_call_me_reshaped)

# Assuming 'hi' is the label for all signs
new_df['label'] = 'Love'
result_df = pd.concat([result_df, new_df], ignore_index=True)
result_df
# Append the new DataFrame to the existing DataFrame
#df = df.append(new_df)
result_df.to_csv('hand_landmarks.csv', index=False)

In [81]:
import mediapipe as mp
import cv2
from mediapipe.tasks.python import vision
from mediapipe.tasks import python

# Initialize MediaPipe Hands.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
base_options = python.BaseOptions(model_asset_path='D:/sign_language_recognition/gesture_recognizer.task')
options = vision.GestureRecognizerOptions(base_options=base_options)
recognizer = vision.GestureRecognizer.create_from_options(options)

# Drawing helper (not required, but can be useful).
mp_drawing = mp.solutions.drawing_utils
# Start capturing video from the webcam.
cap = cv2.VideoCapture(0)


while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the image to RGB.
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Process the image.
    results = hands.process(image_rgb)

    # Get hand landmarks for each detected hand.
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Display landmarks on the frame.
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Display the frame.
    cv2.imshow('Hand Sign Recognition', frame)

    # Break the loop if the user presses 'q'.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Close MediaPipe Hands and release the video capture.
hands.close()
cap.release()
cv2.destroyAllWindows()



RuntimeError: Unable to open file at d:\sign_language_recognition\myvenv\lib\site-packages/D:\sign_language_recognition\gesture_recognizer.task, errno=22

In [82]:
import mediapipe as mp
import cv2
import numpy as np

# Initialize MediaPipe Hands.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Load your gesture recognition model (replace this with your actual model loading code)

# Define your class names
classNames = [...]

# Start capturing video from the webcam.
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the image to RGB.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Get hand landmark prediction
    result = hands.process(frame_rgb)

    className = ''

    # Post-process the result
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            landmarks = []
            for lm in hand_landmarks.landmark:
                lmx = int(lm.x * frame.shape[1])
                lmy = int(lm.y * frame.shape[0])
                landmarks.append([lmx, lmy])

            # Drawing landmarks on frames
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Predict gesture using your model
            prediction = model.predict([landmarks])
            print(prediction)
            classID = np.argmax(prediction)
            className = classNames[classID]

    # Show the prediction on the frame
    cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

    # Display the frame.
    cv2.imshow('Hand Gesture Recognition', frame)

    # Break the loop if the user presses 'q'.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Close MediaPipe Hands and release the video capture.
hands.close()
cap.release()
cv2.destroyAllWindows()
  

AttributeError: 'str' object has no attribute 'predict'