In [1]:
import os
import pandas as pd
import cv2
import mediapipe as mp

from sklearn.ensemble import RandomForestClassifier

In [2]:
# Short aliases for the MediaPipe solution modules used by the capture cell:
# `mp_hands` provides the Hands detector and HAND_CONNECTIONS,
# `mp_drawing` provides draw_landmarks and DrawingSpec.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [3]:
# --- Output location -------------------------------------------------------
# Working directory at the time this cell runs; the output folder is created
# beneath it.
BASE_DIR = os.getcwd()
# Name of the folder that receives the per-class CSV files.
IMAGE_DIR = "data_both_hands"

# --- Capture plan ----------------------------------------------------------
# Labels to record; the capture cell loops over these and writes
# "<label>_both.csv" for each one.
CLASSES = ["Y", "Z"]
# Frames whose counter is <= CHANGE_HANDS are recorded in the "right hand"
# window; afterwards the user is prompted to switch hands.
CHANGE_HANDS = 1_000
# The capture counter starts at 1 and stops when it reaches this value, so
# 2001 means 2000 processed frames per class.
NO_OF_ROWS_PER_CLASS = 2_001

# NOTE(review): not referenced by any of the visible cells - confirm whether it
# is still needed.
flag_right_hand = True

In [4]:
# Capture hand-landmark coordinates from the default webcam and store one CSV
# per class in BASE_DIR/IMAGE_DIR.
#
# For every label in CLASSES:
#   1. show a preview until the user presses 'q';
#   2. process frames with counter 1 .. NO_OF_ROWS_PER_CLASS - 1, pausing for a
#      hand switch once the counter passes CHANGE_HANDS;
#   3. write the collected rows to "<label>_both.csv".
#
# The counter advances once per processed frame, not once per detected hand,
# so a CSV can contain fewer rows than frames were processed.


def _close_windows():
    """Destroy all OpenCV windows, then call waitKey once so the GUI can process the close."""
    cv2.destroyAllWindows()
    cv2.waitKey(1)


def _read_mirrored_frame(capture):
    """Read one frame from `capture` and mirror it horizontally.

    Returns:
        The flipped BGR frame, or None when the camera delivered no frame.
        The success flag is checked *before* flipping because cv2.flip
        cannot operate on a missing image.
    """
    success, frame = capture.read()
    if not success or frame is None:
        return None
    return cv2.flip(frame, 1)


def _put_banner(frame, text):
    """Draw `text` in green near the top-left corner of `frame` (in place)."""
    cv2.putText(frame, text, (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                cv2.LINE_AA)


def _wait_for_key(capture, message, key='q'):
    """Show a live preview with `message` until `key` is pressed.

    Returns:
        True when the key was pressed, False when the camera stopped
        delivering frames (an error is printed in that case).
    """
    while True:
        frame = _read_mirrored_frame(capture)
        if frame is None:
            print("Error reading frame from camera")
            _close_windows()
            return False
        _put_banner(frame, message)
        cv2.imshow('Waiting window', frame)
        # Mask to the low byte so the comparison ignores modifier/high bits.
        if cv2.waitKey(1) & 0xFF == ord(key):
            _close_windows()
            return True


def _collect_class(capture, hands, label):
    """Record landmark rows for one class.

    Args:
        capture: an opened cv2.VideoCapture.
        hands: an active mp_hands.Hands instance.
        label: class name, used only in the window title.

    Returns:
        A list of rows; each row holds the x and y of every landmark of the
        first detected hand, interleaved as [x0, y0, x1, y1, ...].
    """
    rows = []
    for counter in range(1, NO_OF_ROWS_PER_CLASS):
        # Pause exactly once, right after the right-hand quota is done.
        if counter == CHANGE_HANDS + 1:
            if not _wait_for_key(capture, 'Waiting to switch hands, press \'q\' when ready.'):
                break
        hand_name = 'right' if counter <= CHANGE_HANDS else 'left'

        image = _read_mirrored_frame(capture)
        if image is None:
            print("Error reading frame from camera")
            break

        # OpenCV delivers BGR frames while MediaPipe expects RGB input, so
        # convert a copy for detection and keep the BGR frame for display.
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks:
            first_hand = results.multi_hand_landmarks[0]
            rows.append([value for point in first_hand.landmark
                         for value in (point.x, point.y)])
            # Draw hand landmarks on the preview image.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2,
                                                                 circle_radius=4))

        _put_banner(image, f'Collecting datapoints, counter {counter}')
        cv2.imshow(f"Collecting for {label} class, {hand_name} hand", image)
        # 'q' stops collection for this class early; rows so far are kept.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    _close_windows()
    return rows


# Create the output directory if needed and reuse the same absolute path for
# writing, so the CSVs always land where the directory was created.
output_dir = os.path.join(BASE_DIR, IMAGE_DIR)
os.makedirs(output_dir, exist_ok=True)

# Start capturing images
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): in a notebook exit() targets the kernel.
    raise RuntimeError("Unable to open camera")

try:
    # Iterate through classes
    for classes in CLASSES:
        print(f"Collecting data points for {classes} class")

        # Waiting window
        if not _wait_for_key(cap, 'Ready? Press "Q" ! :)'):
            break

        # Detect hands using Mediapipe and store landmark coordinates
        with mp_hands.Hands(static_image_mode=False, max_num_hands=1,
                            min_detection_confidence=0.2) as hands:
            coords_list = _collect_class(cap, hands, classes)

        temp = pd.DataFrame(coords_list)
        temp.to_csv(os.path.join(output_dir, f"{classes}_both.csv"), index=False)
finally:
    # Always free the camera and close windows, even if an error interrupts
    # the capture.
    cap.release()
    _close_windows()
    

Collecting data points for Y class


I0000 00:00:1715835550.288436       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Collecting data points for Z class


I0000 00:00:1715835713.223260       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro


-1