In [1]:
import os
import sys
import string

import cv2
import numpy as np
from cvzone.HandTrackingModule import HandDetector

sys.path.append(os.path.abspath(".."))
from utils import euclidean_distance, calculate_angle, is_above

In [2]:
# Locations of the collected data, relative to the notebook's working directory.
# The paths are built with os.path.join/os.pardir instead of hard-coded Windows
# separators so that they resolve to "two levels up" on every operating system.
dataset_path = os.path.join(os.pardir, os.pardir, 'dataset_landmarks')  # landmark images
np_path = os.path.join(os.pardir, os.pardir, 'np_features')  # numeric feature arrays
numeric_data_path = os.path.join(np_path, 'numeric_data.npy')

# Names of the two dataset splits (also used as sub-folder names).
TRAIN = 'train'
TEST = 'test'

train_folder = os.path.join(dataset_path, TRAIN)
test_folder = os.path.join(dataset_path, TEST)

### Creating directories to collect train and test data

In [None]:
# Create the directory structure.
# Every call uses exist_ok=True, so re-running this cell is harmless, and
# os.makedirs also creates missing parent folders (dataset_path, train_folder,
# test_folder) on the way down.

# Special (non-letter) gestures
space_image_folder = 'space'

special_gestures = [space_image_folder]

# For images: one sub-folder per gesture (special gestures plus the letters A-Z)
# inside both the train and the test split. os.path.join is used for every path
# so the layout is identical on all operating systems.
for split_folder in (train_folder, test_folder):
    os.makedirs(split_folder, exist_ok=True)
    for gesture in [*special_gestures, *string.ascii_uppercase]:
        os.makedirs(os.path.join(split_folder, gesture), exist_ok=True)

# For numeric features
os.makedirs(np_path, exist_ok=True)

# The feature file holds one row per saved training sample with 16 columns.
# Start from an empty (0, 16) array the first time, otherwise load what exists.
if not os.path.exists(numeric_data_path):
    numeric_data = np.empty((0, 16))
    np.save(numeric_data_path, numeric_data)
else:
    numeric_data = np.load(numeric_data_path)

# Data collection

In the block below, we collect data using OpenCV and process the images before saving them.

## Using landmarks

This method helps us predict with higher accuracy across different conditions (lighting, background, or even hand size).

To do this, we first collect data from hand gestures using two hand detectors:
1. `hand_detector` (First Detector):
   - Detects the hand in the full frame.
   - Provides the bounding box (`bbox`) of the detected hand.
   - Used to extract and crop the hand from the original frame.
2. `cropped_hand_detector` (Second Detector):
   - Runs on the cropped hand region extracted using the first detector.
   - Re-detects the hand within the smaller cropped area.
   - Provides more precise landmark positions within the cropped image.
   - Used to draw landmarks and connections on a static 350x350 frame.

### Advantages of this method:
1. Better Accuracy in Landmark Detection
2. Ensures Hand is Properly Centered in Static Image
3. Avoids Scaling Issues

## Landmarks features

We define 15 different features computed from the landmarks:

- Distance between the wrist and fingertips:
    - `wist_thumb`: Distance between wrist and thumb tip.
    - `wist_index`: Distance between wrist and index finger tip.
    - `wist_middle`: Distance between wrist and middle finger tip.
    - `wist_ping`: Distance between wrist and ring finger tip.
    - `wist_pinky`: Distance between wrist and pinky finger tip.

- Distance between specific fingertips:
    - `thumb_index`: Distance between thumb tip and index finger tip.
    - `thumb_pinky`: Distance between thumb tip and pinky finger tip.
    - `thumb_middle`: Distance between thumb tip and middle finger tip.
    - `index_middle`: Distance between index finger tip and middle finger tip.

- Other numerical features:
    - `index_middle_dip`: Distance between the DIP joints of index and middle fingers.
    - `index_middle_z`: Z-axis distance between index and middle finger tips.
    - `thumb_ping_angle`: Angle formed by thumb tip, wrist, and ring finger tip.
    - `thumb_index_angle`: Angle formed by thumb tip, wrist, and index finger tip.
    - `index_middle_angle`: Angle formed by index finger tip, wrist, and middle finger tip.
    - `thumb_index_above`: Binary feature indicating if the index finger tip is above the thumb tip.

In [None]:
# Interactive data-collection loop.
#
# Controls (the OpenCV window must have focus):
#   a-z / space : save the current landmark drawing as a sample of that gesture
#   /           : switch between the train and test splits
#   `           : quit
#
# A sample is only saved when a hand was detected in the current frame. In train
# mode the 15 landmark features plus the key code (used as label) are appended to
# the numeric feature file as one 16-column row.

mode = TRAIN
# The trailing '/' is kept so that code concatenating onto `directory` keeps working.
directory = os.path.join(dataset_path, mode) + '/'
min_value = 70  # not referenced in this cell

PAD = 20  # margin in pixels added around the detected bounding box before cropping
CANVAS_SIZE = 350  # side length of the white square the hand skeleton is drawn on
NUM_FEATURES = 15  # landmark features per sample (the label is the 16th column)

# Gesture name -> key code returned by cv2.waitKey (also stored as the label).
GESTURE_KEYS = {
    'space': 32,
    **{letter: ord(letter) for letter in string.ascii_lowercase},
}
# Gesture name -> sub-folder on disk. Letter folders are created in upper case,
# so the lower-case key name must be mapped before building a path.
GESTURE_FOLDERS = {
    name: (name if name == 'space' else name.upper()) for name in GESTURE_KEYS
}

capture = cv2.VideoCapture(0)
interrupt = -1

hand_detector = HandDetector(detectionCon=0.8, maxHands=1)
cropped_hand_detector = HandDetector(detectionCon=0.8, maxHands=1)

# Define hand landmark connections based on MediaPipe's hand model
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),  # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),  # Index Finger
    (0, 9), (9, 10), (10, 11), (11, 12),  # Middle Finger
    (0, 13), (13, 14), (14, 15), (15, 16),  # Ring Finger
    (0, 17), (17, 18), (18, 19), (19, 20),  # Pinky Finger
    (5, 9), (9, 13), (13, 17)  # Palm connections
]


def _count_samples(folder):
    """Return {gesture name: number of files} for each gesture sub-folder of `folder`."""
    return {
        name: len(os.listdir(os.path.join(folder, sub_folder)))
        for name, sub_folder in GESTURE_FOLDERS.items()
    }


def _draw_skeleton(canvas, landmarks, offset_x, offset_y):
    """Draw the hand connections and landmark dots on `canvas`, shifted by the offsets."""
    for start, end in HAND_CONNECTIONS:
        pt1 = (landmarks[start][0] + offset_x, landmarks[start][1] + offset_y)
        pt2 = (landmarks[end][0] + offset_x, landmarks[end][1] + offset_y)
        cv2.line(canvas, pt1, pt2, (0, 255, 0), 2)  # Green lines

    for lm in landmarks:
        cv2.circle(canvas, (lm[0] + offset_x, lm[1] + offset_y), 5, (0, 0, 255), -1)


def _landmark_features(landmarks):
    """Return the 15 numeric features of one hand, in the order they are stored."""
    # Distance between the wrist and fingertips
    wist_thumb = euclidean_distance(landmarks[0], landmarks[4])
    wist_index = euclidean_distance(landmarks[0], landmarks[8])
    wist_middle = euclidean_distance(landmarks[0], landmarks[12])
    wist_ping = euclidean_distance(landmarks[0], landmarks[16])
    wist_pinky = euclidean_distance(landmarks[0], landmarks[20])

    # Distance between specific fingertips
    thumb_index = euclidean_distance(landmarks[4], landmarks[8])
    thumb_pinky = euclidean_distance(landmarks[4], landmarks[20])
    thumb_middle = euclidean_distance(landmarks[4], landmarks[12])
    index_middle = euclidean_distance(landmarks[8], landmarks[12])

    # Other numerical features
    index_middle_dip = euclidean_distance(landmarks[7], landmarks[11])
    # NOTE(review): scalar z values are passed here while every other call passes
    # whole landmarks - confirm euclidean_distance accepts scalars.
    index_middle_z = euclidean_distance(landmarks[8][2], landmarks[12][2])
    thumb_ping_angle = calculate_angle(landmarks[4], landmarks[0], landmarks[16])
    thumb_index_angle = calculate_angle(landmarks[4], landmarks[0], landmarks[8])
    index_middle_angle = calculate_angle(landmarks[8], landmarks[0], landmarks[12])

    thumb_index_above = int(is_above(landmarks[4][1], landmarks[8][1]))

    # The column order below is the on-disk layout of the feature file; keep it stable.
    return [wist_thumb, wist_index, wist_middle, wist_ping, wist_pinky,
            thumb_index, thumb_middle, thumb_pinky, index_middle,
            index_middle_dip, index_middle_z, thumb_ping_angle, thumb_index_angle,
            index_middle_angle, thumb_index_above]


# Close the video capture by pressing '`'
# Change the mode between Train and Test with '/'
try:
    while True:
        grabbed, frame = capture.read()
        if not grabbed or frame is None:
            # Camera missing, busy or disconnected: stop instead of crashing in cv2.flip.
            print("Could not read a frame from the camera; stopping capture.")
            break

        # Simulating mirror Image
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, (500, 500))

        hands, _ = hand_detector.findHands(frame, draw=False)

        # Get count of existing images (also used as the next file name per gesture)
        count = _count_samples(directory)

        # Display the count of each letter on the screen
        x_text = int(0.8 * frame.shape[1])
        x_mode = int(0.6 * frame.shape[1])
        y_start = 20
        y_step = 13

        for i, letter in enumerate(sorted(count.keys())):
            y_pos = y_start + i * y_step  # Calculate y position dynamically
            text = f"{letter.upper()} : {count[letter]}"
            cv2.putText(frame, text, (x_text, y_pos), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 255), 1)

        # Display mode of the folder (train / test)
        cv2.putText(frame, f'Mode: {mode}', (x_mode, y_start), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 255), 1)

        # clean white frame
        landmarks_frame = np.ones((CANVAS_SIZE, CANVAS_SIZE, 3), np.uint8) * 255

        # Stays empty when no hand is found; the save step checks for that.
        numeric_list = []
        if hands:
            # The detector is configured with maxHands=1, so use the single detection.
            x, y, w, h = hands[0]['bbox']
            x1 = max(0, x - PAD)
            y1 = max(0, y - PAD)
            x2 = min(frame.shape[1], x + w + PAD)
            y2 = min(frame.shape[0], y + h + PAD)

            # Crop the hand region
            image = frame[y1:y2, x1:x2].copy()

            # Re-detect the hand using the hand region (skip degenerate empty crops)
            crop_hand = []
            if image.size:
                crop_hand, _ = cropped_hand_detector.findHands(image, draw=True)

            if crop_hand:
                landmarks = crop_hand[0]['lmList']
                # Offsets that roughly centre the cropped hand on the canvas
                scale_x = ((CANVAS_SIZE - w) // 2) - 10
                scale_y = ((CANVAS_SIZE - h) // 2) - 10

                _draw_skeleton(landmarks_frame, landmarks, scale_x, scale_y)
                numeric_list = _landmark_features(landmarks)

        cv2.imshow("Frame", frame)
        cv2.imshow('landmarks_frame', landmarks_frame)

        interrupt = cv2.waitKey(10)
        pressed = interrupt & 0xFF
        if pressed == ord('`'):
            # quit key (backtick)
            break
        if pressed == ord('/'):
            # change mode
            mode = TRAIN if mode == TEST else TEST
            directory = os.path.join(dataset_path, mode) + '/'

        for key, ascii_value in GESTURE_KEYS.items():
            if pressed != ascii_value:
                continue

            if len(numeric_list) != NUM_FEATURES:
                # No hand in this frame: saving would store a blank image and the
                # feature row could not be reshaped to 16 columns.
                print(f"No hand detected - sample for '{key}' was not saved.")
                break

            image_path = os.path.join(directory, GESTURE_FOLDERS[key], f"{count[key]}.jpg")
            if not cv2.imwrite(image_path, landmarks_frame):
                # Keep images and feature rows in sync: no image, no feature row.
                print(f"Could not write {image_path} - sample was not saved.")
                break

            if mode == TRAIN:
                # Read the stored samples only when appending, not on every frame.
                sample = np.array(numeric_list + [ascii_value]).reshape(1, NUM_FEATURES + 1)
                numeric_data = np.vstack([np.load(numeric_data_path), sample])
                np.save(numeric_data_path, numeric_data)
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    capture.release()
    cv2.destroyAllWindows()