## Hand Gesture Recognition Models

### Imports and Utils

In [1]:
"""
Importing necessary libraries
"""
import os
import cv2
import mediapipe as mp

import torch

# Remove all the warnings
# The 'ignore' filter silences every warning category for the whole session.
import warnings
warnings.filterwarnings('ignore')

# Set env CUDA_LAUNCH_BLOCKING=1
# NOTE(review): presumably set so CUDA errors surface at the failing call
# instead of asynchronously; confirm it is still wanted outside debugging.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize MediaPipe Imports
# Short aliases for the MediaPipe drawing helpers and the hands solution.
mp_drawings = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [2]:
"""
Defining Utility Functions
"""

def landmarks_to_list(image, multi_hand_landmarks) -> torch.Tensor:
    """
    Convert detected hand landmarks into integer pixel coordinates.

    image: Array-like whose ``shape`` unpacks as (height, width, channels)
    multi_hand_landmarks: Iterable of hands, each exposing a ``landmark``
        sequence of points with ``x`` / ``y`` attributes; a falsy value
        (e.g. None) yields an empty tensor

    Returns: torch.Tensor: One [x, -y] row per landmark, with the points of
        all hands concatenated in iteration order
    """

    height, width, _ = image.shape
    last_col = width - 1
    last_row = height - 1

    # The image origin is the top-left corner (0, 0); x and y are scaled by
    # the image size, capped at the last pixel, and y is negated.
    pixel_points = [
        [min(int(point.x * width), last_col), -min(int(point.y * height), last_row)]
        for hand in (multi_hand_landmarks or [])
        for point in hand.landmark
    ]

    return torch.tensor(pixel_points)

def normalize_landmarks(landmarks: torch.Tensor) -> torch.Tensor:
    """
    Express the landmarks relative to the first one and scale them to [-1, 1].

    landmarks: torch.Tensor: The landmarks of the hand, one row per landmark;
        row 0 is used as the origin

    Returns: torch.Tensor: The normalized landmarks (floating point). An empty
        input is returned unchanged, and landmarks that all coincide with the
        origin come back as zeros.
    """

    # Nothing to normalize; also avoids indexing row 0 of an empty tensor.
    if landmarks.numel() == 0:
        return landmarks

    # Integer pixel coordinates are promoted so the division below is exact.
    if not torch.is_floating_point(landmarks):
        landmarks = landmarks.to(torch.get_default_dtype())

    landmarks = landmarks - landmarks[0]

    # Scale by the largest *magnitude*. Using the signed maximum breaks when
    # every offset is <= 0: the maximum is then the origin's own 0 and the
    # division produces nan / inf.
    scale = torch.max(torch.abs(landmarks))
    if scale == 0:
        return landmarks

    return landmarks / scale

### Dataset Creation and Preprocessing

In [5]:
# Initialize MediaPipe Hands
# NOTE(review): with max_num_hands=2 a sample contains the landmarks of every
# detected hand, so frames with different hand counts produce rows of
# different length and torch.stack below will fail; confirm whether only one
# hand should be recorded.
HandLandmarker = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.8,
    min_tracking_confidence=0.5
)

# Initialize the webcam
cap = cv2.VideoCapture(0)
dataset = []

# The try/finally guarantees the camera is released and the preview window is
# closed even if something inside the capture loop raises.
try:
    with HandLandmarker as landmarker:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Mirror the frame, then convert the BGR image to RGB.
            frame = cv2.flip(frame, 1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect the hand landmarks
            frame.flags.writeable = False
            results = landmarker.process(frame)

            # Draw the hand annotations on the image.
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # 255 is the masked value of -1, i.e. no key was pressed.
            key = cv2.waitKey(5) & 0xFF
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawings.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawings.DrawingSpec(color=(97, 137, 48), thickness=2, circle_radius=4),
                        mp_drawings.DrawingSpec(color=(255, 255, 255), thickness=2, circle_radius=2),
                    )

                if key != 255 and key != 27:
                    pressed = chr(key)
                    # Only the digit keys 0-9 are gesture labels; any other
                    # key used to crash the capture with a ValueError.
                    if "0" <= pressed <= "9":
                        # Convert the landmarks to a list wrt the image
                        landmarks = landmarks_to_list(frame, results.multi_hand_landmarks)

                        # Normalize the landmarks
                        landmarks = normalize_landmarks(landmarks).flatten()

                        # Append the label to the landmarks and store in the dataset
                        dataset.append(torch.cat((landmarks, torch.tensor([int(pressed)]))))
                        print(f"Gesture Labelled: {chr(key)}")
                    else:
                        print(f"Ignoring non-digit key: {pressed!r}")

            cv2.imshow('MediaPipe Hands', frame)
            if key == 27: # ESC
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Convert the dataset to a tensor
if not dataset:
    # torch.stack rejects an empty list; fail with an actionable message.
    raise RuntimeError(
        "No labelled samples were captured; press a digit key while a hand is visible."
    )
dataset = torch.stack(dataset).to(device)

### Model Creation and Training

### Testing and Plotting