**INTRO TO HOW YOU USE THIS:**
1. WHEN YOU RUN THE SCRIPT IT WILL OPEN AN EXTERNAL CAMERA WINDOW – THIS IS WHAT WE ARE USING TO LOG THE DATA.
2. IF YOU WISH TO ENTER LOGGING MODE (WHERE YOU CAN COLLECT THE DATAPOINTS FOR THE HANDSIGN YOU'RE MAKING) PRESS "1"
3. MAKE THE HAND SIGN YOU WANT AND PRESS THE CORRESPONDING KEY ON THE KEYBOARD AND IT WILL BE LOGGED (A MESSAGE WILL APPEAR BRIEFLY INDICATING WHAT LETTER YOU PRESSED)
4. TO EXIT LOGGING MODE, PRESS "0"
5. TO EXIT THE PROGRAM ALTOGETHER, PRESS "ESC"

_A note on how the data is structured in the .csv: Each row represents one logged hand sign, that is, a sample written when the program is in logging mode and you press a letter key (each key press also writes a mirrored copy and two slightly jittered copies as extra rows). The first column is the letter that was pressed. The following 42 columns are the respective X and Y 'coordinates' of each of the 21 segments of the hand. The first two of these coordinate columns are always 0,0, as they belong to the palm node from which the relative position of all the other segments is calculated. While this may seem odd, it makes a lot more sense to calculate the finger positions relative to the palm than to use where they appear pixel-wise in the image, as pixel positions are prone to change and would make the model very rigid and prone to overfitting._

In [85]:
#pip install tensorflow mediapipe opencv-python torch

In [86]:
import csv
import copy
import random

import cv2 as cv
import numpy as np
import mediapipe as mp
import tensorflow as tf
import torch

In [87]:
'''class KeyPointClassifier(object):
    def __init__(
        self,
        model_path='./data/keypoint_classifier.pt',
        num_threads=1,
    ):
        self.interpreter = tf.lite.Interpreter(model_path=model_path,
                                               num_threads=num_threads)

        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def __call__(
        self,
        landmark_list,
    ):
        input_details_tensor_index = self.input_details[0]['index']
        self.interpreter.set_tensor(
            input_details_tensor_index,
            np.array([landmark_list], dtype=np.float32))
        self.interpreter.invoke()

        output_details_tensor_index = self.output_details[0]['index']

        result = self.interpreter.get_tensor(output_details_tensor_index)

        result_index = np.argmax(np.squeeze(result))

        return result_index'''

"class KeyPointClassifier(object):\n    def __init__(\n        self,\n        model_path='./data/keypoint_classifier.pt',\n        num_threads=1,\n    ):\n        self.interpreter = tf.lite.Interpreter(model_path=model_path,\n                                               num_threads=num_threads)\n\n        self.interpreter.allocate_tensors()\n        self.input_details = self.interpreter.get_input_details()\n        self.output_details = self.interpreter.get_output_details()\n\n    def __call__(\n        self,\n        landmark_list,\n    ):\n        input_details_tensor_index = self.input_details[0]['index']\n        self.interpreter.set_tensor(\n            input_details_tensor_index,\n            np.array([landmark_list], dtype=np.float32))\n        self.interpreter.invoke()\n\n        output_details_tensor_index = self.output_details[0]['index']\n\n        result = self.interpreter.get_tensor(output_details_tensor_index)\n\n        result_index = np.argmax(np.squeeze(result))\n\n 

In [88]:
class KeyPointClassifier:
    """Callable wrapper around a TorchScript hand-sign classifier.

    The model is loaded once at construction time and put into evaluation
    mode. Calling the instance with a single flattened landmark vector
    returns the index of the highest-scoring class.
    """

    def __init__(self, model_path='./data/keypoint_classifier.pt'):
        """Load the TorchScript model stored at ``model_path``.

        Args:
            model_path: Path to a file produced by ``torch.jit.save``.
        """
        # __call__ always builds its input tensor on the CPU, so the model is
        # mapped onto the CPU as well. Without map_location a model saved
        # from a GPU is moved back to that device, which fails on CPU-only
        # machines or causes a device mismatch at inference time.
        self.model = torch.jit.load(model_path, map_location='cpu')
        self.model.eval()  # inference only: disable dropout / batch-norm updates

    def __call__(self, landmark_list):
        """Classify one landmark vector.

        Args:
            landmark_list: Flat sequence of numbers describing one hand.

        Returns:
            The index (Python int) of the class with the highest model output.
        """
        # Wrap in an outer list to add the batch dimension the model expects.
        input_tensor = torch.tensor([landmark_list], dtype=torch.float32)

        # No gradients are needed for inference.
        with torch.no_grad():
            output = self.model(input_tensor)

        return torch.argmax(output, dim=1).item()

In [89]:
def main():
    """Run the webcam loop: detect hands, classify the sign, optionally log it.

    Keys handled each frame (via ``select_mode``): ``1`` enters logging mode,
    ``0`` leaves it, ``a``-``z`` select the letter to log, ``ESC`` quits.
    The loop also ends when the camera stops delivering frames.

    The capture device, the MediaPipe hands graph and the OpenCV windows are
    released even if an exception escapes the loop.
    """
    cap_device = 0
    cap_width = 960
    cap_height = 540

    use_brect = True

    # Load the model and labels before touching the camera so that a missing
    # file fails fast without leaving the device open.
    keypoint_classifier = KeyPointClassifier()

    with open('./data/keypoint_classifier_labels.csv', encoding='utf-8-sig') as f:
        # Skip blank rows (e.g. a trailing newline) instead of crashing on row[0].
        keypoint_classifier_labels = [row[0] for row in csv.reader(f) if row]

    cap = cv.VideoCapture(cap_device)
    try:
        cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

        # NOTE(review): static_image_mode=True makes MediaPipe run detection
        # on every frame and, per its documentation, ignore
        # min_tracking_confidence. Kept as in the original; confirm intent.
        with mp.solutions.hands.Hands(
            model_complexity=0,
            static_image_mode=True,
            max_num_hands=2,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.7
        ) as hands:
            mode = 0

            while True:
                key = cv.waitKey(10)
                if key == 27:  # ESC
                    break
                if key != -1:
                    print()
                number, mode = select_mode(key, mode)

                ret, image = cap.read()
                if not ret:
                    break
                image = cv.flip(image, 1)  # mirror so the preview acts like a mirror
                debug_image = image.copy()

                image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
                image.flags.writeable = False
                results = hands.process(image)
                image.flags.writeable = True

                if results.multi_hand_landmarks is not None:
                    for hand_landmarks, handedness in zip(
                            results.multi_hand_landmarks,
                            results.multi_handedness):
                        brect = calc_bounding_rect(debug_image, hand_landmarks)
                        landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                        pre_processed_landmark_list = pre_process_landmark(landmark_list)

                        logging_csv(number, mode, pre_processed_landmark_list)

                        hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

                        # The model may know more classes than the labels file
                        # lists; show no label rather than crash in that case.
                        if 0 <= hand_sign_id < len(keypoint_classifier_labels):
                            hand_sign_text = keypoint_classifier_labels[hand_sign_id]
                        else:
                            hand_sign_text = ""

                        debug_image = draw_bounding_rect(use_brect, debug_image, brect)
                        debug_image = draw_landmarks(debug_image, landmark_list)
                        debug_image = draw_info_text(
                            debug_image,
                            brect,
                            handedness,
                            hand_sign_text,
                            ""
                        )

                debug_image = draw_info(debug_image, mode, number)

                cv.imshow('Hand Gesture Recognition', debug_image)
    finally:
        cap.release()
        cv.destroyAllWindows()


def logging_csv(number, mode, landmark_list):
    """
    Append training rows for one hand sample to './data/keypoint.csv'.

    Nothing happens unless ``mode`` is 1 and ``number`` is in 0-25 (a-z).
    Otherwise four rows are written, each starting with the letter label:
    the sample itself, its horizontal mirror, a jittered copy of the sample
    and a jittered copy of the mirror. I/O and other errors are reported on
    stdout instead of being raised.
    """
    if mode != 1 or not (0 <= number <= 25):
        return

    csv_path = './data/keypoint.csv'
    try:
        with open(csv_path, 'a', newline="") as f:
            write_row = csv.writer(f).writerow
            label = chr(number + 97)  # 0 -> 'a', 25 -> 'z'

            # Raw sample
            write_row([label, *landmark_list])

            # Horizontally mirrored sample
            flipped = mirror_landmarks(landmark_list)
            write_row([label, *flipped])

            # Jittered versions of both (original first, then the mirror)
            write_row([label, *add_variance(landmark_list)])
            write_row([label, *add_variance(flipped)])
    except FileNotFoundError:
        print(f"Error: CSV path '{csv_path}' not found.")
    except Exception as exc:
        print(f"Error while logging to CSV: {exc}")

def mirror_landmarks(landmark_list):
    """
    Return a horizontally mirrored copy of a flat landmark list.

    The input is read as consecutive (x, y) pairs: x values sit at even
    indices and are negated, y values sit at odd indices and are kept.
    """
    return [
        value
        for x_index in range(0, len(landmark_list), 2)
        for value in (-landmark_list[x_index], landmark_list[x_index + 1])
    ]

def add_variance(landmark_list, variance=0.02):
    """
    Return a jittered copy of the landmark coordinates.

    Each coordinate gets its own offset drawn uniformly from
    [-variance, variance]; the input list is left untouched.
    """
    return [
        value + random.uniform(-variance, variance)
        for value in landmark_list
    ]

def select_mode(key, mode):
    """
    Translate a key code into a (letter index, mode) pair.

    'a'-'z' yield indices 0-25 and leave the mode unchanged. '0' switches to
    mode 0 (nothing mode) and '1' to mode 1 (logging mode); both, like every
    other key, report -1 as the letter index.
    """
    if ord('a') <= key <= ord('z'):
        return key - 97, mode

    mode_for_key = {ord('0'): 0, ord('1'): 1}
    return -1, mode_for_key.get(key, mode)

def pre_process_landmark(landmark_list):
    """Convert landmark points into a flat, scale-normalised feature list.

    Every point is made relative to the first point (so the first two
    output values are always 0), the points are flattened into one list, and
    all values are divided by the largest absolute value so the result lies
    in [-1, 1].

    Args:
        landmark_list: Sequence of points, each indexable as ``[x, y]``.
            The input is not modified.

    Returns:
        A flat list of floats. An empty input gives an empty list. When all
        points coincide (largest absolute value is 0) the result is all
        zeros rather than a division-by-zero error.
    """
    if not landmark_list:
        return []

    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    # Shift to coordinates relative to the first point and flatten in one
    # pass. A plain comprehension is used so the function needs no
    # `itertools` import. Components beyond x and y, if any, pass through.
    relative = [
        value
        for point in landmark_list
        for value in (point[0] - base_x, point[1] - base_y, *point[2:])
    ]

    max_value = max(map(abs, relative))
    if max_value == 0:
        # Degenerate hand (all points identical): nothing to scale.
        return [0.0] * len(relative)

    return [value / max_value for value in relative]

def draw_landmarks(image, landmark_point):
    """
    Draw the hand skeleton and its key points onto ``image`` and return it.

    ``landmark_point`` is a sequence of (x, y) pixel points indexed by
    landmark number. Bones are drawn as a white line over a thicker black
    one; key points as white discs with a black rim, with the fingertips
    (4, 8, 12, 16, 20) drawn larger.
    """
    def stroke(start, end):
        a, b = tuple(start), tuple(end)
        cv.line(image, a, b, (0, 0, 0), 6)
        cv.line(image, a, b, (255, 255, 255), 2)

    def dot(center, radius):
        c = tuple(center)
        cv.circle(image, c, radius, (255, 255, 255), -1)
        cv.circle(image, c, radius, (0, 0, 0), 1)

    if len(landmark_point) > 0:
        # Each chain is walked pairwise; order matters for overdraw.
        chains = (
            (2, 3, 4),                    # Thumb
            (5, 6, 7, 8),                 # Index
            (9, 10, 11, 12),              # Middle
            (13, 14, 15, 16),             # Ring
            (17, 18, 19, 20),             # Pinky
            (0, 1, 2, 5, 9, 13, 17, 0),   # Palm outline
        )
        for chain in chains:
            for first, second in zip(chain, chain[1:]):
                stroke(landmark_point[first], landmark_point[second])

    # Key points
    fingertips = (4, 8, 12, 16, 20)
    for idx, point in enumerate(landmark_point):
        dot(point, 8 if idx in fingertips else 5)

    return image


def draw_bounding_rect(use_brect, image, brect):
    """
    Outline ``brect`` ([x1, y1, x2, y2]) on ``image`` with a thin black
    rectangle when ``use_brect`` is truthy; always return ``image``.
    """
    if not use_brect:
        return image

    top_left = (brect[0], brect[1])
    bottom_right = (brect[2], brect[3])
    cv.rectangle(image, top_left, bottom_right, (0, 0, 0), 1)
    return image


def draw_info_text(image, brect, handedness, hand_sign_text,
                   finger_gesture_text):
    """
    Write the per-hand caption above its bounding box and return ``image``.

    A filled black strip 22 px high is drawn on top of ``brect`` and holds
    the handedness label, followed by ':' and ``hand_sign_text`` when that is
    not empty. A non-empty ``finger_gesture_text`` is additionally written
    at (10, 60) as white text with a black outline.
    """
    left, top, right = brect[0], brect[1], brect[2]
    black, white = (0, 0, 0), (255, 255, 255)
    font = cv.FONT_HERSHEY_SIMPLEX

    # Caption background
    cv.rectangle(image, (left, top), (right, top - 22), black, -1)

    # Full slice kept exactly as in the original implementation.
    caption = handedness.classification[0].label[0:]
    if hand_sign_text != "":
        caption = caption + ':' + hand_sign_text
    cv.putText(image, caption, (left + 5, top - 4),
               font, 0.6, white, 1, cv.LINE_AA)

    if finger_gesture_text != "":
        gesture_caption = "Finger Gesture:" + finger_gesture_text
        # Thick black pass first, thinner white pass on top -> outlined text.
        for colour, thickness in ((black, 4), (white, 2)):
            cv.putText(image, gesture_caption, (10, 60),
                       font, 1.0, colour, thickness, cv.LINE_AA)

    return image

def calc_bounding_rect(image, landmarks):
    """
    Return the pixel bounding box [x1, y1, x2, y2] of a hand's landmarks.

    Each landmark's ``x``/``y`` is scaled by the image width/height taken
    from ``image.shape`` and capped at the last pixel column/row before
    ``cv.boundingRect`` is applied.
    """
    height, width = image.shape[0], image.shape[1]

    pixel_points = [
        (min(int(lm.x * width), width - 1),
         min(int(lm.y * height), height - 1))
        for lm in landmarks.landmark
    ]
    # reshape keeps the (0, 2) shape when there are no landmarks at all.
    landmark_array = np.array(pixel_points, dtype=int).reshape(-1, 2)

    x, y, w, h = cv.boundingRect(landmark_array)
    return [x, y, x + w, y + h]

def calc_landmark_list(image, landmarks):
    """
    Convert a hand's landmarks into a list of [x, y] pixel points.

    Each landmark's ``x``/``y`` is scaled by the image width/height taken
    from ``image.shape``, truncated to int and capped at the last pixel
    column/row.
    """
    height, width = image.shape[0], image.shape[1]
    last_col, last_row = width - 1, height - 1

    return [
        [min(int(lm.x * width), last_col), min(int(lm.y * height), last_row)]
        for lm in landmarks.landmark
    ]

def draw_info(image, mode, number):
    """
    Overlay the current mode (and selected letter) on ``image``; return it.

    Only modes 1 and 2 produce any text: "MODE: ..." at (10, 90) and, when
    ``number`` is in 0-25, "LETTER: <a-z>" at (10, 110).
    """
    if not (1 <= mode <= 2):
        return image

    # NOTE(review): 'Not Logging' is only shown for mode 2, but select_mode
    # in this file only ever yields modes 0 and 1, so it looks unreachable;
    # it may have been meant for mode 0. Behaviour kept as is - confirm.
    mode_names = ('Logging Key Point', 'Not Logging')
    text_style = (cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)

    cv.putText(image, "MODE: " + mode_names[mode - 1], (10, 90), *text_style)
    if 0 <= number <= 25:
        cv.putText(image, "LETTER: " + chr(number + 97), (10, 110), *text_style)
    return image


# Start the capture loop only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

I0000 00:00:1744837717.615460 4424501 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1 Pro
W0000 00:00:1744837717.625013 4683994 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744837717.629914 4683994 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


The code below adds the direction in which the hand is pointing, in addition to the relative positions of the fingers. This is important for distinguishing signs that share the same finger positions but differ in orientation.

In [90]:
'''import csv
import copy
import random

import cv2 as cv
import numpy as np
import mediapipe as mp
import torch


class KeyPointClassifier:
    def __init__(self, model_path='./data/keypoint_classifier_2.pt'):
        # Load the PyTorch model
        self.model = torch.jit.load(model_path)
        self.model.eval()  # Set the model to evaluation mode

    def __call__(self, landmark_list):
        # Convert the input landmark list to a PyTorch tensor
        input_tensor = torch.tensor([landmark_list], dtype=torch.float32)

        # Perform inference
        with torch.no_grad():
            output = self.model(input_tensor)

        # Get the predicted class index
        result_index = torch.argmax(output, dim=1).item()
        return result_index

def main():
    cap_device = 0
    cap_width = 960
    cap_height = 540

    use_brect = True

    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        model_complexity=0,
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.7
    )

    keypoint_classifier = KeyPointClassifier()

    with open('./data/keypoint_classifier_labels.csv', encoding='utf-8-sig') as f:
        keypoint_classifier_labels = csv.reader(f)
        keypoint_classifier_labels = [row[0] for row in keypoint_classifier_labels]

    mode = 0

    while True:
        key = cv.waitKey(10)
        if key == 27:
            break
        if key != -1:
            print()
        number, mode = select_mode(key, mode)

        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)
        debug_image = copy.deepcopy(image)

        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True

        if results.multi_hand_landmarks is not None:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                brect = calc_bounding_rect(debug_image, hand_landmarks)
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                # Calculate hand direction
                hand_direction = calculate_hand_direction(landmark_list)

                pre_processed_landmark_list = pre_process_landmark(landmark_list)

                logging_csv(number, mode, pre_processed_landmark_list)

                hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

                debug_image = draw_bounding_rect(use_brect, debug_image, brect)
                debug_image = draw_landmarks(debug_image, landmark_list)
                debug_image = draw_info_text(
                    debug_image,
                    brect,
                    handedness,
                    f"{keypoint_classifier_labels[hand_sign_id]}",
                    f"Direction: ({hand_direction[0]:.2f}, {hand_direction[1]:.2f})"
                )

        debug_image = draw_info(debug_image, mode, number)

        cv.imshow('Hand Gesture Recognition', debug_image)

    cap.release()
    cv.destroyAllWindows()

def calculate_hand_direction(landmark_list):
    """
    Calculate the direction vector of the hand based on the wrist and middle fingertip.
    Args:
        landmark_list: List of landmarks (x, y coordinates).
    Returns:
        A tuple representing the normalized direction vector (dx, dy).
    """
    # Ensure landmark_list is structured as [[x1, y1], [x2, y2], ...]
    if isinstance(landmark_list[0], (int, float)):  # Flattened list
        landmark_list = [landmark_list[i:i + 2] for i in range(0, len(landmark_list), 2)]

    # Wrist (landmark 0) and middle fingertip (landmark 12)
    wrist = np.array(landmark_list[0])
    middle_fingertip = np.array(landmark_list[12])

    # Calculate the direction vector
    direction_vector = middle_fingertip - wrist

    # Normalize the vector
    norm = np.linalg.norm(direction_vector)
    if norm == 0:
        return (0, 0)  # Avoid division by zero
    normalized_vector = direction_vector / norm

    return tuple(normalized_vector)

def logging_csv(number, mode, landmark_list):
    """
    Logs the selected number, landmark list, and hand direction to a CSV file if mode is 1.
    Also logs a horizontally mirrored version of the landmarks and introduces variance.
    """
    if mode == 0:
        # Mode 0: No logging
        pass
    elif mode == 1 and (0 <= number <= 25):  # Ensure number corresponds to A-Z (0-25)
        csv_path = './data/keypoint_dir.csv'
        try:
            # Ensure landmark_list is structured as [[x1, y1], [x2, y2], ...]
            if isinstance(landmark_list[0], (int, float)):  # Flattened list
                landmark_list = [landmark_list[i:i + 2] for i in range(0, len(landmark_list), 2)]

            with open(csv_path, 'a', newline="") as f:
                writer = csv.writer(f)

                # Calculate hand direction
                hand_direction = calculate_hand_direction(landmark_list)

                # Flatten landmark_list for logging
                flattened_landmark_list = [coord for point in landmark_list for coord in point]

                # Log the original data with hand direction
                writer.writerow([chr(number + 97), *flattened_landmark_list, *hand_direction])

                # Create and log the mirrored data with hand direction
                mirrored_landmark_list = mirror_landmarks(flattened_landmark_list)
                mirrored_hand_direction = (-hand_direction[0], hand_direction[1])  # Flip x-direction
                writer.writerow([chr(number + 97), *mirrored_landmark_list, *mirrored_hand_direction])

                # Add variance to the original data and log it with hand direction
                varied_landmark_list = add_variance(flattened_landmark_list)
                writer.writerow([chr(number + 97), *varied_landmark_list, *hand_direction])

                # Add variance to the mirrored data and log it with hand direction
                varied_mirrored_landmark_list = add_variance(mirrored_landmark_list)
                writer.writerow([chr(number + 97), *varied_mirrored_landmark_list, *mirrored_hand_direction])
        except FileNotFoundError:
            print(f"Error: CSV path '{csv_path}' not found.")
        except Exception as e:
            print(f"Error while logging to CSV: {e}")
    return


def mirror_landmarks(landmark_list):
    mirrored_landmark_list = []
    for i in range(0, len(landmark_list), 2):
        x = landmark_list[i]
        y = landmark_list[i + 1]
        mirrored_x = -x
        mirrored_landmark_list.extend([mirrored_x, y])
    return mirrored_landmark_list


def add_variance(landmark_list, variance=0.02):
    varied_landmark_list = []
    for coord in landmark_list:
        varied_coord = coord + random.uniform(-variance, variance)
        varied_landmark_list.append(varied_coord)
    return varied_landmark_list


def select_mode(key, mode):
    number = -1
    if ord('a') <= key <= ord('z'):
        number = key - 97
    elif key == ord('0'):
        mode = 0
    elif key == ord('1'):
        mode = 1
    return number, mode


def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]
        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y

    # Convert to a one-dimensional list
    temp_landmark_list = list(itertools.chain.from_iterable(temp_landmark_list))

    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))

    return temp_landmark_list


def draw_landmarks(image, landmark_point):
    def draw_line_pair(p1, p2):
        cv.line(image, tuple(p1), tuple(p2), (0, 0, 0), 6)
        cv.line(image, tuple(p1), tuple(p2), (255, 255, 255), 2)

    def draw_keypoint(pt, radius):
        cv.circle(image, tuple(pt), radius, (255, 255, 255), -1)
        cv.circle(image, tuple(pt), radius, (0, 0, 0), 1)

    if len(landmark_point) > 0:
        finger_connections = [
            [2, 3, 4],
            [5, 6, 7, 8],
            [9, 10, 11, 12],
            [13, 14, 15, 16],
            [17, 18, 19, 20]
        ]
        for finger in finger_connections:
            for i in range(len(finger) - 1):
                draw_line_pair(landmark_point[finger[i]], landmark_point[finger[i + 1]])
        palm_connections = [
            (0, 1), (1, 2), (2, 5), (5, 9),
            (9, 13), (13, 17), (17, 0)
        ]
        for start, end in palm_connections:
            draw_line_pair(landmark_point[start], landmark_point[end])
    for index, landmark in enumerate(landmark_point):
        if index in [4, 8, 12, 16, 20]:
            draw_keypoint(landmark, 8)
        else:
            draw_keypoint(landmark, 5)
    return image


def draw_bounding_rect(use_brect, image, brect):
    if use_brect:
        cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[3]), (0, 0, 0), 1)
    return image


def draw_info_text(image, brect, handedness, hand_sign_text, finger_gesture_text):
    cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[1] - 44), (0, 0, 0), -1)
    info_text = handedness.classification[0].label[0:]
    if hand_sign_text != "":
        info_text = info_text + ':' + hand_sign_text
    cv.putText(image, info_text, (brect[0] + 5, brect[1] - 26),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)
    if finger_gesture_text != "":
        cv.putText(image, finger_gesture_text, (brect[0] + 5, brect[1] - 4),
                   cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)
    return image


def calc_bounding_rect(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]
    landmark_array = np.empty((0, 2), int)
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_point = [np.array((landmark_x, landmark_y))]
        landmark_array = np.append(landmark_array, landmark_point, axis=0)
    x, y, w, h = cv.boundingRect(landmark_array)
    return [x, y, x + w, y + h]


def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]
    landmark_point = []
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_point.append([landmark_x, landmark_y])
    return landmark_point


def draw_info(image, mode, number):
    mode_string = ['Logging Key Point', 'Not Logging']
    if 1 <= mode <= 2:
        cv.putText(image, "MODE: " + mode_string[mode - 1], (10, 90),
                   cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                   cv.LINE_AA)
        if number == -1:
            pass
        elif 0 <= number <= 25:
            cv.putText(image, "LETTER: " + str(chr(number + 97)), (10, 110),
                       cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                       cv.LINE_AA)
    return image


if __name__ == '__main__':
    main()'''

'import csv\nimport copy\nimport random\n\nimport cv2 as cv\nimport numpy as np\nimport mediapipe as mp\nimport torch\n\n\nclass KeyPointClassifier:\n    def __init__(self, model_path=\'./data/keypoint_classifier_2.pt\'):\n        # Load the PyTorch model\n        self.model = torch.jit.load(model_path)\n        self.model.eval()  # Set the model to evaluation mode\n\n    def __call__(self, landmark_list):\n        # Convert the input landmark list to a PyTorch tensor\n        input_tensor = torch.tensor([landmark_list], dtype=torch.float32)\n\n        # Perform inference\n        with torch.no_grad():\n            output = self.model(input_tensor)\n\n        # Get the predicted class index\n        result_index = torch.argmax(output, dim=1).item()\n        return result_index\n\ndef main():\n    cap_device = 0\n    cap_width = 960\n    cap_height = 540\n\n    use_brect = True\n\n    cap = cv.VideoCapture(cap_device)\n    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)\n    cap.set(cv.CAP_