# Hand gesture recognition

---


## 1. Import packages


In [1]:
# Install the project's dependencies into the environment of the running kernel.
# The explicit `%pip` magic does not depend on IPython's automagic setting.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import cv2
import mediapipe as mp

## 2. Create utility functions


### OpenCV


### MediaPipe


In [3]:
# Initial MediaPipe thresholds.
# NOTE(review): the same four names are assigned again in the settings cells
# under "Tweak settings", so those later values are the ones in effect when
# the main loop runs.

# Confidence thresholds; detection and tracking are forwarded to Holistic.
MIN_TRACKING_CONFIDENCE = 0.3
MIN_DETECTION_CONFIDENCE = 0.4

# NOTE(review): not referenced by the Holistic setup shown in this notebook —
# confirm whether these two are still needed.
MIN_PRESENCE_CONFIDENCE = 0.4
NUM_HANDS = 1

In [4]:
# Short aliases for the MediaPipe solution modules used by the main loop:
# `mp_holistic` provides the Holistic context manager, and both aliases are
# handed to draw_landmarks.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

### Model predictions


In [5]:
# Gesture class names. The list is passed to get_gesture together with the
# model, so its order presumably has to match the model's output indices —
# confirm against the training code before reordering or adding entries.
labels = [
    "closed",  # 0
    "dislike",  # 1
    "like",  # 2
    "palm",  # 3
    "point_up",  # 4
    "rock",  # 5
    "victory",  # 6
    "victory_inverted",  # 7
]

## 3. Preprocess the data


In [6]:
def frame_preprocessing(frame, resolution=None, flip=False):
    """Optionally resize and mirror a frame before it is analysed.

    Args:
        frame: Image to process; handed to OpenCV unchanged.
        resolution: Target size passed to ``cv2.resize`` as its ``dsize``
            argument. ``None`` skips the resize step.
        flip: When truthy, the frame is passed through ``cv2.flip`` with flip
            code 1 (a horizontal mirror) after any resize.

    Returns:
        The processed frame, or the input object itself when neither step
        is requested.
    """
    resized = frame if resolution is None else cv2.resize(frame, resolution)
    return cv2.flip(resized, 1) if flip else resized

## 4. Execute program


### Tweak settings


#### Camera Settings


In [7]:
# Target frame size handed to frame_preprocessing.
# Alternatives: (1280, 720), or None to skip resizing altogether.
RESOLUTION = (833, 480)

# Mirror each frame horizontally before processing when True.
FLIP_CAMERA = False

# Forwarded to read_frame; presumably None means "no frame-rate limit" —
# TODO confirm against helpers.camera.
FRAMERATE = None

#### MediaPipe hand bones


In [8]:
# Confidence thresholds forwarded to the Holistic model in the main loop.
MIN_TRACKING_CONFIDENCE = 0.3
MIN_DETECTION_CONFIDENCE = 0.4

# NOTE(review): the three values below are not referenced by the code shown
# in this notebook (draw_box is called without a margin, and Holistic is
# created without a presence threshold or hand count) — confirm whether the
# helpers still need them.
MIN_PRESENCE_CONFIDENCE = 0.4
NUM_HANDS = 1
BOX_MARGIN = 24

#### Model settings


In [9]:
# Passed to Holistic as `model_complexity`.
MP_MODEL_COMPLEXITY = 0

# Threshold handed to get_gesture along with the model and the label list.
MIN_GESTURE_CONFIDENCE = 0.7

# The model file lives in a folder of the same name under ./models/.
MODEL_NAME = "4_feb_w_additional_datasets"
MODEL_PATH = f"./models/{MODEL_NAME}/{MODEL_NAME}.hdf5"

In [10]:
from tensorflow.keras.models import load_model

from helpers.predictions import get_gesture
from helpers.mediapipe import get_landmarks, draw_landmarks, draw_box
from helpers.camera import read_frame, show_frame, get_close_event, close_camera

# Index of the capture device handed to OpenCV. Change it if the wrong camera
# (or no camera) is picked up on this machine.
CAMERA_INDEX = 1

capture = cv2.VideoCapture(CAMERA_INDEX)

# Everything that uses the camera runs inside try/finally so the device is
# released even when the loop is interrupted (e.g. kernel interrupt) or a
# helper raises; otherwise the camera stays locked until the kernel restarts.
try:
    # Fail early with a readable message instead of an opaque error from the
    # first frame operation further down.
    if not capture.isOpened():
        raise RuntimeError(f"Could not open camera at index {CAMERA_INDEX}")

    # compile=False: the model is only used for inference here.
    model = load_model(MODEL_PATH, compile=False)

    with mp_holistic.Holistic(
        min_detection_confidence=MIN_DETECTION_CONFIDENCE,
        min_tracking_confidence=MIN_TRACKING_CONFIDENCE,
        model_complexity=MP_MODEL_COMPLEXITY,
    ) as holistic:
        while True:
            frame = read_frame(capture, FRAMERATE)
            frame = frame_preprocessing(frame, RESOLUTION, FLIP_CAMERA)

            landmarks = get_landmarks(frame, holistic)

            for i, hand_landmark in enumerate(landmarks):
                # Skip slots for which no hand was detected.
                if not hand_landmark:
                    continue

                # One-hot marker for the slot this hand came from (first vs.
                # any other entry of `landmarks`); presumably left/right —
                # confirm against get_landmarks.
                hand = [1, 0] if i == 0 else [0, 1]

                frame = draw_landmarks(frame, hand_landmark, mp_holistic, mp_drawing)
                gesture, accuracy = get_gesture(
                    model, labels, MIN_GESTURE_CONFIDENCE, hand, hand_landmark
                )

                frame = draw_box(frame, gesture, accuracy, hand, hand_landmark)

            # Display once per captured frame, after every detected hand has
            # been drawn, so the frame is shown even when `landmarks` is
            # empty and is not redrawn once per hand slot.
            show_frame(frame, "hand gesture recognition")

            if get_close_event():
                break
finally:
    close_camera(capture)

I0000 00:00:1707068456.084033       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
