In [10]:
import cv2
import time
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python import text
from mediapipe.tasks.python import audio

In [11]:
# Location of the gesture recognizer model asset, given relative to the
# current working directory. It is handed to BaseOptions(model_asset_path=...)
# when the recognizer options are built.
model_path = './gesture_recognizer.task'

In [13]:
# Short aliases for the MediaPipe Tasks classes used below, reached through the
# `python` and `vision` task modules imported in the first cell.
BaseOptions = python.BaseOptions
GestureRecognizer = vision.GestureRecognizer
GestureRecognizerOptions = vision.GestureRecognizerOptions
GestureRecognizerResult = vision.GestureRecognizerResult
VisionRunningMode = vision.RunningMode

# Callback registered as `result_callback` in the recognizer options.
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Accept one recognition result and intentionally do nothing with it.

    Args:
        result: The gesture recognition result delivered to the callback.
        output_image: Image supplied alongside the result (presumably the
            frame that was submitted; confirm against the MediaPipe docs).
        timestamp_ms: Timestamp supplied alongside the result, in
            milliseconds (presumably the value passed to recognize_async).

    Returns:
        None.
    """
    # Result logging is switched off; restore the line below to inspect results.
    # print('gesture recognition result: {}'.format(result))

# Recognizer configuration: live-stream running mode, the model asset at
# `model_path`, and `print_result` as the callback that receives results.
options = GestureRecognizerOptions(
    running_mode=VisionRunningMode.LIVE_STREAM,
    base_options=BaseOptions(model_asset_path=model_path),
    result_callback=print_result,
)

# Feed webcam frames to the gesture recognizer until 'q' is pressed.
# Results are not returned here; they are delivered to the `result_callback`
# configured in `options`.
with GestureRecognizer.create_from_options(options) as recognizer:
    # Start capturing from the webcam (device index 0).
    cap = cv2.VideoCapture(0)

    # A monotonic clock is used because wall-clock time can be adjusted
    # backwards, and recognize_async() requires increasing timestamps.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()

            # Ignore the empty camera frame.
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Elapsed time since the start, converted to milliseconds.
            current_time = time.monotonic() - start_time
            frame_timestamp_ms = int(current_time * 1000)

            # Two frames can land in the same millisecond after truncation;
            # bump the value so every submitted timestamp strictly increases.
            if frame_timestamp_ms <= last_timestamp_ms:
                frame_timestamp_ms = last_timestamp_ms + 1
            last_timestamp_ms = frame_timestamp_ms

            # OpenCV delivers frames in BGR channel order, while
            # ImageFormat.SRGB expects RGB, so convert before wrapping the
            # frame in a MediaPipe Image object.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            recognizer.recognize_async(mp_image, frame_timestamp_ms)

            # Display the original (BGR) frame, which is what imshow expects.
            cv2.imshow('MediaPipe Hands', frame)

            # Break the loop when 'q' is pressed.
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
    finally:
        # Release the webcam and close the window even if the loop is
        # interrupted (e.g. kernel interrupt) or an exception is raised.
        cap.release()
        cv2.destroyAllWindows()

I0000 00:00:1709664643.847211       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
W0000 00:00:1709664643.856496       1 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1709664643.862338       1 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
