# Project CC-DP3 

In this notebook we will show how gesture control can be used for sending commands to an agent, the agent will eventually execute an order related to this gesture.

In this example we use an external webcam to record the livestream of given gestures and will display an animation in real-time to confirm the action related to the gesture.

The gestures are as follows: 

    - FOLLOW: 👆
    - STOP: ✋


In [None]:
# This will download the model that will recognize the gestures - this model supports 7 hand gestures: 👍, 👎, ✌️, ☝️, ✊, 👋, 🤟
!wget -q https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task

In [2]:
# getting the GIFs for the example
!wget -A .gif -r -l 1 -H -O gif_A.gif https://media.tenor.com/1OMWOWwx98EAAAAC/upright-point.gif
!wget -A .gif -r -l 1 -H -O gif_B.gif https://gifdb.com/images/high/michael-scott-no-hand-gesture-p7iyykb88vqq4ks2.gif


will be placed in the single file you specified.

--2023-11-18 19:09:43--  https://media.tenor.com/1OMWOWwx98EAAAAC/upright-point.gif
Resolving media.tenor.com (media.tenor.com)... 2a00:1450:400e:80e::200a, 2a00:1450:400e:800::200a, 2a00:1450:400e:810::200a, ...
Connecting to media.tenor.com (media.tenor.com)|2a00:1450:400e:80e::200a|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1894549 (1,8M) [image/gif]
Saving to: ‘gif_A.gif’


2023-11-18 19:09:44 (2,85 MB/s) - ‘gif_A.gif’ saved [1894549/1894549]

FINISHED --2023-11-18 19:09:44--
Total wall clock time: 0,7s
Downloaded: 1 files, 1,8M in 0,6s (2,85 MB/s)
will be placed in the single file you specified.

--2023-11-18 19:09:44--  https://gifdb.com/images/high/michael-scott-no-hand-gesture-p7iyykb88vqq4ks2.gif
Resolving gifdb.com (gifdb.com)... 2400:52e0:1e01::879:1, 84.17.46.53
Connecting to gifdb.com (gifdb.com)|2400:52e0:1e01::879:1|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 78

In [1]:
%pip install mediapipe, cv2

[31mERROR: Invalid requirement: 'mediapipe,'[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import copy
import cv2
import mediapipe as mp
import time
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2


# Helper function for visualizing gesture recognition results
# Helper function for visualizing gesture recognition results
def visualize_results(frame, gesture_results):
    mp_drawing = mp.solutions.drawing_utils

    if gesture_results:
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        annotated_image = copy.deepcopy(frame_rgb)

        for hand_landmarks in gesture_results.hand_landmarks:
            hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
            hand_landmarks_proto.landmark.extend([
                landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
            ])

            mp_drawing.draw_landmarks(
                annotated_image,
                hand_landmarks_proto,
                mp.solutions.hands.HAND_CONNECTIONS,
                mp.solutions.drawing_styles.get_default_hand_landmarks_style(),
                mp.solutions.drawing_styles.get_default_hand_connections_style())

        cv2.imshow("Webcam Feed", cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))


# TODO: Add paths to GIFs here
gif_A = "path/to/gif_a.gif"
gif_B = "path/to/gif_b.gif"

# verbose flag will show overlay on webcam footage
VERBOSE = True

# duration of the gif & timeout to pause hand gesture recognition
DURATION = 5

# Initialize MediaPipe for gesture recognition
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

# Load the gesture recognition model
model_path = 'gesture_recognizer.task'
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.GestureRecognizerOptions(base_options=base_options)
recognizer = vision.GestureRecognizer.create_from_options(options)

# Open the webcam (use 0 for the default camera)
cap = cv2.VideoCapture(0)

# Check if the webcam is opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Create a window to display the camera feed
cv2.namedWindow("Webcam Feed", cv2.WINDOW_NORMAL)

# Flag to control the pause state and determine which gesture was recognized
pause_time = None
recognized_gesture = None

while True:
    timestamp_ms = int(time.time() * 1000)  # Get timestamp in milliseconds

    # Read a frame from the webcam
    ret, frame = cap.read()

    # Check if the frame was read successfully
    if not ret:
        print("Error: Failed to grab frame.")
        break

    # Check if it's time to pause analysis
    if pause_time and time.time() - pause_time < DURATION:
        # Display a specific GIF based on recognized gesture
        if recognized_gesture == "pointing_finger":
            gif_frame = cv2.imread(gif_A)  # Replace with your GIF file for pointing finger
        elif recognized_gesture == "open_hand_stop_sign":
            gif_frame = cv2.imread(gif_B)  # Replace with your GIF file for open hand stop sign

        cv2.imshow("GIF Window", gif_frame)
        cv2.waitKey(1)
        continue

    # Convert the frame received from OpenCV to a MediaPipe’s Image object.
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

    # Send live image data to perform gesture recognition.
    # The results are accessible via the result_callback provided in
    # the GestureRecognizerOptions object.

    # Perform gesture recognition on the frame
    gesture_results = recognizer.recognize(mp_image)

    print(gesture_results)
    # Check for specific gestures to pause
    if gesture_results:
        if gesture_results.gestures != []:
            top_gesture = gesture_results.gestures[0][0].category_name
            print(top_gesture)

    # Display the frame with gesture overlay in the window
    visualize_results(frame, gesture_results)

    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# TODO implement GIF logic

# Release the webcam
cap.release()

# Close the webcam window
cv2.destroyWindow("Webcam Feed")

# After a specified duration, close the GIF window and return to webcam
if pause_time:
    while time.time() - pause_time < DURATION:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyWindow("GIF Window")


I0000 00:00:1700343797.392193       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
I0000 00:00:1700343797.394328       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1700343797.394622       1 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1700343797.395655       1 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
GestureRecognizerResult(gestures=[], han

: 