In [1]:
import cv2
import mediapipe as mp
import math
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from matplotlib import pyplot as plt
from mediapipe.framework.formats import landmark_pb2


# Shortcut to the MediaPipe hands solution module.
mp_hands = mp.solutions.hands

# Still images that are run through the recognizer below; paths are relative
# to the current working directory.
IMAGE_FILENAMES = ['test1.jpg', 'test2.jpg', 'test3.jpg', 'test4.jpg']

# STEP 2: Create a GestureRecognizer object.
# NOTE(review): the model path is absolute and machine-specific; it has to be
# adjusted before running this cell on another computer.
base_options = python.BaseOptions(model_asset_path='C:/Users/miica/.conda/envs/proyecto_final/proyecto_final/gesture_recognizer.task')
options = vision.GestureRecognizerOptions(base_options=base_options)
# options = vision.GestureRecognizerOptions(
#     base_options=base_options,
#     canned_gestures_classifier_options=[])

# Recognizer instance used for every image in IMAGE_FILENAMES.
recognizer = vision.GestureRecognizer.create_from_options(options)

# Target size (in pixels) of the longer side used by resize_and_show.
DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480

def resize_and_show(image):
  """Resize ``image`` for previewing while keeping its aspect ratio.

  The longer side is scaled to DESIRED_WIDTH / DESIRED_HEIGHT. On-screen
  display is currently disabled (the ``cv2.imshow`` call is commented out),
  so the resized image is returned to let the caller decide how to show it.

  Args:
    image: Image array as returned by ``cv2.imread``, or ``None`` when the
      file could not be read.

  Returns:
    The resized image, or ``None`` when ``image`` is ``None``.
  """
  # cv2.imread reports an unreadable or missing file by returning None
  # instead of raising, so guard before touching ``image.shape``.
  if image is None:
    print("resize_and_show: no image data, nothing to preview")
    return None

  h, w = image.shape[:2]
  if h < w:
    # Landscape: fix the width and derive the height.
    img = cv2.resize(image, (DESIRED_WIDTH, math.floor(h/(w/DESIRED_WIDTH))))
  else:
    # Portrait or square: fix the height and derive the width.
    img = cv2.resize(image, (math.floor(w/(h/DESIRED_HEIGHT)), DESIRED_HEIGHT))
  #cv2.imshow("Test", img)
  return img

# Preview the images.
preview_images = {name: cv2.imread(name) for name in IMAGE_FILENAMES}
for name, image in preview_images.items():
  print(name)
  if image is None:
    # cv2.imread returns None (it does not raise) when a file cannot be read.
    print("  could not read image, skipping preview")
    continue
  resize_and_show(image)

# Parallel lists consumed by the display helpers: images[k] belongs to
# results[k], where each result is (top gesture, landmarks of every hand).
images = []
results = []

for i, image_file_name in enumerate(IMAGE_FILENAMES):
  # STEP 3: Load the input image.
  image = mp.Image.create_from_file(image_file_name)

  # STEP 4: Recognize gestures in the input image.
  recognition_result = recognizer.recognize(image)

  print(i)
  print(recognition_result.gestures)
  print(recognition_result.hand_landmarks)

  # STEP 5: Process the result. In this case, keep it for visualization.
  # An image without a detected hand has no gesture and no landmarks. Skip it
  # so that a previous image's result is never stored under this image (and
  # so the first image cannot fail on undefined names).
  if not recognition_result.gestures or not recognition_result.hand_landmarks:
    print("No hand detected in", image_file_name, "- skipping")
    continue

  top_gesture = recognition_result.gestures[0][0]
  hand_landmarks = recognition_result.hand_landmarks
  images.append(image)
  results.append((top_gesture, hand_landmarks))

# Hide every axis spine, tick and tick label so the figures show only the
# images and their titles.
plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.spines.left': False,
    'axes.spines.bottom': False,
    'xtick.labelbottom': False,
    'xtick.bottom': False,
    'ytick.labelleft': False,
    'ytick.left': False,
    'xtick.labeltop': False,
    'xtick.top': False,
    'ytick.labelright': False,
    'ytick.right': False
})

# Shortcuts to the MediaPipe drawing helpers used by the display functions.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


def display_one_image(image, title, subplot, titlesize=16):
    """Draw ``image`` in the given subplot slot and return the next slot.

    Args:
        image: Image data accepted by ``plt.imshow``.
        title: Text shown above the image; an empty title is not drawn.
        subplot: ``(rows, cols, index)`` triple selecting the subplot.
        titlesize: Font size of the title; the title padding is derived from it.

    Returns:
        ``(rows, cols, index + 1)``, i.e. the slot for the following image.
    """
    plt.subplot(*subplot)
    plt.imshow(image)

    if len(title) != 0:
        plt.title(
            title,
            fontsize=int(titlesize),
            color='black',
            fontdict={'verticalalignment':'center'},
            pad=int(titlesize/1.5),
        )

    n_rows, n_cols, position = subplot[0], subplot[1], subplot[2]
    return (n_rows, n_cols, position + 1)

def IsOkay(hand_landmarks):
  """Tell whether the first hand's thumb tip and index tip are touching.

  Landmark 4 (thumb tip) and landmark 8 (index finger tip) count as touching
  when they are less than 0.01 apart on both axes, in the normalized
  coordinates stored on the landmarks. Debug information is printed.

  Args:
    hand_landmarks: Sequence of hands, each a sequence of landmarks exposing
      ``x`` and ``y``. Only the first hand is inspected.

  Returns:
    True when the two fingertips are that close, False otherwise.
  """
  first_hand = hand_landmarks[0]

  print("Len handlandmarks = ", len(first_hand))
  print("HandLandmarks: ", hand_landmarks)

  thumb_tip = first_hand[4]
  index_tip = first_hand[8]

  gap_x = abs(index_tip.x - thumb_tip.x)
  gap_y = abs(index_tip.y - thumb_tip.y)

  print("Diff_x = ", gap_x, ", diff_y = ", gap_y)

  return gap_x < 0.01 and gap_y < 0.01
  
def CheckGesture(hand_landmarks):
  """Name the custom gesture shown by the detected hands, if any.

  Args:
    hand_landmarks: Sequence of hands (each a sequence of landmarks); may be
      empty when nothing was detected.

  Returns:
    "Okay" when IsOkay recognizes the pose, otherwise None (also for an
    empty ``hand_landmarks``).
  """
  if len(hand_landmarks) == 0:
    return None

  return "Okay" if IsOkay(hand_landmarks) else None


def display_dots_on_hands(images, results):
  """Show the images in one grid with a green dot on every hand landmark.

  Each image is titled with its top gesture and score. When the recognizer
  reports the category "None", the title uses the name returned by
  CheckGesture for that image's landmarks instead.

  Args:
    images: Sequence of MediaPipe images (objects providing ``numpy_view()``).
    results: Sequence of ``(top_gesture, multi_hand_landmarks)`` pairs, one
      per image and in the same order.

  Returns:
    None. Nothing is drawn when ``images`` is empty.
  """
  # An empty batch would make ``rows`` zero and divide by zero below.
  if not images:
    return

  images = [image.numpy_view() for image in images]
  top_gestures = [top_gesture for (top_gesture, _) in results]
  multi_hand_landmarks_list = [multi_hand_landmarks for (_, multi_hand_landmarks) in results]

  # Auto-squaring: this will drop data that does not fit into square or square-ish rectangle.
  rows = int(math.sqrt(len(images)))
  cols = len(images) // rows
  shown = rows * cols

  # Size and spacing.
  FIGSIZE = 13.0
  SPACING = 0.1
  subplot = (rows, cols, 1)
  if rows < cols:
    plt.figure(figsize=(FIGSIZE, FIGSIZE/cols*rows))
  else:
    plt.figure(figsize=(FIGSIZE/rows*cols, FIGSIZE))
  dynamic_titlesize = FIGSIZE*SPACING/max(rows, cols) * 40 + 3

  batch = zip(images[:shown], top_gestures[:shown], multi_hand_landmarks_list[:shown])
  for image, gesture, hands in batch:
    if gesture.category_name == "None":
      gesture_name = CheckGesture(hands)
      title = f"{gesture_name} ({gesture.score:.2f})"
    else:
      title = f"{gesture.category_name} ({gesture.score:.2f})"

    # numpy_view() must not be drawn on directly, so work on a copy.
    annotated_image = image.copy()
    height, width, _ = annotated_image.shape

    for hand_landmarks in hands:
      for landmark in hand_landmarks:
        # Landmarks are normalized; convert them to pixel coordinates.
        x = int(landmark.x * width)
        y = int(landmark.y * height)
        cv2.circle(annotated_image, (x, y), 2, (0, 255, 0), -1)

    # Draw each image exactly once (regardless of how many hands it has) and
    # move on to the next grid slot.
    subplot = display_one_image(annotated_image, title, subplot, titlesize=dynamic_titlesize)

  # Layout: applied once, after every image has been placed in the grid.
  plt.tight_layout()
  plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
  plt.show()


def display_batch_of_images_with_gestures_and_hand_landmarks(images, results):
    """Display a grid of images with gesture name, score and hand skeletons.

    Args:
        images: Sequence of MediaPipe images (objects providing
            ``numpy_view()``).
        results: Sequence of ``(top_gesture, multi_hand_landmarks)`` pairs,
            one per image and in the same order.

    Returns:
        None. Nothing is drawn when ``images`` is empty.
    """
    # An empty batch would make ``rows`` zero and divide by zero below.
    if not images:
        return

    # Images and labels.
    images = [image.numpy_view() for image in images]
    top_gestures = [top_gesture for (top_gesture, _) in results]
    multi_hand_landmarks_list = [multi_hand_landmarks for (_, multi_hand_landmarks) in results]

    # Auto-squaring: this will drop data that does not fit into square or square-ish rectangle.
    rows = int(math.sqrt(len(images)))
    cols = len(images) // rows
    shown = rows * cols

    # Size and spacing.
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot = (rows, cols, 1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE, FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols, FIGSIZE))
    dynamic_titlesize = FIGSIZE*SPACING/max(rows, cols) * 40 + 3

    # Display gestures and hand landmarks.
    batch = zip(images[:shown], top_gestures[:shown], multi_hand_landmarks_list[:shown])
    for image, gesture, hands in batch:
        title = f"{gesture.category_name} ({gesture.score:.2f})"
        annotated_image = image.copy()

        for hand_landmarks in hands:
            # draw_landmarks expects a NormalizedLandmarkList proto, so the
            # task-API landmarks are copied into one.
            hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
            hand_landmarks_proto.landmark.extend([
                landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
                for landmark in hand_landmarks
            ])

            mp_drawing.draw_landmarks(
                annotated_image,
                hand_landmarks_proto,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

        subplot = display_one_image(annotated_image, title, subplot, titlesize=dynamic_titlesize)

    # Layout.
    plt.tight_layout()
    plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()

# Alternative view (disabled): draws the connected hand skeleton on each image.
#display_batch_of_images_with_gestures_and_hand_landmarks(images, results)
# Show the processed test images with every hand landmark marked as a dot.
display_dots_on_hands(images, results)

ModuleNotFoundError: No module named 'pygame'

## INSTALACIÓN ##
pip install opencv-python
pip install mediapipe
pip install matplotlib
pip install mouse
pip install pyautogui
pip install pygame
## STREAMING ##

In [3]:
import cv2
import numpy as np
import mouse
import mediapipe as mp
import pyautogui
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import pygame

# Audio feedback: the mixer must be initialized before a Sound can be loaded.
pygame.mixer.init()
# Sound played on every click. The file is looked up relative to the current
# working directory. The variable name is spelled "ckick" and is referenced
# under that spelling by the click helpers.
ckick_sound = pygame.mixer.Sound("mouse-click-153941.mp3")
# NOTE(review): ``global`` at module level has no effect; the name is already
# a module global.
global prevGesture
# Values shown in the on-screen debug text ("Dx"/"Dy"); nothing in this cell
# updates them after this initialization.
dx = 0
dy = 0

# Gesture name returned for the previously processed frame.
prevGesture = ""
# True while the left button is being held down for a drag.
mouse_pressed = False
# Smoothed cursor position as [x, y]; empty until the first cursor move.
smoothed_landmarks = []

# Overlay text style (font, scale, two colors, thickness) for cv2.putText.
font = cv2.FONT_HERSHEY_SIMPLEX
tamaño_texto = 1
color_texto = (0, 255, 0)
color_texto2 = (255, 234, 143)
grosor = 2

# Screen resolution, used to map hand positions to cursor positions.
w_monitor, h_monitor = pyautogui.size()

def draw_landmarks_on_image(image, result):
    """Return a copy of ``image`` with every detected hand skeleton drawn on it.

    Args:
        image: Image array; it is copied, never modified.
        result: Recognition result whose ``hand_landmarks`` holds one landmark
            sequence per detected hand.

    Returns:
        The annotated copy (an unchanged copy when no hand was detected).
    """
    detected_hands = result.hand_landmarks
    canvas = np.copy(image)

    for hand in detected_hands:
        # draw_landmarks works on a NormalizedLandmarkList proto, so each
        # hand's landmarks are copied into one first.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in hand
        ])

        # Draw the landmarks together with their connections.
        mp_drawing.draw_landmarks(
            canvas,
            proto,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style(),
        )

    return canvas

def MoveMouse(hand_landmarks):
    """Move the cursor to where the first hand's index fingertip points.

    The fingertip (landmark 8) is mapped onto the monitor, clamped to the
    screen, and blended into the previous position with an exponential moving
    average so the cursor does not jitter.

    Args:
        hand_landmarks: Sequence of hands, each a sequence of landmarks with
            normalized ``x``/``y``. Only the first hand is used.

    Side effects:
        Updates the module-level ``smoothed_landmarks`` and moves the system
        cursor. Reads ``w_monitor`` and ``h_monitor``.
    """
    global smoothed_landmarks

    index_tip = hand_landmarks[0][8]

    # The coordinates are already normalized, so they scale straight to the
    # monitor; no round trip through camera pixels is needed. A landmark can
    # fall outside [0, 1] when the hand is partly out of frame, so keep the
    # target on the screen.
    x = min(max(index_tip.x * w_monitor, 0), w_monitor - 1)
    y = min(max(index_tip.y * h_monitor, 0), h_monitor - 1)

    # Smooth the movement; the first call starts at the target itself.
    if len(smoothed_landmarks) == 0:
        smoothed_landmarks.append(x)
        smoothed_landmarks.append(y)
    alpha = 0.2
    smoothed_landmarks[0] = (1 - alpha) * smoothed_landmarks[0] + alpha * x
    smoothed_landmarks[1] = (1 - alpha) * smoothed_landmarks[1] + alpha * y

    mouse.move(smoothed_landmarks[0], smoothed_landmarks[1], absolute=True, duration=0.01)

def LeftClickMouse():
    """Perform a left click and play the click sound."""
    # Original note: leave a delay so the click position does not drift while
    # the fingers move into the okay gesture.
    # NOTE(review): no delay is implemented here; the cursor move below is
    # disabled.
    #MoveMouse(hand_landmarks)
    mouse.click('left')
    ckick_sound.play()

def RightClickMouse():
    """Perform a right click and play the click sound."""
    mouse.click('right')
    ckick_sound.play()

def DragMouse(hand_landmarks):
    """Start a drag, or continue one that is already in progress.

    The first call presses the mouse button and records that in the
    module-level ``mouse_pressed`` flag; while the flag is set, each call
    moves the cursor with MoveMouse instead.

    Args:
        hand_landmarks: Landmarks forwarded to MoveMouse.
    """
    global mouse_pressed

    if mouse_pressed:
        # Button already held: follow the hand.
        MoveMouse(hand_landmarks)
        return

    mouse.press()
    mouse_pressed = True

def IsOkay(hand_landmarks, click):
    """Detect the thumb-to-fingertip pinch used for clicking.

    For ``click == "Left"`` the thumb tip (landmark 4) is compared with the
    index fingertip (landmark 8). For any other value it is compared with the
    middle fingertip (landmark 12), after a check on the ring fingertip
    (landmark 16). All distances are per-axis differences of the normalized
    coordinates of the first hand.

    Args:
        hand_landmarks: Sequence of hands, each a sequence of landmarks
            exposing ``x`` and ``y``.
        click: "Left" for the index pinch; anything else selects the middle
            finger pinch.

    Returns:
        True when the chosen fingertip is less than 0.03 from the thumb tip on
        both axes (and the ring-finger check did not reject), else False.
    """
    hand = hand_landmarks[0]
    thumb_tip = hand[4]

    if click == "Left":
        target_tip = hand[8]
    else:
        target_tip = hand[12]
        ring_tip = hand[16]

        # Reject when the ring fingertip is far from the thumb on BOTH axes.
        # NOTE(review): being far on a single axis still passes; confirm that
        # "and" (rather than "or") is the intended condition.
        ring_far_x = abs(ring_tip.x - thumb_tip.x) > 0.03
        ring_far_y = abs(ring_tip.y - thumb_tip.y) > 0.03
        if ring_far_x and ring_far_y:
            return False

    near_x = abs(target_tip.x - thumb_tip.x) < 0.03
    near_y = abs(target_tip.y - thumb_tip.y) < 0.03
    return near_x and near_y

def CheckAngledPointingUp(hand_landmarks):
    """Detect a "pointing up" pose even when the hand is rotated.

    Improves cursor movement for tilted hands by comparing the index
    fingertip with the other fingertips (thumb excluded) instead of relying
    on the recognizer's label.

    The pose is accepted when the middle (12), ring (16) and pinky (20)
    fingertips line up within 0.03 on the x axis or on the y axis (treated as
    a closed hand) and the index fingertip (8) is more than 0.1 away from the
    middle fingertip on at least one axis.

    Args:
        hand_landmarks: Sequence of hands, each a sequence of landmarks
            exposing ``x`` and ``y``. Only the first hand is inspected.

    Returns:
        True when the pose matches, False otherwise.
    """
    hand = hand_landmarks[0]
    index_tip = hand[8]
    middle_tip = hand[12]
    ring_tip = hand[16]
    pinky_tip = hand[20]

    # Middle, ring and pinky together -> closed hand.
    grouped_on_x = (abs(ring_tip.x - middle_tip.x) < 0.03
                    and abs(pinky_tip.x - ring_tip.x) < 0.03)
    grouped_on_y = (abs(ring_tip.y - middle_tip.y) < 0.03
                    and abs(pinky_tip.y - ring_tip.y) < 0.03)
    if not (grouped_on_x or grouped_on_y):
        return False

    # Index clearly separated from the middle fingertip -> pointing,
    # whatever the angle of the hand.
    apart_x = abs(middle_tip.x - index_tip.x)
    apart_y = abs(middle_tip.y - index_tip.y)
    return apart_x > 0.1 or apart_y > 0.1

def CheckGesture(hand_landmarks, gesture):
    """Translate the current hand pose into a mouse action and perform it.

    Checks, in order: thumb-index pinch (left click, or drag while it is
    held), thumb-middle pinch (right click), then pointing up (cursor
    movement), using either the recognizer's label or the angled-hand
    heuristic.

    Both clicks are edge-triggered: they fire on the first frame of a pinch
    only, decided by comparing with the module-level ``prevGesture`` (which
    the caller updates with this function's first return value).

    Args:
        hand_landmarks: Sequence of detected hands (landmark sequences).
        gesture: Category name reported by the gesture recognizer.

    Returns:
        ``(gesture_name, action_description)`` strings for the overlay.
    """
    global mouse_pressed
    global prevGesture
    if len(hand_landmarks) == 0:
        return "None", "Nothing"

    left_pinch = IsOkay(hand_landmarks, "Left")

    # End a drag as soon as the pinch opens, so the button is not still held
    # while another gesture acts on this frame. The previous-gesture check is
    # kept so the button is also released whenever the drag state and the
    # gesture history disagree.
    if mouse_pressed and (not left_pinch or prevGesture != "Okay Left"):
        mouse.release()
        mouse_pressed = False

    # Okay with the index finger -> left click / drag.
    if left_pinch:
        if prevGesture != "Okay Left":
            # First frame of the pinch: a single click.
            LeftClickMouse()
        else:
            # Pinch kept from the previous frame: hold the button and drag.
            DragMouse(hand_landmarks)
        return "Okay Left", "Left Click"

    # Okay with the middle finger -> right click, once per pinch rather than
    # once per frame while the pinch is held.
    if IsOkay(hand_landmarks, "Right"):
        if prevGesture != "Okay Right":
            RightClickMouse()
        return "Okay Right", "Right Click"

    if gesture == "Pointing_Up":
        # Move the cursor.
        MoveMouse(hand_landmarks)
        return "Pointing Up", "Moving Cursor"
    if CheckAngledPointingUp(hand_landmarks):
        MoveMouse(hand_landmarks)
        return "Pointing Up", "Moving Cursor Angled"

    return "None", "Nothing"

# Short aliases for the MediaPipe Tasks classes used below.
BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Drawing helpers used by draw_landmarks_on_image.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Latest recognition result. It starts as an empty list and is replaced by
# the result object handed to the recognizer callback.
results = []

# Result callback registered with the gesture recognizer (live stream mode):
# it stores the most recent result for the capture loop to read.
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int) -> None:
    """Store ``result`` in the module-level ``results``.

    ``output_image`` and ``timestamp_ms`` are part of the callback signature
    and are not used here.
    """
    #print(result.gestures)
    global results
    results = result
    # cv2.imwrite('somefile.jpg', imright)

# Live-stream configuration: results are delivered asynchronously to
# print_result. The model file is looked up relative to the current working
# directory.
options = GestureRecognizerOptions(
    BaseOptions(model_asset_path='gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

# NOTE(review): capture device index 1 is hard-coded; presumably an external
# camera. Index 0 is usually the default device - confirm for the target
# machine.
video = cv2.VideoCapture(1)

# Counter passed to recognize_async as the frame timestamp; it is incremented
# once per captured frame.
timestamp = 0

# Main capture loop: read frames from the camera, send them to the gesture
# recognizer, act on the latest result and show an annotated preview.
# Press ESC to quit.
try:
    with GestureRecognizer.create_from_options(options) as recognizer:
        while video.isOpened():
            # Capture frame-by-frame.
            ret, frame = video.read()

            if not ret:
                print("Ignoring empty frame")
                break

            # Camera resolution, kept as module-level names for the helpers
            # that read them.
            h_cam, w_cam, _ = frame.shape

            # Mirror the frame so on-screen movement matches the user's hand.
            frame = cv2.flip(frame, 1)
            timestamp += 1

            # OpenCV delivers BGR frames, while the SRGB image format expects
            # RGB channel order. Convert for recognition and keep the BGR
            # frame for drawing and display.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Send live image data to perform gesture recognition. The results
            # arrive through the `result_callback` given in the options; the
            # recognizer must have been created in live stream mode.
            recognizer.recognize_async(mp_image, timestamp)

            # The callback may replace `results` at any moment, so take one
            # snapshot and use it for the whole frame.
            latest = results

            if latest is not None and latest != []:
                annotated_image = draw_landmarks_on_image(frame, latest)

                if len(latest.gestures) > 0:
                    gesture, action = CheckGesture(latest.hand_landmarks, latest.gestures[0][0].category_name)
                    prevGesture = gesture
                    cv2.putText(annotated_image, "Action: " + action, (10, 50), font, tamaño_texto, color_texto, grosor)
                    cv2.putText(annotated_image, "Gesture: " + gesture, (10, 100), font, tamaño_texto, color_texto2, grosor)
                    cv2.putText(annotated_image, "Dx: " + str(dx), (10, 150), font, tamaño_texto, color_texto, grosor)
                    cv2.putText(annotated_image, "Dy: " + str(dy), (10, 200), font, tamaño_texto, color_texto2, grosor)
                else:
                    cv2.putText(annotated_image, "Action: Nothing", (10, 50), font, tamaño_texto, color_texto, grosor)
                    cv2.putText(annotated_image, "Gesture: None", (10, 100), font, tamaño_texto, color_texto2, grosor)
                cv2.imshow('Show', annotated_image)
            else:
                # No result received yet: show the raw frame.
                cv2.imshow('Show', frame)

            # ESC key.
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    # Always clean up, even when an error interrupts the loop: do not leave
    # the mouse button held by an unfinished drag, and free the camera and
    # the preview window.
    if mouse_pressed:
        mouse.release()
        mouse_pressed = False
    video.release()
    cv2.destroyAllWindows()