In [1]:
import unicodedata

import cv2
import mediapipe as mp

In [2]:
# Short aliases for the two MediaPipe solution modules used by this notebook:
# `mp_drawing` is used to draw the detected landmarks onto the frame and
# `mp_hands` provides the `Hands` detector and the `HAND_CONNECTIONS` constant.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [3]:
def distance(p1, p2):
    """Return the straight-line (Euclidean) distance between two 3D points.

    Both arguments only need to expose numeric ``x``, ``y`` and ``z``
    attributes; the result is expressed in the same units as those
    coordinates.
    """
    # Per-axis differences, taken as "second point minus first point".
    delta_x = p2.x - p1.x
    delta_y = p2.y - p1.y
    delta_z = p2.z - p1.z

    squared_length = delta_x ** 2 + delta_y ** 2 + delta_z ** 2
    return squared_length ** 0.5

In [4]:
def finger_raised(landmarks):
    """Report, for one hand, which fingers are currently raised.

    Args:
        landmarks: Integer-indexable sequence of points exposing ``x`` and
            ``y`` attributes. Indices 3, 4, 5, 6, 8, 10, 12, 14, 16, 17, 18
            and 20 are read (the caller passes MediaPipe hand landmarks).

    Returns:
        dict: Maps 'Pulgar', 'Indice', 'Medio', 'Anular' and 'Menique'
        (in that order) to a boolean that is True when the finger is raised.
    """
    # Thumb: judged on the X axis, and the direction depends on which hand it
    # is. With the palm facing the camera, landmark 4 is compared to 3.
    treated_as_right_hand = landmarks[17].x < landmarks[5].x
    if treated_as_right_hand:
        thumb_is_up = landmarks[4].x > landmarks[3].x
    else:  # treated as a left hand
        thumb_is_up = landmarks[4].x < landmarks[3].x

    states = {'Pulgar': thumb_is_up}

    # Remaining fingers: judged on the Y axis (higher on screen = smaller Y).
    # Each entry is (finger name, upper landmark index, lower landmark index).
    vertical_checks = (
        ('Indice', 8, 6),
        ('Medio', 12, 10),
        ('Anular', 16, 14),
        ('Menique', 20, 18),
    )
    for finger_name, upper_idx, lower_idx in vertical_checks:
        states[finger_name] = landmarks[upper_idx].y < landmarks[lower_idx].y

    return states
    

In [5]:
def gesture_detection(fingers, landmarks):
    """Translate finger states plus landmark geometry into a gesture label.

    The rules below are checked strictly in order and the first one that
    matches decides the result, so more specific gestures must stay ahead of
    the more general ones.

    Args:
        fingers: dict mapping 'Pulgar', 'Indice', 'Medio', 'Anular' and
            'Menique' to truthy/falsy "raised" flags.
        landmarks: Integer-indexable sequence of points with ``x``, ``y`` and
            ``z`` attributes. It is only read by the rules that need
            geometry (distances or horizontal offsets).

    Returns:
        str: The label of the first matching gesture, or 'Gesto no definido'
        when no rule applies.
    """
    def every(*names):
        # True when all of the named fingers are raised.
        return all(fingers[name] for name in names)

    def some(*names):
        # True when at least one of the named fingers is raised.
        return any(fingers[name] for name in names)

    def gap(first, second):
        # 3D distance between two landmarks, given by index.
        return distance(landmarks[first], landmarks[second])

    def x_gap(first, second):
        # Horizontal separation between two landmarks, given by index.
        return abs(landmarks[first].x - landmarks[second].x)

    whole_hand_up = all(fingers.values())

    # Open hand with index/middle/ring/pinky tips close to each other.
    if whole_hand_up and gap(8, 12) < 0.13 and gap(12, 16) < 0.13 and gap(16, 20) < 0.13:
        return 'Alto ✋'

    if every('Indice', 'Menique') and not some('Medio', 'Anular', 'Pulgar'):
        return 'Cuernos 🤘'

    if every('Menique', 'Pulgar') and not some('Indice', 'Medio', 'Anular'):
        return 'Llámame 🤙'

    if every('Pulgar', 'Indice', 'Menique') and not some('Medio', 'Anular'):
        return 'Te quiero ❤️‍🔥'

    # Four fingers up, split into two pairs: a wide gap between middle and
    # ring tips, narrow gaps inside each pair.
    if (every('Indice', 'Medio', 'Anular', 'Menique')
            and x_gap(12, 16) > 0.06
            and x_gap(8, 12) < 0.05
            and x_gap(16, 20) < 0.05):
        return 'Saludo vulcano 🖖'

    if not any(fingers.values()):
        return 'Puño ✊'

    # These two finger patterns are each shared by two rules below; they are
    # mutually exclusive because they disagree on the thumb.
    thumb_and_index_only = every('Pulgar', 'Indice') and not some('Medio', 'Anular', 'Menique')
    index_only = every('Indice') and not some('Pulgar', 'Medio', 'Anular', 'Menique')

    if thumb_and_index_only and gap(4, 8) < 0.09:
        return 'Pinza 🤏'

    if index_only:
        # Direction is read from landmark 8 relative to landmark 5 on X.
        if landmarks[8].x < landmarks[5].x:
            return 'Señalar izquierda 👈'
        if landmarks[8].x > landmarks[5].x:
            return 'Señalar derecha 👉'

    if thumb_and_index_only and gap(4, 8) < 0.50 and gap(4, 12) > 0.12:
        return 'Corazón coreano 🫰'

    if every('Indice', 'Medio') and not some('Pulgar', 'Anular', 'Menique'):
        return 'Victory ✌️'

    if every('Pulgar') and not some('Indice', 'Medio', 'Anular', 'Menique'):
        return 'Pulgar arriba 👍'

    # Landmarks 4 and 8 nearly touching while at least one of the other
    # three fingers is raised.
    if some('Medio', 'Anular', 'Menique') and gap(4, 8) < 0.05:
        return 'Ok 👌'

    if whole_hand_up:
        return 'Mano abierta 🖐️'

    return 'Gesto no definido'



In [6]:
# Open video capture device 0 (usually the machine's default camera).
# The handle is consumed and released by the capture-loop cell below.
cap = cv2.VideoCapture(0)



In [None]:
# Live gesture-recognition loop: read frames from `cap`, detect up to two
# hands, classify each hand's gesture and overlay the result on the frame.
# Press 'q' in the preview window to stop.

# Vertical distance in pixels between the text blocks of consecutive hands,
# so the labels for a second hand do not overwrite those of the first.
HAND_TEXT_SPACING = 60


def _ascii_label(text):
    """Reduce `text` to plain ASCII so `cv2.putText` can draw it.

    OpenCV's Hershey fonts replace characters they cannot render (emoji,
    accented letters) with question marks. Accents are decomposed first so
    that e.g. 'ñ' becomes 'n'; whatever is still non-ASCII is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.encode('ascii', 'ignore').decode('ascii').strip()


# A previous run of this cell releases the capture on exit; re-open the same
# device (index 0, as in the cell that created `cap`) so the cell can be
# re-run on its own instead of silently skipping the loop.
if not cap.isOpened():
    cap.open(0)

try:
    with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7) as hands:

        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Flip the image horizontally for a later selfie-view display
            image = cv2.flip(image, 1)

            # OpenCV delivers BGR frames; the detector is fed an RGB copy.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            results = hands.process(image_rgb)

            if results.multi_hand_landmarks:
                for hand_index, hand_landmarks in enumerate(results.multi_hand_landmarks):
                    # Draw hand landmarks
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    landmarks = hand_landmarks.landmark

                    fingers = finger_raised(landmarks)
                    gesture = gesture_detection(fingers, landmarks)

                    # The first hand keeps the original text positions; each
                    # further hand is shifted down by one block.
                    y_offset = hand_index * HAND_TEXT_SPACING

                    # Show gesture detection (ASCII-only, see _ascii_label)
                    cv2.putText(image, f'Gesture: {_ascii_label(gesture)}', (10, 50 + y_offset),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # Optional: list the fingers currently detected as raised
                    info_fingers = f'Fingers: {[k for k, v in fingers.items() if v]}'
                    cv2.putText(image, info_fingers, (10, 30 + y_offset),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (25, 255, 0), 2)

            cv2.imshow('Gesture Tracking', image)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, including when the
    # kernel is interrupted or a frame raises, so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1747669818.673977 1083135 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M4 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1747669818.680521 1085152 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747669818.686788 1085152 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747669820.166824 1085152 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
