In [21]:
import base64

import cv2
import numpy as np
import requests
import os
from scipy.interpolate import interp1d

IMG_PATH = "image.jpg"
API_KEY = os.environ["ROBOFLOW_API_KEY"]
DISTANCE_TO_OBJECT = 1250  # mm             # Mejor funcionamieto - 50cm reales de la camara
HEIGHT_OF_HUMAN_FACE = 210  # mm
GAZE_DETECTION_URL = (
    "http://127.0.0.1:9001/gaze/gaze_detection?api_key=" + API_KEY
)
previous_gaze_point = None  # Para almacenar el punto de mirada anterior
SMOOTHING_FACTOR = 0.7  # Factor de suavizado (ajusta este valor)

In [26]:
def calibrar_mirada(puntos_calibracion, cap):
    """
    Función para calibrar la detección de la mirada.

    Args:
      puntos_calibracion: Una lista de tuplas que representan las coordenadas
                          de los puntos de calibración en la pantalla.
      cap: El objeto cv2.VideoCapture para capturar imágenes de la webcam.

    Returns:
      Un mapa de calibración (diccionario) que relaciona las posiciones
      de la mirada con las coordenadas de la pantalla.
    """

    mapa_calibracion = {}

    for punto in puntos_calibracion:
        # Mostrar la imagen de la cámara
        ret, frame = cap.read()
        if not ret:
            print("Error al capturar la imagen.")
            break

        # Dibujar el punto de calibración en la imagen
        cv2.circle(frame, punto, 10, (255, 255, 255), -1)
        cv2.imshow('Calibración', frame)

        cv2.waitKey(0)  # Esperar a que se presione una tecla

        # Capturar la posición de la mirada
        ret, frame = cap.read()
        gazes = detect_gazes(frame)
        if gazes:
            gaze = gazes[0]
            posicion_mirada = (gaze["yaw"], gaze["pitch"])
            mapa_calibracion[posicion_mirada] = punto

    cv2.destroyAllWindows()
    print(mapa_calibracion)
    return mapa_calibracion



def detect_gazes(frame: np.ndarray):
    img_encode = cv2.imencode(".jpg", frame)[1]
    img_base64 = base64.b64encode(img_encode)
    resp = requests.post(
        GAZE_DETECTION_URL,
        json={
            "api_key": API_KEY,
            "image": {"type": "base64", "value": img_base64.decode("utf-8")},
        },
    )
    gazes = resp.json()[0]["predictions"]
    return gazes


def draw_gaze(img: np.ndarray, gaze: dict):
    # draw face bounding box
    face = gaze["face"]
    x_min = int(face["x"] - face["width"] / 2)
    x_max = int(face["x"] + face["width"] / 2)
    y_min = int(face["y"] - face["height"] / 2)
    y_max = int(face["y"] + face["height"] / 2)
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 3)

    # draw gaze arrow
    _, imgW = img.shape[:2]
    arrow_length = imgW / 2
    dx = -arrow_length * np.sin(gaze["yaw"]) * np.cos(gaze["pitch"])
    dy = -arrow_length * np.sin(gaze["pitch"])
    cv2.arrowedLine(
        img,
        (int(face["x"]), int(face["y"])),
        (int(face["x"] + dx), int(face["y"] + dy)),
        (0, 0, 255),
        2,
        cv2.LINE_AA,
        tipLength=0.18,
    )

    # draw keypoints
    for keypoint in face["landmarks"]:
        color, thickness, radius = (0, 255, 0), 2, 2
        x, y = int(keypoint["x"]), int(keypoint["y"])
        cv2.circle(img, (x, y), thickness, color, radius)

    # draw label and score
    label = "yaw {:.2f}  pitch {:.2f}".format(
        gaze["yaw"] / np.pi * 180, gaze["pitch"] / np.pi * 180
    )
    cv2.putText(
        img, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3
    )

    return img


def smooth_gaze_point(gaze_point, previous_gaze_point, image_width, image_height):
    """
    Suaviza el movimiento del punto de mirada usando un filtro de suavizado.
    """

    if previous_gaze_point is None:
        return gaze_point

    smoothed_x = int(
        SMOOTHING_FACTOR * previous_gaze_point[0]
        + (1 - SMOOTHING_FACTOR) * gaze_point[0]
    )
    smoothed_y = int(
        SMOOTHING_FACTOR * previous_gaze_point[1]
        + (1 - SMOOTHING_FACTOR) * gaze_point[1]
    )

    # Limitar el punto suavizado a las dimensiones de la pantalla
    smoothed_x = max(0, min(smoothed_x, image_width - 1))
    smoothed_y = max(0, min(smoothed_y, image_height - 1))

    return (smoothed_x, smoothed_y)


if __name__ == "__main__":
    cap = cv2.VideoCapture(0)

    # Define los puntos de calibración (ajusta las coordenadas si es necesario)
    puntos_calibracion = [(0, 0), (320, 0), (640, 0),
                          (0, 240), (640, 240),
                          (0, 480), (320, 480), (640, 480)]

    # Calibrar la mirada
    mapa_calibracion = calibrar_mirada(puntos_calibracion, cap)

    # Crear funciones de interpolación para yaw y pitch
    x = [p[0] for p in mapa_calibracion.keys()]
    y = [p[1] for p in mapa_calibracion.keys()]
    f_x = interp1d(x, [p[0] for p in mapa_calibracion.values()], fill_value="extrapolate")
    f_y = interp1d(y, [p[1] for p in mapa_calibracion.values()], fill_value="extrapolate")


    previous_gaze_point = None
    while True:

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        ret, frame = cap.read()

        gazes = detect_gazes(frame)

        if not ret or frame is None:
            print("Error: El frame capturado está vacío.")
            continue

        if len(gazes) == 0:
            continue

        gaze = gazes[0]
        draw_gaze(frame, gaze)

        image_height, image_width = frame.shape[:2]

        length_per_pixel = HEIGHT_OF_HUMAN_FACE / gaze["face"]["height"]

        dx = -DISTANCE_TO_OBJECT * np.tan(gaze['yaw']) / length_per_pixel
        # 100000000 is used to denote out of bounds
        dx = dx if not np.isnan(dx) else 100000000
        dy = -DISTANCE_TO_OBJECT * np.arccos(gaze['yaw']) * np.tan(
            gaze['pitch']) / length_per_pixel
        dy = dy if not np.isnan(dy) else 100000000
        gaze_point = int(image_width / 2 + dx), int(image_height / 2 + dy)

        # Ajustar la posición de la mirada con el mapa de calibración
        gaze_point = (int(f_x(gaze_point[0])), int(f_y(gaze_point[1])))

        # Suavizar el punto de mirada
        smoothed_gaze_point = smooth_gaze_point(gaze_point,
                                                previous_gaze_point,
                                                image_width, image_height)
        previous_gaze_point = smoothed_gaze_point
        print(smoothed_gaze_point)
        cv2.circle(frame, smoothed_gaze_point, 25, (0, 0, 255), -1)

        cv2.imshow("gaze", frame)

        

{(-0.2763197720050812, 0.10601512342691422): (0, 0), (-0.0305057130753994, 0.0855625718832016): (320, 0), (0.26966771483421326, 0.10556185245513916): (640, 0), (-0.21770144999027252, -0.006858705077320337): (0, 240), (0.2687574326992035, -0.02521321177482605): (640, 240), (-0.23862457275390625, -0.1513863354921341): (0, 480), (0.013530457392334938, -0.13403238356113434): (320, 480), (0.28632497787475586, -0.14757642149925232): (640, 480)}
(640, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)
(639, 0)

KeyboardInterrupt: 