In [1]:
import cv2
import mediapipe as mp
import numpy as np




In [2]:
############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
    refine_landmarks=True,
    max_num_faces=2,
    min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

face_3d = np.array([
    [0.0, 0.0, 0.0],            # Nose tip
    [0.0, -330.0, -65.0],       # Chin
    [-225.0, 170.0, -135.0],    # Left eye left corner
    [225.0, 170.0, -135.0],     # Right eye right corner
    [-150.0, -150.0, -125.0],   # Left Mouth corner
    [150.0, -150.0, -125.0]     # Right mouth corner
    ], dtype=np.float64)

# Reposition left eye corner to be the origin
leye_3d = np.array(face_3d)
leye_3d[:,0] += 225
leye_3d[:,1] -= 175
leye_3d[:,2] += 135

# Reposition right eye corner to be the origin
reye_3d = np.array(face_3d)
reye_3d[:,0] -= 225
reye_3d[:,1] -= 175
reye_3d[:,2] += 135

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False
    
    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True
    
    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
      continue 

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))
        
        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate left x gaze score
        if (face_2d[243,0] - face_2d[130,0]) != 0:
            lx_score = (face_2d[468,0] - face_2d[130,0]) / (face_2d[243,0] - face_2d[130,0])
            if abs(lx_score - last_lx) < threshold:
                lx_score = (lx_score + last_lx) / 2
            last_lx = lx_score

        # Calculate left y gaze score
        if (face_2d[23,1] - face_2d[27,1]) != 0:
            ly_score = (face_2d[468,1] - face_2d[27,1]) / (face_2d[23,1] - face_2d[27,1])
            if abs(ly_score - last_ly) < threshold:
                ly_score = (ly_score + last_ly) / 2
            last_ly = ly_score

        # Calculate right x gaze score
        if (face_2d[359,0] - face_2d[463,0]) != 0:
            rx_score = (face_2d[473,0] - face_2d[463,0]) / (face_2d[359,0] - face_2d[463,0])
            if abs(rx_score - last_rx) < threshold:
                rx_score = (rx_score + last_rx) / 2
            last_rx = rx_score

        # Calculate right y gaze score
        if (face_2d[253,1] - face_2d[257,1]) != 0:
            ry_score = (face_2d[473,1] - face_2d[257,1]) / (face_2d[253,1] - face_2d[257,1])
            if abs(ry_score - last_ry) < threshold:
                ry_score = (ry_score + last_ry) / 2
            last_ry = ry_score

        # The camera matrix
        focal_length = 1 * img_w
        cam_matrix = np.array([ [focal_length, 0, img_h / 2],
                                [0, focal_length, img_w / 2],
                                [0, 0, 1]])

        # Distortion coefficients 
        dist_coeffs = np.zeros((4, 1), dtype=np.float64)

        # Solve PnP
        _, l_rvec, l_tvec = cv2.solvePnP(leye_3d, face_2d_head, cam_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
        _, r_rvec, r_tvec = cv2.solvePnP(reye_3d, face_2d_head, cam_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)


        # Get rotational matrix from rotational vector
        l_rmat, _ = cv2.Rodrigues(l_rvec)
        r_rmat, _ = cv2.Rodrigues(r_rvec)


        # [0] changes pitch
        # [1] changes roll
        # [2] changes yaw
        # +1 changes ~45 degrees (pitch down, roll tilts left (counterclockwise), yaw spins left (counterclockwise))

        # Adjust headpose vector with gaze score
        l_gaze_rvec = np.array(l_rvec)
        l_gaze_rvec[2][0] -= (lx_score-.5) * x_score_multiplier
        l_gaze_rvec[0][0] += (ly_score-.5) * y_score_multiplier

        r_gaze_rvec = np.array(r_rvec)
        r_gaze_rvec[2][0] -= (rx_score-.5) * x_score_multiplier
        r_gaze_rvec[0][0] += (ry_score-.5) * y_score_multiplier

        # --- Projection ---

        # Get left eye corner as integer
        l_corner = face_2d_head[2].astype(np.int32)

        # Project axis of rotation for left eye
        axis = np.float32([[-100, 0, 0], [0, 100, 0], [0, 0, 300]]).reshape(-1, 3)
        l_axis, _ = cv2.projectPoints(axis, l_rvec, l_tvec, cam_matrix, dist_coeffs)
        l_gaze_axis, _ = cv2.projectPoints(axis, l_gaze_rvec, l_tvec, cam_matrix, dist_coeffs)

        # Draw axis of rotation for left eye
        if draw_headpose:
            if draw_full_axis:
                cv2.line(img, l_corner, tuple(np.ravel(l_axis[0]).astype(np.int32)), (200,200,0), 3)
                cv2.line(img, l_corner, tuple(np.ravel(l_axis[1]).astype(np.int32)), (0,200,0), 3)
            cv2.line(img, l_corner, tuple(np.ravel(l_axis[2]).astype(np.int32)), (0,200,200), 3)

        if draw_gaze:
            if draw_full_axis:
                cv2.line(img, l_corner, tuple(np.ravel(l_gaze_axis[0]).astype(np.int32)), (255,0,0), 3)
                cv2.line(img, l_corner, tuple(np.ravel(l_gaze_axis[1]).astype(np.int32)), (0,255,0), 3)
            cv2.line(img, l_corner, tuple(np.ravel(l_gaze_axis[2]).astype(np.int32)), (0,0,255), 3)

        
    
        # Get left eye corner as integer
        r_corner = face_2d_head[3].astype(np.int32)

        # Get left eye corner as integer
        r_axis, _ = cv2.projectPoints(axis, r_rvec, r_tvec, cam_matrix, dist_coeffs)
        r_gaze_axis, _ = cv2.projectPoints(axis, r_gaze_rvec, r_tvec, cam_matrix, dist_coeffs)

        # Draw axis of rotation for left eye
        if draw_headpose:
            if draw_full_axis:
                cv2.line(img, r_corner, tuple(np.ravel(r_axis[0]).astype(np.int32)), (200,200,0), 3)
                cv2.line(img, r_corner, tuple(np.ravel(r_axis[1]).astype(np.int32)), (0,200,0), 3)
            cv2.line(img, r_corner, tuple(np.ravel(r_axis[2]).astype(np.int32)), (0,200,200), 3)

        if draw_gaze:
            if draw_full_axis:
                cv2.line(img, r_corner, tuple(np.ravel(r_gaze_axis[0]).astype(np.int32)), (255,0,0), 3)
                cv2.line(img, r_corner, tuple(np.ravel(r_gaze_axis[1]).astype(np.int32)), (0,255,0), 3)
            cv2.line(img, r_corner, tuple(np.ravel(r_gaze_axis[2]).astype(np.int32)), (0,0,255), 3)
                


    cv2.imshow('Head Pose Estimation', img)

    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

In [None]:
# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_right': (img_w // 2, img_h // 2, img_w, img_h),
    'top_left': (0, 0, img_w // 2, img_h // 2),
    'bottom_left': (0, img_h // 2, img_w // 2, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (0, 0, 0) for zone in zone_coordinates}

# ...

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False
    
    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True
    
    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # ...

    # Calculate gaze scores (as in your existing code)

    # Determine the current zone based on gaze scores
    current_zone = None
    for zone, (x1, y1, x2, y2) in zone_coordinates.items():
        if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
            current_zone = zone
            break

    # Update the color of the current zone to red
    if current_zone is not None:
        zones_color[current_zone] = (0, 0, 255)  # Red

    # Draw rectangles for each zone
    for zone, (x1, y1, x2, y2) in zone_coordinates.items():
        cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], -1)  # Fill rectangle

    # Draw the existing head pose and gaze lines (as in your existing code)

    # Show the image
    cv2.imshow('Gaze and Zone Detection', img)

    # Handle key press to exit the loop
    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()

In [3]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

face_3d = np.array([
    [0.0, 0.0, 0.0],            # Nose tip
    [0.0, -330.0, -65.0],       # Chin
    [-225.0, 170.0, -135.0],    # Left eye left corner
    [225.0, 170.0, -135.0],     # Right eye right corner
    [-150.0, -150.0, -125.0],   # Left Mouth corner
    [150.0, -150.0, -125.0]     # Right mouth corner
], dtype=np.float64)

# Reposition left eye corner to be the origin
leye_3d = np.array(face_3d)
leye_3d[:, 0] += 225
leye_3d[:, 1] -= 175
leye_3d[:, 2] += 135

# Reposition right eye corner to be the origin
reye_3d = np.array(face_3d)
reye_3d[:, 0] -= 225
reye_3d[:, 1] -= 175
reye_3d[:, 2] += 135

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_right': (img_w // 2, img_h // 2, img_w, img_h),
    'top_left': (0, 0, img_w // 2, img_h // 2),
    'bottom_left': (0, img_h // 2, img_w // 2, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (0, 0, 0) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], -1)  # Fill rectangle

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 

In [2]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

face_3d = np.array([
    [0.0, 0.0, 0.0],            # Nose tip
    [0.0, -330.0, -65.0],       # Chin
    [-225.0, 170.0, -135.0],    # Left eye left corner
    [225.0, 170.0, -135.0],     # Right eye right corner
    [-150.0, -150.0, -125.0],   # Left Mouth corner
    [150.0, -150.0, -125.0]     # Right mouth corner
], dtype=np.float64)

# Reposition left eye corner to be the origin
leye_3d = np.array(face_3d)
leye_3d[:, 0] += 225
leye_3d[:, 1] -= 175
leye_3d[:, 2] += 135

# Reposition right eye corner to be the origin
reye_3d = np.array(face_3d)
reye_3d[:, 0] -= 225
reye_3d[:, 1] -= 175
reye_3d[:, 2] += 135

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_right': (img_w // 2, img_h // 2, img_w, img_h),
    'top_left': (0, 0, img_w // 2, img_h // 2),
    'bottom_left': (0, img_h // 2, img_w // 2, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (0, 0, 0) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], -1)  # Fill rectangle

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


NameError: name 'img_w' is not defined

In [None]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], -1)  # Fill rectangle

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


In [3]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_right': (img_w // 2, img_h // 2, img_w, img_h),
    'top_left': (0, 0, img_w // 2, img_h // 2),
    'bottom_left': (0, img_h // 2, img_w // 2, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (0, 0, 0) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], -1)  # Fill rectangle

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


NameError: name 'img_w' is not defined

In [None]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Reset the colors for each zone to the original state (white)
    zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape
    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)  # Hollow rectangle, 2-pixel thickness

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


In [4]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

# Gaze scores from the previous frame
last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    (img_h, img_w, img_c) = img.shape  # Initialize img_w and img_h here

    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            # Convert landmark x and y to pixel coordinates
            x, y = int(lm.x * img_w), int(lm.y * img_h)

            # Add the 2D coordinates to an array
            face_2d.append((x, y))

        # Get relevant landmarks for headpose estimation
        face_2d_head = np.array([
            face_2d[1],      # Nose
            face_2d[199],    # Chin
            face_2d[33],     # Left eye left corner
            face_2d[263],    # Right eye right corner
            face_2d[61],     # Left mouth corner
            face_2d[291]     # Right mouth corner
        ], dtype=np.float64)

        face_2d = np.asarray(face_2d)

        # Calculate gaze scores (as in your existing code)

        # Determine the current zone based on gaze scores
        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[468, 0] <= x2 and y1 <= face_2d[468, 1] <= y2:
                current_zone = zone
                break

        # Update the color of the current zone to red
        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)  # Red

        # Draw rectangles for each zone
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)  # Hollow rectangle, 2-pixel thickness

        # Draw the existing head pose and gaze lines (as in your existing code)

        # Show the image
        cv2.imshow('Gaze and Zone Detection', img)

        # Handle key press to exit the loop
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


NameError: name 'img_w' is not defined

# main

In [1]:
import cv2
import mediapipe as mp
import numpy as np




In [4]:
############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = True

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

# Initialize img_w and img_h
img_w, img_h = int(cap.get(3)), int(cap.get(4))

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    #'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    face_2d = []

    if not results.multi_face_landmarks:
        continue

    for face_landmarks in results.multi_face_landmarks:
        face_2d = []
        for idx, lm in enumerate(face_landmarks.landmark):
            x, y = int(lm.x * img_w), int(lm.y * img_h)
            face_2d.append((x, y))

        current_zone = None
        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            if x1 <= face_2d[159][0] <= x2 and y1 <= face_2d[159][1] <= y2:
                current_zone = zone
                break

        for zone in zone_coordinates.keys():
            zones_color[zone] = (255, 255, 255)

        if current_zone is not None:
            zones_color[current_zone] = (0, 0, 255)

        for zone, (x1, y1, x2, y2) in zone_coordinates.items():
            cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)

    cv2.imshow('Gaze and Zone Detection', img)

    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [1]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = .3

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False,
                                  refine_landmarks=True,
                                  max_num_faces=2,
                                  min_detection_confidence=0.5)
cap = cv2.VideoCapture(0)

img_w, img_h = int(cap.get(3)), int(cap.get(4))

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

last_lx, last_rx = 0, 0
last_ly, last_ry = 0, 0

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            face_2d = []
            for idx, lm in enumerate(face_landmarks.landmark):
                x, y = int(lm.x * img_w), int(lm.y * img_h)
                face_2d.append((x, y))

            # Check if relevant landmarks are present
            if len(face_2d) >= 468:
                # Extract eye landmark positions
                left_eye = (face_2d[159][0], face_2d[159][1])
                right_eye = (face_2d[386][0], face_2d[386][1])

                # Calculate gaze vector (from left eye to right eye)
                gaze_vector = (right_eye[0] - left_eye[0], right_eye[1] - left_eye[1])

                # Calculate the angle of the gaze vector
                gaze_angle = np.degrees(np.arctan2(gaze_vector[1], gaze_vector[0]))

                # Visualize the gaze direction as a line
                line_length = 50
                line_end = (
                    int(left_eye[0] + line_length * np.cos(np.radians(gaze_angle))),
                    int(left_eye[1] + line_length * np.sin(np.radians(gaze_angle)))
                )
                cv2.line(img, left_eye, line_end, (0, 255, 0), 2)

                # Determine the current zone based on the gaze angle
                current_zone = None
                if -45 < gaze_angle <= 45:
                    current_zone = 'right'
                elif 45 < gaze_angle <= 135:
                    current_zone = 'top'
                elif -135 <= gaze_angle <= -45:
                    current_zone = 'bottom'
                else:
                    current_zone = 'left'

                # Update zone colors based on gaze
                for zone in zone_coordinates.keys():
                    zones_color[zone] = (255, 255, 255)

                if current_zone is not None:
                    zones_color[current_zone] = (0, 0, 255)

                # Draw rectangles on zones
                for zone, (x1, y1, x2, y2) in zone_coordinates.items():
                    cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)

    cv2.imshow('Gaze and Zone Detection', img)

    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

# ... (rest of the code)



cap.release()
cv2.destroyAllWindows()





In [1]:
import cv2
import mediapipe as mp
import numpy as np

############## PARAMETERS #######################################################

# Set these values to show/hide certain vectors of the estimation
draw_gaze = True
draw_full_axis = False
draw_headpose = False

# Gaze Score multiplier (Higher multiplier = Gaze affects headpose estimation more)
x_score_multiplier = 10
y_score_multiplier = 10

# Threshold of how close scores should be to average between frames
threshold = 0.3

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

#################################################################################

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    refine_landmarks=True,
    max_num_faces=2,
    min_detection_confidence=0.5
)
cap = cv2.VideoCapture(0)

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            face_2d = []
            for idx, lm in enumerate(face_landmarks.landmark):
                x, y = int(lm.x * img_w), int(lm.y * img_h)
                face_2d.append((x, y))

            if len(face_2d) >= 468:
                left_eye = (face_2d[159][0], face_2d[159][1])
                right_eye = (face_2d[386][0], face_2d[386][1])

                gaze_vector = (right_eye[0] - left_eye[0], right_eye[1] - left_eye[1])
                gaze_angle = np.degrees(np.arctan2(gaze_vector[1], gaze_vector[0]))

                # Visualize the gaze direction as a line
                line_length = 50
                line_end = (
                    int(left_eye[0] + line_length * np.cos(np.radians(gaze_angle))),
                    int(left_eye[1] + line_length * np.sin(np.radians(gaze_angle)))
                )
                cv2.line(img, left_eye, line_end, (0, 255, 0), 2)

                # Determine the current zone based on the gaze angle
                current_zone = None
                if -45 < gaze_angle <= 45:
                    current_zone = 'right'
                elif 45 < gaze_angle <= 135:
                    current_zone = 'top'
                elif -135 <= gaze_angle <= -45:
                    current_zone = 'bottom'
                else:
                    current_zone = 'left'

                # Update zone colors based on gaze
                for zone in zone_coordinates.keys():
                    zones_color[zone] = (255, 255, 255)

                if current_zone is not None:
                    zones_color[current_zone] = (0, 0, 255)

                # Draw rectangles on zones
                for zone, (x1, y1, x2, y2) in zone_coordinates.items():
                    cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)

    cv2.imshow('Gaze and Zone Detection', img)

    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()






NameError: name 'img_w' is not defined

In [8]:
import cv2
import mediapipe as mp
import numpy as np

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    refine_landmarks=True,
    max_num_faces=2,
    min_detection_confidence=0.5
)
cap = cv2.VideoCapture(0)

# Define the coordinates for the zones
zone_coordinates = {
    'top_right': (0, 0, img_w // 2, img_h // 2),
    'bottom_right': (0, img_h // 2, img_w // 2, img_h),
    'top_left': (img_w // 2, 0, img_w, img_h // 2),
    'bottom_left': (img_w // 2, img_h // 2, img_w, img_h),
    'center': (img_w // 4, img_h // 4, 3 * img_w // 4, 3 * img_h // 4)
}

# Initialize the zones color dictionary
zones_color = {zone: (255, 255, 255) for zone in zone_coordinates}

while cap.isOpened():
    success, img = cap.read()

    # Flip + convert img from BGR to RGB
    img = cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    img.flags.writeable = False

    # Get the result
    results = face_mesh.process(img)
    img.flags.writeable = True

    # Convert the color space from RGB to BGR
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            face_2d = []
            for idx, lm in enumerate(face_landmarks.landmark):
                x, y = int(lm.x * img_w), int(lm.y * img_h)
                face_2d.append((x, y))

            if len(face_2d) >= 468:
                left_eye = (face_2d[159][0], face_2d[159][1])
                right_eye = (face_2d[386][0], face_2d[386][1])

                gaze_vector = (right_eye[0] - left_eye[0], right_eye[1] - left_eye[1])
                gaze_angle = np.degrees(np.arctan2(gaze_vector[1], gaze_vector[0]))

                # Determine the current zone based on the gaze angle
                current_zone = None
                if -45 < gaze_angle <= 45:
                    current_zone = 'right'
                elif 45 < gaze_angle <= 135:
                    current_zone = 'top'
                elif -135 <= gaze_angle <= -45:
                    current_zone = 'bottom'
                else:
                    current_zone = 'left'

                # Update zone colors based on gaze
                for zone in zone_coordinates.keys():
                    zones_color[zone] = (255, 255, 255)

                if current_zone is not None:
                    zones_color[current_zone] = (0, 0, 255)

                # Draw rectangles on zones
                for zone, (x1, y1, x2, y2) in zone_coordinates.items():
                    cv2.rectangle(img, (x1, y1), (x2, y2), zones_color[zone], 2)

    cv2.imshow('Gaze and Zone Detection', img)

    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
