Try to analyze the gaze data within the AOI: Plan A

Step 1: Automatic tracking + visualise the display position (dynamic ROI)
You manually click on the 4 corners in the first frame → initialise the display position
Use optical flow to track the corners of the display in each frame
Save the ROI (coordinates of the four corners) of the display in each frame
Visualise every few frames to help you manually check the tracking results
Finally, save the ROI of all frames in a CSV or JSON file for subsequent use

In [None]:
pip install opencv-python-headless numpy pandas shapely matplotlib

read gaze data

In [2]:
import pandas as pd

# Location of the exported gaze-positions CSV for this recording
gaze_file_path = "/Users/jiaonahu/Documents/eye-tracking/sub_RJ70HQ/exports/000/gaze_positions.csv"

# Parse the CSV into a DataFrame
gaze_data = pd.read_csv(filepath_or_buffer=gaze_file_path)

# Preview the leading five rows to inspect the available columns
gaze_data.head(n=5)

Unnamed: 0,gaze_timestamp,world_index,confidence,norm_pos_x,norm_pos_y,base_data,gaze_point_3d_x,gaze_point_3d_y,gaze_point_3d_z,eye_center0_3d_x,...,eye_center0_3d_z,gaze_normal0_x,gaze_normal0_y,gaze_normal0_z,eye_center1_3d_x,eye_center1_3d_y,eye_center1_3d_z,gaze_normal1_x,gaze_normal1_y,gaze_normal1_z
0,27552.401964,1,0.810659,0.6316,0.015143,27552.368713-0 27552.435214-1,12.220782,33.768872,60.316149,31.925014,...,0.387049,-0.271092,0.434771,0.858768,-37.624465,-9.240443,-2.938625,0.541557,0.50818,0.669679
1,27552.403943,1,0.337533,3.007644,-1.288352,27552.403943-1,397.701637,212.718724,103.010951,,...,,,,,-37.624465,-9.240443,-2.938625,0.870652,0.443918,0.211899
2,27552.404224,1,0.444916,0.460274,0.185104,27552.404224-0,-28.548249,169.721647,466.767724,31.925014,...,0.387049,-0.120947,0.3396,0.932761,,,,,,
3,27552.435201,2,0.846872,0.65547,0.082286,27552.435187-0 27552.435214-1,15.8727,31.98481,66.312535,31.925014,...,0.387049,-0.203939,0.341408,0.917524,-37.624465,-9.240443,-2.938625,0.541557,0.50818,0.669679
4,27552.451869,2,0.867987,0.66958,0.109007,27552.468523-0 27552.435214-1,18.201822,31.475342,69.715881,31.925014,...,0.387049,-0.16467,0.30002,0.939613,-37.624465,-9.240443,-2.938625,0.541557,0.50818,0.669679


Import libraries and set up; show the first frame and select the 4 corners manually

In [12]:
import cv2
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

# --- Input / output locations: edit these for your own recording ---
VIDEO_PATH = '/Users/jiaonahu/Documents/eye-tracking/sub_RJ70HQ/exports/000/gaze_overlay_video.mp4'  # Replace with your actual path
OUTPUT_TRACKING_CSV = '/Users/jiaonahu/Desktop/monitor_tracking.csv'
OUTPUT_VIS_FOLDER = '/Users/jiaonahu/Desktop/monitor_tracking_visuals'

# --- Video settings ---
# Every frame is resized to this resolution before it is shown or tracked
VIDEO_WIDTH, VIDEO_HEIGHT = 640, 480
FPS = 30
# A visualization image is written once every N frames
VISUALIZE_EVERY_N = 30

# Corner coordinates ([x, y] pairs) collected by the mouse callback
clicked_points = list()

# Mouse callback: collects up to four left-click positions
def click_event(event, x, y, flags, param):
    """Record a left-button click as a corner until four have been stored.

    Args:
        event: OpenCV mouse event code; only ``cv2.EVENT_LBUTTONDOWN`` is handled.
        x: Horizontal pixel coordinate of the event in the displayed image.
        y: Vertical pixel coordinate of the event in the displayed image.
        flags: Unused; required by the OpenCV callback signature.
        param: Unused; required by the OpenCV callback signature.

    Side effects:
        Appends ``[x, y]`` to the module-level ``clicked_points`` list and
        prints the running point number with its coordinates.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    if len(clicked_points) >= 4:
        # All four corners are already chosen; ignore further clicks
        return
    clicked_points.append([x, y])
    print(f"Point {len(clicked_points)}: ({x}, {y})")

# One name shared by every HighGUI call below, so the window, its mouse
# callback and the imshow target cannot drift apart.
WINDOW_NAME = "Select Monitor Corners"

# Open video and read the first frame.
# NOTE: `cap` is intentionally left open on success; the tracking cell
# continues reading from it, starting at the second frame.
cap = cv2.VideoCapture(VIDEO_PATH)
ret, first_frame = cap.read()
if not ret:
    # Release the capture handle before bailing out so it is not leaked
    cap.release()
    raise ValueError("Could not read video.")

# Clicks are recorded in the coordinates of this resized image
first_frame = cv2.resize(first_frame, (VIDEO_WIDTH, VIDEO_HEIGHT))

# Create OpenCV window and set mouse callback
cv2.namedWindow(WINDOW_NAME)
cv2.setMouseCallback(WINDOW_NAME, click_event)

print("👉 Please click the 4 corners of the monitor (clockwise or counterclockwise).")
print("✅ After clicking all 4, click on the image window and press any key to continue.")

# Main loop for selecting points: redraw the frame with the points chosen so
# far, then either poll briefly for ESC or, once four points exist, block
# until any key is pressed.
while True:
    temp_frame = first_frame.copy()
    for pt in clicked_points:
        cv2.circle(temp_frame, tuple(pt), 5, (0, 255, 0), -1)
    cv2.imshow(WINDOW_NAME, temp_frame)

    if len(clicked_points) == 4:
        key = cv2.waitKey(0)  # Wait for any key press
        if key != -1:
            break  # Exit loop after key press
    elif cv2.waitKey(1) & 0xFF == 27:  # ESC to quit early
        print("❌ Selection cancelled.")
        break

cv2.destroyAllWindows()
# Pump the GUI event queue once more; on some HighGUI backends the window
# only disappears after pending events are processed.
cv2.waitKey(1)

# Safety check
if len(clicked_points) != 4:
    # Selection was cancelled: nothing downstream can use the capture, so
    # close it instead of leaking the handle.
    cap.release()
    raise ValueError("You must click exactly 4 points.")

print("✅ Monitor corners selected successfully:")
for i, pt in enumerate(clicked_points, 1):
    print(f"  Corner {i}: {pt}")

👉 Please click the 4 corners of the monitor (clockwise or counterclockwise).
✅ After clicking all 4, click on the image window and press any key to continue.
Point 1: (527, 192)
Point 2: (79, 190)
Point 3: (76, 442)
Point 4: (523, 463)
✅ Monitor corners selected successfully:
  Corner 1: [527, 192]
  Corner 2: [79, 190]
  Corner 3: [76, 442]
  Corner 4: [523, 463]


Track monitor corners using optical flow + save visualization

In [13]:
# Track the four monitor corners through the video with pyramidal Lucas-Kanade
# optical flow, save a visualization every VISUALIZE_EVERY_N frames, and write
# the per-frame corner coordinates to OUTPUT_TRACKING_CSV.
#
# Expects `cap` (positioned just after the first frame), `first_frame` and
# `clicked_points` from the corner-selection cell.
os.makedirs(OUTPUT_VIS_FOLDER, exist_ok=True)

prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
prev_pts = np.array(clicked_points, dtype=np.float32).reshape(-1, 1, 2)

lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))


def _save_tracking_visual(frame, corners, frame_idx):
    """Draw the corner quadrilateral on a copy of `frame` and save it as a PNG."""
    vis_frame = frame.copy()
    # Plain Python ints keep the OpenCV drawing calls independent of NumPy scalar types
    corner_px = [tuple(pt) for pt in corners.reshape(-1, 2).astype(int).tolist()]
    for i, pt in enumerate(corner_px):
        cv2.circle(vis_frame, pt, 5, (0, 255, 0), -1)
        cv2.line(vis_frame, pt, corner_px[(i + 1) % len(corner_px)], (255, 0, 0), 2)
    vis_path = os.path.join(OUTPUT_VIS_FOLDER, f"frame_{frame_idx:05d}.png")
    # OpenCV frames are BGR; reverse the channel axis so matplotlib saves RGB
    plt.imsave(vis_path, vis_frame[:, :, ::-1])


# The first video frame was already consumed for corner selection, so the
# clicked corners ARE the result for frame 0. Recording them here keeps the
# `frame` column equal to the frame's position in the video (the loop below
# starts at the second frame, i.e. index 1).
# NOTE(review): presumably `frame` is later matched against `world_index` in
# the gaze export — confirm that the overlay video has the same frame numbering.
tracked_corners = [[0] + prev_pts.reshape(-1).tolist()]
_save_tracking_visual(first_frame, prev_pts, 0)
frame_idx = 1
frames_with_lost_corner = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (VIDEO_WIDTH, VIDEO_HEIGHT))
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        pts, status, _ = cv2.calcOpticalFlowPyrLK(prev_gray, gray, prev_pts, None, **lk_params)

        # status is 0 for any point whose flow could not be found; its new
        # coordinates are not valid. Hold that corner at its last known
        # position rather than saving (and propagating) a bogus location.
        lost = status.reshape(-1) == 0
        if lost.any():
            pts[lost] = prev_pts[lost]
            frames_with_lost_corner += 1

        tracked_corners.append([frame_idx] + pts.reshape(-1).tolist())

        # Visualization
        if frame_idx % VISUALIZE_EVERY_N == 0:
            _save_tracking_visual(frame, pts, frame_idx)

        # Update (gray is a fresh array each iteration, so no copy is needed)
        prev_gray = gray
        prev_pts = pts
        frame_idx += 1
finally:
    # Always close the capture, even if tracking or saving raised
    cap.release()

columns = ['frame',
           'x1', 'y1',
           'x2', 'y2',
           'x3', 'y3',
           'x4', 'y4']
tracked_df = pd.DataFrame(tracked_corners, columns=columns)
tracked_df.to_csv(OUTPUT_TRACKING_CSV, index=False)

if frames_with_lost_corner:
    print(f"Warning: at least one corner could not be tracked in {frames_with_lost_corner} frame(s); "
          "its last known position was kept. Check the saved visual frames.")
print(f"Tracking data saved to {OUTPUT_TRACKING_CSV}")
print(f"Visual frames saved to folder: {OUTPUT_VIS_FOLDER}")


Tracking data saved to /Users/jiaonahu/Desktop/monitor_tracking.csv
Visual frames saved to folder: /Users/jiaonahu/Desktop/monitor_tracking_visuals


Plan C: Manually label some keyframes + interpolation
Plan D: Use object detection models (e.g., YOLO) to automatically detect the display