In [33]:
import cv2
import pandas as pd

# Load video and eye-tracking data
video_path = "pilot2/sangsu/02_ElevatorTest.mp4"
data_path = "pilot2/processed/eyetracker.pkl"
data = pd.read_pickle(data_path)
eye_data = data[data['Scene']==2].copy()
output_video = "pilot2/processed/output_with_gaze.mp4"

# Ensure you're working with a copy of the DataFrame slice

# Now, you can safely modify gaze_data
eye_data[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']] = eye_data[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']].interpolate()

# Check for any remaining NaN values after interpolation
eye_data[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']] = eye_data[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']].fillna(0.5)
eye_data['normalized_Unitytime'] = eye_data.groupby('Scene')['Unitytime'].transform(lambda x: x - x.min())

# Initialize video capture and writer
cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Frames per second of the video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Set target resolution for output video
target_width, target_height = 1920, 1080  # Example resolution

# Initialize VideoWriter with target resolution
out = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc(*'mp4v'), fps, (target_width, target_height))

# Scale factor for Vive Pro Eye resolution to target video resolution
# vive_width, vive_height = 2880, 1600  # Original Vive resolution
# scale_x = target_width / vive_width
# scale_y = target_height / vive_height

# Convert normalized Unitytime to frame indices
eye_data['frame_idx'] = (eye_data['normalized_Unitytime'] * fps).astype(int)

# Iterate over each frame of the video
frame_idx = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Resize the frame to target resolution
    # frame_resized = cv2.resize(frame, (target_width, target_height))

    # Find the corresponding eye-tracking data for the current video frame
    # Check for gaze data that matches the current frame index
    gaze_data = eye_data[eye_data['frame_idx'] == frame_idx]

    # Debug: Check if there is any matching gaze data
    if gaze_data.empty:
        print(f"No gaze data found for frame {frame_idx}")
    else:
        # Retrieve gaze coordinates
        x, y = gaze_data.mean()[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']]

        # Scale gaze coordinates to match video resolution
        # x = int(x * scale_x)
        # y = int(y * scale_y)
        x = int(x* frame_width)
        y = int(y* frame_height)
        print(x,y)

        # Draw the gaze point on the frame
        if 0 <= x < target_width and 0 <= y < target_height:
            cv2.circle(frame,(x, y), 5, (0, 255, 0), -1)

    # Write the frame to the output video
    out.write(frame)
    frame_idx += 1

cap.release()
out.release()

803 480
803 480
803 481
804 481
804 481
804 481
804 481
804 481
804 482
804 482
804 482
804 482
805 482
805 483
805 483
805 483
805 482
805 482
805 482
805 482
805 483
805 482
805 482
804 482
805 483
805 484
804 484
804 484
805 484
804 485
802 485
801 486
799 486
798 486
796 486
794 487
797 482
802 475
803 475
801 475
800 475
800 475
799 476
799 476
798 477
796 477
795 478
793 478
790 479
789 479
789 479
786 479
784 479
783 480
782 480
781 481
780 481
779 481
780 481
782 482
775 483
753 484
576 376
-1317 -719
800 516
783 480
782 480
782 482
781 482
782 484
782 484
782 484
784 484
784 484
785 484
785 484
786 484
787 484
785 482
773 472
774 470
774 470
775 470
776 470
777 471
778 471
780 471
782 471
784 472
785 472
786 473
788 473
789 474
792 475
795 477
798 478
801 479
803 479
805 481
807 482
810 482
810 482
814 483
843 477
849 478
850 481
851 481
854 478
856 479
860 481
863 483
857 485
804 482
797 480
799 479
800 478
802 478
808 479
810 480
811 480
812 483
852 485
885 485
883 485
881 4

In [28]:
gaze_data[['pupilLSensorPosL_X','pupilLSensorPosL_Y']]

Unnamed: 0,pupilLSensorPosL_X,pupilLSensorPosL_Y
1970-01-01 00:00:59.194,0.390934,0.38266
1970-01-01 00:00:59.196,0.391134,0.382301
1970-01-01 00:00:59.198,0.391334,0.381942
1970-01-01 00:00:59.200,0.391534,0.381582
1970-01-01 00:00:59.202,0.391597,0.381212
1970-01-01 00:00:59.204,0.39136,0.380817
1970-01-01 00:00:59.206,0.391124,0.380422
1970-01-01 00:00:59.208,0.390887,0.380027
1970-01-01 00:00:59.210,0.39065,0.379632
1970-01-01 00:00:59.212,0.390762,0.379178


In [None]:
x, y =gaze_data[['pupilLSensorPosR_X', 'pupilLSensorPosR_Y']].mean()
print(x,y)


0.4167138636112213 0.4082764983177185


In [11]:
eye_data.columns

Index(['Scene', 'Unitytime', 'validL', 'validR', 'gazeoriginL_X',
       'gazeoriginL_Y', 'gazeoriginL_Z', 'gazeoriginR_X', 'gazeoriginR_Y',
       'gazeoriginR_Z', 'gazeL_X', 'gazeL_Y', 'gazeL_Z', 'gazeR_X', 'gazeR_Y',
       'gazeR_Z', 'pupilL', 'pupilR', 'eye_opennessL', 'eye_opennessR',
       'pupilLSensorPosL_X', 'pupilLSensorPosL_Y', 'pupilLSensorPosL_Z',
       'pupilLSensorPosR_X', 'pupilLSensorPosR_Y', 'pupilLSensorPosR_Z',
       'convergence_distance_mm', 'convergence_distance_validity',
       'normalized_Unitytime', 'frame_idx'],
      dtype='object')