 Derived from
 
 https://www.axonlab.org/hcph-sops/data-management/edf-to-bids/
 
 https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

Make sure the python version >=3.7 to support the statement

In [1]:
from __future__ import annotations 
from pathlib import Path
import pandas as pd
import numpy as np
from pyedfread import read_edf
from collections import defaultdict
from itertools import product, groupby
from warnings import warn
import re

In [2]:
# Global variable from 
# https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

# If setting WRITE_RAW_EDF as True, no preprocessing will be conducted on the edf data
WRITE_RAW_EDF = True
# -------------------------------------------------------------------------------
DEFAULT_EYE = "right"
DEFAULT_FREQUENCY = 1000 #It is 1000
DEFAULT_MODE = "P-CR"
DEFAULT_SCREEN = (0, 800, 0, 600)

# EyeLink calibration coordinates from
# https://www.sr-research.com/calibration-coordinate-calculator/
# Affect the performance?
EYELINK_CALIBRATION_COORDINATES = [
    (400, 300),
    (400, 51),
    (400, 549),
    (48, 300),
    (752, 300),
    (48, 51),
    (752, 51),
    (48, 549),
    (752, 549),
    (224, 176),
    (576, 176),
    (224, 424),
    (576, 424),
]

EYE_CODE_MAP = defaultdict(lambda: "unknown", {"R": "right", "L": "left", "RL": "both"})
EDF2BIDS_COLUMNS = {
    "g": '',
    "p": "pupil",
    "h": "href",
    "r": "raw",
    "fg": "fast",
    "fh": "fast_href",
    "fr": "fast_raw",
}

BIDS_COLUMNS_ORDER = (
    [f"eye{num}_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_pupil_size" for num in (1, 2)]
    + [f"eye{num}_pupil_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_fixation" for num in (1, 2)]
    + [f"eye{num}_saccade" for num in (1, 2)]
    + [f"eye{num}_blink" for num in (1, 2)]
    + [f"eye{num}_href_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_href_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_raw_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"fast_{c}_velocity" for c in ("x", "y")]
    + [f"fast_{kind}_{c}_velocity" for kind, c in product(("href", "raw"), ("x", "y"))]
    + [f"screen_ppdeg_{c}_coordinate" for c in ("x", "y")]
    + ["timestamp"]
)


Read in the edf file

In [64]:
subject_idx = 3
T_idx = 1

DATA_PATH = Path("/Users/cag/Documents/Dataset/MREyeTrack/EDF")

if subject_idx == 1:    
    edf_name = f"000001_fixed_dot-16_grid_T1w_2024-10-14_17h24.37.511.EDF"    
elif subject_idx == 2:
    edf_name = f"000002_fixed_dot-16_grid_T1w_2024-10-14_16h54.23.461.EDF" 
elif subject_idx == 3: 
    edf_name = f"000003_fixed_dot-16_grid_T1w_2024-10-14_16h35.56.202.EDF"
    # "OT4.EDF"
else:
    edf_name = f""
    

file_path = str(DATA_PATH / edf_name)
print(file_path)
ori_recording, ori_events, ori_messages = read_edf(file_path)
# The first timestamp of  `recording`
print(f" {ori_recording[100000:100100]}")
# print(ori_messages)
# print(ori_events)
# print(messages)
ori_messages = ori_messages.rename(
    columns={
        # Normalize weird header names generated by pyedfread
        "message": "trialid",
        "trial": "trial",
        # Convert some BIDS columns
        "time": "timestamp",
    }
)

recording = ori_recording
messages = ori_messages
events = ori_events
print(f'\nThe entire info of `message`: \n{messages[20:80]}')
recording.columns

/Users/cag/Documents/Dataset/MREyeTrack/EDF/000003_fixed_dot-16_grid_T1w_2024-10-14_16h35.56.202.EDF
               time  px_left  px_right  py_left  py_right  hx_left  hx_right  \
100000  11804897.0 -32768.0   -8343.0 -32768.0   -4422.0 -32768.0   -2938.0   
100001  11804898.0 -32768.0   -8348.0 -32768.0   -4417.0 -32768.0   -2943.0   
100002  11804899.0 -32768.0   -8353.0 -32768.0   -4418.0 -32768.0   -2949.0   
100003  11804900.0 -32768.0   -8359.0 -32768.0   -4425.0 -32768.0   -2956.0   
100004  11804901.0 -32768.0   -8369.0 -32768.0   -4431.0 -32768.0   -2967.0   
...            ...      ...       ...      ...       ...      ...       ...   
100095  11804992.0 -32768.0   -8362.0 -32768.0   -4409.0 -32768.0   -2958.0   
100096  11804993.0 -32768.0   -8379.0 -32768.0   -4433.0 -32768.0   -2978.0   
100097  11804994.0 -32768.0   -8396.0 -32768.0   -4467.0 -32768.0   -2999.0   
100098  11804995.0 -32768.0   -8398.0 -32768.0   -4473.0 -32768.0   -3001.0   
100099  11804996.0 -32768.0  

Index(['time', 'px_left', 'px_right', 'py_left', 'py_right', 'hx_left',
       'hx_right', 'hy_left', 'hy_right', 'pa_left', 'pa_right', 'gx_left',
       'gx_right', 'gy_left', 'gy_right', 'rx', 'ry', 'gxvel_left',
       'gxvel_right', 'gyvel_left', 'gyvel_right', 'hxvel_left', 'hxvel_right',
       'hyvel_left', 'hyvel_right', 'rxvel_left', 'rxvel_right', 'ryvel_left',
       'ryvel_right', 'fgxvel', 'fgyvel', 'fhxvel', 'fhyvel', 'frxvel',
       'fryvel', 'flags', 'input', 'buttons', 'htype', 'errors'],
      dtype='object')

# 1 Parsing the messages

In [65]:
messages = messages.rename(
    columns={c: c.strip() for c in messages.columns.values}
).drop_duplicates()

In [66]:

# Extract calibration headers
_cal_hdr = ori_messages.trialid.str.startswith("!CAL")
calibration = ori_messages[_cal_hdr]
# messages = messages.drop(messages.index[_cal_hdr])
print(calibration)

    timestamp  trial                                            trialid
1    11613423     -1  !CAL \n>>>>>>> CALIBRATION (HV5,P-CR) FOR RIGH...
2    11613423     -1                           !CAL Calibration points:
3    11613423     -1                !CAL -44.9, -37.6         0,      0
4    11613424     -1                !CAL -45.1, -53.3         0,  -2457
5    11613424     -1                !CAL -46.4, -22.1         0,   2457
6    11613424     -1                !CAL -70.7, -35.4     -3474,      0
7    11613424     -1                !CAL -18.7, -35.4      3474,      0
8    11613424     -1  !CAL eye check box: (L,R,T,B)\n\t  -76   -14  ...
9    11613424     -1  !CAL href cal range: (L,R,T,B)\n\t-5211  5211 ...
10   11613424     -1  !CAL Cal coeff:(X=a+bx+cy+dxx+eyy,Y=f+gx+goaly...
11   11613424     -1     !CAL Prenormalize: offx, offy = -44.93 -37.582
12   11613424     -1       !CAL Gains: cx:135.647 lx:132.666 rx:145.700
13   11613424     -1       !CAL Gains: cy:168.228 ty:164.291 by:

In [67]:
# Extracting the StartTime and StopTime metadata.
message_first_trigger = '!MODE RECORD CR 1000 2 0 R'
message_last_trigger = 'ET: eye-tracker stopped'
metadata = {
    'StopTime': None,
    'StartTime': None
}

# Find Start time
start_rows = messages.trialid.str.contains(
    message_first_trigger, case=False, regex=True
)
stop_rows = messages.trialid.str.contains(
    message_last_trigger, case=False, regex=True
)


# Extract calibration headers
_cal_hdr = messages.trialid.str.startswith("!CAL")
calibration = messages[_cal_hdr]
messages = messages.drop(messages.index[_cal_hdr])

# Pick the LAST of the start messages
metadata["StartTime"] = (
    int(messages[start_rows].timestamp.values[-1])
    if start_rows.any()
    else None
)

# Pick the FIRST of the stop messages
metadata["StopTime"] = (
    int(messages[stop_rows].timestamp.values[0])
    if stop_rows.any()
    else None
)

# Drop start and stop messages from messages dataframe
messages = messages.loc[~start_rows & ~stop_rows, :]

metadata

  int(messages[start_rows].timestamp.values[-1])
  int(messages[stop_rows].timestamp.values[0])


{'StopTime': 12361867, 'StartTime': 11704897}

In [68]:
# Extracting basic metadata.
# !MODE RECORD CR 1000 2 0 R

mode_record = messages.trialid.str.startswith("!MODE RECORD")

meta_record = {
    "freq": DEFAULT_FREQUENCY,
    "mode": DEFAULT_MODE,
    "eye": DEFAULT_EYE,
}

if mode_record.any():
    try:
        meta_record = re.match(
            r"\!MODE RECORD (?P<mode>\w+) (?P<freq>\d+) \d \d (?P<eye>[RL]+)",
            messages[mode_record].trialid.iloc[-1].strip(),
        ).groupdict()

        meta_record["eye"] = EYE_CODE_MAP[meta_record["eye"]]
        meta_record["mode"] = (
            "P-CR" if meta_record["mode"] == "CR" else meta_record["mode"]
        )
    except AttributeError:
        warn(
            "Error extracting !MODE RECORD message, "
            "using default frequency, mode, and eye"
        )
    finally:
        messages = messages.loc[~mode_record]

eye = (
    ("right", "left") if meta_record["eye"] == "both" else (meta_record["eye"],)
)

metadata["SamplingFrequency"] = int(meta_record["freq"])
metadata["EyeTrackingMethod"] = meta_record["mode"]
metadata["RecordedEye"] = meta_record["eye"]

In [69]:
# Extracting screen parameters.
# GAZE_COORDS 0.00 0.00 800.00 600.00

# Extract GAZE_COORDS message signaling start of recording
gaze_msg = messages.trialid.str.startswith("GAZE_COORDS")

metadata["ScreenAOIDefinition"] = [
    "square",
    DEFAULT_SCREEN,
]
if gaze_msg.any():
    try:
        gaze_record = re.match(
            r"GAZE_COORDS (\d+\.\d+) (\d+\.\d+) (\d+\.\d+) (\d+\.\d+)",
            messages[gaze_msg].trialid.iloc[-1].strip(),
        ).groups()
        metadata["ScreenAOIDefinition"][1] = [
            int(round(float(gaze_record[0]))),
            int(round(float(gaze_record[2]))),
            int(round(float(gaze_record[1]))),
            int(round(float(gaze_record[3]))),
        ]
    except AttributeError:
        warn("Error extracting GAZE_COORDS")
    finally:
        messages = messages.loc[~gaze_msg]
        
print(metadata)

{'StopTime': 12361867, 'StartTime': 11704897, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]]}


In [70]:
# Extracting parameters of the pupil fit model.
# ELCL_PROC ELLIPSE (5)
# ELCL_EFIT_PARAMS 1.01 4.00  0.15 0.05  0.65 0.65  0.00 0.00 0.30
# Extract ELCL_PROC AND ELCL_EFIT_PARAMS to extract pupil fit method
pupilfit_msg = messages.trialid.str.startswith("ELCL_PROC")

if pupilfit_msg.any():
    try:
        pupilfit_method = [
            val
            for val in messages[pupilfit_msg]
            .trialid.iloc[-1]
            .strip()
            .split(" ")[1:]
            if val
        ]
        metadata["PupilFitMethod"] = pupilfit_method[0].lower()
        metadata["PupilFitMethodNumberOfParameters"] = int(
            pupilfit_method[1].strip("(").strip(")")
        )
    except AttributeError:
        warn("Error extracting ELCL_PROC (pupil fitting method)")
    finally:
        messages = messages.loc[~pupilfit_msg]

pupilfit_msg_params = messages.trialid.str.startswith("ELCL_EFIT_PARAMS")
if pupilfit_msg_params.any():
    rows = messages[pupilfit_msg_params]
    row = rows.trialid.values[-1].strip().split(" ")[1:]
    try:
        metadata["PupilFitParameters"] = [
            tuple(float(val) for val in vals)
            for k, vals in groupby(row, key=bool)
            if k
        ]
    except AttributeError:
        warn("Error extracting ELCL_EFIT_PARAMS (pupil fitting parameters)")
    finally:
        messages = messages.loc[~pupilfit_msg_params]
        
metadata

{'StopTime': 12361867,
 'StartTime': 11704897,
 'SamplingFrequency': 1000,
 'EyeTrackingMethod': 'P-CR',
 'RecordedEye': 'right',
 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]],
 'PupilFitMethod': 'ellipse',
 'PupilFitMethodNumberOfParameters': 5,
 'PupilFitParameters': [(1.01, 4.0),
  (0.15, 0.05),
  (0.65, 0.65),
  (0.0, 0.0, 0.3)]}

In [71]:
# Calibration validation.
# VALIDATE R 4POINT 4 RIGHT at 752,300 OFFSET 0.35 deg. -8.7,-3.8 pix.
# Extract VALIDATE messages for a calibration validation
validation_msg = messages.trialid.str.startswith("VALIDATE")

if validation_msg.any():
    metadata["ValidationPosition"] = []
    metadata["ValidationErrors"] = []

for i_row, validate_row in enumerate(messages[validation_msg].trialid.values):
    prefix, suffix = validate_row.split("OFFSET")
    validation_eye = (
        f"eye{eye.index('right') + 1}"
        if "RIGHT" in prefix
        else f"eye{eye.index('left') + 1}"
    )
    validation_coords = [
        int(val.strip())
        for val in prefix.rsplit("at", 1)[-1].split(",")
        if val.strip()
    ]
    metadata["ValidationPosition"].append(
        [validation_eye, validation_coords]
    )

    validate_values = [
        float(val)
        for val in re.match(
            r"(-?\d+\.\d+) deg\.\s+(-?\d+\.\d+),(-?\d+\.\d+) pix\.",
            suffix.strip(),
        ).groups()
    ]

    metadata["ValidationErrors"].append(
        (validation_eye, validate_values[0], tuple(validate_values[1:]))
    )
messages = messages.loc[~validation_msg]

print(messages)
print(metadata)

     timestamp  trial                                            trialid
0     11586679     -1                        ET: Start experiment 'dots'
28    11658945     -1  NO Reply is disabled for function eyelink_cal_...
29    11704891     -1                              ET: recording started
30    11704896     -1                               RECCFG CR 1000 2 0 R
31    11704896     -1                                      ELCLCFG TOWER
..         ...    ...                                                ...
163   12336868     -1                   ET: Start routine 'centered_dot'
164   12341868     -1                   ET: Start routine 'centered_dot'
165   12346867     -1                   ET: Start routine 'centered_dot'
166   12351868     -1                   ET: Start routine 'centered_dot'
167   12356867     -1                   ET: Start routine 'centered_dot'

[137 rows x 3 columns]
{'StopTime': 12361867, 'StartTime': 11704897, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR'

In [72]:
# Extracting final bits of metadata.
# Extract THRESHOLDS messages prior recording and process last
thresholds_msg = messages.trialid.str.startswith("THRESHOLDS")
if thresholds_msg.any():
    metadata["PupilThreshold"] = [None] * len(eye)
    metadata["CornealReflectionThreshold"] = [None] * len(eye)
    thresholds_chunks = (
        messages[thresholds_msg].trialid.iloc[-1].strip().split(" ")[1:]
    )
    eye_index = eye.index(EYE_CODE_MAP[thresholds_chunks[0]])
    metadata["PupilThreshold"][eye_index] = int(thresholds_chunks[-2])
    metadata["CornealReflectionThreshold"][eye_index] = int(
        thresholds_chunks[-1]
    )
messages = messages.loc[~thresholds_msg]
print(messages)
print(metadata)

     timestamp  trial                                            trialid
0     11586679     -1                        ET: Start experiment 'dots'
28    11658945     -1  NO Reply is disabled for function eyelink_cal_...
29    11704891     -1                              ET: recording started
30    11704896     -1                               RECCFG CR 1000 2 0 R
31    11704896     -1                                      ELCLCFG TOWER
..         ...    ...                                                ...
163   12336868     -1                   ET: Start routine 'centered_dot'
164   12341868     -1                   ET: Start routine 'centered_dot'
165   12346867     -1                   ET: Start routine 'centered_dot'
166   12351868     -1                   ET: Start routine 'centered_dot'
167   12356867     -1                   ET: Start routine 'centered_dot'

[136 rows x 3 columns]
{'StopTime': 12361867, 'StartTime': 11704897, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR'

In [73]:
# Flush the remaining messages as a metadata entry.
# Consume the remainder of messages

if not messages.empty:
    metadata["LoggedMessages"] = [
        (int(msg_timestamp), msg.strip())
        for msg_timestamp, msg in messages[["timestamp", "trialid"]].values
    ]
    
print(messages)
print(metadata)

     timestamp  trial                                            trialid
0     11586679     -1                        ET: Start experiment 'dots'
28    11658945     -1  NO Reply is disabled for function eyelink_cal_...
29    11704891     -1                              ET: recording started
30    11704896     -1                               RECCFG CR 1000 2 0 R
31    11704896     -1                                      ELCLCFG TOWER
..         ...    ...                                                ...
163   12336868     -1                   ET: Start routine 'centered_dot'
164   12341868     -1                   ET: Start routine 'centered_dot'
165   12346867     -1                   ET: Start routine 'centered_dot'
166   12351868     -1                   ET: Start routine 'centered_dot'
167   12356867     -1                   ET: Start routine 'centered_dot'

[136 rows x 3 columns]
{'StopTime': 12361867, 'StartTime': 11704897, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR'

# 2 Parsing the recording dataframe

In [74]:
recording = ori_recording
ori_recording

Unnamed: 0,time,px_left,px_right,py_left,py_right,hx_left,hx_right,hy_left,hy_right,pa_left,...,fgyvel,fhxvel,fhyvel,frxvel,fryvel,flags,input,buttons,htype,errors
0,11704897.0,-32768.0,-5290.0,-32768.0,-4578.0,-32768.0,354.0,-32768.0,147.0,-32768.0,...,1.401298e-45,-1.946136e+14,0.0,0.0,0.0,32641.0,32768.0,0.0,-32768.0,0.0
1,11704898.0,-32768.0,-5286.0,-32768.0,-4597.0,-32768.0,357.0,-32768.0,123.0,-32768.0,...,1.401298e-45,-1.946136e+14,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
2,11704899.0,-32768.0,-5285.0,-32768.0,-4618.0,-32768.0,357.0,-32768.0,97.0,-32768.0,...,1.401298e-45,-1.946136e+14,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
3,11704900.0,-32768.0,-5287.0,-32768.0,-4611.0,-32768.0,355.0,-32768.0,106.0,-32768.0,...,1.401298e-45,-1.946136e+14,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
4,11704901.0,-32768.0,-5288.0,-32768.0,-4583.0,-32768.0,356.0,-32768.0,141.0,-32768.0,...,1.401298e-45,-1.946136e+14,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664845,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
664846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
664847,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
664848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [75]:
# Curation of the input dataframe
# Normalize timestamps (should be int and strictly positive)
recording = recording.astype({"time": int})
recording = recording[recording["time"] > 0]
raw_recording_len = len(recording)
print(f'raw_recording length: {raw_recording_len}')

recording = recording.rename(
    columns={
#         # Fix buggy header names generated by pyedfread
#         "fhxyvel": "fhxvel",
#         "frxyvel": "frxvel",
        # Normalize weird header names generated by pyedfread
        "rx": "screen_ppdeg_x_coordinate",
        "ry": "screen_ppdeg_y_coordinate",
        # Convert some BIDS columns
        "time": "timestamp",
    }
)

# Split extra columns from the dataframe
extra = recording[["flags", "input", "htype"]]
recording = recording.drop(columns=["flags", "input", "htype"])
print(len(recording))

# Remove columns that are always very close to zero
recording = recording.loc[:, (recording.abs() > 1e-8).any(axis=0)]
# Remove columns that are always 1e8 or more
recording = recording.loc[:, (recording.abs() < 1e8).any(axis=0)]
# Replace unreasonably high values with NaNs
recording = recording.replace({1e8: np.nan})

assert len(recording) == raw_recording_len

raw_recording length: 657970
657970


In [76]:
# Remove columns that do not apply (e.g., only one eye recorded).
# Drop one eye's columns if not interested in "both"
print(f'The eye we take care of {eye}')
remove_eye = set(("left", "right")) - set(eye)
if remove_eye:
    remove_eye = remove_eye.pop()  # Drop set decoration
    recording = recording.reindex(
        columns=[c for c in recording.columns if remove_eye not in c]
    )
    
columns = recording.columns
print("Columns:")
print(columns)
recording

The eye we take care of ('right',)
Columns:
Index(['timestamp', 'px_right', 'py_right', 'hx_right', 'hy_right', 'pa_right',
       'gx_right', 'gy_right', 'screen_ppdeg_x_coordinate',
       'screen_ppdeg_y_coordinate', 'gxvel_right', 'hxvel_right',
       'rxvel_right', 'fhxvel', 'frxvel'],
      dtype='object')


Unnamed: 0,timestamp,px_right,py_right,hx_right,hy_right,pa_right,gx_right,gy_right,screen_ppdeg_x_coordinate,screen_ppdeg_y_coordinate,gxvel_right,hxvel_right,rxvel_right,fhxvel,frxvel
0,11704897,-5290.0,-4578.0,354.0,147.0,1443.0,435.899994,314.899994,26.5,26.5,,,,-1.946136e+14,0.000000e+00
1,11704898,-5286.0,-4597.0,357.0,123.0,1447.0,436.200012,312.500000,26.5,26.5,,,,-1.946136e+14,0.000000e+00
2,11704899,-5285.0,-4618.0,357.0,97.0,1449.0,436.200012,309.799988,26.5,26.5,,,,-1.946136e+14,0.000000e+00
3,11704900,-5287.0,-4611.0,355.0,106.0,1448.0,436.000000,310.700012,26.5,26.5,,,,-1.946136e+14,0.000000e+00
4,11704901,-5288.0,-4583.0,356.0,141.0,1451.0,436.100006,314.299988,26.5,26.5,,,,-1.946136e+14,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657965,12362862,-6307.0,-4443.0,-728.0,303.0,1757.0,326.100006,330.700012,26.5,26.5,,,,-1.946136e+14,2.161728e+20
657966,12362863,-6327.0,-4440.0,-750.0,305.0,1747.0,323.899994,330.899994,26.6,26.5,,,,-1.946136e+14,2.161728e+20
657967,12362864,-6339.0,-4439.0,-762.0,305.0,1739.0,322.700012,330.899994,26.6,26.5,,,,-1.946136e+14,2.161728e+20
657968,12362865,-6330.0,-4442.0,-752.0,302.0,1735.0,323.600006,330.600006,26.6,26.5,,,,-1.946136e+14,2.161728e+20


In [77]:
# Clean-up pupil size and gaze position. 
# These are the parameters we most likely we care for, so special curation is applied:
screen_resolution = [800, 600]

for eyenum, eyename in enumerate(eye):
    # Clean-up implausible values for pupil area (pa)
    recording.loc[
        recording[f"pa_{eyename}"] < 1, f"pa_{eyename}"
    ] = np.nan
    recording = recording.rename(
        columns={f"pa_{eyename}": f"eye{eyenum + 1}_pupil_size"}
    )
    print(f"pa_{eyename} renamed as: eye{eyenum + 1}_pupil_size")
    # Clean-up implausible values for gaze x position
    recording.loc[
        (recording[f"gx_{eyename}"] < 0)
        | (recording[f"gx_{eyename}"] > screen_resolution[0]),
        f"gx_{eyename}",
    ] = np.nan
    # Clean-up implausible values for gaze y position
    recording.loc[
        (recording[f"gy_{eyename}"] <= 0)
        | (recording[f"gy_{eyename}"] > screen_resolution[1]),
        f"gy_{eyename}",
    ] = np.nan
    
print(recording)
assert len(recording) == raw_recording_len

pa_right renamed as: eye1_pupil_size
        timestamp  px_right  py_right  hx_right  hy_right  eye1_pupil_size  \
0        11704897   -5290.0   -4578.0     354.0     147.0           1443.0   
1        11704898   -5286.0   -4597.0     357.0     123.0           1447.0   
2        11704899   -5285.0   -4618.0     357.0      97.0           1449.0   
3        11704900   -5287.0   -4611.0     355.0     106.0           1448.0   
4        11704901   -5288.0   -4583.0     356.0     141.0           1451.0   
...           ...       ...       ...       ...       ...              ...   
657965   12362862   -6307.0   -4443.0    -728.0     303.0           1757.0   
657966   12362863   -6327.0   -4440.0    -750.0     305.0           1747.0   
657967   12362864   -6339.0   -4439.0    -762.0     305.0           1739.0   
657968   12362865   -6330.0   -4442.0    -752.0     302.0           1735.0   
657969   12362866   -6320.0   -4435.0    -742.0     311.0           1740.0   

          gx_right    gy_r

In [78]:
# Munging columns to comply with BIDS. 
# At this point, the dataframe is almost ready for writing out as BIDS.
# Interpolate BIDS column names
columns = list(
    set(recording.columns)
    - set(
        (
            "timestamp",
            "screen_ppdeg_x_coordinate",
            "screen_ppdeg_y_coordinate",
            "eye1_pupil_size",#pa
            "eye2_pupil_size",#pa
        )
    )
)
bids_columns = []
for eyenum, eyename in enumerate(eye):
    for name in columns:
        colprefix = f"eye{eyenum + 1}" if name.endswith(f"_{eyename}") else ""
        _newname = name.split("_")[0]
        _newname = re.sub(r"([xy])$", r"_\1_coordinate", _newname)
        _newname = re.sub(r"([xy])vel$", r"_\1_velocity", _newname)
        _newname = _newname.split("_", 1)
        _newname[0] = EDF2BIDS_COLUMNS[_newname[0]]
        _newname.insert(0, colprefix)
        bids_columns.append("_".join((_n for _n in _newname if _n)))

# Rename columns to be BIDS-compliant
recording = recording.rename(columns=dict(zip(columns, bids_columns)))

# Reorder columns to render nicely (tracking first, pupil size after)
columns = sorted(
    set(recording.columns.values).intersection(BIDS_COLUMNS_ORDER),
    key=lambda entry: BIDS_COLUMNS_ORDER.index(entry),
)
columns += [c for c in recording.columns.values if c not in columns]
recording = recording.reindex(columns=columns)

print(recording)
assert len(recording) == raw_recording_len

        eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0              435.899994         314.899994           1443.0   
1              436.200012         312.500000           1447.0   
2              436.200012         309.799988           1449.0   
3              436.000000         310.700012           1448.0   
4              436.100006         314.299988           1451.0   
...                   ...                ...              ...   
657965         326.100006         330.700012           1757.0   
657966         323.899994         330.899994           1747.0   
657967         322.700012         330.899994           1739.0   
657968         323.600006         330.600006           1735.0   
657969         324.700012         331.500000           1740.0   

        eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                       -5290.0                  -4578.0   
1                       -5286.0                  -4597.0   
2                       -5285.0        

# 3 Parsing the calibration messages

In [79]:
print(calibration)

    timestamp  trial                                            trialid
1    11613423     -1  !CAL \n>>>>>>> CALIBRATION (HV5,P-CR) FOR RIGH...
2    11613423     -1                           !CAL Calibration points:
3    11613423     -1                !CAL -44.9, -37.6         0,      0
4    11613424     -1                !CAL -45.1, -53.3         0,  -2457
5    11613424     -1                !CAL -46.4, -22.1         0,   2457
6    11613424     -1                !CAL -70.7, -35.4     -3474,      0
7    11613424     -1                !CAL -18.7, -35.4      3474,      0
8    11613424     -1  !CAL eye check box: (L,R,T,B)\n\t  -76   -14  ...
9    11613424     -1  !CAL href cal range: (L,R,T,B)\n\t-5211  5211 ...
10   11613424     -1  !CAL Cal coeff:(X=a+bx+cy+dxx+eyy,Y=f+gx+goaly...
11   11613424     -1     !CAL Prenormalize: offx, offy = -44.93 -37.582
12   11613424     -1       !CAL Gains: cx:135.647 lx:132.666 rx:145.700
13   11613424     -1       !CAL Gains: cy:168.228 ty:164.291 by:

In [80]:
# Parse calibration metadata
metadata["CalibrationCount"] = 0
if not calibration.empty:
    warn("Calibration of more than one eye is not implemented")
    calibration.trialid = calibration.trialid.str.replace("!CAL", "")
    calibration.trialid = calibration.trialid.str.strip()

    metadata["CalibrationLog"] = list(
        zip(
            calibration.timestamp.values.astype(int),
            calibration.trialid.values,
        )
    )

    calibrations_msg = calibration.trialid.str.startswith(
        "VALIDATION"
    ) & calibration.trialid.str.contains("ERROR")
    metadata["CalibrationCount"] = calibrations_msg.sum()

    calibration_last = calibration.index[calibrations_msg][-1]
    try:
        meta_calib = re.match(
            r"VALIDATION (?P<ctype>[\w\d]+) (?P<eyeid>[RL]+) (?P<eye>RIGHT|LEFT) "
            r"(?P<result>\w+) ERROR (?P<avg>-?\d+\.\d+) avg\. (?P<max>-?\d+\.\d+) max\s+"
            r"OFFSET (?P<offsetdeg>-?\d+\.\d+) deg\. "
            r"(?P<offsetxpix>-?\d+\.\d+),(?P<offsetypix>-?\d+\.\d+) pix\.",
            calibration.loc[calibration_last, "trialid"].strip(),
        ).groupdict()

        metadata["CalibrationType"] = meta_calib["ctype"]
        metadata["AverageCalibrationError"] = [float(meta_calib["avg"])]
        metadata["MaximalCalibrationError"] = [float(meta_calib["max"])]
        metadata["CalibrationResultQuality"] = [meta_calib["result"]]
        metadata["CalibrationResultOffset"] = [
            float(meta_calib["offsetdeg"]),
            (float(meta_calib["offsetxpix"]), float(meta_calib["offsetypix"])),
        ]
        metadata["CalibrationResultOffsetUnits"] = ["deg", "pixels"]
    except AttributeError:
        warn("Calibration data found but unsuccessfully parsed for results")
        
        
print(calibration)

    timestamp  trial                                            trialid
1    11613423     -1  >>>>>>> CALIBRATION (HV5,P-CR) FOR RIGHT: <<<<...
2    11613423     -1                                Calibration points:
3    11613423     -1                     -44.9, -37.6         0,      0
4    11613424     -1                     -45.1, -53.3         0,  -2457
5    11613424     -1                     -46.4, -22.1         0,   2457
6    11613424     -1                     -70.7, -35.4     -3474,      0
7    11613424     -1                     -18.7, -35.4      3474,      0
8    11613424     -1  eye check box: (L,R,T,B)\n\t  -76   -14   -56 ...
9    11613424     -1  href cal range: (L,R,T,B)\n\t-5211  5211 -3686...
10   11613424     -1  Cal coeff:(X=a+bx+cy+dxx+eyy,Y=f+gx+goaly+ixx+...
11   11613424     -1          Prenormalize: offx, offy = -44.93 -37.582
12   11613424     -1            Gains: cx:135.647 lx:132.666 rx:145.700
13   11613424     -1            Gains: cy:168.228 ty:164.291 by:

  warn("Calibration of more than one eye is not implemented")


# 4 Parsing the events dataframe

In [81]:
# events[
#     events["type"] == "saccade"
# ]

In [82]:
# print(events)
print(recording)

# Process events: first generate empty columns
recording["eye1_fixation"] = 0
recording["eye1_saccade"] = 0
recording["eye1_blink"] = 0

# Add fixations
for _, fixation_event in events[
    events["type"] == "fixation"
].iterrows():
    recording.loc[
        (recording["timestamp"] >= fixation_event["start"])
        & (recording["timestamp"] <= fixation_event["end"]),
        "eye1_fixation",
    ] = 1

# Add saccades, and blinks, which are a sub-event of saccades
for _, saccade_event in events[
    events["type"] == "saccade"
].iterrows():
    recording.loc[
        (recording["timestamp"] >= saccade_event["start"])
        & (recording["timestamp"] <= saccade_event["end"]),
        "eye1_saccade",
    ] = 1

    if saccade_event["contains_blink"] == 1: #Note here some version is "blink", depends on the item name
        recording.loc[
            (recording["timestamp"] >= saccade_event["start"])
            & (recording["timestamp"] <= saccade_event["end"]),
            "eye1_blink",
        ] = 1

        eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0              435.899994         314.899994           1443.0   
1              436.200012         312.500000           1447.0   
2              436.200012         309.799988           1449.0   
3              436.000000         310.700012           1448.0   
4              436.100006         314.299988           1451.0   
...                   ...                ...              ...   
657965         326.100006         330.700012           1757.0   
657966         323.899994         330.899994           1747.0   
657967         322.700012         330.899994           1739.0   
657968         323.600006         330.600006           1735.0   
657969         324.700012         331.500000           1740.0   

        eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                       -5290.0                  -4578.0   
1                       -5286.0                  -4597.0   
2                       -5285.0        

In [83]:
print(recording)

        eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0              435.899994         314.899994           1443.0   
1              436.200012         312.500000           1447.0   
2              436.200012         309.799988           1449.0   
3              436.000000         310.700012           1448.0   
4              436.100006         314.299988           1451.0   
...                   ...                ...              ...   
657965         326.100006         330.700012           1757.0   
657966         323.899994         330.899994           1747.0   
657967         322.700012         330.899994           1739.0   
657968         323.600006         330.600006           1735.0   
657969         324.700012         331.500000           1740.0   

        eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                       -5290.0                  -4578.0   
1                       -5286.0                  -4597.0   
2                       -5285.0        

# 5 Write the data into BIDS structure

In [84]:
from copy import deepcopy

metadata['Columns'] = recording.columns.tolist()
print(metadata)
save_metadata = deepcopy(metadata)
# metadata.pop('CalibrationLog', None)
# print(metadata)

{'StopTime': 12361867, 'StartTime': 11704897, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'ellipse', 'PupilFitMethodNumberOfParameters': 5, 'PupilFitParameters': [(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)], 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 0.49, (-8.8, 9.9)), ('eye1', 0.24, (-5.6, -2.9)), ('eye1', 1.04, (20.1, 19.7)), ('eye1', 1.13, (-29.0, -9.5)), ('eye1', 0.72, (18.1, -6.6))], 'PupilThreshold': [74], 'CornealReflectionThreshold': [215], 'LoggedMessages': [(11586679, "ET: Start experiment 'dots'"), (11658945, 'NO Reply is disabled for function eyelink_cal_result'), (11704891, 'ET: recording started'), (11704896, 'RECCFG CR 1000 2 0 R'), (11704896, 'ELCLCFG TOWER'), (11705393, "ET: Start routine 'centered_dot'"), (11710389, "ET: Start rout

In [85]:
metadata = save_metadata

In [86]:

def convert_to_int(metadata):
    if 'CalibrationCount' in metadata:
        metadata['CalibrationCount'] = int(metadata['CalibrationCount']) if isinstance(metadata['CalibrationCount'], (np.int32, np.int64, int)) else metadata['CalibrationCount']
    if "CalibrationLog" in metadata:
        metadata["CalibrationLog"] = [(int(x[0]),x[1]) if isinstance(x[0], (np.int32, np.int64, int)) else x for x in metadata['CalibrationLog']]
    return metadata

        
convert_metadata = convert_to_int(metadata)
# print(convert_metadata)

In [87]:
# Load the autoreload extension
%load_ext autoreload
# Set autoreload to update the modules every time before executing a new line of code
%autoreload 2

import importlib
from write_bids_yiwei import write_bids_from_df
out_dir = DATA_PATH
edf_extension = 'EDF'
edf_name = edf_name
filename = edf_name.split('.')[0]
print(f'bid filename: {filename}')

write_bids_from_df(
    recording, convert_metadata,
    out_dir,
    filename,
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
bid filename: 000003_fixed_dot-16_grid_T1w_2024-10-14_16h35


('/Users/cag/Documents/Dataset/MREyeTrack/EDF/000003_fixed_dot-16_grid_T1w_2024-10-14_16h35.tsv.gz',
 '/Users/cag/Documents/Dataset/MREyeTrack/EDF/000003_fixed_dot-16_grid_T1w_2024-10-14_16h35.json')

Now the files are generated.
- EDF Path
    - \<filename\>.EDF
    - \<filename\>.tsv.gz

In [39]:
print(recording)

        eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0              466.899994         279.200012           3294.0   
1              466.899994         279.899994           3287.0   
2              466.899994         279.799988           3272.0   
3              465.500000         280.399994           3264.0   
4              464.100006         281.000000           3258.0   
...                   ...                ...              ...   
657895                NaN                NaN              NaN   
657896                NaN                NaN              NaN   
657897                NaN                NaN              NaN   
657898                NaN                NaN              NaN   
657899                NaN                NaN              NaN   

        eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                       -5044.0                  -4406.0   
1                       -5043.0                  -4400.0   
2                       -5044.0        