 Derived from
 
 https://www.axonlab.org/hcph-sops/data-management/edf-to-bids/
 
 https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

Make sure the Python version is >= 3.7, which is required by the `from __future__ import annotations` statement in the first code cell.

In [1]:
from __future__ import annotations 
from pathlib import Path
import pandas as pd
import numpy as np
from pyedfread import read_edf
from collections import defaultdict
from itertools import product, groupby
from warnings import warn
import re

In [28]:
# Module-level defaults and lookup tables, taken from
# https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

DEFAULT_EYE = "right"
# According to Bene's paper the frequency is 2000, but the eye tracking data report 1000
DEFAULT_FREQUENCY = 1000
DEFAULT_MODE = "P-CR"
DEFAULT_SCREEN = (0, 800, 0, 600)

# EyeLink calibration coordinates from
# https://www.sr-research.com/calibration-coordinate-calculator/
EYELINK_CALIBRATION_COORDINATES = [
    (400, 300),
    (400, 51),
    (400, 549),
    (48, 300),
    (752, 300),
    (48, 51),
    (752, 51),
    (48, 549),
    (752, 549),
    (224, 176),
    (576, 176),
    (224, 424),
    (576, 424),
]

# Eye codes found in the EDF messages -> eye names; any other code maps to "unknown"
EYE_CODE_MAP = defaultdict(lambda: "unknown", R="right", L="left", RL="both")

# Column-name prefixes of the EDF recording -> BIDS name fragments
EDF2BIDS_COLUMNS = dict(
    g="",
    p="pupil",
    h="href",
    r="raw",
    fg="fast",
    fh="fast_href",
    fr="fast_raw",
)


def _eye_columns(template):
    """Expand ``template`` once per eye number (1, then 2)."""
    return [template.format(num=num) for num in (1, 2)]


def _eye_axis_columns(template):
    """Expand ``template`` per (eye number, axis) pair: eye 1 x/y first, then eye 2 x/y."""
    return [template.format(num=num, c=c) for num in (1, 2) for c in ("x", "y")]


# Preferred left-to-right ordering of the columns in the output table
BIDS_COLUMNS_ORDER = [
    *_eye_axis_columns("eye{num}_{c}_coordinate"),
    *_eye_columns("eye{num}_pupil_size"),
    *_eye_axis_columns("eye{num}_pupil_{c}_coordinate"),
    *_eye_columns("eye{num}_fixation"),
    *_eye_columns("eye{num}_saccade"),
    *_eye_columns("eye{num}_blink"),
    *_eye_axis_columns("eye{num}_href_{c}_coordinate"),
    *_eye_axis_columns("eye{num}_{c}_velocity"),
    *_eye_axis_columns("eye{num}_href_{c}_velocity"),
    *_eye_axis_columns("eye{num}_raw_{c}_velocity"),
    "fast_x_velocity",
    "fast_y_velocity",
    *[f"fast_{kind}_{c}_velocity" for kind in ("href", "raw") for c in ("x", "y")],
    "screen_ppdeg_x_coordinate",
    "screen_ppdeg_y_coordinate",
    "timestamp",
]


Read in the edf file

In [29]:
# Location of the EDF file to convert
DATA_PATH = Path("D:\\Eye_Dataset\\Sub004\\230928_anatomical_MREYE_study\\ET_EDF")
edf_name = "HV2.EDF"
file_path = str(DATA_PATH / edf_name)
# Alternative input used during development:
# file_path = "D:\\Eye_Dataset\\Sub001\\230928_anatomical_MREYE_study\\ET_EDF\\Bold_GR4.edf"
print(file_path)

# read_edf returns three objects, unpacked here as recording, events and messages
ori_recording, ori_events, ori_messages = read_edf(file_path)

# Normalize the header names generated by pyedfread and convert to BIDS naming
_message_column_names = {
    "message": "trialid",
    "trial": "trial",
    "time": "timestamp",
}
ori_messages = ori_messages.rename(columns=_message_column_names)

# Working names used by the rest of the notebook (same objects, no copies are made)
recording, events, messages = ori_recording, ori_events, ori_messages
print(f'after replacing the column name: \n{messages}')

D:\Eye_Dataset\Sub004\230928_anatomical_MREYE_study\ET_EDF\HV2.EDF
after replacing the column name: 
    timestamp  trial                                            trialid
0     9459311     -1  !CAL \n>>>>>>> CALIBRATION (HV5,P-CR) FOR RIGH...
1     9459311     -1                           !CAL Calibration points:
2     9459311     -1                !CAL -27.7, -34.6         0,      0
3     9459311     -1                !CAL -26.9, -51.6         0,  -2457
4     9459311     -1                !CAL -26.7, -17.4         0,   2457
..        ...    ...                                                ...
86    9605060     -1                GAZE_COORDS 0.00 0.00 800.00 600.00
87    9605060     -1                                THRESHOLDS R 68 179
88    9605060     -1                             ELCL_PROC ELLIPSE  (5)
89    9605060     -1  ELCL_EFIT_PARAMS 1.01 4.00  0.15 0.05  0.65 0....
90    9605061     -1                         !MODE RECORD CR 1000 2 0 R

[91 rows x 3 columns]


# 1 Parsing the messages

In [30]:
# Strip surrounding whitespace from every column name, then discard repeated rows
_stripped_column_names = {name: name.strip() for name in messages.columns}
messages = messages.rename(columns=_stripped_column_names).drop_duplicates()

In [31]:
# Extract calibration headers
# Rows whose message text starts with "!CAL" are set aside as the calibration log.
# NOTE(review): this reads from `ori_messages` (i.e., before the column-name strip
# and de-duplication applied to `messages`), and `calibration` is assigned again
# from `messages` in the next cell, so this cell looks redundant — confirm.
_cal_hdr = ori_messages.trialid.str.startswith("!CAL")
calibration = ori_messages[_cal_hdr]
# messages = messages.drop(messages.index[_cal_hdr])

In [32]:
# Extracting the StartTime and StopTime metadata.
message_first_trigger = 'start'
message_last_trigger = 'end'
metadata = {
    'StopTime': None,
    'StartTime': None
}

# Extract calibration headers first: the start/stop masks below must be computed
# on (and stay aligned with) the frame they are applied to. Otherwise a "!CAL"
# message containing a trigger word makes `.any()` true while the selection
# from the filtered frame is empty.
_cal_hdr = messages.trialid.str.startswith("!CAL")
calibration = messages[_cal_hdr]
messages = messages.drop(messages.index[_cal_hdr])

# Find the start/stop trigger messages (case-insensitive regex search, so any
# message merely containing the trigger text matches)
start_rows = messages.trialid.str.contains(
    message_first_trigger, case=False, regex=True
)
stop_rows = messages.trialid.str.contains(
    message_last_trigger, case=False, regex=True
)

# Pick the LAST of the start messages.
# The time column of `messages` is called "timestamp" in this notebook.
metadata["StartTime"] = (
    int(messages.loc[start_rows, "timestamp"].values[-1])
    if start_rows.any()
    else None
)

# Pick the FIRST of the stop messages
metadata["StopTime"] = (
    int(messages.loc[stop_rows, "timestamp"].values[0])
    if stop_rows.any()
    else None
)

# Drop start and stop messages from messages dataframe
messages = messages.loc[~start_rows & ~stop_rows, :]

metadata

{'StopTime': None, 'StartTime': None}

In [33]:
# Extracting basic metadata.
# Example message: !MODE RECORD CR 1000 2 0 R

mode_record = messages["trialid"].str.startswith("!MODE RECORD")

# Defaults, replaced only when the last !MODE RECORD message parses
meta_record = dict(freq=DEFAULT_FREQUENCY, mode=DEFAULT_MODE, eye=DEFAULT_EYE)

if mode_record.any():
    try:
        meta_record = re.match(
            r"\!MODE RECORD (?P<mode>\w+) (?P<freq>\d+) \d \d (?P<eye>[RL]+)",
            messages.loc[mode_record, "trialid"].iloc[-1].strip(),
        ).groupdict()

        # Translate the eye code into an eye name
        meta_record["eye"] = EYE_CODE_MAP[meta_record["eye"]]
        # The mode "CR" is reported as "P-CR"; any other mode is kept verbatim
        if meta_record["mode"] == "CR":
            meta_record["mode"] = "P-CR"
    except AttributeError:
        # re.match returned None: the message does not follow the expected layout
        warn(
            "Error extracting !MODE RECORD message, "
            "using default frequency, mode, and eye"
        )
    finally:
        # The !MODE RECORD rows are consumed whether or not parsing succeeded
        messages = messages.loc[~mode_record]

# Tuple of eye names; position + 1 is the eye number used in column names
if meta_record["eye"] == "both":
    eye = ("right", "left")
else:
    eye = (meta_record["eye"],)

metadata.update(
    {
        "SamplingFrequency": int(meta_record["freq"]),
        "EyeTrackingMethod": meta_record["mode"],
        "RecordedEye": meta_record["eye"],
    }
)

In [34]:
# Extracting screen parameters.
# Example message: GAZE_COORDS 0.00 0.00 800.00 600.00

# Locate the GAZE_COORDS messages, which signal the start of a recording
gaze_msg = messages["trialid"].str.startswith("GAZE_COORDS")

# Default area of interest, kept unless the last GAZE_COORDS message parses
metadata["ScreenAOIDefinition"] = ["square", DEFAULT_SCREEN]

if gaze_msg.any():
    try:
        gaze_record = re.match(
            r"GAZE_COORDS (\d+\.\d+) (\d+\.\d+) (\d+\.\d+) (\d+\.\d+)",
            messages.loc[gaze_msg, "trialid"].iloc[-1].strip(),
        ).groups()
        # The four captured values are stored in the order 0, 2, 1, 3,
        # each rounded to the nearest integer
        metadata["ScreenAOIDefinition"][1] = [
            int(round(float(gaze_record[position]))) for position in (0, 2, 1, 3)
        ]
    except AttributeError:
        # re.match returned None: the message does not follow the expected layout
        warn("Error extracting GAZE_COORDS")
    finally:
        # The GAZE_COORDS rows are consumed whether or not parsing succeeded
        messages = messages.loc[~gaze_msg]

print(metadata)

{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]]}


In [35]:
# Extracting parameters of the pupil fit model.
# Example messages:
#   ELCL_PROC ELLIPSE (5)
#   ELCL_EFIT_PARAMS 1.01 4.00  0.15 0.05  0.65 0.65  0.00 0.00 0.30

# ELCL_PROC: name of the fit method and its number of parameters
pupilfit_msg = messages.trialid.str.startswith("ELCL_PROC")

if pupilfit_msg.any():
    try:
        # Non-empty, space-separated tokens following the "ELCL_PROC" keyword
        pupilfit_method = [
            val
            for val in messages[pupilfit_msg]
            .trialid.iloc[-1]
            .strip()
            .split(" ")[1:]
            if val
        ]
        # Parse both fields before touching metadata, so that a malformed
        # message cannot leave a half-written entry behind
        _pupilfit_name = pupilfit_method[0].lower()
        _pupilfit_nparams = int(pupilfit_method[1].strip("(").strip(")"))
    except (AttributeError, IndexError, ValueError):
        # IndexError: missing tokens; ValueError: the count is not an integer
        warn("Error extracting ELCL_PROC (pupil fitting method)")
    else:
        metadata["PupilFitMethod"] = _pupilfit_name
        metadata["PupilFitMethodNumberOfParameters"] = _pupilfit_nparams
    finally:
        # The ELCL_PROC rows are consumed whether or not parsing succeeded
        messages = messages.loc[~pupilfit_msg]

# ELCL_EFIT_PARAMS: numeric parameters of the fit
pupilfit_msg_params = messages.trialid.str.startswith("ELCL_EFIT_PARAMS")
if pupilfit_msg_params.any():
    try:
        rows = messages[pupilfit_msg_params]
        row = rows.trialid.values[-1].strip().split(" ")[1:]
        # Splitting on single spaces leaves empty tokens wherever the message
        # has a double space; groupby(bool) uses those gaps to group the values
        metadata["PupilFitParameters"] = [
            tuple(float(val) for val in vals)
            for k, vals in groupby(row, key=bool)
            if k
        ]
    except (AttributeError, ValueError):
        # ValueError: one of the tokens is not a number
        warn("Error extracting ELCL_EFIT_PARAMS (pupil fitting parameters)")
    finally:
        # The ELCL_EFIT_PARAMS rows are consumed whether or not parsing succeeded
        messages = messages.loc[~pupilfit_msg_params]

metadata

{'StopTime': None,
 'StartTime': None,
 'SamplingFrequency': 1000,
 'EyeTrackingMethod': 'P-CR',
 'RecordedEye': 'right',
 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]],
 'PupilFitMethod': 'ellipse',
 'PupilFitMethodNumberOfParameters': 5,
 'PupilFitParameters': [(1.01, 4.0),
  (0.15, 0.05),
  (0.65, 0.65),
  (0.0, 0.0, 0.3)]}

In [36]:
# Calibration validation.
# Example message:
#   VALIDATE R 4POINT 4 RIGHT at 752,300 OFFSET 0.35 deg. -8.7,-3.8 pix.
validation_msg = messages["trialid"].str.startswith("VALIDATE")

if validation_msg.any():
    metadata["ValidationPosition"] = []
    metadata["ValidationErrors"] = []

for validate_row in messages.loc[validation_msg, "trialid"].values:
    # Left of "OFFSET": eye and target position; right of it: the measured error
    prefix, suffix = validate_row.split("OFFSET")

    # Eye number = position of the eye name within `eye`, counted from 1
    _validated_eye_name = "right" if "RIGHT" in prefix else "left"
    validation_eye = f"eye{eye.index(_validated_eye_name) + 1}"

    # Target position: the comma-separated integers after the last "at"
    _target_text = prefix.rsplit("at", 1)[-1]
    validation_coords = [
        int(token.strip()) for token in _target_text.split(",") if token.strip()
    ]
    metadata["ValidationPosition"].append([validation_eye, validation_coords])

    # Error in degrees, followed by the x,y error in pixels
    validate_values = list(
        map(
            float,
            re.match(
                r"(-?\d+\.\d+) deg\.\s+(-?\d+\.\d+),(-?\d+\.\d+) pix\.",
                suffix.strip(),
            ).groups(),
        )
    )
    _offset_deg, *_offset_pix = validate_values
    metadata["ValidationErrors"].append(
        (validation_eye, _offset_deg, tuple(_offset_pix))
    )

# The VALIDATE rows are consumed
messages = messages.loc[~validation_msg]

print(messages)
print(metadata)

    timestamp  trial                                            trialid
83    9594300     -1  NO Reply is disabled for function eyelink_cal_...
84    9605060     -1                               RECCFG CR 1000 2 0 R
85    9605060     -1                                      ELCLCFG TOWER
87    9605060     -1                                THRESHOLDS R 68 179
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'ellipse', 'PupilFitMethodNumberOfParameters': 5, 'PupilFitParameters': [(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)], 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 3

In [37]:
# Extracting final bits of metadata.
# Only the last THRESHOLDS message is processed
thresholds_msg = messages["trialid"].str.startswith("THRESHOLDS")
if thresholds_msg.any():
    # One slot per entry of `eye`; slots that are not reported stay None
    for _threshold_key in ("PupilThreshold", "CornealReflectionThreshold"):
        metadata[_threshold_key] = [None] * len(eye)

    # Space-separated tokens following the "THRESHOLDS" keyword
    thresholds_chunks = (
        messages.loc[thresholds_msg, "trialid"].iloc[-1].strip().split(" ")[1:]
    )
    # First token is the eye code; the last two tokens are the thresholds
    eye_index = eye.index(EYE_CODE_MAP[thresholds_chunks[0]])
    for _threshold_key, _chunk_position in (
        ("PupilThreshold", -2),
        ("CornealReflectionThreshold", -1),
    ):
        metadata[_threshold_key][eye_index] = int(thresholds_chunks[_chunk_position])

# The THRESHOLDS rows are consumed
messages = messages.loc[~thresholds_msg]
print(messages)
print(metadata)

    timestamp  trial                                            trialid
83    9594300     -1  NO Reply is disabled for function eyelink_cal_...
84    9605060     -1                               RECCFG CR 1000 2 0 R
85    9605060     -1                                      ELCLCFG TOWER
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'ellipse', 'PupilFitMethodNumberOfParameters': 5, 'PupilFitParameters': [(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)], 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 1.07, (-27.4,

In [38]:
# Flush the remaining messages as a metadata entry.
# Whatever has not been consumed by the cells above is logged as
# (integer timestamp, stripped message text) pairs

if not messages.empty:
    _leftover = messages[["timestamp", "trialid"]].values
    metadata["LoggedMessages"] = [
        (int(_row[0]), _row[1].strip()) for _row in _leftover
    ]

print(messages)
print(metadata)

    timestamp  trial                                            trialid
83    9594300     -1  NO Reply is disabled for function eyelink_cal_...
84    9605060     -1                               RECCFG CR 1000 2 0 R
85    9605060     -1                                      ELCLCFG TOWER
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'ellipse', 'PupilFitMethodNumberOfParameters': 5, 'PupilFitParameters': [(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)], 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 1.07, (-27.4,

# 2 Parsing the recording dataframe

In [39]:
# Rebind `recording` to the frame returned by read_edf (same object, not a copy)
recording = ori_recording
# Display a 50-row slice of the recording for inspection
recording[20050:20100]

Unnamed: 0,time,px_left,px_right,py_left,py_right,hx_left,hx_right,hy_left,hy_right,pa_left,...,fgyvel,fhxvel,fhyvel,frxvel,fryvel,flags,input,buttons,htype,errors
20050,9625111.0,-32768.0,-3597.0,-32768.0,-4346.0,-32768.0,396.0,-32768.0,-199.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,32641.0,32768.0,0.0,-32768.0,0.0
20051,9625112.0,-32768.0,-3601.0,-32768.0,-4286.0,-32768.0,395.0,-32768.0,-123.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20052,9625113.0,-32768.0,-3605.0,-32768.0,-4225.0,-32768.0,394.0,-32768.0,-46.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20053,9625114.0,-32768.0,-3606.0,-32768.0,-4225.0,-32768.0,393.0,-32768.0,-46.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20054,9625115.0,-32768.0,-3600.0,-32768.0,-4248.0,-32768.0,398.0,-32768.0,-75.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20055,9625116.0,-32768.0,-3595.0,-32768.0,-4272.0,-32768.0,403.0,-32768.0,-106.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20056,9625117.0,-32768.0,-3596.0,-32768.0,-4297.0,-32768.0,400.0,-32768.0,-137.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20057,9625118.0,-32768.0,-3602.0,-32768.0,-4296.0,-32768.0,394.0,-32768.0,-136.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20058,9625119.0,-32768.0,-3606.0,-32768.0,-4294.0,-32768.0,390.0,-32768.0,-133.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0
20059,9625120.0,-32768.0,-3602.0,-32768.0,-4326.0,-32768.0,392.0,-32768.0,-174.0,-32768.0,...,0.0,9.584881000000001e-43,0.0,-4282.0,9.584881000000001e-43,24449.0,32768.0,0.0,-32768.0,0.0


In [40]:
# Curation of the input dataframe
# Timestamps are cast to int and only strictly positive ones are kept
recording = recording.astype({"time": int})
_positive_time = recording["time"] > 0
recording = recording[_positive_time]

# Header fixes for buggy pyedfread names, currently disabled:
#     "fhxyvel": "fhxvel",
#     "frxyvel": "frxvel",
_recording_column_names = {
    # Normalize weird header names generated by pyedfread
    "rx": "screen_ppdeg_x_coordinate",
    "ry": "screen_ppdeg_y_coordinate",
    # Convert some BIDS columns
    "time": "timestamp",
}
recording = recording.rename(columns=_recording_column_names)

# Set the extra columns aside and take them out of the recording
_extra_column_names = ["flags", "input", "htype"]
extra = recording[_extra_column_names]
recording = recording.drop(columns=_extra_column_names)

# Keep only columns holding at least one value that is not (almost) zero
_not_always_zero = (recording.abs() > 1e-8).any(axis=0)
recording = recording.loc[:, _not_always_zero]
# Keep only columns holding at least one value below 1e8 in magnitude
_not_always_huge = (recording.abs() < 1e8).any(axis=0)
recording = recording.loc[:, _not_always_huge]
# Values equal to 1e8 become NaN
recording = recording.replace({1e8: np.nan})

recording

Unnamed: 0,timestamp,px_left,px_right,py_left,py_right,hx_left,hx_right,hy_left,hy_right,pa_left,pa_right,gx_left,gx_right,gy_left,gy_right,screen_ppdeg_x_coordinate,screen_ppdeg_y_coordinate,frxvel
0,9605061,-32768.0,-4125.0,-32768.0,-5222.0,-32768.0,-229.0,-32768.0,-1204.0,-32768.0,2555.0,-32768.0,376.700012,-32768.0,177.899994,26.6,26.700001,4.590794e-41
1,9605062,-32768.0,-4141.0,-32768.0,-5215.0,-32768.0,-245.0,-32768.0,-1196.0,-32768.0,2554.0,-32768.0,375.100006,-32768.0,178.699997,26.6,26.700001,4.590794e-41
2,9605063,-32768.0,-4140.0,-32768.0,-5210.0,-32768.0,-243.0,-32768.0,-1190.0,-32768.0,2554.0,-32768.0,375.299988,-32768.0,179.300003,26.6,26.600000,4.590794e-41
3,9605064,-32768.0,-4132.0,-32768.0,-5212.0,-32768.0,-235.0,-32768.0,-1192.0,-32768.0,2550.0,-32768.0,376.100006,-32768.0,179.100006,26.6,26.600000,4.590794e-41
4,9605065,-32768.0,-4124.0,-32768.0,-5213.0,-32768.0,-227.0,-32768.0,-1194.0,-32768.0,2546.0,-32768.0,376.899994,-32768.0,178.899994,26.6,26.600000,4.590794e-41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106534,10711595,-32768.0,-3101.0,-32768.0,-4102.0,-32768.0,911.0,-32768.0,109.0,-32768.0,1938.0,-32768.0,492.299988,-32768.0,311.000000,26.6,26.500000,-4.308000e+03
1106535,10711596,-32768.0,-3129.0,-32768.0,-4092.0,-32768.0,883.0,-32768.0,121.0,-32768.0,1936.0,-32768.0,489.500000,-32768.0,312.299988,26.6,26.500000,-4.308000e+03
1106536,10711597,-32768.0,-3120.0,-32768.0,-4090.0,-32768.0,893.0,-32768.0,124.0,-32768.0,1933.0,-32768.0,490.399994,-32768.0,312.600006,26.6,26.500000,-4.308000e+03
1106537,10711598,-32768.0,-3101.0,-32768.0,-4104.0,-32768.0,911.0,-32768.0,106.0,-32768.0,1930.0,-32768.0,492.299988,-32768.0,310.799988,26.6,26.500000,-4.308000e+03


In [41]:
# Remove columns that do not apply (e.g., only one eye recorded).
print(f'The eye we take care of {eye}')

# Eye names that are NOT part of `eye`; empty when both eyes were recorded
remove_eye = {"left", "right"}.difference(eye)
if remove_eye:
    remove_eye = remove_eye.pop()  # Unwrap one eye name from the set
    # Keep every column whose name does not contain that eye name
    _kept_columns = [
        column_name
        for column_name in recording.columns
        if remove_eye not in column_name
    ]
    recording = recording.reindex(columns=_kept_columns)

columns = recording.columns
print("Columns:")
print(columns)

The eye we take care of ('right',)
Columns:
Index(['timestamp', 'px_right', 'py_right', 'hx_right', 'hy_right', 'pa_right',
       'gx_right', 'gy_right', 'screen_ppdeg_x_coordinate',
       'screen_ppdeg_y_coordinate', 'frxvel'],
      dtype='object')


In [42]:
# Clean-up pupil size and gaze position.
# These are the parameters we most likely care for, so special curation is applied:
screen_resolution = [800, 600]
_max_x, _max_y = screen_resolution

for eyenum, eyename in enumerate(eye):
    _pupil_column = f"pa_{eyename}"
    _gaze_x_column = f"gx_{eyename}"
    _gaze_y_column = f"gy_{eyename}"

    # Pupil area (pa) below 1 is implausible: blank it, then give the column its BIDS name
    _implausible_pupil = recording[_pupil_column] < 1
    recording.loc[_implausible_pupil, _pupil_column] = np.nan
    recording = recording.rename(
        columns={_pupil_column: f"eye{eyenum + 1}_pupil_size"}
    )
    print(f"pa_{eyename} renamed as: eye{eyenum + 1}_pupil_size")

    # Gaze x is blanked when negative or beyond the screen width
    _gaze_x = recording[_gaze_x_column]
    recording.loc[(_gaze_x < 0) | (_gaze_x > _max_x), _gaze_x_column] = np.nan

    # Gaze y is blanked when zero, negative, or beyond the screen height
    _gaze_y = recording[_gaze_y_column]
    recording.loc[(_gaze_y <= 0) | (_gaze_y > _max_y), _gaze_y_column] = np.nan

print(recording)

pa_right renamed as: eye1_pupil_size
         timestamp  px_right  py_right  hx_right  hy_right  eye1_pupil_size  \
0          9605061   -4125.0   -5222.0    -229.0   -1204.0           2555.0   
1          9605062   -4141.0   -5215.0    -245.0   -1196.0           2554.0   
2          9605063   -4140.0   -5210.0    -243.0   -1190.0           2554.0   
3          9605064   -4132.0   -5212.0    -235.0   -1192.0           2550.0   
4          9605065   -4124.0   -5213.0    -227.0   -1194.0           2546.0   
...            ...       ...       ...       ...       ...              ...   
1106534   10711595   -3101.0   -4102.0     911.0     109.0           1938.0   
1106535   10711596   -3129.0   -4092.0     883.0     121.0           1936.0   
1106536   10711597   -3120.0   -4090.0     893.0     124.0           1933.0   
1106537   10711598   -3101.0   -4104.0     911.0     106.0           1930.0   
1106538   10711599   -3100.0   -4120.0     912.0      86.0           1926.0   

           gx_

In [43]:
# Munging columns to comply with BIDS.
# At this point, the dataframe is almost ready for writing out as BIDS.
# Interpolate BIDS column names for every column that is not already final
columns = list(
    set(recording.columns)
    - set(
        (
            "timestamp",
            "screen_ppdeg_x_coordinate",
            "screen_ppdeg_y_coordinate",
            "eye1_pupil_size",  # pa
            "eye2_pupil_size",  # pa
        )
    )
)
bids_columns = []
for name in columns:
    # Exactly one new name is produced per column, so that `columns` and
    # `bids_columns` stay paired one-to-one in the zip() below even when more
    # than one eye was recorded. The prefix is "eyeN" for the eye whose name
    # the column ends with, and empty for columns that belong to no eye.
    colprefix = next(
        (
            f"eye{eyenum + 1}"
            for eyenum, eyename in enumerate(eye)
            if name.endswith(f"_{eyename}")
        ),
        "",
    )
    # Drop the "_<eye>" suffix, e.g. "gx_right" -> "gx"
    _newname = name.split("_")[0]
    # Trailing x/y -> "_x_coordinate"; trailing xvel/yvel -> "_x_velocity"
    _newname = re.sub(r"([xy])$", r"_\1_coordinate", _newname)
    _newname = re.sub(r"([xy])vel$", r"_\1_velocity", _newname)
    # Translate the remaining EDF prefix (g, p, h, r, fg, fh, fr) into its BIDS fragment
    _newname = _newname.split("_", 1)
    _newname[0] = EDF2BIDS_COLUMNS[_newname[0]]
    _newname.insert(0, colprefix)
    # Empty fragments (no eye prefix, or the "g" prefix) are skipped when joining
    bids_columns.append("_".join((_n for _n in _newname if _n)))

# Rename columns to be BIDS-compliant
recording = recording.rename(columns=dict(zip(columns, bids_columns)))

# Reorder columns to render nicely (tracking first, pupil size after);
# columns not listed in BIDS_COLUMNS_ORDER are appended in their current order
columns = sorted(
    set(recording.columns.values).intersection(BIDS_COLUMNS_ORDER),
    key=lambda entry: BIDS_COLUMNS_ORDER.index(entry),
)
columns += [c for c in recording.columns.values if c not in columns]
recording = recording.reindex(columns=columns)

print(recording)

         eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0               376.700012         177.899994           2555.0   
1               375.100006         178.699997           2554.0   
2               375.299988         179.300003           2554.0   
3               376.100006         179.100006           2550.0   
4               376.899994         178.899994           2546.0   
...                    ...                ...              ...   
1106534         492.299988         311.000000           1938.0   
1106535         489.500000         312.299988           1936.0   
1106536         490.399994         312.600006           1933.0   
1106537         492.299988         310.799988           1930.0   
1106538         492.399994         308.700012           1926.0   

         eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                        -4125.0                  -5222.0   
1                        -4141.0                  -5215.0   
2                       

# 3 Parsing the calibration messages

In [44]:
# Show the "!CAL" rows that were set aside while parsing the messages
print(calibration)

    timestamp  trial                                            trialid
0     9459311     -1  !CAL \n>>>>>>> CALIBRATION (HV5,P-CR) FOR RIGH...
1     9459311     -1                           !CAL Calibration points:
2     9459311     -1                !CAL -27.7, -34.6         0,      0
3     9459311     -1                !CAL -26.9, -51.6         0,  -2457
4     9459311     -1                !CAL -26.7, -17.4         0,   2457
..        ...    ...                                                ...
73    9571614     -1            !CAL PCR gain ratio(x,y) = 2.828, 2.416
74    9571614     -1             !CAL CR gain match(x,y) = 1.020, 1.020
75    9571614     -1                  !CAL Slip rotation correction OFF
76    9571615     -1                !CAL CALIBRATION HV5 R RIGHT   GOOD
77    9582967     -1  !CAL VALIDATION HV5 R RIGHT FAIR ERROR 0.98 av...

[68 rows x 3 columns]


In [45]:
# Parse calibration metadata
metadata["CalibrationCount"] = 0
if not calibration.empty:
    warn("Calibration of more than one eye is not implemented")
    # `calibration` was selected out of `messages`: build a new frame rather than
    # assigning into that selection, which triggers pandas' chained-assignment
    # (SettingWithCopy) warning
    calibration = calibration.assign(
        trialid=calibration.trialid.str.replace("!CAL", "", regex=False).str.strip()
    )

    # (timestamp, message) pairs; timestamps are converted to built-in ints so
    # that the metadata holds no numpy scalars
    metadata["CalibrationLog"] = [
        (int(_cal_timestamp), _cal_message)
        for _cal_timestamp, _cal_message in zip(
            calibration.timestamp.values,
            calibration.trialid.values,
        )
    ]

    # Validation summary lines: they start with "VALIDATION" and contain "ERROR"
    calibrations_msg = calibration.trialid.str.startswith(
        "VALIDATION"
    ) & calibration.trialid.str.contains("ERROR")
    # Built-in int instead of numpy.int64, for the same reason as above
    metadata["CalibrationCount"] = int(calibrations_msg.sum())

    if calibrations_msg.any():
        # Only the last validation summary is parsed for results
        calibration_last = calibration.index[calibrations_msg][-1]
        try:
            meta_calib = re.match(
                r"VALIDATION (?P<ctype>[\w\d]+) (?P<eyeid>[RL]+) (?P<eye>RIGHT|LEFT) "
                r"(?P<result>\w+) ERROR (?P<avg>-?\d+\.\d+) avg\. (?P<max>-?\d+\.\d+) max\s+"
                r"OFFSET (?P<offsetdeg>-?\d+\.\d+) deg\. "
                r"(?P<offsetxpix>-?\d+\.\d+),(?P<offsetypix>-?\d+\.\d+) pix\.",
                calibration.loc[calibration_last, "trialid"].strip(),
            ).groupdict()

            metadata["CalibrationType"] = meta_calib["ctype"]
            metadata["AverageCalibrationError"] = [float(meta_calib["avg"])]
            metadata["MaximalCalibrationError"] = [float(meta_calib["max"])]
            metadata["CalibrationResultQuality"] = [meta_calib["result"]]
            metadata["CalibrationResultOffset"] = [
                float(meta_calib["offsetdeg"]),
                (float(meta_calib["offsetxpix"]), float(meta_calib["offsetypix"])),
            ]
            metadata["CalibrationResultOffsetUnits"] = ["deg", "pixels"]
        except AttributeError:
            # re.match returned None: the summary does not follow the expected layout
            warn("Calibration data found but unsuccessfully parsed for results")
    else:
        # Without this guard, indexing the last summary raises IndexError
        warn("Calibration data found but it contains no validation summary")


print(calibration)

    timestamp  trial                                            trialid
0     9459311     -1  >>>>>>> CALIBRATION (HV5,P-CR) FOR RIGHT: <<<<...
1     9459311     -1                                Calibration points:
2     9459311     -1                     -27.7, -34.6         0,      0
3     9459311     -1                     -26.9, -51.6         0,  -2457
4     9459311     -1                     -26.7, -17.4         0,   2457
..        ...    ...                                                ...
73    9571614     -1                 PCR gain ratio(x,y) = 2.828, 2.416
74    9571614     -1                  CR gain match(x,y) = 1.020, 1.020
75    9571614     -1                       Slip rotation correction OFF
76    9571615     -1                     CALIBRATION HV5 R RIGHT   GOOD
77    9582967     -1  VALIDATION HV5 R RIGHT FAIR ERROR 0.98 avg. 1....

[68 rows x 3 columns]


  after removing the cwd from sys.path.


# 4 Parsing the events dataframe

In [46]:
print(recording)

# Process events: first generate empty (all-zero) flag columns
for _flag_column in ("eye1_fixation", "eye1_saccade", "eye1_blink"):
    recording[_flag_column] = 0


def _samples_within(event):
    """Boolean mask of the samples whose timestamp lies within [start, end] of ``event``."""
    _timestamps = recording["timestamp"]
    return (_timestamps >= event["start"]) & (_timestamps <= event["end"])


# Add fixations
_fixation_events = events[events["type"] == "fixation"]
for _, fixation_event in _fixation_events.iterrows():
    recording.loc[_samples_within(fixation_event), "eye1_fixation"] = 1

# Add saccades, and blinks, which are a sub-event of saccades
_saccade_events = events[events["type"] == "saccade"]
for _, saccade_event in _saccade_events.iterrows():
    _in_saccade = _samples_within(saccade_event)
    recording.loc[_in_saccade, "eye1_saccade"] = 1

    # A saccade flagged as a blink marks the same samples in the blink column
    if saccade_event["blink"] == 1:
        recording.loc[_in_saccade, "eye1_blink"] = 1

         eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0               376.700012         177.899994           2555.0   
1               375.100006         178.699997           2554.0   
2               375.299988         179.300003           2554.0   
3               376.100006         179.100006           2550.0   
4               376.899994         178.899994           2546.0   
...                    ...                ...              ...   
1106534         492.299988         311.000000           1938.0   
1106535         489.500000         312.299988           1936.0   
1106536         490.399994         312.600006           1933.0   
1106537         492.299988         310.799988           1930.0   
1106538         492.399994         308.700012           1926.0   

         eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                        -4125.0                  -5222.0   
1                        -4141.0                  -5215.0   
2                       

In [47]:
# Show the recording after the fixation/saccade/blink columns were filled in
print(recording)

         eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0               376.700012         177.899994           2555.0   
1               375.100006         178.699997           2554.0   
2               375.299988         179.300003           2554.0   
3               376.100006         179.100006           2550.0   
4               376.899994         178.899994           2546.0   
...                    ...                ...              ...   
1106534         492.299988         311.000000           1938.0   
1106535         489.500000         312.299988           1936.0   
1106536         490.399994         312.600006           1933.0   
1106537         492.299988         310.799988           1930.0   
1106538         492.399994         308.700012           1926.0   

         eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                        -4125.0                  -5222.0   
1                        -4141.0                  -5215.0   
2                       

# 5 Write the data into BIDS structure

In [48]:
from copy import deepcopy

# Store the column names of `recording` in the metadata under 'Columns'.
metadata['Columns'] = recording.columns.tolist()
print(metadata)
# Keep an independent deep copy so this state of the metadata can be restored later.
save_metadata = deepcopy(metadata)
# Disabled: remove 'CalibrationLog' from the metadata and print it again.
# metadata.pop('CalibrationLog', None)
# print(metadata)

{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'ellipse', 'PupilFitMethodNumberOfParameters': 5, 'PupilFitParameters': [(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)], 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 1.07, (-27.4, 9.2)), ('eye1', 2.07, (-19.8, 52.0)), ('eye1', 1.02, (-27.4, -2.0)), ('eye1', 0.25, (-4.4, -5.1)), ('eye1', 0.99, (9.8, 24.7)), ('eye1', 2.3, (35.1, -50.9)), ('eye1', 1.54, (-29.5, -29.2)), ('eye1', 1.21, (19.4, 26.2)), ('eye1', 1.22, (32.6, -3.4)), ('eye1', 0.06, (0.4, 1.5)), ('eye1', 

In [49]:
# Restore the working metadata from the snapshot. A deep copy is taken so that
# later in-place edits of `metadata` cannot alter `save_metadata` itself.
metadata = deepcopy(save_metadata)

In [50]:
# Debug dump: print the Python type of every value stored in `metadata`.
if isinstance(metadata, dict):
    for k, v in metadata.items():
        if isinstance(v, dict):
            # Nested mapping: report the type of each inner value.
            # (`dict.items()`, not `.item()`, which does not exist on dict.)
            for _k, _v in v.items():
                print(f"_k_v--{_k}: {type(_v)}")
        elif isinstance(v, list) and k == 'CalibrationLog':
            # Each calibration-log entry is unpacked as a (timestamp, info) pair.
            for entry in v:
                print(f'entry: {entry}')
                timestamp, info = entry
                print(f'timestamp: {type(timestamp)}')
                print(f'info: {type(info)}')
        else:
            print(f"-k-v--{k}:{type(v)}--{v}")
  

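# The dump above reports `-k-v--CalibrationCount:<class 'numpy.int64'>`, i.e. a NumPy
# integer rather than a built-in int; the helper below casts such values to int.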

def convert_to_int(metadata):
    """Cast integer values in the metadata to built-in ``int``.

    Only two keys are touched, and only when present:

    * ``'CalibrationCount'`` -- converted when it is an integer scalar.
    * ``'CalibrationLog'`` -- every entry whose first element is an integer is
      replaced by the 2-tuple ``(int(entry[0]), entry[1])``; all other entries
      are kept unchanged.

    Any NumPy integer width is recognised (``np.integer``), not only
    ``np.int32`` and ``np.int64``.

    Parameters
    ----------
    metadata : dict
        Metadata mapping. It is modified in place.

    Returns
    -------
    dict
        The same ``metadata`` object that was passed in.
    """
    # np.integer is the common base class of all signed/unsigned NumPy integers.
    int_types = (np.integer, int)

    if 'CalibrationCount' in metadata:
        count = metadata['CalibrationCount']
        if isinstance(count, int_types):
            metadata['CalibrationCount'] = int(count)

    if "CalibrationLog" in metadata:
        metadata["CalibrationLog"] = [
            (int(entry[0]), entry[1]) if isinstance(entry[0], int_types) else entry
            for entry in metadata['CalibrationLog']
        ]
    return metadata

        
# `convert_to_int` edits `metadata` in place and returns it, so
# `convert_metadata` refers to the same object as `metadata`.
convert_metadata = convert_to_int(metadata)
print(convert_metadata)

-k-v--StopTime:<class 'NoneType'>--None
-k-v--StartTime:<class 'NoneType'>--None
-k-v--SamplingFrequency:<class 'int'>--1000
-k-v--EyeTrackingMethod:<class 'str'>--P-CR
-k-v--RecordedEye:<class 'str'>--right
-k-v--ScreenAOIDefinition:<class 'list'>--['square', [0, 800, 0, 600]]
-k-v--PupilFitMethod:<class 'str'>--ellipse
-k-v--PupilFitMethodNumberOfParameters:<class 'int'>--5
-k-v--PupilFitParameters:<class 'list'>--[(1.01, 4.0), (0.15, 0.05), (0.65, 0.65), (0.0, 0.0, 0.3)]
-k-v--ValidationPosition:<class 'list'>--[['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]], ['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]]
-k-v--ValidationErrors:<class 'list'>--[('eye1', 1.07, (-27.4, 9.2)), ('eye1', 2.07, (-19.8, 52.0)), ('eye1', 1.02, (-27.4, -2.0)), ('eye1', 0.25, (-4.4, -

In [51]:
# Load the autoreload extension
%load_ext autoreload
# Set autoreload to update the modules every time before executing a new line of code
%autoreload 2

import importlib
from write_bids_yiwei import EyeTrackingRun, write_bids, write_bids_from_df
out_dir = DATA_PATH
edf_extension = 'EDF'
edf_name = edf_name
filename = edf_name.split('.')[0]
print(f'bid filename: {filename}')

write_bids_from_df(
    recording, convert_metadata,
    out_dir,
    filename,
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
bid filename: HV2


('D:\\Eye_Dataset\\Sub004\\230928_anatomical_MREYE_study\\ET_EDF\\HV2.tsv.gz',
 'D:\\Eye_Dataset\\Sub004\\230928_anatomical_MREYE_study\\ET_EDF\\HV2.json')

Now the output files have been generated:
- EDF Path
    - \<filename\>.EDF
    - \<filename\>.tsv.gz
    - \<filename\>.json

In [52]:
# Print the plain-text repr of `recording` again, after the export cell has run.
print(recording)

         eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0               376.700012         177.899994           2555.0   
1               375.100006         178.699997           2554.0   
2               375.299988         179.300003           2554.0   
3               376.100006         179.100006           2550.0   
4               376.899994         178.899994           2546.0   
...                    ...                ...              ...   
1106534         492.299988         311.000000           1938.0   
1106535         489.500000         312.299988           1936.0   
1106536         490.399994         312.600006           1933.0   
1106537         492.299988         310.799988           1930.0   
1106538         492.399994         308.700012           1926.0   

         eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                        -4125.0                  -5222.0   
1                        -4141.0                  -5215.0   
2                       