 Derived from
 
 https://www.axonlab.org/hcph-sops/data-management/edf-to-bids/
 
 https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

Make sure the Python version is >= 3.7, which is required by the `from __future__ import annotations` statement below.

In [1]:
from __future__ import annotations 
from pathlib import Path
import pandas as pd
import numpy as np
from pyedfread import read_edf
from collections import defaultdict
from itertools import product, groupby
from warnings import warn
import re

In [2]:
# Module-level constants, adapted from
# https://github.com/TheAxonLab/hcph-sops/blob/mkdocs/code/eyetracking/convert.py

# If setting WRITE_RAW_EDF as True, no preprocessing will be conducted on the edf data
# NOTE(review): this flag is not read by any of the cells visible here -- confirm
# it is still needed.
WRITE_RAW_EDF = True
# -------------------------------------------------------------------------------
# Fallback values used when the matching EDF message is absent or cannot be parsed.
DEFAULT_EYE = "right"
DEFAULT_FREQUENCY = 1000
DEFAULT_MODE = "P-CR"
# Same element order as the list built from the GAZE_COORDS message further down.
DEFAULT_SCREEN = (0, 800, 0, 600)

# EyeLink calibration coordinates from
# https://www.sr-research.com/calibration-coordinate-calculator/
# NOTE(review): not referenced by the cells visible here -- the original author
# wondered whether these values affect the results; confirm before relying on them.
EYELINK_CALIBRATION_COORDINATES = [
    (400, 300),
    (400, 51),
    (400, 549),
    (48, 300),
    (752, 300),
    (48, 51),
    (752, 51),
    (48, 549),
    (752, 549),
    (224, 176),
    (576, 176),
    (224, 424),
    (576, 424),
]

# Eye codes found in EDF messages -> eye name; any other code maps to "unknown".
# The `!MODE RECORD` regex used later accepts the letters in either order
# (`[RL]+`), so both "RL" and "LR" must resolve to "both"; otherwise a binocular
# file would be labelled "unknown" and the per-eye column look-ups would fail.
EYE_CODE_MAP = defaultdict(
    lambda: "unknown",
    {"R": "right", "L": "left", "RL": "both", "LR": "both"},
)

# Prefix of a pyedfread column name -> fragment used in the BIDS column name.
EDF2BIDS_COLUMNS = {
    "g": '',
    "p": "pupil",
    "h": "href",
    "r": "raw",
    "fg": "fast",
    "fh": "fast_href",
    "fr": "fast_raw",
}

# Preferred left-to-right order of the output columns; columns that are not
# listed here are appended after these when the dataframe is reordered.
BIDS_COLUMNS_ORDER = (
    [f"eye{num}_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_pupil_size" for num in (1, 2)]
    + [f"eye{num}_pupil_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_fixation" for num in (1, 2)]
    + [f"eye{num}_saccade" for num in (1, 2)]
    + [f"eye{num}_blink" for num in (1, 2)]
    + [f"eye{num}_href_{c}_coordinate" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_href_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"eye{num}_raw_{c}_velocity" for num, c in product((1, 2), ("x", "y"))]
    + [f"fast_{c}_velocity" for c in ("x", "y")]
    + [f"fast_{kind}_{c}_velocity" for kind, c in product(("href", "raw"), ("x", "y"))]
    + [f"screen_ppdeg_{c}_coordinate" for c in ("x", "y")]
    + ["timestamp"]
)


Read in the edf file

In [6]:
# Choose which participant / session to convert.
subject_idx = 1
T_idx = 1

# (recording directory, EDF file name) for each known participant index.
_SUBJECT_FILES = {
    1: (
        "/Users/cag/Documents/Dataset/1_Pilot_MREye_Data/Sub001/230928_anatomical_MREYE_study/ET_EDF",
        f"JB{T_idx}.EDF",
    ),
    2: (
        "C:\\yiwei\\1_Pilot_MREye_Data\\Sub002\\230926_anatomical_MREYE_study\\ET_EDF",
        f"BF_T{T_idx}.EDF",
    ),
    3: (
        "/Users/cag/Documents/Dataset/1_Pilot_MREye_Data/Sub003/230928_anatomical_MREYE_study/ET_EDF",
        f"OT{2*T_idx}.EDF",  # e.g. "OT4.EDF"
    ),
}
# Every other index falls back to the fourth participant.
_FALLBACK_FILES = (
    "C:\\yiwei\\1_Pilot_MREye_Data\\Sub004\\230923_anatomical_MREYE_study\\ET_EDF",
    f"HV{T_idx}.EDF",
)

_data_dir, edf_name = _SUBJECT_FILES.get(subject_idx, _FALLBACK_FILES)
DATA_PATH = Path(_data_dir)

file_path = str(DATA_PATH / edf_name)
print(file_path)

# read_edf yields three dataframes: samples, events and messages.
ori_recording, ori_events, ori_messages = read_edf(file_path)

# Peek at a hundred samples from the middle of the recording.
print(f" {ori_recording[100000:100100]}")

# Give the message columns the names used by the rest of the notebook.
_message_renames = {
    # Normalize weird header names generated by pyedfread
    "message": "trialid",
    "trial": "trial",
    # Convert some BIDS columns
    "time": "timestamp",
}
ori_messages = ori_messages.rename(columns=_message_renames)

# Working handles; these are the same objects as the originals, not copies.
recording = ori_recording
messages = ori_messages
events = ori_events
print(f'\nThe entire info of `message`: \n{messages}')
recording.columns

/Users/cag/Documents/Dataset/1_Pilot_MREye_Data/Sub001/230928_anatomical_MREYE_study/ET_EDF/JB1.EDF
              time  px_left  px_right  py_left  py_right  hx_left  hx_right  \
100000  1184634.0 -32768.0   -1187.0 -32768.0   -6464.0 -32768.0      87.0   
100001  1184635.0 -32768.0   -1192.0 -32768.0   -6457.0 -32768.0      82.0   
100002  1184636.0 -32768.0   -1195.0 -32768.0   -6449.0 -32768.0      80.0   
100003  1184637.0 -32768.0   -1191.0 -32768.0   -6437.0 -32768.0      84.0   
100004  1184638.0 -32768.0   -1191.0 -32768.0   -6433.0 -32768.0      84.0   
...           ...      ...       ...      ...       ...      ...       ...   
100095  1184729.0 -32768.0   -1155.0 -32768.0   -6439.0 -32768.0     114.0   
100096  1184730.0 -32768.0   -1176.0 -32768.0   -6434.0 -32768.0      96.0   
100097  1184731.0 -32768.0   -1189.0 -32768.0   -6432.0 -32768.0      86.0   
100098  1184732.0 -32768.0   -1199.0 -32768.0   -6440.0 -32768.0      77.0   
100099  1184733.0 -32768.0   -1196.0 -327

Index(['time', 'px_left', 'px_right', 'py_left', 'py_right', 'hx_left',
       'hx_right', 'hy_left', 'hy_right', 'pa_left', 'pa_right', 'gx_left',
       'gx_right', 'gy_left', 'gy_right', 'rx', 'ry', 'gxvel_left',
       'gxvel_right', 'gyvel_left', 'gyvel_right', 'hxvel_left', 'hxvel_right',
       'hyvel_left', 'hyvel_right', 'rxvel_left', 'rxvel_right', 'ryvel_left',
       'ryvel_right', 'fgxvel', 'fgyvel', 'fhxvel', 'fhyvel', 'frxvel',
       'fryvel', 'flags', 'input', 'buttons', 'htype', 'errors'],
      dtype='object')

# 1 Parsing the messages

In [11]:
# Trim stray whitespace from the column labels, then discard repeated rows.
_stripped_labels = {label: label.strip() for label in messages.columns.values}
messages = messages.rename(columns=_stripped_labels)
messages = messages.drop_duplicates()

In [12]:

# Preview the calibration log: every message whose text begins with "!CAL".
# The rows are only displayed here; `messages` itself is left untouched.
_cal_hdr = ori_messages["trialid"].str.startswith("!CAL")
calibration = ori_messages.loc[_cal_hdr]
print(calibration)

    timestamp  trial                                            trialid
0     1047679     -1  !CAL \n>>>>>>> CALIBRATION (HV5,P-CR) FOR RIGH...
1     1047679     -1                           !CAL Calibration points:
2     1047680     -1                !CAL -10.3, -50.9         0,      0
3     1047680     -1                 !CAL -8.9, -71.9         0,  -2457
4     1047680     -1                 !CAL -9.3, -29.2         0,   2457
5     1047680     -1                !CAL -41.2, -50.5     -3474,      0
6     1047680     -1                !CAL  24.2, -49.8      3474,      0
7     1047680     -1  !CAL eye check box: (L,R,T,B)\n\t  -48    31  ...
8     1047680     -1  !CAL href cal range: (L,R,T,B)\n\t-5211  5211 ...
9     1047680     -1  !CAL Cal coeff:(X=a+bx+cy+dxx+eyy,Y=f+gx+goaly...
10    1047680     -1    !CAL Prenormalize: offx, offy = -10.326 -50.929
11    1047680     -1        !CAL Gains: cx:102.441 lx:116.794 rx:88.586
12    1047680     -1       !CAL Gains: cy:109.181 ty:118.022 by:

In [13]:
# Extracting the StartTime and StopTime metadata.
# Both triggers are matched case-insensitively anywhere inside the message text.
message_first_trigger = 'MODE RECORD'
message_last_trigger = 'end'
metadata = {
    'StopTime': None,
    'StartTime': None
}

# Set the calibration rows aside FIRST. The start/stop masks below must be
# computed on the very frame they index: a mask built before this drop would be
# misaligned with `messages`, and a trigger that only matched a "!CAL" row would
# report a hit while selecting no rows (IndexError on `.values[...]`).
_cal_hdr = messages.trialid.str.startswith("!CAL")
calibration = messages[_cal_hdr]
messages = messages.drop(messages.index[_cal_hdr])

# Locate the start / stop trigger messages among the remaining rows.
start_rows = messages.trialid.str.contains(
    message_first_trigger, case=False, regex=True
)
stop_rows = messages.trialid.str.contains(
    message_last_trigger, case=False, regex=True
)

# Pick the LAST of the start messages
if start_rows.any():
    metadata["StartTime"] = int(messages.loc[start_rows, "timestamp"].values[-1])

# Pick the FIRST of the stop messages
if stop_rows.any():
    metadata["StopTime"] = int(messages.loc[stop_rows, "timestamp"].values[0])

# Drop start and stop messages from messages dataframe
# NOTE(review): with the 'MODE RECORD' trigger, any "!MODE RECORD ..." row is
# removed here, so the later cell that parses "!MODE RECORD" will not see it and
# will keep its defaults -- confirm this is intended.
messages = messages.loc[~start_rows & ~stop_rows, :]

metadata

{'StopTime': None, 'StartTime': None}

In [14]:
# Recording mode, sampling frequency and recorded eye(s), e.g.
# !MODE RECORD CR 1000 2 0 R

mode_record = messages.trialid.str.startswith("!MODE RECORD")

# Start from the defaults; they survive when no usable message is found.
meta_record = dict(freq=DEFAULT_FREQUENCY, mode=DEFAULT_MODE, eye=DEFAULT_EYE)

if mode_record.any():
    # Only the most recent "!MODE RECORD" message is considered.
    _mode_line = messages[mode_record].trialid.iloc[-1].strip()
    _mode_match = re.match(
        r"\!MODE RECORD (?P<mode>\w+) (?P<freq>\d+) \d \d (?P<eye>[RL]+)",
        _mode_line,
    )
    if _mode_match is None:
        warn(
            "Error extracting !MODE RECORD message, "
            "using default frequency, mode, and eye"
        )
    else:
        meta_record = _mode_match.groupdict()
        meta_record["eye"] = EYE_CODE_MAP[meta_record["eye"]]
        if meta_record["mode"] == "CR":
            meta_record["mode"] = "P-CR"
    # The message has been consumed, whether or not it could be parsed.
    messages = messages.loc[~mode_record]

# Tuple of recorded eyes; its order defines the eye1 / eye2 numbering used later.
if meta_record["eye"] == "both":
    eye = ("right", "left")
else:
    eye = (meta_record["eye"],)

metadata["SamplingFrequency"] = int(meta_record["freq"])
metadata["EyeTrackingMethod"] = meta_record["mode"]
metadata["RecordedEye"] = meta_record["eye"]

In [15]:
# Screen area of interest, taken from a message such as
# GAZE_COORDS 0.00 0.00 800.00 600.00

gaze_msg = messages.trialid.str.startswith("GAZE_COORDS")

# Default definition; its second element is replaced when parsing succeeds.
metadata["ScreenAOIDefinition"] = ["square", DEFAULT_SCREEN]

if gaze_msg.any():
    # Only the most recent GAZE_COORDS message is considered.
    _gaze_match = re.match(
        r"GAZE_COORDS (\d+\.\d+) (\d+\.\d+) (\d+\.\d+) (\d+\.\d+)",
        messages[gaze_msg].trialid.iloc[-1].strip(),
    )
    if _gaze_match is None:
        warn("Error extracting GAZE_COORDS")
    else:
        _first, _second, _third, _fourth = (
            int(round(float(value))) for value in _gaze_match.groups()
        )
        # Stored as [1st, 3rd, 2nd, 4th] value of the message.
        metadata["ScreenAOIDefinition"][1] = [_first, _third, _second, _fourth]
    # The message has been consumed, whether or not it could be parsed.
    messages = messages.loc[~gaze_msg]

print(metadata)

{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]]}


In [16]:
# Extracting parameters of the pupil fit model.
# ELCL_PROC ELLIPSE (5)
# ELCL_EFIT_PARAMS 1.01 4.00  0.15 0.05  0.65 0.65  0.00 0.00 0.30
# Extract ELCL_PROC AND ELCL_EFIT_PARAMS to extract pupil fit method
pupilfit_msg = messages.trialid.str.startswith("ELCL_PROC")

if pupilfit_msg.any():
    try:
        # Non-empty tokens after the "ELCL_PROC" keyword of the last message.
        pupilfit_method = [
            val
            for val in messages[pupilfit_msg]
            .trialid.iloc[-1]
            .strip()
            .split(" ")[1:]
            if val
        ]
        _fit_name = pupilfit_method[0].lower()
        _fit_nparams = int(pupilfit_method[1].strip("(").strip(")"))
    except (AttributeError, IndexError, ValueError):
        # A missing token raises IndexError and a non-numeric count raises
        # ValueError; both must reach the warning instead of aborting the cell.
        warn("Error extracting ELCL_PROC (pupil fitting method)")
    else:
        # Only store the pair once both values were parsed successfully.
        metadata["PupilFitMethod"] = _fit_name
        metadata["PupilFitMethodNumberOfParameters"] = _fit_nparams
    finally:
        messages = messages.loc[~pupilfit_msg]

pupilfit_msg_params = messages.trialid.str.startswith("ELCL_EFIT_PARAMS")
if pupilfit_msg_params.any():
    rows = messages[pupilfit_msg_params]
    row = rows.trialid.values[-1].strip().split(" ")[1:]
    try:
        # Consecutive non-empty tokens form one tuple; an empty token (produced
        # by a double space in the message) starts the next tuple.
        metadata["PupilFitParameters"] = [
            tuple(float(val) for val in vals)
            for k, vals in groupby(row, key=bool)
            if k
        ]
    except (AttributeError, ValueError):
        # float() raises ValueError on a malformed number.
        warn("Error extracting ELCL_EFIT_PARAMS (pupil fitting parameters)")
    finally:
        messages = messages.loc[~pupilfit_msg_params]

metadata

{'StopTime': None,
 'StartTime': None,
 'SamplingFrequency': 1000,
 'EyeTrackingMethod': 'P-CR',
 'RecordedEye': 'right',
 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]],
 'PupilFitMethod': 'centroid',
 'PupilFitMethodNumberOfParameters': 3}

In [17]:
# Calibration validation.
# VALIDATE R 4POINT 4 RIGHT at 752,300 OFFSET 0.35 deg. -8.7,-3.8 pix.
# Extract VALIDATE messages for a calibration validation
validation_msg = messages.trialid.str.startswith("VALIDATE")

if validation_msg.any():
    metadata["ValidationPosition"] = []
    metadata["ValidationErrors"] = []

# Text after "OFFSET": the error in degrees followed by the x,y error in pixels.
_offset_pattern = re.compile(
    r"(-?\d+\.\d+) deg\.\s+(-?\d+\.\d+),(-?\d+\.\d+) pix\."
)

for validate_row in messages[validation_msg].trialid.values:
    try:
        prefix, suffix = validate_row.split("OFFSET")
        # Number the eye by its position in the `eye` tuple.
        validation_eye = (
            f"eye{eye.index('right') + 1}"
            if "RIGHT" in prefix
            else f"eye{eye.index('left') + 1}"
        )
        # Target position: the comma-separated integers after the last "at".
        validation_coords = [
            int(val.strip())
            for val in prefix.rsplit("at", 1)[-1].split(",")
            if val.strip()
        ]
        validate_values = [
            float(val) for val in _offset_pattern.match(suffix.strip()).groups()
        ]
    except (ValueError, AttributeError):
        # ValueError: no single "OFFSET", eye not in `eye`, or non-integer
        # coordinates. AttributeError: the offset text did not match.
        # Skip the row so the two lists below always stay the same length.
        warn(f"Skipping unparsable VALIDATE message: {validate_row!r}")
        continue

    metadata["ValidationPosition"].append(
        [validation_eye, validation_coords]
    )
    metadata["ValidationErrors"].append(
        (validation_eye, validate_values[0], tuple(validate_values[1:]))
    )
messages = messages.loc[~validation_msg]

print(messages)
print(metadata)

    timestamp  trial                                            trialid
27    1066471     -1  NO Reply is disabled for function eyelink_cal_...
28    1084633     -1                               RECCFG CR 1000 2 0 R
29    1084633     -1                                      ELCLCFG TOWER
31    1084633     -1                                THRESHOLDS R 68 179
33    1084633     -1                               ELCL_PCR_PARAM 5 3.0
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'centroid', 'PupilFitMethodNumberOfParameters': 3, 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 0.45, (-12.1, 1.8)), ('eye1', 0.21, (-5.6, 0.7)), ('eye1', 0.41, (-11.1, 1.8)), ('eye1', 0.5, (-12.7, -4.4)), ('eye1', 0.79, (-10.8, -18.3))]}


In [18]:
# Pupil and corneal-reflection thresholds, read from the most recent
# THRESHOLDS message (e.g. "THRESHOLDS R 68 179").
thresholds_msg = messages.trialid.str.startswith("THRESHOLDS")
if thresholds_msg.any():
    # One slot per recorded eye; only the eye named in the message is filled.
    _n_eyes = len(eye)
    metadata["PupilThreshold"] = [None] * _n_eyes
    metadata["CornealReflectionThreshold"] = [None] * _n_eyes

    _last_thresholds = messages[thresholds_msg].trialid.iloc[-1]
    thresholds_chunks = _last_thresholds.strip().split(" ")[1:]

    # The first token is the eye code, the last two are the threshold values.
    eye_index = eye.index(EYE_CODE_MAP[thresholds_chunks[0]])
    metadata["PupilThreshold"][eye_index] = int(thresholds_chunks[-2])
    metadata["CornealReflectionThreshold"][eye_index] = int(thresholds_chunks[-1])

messages = messages.loc[~thresholds_msg]
print(messages)
print(metadata)

    timestamp  trial                                            trialid
27    1066471     -1  NO Reply is disabled for function eyelink_cal_...
28    1084633     -1                               RECCFG CR 1000 2 0 R
29    1084633     -1                                      ELCLCFG TOWER
33    1084633     -1                               ELCL_PCR_PARAM 5 3.0
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'centroid', 'PupilFitMethodNumberOfParameters': 3, 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 0.45, (-12.1, 1.8)), ('eye1', 0.21, (-5.6, 0.7)), ('eye1', 0.41, (-11.1, 1.8)), ('eye1', 0.5, (-12.7, -4.4)), ('eye1', 0.79, (-10.8, -18.3))], 'PupilThreshold': [68], 'CornealReflectionThreshold': [179]}


In [19]:
# Whatever is still left in `messages` was not consumed by an earlier step;
# keep it in the metadata as (timestamp, text) pairs.

if not messages.empty:
    _leftover_pairs = zip(messages["timestamp"].values, messages["trialid"].values)
    metadata["LoggedMessages"] = [
        (int(_stamp), _text.strip()) for _stamp, _text in _leftover_pairs
    ]

print(messages)
print(metadata)

    timestamp  trial                                            trialid
27    1066471     -1  NO Reply is disabled for function eyelink_cal_...
28    1084633     -1                               RECCFG CR 1000 2 0 R
29    1084633     -1                                      ELCLCFG TOWER
33    1084633     -1                               ELCL_PCR_PARAM 5 3.0
{'StopTime': None, 'StartTime': None, 'SamplingFrequency': 1000, 'EyeTrackingMethod': 'P-CR', 'RecordedEye': 'right', 'ScreenAOIDefinition': ['square', [0, 800, 0, 600]], 'PupilFitMethod': 'centroid', 'PupilFitMethodNumberOfParameters': 3, 'ValidationPosition': [['eye1', [400, 300]], ['eye1', [400, 51]], ['eye1', [400, 549]], ['eye1', [48, 300]], ['eye1', [752, 300]]], 'ValidationErrors': [('eye1', 0.45, (-12.1, 1.8)), ('eye1', 0.21, (-5.6, 0.7)), ('eye1', 0.41, (-11.1, 1.8)), ('eye1', 0.5, (-12.7, -4.4)), ('eye1', 0.79, (-10.8, -18.3))], 'PupilThreshold': [68], 'CornealReflectionThreshold': [179], 'LoggedMessages': [(1066471, 'NO R

# 2 Parsing the recording dataframe

In [20]:
# Point `recording` back at the samples dataframe returned by read_edf
# (same object, not a copy), then display it.
recording = ori_recording
ori_recording

Unnamed: 0,time,px_left,px_right,py_left,py_right,hx_left,hx_right,hy_left,hy_right,pa_left,...,fgyvel,fhxvel,fhyvel,frxvel,fryvel,flags,input,buttons,htype,errors
0,1084634.0,-32768.0,1126.0,-32768.0,-4793.0,-32768.0,1972.0,-32768.0,1384.0,-32768.0,...,1.401298e-45,4.569222e+36,0.0,0.0,0.0,32641.0,32768.0,0.0,-32768.0,0.0
1,1084635.0,-32768.0,1129.0,-32768.0,-4782.0,-32768.0,1974.0,-32768.0,1394.0,-32768.0,...,1.401298e-45,4.569222e+36,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
2,1084636.0,-32768.0,1138.0,-32768.0,-4769.0,-32768.0,1981.0,-32768.0,1405.0,-32768.0,...,1.401298e-45,4.569222e+36,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
3,1084637.0,-32768.0,1147.0,-32768.0,-4766.0,-32768.0,1988.0,-32768.0,1408.0,-32768.0,...,1.401298e-45,4.569222e+36,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
4,1084638.0,-32768.0,1152.0,-32768.0,-4762.0,-32768.0,1991.0,-32768.0,1411.0,-32768.0,...,1.401298e-45,4.569222e+36,0.0,0.0,0.0,24449.0,32768.0,0.0,-32768.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
761414,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
761415,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
761416,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
761417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Curation of the input dataframe
# Timestamps become integers and only rows with a positive timestamp are kept.
recording = recording.astype({"time": int})
_has_valid_time = recording["time"] > 0
recording = recording[_has_valid_time]
raw_recording_len = len(recording)
print(f'raw_recording length: {raw_recording_len}')

# (Older pyedfread versions needed "fhxyvel"/"frxyvel" fixed as well; those
# renames are disabled here.)
_sample_renames = {
    # Normalize weird header names generated by pyedfread
    "rx": "screen_ppdeg_x_coordinate",
    "ry": "screen_ppdeg_y_coordinate",
    # Convert some BIDS columns
    "time": "timestamp",
}
recording = recording.rename(columns=_sample_renames)

# Move the auxiliary columns into their own dataframe.
_aux_columns = ["flags", "input", "htype"]
extra = recording[_aux_columns]
recording = recording.drop(columns=_aux_columns)
print(len(recording))

# Keep a column only if at least one of its values is not (almost) zero ...
_not_all_zero = (recording.abs() > 1e-8).any(axis=0)
recording = recording.loc[:, _not_all_zero]
# ... and at least one of its values is below 1e8 in magnitude.
_not_all_huge = (recording.abs() < 1e8).any(axis=0)
recording = recording.loc[:, _not_all_huge]
# Entries exactly equal to 1e8 become NaN; other large values are left as they are.
recording = recording.replace({1e8: np.nan})

# None of the column-wise steps above may change the number of samples.
assert len(recording) == raw_recording_len

raw_recording length: 754932
754932


In [22]:
# Discard the columns of an eye that was not recorded.
print(f'The eye we take care of {eye}')
remove_eye = {"left", "right"}.difference(eye)
if remove_eye:
    # Take one eye name out of the set and drop every column mentioning it.
    remove_eye = remove_eye.pop()
    _kept_columns = [
        label for label in recording.columns if remove_eye not in label
    ]
    recording = recording.reindex(columns=_kept_columns)

columns = recording.columns
print("Columns:")
print(columns)
recording

The eye we take care of ('right',)
Columns:
Index(['timestamp', 'px_right', 'py_right', 'hx_right', 'hy_right', 'pa_right',
       'gx_right', 'gy_right', 'screen_ppdeg_x_coordinate',
       'screen_ppdeg_y_coordinate', 'fgxvel', 'fhxvel', 'frxvel'],
      dtype='object')


Unnamed: 0,timestamp,px_right,py_right,hx_right,hy_right,pa_right,gx_right,gy_right,screen_ppdeg_x_coordinate,screen_ppdeg_y_coordinate,fgxvel,fhxvel,frxvel
0,1084634,1126.0,-4793.0,1972.0,1384.0,2721.0,599.799988,440.299988,27.1,26.9,1.401298e-45,4.569222e+36,0.000000e+00
1,1084635,1129.0,-4782.0,1974.0,1394.0,2718.0,600.000000,441.299988,27.1,26.9,1.401298e-45,4.569222e+36,0.000000e+00
2,1084636,1138.0,-4769.0,1981.0,1405.0,2716.0,600.700012,442.399994,27.1,26.9,1.401298e-45,4.569222e+36,0.000000e+00
3,1084637,1147.0,-4766.0,1988.0,1408.0,2709.0,601.400024,442.700012,27.1,26.9,1.401298e-45,4.569222e+36,0.000000e+00
4,1084638,1152.0,-4762.0,1991.0,1411.0,2704.0,601.799988,443.000000,27.1,26.9,1.401298e-45,4.569222e+36,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
754927,1839561,-610.0,-5978.0,577.0,348.0,5546.0,458.500000,335.299988,26.5,26.5,1.401298e-45,4.569222e+36,8.332237e+11
754928,1839562,-610.0,-5971.0,578.0,354.0,5540.0,458.500000,335.899994,26.5,26.5,1.401298e-45,4.569222e+36,8.332237e+11
754929,1839563,-610.0,-5966.0,578.0,359.0,5530.0,458.500000,336.399994,26.5,26.5,1.401298e-45,4.569222e+36,8.332237e+11
754930,1839564,-610.0,-5971.0,577.0,355.0,5520.0,458.500000,335.899994,26.5,26.5,1.401298e-45,4.569222e+36,8.332237e+11


In [23]:
# Blank out implausible pupil-size and gaze samples for every recorded eye.
# NOTE(review): the resolution is hard-coded rather than taken from
# metadata["ScreenAOIDefinition"], and x uses `< 0` while y uses `<= 0` --
# confirm both are intended.
screen_resolution = [800, 600]

for eyenum, eyename in enumerate(eye):
    _pa_col = f"pa_{eyename}"
    _gx_col = f"gx_{eyename}"
    _gy_col = f"gy_{eyename}"

    # Pupil area below 1 is treated as missing, then the column gets its
    # numbered name.
    _tiny_pupil = recording[_pa_col] < 1
    recording.loc[_tiny_pupil, _pa_col] = np.nan
    recording = recording.rename(
        columns={_pa_col: f"eye{eyenum + 1}_pupil_size"}
    )
    print(f"pa_{eyename} renamed as: eye{eyenum + 1}_pupil_size")

    # Horizontal gaze outside [0, width] is treated as missing.
    _gx_off_screen = (recording[_gx_col] < 0) | (
        recording[_gx_col] > screen_resolution[0]
    )
    recording.loc[_gx_off_screen, _gx_col] = np.nan

    # Vertical gaze outside (0, height] is treated as missing.
    _gy_off_screen = (recording[_gy_col] <= 0) | (
        recording[_gy_col] > screen_resolution[1]
    )
    recording.loc[_gy_off_screen, _gy_col] = np.nan

print(recording)
assert len(recording) == raw_recording_len

pa_right renamed as: eye1_pupil_size
        timestamp  px_right  py_right  hx_right  hy_right  eye1_pupil_size  \
0         1084634    1126.0   -4793.0    1972.0    1384.0           2721.0   
1         1084635    1129.0   -4782.0    1974.0    1394.0           2718.0   
2         1084636    1138.0   -4769.0    1981.0    1405.0           2716.0   
3         1084637    1147.0   -4766.0    1988.0    1408.0           2709.0   
4         1084638    1152.0   -4762.0    1991.0    1411.0           2704.0   
...           ...       ...       ...       ...       ...              ...   
754927    1839561    -610.0   -5978.0     577.0     348.0           5546.0   
754928    1839562    -610.0   -5971.0     578.0     354.0           5540.0   
754929    1839563    -610.0   -5966.0     578.0     359.0           5530.0   
754930    1839564    -610.0   -5971.0     577.0     355.0           5520.0   
754931    1839565    -610.0   -5979.0     578.0     347.0           5515.0   

          gx_right    gy_r

In [24]:
# Munging columns to comply with BIDS.
# At this point, the dataframe is almost ready for writing out as BIDS.

# Columns that already carry their final name are left alone.
_already_named = {
    "timestamp",
    "screen_ppdeg_x_coordinate",
    "screen_ppdeg_y_coordinate",
    "eye1_pupil_size",  # pa
    "eye2_pupil_size",  # pa
}
columns = [c for c in recording.columns if c not in _already_named]

# Column suffix -> numbered eye prefix ("_right" -> "eye1", ...). The prefix is
# resolved per column: the previous eye-by-column double loop produced one name
# list per eye, of which only the first was used, so in a two-eye recording the
# second eye's columns lost their "eye2" prefix.
_eye_prefix = {
    f"_{eyename}": f"eye{eyenum + 1}" for eyenum, eyename in enumerate(eye)
}

bids_columns = []
for name in columns:
    colprefix = next(
        (prefix for suffix, prefix in _eye_prefix.items() if name.endswith(suffix)),
        "",
    )
    # "gx_right" -> "gx" -> "g_x_coordinate" -> ["g", "x_coordinate"]
    _newname = name.split("_")[0]
    _newname = re.sub(r"([xy])$", r"_\1_coordinate", _newname)
    _newname = re.sub(r"([xy])vel$", r"_\1_velocity", _newname)
    _newname = _newname.split("_", 1)
    if _newname[0] not in EDF2BIDS_COLUMNS:
        # Unknown column family: keep the original name instead of raising
        # a KeyError.
        bids_columns.append(name)
        continue
    _newname[0] = EDF2BIDS_COLUMNS[_newname[0]]
    _newname.insert(0, colprefix)
    # Empty fragments (no eye prefix, or the "g" family) are skipped.
    bids_columns.append("_".join(_n for _n in _newname if _n))

# Rename columns to be BIDS-compliant
recording = recording.rename(columns=dict(zip(columns, bids_columns)))

# Reorder columns to render nicely (tracking first, pupil size after);
# columns without a preferred position keep their relative order at the end.
columns = sorted(
    set(recording.columns.values).intersection(BIDS_COLUMNS_ORDER),
    key=BIDS_COLUMNS_ORDER.index,
)
columns += [c for c in recording.columns.values if c not in columns]
recording = recording.reindex(columns=columns)

print(recording)
assert len(recording) == raw_recording_len

        eye1_x_coordinate  eye1_y_coordinate  eye1_pupil_size  \
0              599.799988         440.299988           2721.0   
1              600.000000         441.299988           2718.0   
2              600.700012         442.399994           2716.0   
3              601.400024         442.700012           2709.0   
4              601.799988         443.000000           2704.0   
...                   ...                ...              ...   
754927         458.500000         335.299988           5546.0   
754928         458.500000         335.899994           5540.0   
754929         458.500000         336.399994           5530.0   
754930         458.500000         335.899994           5520.0   
754931         458.500000         335.200012           5515.0   

        eye1_pupil_x_coordinate  eye1_pupil_y_coordinate  \
0                        1126.0                  -4793.0   
1                        1129.0                  -4782.0   
2                        1138.0        

# 3 Parsing the calibration messages

In [None]:
# Show the "!CAL" rows that were set aside while parsing the messages.
print(calibration)

In [None]:
# Parse calibration metadata
metadata["CalibrationCount"] = 0
if not calibration.empty:
    warn("Calibration of more than one eye is not implemented")

    # `calibration` was sliced out of `messages`; edit a private copy so the
    # assignments below do not write into a view of the original frame.
    calibration = calibration.copy()
    calibration["trialid"] = (
        calibration["trialid"].str.replace("!CAL", "", regex=False).str.strip()
    )

    # (timestamp, text) pairs with built-in ints, so the log can be serialized
    # without a separate conversion step.
    metadata["CalibrationLog"] = [
        (int(_stamp), _text)
        for _stamp, _text in zip(
            calibration["timestamp"].values, calibration["trialid"].values
        )
    ]

    # Each validation summary line ("VALIDATION ... ERROR ...") counts as one
    # calibration.
    calibrations_msg = calibration.trialid.str.startswith(
        "VALIDATION"
    ) & calibration.trialid.str.contains("ERROR")
    metadata["CalibrationCount"] = int(calibrations_msg.sum())

    if calibrations_msg.any():
        # Only the most recent summary is parsed for the result fields.
        calibration_last = calibration.index[calibrations_msg][-1]
        _summary = re.match(
            r"VALIDATION (?P<ctype>[\w\d]+) (?P<eyeid>[RL]+) (?P<eye>RIGHT|LEFT) "
            r"(?P<result>\w+) ERROR (?P<avg>-?\d+\.\d+) avg\. (?P<max>-?\d+\.\d+) max\s+"
            r"OFFSET (?P<offsetdeg>-?\d+\.\d+) deg\. "
            r"(?P<offsetxpix>-?\d+\.\d+),(?P<offsetypix>-?\d+\.\d+) pix\.",
            calibration.loc[calibration_last, "trialid"].strip(),
        )
        if _summary is None:
            warn("Calibration data found but unsuccessfully parsed for results")
        else:
            meta_calib = _summary.groupdict()
            metadata["CalibrationType"] = meta_calib["ctype"]
            metadata["AverageCalibrationError"] = [float(meta_calib["avg"])]
            metadata["MaximalCalibrationError"] = [float(meta_calib["max"])]
            metadata["CalibrationResultQuality"] = [meta_calib["result"]]
            metadata["CalibrationResultOffset"] = [
                float(meta_calib["offsetdeg"]),
                (float(meta_calib["offsetxpix"]), float(meta_calib["offsetypix"])),
            ]
            metadata["CalibrationResultOffsetUnits"] = ["deg", "pixels"]
    else:
        # Without this guard, indexing the empty selection with [-1] raised
        # IndexError for recordings that contain no validation summary.
        warn("Calibration data found but it contains no validation summary")


print(calibration)

# 4 Parsing the events dataframe

In [None]:
# events[
#     events["type"] == "saccade"
# ]

In [None]:
print(recording)

# Per-sample event flags: 0 everywhere, then 1 inside each event's time span.
# NOTE(review): every event is written to the eye1_* columns, whichever eye it
# belongs to -- confirm for two-eye recordings.
for _flag_column in ("eye1_fixation", "eye1_saccade", "eye1_blink"):
    recording[_flag_column] = 0

# Fixations: samples whose timestamp lies in [start, end], both ends included.
_fixations = events[events["type"] == "fixation"]
for _, fixation_event in _fixations.iterrows():
    _during_fixation = recording["timestamp"].between(
        fixation_event["start"], fixation_event["end"]
    )
    recording.loc[_during_fixation, "eye1_fixation"] = 1

# Saccades; a saccade flagged as containing a blink marks the same samples as
# a blink too. (Some versions name this field "blink" instead.)
_saccades = events[events["type"] == "saccade"]
for _, saccade_event in _saccades.iterrows():
    _during_saccade = recording["timestamp"].between(
        saccade_event["start"], saccade_event["end"]
    )
    recording.loc[_during_saccade, "eye1_saccade"] = 1

    if saccade_event["contains_blink"] == 1:
        recording.loc[_during_saccade, "eye1_blink"] = 1

In [None]:
# Inspect the samples after the event flag columns were added.
print(recording)

# 5 Write the data into BIDS structure

In [None]:
from copy import deepcopy

# Record the final column names in the metadata.
metadata['Columns'] = recording.columns.tolist()
print(metadata)
# Keep an independent snapshot so `metadata` can be restored later.
save_metadata = deepcopy(metadata)
# metadata.pop('CalibrationLog', None)
# print(metadata)

In [None]:
# Restore `metadata` from the snapshot. Both names now refer to the same dict,
# so later in-place changes to `metadata` also change `save_metadata`.
metadata = save_metadata

In [None]:
# if isinstance(metadata, dict):
#     for k, v in metadata.items():
#         if isinstance(v, dict):
#             for _k, _v in v.item():
#                 print(f"_k_v--{_k}: {type(_v)}")
#         elif isinstance(v, list) and k=='CalibrationLog':
#             for entry in v:
#                 print(f'entry: {entry}')
#                 timestamp, info = entry
#                 print(f'timestamp: {type(timestamp)}')
#                 print(f'info: {type(info)}')
                
            
#         else:
#             print(f"-k-v--{k}:{type(v)}--{v}")
  



def convert_to_int(metadata):
    """Cast NumPy integer scalars in the calibration metadata to built-in ``int``.

    Two entries are handled when present:

    * ``"CalibrationCount"``: cast to ``int`` when it is an integer scalar.
    * ``"CalibrationLog"``: every entry whose first element is an integer
      scalar is replaced by the pair ``(int(entry[0]), entry[1])``; other
      entries are kept unchanged.

    Any NumPy integer width is recognised (``np.integer``), not only
    ``np.int32`` and ``np.int64``.

    Parameters
    ----------
    metadata : dict
        Metadata dictionary; it is modified in place.

    Returns
    -------
    dict
        The same ``metadata`` object that was passed in.
    """
    integer_types = (np.integer, int)

    if 'CalibrationCount' in metadata:
        count = metadata['CalibrationCount']
        if isinstance(count, integer_types):
            metadata['CalibrationCount'] = int(count)

    if "CalibrationLog" in metadata:
        metadata["CalibrationLog"] = [
            (int(entry[0]), entry[1])
            if isinstance(entry[0], integer_types)
            else entry
            for entry in metadata["CalibrationLog"]
        ]
    return metadata

        
# convert_to_int edits `metadata` in place and returns it, so
# `convert_metadata` and `metadata` are the same dict.
convert_metadata = convert_to_int(metadata)
# print(convert_metadata)

In [None]:
# Load the autoreload extension
%load_ext autoreload
# Set autoreload to update the modules every time before executing a new line of code
%autoreload 2

# NOTE(review): `importlib` and `edf_extension` are not used in this cell, and
# `edf_name = edf_name` has no effect -- confirm they can be removed.
import importlib
# Project-local writer; its signature is not visible from this notebook.
from write_bids_yiwei import write_bids_from_df
# The output is written next to the source EDF file.
out_dir = DATA_PATH
edf_extension = 'EDF'
edf_name = edf_name
# Output base name: the EDF file name up to its first dot.
filename = edf_name.split('.')[0]
print(f'bid filename: {filename}')

write_bids_from_df(
    recording, convert_metadata,
    out_dir,
    filename,
)


Now the files are generated.
- EDF Path
    - \<filename\>.EDF
    - \<filename\>.tsv.gz

In [None]:
# Final look at the dataframe that was handed to write_bids_from_df.
print(recording)