In [None]:
from prepare_language_folder import prepare_language_folder
from preprocessing.data_collection.multipleye_data_collection import MultipleyeDataCollection
from pathlib import Path

## Pre-processing MultiplEYE Data

In [None]:
# Name of the data collection to process. The same string is used below as the
# folder name under "data" and under "preprocessed_data".
data_collection_name = 'MultiplEYE_SQ_CH_Zurich_1_2025'
# Alternative collection; swap it with the line above to process it instead.
# data_collection_name = 'MultiplEYE_SL_SI_Ljubljana_1_2025'

If necessary, prepare the data folder by unzipping the downloaded files. Works only for MultiplEYE and MeRID data collections so far. Also, there might be some manual steps necessary.

In [None]:
# Prepare the data folder for the selected collection (project helper).
prepare_language_folder(data_collection_name)

# All paths are anchored at the resolved current working directory, so the
# notebook presumably has to be run from the repository root -- TODO confirm.
this_repo = Path().resolve()
data_folder_path = this_repo.joinpath("data", data_collection_name)

In [None]:
# Build the data-collection object from the prepared data folder.
multipleye_sq = MultipleyeDataCollection.create_from_data_folder(data_folder_path)

# Root directory for everything this notebook writes for the collection;
# created together with any missing parents, and left alone if it exists.
preprocessed_data_folder = this_repo.joinpath("preprocessed_data", data_collection_name)
preprocessed_data_folder.mkdir(exist_ok=True, parents=True)

In [None]:
# Show the data-collection object as the cell output.
multipleye_sq

In [None]:
# Materialise every session yielded by iterating the data collection.
sessions = list(multipleye_sq)

# The rest of the notebook works on the first session only.
sess = sessions[0]
# Session identifier; used below for the output folder and the loader call.
idf = sess.session_identifier

## Creating Gaze Frame from ASCII File

In [None]:
from preprocessing import peyepeline, config

# Path of the ASC file belonging to the selected session.
asc = sess.asc_path

# Per-session output directory: <preprocessed_data_folder>/<session identifier>.
output_folder = preprocessed_data_folder.joinpath(idf)
output_folder.mkdir(exist_ok=True, parents=True)

In [None]:
# Load the session's ASC file through the project pipeline. The call returns
# two values: the gaze object used in all following cells and its metadata.
# Inputs are the ASC path, the session's lab configuration, the session
# identifier and the trial columns defined in the project config.
gaze, gaze_metadata = peyepeline.load_gaze_data(
    asc_file=asc,
    lab_config=sess.lab_config,
    session_idf=idf,
    trial_cols=config.TRIAL_COLS,
)

In [None]:
# Show the freshly loaded gaze object as the cell output.
gaze

In [None]:
# Hand the loaded gaze data to the project's raw-data writer, targeting the
# "raw_data" subfolder of this session's output folder. The session identifier
# is passed along as well (presumably for file naming -- TODO confirm).
peyepeline.save_raw_data(output_folder / 'raw_data', sess.session_identifier, gaze)

In [None]:
# Attach the metadata returned by load_gaze_data to the session object so it
# travels with the session from here on.
sess.pm_gaze_metadata = gaze_metadata

In [None]:
# Show the metadata that was just attached to the session.
sess.pm_gaze_metadata

## Coordinate and Velocity Preprocessing

Eye movements are recorded in screen pixel coordinates, which depend on stimulus size and monitor setup. To compare gaze behavior across participants, screens, or datasets, it is standard to convert pixel positions 
into **degrees of visual angle (dva)**. Next, we compute **gaze velocity**, which allows us to detect saccades and distinguish them from fixations.

In [None]:
# Run the project's coordinate/velocity preprocessing on the gaze object.
# The return value is not captured, so the helper presumably updates `gaze`
# in place -- TODO confirm.
peyepeline.preprocess_gaze(gaze)

## Detect Events and Compute Their Properties

Eye-tracking data are typically segmented into events, i.e. `fixations` and `saccades`. Fixations represent moments when the eyes remain relatively still, allowing visual information to be processed, while saccades are the rapid movements between fixations that reposition the gaze. Detecting these events and computing their properties, such as `dispersion`, fixation `duration`, saccade `amplitude`, and `peak velocity`, provides the foundation for analyzing visual behavior and understanding how participants explore a stimulus.

### Fixations

We can detect fixations by applying the `I-VT` or the `I-DT` method.

The **I-VT (Velocity-Threshold Identification)** method distinguishes fixation and saccade points based on their point-to-point velocities. Each point is classified as a fixation if its velocity is below the specified threshold. Consecutive fixation points are then merged into a single fixation. A threshold of 20 degrees/second is commonly used as a default maximum value. Read more about [the IVT algorithm in the documentation](https://pymovements.readthedocs.io/en/stable/reference/api/pymovements.events.detection.ivt.html) 

The **I-DT (Dispersion-Threshold Identification)** method finds fixations by grouping consecutive points within a maximum separation (dispersion) threshold and a minimum duration threshold. The algorithm slides a moving window across the data: if the dispersion within the window is below the threshold, the window represents a fixation and is gradually expanded until the dispersion exceeds the threshold.
Read more about [our implementation of the IDT method](https://pymovements.readthedocs.io/en/stable/reference/api/pymovements.events.detection.idt.html).

We use the `I-VT` algorithm with the following key default parameters:
- `minimum duration`: 100 ms
- `velocity threshold`: 20.0 degrees/second

Such properties as `location`, containing the centroid coordinates of each fixation, and `dispersion` will also be calculated.

In [None]:
# Detect fixations through the project helper. No overrides are passed, so the
# helper's own default parameters apply.
peyepeline.detect_fixations(
    gaze,
)

### Saccades

Saccades are rapid eye movements that shift the point of fixation from one location to another. We detect saccades (or micro-saccades) from the velocity sequence of gaze data using the [microsaccades algorithm](https://pymovements.readthedocs.io/en/stable/reference/api/pymovements.events.detection.microsaccades.html#pymovements.events.detection.microsaccades). This algorithm implements a noise-adaptive velocity threshold, meaning that the detection threshold automatically scales with the noise level of the velocity signal. Such properties as `amplitude` and `peak velocity` of the detected saccades will also be calculated.

The key default parameters are:
- `threshold_factor`: Multiplier used to determine the velocity threshold relative to the noise level of the signal. The default value is 6. A higher factor makes the algorithm more conservative (detects fewer saccades), while a lower factor makes it more sensitive.
- `minimum_duration`: Defines how long a velocity peak must persist to be classified as a saccade. The duration is expressed in the same units as timesteps. If no timesteps are provided, the value refers to the number of samples (default = 6), which corresponds to about 12 ms at a 500 Hz sampling rate. Shorter events are ignored as noise. 

In [None]:
# Detect saccades through the project helper. No overrides are passed, so the
# helper's own default parameters apply.
peyepeline.detect_saccades(
    gaze,
)

In [None]:
# Show the gaze object again after the detection steps.
gaze

In [None]:
# Peek at the first rows of the events frame.
gaze.events.frame.head()

In [None]:
# Hand the gaze object to the project's event writer, targeting the "fixations"
# subfolder of this session's output folder.
# NOTE(review): both fixation and saccade detection have run by this point;
# confirm which event types the helper writes into a folder named "fixations".
peyepeline.save_events_data(output_folder / 'fixations', sess.session_identifier, gaze)

In [None]:

# Hand the gaze object to the project's scanpath writer, targeting the
# "scanpaths" subfolder of this session's output folder.
peyepeline.save_scanpaths(output_folder / 'scanpaths', sess.session_identifier, gaze)

In [None]:
# Map the detected fixations onto the areas of interest of the session's
# stimuli via the project helper.
# NOTE(review): this cell comes after the save_events_data / save_scanpaths
# cells, so if the mapping adds AOI information to `gaze`, the files written
# earlier would not contain it -- confirm the intended order.
peyepeline.map_fixations_to_aois(
    gaze,
    sess.stimuli,
)

In [None]:
# Pass the gaze object and the session's output folder to the project's
# session-metadata writer.
peyepeline.save_session_metadata(gaze, output_folder)

In [None]:
# Create the overview for the current session, passing the session's output
# folder as `path`.
# The data-collection object is bound to `multipleye_sq` earlier in this
# notebook; the name `multipleye` is never assigned, so calling through it
# raised a NameError.
multipleye_sq.create_session_overview(sess.session_identifier, path=output_folder)

## The END


In [None]:
# Run I-VT fixation detection directly on the gaze object with a velocity
# threshold of 20 and store the result under the event name "fixation.ivt".
# clear=False is passed, presumably so that events detected earlier are kept
# -- TODO confirm against the pymovements `detect` documentation.
gaze.detect(method="ivt", velocity_threshold=20, eye='auto', clear=False, name="fixation.ivt")
# You can now see the detected fixations in the gaze.events DataFrame under the name "fixation.ivt"
gaze

In [None]:
import polars as pl

In [None]:
# gaze.detect('microsaccades', minimum_duration=12)

# gaze.events.frame.filter(pl.col("name") == "saccades").head()

# You can experiment with different minimum durations for saccade detection
# for md in [0.1, 5, 10, 12, 20, 100]:
#     gaze.detect("microsaccades", minimum_duration=md)

#     print(md, gaze.events.frame.filter(pl.col("name") == "saccade").height)

In [None]:
# Run the microsaccades detector directly on the gaze object with a minimum
# duration of 12, then show the gaze object as the cell output.
# NOTE(review): saccades were already detected via peyepeline.detect_saccades
# above; confirm whether running the detector again here is intended.
gaze.detect('microsaccades', minimum_duration=12)
gaze

### Areas Of Interest

### Loading AOI File into DataFrame

In [None]:
from pymovements.stimulus.text import from_file

In [None]:
# Folder holding the per-stimulus AOI character files for this collection.
# NOTE(review): this variable is not referenced again in this cell; the loader
# below receives only the bare file name in `aoi_chars_file` -- confirm that
# the concatenated file is expected relative to the working directory.
aoi_chars_files_folder = "data/MultiplEYE_SQ_CH_Zurich_1_2025/eye-tracking-sessions/data_piloting_stimuli_MultiplEYE_SQ_CH_Zurich_1_2025participant_id_1_to_5/aoi_stimuli_sq_ch_1/"

# Concatenate all available AOI character files into one DataFrame.
# To make the combined AOI dataset match the temporal order of the gaze data,
# the AOIs must be concatenated in the same sequence as the participant saw them.

# Option without questions:
# aoi_chars_file = "concatenated_aoi_no_questions.csv"

# Option with questions (active):
aoi_chars_file = "concatenated_aoi_all.csv"

# Load the AOI file as a text stimulus, telling the loader which columns hold
# the AOI label, the top-left corner, the size and the page.
stimulus = from_file(
    aoi_path=aoi_chars_file,
    aoi_column="char",
    start_x_column="top_left_x",
    start_y_column="top_left_y",
    width_column="width",
    height_column="height",
    page_column="page",
)

# Show the first ten AOI rows as the cell output.
stimulus.aois.head(10)

### Mapping Fixations to AOI 

In [None]:
# List the columns currently present in the gaze samples.
print(gaze.samples.columns)

In [None]:
# Preparation for mapping gaze points to AOIs (the AOI boundary is meant to
# count as part of the area of interest).
#
# Pull elements 0 and 1 out of the list column "pixel" into the flat columns
# "pixel_xr" and "pixel_yr", then discard every sample in which either of the
# two new columns is null.
gaze.samples = gaze.samples.with_columns(
    pixel_xr=pl.col("pixel").list.get(0),
    pixel_yr=pl.col("pixel").list.get(1),
).drop_nulls(subset=["pixel_xr", "pixel_yr"])

In [None]:
# Number of gaze samples currently held.
print(gaze.samples.height)

# Number of AOI rows in the loaded stimulus.
print(len(stimulus.aois))


In [None]:
# Work on a reduced copy: take the first 40000 samples, clone the gaze object
# and replace the clone's samples with that subset, so the AOI mapping below
# runs on the clone rather than on the full `gaze` object.
subset = gaze.samples.head(40000)
gaze_small = gaze.clone()
gaze_small.samples = subset
# Map the subset's pixel gaze positions to the AOIs of the loaded stimulus.
gaze_small.map_to_aois(aoi_dataframe=stimulus, eye="auto", gaze_type="pixel")

In [None]:
# Inspect which columns the reduced gaze object has after the AOI mapping.
gaze_small.samples.columns

In [None]:
# Peek at the first ten samples of the reduced gaze object.
gaze_small.samples.head(10)


In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: overlay the gaze positions of the reduced sample set
# with the top-left corners of all AOIs.
plt.scatter(
    x=gaze_small.samples["pixel_xr"],
    y=gaze_small.samples["pixel_yr"],
    s=5,
    label="gaze",
)
plt.scatter(
    x=stimulus.aois["top_left_x"],
    y=stimulus.aois["top_left_y"],
    s=5,
    label="AOI top-left",
)
plt.legend()
# Flip the y-axis so that y grows downwards in the plot.
plt.gca().invert_yaxis()
plt.show()


In [None]:
import polars as pl

# Collect (timestamp, message) pairs for every message of the first session
# whose text mentions "question" (case-insensitive).
# NOTE(review): the session is subscripted with "messages" here, while the
# rest of the notebook reads session data through attributes -- confirm that
# the session object supports this access.
question_msgs = []
for m in sessions[0]["messages"]:
    if "question" in m["message"].lower():
        question_msgs.append((int(m["timestamp"]), m["message"]))

# Time range covered by the gaze samples.
sample_times = gaze.samples["time"]
gaze_min = sample_times.min()
gaze_max = sample_times.max()

# Report for each question message whether its timestamp falls inside that
# range (both ends inclusive).
for ts, msg in question_msgs:
    inside = gaze_min <= ts <= gaze_max
    if inside:
        verdict = '✅ inside gaze data'
    else:
        verdict = '❌ outside'
    print(f"{ts}: {verdict}  | {msg}")

# tested

In [None]:
# gaze.map_to_aois(
#     aoi_dataframe=stimulus,
#     eye="auto",
#     gaze_type="pixel"
# )

Step 1 of peyepeline: create the gaze frame.

	-- data collection folder
	---- ...
	---- fixations
	---- saccades(?)
	---- reading_measures
	---- raw_data (i.e. gaze sample csv)