# Interactive Eye Gaze Analysis Pipeline 

This notebook demonstrates how to use the utility functions in `eye_gaze_utils.py` to process and analyze eye tracker data. 

In addition to setting up the environment using the `requirements.txt` file, this notebook requires the following:
- Install R from [https://www.r-project.org/](https://www.r-project.org/)
- Install the following R packages:
    - `lmerTest` (for mixed-effects modeling)
    - `emmeans` (for estimated marginal means and pairwise comparisons)

## 1. Normalize the data and then calculate various eye-gaze metrics
Import utilities from `eye_gaze_utils.py`:

In [None]:
from eye_gaze_utils import load_file_data, normalize_gaze_to_screen, extract_eye_metrics, plot_data
import os
import pandas as pd

Set the data directory and find all the CSV files containing the eye-gaze data:

In [None]:
# Set the directory containing your eye tracker CSV files

# Prompt the user to input the PNAS-MATB folder path.
# Surrounding whitespace and quote characters (common when a path is pasted
# from a file manager) are stripped so the joined path below is valid.
directory = input("Enter the full path to your PNAS-MATB folder: ").strip().strip('"\'')
eyelink_directory = os.path.join(directory, "eyelink_data")

# List available CSV files. os.listdir returns entries in arbitrary order, so
# sort them to make eye_files[0] and the processing order reproducible.
eye_files = sorted(f for f in os.listdir(eyelink_directory) if f.endswith('.csv'))
print(f"Found {len(eye_files)} files. Example: {eye_files[:3]}")

Load and preprocess a single file as an example:

In [None]:
# Load and normalize data from the first file (if any CSV files were found).
# With an empty eye_files list, file_data is set to None so the guarded
# cells that follow skip cleanly instead of failing on an undefined name.
example_file = eye_files[0] if eye_files else None
file_data = (
    load_file_data(eyelink_directory, example_file)
    if example_file is not None
    else None
)
if file_data is not None:
    original_df = file_data['data']
    # A copy is passed to the normalizer, so original_df is left as loaded
    filtered_df = normalize_gaze_to_screen(original_df.copy())
    print(f"Loaded and normalized: {example_file}")
elif example_file is None:
    print(f"No CSV files found in {eyelink_directory}")
else:
    print(f"Failed to load {example_file}")

Plot original versus normalized gaze and pupil data:

In [None]:
# Gaze position and pupil size columns for the right (R) and left (L) eye
columns_to_plot = [
    'R Gaze X', 'R Gaze Y',
    'L Gaze X', 'L Gaze Y',
    'R Pupil Size', 'L Pupil Size',
]

# Only plot when the example file was loaded in the previous cell
if file_data is not None:
    plot_data(
        original_df,
        filtered_df,
        columns_to_plot,
        filename=example_file,
        dir=directory,
        save=False,
    )

Extract windowed eye metrics from the filtered data:

In [None]:
# Compute metrics for the example file and preview the first ten rows;
# nothing happens when the example file failed to load.
if not (file_data is None):
    metrics_df = extract_eye_metrics(filtered_df)
    display(
        metrics_df.head(10)
    )

Process all files and generate a single dataframe of metrics for all participants:

In [None]:
# Build one metrics DataFrame per successfully loaded file.
# The loop-local names (loaded, normalized_df, file_metrics_df) are kept
# distinct from file_data / filtered_df / metrics_df used by the single-file
# example cells, so re-running those cells after this one still refers to the
# example file rather than to whichever file was processed last.
all_metrics = []
skipped_files = []
for fname in eye_files:
    loaded = load_file_data(eyelink_directory, fname)
    if loaded is None:
        # Remember files that could not be loaded so they can be reported
        skipped_files.append(fname)
        continue

    # Normalize data and extract metrics
    normalized_df = normalize_gaze_to_screen(loaded['data'].copy())
    file_metrics_df = extract_eye_metrics(normalized_df)

    # Add file/participant/session info to each row
    file_metrics_df['file_name'] = fname
    file_metrics_df['participant_id'] = loaded.get('participant_id', None)
    file_metrics_df['session_number'] = loaded.get('session_number', None)
    all_metrics.append(file_metrics_df)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that failed to load: {skipped_files}")

# Combine all metrics into a single DataFrame
if all_metrics:
    all_metrics_df = pd.concat(all_metrics, ignore_index=True)
    print(f"Processed {len(all_metrics)} of {len(eye_files)} files.")
else:
    print("No valid files processed.")

Save the combined metrics for further analysis. Skip this cell if you only want the statistics and plots:

In [None]:
# Destination for the combined metrics, relative to the current working directory
output_csv = os.path.join('..', 'rf_training_data', 'eyegaze_metrics.csv')

if 'all_metrics_df' in locals():
    # Create the output folder if it is missing; to_csv does not create
    # parent directories and would otherwise raise an OSError.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)
    all_metrics_df.to_csv(output_csv, index=False)
    print(f"Saved all metrics to {output_csv}")
else:
    # Same guard as the stats cell: nothing to save if no file was processed
    print("all_metrics_df not found. Please run the previous cell to generate metrics.")

## 2. Run stats and plot figures using `stats_figures.py` utilities

Import relevant libraries and set the metrics for analysis (Blink rate and Fixation rate):

In [None]:
import sys
sys.path.append('..')  # Add parent directory
from stats_figures import run_rpy2_lmer, barplot_ax
import matplotlib.pyplot as plt

# Metrics to analyse, as (column name in all_metrics_df, axis/title label) pairs.
# Built from a mapping so each column name is paired with its label exactly once;
# insertion order is preserved, so the analysis order is blink first, then fixation.
metrics = list({
    "blink_count": "Blink rate (blinks/min)",
    "fix_count": "Fixation rate (fixations/min)",
}.items())

Load session info for all participants and add the condition and session-order columns to `all_metrics_df`:

In [None]:
# Read the participant/session table stored in the PNAS-MATB folder
Session_Info = pd.read_csv(os.path.join(directory, "participant_info.csv"))

# Derive session_order by concatenating the first character of each session
# column's value (e.g. "LMH"); skipped unless all three session columns exist.
session_columns = ["session01", "session02", "session03"]
if set(session_columns).issubset(Session_Info.columns):
    initials = [Session_Info[col].str[0] for col in session_columns]
    Session_Info["session_order"] = initials[0] + initials[1] + initials[2]

# Map 'condition' from Session_Info to all_metrics_df using participant_id and session_number
def get_condition(row):
    """Look up the condition for one metrics row in ``Session_Info``.

    The row's ``participant_id`` selects the ``Session_Info`` row (matched on
    the "Participant ID" column) and its ``session_number`` selects the session
    column. The column name is tried first exactly as ``session<number>`` and
    then zero-padded to two digits (``session01``), so numeric session numbers
    such as ``1``, ``"1"`` or ``1.0`` resolve to the zero-padded columns
    instead of silently yielding ``None``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'participant_id'`` (convertible to int) and
        ``'session_number'``.

    Returns
    -------
    The value stored for that participant/session, or ``None`` when the
    participant or the session column is not present in ``Session_Info``.
    """
    pid = int(row['participant_id'])
    session_number = row['session_number']

    # Candidate column names: the raw form first (original behaviour), then
    # the zero-padded form used by the session01/session02/session03 columns.
    candidates = [f"session{session_number}"]
    try:
        candidates.append(f"session{int(session_number):02d}")
    except (TypeError, ValueError):
        # Non-numeric or missing session number: only the raw form applies
        pass

    participant_rows = Session_Info.loc[Session_Info["Participant ID"] == pid]
    if participant_rows.empty:
        return None

    for session_col in candidates:
        if session_col in Session_Info.columns:
            # First matching row wins, as in the original lookup
            return participant_rows[session_col].values[0]
    return None
# Attach the per-row condition looked up from Session_Info
all_metrics_df["condition"] = all_metrics_df.apply(get_condition, axis=1)

# Numeric codes for the session orders; orders not listed here map to NaN
# NOTE(review): only "LMH" and "LHM" are coded — confirm no other orders occur
session_order_numeric_map = dict(LMH=1, LHM=2)

# Translate each participant's session_order to its numeric code and attach it
# to every metrics row via participant_id (only when session_order was derived)
if "session_order" in Session_Info.columns:
    order_by_participant = Session_Info.set_index("Participant ID")["session_order"]
    session_info_numeric_map = order_by_participant.map(session_order_numeric_map).to_dict()
    participant_ids = all_metrics_df["participant_id"].astype(int)
    all_metrics_df["session_order_numeric"] = participant_ids.map(session_info_numeric_map)

Run stats and make plots for the metrics of interest:

In [None]:
# Requires all_metrics_df produced by the earlier processing cells
if 'all_metrics_df' not in locals():
    print("all_metrics_df not found. Please run the previous cell to generate metrics.")
else:
    for metric, label in metrics:
        print(f"\n--- {label} ---")

        # Fit the mixed-effects model; the result is unpacked as pairwise
        # p-values, per-condition means and per-condition confidence intervals
        pairwise_p, means, cis = run_rpy2_lmer(all_metrics_df, metric, label)

        # Collect bar heights and error bars in the fixed L, M, H order
        conds = ["L", "M", "H"]
        mean_vals = []
        sems = []
        for c in conds:
            mean_vals.append(means.get(c, float('nan')))
            if c in cis:
                # 95% CI to SEM: interval width divided by 3.92 (2 x 1.96);
                # assumes cis[c] is (lower, upper) — TODO confirm
                sems.append((cis[c][1] - cis[c][0]) / 3.92)
            else:
                sems.append(float('nan'))

        # Pairwise p-values in L-M, L-H, M-H order; a missing pair counts as 1.0
        pvals = [
            pairwise_p.get(pair, 1.0)
            for pair in (("L", "M"), ("L", "H"), ("M", "H"))
        ]

        # Plot
        fig, ax = plt.subplots(figsize=(4, 5))
        barplot_ax(ax, mean_vals, sems, pvals, ylabel=label, metric_name=metric)
        ax.set_title(label, fontsize=13, weight='bold')
        plt.tight_layout()
        plt.show()