## Aux Functions

In [1]:
from pathlib import Path
from datetime import datetime
import json
import os
import shutil
import subprocess
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import seaborn as sns
import statsmodels.formula.api as smf

### Others

In [2]:
def dedupe_inplace(seq):
    seen = set()
    write = 0
    for item in seq:
        if item not in seen:
            seen.add(item)
            seq[write] = item
            write += 1
    del seq[write:]

### Add a simulation to the list

In [3]:
# add simulations to list
def add(subject=None, story=None, simulations=None):
    if simulations is None:
        raise ValueError("Simulations list is required")    
    if subject is None and story is None:
        raise ValueError("Provide at least subject or story")
    # add all simulations of a subject
    if story == None:
        _add_subject(subject, simulations)
    # add all simulations of a story
    if subject == None:
        _add_story(story, simulations)
    # add simulation for a subject and story
    if subject != None and story != None:
        sim_path = _make_simulation_path(subject, story)
        if os.path.exists(sim_path):
            simulations.append(sim_path)
        else:
            raise ValueError(f"Simulation does not exist {sim_path}")
        
    dedupe_inplace(simulations)
    return simulations
        
# add all simulations of a subject
def _add_subject(subject, simulations):
    suffix = f"{subject}.tsv"
    simulations.extend(str(p) for p in Path("simulations").rglob("*.tsv")
                       if p.name.lower().endswith(suffix))

# add all simulations of a story
def _add_story(story, simulations):
    story_dir = Path("simulations") / story
    if story_dir.is_dir():
        simulations.extend(str(p) for p in story_dir.rglob("*.tsv"))


def _make_simulation_path(subject, story):
    return f'simulations/{story}/{story}_ob1_{subject}.tsv'

### Correlation

In [4]:
def load_pkl(file_path):
    return pd.read_pickle(file_path)

In [5]:
def load_all_subjects(folder):
    all_dfs = []
    for fname in os.listdir(folder):
        if fname.endswith(".pkl"):
            path = os.path.join(folder, fname)
            df = pd.read_pickle(path)
            df["subj"] = fname.replace(".pkl", "")
            all_dfs.append(df)
    return pd.concat(all_dfs, ignore_index=True)

In [6]:
def average_metrics_over_subjects(df):
    # metricas en las que 0 significa que no hubo fijacion -> van a NAN para que no se tengan en cuenta
    duration_metrics = ["FFD", "SFD", "FPRT", "RPD", "TFD", "RRT", "SPRT"]
    df[duration_metrics] = df[duration_metrics].replace(0, np.nan)
    # sacar las excluded
    #df = df[df["excluded"] == False]
    # hacer la media por id en oracion, id de palabra y palabra
    mean_df = df.groupby(["sentence_idx", "word_idx", "word"], as_index=False).mean(numeric_only=True)
    mean_df = mean_df.fillna(0)
    return mean_df

In [7]:
simulations_list = []

## Setup

Add simulations to the simulations_list using the add function, the possible parameter combinations :

- subject and story, to add a specific subject-story simulation
- only subject, to add all simulations for that subject
- only story, to add all simulations for that story

In [8]:
# add(subject = *, story = *, simulations=simulations_list)
# choose a subject and a story simulation or just a subject/story to add all its simulations
add(subject='subject0', story='Axolotl', simulations=simulations_list)
add(subject='subject1', story='El espejo', simulations=simulations_list)
#add(subject='perfil5', story='Axolotl', simulations=simulations_list)
#add(story='cuento2', simulations=simulations_list)
#add(subject='perfil4',simulations=simulations_list)

['simulations/Axolotl/Axolotl_ob1_subject0.tsv',
 'simulations/El espejo/El espejo_ob1_subject1.tsv']

## Experiment

### Make experiment folder & save files used

Creates a folder to save experiment information

In [9]:
stamp = datetime.now().strftime("%Y_%m_%d_%H%M")
experiment_folder = Path("experiments") / stamp
experiment_folder.mkdir(parents=True, exist_ok=True)
print("Created:", experiment_folder)

Created: experiments/2026_02_01_1827


In [10]:
out_path = Path(experiment_folder) / "simulations_list.json"
with out_path.open("w", encoding="utf-8") as f:
    json.dump(simulations_list, f, ensure_ascii=False, indent=2)

### Generate processed folder

Generates the processed folder the analysis script uses

Creates the folder

In [11]:
processed_path = "../data/processed"

if os.path.exists(processed_path):
    try:
        shutil.rmtree(processed_path)
        print(f"Folder '{processed_path}' and its contents deleted successfully.")
    except OSError as e:
        print(f"Error: {processed_path} : {e.strerror}")
else:
    print(f"Folder '{processed_path}' does not exist.")

Folder '../data/processed' and its contents deleted successfully.


In [12]:
trials_path = processed_path + '/trials'
os.makedirs(trials_path)
print(f"Folder '{trials_path}' created successfully.")

Folder '../data/processed/trials' created successfully.


Populates the folder in the correct format with the simulations in simulations_list

In [13]:
examples_directory = '../data/processed_examples'

In [14]:
def add_screens(simulation, destination_directory, stimuli_json, story):
    eye_value = 'R'
    pupil_value = -1
    df = pd.read_csv(simulation, sep="\t")

    for screen_id in range(df["text_id"].nunique()):
        df_screen = df[df["text_id"] == screen_id]
        data = []
        time_acum = 0

        # screen_index in json is 0-based
        screen_words = stimuli_json[story][screen_id]["words"]

        for _, row in df_screen.iterrows():
            index = row["fixation_counter"]
            t_start = time_acum
            duration = row["fixation_duration"]
            time_acum += duration

            word_idx = int(row["foveal_word_index"])
            x = screen_words[word_idx]["x"]
            y = screen_words[word_idx]["y"]

            data.append({
                "index": index,
                "eye": eye_value,
                "tStart": t_start,
                "tEnd": time_acum,
                "duration": duration,
                "xAvg": x,
                "yAvg": y,
                "pupilAvg": pupil_value
            })

        output_df = pd.DataFrame(data)
        output_dir = destination_directory + f"/screen_{screen_id+1}/fixations.pkl"
        print(output_dir)
        if os.path.exists(output_dir):
            os.remove(output_dir)
        output_df = pd.to_pickle(output_df, output_dir)

    print(f"Screens added to {destination_directory}")

In [15]:
with open("all_stimuli.json", "r", encoding="utf-8") as f:
    all_simulations_words = json.load(f)
    
for simulation in simulations_list:
    
    story = Path(simulation).stem.split("_")[0]
    subject = Path(simulation).stem.split("_")[2]
    subject_path = trials_path + '/' + subject
    print(f"Processing story {story} for subject {subject}")
    
    if not os.path.exists(processed_path):
        os.makedirs(subject_path)
    source_directory = examples_directory + '/' + story
    destination_directory = subject_path + '/' + story

    try:
        shutil.copytree(source_directory, destination_directory)
        print(f"Folder '{source_directory}' successfully copied to '{destination_directory}'")
    except FileExistsError:
        print(f"Error: Destination directory '{destination_directory}' already exists.")
    except Exception as e:
        print(f"An error occurred: {e}")
    
    add_screens(simulation, destination_directory, stimuli_json=all_simulations_words, story=story)
    print()  
    

Processing story Axolotl for subject subject0
Folder '../data/processed_examples/Axolotl' successfully copied to '../data/processed/trials/subject0/Axolotl'
../data/processed/trials/subject0/Axolotl/screen_1/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_2/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_3/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_4/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_5/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_6/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_7/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_8/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_9/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_10/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_11/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_12/fixations.pkl
../data/processed/trials/subject0/Axolotl/screen_13/fixation

### Run analysis script

In [16]:
stories = []
for sim in simulations_list:
    story = sim.split('/')[1]
    print(story)
    if story not in stories:
        stories.append(story)

Axolotl
El espejo


In [45]:
%%bash
rm -rf ../results/*

In [46]:
for story in stories:
    subprocess.run(
        ["python3", "em_analysis.py", "--item", story, "--reprocess"],
        cwd="..",
        check=True
    )

Assigning fixations to words...


Processing items in parallel: 100%|██████████| 1/1 [00:04<00:00,  4.80s/it]


Extracting eye-tracking measures from trials...


Processing items in parallel: 100%|██████████| 1/1 [00:07<00:00,  7.34s/it]


Analysing eye-tracking measures...
         subjs  words  words_excluded   fix  fix_excluded  regressions  skips  out_of_bounds  return_sweeps
Axolotl      1   1543             432  1786           580          733    684             36             77
Total        1   1543             432  1786           580          733    684             36             77
Figure(640x480)
Figure(1400x1200)
Figure(1400x1200)
[INFO] MLM salteado: se necesitan >=2 sujetos e >=2 ítems (hay subj=1, item=1).



R[write to console]: 1: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 2: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 3: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 4: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-libr

Assigning fixations to words...


Processing items in parallel: 100%|██████████| 1/1 [00:02<00:00,  2.21s/it]


Extracting eye-tracking measures from trials...


Processing items in parallel: 100%|██████████| 1/1 [00:03<00:00,  3.59s/it]


Analysing eye-tracking measures...
           subjs  words  words_excluded  fix  fix_excluded  regressions  skips  out_of_bounds  return_sweeps
El espejo      1    628             155  775          <NA>          347    318           <NA>           <NA>
Total          1    628             155  775             0          347    318              0              0
Figure(640x480)
Figure(1400x1200)
Figure(1400x1200)
[INFO] MLM salteado: se necesitan >=2 sujetos e >=2 ítems (hay subj=1, item=1).



R[write to console]: 1: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 2: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 3: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 4: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-libr

In [47]:
source_directory = "../results"
destination_directory = experiment_folder / "results"

try:
    shutil.copytree(source_directory, destination_directory)
    print(f"Folder '{source_directory}' successfully copied to '{destination_directory}'")
except FileExistsError:
    print(f"Error: Destination directory '{destination_directory}' already exists.")
except Exception as e:
    print(f"An error occurred: {e}")
    
    

Folder '../results' successfully copied to 'experiments/2026_02_01_1827/results'


### Correlations

In [48]:
experiment_folder

PosixPath('experiments/2026_02_01_1827')

In [49]:
# medias de todos los sujetos originales
folder_original = "results_og_all/measures/Axolotl"
all_data_original = load_all_subjects(folder_original)
mean_metrics_original = average_metrics_over_subjects(all_data_original)
# ob1
folder_ob1 = f"{experiment_folder}/results/measures/El espejo"
all_data_ob1 = load_all_subjects(folder_ob1)
mean_metrics_ob1 = average_metrics_over_subjects(all_data_ob1)

In [50]:
all_data_ob1.head(20)

Unnamed: 0,subj,screen,word_idx,word,sentence_idx,sentence_pos,screen_pos,excluded,FFD,SFD,FPRT,RPD,TFD,RRT,SPRT,FC,RC,LS,RR
0,subject1,1,0,nunca,0,0,0,True,,,,,,,,0,0,,
1,subject1,1,1,me,0,1,1,False,212.0,,430.0,430.0,430.0,,,2,0,0.0,0.0
2,subject1,1,2,pasaron,0,2,2,False,,,,,791.0,,791.0,3,3,1.0,0.0
3,subject1,1,3,cosas,0,3,3,False,287.0,,287.0,721.0,790.0,434.0,503.0,3,2,0.0,1.0
4,subject1,1,4,raras,0,4,4,True,,,,,,,,0,0,,
5,subject1,1,5,me,1,0,5,True,,,,,,,,0,0,,
6,subject1,1,6,refiero,1,1,6,False,529.0,,529.0,529.0,1186.0,,657.0,3,2,0.0,0.0
7,subject1,1,7,a,1,2,7,False,,,,,197.0,,197.0,1,1,1.0,0.0
8,subject1,1,8,historias,1,3,8,False,174.0,,346.0,1185.0,979.0,839.0,633.0,5,3,0.0,1.0
9,subject1,1,9,de,1,4,9,False,195.0,,195.0,1837.0,389.0,1642.0,194.0,2,1,0.0,1.0
