## Aux Functions

In [73]:
from pathlib import Path
from datetime import datetime
import json
import os
import shutil
import subprocess

### Others

In [74]:
def dedupe_inplace(seq):
    """Drop repeated items from ``seq`` in place, keeping each first occurrence.

    The surviving items keep their original relative order. Items must be
    hashable because a set is used to remember what was already seen.
    Nothing is returned; ``seq`` itself is modified.
    """
    already_seen = set()
    kept = 0
    for position in range(len(seq)):
        candidate = seq[position]
        if candidate in already_seen:
            continue
        already_seen.add(candidate)
        # Compact unique items towards the front of the sequence.
        seq[kept] = candidate
        kept += 1
    # Whatever remains past ``kept`` is leftover from the compaction.
    del seq[kept:]

### Add a simulation to the list

In [75]:
# add simulations to list
def add(subject=None, story=None, simulations=None):
    """Append simulation file paths to ``simulations`` and remove duplicates.

    Exactly one of three selections is performed:

    - only ``subject`` given: every simulation of that subject is added;
    - only ``story`` given: every simulation of that story is added;
    - both given: the single simulation for that subject/story pair is added.

    Parameters:
        subject: subject identifier, or None.
        story: story identifier, or None.
        simulations: list that is extended in place (required).

    Returns:
        The same ``simulations`` list, after de-duplication.

    Raises:
        ValueError: if ``simulations`` is None, if neither ``subject`` nor
            ``story`` is given, or if the requested subject/story simulation
            file does not exist.
    """
    if simulations is None:
        raise ValueError("Simulations list is required")
    if subject is None and story is None:
        raise ValueError("Provide at least subject or story")

    # Identity checks (``is None``) are used throughout: ``== None`` can be
    # answered arbitrarily by an argument's own ``__eq__``.
    if story is None:
        # add all simulations of a subject
        _add_subject(subject, simulations)
    elif subject is None:
        # add all simulations of a story
        _add_story(story, simulations)
    else:
        # add simulation for a subject and story
        sim_path = _make_simulation_path(subject, story)
        if not os.path.exists(sim_path):
            raise ValueError(f"Simulation does not exist {sim_path}")
        simulations.append(sim_path)

    dedupe_inplace(simulations)
    return simulations
        
# add all simulations of a subject
def _add_subject(subject, simulations):
    """Extend ``simulations`` with every TSV of ``subject`` under ``simulations/``.

    A file belongs to the subject when its name ends with ``<subject>.tsv``.
    The comparison is case-insensitive on both sides, so a subject written
    with uppercase letters still matches. Matches are appended in sorted
    order because the traversal order of ``rglob`` depends on the filesystem.

    Parameters:
        subject: subject identifier used as the file-name suffix.
        simulations: list that is extended in place with path strings.
    """
    # Lowercase the suffix too: the file name below is lowercased before the
    # comparison, so a mixed-case subject would otherwise never match.
    suffix = f"{subject}.tsv".lower()
    matches = sorted(
        str(path)
        for path in Path("simulations").rglob("*.tsv")
        if path.name.lower().endswith(suffix)
    )
    simulations.extend(matches)

# add all simulations of a story
def _add_story(story, simulations):
    """Extend ``simulations`` with every ``*.tsv`` found under ``simulations/<story>``.

    The search is recursive. When the story folder does not exist the list
    is left untouched and no error is raised.
    """
    story_root = Path("simulations", story)
    if not story_root.is_dir():
        return
    for tsv_file in story_root.rglob("*.tsv"):
        simulations.append(str(tsv_file))


def _make_simulation_path(subject, story):
    """Return the relative path of the simulation file for one subject/story pair."""
    template = "simulations/{story}/{story}_ob1_{subject}.tsv"
    return template.format(story=story, subject=subject)

In [76]:
# Paths of the simulation files selected for this experiment; filled in by
# add() in the Setup section.
simulations_list = []

In [77]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import seaborn as sns

import statsmodels.formula.api as smf

## Setup

Add simulations to `simulations_list` using the `add` function. The possible parameter combinations are:

- subject and story, to add a specific subject-story simulation
- only subject, to add all simulations for that subject
- only story, to add all simulations for that story

In [78]:
# Usage: add(subject = *, story = *, simulations=simulations_list)
# Give a subject and a story to add one simulation, or only a subject/story to add all of its simulations.
selected_pairs = [
    ('perfil1', 'Axolotl'),
    ('perfil5', 'Axolotl'),
]
for subject_name, story_name in selected_pairs:
    add(subject=subject_name, story=story_name, simulations=simulations_list)
#add(story='cuento2', simulations=simulations_list)
#add(subject='perfil4',simulations=simulations_list)

# Display the resulting selection as the cell output.
simulations_list

['simulations/Axolotl/Axolotl_ob1_perfil1.tsv',
 'simulations/Axolotl/Axolotl_ob1_perfil5.tsv']

## Experiment

### Make experiment folder & save files used

Creates a folder to save experiment information

In [79]:
# Name the experiment folder after the current local time (minute resolution).
stamp = f"{datetime.now():%Y_%m_%d_%H%M}"
experiment_folder = Path("experiments", stamp)
experiment_folder.mkdir(parents=True, exist_ok=True)
print("Created:", experiment_folder)

Created: experiments/2025_12_17_2302


In [80]:
# Record which simulations went into this experiment inside its folder.
out_path = experiment_folder / "simulations_list.json"
with open(out_path, "w", encoding="utf-8") as f:
    f.write(json.dumps(simulations_list, ensure_ascii=False, indent=2))

### Generate processed folder

Generates the processed folder the analysis script uses

Creates the folder

In [81]:
processed_path = "../data/processed"

# Start from a clean slate: remove any processed folder left by an earlier run.
if not os.path.exists(processed_path):
    print(f"Folder '{processed_path}' does not exist.")
else:
    try:
        shutil.rmtree(processed_path)
    except OSError as e:
        # Report the failure and keep going.
        print(f"Error: {processed_path} : {e.strerror}")
    else:
        print(f"Folder '{processed_path}' and its contents deleted successfully.")

Folder '../data/processed' and its contents deleted successfully.


In [82]:
# Create <processed>/trials (and the processed folder itself); raises
# FileExistsError if the trials folder is already there.
trials_path = f"{processed_path}/trials"
Path(trials_path).mkdir(parents=True)
print(f"Folder '{trials_path}' created successfully.")

Folder '../data/processed/trials' created successfully.


Populates the folder in the correct format with the simulations in simulations_list

In [83]:
# Folder holding one example trial folder per story; the loop below copies
# <examples_directory>/<story> into each subject's processed trials.
examples_directory = '../data/processed_examples'

In [84]:
def add_screens(simulation, destination_directory):
    """Write one ``fixations.pkl`` per screen from a simulation TSV.

    The TSV is read with tab separators and must provide the columns
    ``text_id``, ``fixation_counter`` and ``fixation_duration``. For every
    ``i`` in ``range(number of distinct text_id values)`` the rows with
    ``text_id == i`` are converted to a fixation table and pickled to
    ``<destination_directory>/screen_<i+1>/fixations.pkl``, replacing any
    existing file. The screen folders must already exist.

    Each output row has:

    - ``index``: the row's ``fixation_counter``;
    - ``tStart`` / ``tEnd``: running sum of ``fixation_duration`` before and
      after the fixation, starting at 0 for each screen;
    - ``duration``: the row's ``fixation_duration``;
    - constants ``eye='R'``, ``xAvg=0``, ``yAvg=0``, ``pupilAvg=-1``.

    Parameters:
        simulation: path of the simulation TSV file.
        destination_directory: trial folder (string) containing the
            ``screen_<n>`` sub-folders.
    """
    eye_value = 'R'
    pupil_value = -1
    df = pd.read_csv(simulation, sep="\t")

    for i in range(df["text_id"].nunique()):
        df_screen = df[df["text_id"] == i]
        data = []
        time_acum = 0

        # Read the two needed columns directly. ``iterrows`` builds one Series
        # per row with a single dtype, which turns the integer fixation
        # counter into a float when the other columns are floats.
        for index, duration in zip(df_screen["fixation_counter"],
                                   df_screen["fixation_duration"]):
            t_start = time_acum
            time_acum += duration

            data.append({"index": index,
                         "eye": eye_value,
                         "tStart": t_start,
                         "tEnd": time_acum,
                         "duration": duration,
                         "xAvg": 0,
                         "yAvg": 0,
                         "pupilAvg": pupil_value})

        output_path = destination_directory + f"/screen_{i+1}/fixations.pkl"
        # Remove the existing file first instead of overwriting it in place.
        if os.path.exists(output_path):
            os.remove(output_path)
        # ``to_pickle`` returns None, so its result is not bound to a name.
        pd.DataFrame(data).to_pickle(output_path)

    print(f"Screens added to {destination_directory}")


In [85]:
for simulation in simulations_list:

    # The file stem is split on "_": the first piece is used as the story and
    # the third as the subject (layout produced by _make_simulation_path).
    stem_parts = Path(simulation).stem.split("_")
    story = stem_parts[0]
    subject = stem_parts[2]
    subject_path = trials_path + '/' + subject
    print(f"Processing story {story} for subject {subject}")

    # Ensure the subject's own folder exists. The previous guard tested
    # processed_path, which already exists here, so it never created anything.
    os.makedirs(subject_path, exist_ok=True)
    source_directory = examples_directory + '/' + story
    destination_directory = subject_path + '/' + story

    # Copy the story's example trial folder, then overwrite its fixations
    # with the simulated ones.
    try:
        shutil.copytree(source_directory, destination_directory)
        print(f"Folder '{source_directory}' successfully copied to '{destination_directory}'")
    except FileExistsError:
        print(f"Error: Destination directory '{destination_directory}' already exists.")
    except Exception as e:
        print(f"An error occurred: {e}")

    add_screens(simulation, destination_directory)
    print()
    

Processing story Axolotl for subject perfil1
Folder '../data/processed_examples/Axolotl' successfully copied to '../data/processed/trials/perfil1/Axolotl'
Screens added to ../data/processed/trials/perfil1/Axolotl

Processing story Axolotl for subject perfil5
Folder '../data/processed_examples/Axolotl' successfully copied to '../data/processed/trials/perfil5/Axolotl'
Screens added to ../data/processed/trials/perfil5/Axolotl



### Run analysis script

In [86]:
# Collect the distinct stories, in first-seen order, from the selected
# simulation paths ("simulations/<story>/<file>.tsv").
stories = []
for sim in simulations_list:
    # Path.parts splits on the platform's separators and ignores a leading
    # "./", unlike a manual split on "/".
    story = Path(sim).parts[1]
    print(story)
    if story not in stories:
        stories.append(story)

Axolotl
Axolotl


In [87]:
%%bash
# Delete everything matched by ../results/* so the copy made after the
# analysis only contains output from this run. The glob does not match
# dot-files.
rm -rf ../results/*

In [88]:
# Run the analysis script once per story from the parent directory;
# check=True raises CalledProcessError on a non-zero exit code.
for story in stories:
    analysis_command = ["python3", "em_analysis.py", "--item", story, "--reprocess"]
    subprocess.run(analysis_command, cwd="..", check=True)

Assigning fixations to words...


Processing "Axolotl" trials: 100%|██████████| 1/1 [00:24<00:00, 24.10s/it]
Processing "Axolotl" trials:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting eye-tracking measures from trials...


Processing "Axolotl" trials: 100%|██████████| 1/1 [00:50<00:00, 50.88s/it]


Analysing eye-tracking measures...
         subjs  words  words_excluded  fix  fix_excluded  regressions  skips  out_of_bounds  return_sweeps
Axolotl      2   1544             431  352           195          284   3030              0            144
Total        2   1544             431  352           195          284   3030              0            144
Figure(640x480)
Figure(1400x1200)
Figure(1400x1200)
[INFO] MLM salteado: se necesitan >=2 sujetos e >=2 ítems (hay subj=2, item=1).



R[write to console]: 1: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 2: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 3: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 4: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-libr

In [89]:
source_directory = "../results"
destination_directory = experiment_folder / "results"

# Copy the results folder into this experiment's folder.
try:
    shutil.copytree(source_directory, destination_directory)
except FileExistsError:
    print(f"Error: Destination directory '{destination_directory}' already exists.")
except Exception as e:
    print(f"An error occurred: {e}")
else:
    print(f"Folder '{source_directory}' successfully copied to '{destination_directory}'")
    
    

Folder '../results' successfully copied to 'experiments/2025_12_17_2302/results'
