## Aux Functions

In [4]:
from pathlib import Path
from datetime import datetime
import json
import os
import shutil
import subprocess
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import seaborn as sns
import statsmodels.formula.api as smf

In [19]:
from scipy.io import loadmat

# Stimulus to inspect: the .mat file name without its extension.
item = "Axolotl"
mat = loadmat("../data/stimuli/" + item + ".mat", simplify_cells=True)

# `lines` is a list of dicts, one per displayed line of text.
lines = mat["lines"]
print("Number of lines:", len(lines))

# Dump every line together with its screen number and spaces_pos array.
for i, line in enumerate(lines):
    text, spaces_pos, screen = line["text"], line["spaces_pos"], line["screen"]
    print(f"\nLine {i} (screen {screen})")
    print("text:", text)
    print("spaces_pos:", spaces_pos)


Number of lines: 232

Line 0 (screen 1)
text:    Hubo un tiempo en que yo pensaba mucho en los
spaces_pos: [144 159 173 187 257 299 397 440 496 538 650 735 777 833]

Line 1 (screen 1)
text: axolotl. Iba a verlos al acuario del Jardín des
spaces_pos: [144 271 327 355 454 496 608 664 763 819]

Line 2 (screen 1)
text: Plantes y me quedaba horas mirándolos, observando su
spaces_pos: [144 257 285 327 440 524 692 847 889]

Line 3 (screen 1)
text: inmovilidad, sus oscuros movimientos. Ahora soy un
spaces_pos: [144 327 383 496 678 763 819 861]

Line 4 (screen 1)
text: axolotl.
spaces_pos: [144 271]

Line 5 (screen 1)
text:    El azar me llevó hasta ellos una mañana de
spaces_pos: [144 159 173 187 229 299 341 425 510 594 650 749 791]

Line 6 (screen 1)
text: primavera en que París abría su cola de pavo real
spaces_pos: [144 285 327 383 468 552 594 664 706 777 847]

Line 7 (screen 1)
text: después de la lenta invernada. Bajé por el bulevar de
spaces_pos: [144 257 299 341 425 580 650 706 749 861 

In [6]:
from pathlib import Path
from scipy.io import loadmat
import pandas as pd
import json

def first_subject_with_stimulus(trials_root, item):
    """Return the first ``<subject>/<item>`` directory that contains a ``lines.pkl``.

    Entries directly under ``trials_root`` are visited in sorted order. The
    first one that has an ``item`` sub-directory with a ``lines.pkl`` anywhere
    beneath it (searched recursively) is returned.

    Parameters
    ----------
    trials_root : str or Path
        Directory whose children are scanned (one entry per subject).
    item : str
        Name of the stimulus sub-directory to look for inside each entry.

    Returns
    -------
    Path or None
        The matching ``<subject>/<item>`` directory, or ``None`` when no entry
        qualifies or ``trials_root`` is not an existing directory.
    """
    import warnings

    trials_root = Path(trials_root)

    # A missing root is reported as "nothing found" instead of crashing in
    # iterdir(); the warning keeps a mistyped path visible to the user.
    if not trials_root.is_dir():
        warnings.warn(
            f"trials root {str(trials_root)!r} is not a directory; no trial data will be used",
            stacklevel=2,
        )
        return None

    for subj_dir in sorted(trials_root.iterdir()):
        item_dir = subj_dir / item
        # is_dir() is False for plain files under the root, so they are skipped.
        if item_dir.is_dir() and any(item_dir.rglob("lines.pkl")):
            return item_dir
    return None

def stimuli_to_json(item, stimuli_path="stimuli", trials_root="data/processed/trials"):
    """Convert one stimulus ``.mat`` file into a list of per-screen word records.

    The file ``{stimuli_path}/{item}.mat`` must contain a ``lines`` struct with
    the fields ``text``, ``spaces_pos`` and ``screen``. Lines are grouped by
    their ``screen`` value and processed in ascending screen order.

    Parameters
    ----------
    item : str
        Stimulus name (the ``.mat`` file name without extension).
    stimuli_path : str or Path
        Directory containing the ``.mat`` files.
    trials_root : str or Path
        Root passed to ``first_subject_with_stimulus`` to locate a
        ``screen_<id>/lines.pkl`` file per screen (used only for ``y``).

    Returns
    -------
    list of dict
        One ``{"screen_index": screen_id - 1, "words": [...]}`` entry per
        screen. Each word record has the keys ``word``, ``x``, ``y``, ``line``
        (line index within the screen), ``index`` (running index over the whole
        stimulus) and ``screen_word_index`` (running index within the screen).
        ``y`` is ``None`` when no ``lines.pkl`` is available for the screen or
        it has too few entries for that line.

    Raises
    ------
    IndexError
        If a line has more space-delimited tokens than ``spaces_pos`` has
        boundaries for.
    """
    mat = loadmat(f"{stimuli_path}/{item}.mat", simplify_cells=True)
    lines = mat["lines"]
    # With simplify_cells=True a one-element struct array is collapsed to a
    # single dict; wrap it so a one-line stimulus is not iterated key by key.
    if isinstance(lines, dict):
        lines = [lines]

    # Group lines by screen, keeping their original order inside each screen.
    screens = {}
    for line in lines:
        screens.setdefault(int(line["screen"]), []).append(line)

    item_trial_path = first_subject_with_stimulus(trials_root, item)

    out = []
    global_index = 0
    for screen_id in sorted(screens):
        words_list = []

        y_bounds = None
        if item_trial_path:
            lines_pkl = Path(item_trial_path) / f"screen_{screen_id}" / "lines.pkl"
            if lines_pkl.exists():
                # NOTE(review): read_pickle unpickles the file; only point
                # trials_root at trusted, locally produced data.
                y_bounds = pd.read_pickle(lines_pkl)["y"].to_numpy()

        for line_idx, line in enumerate(screens[screen_id]):
            spaces_pos = line["spaces_pos"]

            # y is the midpoint of consecutive entries of the "y" column
            # (presumably the line's top/bottom boundaries - TODO confirm).
            y = None
            if y_bounds is not None and line_idx + 1 < len(y_bounds):
                y = (y_bounds[line_idx] + y_bounds[line_idx + 1]) / 2

            # spaces_pos holds one boundary per space-delimited token plus the
            # line start, so leading indentation spaces own boundaries too.
            # Splitting on a single space keeps the empty tokens and therefore
            # keeps every real word aligned with its own pair of boundaries
            # (str.split() without arguments would shift indented lines left).
            for token_idx, token in enumerate(line["text"].split(" ")):
                word = token.strip()
                if not word:
                    continue
                x = (spaces_pos[token_idx] + spaces_pos[token_idx + 1]) / 2
                words_list.append({
                    "word": word,
                    "x": float(x),
                    "y": None if y is None else float(y),
                    "line": line_idx,
                    "index": global_index,
                    "screen_word_index": len(words_list)
                })
                global_index += 1

        out.append({
            "screen_index": screen_id - 1,
            "words": words_list
        })

    return out

def build_all_stimuli_combined(stimuli_path="stimuli", trials_root="data/processed/trials", out_file="all_stimuli.json"):
    """Convert every ``.mat`` stimulus in a directory and write one combined JSON file.

    Each ``*.mat`` file directly inside ``stimuli_path`` is converted with
    ``stimuli_to_json`` (in sorted file order) and stored under its file stem.
    The resulting mapping is written to ``out_file`` as UTF-8 JSON with
    non-ASCII characters kept as-is and an indent of 2. A progress line is
    printed per stimulus. Nothing is returned.
    """
    stimuli_dir = Path(stimuli_path)
    mat_files = sorted(stimuli_dir.glob("*.mat"))

    combined = {}
    for mat_path in mat_files:
        name = mat_path.stem
        combined[name] = stimuli_to_json(name, stimuli_path=stimuli_dir, trials_root=trials_root)
        print(f"processed {name}")

    with open(out_file, "w", encoding="utf-8") as handle:
        json.dump(combined, handle, ensure_ascii=False, indent=2)


In [10]:
# Build the combined JSON from the stimuli and processed-trials folders.
build_all_stimuli_combined(
    stimuli_path="../data/stimuli",
    trials_root="../data/processed_all_humans/trials",
    out_file="all_stimuli.json",
)

processed Ahora debería reírme, si no estuviera muerto
processed Axolotl
processed Bienvenido Bob
processed Buenos Aires
processed Carta a una señorita en París
processed Carta abierta
processed Cómo funciona caminar en la nieve
processed Cómo funcionan los bolsillos
processed Educar para escalar y bucear
processed El almohadón de plumas
processed El espejo
processed El golpe de gracia
processed El loco cansino
processed El negro de París
processed El origen de las especies
processed Embarrar la magia
processed La canción que cantábamos todos los días
processed La de la Obsesión por la Patineta
processed La gallina degollada
processed La lluvia de fuego
processed La máscara de la Muerte Roja
processed La noche de los feos
processed La salud de los enfermos
processed Las fotografías
processed Rebeca
processed Rubí y el lago danzante
processed Sombras sobre vidrio esmerilado 1
processed Sombras sobre vidrio esmerilado 2
processed Test
processed Una rosa para Emilia
processed Wakefield


In [13]:
import json

# Load the combined stimuli file (the out_file written by build_all_stimuli_combined).
with open("all_stimuli.json", mode="r", encoding="utf-8") as f:
    data = json.load(f)

# Overview: how many stimuli there are and the first few names.
stimuli_names = list(data)
print("Number of stimuli:", len(data))
print("Some stimuli:", stimuli_names[:5])

# Drill into the first stimulus.
item = stimuli_names[0]
print("Inspecting item:", item)
print("Number of screens:", len(data[item]))
first_screen = data[item][0]  # dict for the first screen
print("First screen data:", first_screen)

# First word record of that screen.
print("First word entry of first screen:", first_screen["words"][0])

# For a more readable view of a small slice:
#import pprint
#pprint.pprint(data[item][0]["words"][:5])


Number of stimuli: 31
Some stimuli: ['Ahora debería reírme, si no estuviera muerto', 'Axolotl', 'Bienvenido Bob', 'Buenos Aires', 'Carta a una señorita en París']
Inspecting item: Ahora debería reírme, si no estuviera muerto
Number of screens: 4
First screen data: {'screen_index': 0, 'words': [{'word': 'Una', 'x': 304.0, 'y': 173.43839835728954, 'line': 0, 'index': 0, 'screen_word_index': 0}, {'word': 'vez', 'x': 360.0, 'y': 173.43839835728954, 'line': 0, 'index': 1, 'screen_word_index': 1}, {'word': 'hubo', 'x': 423.0, 'y': 173.43839835728954, 'line': 0, 'index': 2, 'screen_word_index': 2}, {'word': 'dos', 'x': 486.0, 'y': 173.43839835728954, 'line': 0, 'index': 3, 'screen_word_index': 3}, {'word': 'mujeres', 'x': 570.0, 'y': 173.43839835728954, 'line': 0, 'index': 4, 'screen_word_index': 4}, {'word': 'casadas', 'x': 682.0, 'y': 173.43839835728954, 'line': 0, 'index': 5, 'screen_word_index': 5}, {'word': 'que', 'x': 766.0, 'y': 173.43839835728954, 'line': 0, 'index': 6, 'screen_word_i

In [9]:
# NOTE(review): the recorded output of this cell is an empty list and its
# execution count (9) is lower than that of the cell that loads `data` (13),
# so the output shown is stale - re-run after loading all_stimuli.json.
list(data)[:5]

[]