# Adding sentence id / constituent id to events.tsv files

In [None]:

# Neuro
import mne
import mne_bids

# ML/Data
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler

# Tools
from pathlib import Path
import os
import subprocess
from utils import match_list, add_syntax

In [None]:
def read_modify_events(subject, run_id, events_return=False, modality="visual"):
    """Load one run's events table and annotate it with sentence/constituent ids.

    The events file is located under the dataset root returned by
    ``get_path(modality)``, read as a tab-separated table, enriched through
    ``add_syntax`` and then annotated with word, sentence and constituent
    bookkeeping columns.

    Parameters
    ----------
    subject : str
        Subject label handed to ``mne_bids.BIDSPath``.
    run_id : int or str
        Run identifier. It is handed to ``mne_bids.BIDSPath`` and converted
        with ``int()`` before being passed to ``add_syntax``.
    events_return : bool, default False
        Not read by this function; kept so existing callers that pass it
        keep working.
    modality : {"visual", "auditory", "fmri"}, default "visual"
        Selects the dataset root (via ``get_path``) and the task label
        ("read" for visual, "listen" otherwise).

    Returns
    -------
    base_meta : pandas.DataFrame
        Copy of the table as read from disk, plus the ``sentence_id`` and
        ``constituent_id`` columns (assigned by index alignment with
        ``meta``).
    meta : pandas.DataFrame
        The enriched table: output of ``add_syntax`` plus ``wlength``,
        ``sequence_id``, ``word_id``, ``word_onset``, ``word_stop``,
        ``sentence_onset``, ``constituent_onset``, ``sentence_id``,
        ``sent_word_id``, ``sentence_start``, ``sentence_stop``,
        ``constituent_id``, ``constituent_start``, ``constituent_stop`` and
        ``const_word_id``.

    Raises
    ------
    KeyError
        If ``modality`` is not one of the supported values.
    AssertionError
        If the expected events file does not exist.
    """
    print(f"Reading raw files for modality: {modality}")
    path = get_path(modality)
    task_map = {"auditory": "listen", "visual": "read", "fmri": "listen"}
    task = task_map[modality]
    print(f"\n Epoching for run {run_id}, subject: {subject}\n")
    bids_path = mne_bids.BIDSPath(
        subject=subject,
        session="01",
        task=task,
        datatype="meg",
        root=path,
        run=run_id,
    )

    # Build the events file path by hand; the entity values are read back
    # from ``bids_path`` so they carry whatever formatting BIDSPath applied.
    event_name = (
        f"sub-{bids_path.subject}"
        f"_ses-{bids_path.session}"
        f"_task-{bids_path.task}"
        f"_run-{bids_path.run}_events.tsv"
    )
    event_file = (
        Path(path)
        / f"sub-{bids_path.subject}"
        / f"ses-{bids_path.session}"
        / "meg"
        / event_name
    )
    assert event_file.exists(), f"Events file not found: {event_file}"

    # Read events and keep an untouched copy to return alongside the
    # enriched table.
    meta = pd.read_csv(event_file, sep="\t")
    base_meta = meta.copy()

    meta["wlength"] = meta["word"].apply(len)

    # Enrich the metadata with the external syntax files (the "no punct"
    # variant of the syntax directory is the one currently under test).
    path_syntax = get_code_path() / "data" / "syntax_new_no_punct"
    meta = add_syntax(meta, path_syntax, int(run_id))

    # A new sequence starts on the row that follows an ``is_last_word`` row.
    meta["sequence_id"] = (
        meta["is_last_word"].shift(1, fill_value=False).cumsum()
    )
    # 0-based position inside the sequence. Cast to float for backward
    # compatibility with the dtype of the former per-group assignment.
    meta["word_id"] = meta.groupby("sequence_id").cumcount().astype(float)

    meta["word_onset"] = True
    meta["word_stop"] = meta["start"] + meta["duration"]
    meta["sentence_onset"] = meta["word_id"] == 0

    # A row opens a constituent when ``n_closing`` is 1 and the previous
    # row's value was larger. Comparisons against the NaN produced by the
    # shift are False, so the first row is never flagged.
    n_closing = meta["n_closing"]
    meta["constituent_onset"] = (n_closing.shift(1) > n_closing) & (
        n_closing == 1
    )

    # Sentence-level info, broadcast to every word of the sentence.
    # NOTE(review): ``sentence_stop`` is the latest word *start*, not the
    # latest ``word_stop`` - confirm this is intended.
    meta["sentence_id"] = meta["sentence_onset"].cumsum()
    by_sentence = meta.groupby("sentence_id")
    meta["sent_word_id"] = by_sentence.cumcount().astype(float)
    meta["sentence_start"] = by_sentence["start"].transform("min")
    meta["sentence_stop"] = by_sentence["start"].transform("max")

    # Constituent-level info, broadcast to every word of the constituent.
    meta["constituent_id"] = meta["constituent_onset"].cumsum()
    by_constituent = meta.groupby("constituent_id")
    meta["constituent_start"] = by_constituent["start"].transform("min")
    meta["constituent_stop"] = by_constituent["start"].transform("max")
    meta["const_word_id"] = by_constituent.cumcount().astype(float)

    # Assignment aligns on the index.
    # NOTE(review): assumes ``add_syntax`` keeps the original row index -
    # confirm, otherwise these columns may contain NaN.
    base_meta["sentence_id"] = meta["sentence_id"]
    base_meta["constituent_id"] = meta["constituent_id"]
    return base_meta, meta

In [None]:
# Annotate the events of every run (runs are numbered from 1).
# NOTE(review): ``runs``, ``subject`` and ``modality`` are not defined in the
# visible cells and must be set before this loop is executed.
for run in range(1, runs + 1):
    # The loop previously called ``read_raw``, which is neither defined nor
    # imported in the visible cells; ``read_modify_events`` accepts these
    # arguments and returns the ``(base_meta, meta)`` pair unpacked here.
    base_meta, meta = read_modify_events(
        subject, run, events_return=True, modality=modality
    )
