# Adding sentence id / constituent id to events.tsv files

In [1]:

# Neuro
import mne
import mne_bids

# ML/Data
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler

# Tools
from pathlib import Path
import os
import subprocess
from utils import match_list, add_syntax

In [20]:
def read_modify_events(subject, run_id, events_return=False, modality="visual"):
    """Enrich one run's events table with sentence, constituent and word ids.

    Reads the MEG recording and the matching ``*_events.tsv`` file for the
    given subject and run, annotates the words through ``add_syntax``, aligns
    the annotated rows with the triggers found on the ``STI101`` channel, and
    derives sentence- and constituent-level ids and boundaries.

    Parameters
    ----------
    subject : str
        Subject label handed to ``mne_bids.BIDSPath``.
    run_id : int or str
        Run label handed to ``mne_bids.BIDSPath``; also converted with
        ``int()`` before being passed to ``add_syntax``.
    events_return : bool, optional
        Currently unused; kept so existing callers keep working.
    modality : str, optional
        ``"visual"`` or ``"auditory"``. Selects the data root (``get_path``),
        the BIDS task name and the trigger-matching strategy.

    Returns
    -------
    base_meta : pandas.DataFrame
        Copy of the events table as read from disk, with ``sentence_id``,
        ``constituent_id`` and a 1-based ``word_id`` column added.
    meta : pandas.DataFrame
        The fully enriched table (syntax columns, trigger alignment,
        sentence / constituent boundaries, 0-based ``word_id``).

    Raises
    ------
    KeyError
        If ``modality`` has no entry in the task map.
    ValueError
        If ``modality`` has a task mapping but no trigger-matching strategy
        (e.g. ``"fmri"``).
    AssertionError
        If the events file is missing or the trigger matching is too poor.
    """
    print(f"Reading raw files for modality: {modality}")
    path = get_path(modality)
    task_map = {"auditory": "listen", "visual": "read", "fmri": "listen"}
    task = task_map[modality]
    print(f"\n Epoching for run {run_id}, subject: {subject}\n")
    bids_path = mne_bids.BIDSPath(
        subject=subject,
        session="01",
        task=task,
        datatype="meg",
        root=path,
        run=run_id,
    )

    raw = mne_bids.read_raw_bids(bids_path)
    raw.del_proj()  # To fix proj issues
    raw.pick_types(meg=True, stim=True)

    # Locate the events file that sits next to the recording.
    event_name = (
        f"sub-{bids_path.subject}"
        f"_ses-{bids_path.session}"
        f"_task-{bids_path.task}"
        f"_run-{bids_path.run}_events.tsv"
    )
    event_file = (
        Path(path)
        / f"sub-{bids_path.subject}"
        / f"ses-{bids_path.session}"
        / "meg"
        / event_name
    )
    assert event_file.exists(), f"Events file not found: {event_file}"

    # Read the events table and the triggers recorded on the stim channel.
    meta = pd.read_csv(event_file, sep="\t")
    base_meta = meta.copy()  # untouched copy; only the id columns are added to it
    events = mne.find_events(raw, stim_channel="STI101", shortest_event=1)

    # Initial wlength, as presented in the stimuli / triggers to match list
    meta["wlength"] = meta.word.apply(len)

    # Enrich the metadata with the external syntax annotations.
    path_syntax = get_code_path() / "data" / "syntax_new_no_punct"  # testing new syntax
    meta = add_syntax(meta, path_syntax, int(run_id))

    # Sequence id increases after each row flagged as last word; word_id is
    # the 0-based position inside the sequence. Cast to float to keep the
    # dtype produced by the former per-group ``.loc`` assignment.
    meta["sequence_id"] = np.cumsum(meta.is_last_word.shift(1, fill_value=False))
    meta["word_id"] = meta.groupby("sequence_id").cumcount().astype(float)

    # XXX FIXME
    # Making sure that there is no problem with words that contain ""
    meta.word = meta.word.str.replace('"', "")

    # Two cases for match list: is it auditory or visual ?
    if modality == "auditory":
        # For auditory, we match on the time difference between triggers
        word_events = events[events[:, 2] > 1]
        meg_delta = np.round(np.diff(word_events[:, 0] / raw.info["sfreq"]))
        meta_delta = np.round(np.diff(meta.onset.values))
        i, j = match_list(meg_delta, meta_delta)
        assert len(i) > 1000
        # NOTE(review): ``i`` was matched against ``word_events`` (triggers
        # with value > 1), but ``events[i, 0]`` below indexes the unfiltered
        # array. If any trigger has value <= 1 the onsets may be shifted --
        # TODO confirm whether ``word_events[i, 0]`` is what is intended.
    elif modality == "visual":
        # For visual, we match on the word length encoded in the triggers.
        # Here, events are the presented stimuli: with hyphens.
        # Have to make sure meta.word still contains the hyphens.
        # However, the meta.word might have lost the hyphens because
        # of the previous match when adding syntax.
        i, j = match_list(events[:, 2], meta.wlength)
        assert len(i) > (0.9 * len(events))
        assert (events[i, 2] == meta.loc[j].wlength).mean() > 0.95
    else:
        # Previously this fell through and crashed with a NameError on ``j``.
        raise ValueError(
            f"No trigger-matching strategy for modality {modality!r}; "
            "expected 'auditory' or 'visual'."
        )

    meta["has_trigger"] = False
    meta.loc[j, "has_trigger"] = True

    # integrate events to meta for simplicity
    meta.loc[j, "start"] = events[i, 0] / raw.info["sfreq"]

    meta["word_onset"] = True
    meta["word_stop"] = meta.start + meta.duration
    meta["sentence_onset"] = meta.word_id == 0

    # A constituent starts where n_closing drops to 1 from a larger value.
    # Comparisons against the NaN produced by shift() are False, so the first
    # row is never an onset and no fillna is needed.
    prev_closing = meta["n_closing"].shift(1)
    meta["constituent_onset"] = (prev_closing > meta["n_closing"]) & (
        meta["n_closing"] == 1
    )

    # Adding the sentence stop info (start / stop are the earliest / latest
    # word start within the sentence).
    meta["sentence_id"] = np.cumsum(meta.sentence_onset)
    meta["sent_word_id"] = meta.groupby("sentence_id").cumcount().astype(float)
    meta["sentence_start"] = meta.groupby("sentence_id")["start"].transform("min")
    meta["sentence_stop"] = meta.groupby("sentence_id")["start"].transform("max")

    # Adding the constituents stop info
    meta["constituent_id"] = np.cumsum(meta.constituent_onset) + 1
    meta["constituent_start"] = meta.groupby("constituent_id")["start"].transform("min")
    meta["constituent_stop"] = meta.groupby("constituent_id")["start"].transform("max")
    meta["const_word_id"] = meta.groupby("constituent_id").cumcount().astype(float)

    # Copy the ids back onto the original table (aligned on the index);
    # word_id is exported 1-based.
    base_meta["sentence_id"] = meta.sentence_id
    base_meta["constituent_id"] = meta.constituent_id
    base_meta["word_id"] = meta.word_id + 1
    return base_meta, meta

In [21]:
from dataset import read_raw, get_subjects, get_path, add_embeddings, get_code_path
from utils import decod_xy, mne_events
import mne
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import match_list
import spacy

modality = "visual"

path = get_path(modality)
subjects = get_subjects(path)
runs = 9
subject = subjects[0]

# Columns written to the per-run events files.
export_columns = [
    "word",
    "onset",
    "duration",
    "trial_type",
    "sentence_id",
    "constituent_id",
    "word_id",
]

# Make sure the destination exists so the first write does not fail.
output_dir = Path("./new_events")
output_dir.mkdir(parents=True, exist_ok=True)

for run in range(1, runs + 1):
    base_meta, meta = read_modify_events(
        subject, run, events_return=True, modality=modality
    )

    # The files are named *.tsv, so write them tab-separated (the default
    # separator of ``to_csv`` is a comma).
    base_meta[export_columns].to_csv(
        output_dir / f"events_run{run}.tsv", sep="\t", index=False
    )

Reading raw files for modality: visual

 Epoching for run 1, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-01_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idle
    Range : 24000 ... 535999 =     24.000 ...   535.999 secs
Ready.
Reading e

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1466 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Reading raw files for modality: visual

 Epoching for run 2, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-02_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idle
 

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1607 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]
Reading raw files for modality: visual

 Epoching for run 3, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-03_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idl

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1719 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Reading raw files for modality: visual

 Epoching for run 4, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-04_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1493 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 19]
Reading raw files for modality: visual

 Epoching for run 5, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-05_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1372 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 16 20]
Reading raw files for modality: visual

 Epoching for run 6, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-06_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idl

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1696 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]
Reading raw files for modality: visual

 Epoching for run 7, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-07_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idl

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1553 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Reading raw files for modality: visual

 Epoching for run 8, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-08_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idle
 

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1391 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 19 21]
Reading raw files for modality: visual

 Epoching for run 9, subject: 1

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-1/ses-01/meg/sub-1_ses-01_task-read_run-09_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306

  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


1652 events found
Event IDs: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]


In [18]:
# Shift word_id by one (0-based -> 1-based, assuming `meta` comes straight
# from read_modify_events). NOTE(review): this mutates `meta` in place, so
# re-running the cell increments the ids again.
meta.word_id += 1


In [19]:
# Display the word_id column for a quick visual check.
meta.word_id

0        1.0
1        2.0
2        3.0
3        4.0
4        5.0
        ... 
1646     8.0
1647     9.0
1648    10.0
1649    11.0
1650    12.0
Name: word_id, Length: 1651, dtype: float64