In [1]:
from dataset import read_raw, get_subjects, get_path
from utils import decod_xy, mne_events
import mne
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import match_list
import spacy
# French spaCy pipeline, loaded once at the top of the notebook.
nlp = spacy.load("fr_core_news_sm")

# Empty module-level accumulators (nothing in this cell fills them).
all_evos, all_scores = [], []

# Dataset location and the subjects found under it.
path = get_path("LPP_read")
subjects = get_subjects(path)
task = "read"

# Debug
# Runs are iterated as 1..runs (inclusive) by the epoching loop.
runs = 9

# Epoch window bounds per linguistic level, looked up as
# epoch_windows[level][f"{start}_min" / f"{start}_max"] and handed to
# mne.Epochs as tmin / tmax (i.e. relative to the locking event).
epoch_windows = {
    "word": dict(
        onset_min=-0.3, onset_max=1.0,
        offset_min=-1.0, offset_max=0.3,
    ),
    "constituent": dict(
        offset_min=-2.0, offset_max=0.5,
        onset_min=-0.5, onset_max=2.0,
    ),
    "sentence": dict(
        offset_min=-4.0, offset_max=1.0,
        onset_min=-1.0, onset_max=4.0,
    ),
}

# Conditions that get epoched: every (level, start) combination.
levels = ("sentence", "word")
starts = ("onset", "offset")
        
def _annotate_metadata(meta_raw):
    """Return a copy of the word-level metadata with onset/stop annotations.

    The input frame is left untouched. The returned copy gains:

    * ``word_onset`` (always True) and ``word_stop`` (``start + duration``);
    * ``sentence_onset`` (``word_id == 0``), ``sentence_id``,
      ``sent_word_id``, ``sentence_start`` and ``sentence_stop``;
    * ``constituent_onset``, ``constituent_id``, ``constituent_start``,
      ``constituent_stop`` and ``const_word_id``.

    Parameters
    ----------
    meta_raw : pandas.DataFrame
        One row per word; must provide the columns ``start``, ``duration``,
        ``word_id`` and ``n_closing``.

    Returns
    -------
    pandas.DataFrame
        Annotated copy of ``meta_raw``.
    """
    meta = meta_raw.copy()

    # Word level: every row is a word onset.
    meta['word_onset'] = True
    meta['word_stop'] = meta.start + meta.duration

    # Sentence level: a sentence starts on its first word.
    meta['sentence_onset'] = meta.word_id == 0

    # Constituent level: a row opens a constituent when the previous row had
    # a larger n_closing and the current n_closing is exactly 1. Comparisons
    # against the NaN produced by shift() on the first row evaluate to False,
    # so no fillna is required.
    n_closing = meta['n_closing']
    meta['constituent_onset'] = (n_closing.shift(1) > n_closing) & (n_closing == 1)

    # Sentence bookkeeping: running id, rank of the word inside its sentence,
    # and first / last word onset of the sentence.
    meta['sentence_id'] = np.cumsum(meta.sentence_onset)
    sentence_starts = meta.groupby('sentence_id')['start']
    meta['sent_word_id'] = sentence_starts.cumcount()
    meta['sentence_start'] = sentence_starts.transform('min')
    # NOTE(review): this is the *onset* of the last word (max of `start`),
    # not its end (`word_stop`). Flagged "to verify" by the original author.
    meta['sentence_stop'] = sentence_starts.transform('max')

    # Constituent bookkeeping, same scheme as for sentences.
    meta['constituent_id'] = np.cumsum(meta.constituent_onset)
    constituent_starts = meta.groupby('constituent_id')['start']
    meta['constituent_start'] = constituent_starts.transform('min')
    # NOTE(review): same caveat as sentence_stop (last word onset, not end).
    meta['constituent_stop'] = constituent_starts.transform('max')
    meta['const_word_id'] = constituent_starts.cumcount()

    return meta


for subject in subjects[2:6]:
    # Epochs grouped by condition; one list per '<level>_<start>' key, one
    # entry appended per run.
    # NOTE(review): this dict is rebuilt for every subject and is not stored
    # anywhere in this cell, so only the last subject's epochs remain after
    # the loop -- confirm that this is intended.
    dict_epochs = {f'{level}_{start}': [] for start in starts for level in levels}

    for run in range(1, runs + 1):
        raw, meta_, events = read_raw(subject, run, events_return=True)
        meta = _annotate_metadata(meta_)

        for start in starts:
            for level in levels:
                epoch_key = f'{level}_{start}'
                context = f'subject {subject}, run {run}, {epoch_key}'

                # Keep only the rows that open a unit of this level. Offsets
                # are epoched from the same rows: mne_events receives `start`
                # and is expected to lock on the offset instead of the onset.
                sel = meta.query(f'{level}_onset==True')
                n_onsets = sel.shape[0]
                assert n_onsets > 10, (
                    f'{context}: only {n_onsets} {level} onsets in the metadata '
                    f'(expected more than 10)'
                )
                # TODO check variance as well for sentences

                # Align the recorded events with the selected rows; the match
                # is made on events[:, 2] versus word length. `j` selects the
                # rows of `sel` (after reset_index) that were matched.
                i, j = match_list(events[:, 2], sel.word.apply(len))
                sel = sel.reset_index().loc[j]
                # Guard against a silently degraded match.
                assert sel.shape[0] > 0.8 * n_onsets, (
                    f'{context}: only {sel.shape[0]} of {n_onsets} {level} onsets '
                    f'were matched to events (expected more than 80%)'
                )

                # TODO: add adaptative baseline
                # NOTE(review): decim=100 is applied here with no filtering
                # visible in this cell -- verify the raw data are low-passed
                # accordingly to avoid aliasing.
                window = epoch_windows[level]
                epochs = mne.Epochs(
                    raw,
                    **mne_events(sel, raw, start=start, level=level),
                    decim=100,
                    tmin=window[f'{start}_min'],
                    tmax=window[f'{start}_max'],
                    event_repeated='drop',  # check event repeated
                    preload=True,
                    baseline=None,
                )  # n_words OR n_constitutent OR n_sentences

                n_missing = epochs.metadata.isnull().sum().sum()
                assert n_missing < 10, (
                    f'{context}: {n_missing} missing values in the epochs metadata '
                    f'(expected fewer than 10)'
                )

                dict_epochs[epoch_key].append(epochs)

            




 Epoching for run 1, subject: 14

Opening raw data file /home/is153802/data/LPP_MEG_visual/sub-14/ses-01/meg/sub-14_ses-01_task-read_run-01_meg.fif...
    Read a total of 13 projection items:
        grad_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        grad_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v1 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v2 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v3 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v4 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v5 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v6 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v7 (1 x 306)  idle
        mag_ssp_upright.fif : PCA-v8 (1 x 306)  idle
    Range : 30000 ... 496999 =     30.000 ...   496.999 secs
Ready.
Reading channel info from /home/is153802/data

  raw = mne_bids.read_raw_bids(bids_path)

The search_str was "/home/is153802/data/LPP_MEG_visual/sub-14/**/meg/sub-14_ses-01*events.tsv"
  raw = mne_bids.read_raw_bids(bids_path)
  raw = mne_bids.read_raw_bids(bids_path)


AssertionError: 