# Attempt to decode word end / sentence end

In [None]:
from dataset import read_raw, get_subjects, get_path
from utils import decod_xy
import mne
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import match_list
import spacy

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
path = get_path("LPP_read")
subjects = get_subjects(path)
task = "read"

# Debug: number of runs processed per subject (runs 1..runs).
runs = 3

# Linguistic unit to epoch on; must be one of the keys of `epoch_windows`.
level = 'word'

# Epoch limits per unit, keyed as "<start>_min" / "<start>_max".
# They are looked up with `level` and `start` and handed to mne.Epochs as
# tmin / tmax further down.
epoch_windows = {
    "word": {
        "onset_min": -0.3,
        "onset_max": 1.0,
        "offset_min": -1.0,
        "offset_max": 0.3,
    },
    "constituent": {
        "offset_min": -2.0,
        "offset_max": 0.5,
        "onset_min": -0.5,
        "onset_max": 2.0,
    },
    "sentence": {
        "offset_min": -4.0,
        "offset_max": 1.0,
        "onset_min": -1.0,
        "onset_max": 4.0,
    },
}

# Boundary of the unit the epochs are locked to; selects the "<start>_*" keys.
start = 'onset'

# Pipeline: for each subject, epoch every run on the chosen unit boundary,
# concatenate the runs, decode, and collect one score per time sample.
#
# NOTE(review): `mne_events` and `embeddings` are used below but are neither
# imported nor defined in the visible part of this notebook -- presumably they
# come from another cell or from `utils`; confirm before running.

# Window and key for the selected level / boundary do not change per run.
window = epoch_windows[level]
epoch_key = f'{level}_{start}'

# Scores are accumulated as plain dicts across subjects and converted to a
# DataFrame once, after the loop, so that `.append` keeps working when more
# than one subject is processed.
all_scores = []

for subject in subjects[2:3]:  # debug: single subject
    all_epochs = []
    for run in range(1, runs + 1):
        # Recording, word-level metadata and trigger events for this run.
        raw, meta_, events = read_raw(subject, run, events_return=True)
        meta = meta_.copy()

        # Word onset: every metadata row is a word, hence always True.
        meta['word_onset'] = True
        meta['word_stop'] = meta.start + meta.duration

        # Sentence onset: rows whose word_id is 0.
        meta['sentence_onset'] = meta.word_id == 0

        # Constituent onset: n_closing falls to exactly 1 from a larger value
        # on the previous row. Comparisons against the NaN produced by the
        # shift on the first row evaluate to False, so no fillna is needed.
        prev_closing = meta['n_closing'].shift(1)
        meta['constituent_onset'] = (
            (prev_closing > meta['n_closing']) & (meta['n_closing'] == 1)
        )

        # Start time of the sentence each word belongs to.
        meta['sentence_id'] = np.cumsum(meta.sentence_onset)
        meta['sentence_start'] = (
            meta.groupby('sentence_id')['start'].transform('min')
        )

        # Start time of the constituent each word belongs to.
        meta['constituent_id'] = np.cumsum(meta.constituent_onset)
        meta['constituent_start'] = (
            meta.groupby('constituent_id')['start'].transform('min')
        )

        # Keep only the rows that open a unit of the requested level.
        sel = meta.query(f'{level}_onset==True')
        n_selected = sel.shape[0]
        assert n_selected > 10, "too few onsets selected for this level"
        # TODO check variance as well for sentences

        # Align the recorded events with the selected rows; the third events
        # column is matched against word lengths.
        _, j = match_list(events[:, 2], sel.word.apply(len))
        sel = sel.reset_index().loc[j]
        # Guard against a silent mismatch: most selected rows must survive.
        assert sel.shape[0] > 0.8 * n_selected, "event/metadata matching lost too many rows"

        # Epoch on the selected rows. With start='offset', mne_events is
        # expected to lock on the end of each unit instead of its onset.
        # TODO: add adaptative baseline
        epochs = mne.Epochs(
            raw,
            **mne_events(sel, raw, start=start, level=level),
            decim=100,
            tmin=window[f'{start}_min'],
            tmax=window[f'{start}_max'],
            event_repeated='drop',  # check event repeated
            preload=True,
            baseline=None,
        )  # n_words OR n_constitutent OR n_sentences

        all_epochs.append(epochs)

    # Give every run the same device-to-head transform (taken from the first
    # run), then concatenate once, outside the alignment loop.
    reference_dev_head_t = all_epochs[0].info["dev_head_t"]
    for epo in all_epochs:
        epo.info["dev_head_t"] = reference_dev_head_t
    epochs = mne.concatenate_epochs(all_epochs)

    # Decode on the concatenation of all runs, MEG channels only.
    epochs = epochs.load_data().pick_types(meg=True, stim=False, misc=False)
    X = epochs.get_data()
    y = epochs.metadata.word
    # NOTE(review): `embeddings` must be provided elsewhere (see header note).
    R_vec = decod_xy(X, embeddings)
    scores = np.mean(R_vec, axis=1)

    for t, score in enumerate(scores):
        all_scores.append(dict(subject=subject, score=score, start=start, level=level, t=epochs.times[t]))

all_scores = pd.DataFrame(all_scores)


In [2]:
from dataset import read_raw, get_subjects, get_path
from utils import decod_xy
import mne
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import match_list
import spacy

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
path = get_path("LPP_read")
subjects = get_subjects(path)
task = "read"

# Debug: number of runs processed (runs 1..runs).
runs = 3

# Linguistic unit to epoch on; must be one of the keys of `epoch_windows`.
level = 'word'

# Epoch limits per unit, keyed as "<start>_min" / "<start>_max".
# They are looked up with `level` and `start` and handed to mne.Epochs as
# tmin / tmax further down.
epoch_windows = {
    "word": {
        "onset_min": -0.3,
        "onset_max": 1.0,
        "offset_min": -1.0,
        "offset_max": 0.3,
    },
    "constituent": {
        "offset_min": -2.0,
        "offset_max": 0.5,
        "onset_min": -0.5,
        "onset_max": 2.0,
    },
    "sentence": {
        "offset_min": -4.0,
        "offset_max": 1.0,
        "onset_min": -1.0,
        "onset_max": 4.0,
    },
}

# Boundary of the unit the epochs are locked to; selects the "<start>_*" keys.
start = 'onset'

# Pipeline for a single subject: epoch every run on the chosen unit boundary,
# concatenate the runs and extract the decoding inputs.
#
# NOTE(review): `mne_events` is used below but is neither imported nor defined
# in the visible part of this notebook -- presumably it comes from another
# cell or from `utils`; confirm before running.

# Window and key for the selected level / boundary do not change per run.
window = epoch_windows[level]
epoch_key = f'{level}_{start}'

subject = subjects[2]
all_epochs = []
for run in range(1, runs + 1):
    # Recording, word-level metadata and trigger events for this run.
    raw, meta_, events = read_raw(subject, run, events_return=True)
    meta = meta_.copy()

    # Word onset: every metadata row is a word, hence always True.
    meta['word_onset'] = True
    meta['word_stop'] = meta.start + meta.duration

    # Sentence onset: rows whose word_id is 0.
    meta['sentence_onset'] = meta.word_id == 0

    # Constituent onset: n_closing falls to exactly 1 from a larger value on
    # the previous row. Comparisons against the NaN produced by the shift on
    # the first row evaluate to False, so no fillna is needed.
    prev_closing = meta['n_closing'].shift(1)
    meta['constituent_onset'] = (
        (prev_closing > meta['n_closing']) & (meta['n_closing'] == 1)
    )

    # Start time of the sentence each word belongs to.
    meta['sentence_id'] = np.cumsum(meta.sentence_onset)
    meta['sentence_start'] = (
        meta.groupby('sentence_id')['start'].transform('min')
    )

    # Start time of the constituent each word belongs to.
    meta['constituent_id'] = np.cumsum(meta.constituent_onset)
    meta['constituent_start'] = (
        meta.groupby('constituent_id')['start'].transform('min')
    )

    # Keep only the rows that open a unit of the requested level.
    sel = meta.query(f'{level}_onset==True')
    n_selected = sel.shape[0]
    assert n_selected > 10, "too few onsets selected for this level"
    # TODO check variance as well for sentences

    # Align the recorded events with the selected rows; the third events
    # column is matched against word lengths.
    _, j = match_list(events[:, 2], sel.word.apply(len))
    sel = sel.reset_index().loc[j]
    # Guard against a silent mismatch: most selected rows must survive.
    assert sel.shape[0] > 0.8 * n_selected, "event/metadata matching lost too many rows"

    # Epoch on the selected rows. With start='offset', mne_events is expected
    # to lock on the end of each unit instead of its onset.
    # TODO: add adaptative baseline
    epochs = mne.Epochs(
        raw,
        **mne_events(sel, raw, start=start, level=level),
        decim=100,
        tmin=window[f'{start}_min'],
        tmax=window[f'{start}_max'],
        event_repeated='drop',  # check event repeated
        preload=True,
        baseline=None,
    )  # n_words OR n_constitutent OR n_sentences

    all_epochs.append(epochs)

# Give every run the same device-to-head transform (taken from the first
# run), then concatenate once, outside the alignment loop.
reference_dev_head_t = all_epochs[0].info["dev_head_t"]
for epo in all_epochs:
    epo.info["dev_head_t"] = reference_dev_head_t
epochs = mne.concatenate_epochs(all_epochs)

# Decode on the concatenation of all runs, MEG channels only.
epochs = epochs.load_data().pick_types(meg=True, stim=False, misc=False)
X = epochs.get_data()
y = epochs.metadata.word
# The scoring step below is disabled: it is kept as a bare string literal, so
# nothing inside it is executed.
"""
R_vec = decod_xy(X, embeddings)
scores = np.mean(R_vec, axis=1)

for t, score in enumerate(scores):
    all_scores.append(dict(subject=subject, score=score, start=start, level=level, t=epochs.times[t]))

all_scores = pd.DataFrame(all_scores)
"""

FileNotFoundError: [Errno 2] No such file or directory: '../../data/data_path.txt'