In [1]:
import preprocess_eegdata
import numpy as np
from glob import glob
import os
import sys
import mne_bids
from contextlib import contextmanager
import mne
from datetime import datetime
%load_ext autoreload

%autoreload 2

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Directory where your raw data is stored (folder containing brainvision,
# eyetracking asc, and behavior files).
parent_dir = r'D:\SCR_raw_data'
# Where to output data.
data_dir = r'E:\datasets\supracapacity'
# exist_ok replaces the separate "does it exist?" check, so there is no
# window between checking and creating the directory.
os.makedirs(data_dir, exist_ok=True)

file_prefix = 'SCR_'  # prefix to your vhdr files. Assuming it is in the format [prefix]_[number]

overwrite_subs = False  # if you want to overwrite the data for a subject, set to True

subject_dirs = []  # if you want to analyze a specific subset of subjects
if len(subject_dirs) == 0:
    # No explicit subset requested: take every entry of the raw-data directory.
    subject_dirs = sorted(glob('*', root_dir=parent_dir))
if not overwrite_subs:
    # List the output directory ONCE and keep the already-written subject ids
    # ('sub-XX' -> 'XX') in a set, instead of re-running glob for every subject.
    processed_subjects = {f[len('sub-'):] for f in glob('sub-*', root_dir=data_dir)}
    subject_dirs = [sub for sub in subject_dirs if sub not in processed_subjects]
    

# Name of the experiment.
EXPERIMENT_NAME = 'supracapacity'

# Epoch window relative to your designated timelock code, in seconds
# (start is negative, i.e. before the timelock code).
TRIAL_START_TIME, TRIAL_END_TIME = -0.4, 1.4

# Window used for baseline correction.
BASELINE_TIME = (-0.25, 0)

# Window used for artifact rejection (may be shorter than the epoch if you
# want longer epochs).
REJECTION_TIME = [-0.25, 1.0]

# Sampling rate in Hz; data will be resampled if it differs from 1k.
SRATE = 1000

# Filter frequencies; use None to skip one of the filtering steps.
FILTER_FREQS = (None, 80)

# NOTE(review): not referenced by any of the cells shown in this notebook —
# confirm whether the linear-fit rejection is supposed to receive it.
LINEAR_R2 = 0.3


# This should be a dict of names of ALL the event codes that appear.
# Stimulus codes are built as <tens digit for the task/display condition> +
# <set size>; the single-digit codes mark the trial structure. Insertion order
# is: trial start, all stimulus codes, then the delay/probe markers.
event_names_dict = {
    'trl_start': 1,
    **{
        f'{task}/ss{set_size}/{display}': tens + set_size
        for tens, task, display in (
            (10, 'attention', 'no_placeholders'),
            (20, 'attention', 'placeholders'),
            (30, 'memory', 'no_placeholders'),
            (40, 'memory', 'placeholders'),
        )
        for set_size in (2, 4, 6, 8)
    },
    'delay_start': 2,
    'attn_probe': 3,
    'delay_end': 4,
}


# The rest of this cell is specific to the supracapacity experiment; you might
# not need all of it. What the pipeline really needs is two dicts:
#   event_dict      - name: number pairings for all the TRIAL event codes
#   event_code_dict - code: sequence pairings, i.e. the ordered list of event
#                     codes that makes up a trial of that type
# Example: a trial with fixation (1) -> SS2 stimulus (12) -> delay (3) ->
# test (4) that you want to map to code 12 is written as {12: [1, 12, 3, 4]}.

event_dict = dict(event_names_dict)  # start from the base names, add targets below
event_code_dict = {}                 # trial code -> expected sequence of event codes
stim_conditions = []                 # every trial code defined in event_code_dict

for key, ev in event_names_dict.items():
    if ev <= 10:
        # single-digit codes are not stimulus conditions; nothing to derive
        continue
    # Each stimulus code gets a '/TARGET' twin at code + 1.
    event_dict[key + '/TARGET'] = ev + 1
    # Sequence for the plain condition ...
    event_code_dict[ev] = [1, ev, 2, 4, 4]
    # ... and for the target condition, which additionally contains code 3
    # followed by a second code 2. Note the sequence still holds `ev` itself.
    event_code_dict[ev + 1] = [1, ev, 2, 3, 2, 4, 4]
    stim_conditions += [ev, ev + 1]



# Which position within each event_code_dict sequence to timelock to.
# TODO: make this dynamic
POSITION_TO_TIMELOCK = 1

# Trials to drop by hand. Must be in the form of
# {'subject number': [list of ints]}.
EEG_TRIALS_DROP = dict()
# Edge case when we forgot to start the recording: manually drop certain trials.
EYE_TRIALS_DROP = {'10': list(range(4))}

# Channels to delete from the dataset entirely. Recommendation is to leave
# this blank and instead set REJ_TRIALS_IGNORE later on.
DROP_CHANNELS = list()


In [148]:
# Build the preprocessing helper from the configuration cell above. The
# per-subject loop later uses it to import data, make epochs, run the artifact
# rejection checks, and save the results.
pre = preprocess_eegdata.Preprocess(
    # where data is written / where the raw data is read from
    data_dir=data_dir,
    root_dir=parent_dir,
    experiment_name=EXPERIMENT_NAME,
    # sampling rate and epoch window
    srate=SRATE,
    trial_start=TRIAL_START_TIME,
    trial_end=TRIAL_END_TIME,
    # event-code definitions
    event_names=event_names_dict,
    event_dict=event_dict,
    stim_conditions=stim_conditions,
    event_code_dict=event_code_dict,
    timelock_ix=POSITION_TO_TIMELOCK,
    # baseline / rejection windows and remaining options
    baseline_time=BASELINE_TIME,
    rejection_time=REJECTION_TIME,
    no_et_spaces=False,
    drop_channels=DROP_CHANNELS,
    filter_freqs=FILTER_FREQS,
)


In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [149]:
@contextmanager
def redirect_stdout(new_stdout):
    """Temporarily send both sys.stdout and sys.stderr to ``new_stdout``.

    Used to write the output to a log file. The streams that were active on
    entry are put back when the ``with`` block exits, including when it exits
    through an exception. The context manager yields ``None``.
    """
    original_streams = (sys.stdout, sys.stderr)
    sys.stdout = sys.stderr = new_stdout
    try:
        yield
    finally:
        sys.stdout, sys.stderr = original_streams


def rej_counts(x):
    """Format how many trials a rejection mask flags, as 'count (percent%)'.

    `x` is a boolean array indexed as (trials, channels) in this cell: a trial
    counts as rejected when any channel in its row is flagged, and the percent
    is relative to the number of rows.
    """
    n_rejected = x.any(1).sum()
    return f'{n_rejected} ({round(n_rejected / x.shape[0] * 100,1)}%)'


# Everything printed inside this block (stdout AND stderr) is appended to the
# log file instead of being shown in the notebook.
with open('preprocessing_log.txt','a+') as f:
    with redirect_stdout(f):

        print('\n\n\n##########################\n'+
            'STARTING PREPROCESSING RUN\n'+
            '##########################\n\n\n')
        # The log is opened in append mode, so include the date: a bare
        # clock time cannot distinguish runs made on different days.
        print(f'Run started at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
        for subject_number in subject_dirs:

            print('\n\n#############################\n'+
                    f'## STARTING NEW SUBJECT {subject_number} ##\n' +
                    '#############################\n')

            #####################
            #### IMPORT DATA ####
            #####################

            # import into the RAW bids dataset
            # NOTE(review): overwrite is hard-coded to True here, independent
            # of the `overwrite_subs` switch in the config cell — confirm intended.
            eeg,eeg_events = pre.import_eeg(subject_number,overwrite=True)
            eye,eye_events = pre.import_eyetracker(subject_number,overwrite=True)
            pre.import_behavior(subject_number)

            ########################################
            #### PREPROCESS EEG AND MAKE EPOCHS ####
            ########################################

            reref_index = mne.pick_channels(eeg.ch_names, ["TP9"]) # TODO: custom rereferencing?
            # Fetch only the TP9 channel via `picks` rather than copying the
            # whole recording just to index one row out of it.
            eeg.load_data().apply_function(pre.rereference_to_average, picks=["eeg"], reref_values=np.squeeze(eeg.get_data(picks=reref_index)))
            eeg.filter(*pre.filter_freqs, n_jobs=-1)
            # Manual per-subject trial drops are looked up by subject_number;
            # subjects without an entry drop nothing.
            epochs = pre.make_and_sync_epochs(eeg,eeg_events,eye,eye_events,eeg_trials_drop = EEG_TRIALS_DROP.get(subject_number, []),eye_trials_drop = EYE_TRIALS_DROP.get(subject_number, []))

            ###############################
            #### DO ARTIFACT REJECTION ####
            ###############################

            # NOTE(review): the 'eeg' thresholds are written as e.g. 100e-6 while
            # the 'eog'/'eyegaze' thresholds are plain numbers — confirm the
            # units each channel type is stored in.
            p2p=pre.artreject_slidingP2P(epochs,rejection_criteria={'eeg':100e-6,'eog':200},win=200,win_step=100)               # peak to peak in the window
            saccades = pre.artreject_step(epochs,rejection_criteria={'eyegaze':pre.deg2pix(0.5),'eog':50},win=80,win_step=10)   # saccades in EOG or eye tracking
            steps = pre.artreject_step(epochs,rejection_criteria={'eeg':60e-6},win=250,win_step=20)                             # steps (saccade like) in EEG

            absolute_value=pre.artreject_value(epochs,rejection_criteria={'eyegaze':pre.deg2pix(1), 'eeg':100e-6, 'eog':300})   # absolute value rejection
            linear_fit = pre.artreject_linear(epochs)                                                                           # linear fit (drift) rejection
            flatline = pre.artreject_flatline(epochs,rejection_criteria={'eeg':0,'eog':0,'eyegaze':0},flatline_duration=200)    # check for flatlines


            # combine rejection reasons: an entry is flagged if ANY check flagged it
            rej_electrodes = p2p | saccades | steps | absolute_value | linear_fit | flatline
            # One label string per entry, built by appending a tag for every
            # check that flagged it.
            rej_reasons = np.char.array(np.full(rej_electrodes.shape,'', dtype="<U30"))  # NOTE: dtype is important, must be >= the max possible str length
            rej_reasons[p2p] = 'P2P '
            rej_reasons[saccades] = rej_reasons[saccades] + 'SAC '
            rej_reasons[steps] = rej_reasons[steps] + 'STEP '
            rej_reasons[absolute_value] = rej_reasons[absolute_value] + 'ABS '
            rej_reasons[linear_fit] = rej_reasons[linear_fit] + 'LIN '
            rej_reasons[flatline] = rej_reasons[flatline] + 'FLAT '


            # Overall summary followed by the per-check breakdown.
            n_rejected_trials = rej_electrodes.any(1).sum()
            print((f'Rejected {n_rejected_trials} trials ({round(n_rejected_trials / rej_electrodes.shape[0] * 100,1)}%) for the following reasons:\n'
                f'Peak to peak amplitude: {rej_counts(p2p)}\n'
                f'Saccades: {rej_counts(saccades)}\n'
                f'Steps: {rej_counts(steps)}\n'
                f'Absolute value: {rej_counts(absolute_value)}\n'
                f'Linear fit: {rej_counts(linear_fit)}\n'
                f'Flatline: {rej_counts(flatline)}\n'))

            # The five channels with the most flagged trials, highest first.
            print('Worst electrodes by count:\n' + '\n'.join([f'{epochs.ch_names[i]}: {rej_electrodes[:,i].sum()}' for i in np.argsort(rej_electrodes.sum(0))[::-1][0:5]]))


            #################################
            #### SAVE DATA AS DERIVATIVE ####
            #################################


            pre.save_all_data(subject_number,epochs,rej_reasons)
# Printed after the redirect has ended, so the report shows up in the notebook.
print(mne_bids.make_report(data_dir))


Summarizing participants.tsv E:\datasets\supracapacity\participants.tsv...
Summarizing scans.tsv files [WindowsPath('E:/datasets/supracapacity/sub-01/sub-01_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-03/sub-03_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-04/sub-04_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-05/sub-05_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-06/sub-06_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-07/sub-07_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-08/sub-08_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-09/sub-09_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-10/sub-10_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-11/sub-11_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-12/sub-12_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-13/sub-13_scans.tsv'), WindowsPath('E:/datasets/supracapacity/sub-14/sub-14_scans.tsv'), WindowsPath('E:/datasets/supracapacity

In [166]:
# Sanity check: display the full name -> code mapping, i.e. the base event
# names plus the derived '/TARGET' entries.
event_dict

{'trl_start': 1,
 'attention/ss2/no_placeholders': 12,
 'attention/ss4/no_placeholders': 14,
 'attention/ss6/no_placeholders': 16,
 'attention/ss8/no_placeholders': 18,
 'attention/ss2/placeholders': 22,
 'attention/ss4/placeholders': 24,
 'attention/ss6/placeholders': 26,
 'attention/ss8/placeholders': 28,
 'memory/ss2/no_placeholders': 32,
 'memory/ss4/no_placeholders': 34,
 'memory/ss6/no_placeholders': 36,
 'memory/ss8/no_placeholders': 38,
 'memory/ss2/placeholders': 42,
 'memory/ss4/placeholders': 44,
 'memory/ss6/placeholders': 46,
 'memory/ss8/placeholders': 48,
 'delay_start': 2,
 'attn_probe': 3,
 'delay_end': 4,
 'attention/ss2/no_placeholders/TARGET': 13,
 'attention/ss4/no_placeholders/TARGET': 15,
 'attention/ss6/no_placeholders/TARGET': 17,
 'attention/ss8/no_placeholders/TARGET': 19,
 'attention/ss2/placeholders/TARGET': 23,
 'attention/ss4/placeholders/TARGET': 25,
 'attention/ss6/placeholders/TARGET': 27,
 'attention/ss8/placeholders/TARGET': 29,
 'memory/ss2/no_place

In [46]:
%matplotlib qt
sub = input('Enter subject number: ') # you can also just set this to a string

REJ_CHANNELS_IGNORE=['HEOG','VEOG','Fp1','Fp2','TP9'] # exclude fp1 and fp2 here, they are dropped later


viz = preprocess_eegdata.Visualizer(sub,
                                    parent_dir = data_dir,                                      
                                    experiment_name=EXPERIMENT_NAME,
                                    srate=SRATE,
                                    trial_start = TRIAL_START_TIME,         
                                    trial_end = TRIAL_END_TIME,
                                    rejection_time=REJECTION_TIME,
                                    downscale={'eyegaze':1e-6,'misc':1e-4,'eeg':1,'eog':1e-6}, # convert to equivalent units (probably uV)
                                    channels_drop=['StimTrak','pupil_left','pupil_right'],
                                    channels_ignore=REJ_CHANNELS_IGNORE)


rejection_sums = viz.rej_chans.sum(axis=0)
sort_ix = np.argsort(rejection_sums)[::-1]

for ichan,chan in enumerate(viz.chan_labels[sort_ix]):
    if rejection_sums[sort_ix][ichan] > 0:
        print(chan,rejection_sums[sort_ix][ichan])

viz.preprocess_data_for_plot()
viz.open_figure()

Reading E:\datasets\supracapacity\derivatives\sub-03\eeg\sub-03_task-supracapacity_desc-preprocessed_eeg.fif ...


  self.epochs_obj = mne.read_epochs(self.data_path.fpath)


    Found the data of interest:
        t =    -400.00 ...    1400.00 ms
        0 CTF compensation matrices available
Not setting metadata
1578 matching events found
No baseline correction applied
0 projection items activated
xpos_left 32
xpos_right 32
FC5 24
Fz 21
F3 20
F4 10
FC1 10
ypos_right 9
ypos_left 9
FC2 7
F8 6
C4 5
FC6 5
C3 4
CP6 3
P3 3
CP1 3
F7 3
CP5 2
Pz 2
P4 2
CP2 2
Cz 2
O1 1
Oz 1
O2 1
P8 1
PO7 1
PO3 1
PO8 1
PO4 1


Traceback (most recent call last):
  File "c:\Users\Darius\.conda\envs\mvload\lib\site-packages\matplotlib\cbook.py", line 298, in process
    func(*args, **kwargs)
  File "c:\Users\Darius\src\preprocessing_pypeline\preprocess_eegdata.py", line 1275, in keypress_event
    self.update(force=True)
  File "c:\Users\Darius\src\preprocessing_pypeline\preprocess_eegdata.py", line 1227, in update
    self.plot_pos(pos)
  File "c:\Users\Darius\src\preprocessing_pypeline\preprocess_eegdata.py", line 1144, in plot_pos
    self.ax.set_yticks(self.offset_dict_stacked.values(), self.offset_dict_stacked.keys())
  File "c:\Users\Darius\.conda\envs\mvload\lib\site-packages\matplotlib\axes\_base.py", line 73, in wrapper
    return get_method(self)(*args, **kwargs)
  File "c:\Users\Darius\.conda\envs\mvload\lib\site-packages\matplotlib\axis.py", line 2138, in set_ticks
    result = self._set_tick_locations(ticks, minor=minor)
  File "c:\Users\Darius\.conda\envs\mvload\lib\site-packages\matplotlib\axis.p