In [1]:
import sys
import preprocess_eegdata
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import os
import mne
import seaborn as sns
import pandas as pd

%load_ext autoreload

%autoreload 2

In [2]:
parent_dir = '../data/raw_data'
file_prefix='SCR_' # prefix to your vhdr files. Assuming it is in the format [prefix]_[number]

subject_dirs=[]  # if you want to analyze a specific subset of subjects
if len(subject_dirs) == 0:
    subject_dirs=sorted(glob('*',root_dir=parent_dir))


# Epoch window around the timelocking event; these are handed to the Preprocess
# object and end up as tmin/tmax of mne.Epochs (i.e. seconds).
TRIAL_START_TIME = -0.4
TRIAL_END_TIME = 1.4
BASELINE_TIME = (-.25, 0)      # baseline interval passed to mne.Epochs
REJECTION_TIME = [-0.25, 1.0]  # window (reject_tmin, reject_tmax) checked for artifacts

# Target sampling rate in Hz. Epochs are decimated down to this rate, so the
# recording rate must be an integer multiple of it.
SRATE = 500
# NOTE(review): the run log reports only an 80 Hz low-pass for this setting, so the
# pair is presumably (high-pass, low-pass) with None disabling the high-pass — confirm.
FILTER_FREQS = (None, 80)

LINEAR_R2 = 0.3


# Trigger codes keyed by event name. Codes above 10 are the stimulus conditions;
# the remaining entries are trial-structure markers.
og_event_dict = {
    'trl_start': 1,
    "A2N": 12, "A4N": 14, "A6N": 16, "A8N": 18,
    "A2P": 22, "A4P": 24, "A6P": 26, "A8P": 28,
    "M2N": 32, "M4N": 34, "M6N": 36, "M8N": 38,
    "M2P": 42, "M4P": 44, "M6P": 46, "M8P": 48,
    'delay_start': 2,
    'delay_end': 4,
}

event_dict = dict(og_event_dict)  # extended below with one '<name>/TARGET' entry per stimulus
event_code_dict = {}              # condition code -> sequence of event codes defining that trial type
stim_conditions = []              # every stimulus code, each followed by its target variant

for name, code in og_event_dict.items():
    if code <= 10:
        continue  # structural marker, not a stimulus condition
    target_code = code + 1  # target variant uses the next (odd) code
    event_dict[name + '/TARGET'] = target_code
    event_code_dict[code] = [1, code, 2, 4, 4]
    event_code_dict[target_code] = [1, code, 2, 3, 2, 4, 4]
    stim_conditions += [code, target_code]



POSITION_TO_TIMELOCK = 1 # which position (in each event_code_dict sequence above) to timelock to


# Per-subject epoch indices to remove right after epoching.
# Keys MUST be the subject folder names as strings (e.g. '026'): the processing loop
# looks them up with `.get(sub, [])`, and `sub` is a folder name, so an integer key
# such as 10 would silently never match.
EEG_TRIALS_DROP = {}
EYE_TRIALS_DROP = {'010':[0,1,2,3]} # edge case when we forgot to start the recording, drop certain trials
DROP_CHANNELS=[] #['Fp1','Fp2']


# Epoch, Baseline, Preprocess, and Identify Artifacts

In [3]:
# Build the preprocessing helper from the configuration cell above.
# file_prefix now comes from the config variable instead of a second hard-coded
# 'SCR_' literal, so changing the prefix in one place is enough.
pre = preprocess_eegdata.Preprocess(
    parent_dir=parent_dir,
    file_prefix=file_prefix,
    trial_start=TRIAL_START_TIME,
    trial_end=TRIAL_END_TIME,
    event_dict=event_dict,
    stim_conditions=stim_conditions,
    event_code_dict=event_code_dict,
    timelock_ix=POSITION_TO_TIMELOCK,
    baseline_time=BASELINE_TIME,
    rejection_time=REJECTION_TIME,
    no_et_spaces=False,
    drop_channels=DROP_CHANNELS,
    filter_freqs=FILTER_FREQS,
)


In [4]:
def _trials_to_drop(drop_map, subject):
    """Return the epoch indices configured for ``subject`` in ``drop_map``.

    Subject IDs are folder names, i.e. strings such as '026'. The string key is
    tried first; if it is absent, the integer value of the ID is tried so that a
    config written as ``{10: [...]}`` still applies to folder '010'.
    Returns an empty list when nothing is configured.
    """
    if subject in drop_map:
        return drop_map[subject]
    try:
        return drop_map.get(int(subject), [])
    except ValueError:
        return []  # non-numeric folder name and no exact string match


for sub in subject_dirs:
    print(f'Starting subject {sub}')

    subject_dir = os.path.join(pre.parent_dir,sub)

    # load in EEG and eye data
    eeg,eeg_events,eeg_event_dict = pre.load_eeg(subject_dir)
    eye,eye_events,et_event_dict = pre.load_eyetracking(subject_dir)


    # delete any events that do not appear in both ET and EEG (usually a boundary)
    # Only the event-code column (index 2) is compared: the other two columns of an
    # MNE events array are the sample index and the previous value, and matching on
    # those could delete a valid event whose sample number happens to equal a code.
    unmatched_codes = list(set(eeg_event_dict.values()) ^ set(et_event_dict.values()))
    eeg_events = eeg_events[~np.isin(eeg_events[:, 2], unmatched_codes)]
    eye_events = eye_events[~np.isin(eye_events[:, 2], unmatched_codes)]
    eeg_events = pre.filter_events(eeg_events)
    eye_events = pre.filter_events(eye_events)



    # build epochs for EEG
    # decimation only works for integer ratios, hence the assertion
    assert eeg.info['sfreq'] % SRATE == 0, 'EEG sampling rate must be an integer multiple of SRATE'
    decim = int(eeg.info['sfreq'] // SRATE)
    eeg_epochs = mne.Epochs(eeg,eeg_events,pre.event_dict,tmin=pre.trial_start_t,tmax=pre.trial_end_t,
                            on_missing='ignore',baseline=pre.baseline_time,
                            reject_tmin=pre.rejection_time[0],reject_tmax=pre.rejection_time[1],
                            preload=True,decim=decim)
    eeg_epochs = eeg_epochs.drop(_trials_to_drop(EEG_TRIALS_DROP, sub))
    if pre.drop_channels is not None:
        eeg_epochs = eeg_epochs.drop_channels(pre.drop_channels)

    # build epochs for eye tracking
    # figure out decim, may be different from EEG
    assert eye.info['sfreq'] % SRATE == 0, 'Eye-tracker sampling rate must be an integer multiple of SRATE'
    decim = int(eye.info['sfreq'] // SRATE)
    eye_epochs = mne.Epochs(eye,eye_events,pre.event_dict,tmin=pre.trial_start_t,tmax=pre.trial_end_t,
                            on_missing='ignore',baseline=pre.baseline_time,reject=None,flat=None,
                            reject_by_annotation=False,preload=True,decim=decim)
    eye_epochs = eye_epochs.drop(_trials_to_drop(EYE_TRIALS_DROP, sub))

    # grab pupil size data to save out, if it exists
    pupil_chs = [ch for ch in eye_epochs.ch_names if 'pupil' in ch]
    if len(pupil_chs) > 0:
        pupil_epochs = eye_epochs.copy().pick(pupil_chs)
        np.save(os.path.join(subject_dir,f'{sub}_pupil'),pupil_epochs.get_data(copy=False))

    eye_epochs = eye_epochs.pick(np.setdiff1d(eye_epochs.ch_names,['pupil_left','pupil_right','DIN'])) # exclude non-location based eye channels

    # make sure the same epochs are selected, might vary if mne drops an epoch or 2
    if (len(eye_epochs.selection)!=len(eeg_epochs.selection)) or (np.any(eeg_epochs.selection != eye_epochs.selection)):
        print('WARNING: EEG and ET trials do not match up, subsetting to overlapping trials')
        shared_selection = np.intersect1d(eeg_epochs.selection,eye_epochs.selection)

        np.save(os.path.join(subject_dir,f'{sub}_preArt_selections.npy'),shared_selection) # for filtering behavior later

        # `selection` holds ORIGINAL event indices, while Epochs.drop expects positions
        # within the current epochs object. Once any epoch has been dropped the two
        # differ, so translate "not shared" into positions before dropping.
        eeg_drops = np.flatnonzero(~np.isin(eeg_epochs.selection,shared_selection))
        eye_drops = np.flatnonzero(~np.isin(eye_epochs.selection,shared_selection))

        eeg_epochs = eeg_epochs.drop(eeg_drops)
        eye_epochs = eye_epochs.drop(eye_drops)


    # concatenate EEG and eyetracking
    epochs=eeg_epochs.copy()
    epochs.add_channels([eye_epochs],force_update_info=True)



    # DO REJECTION HERE
    # IMPORTANT UNITS: eyegaze in pixels (use deg2pix to convert), EEG in volts, EOG in microvolts
    #TODO: fix units?
    p2p=pre.artreject_slidingP2P(epochs,rejection_criteria={'eeg':100e-6,'eog':200},win=200,win_step=100)               # peak to peak in the window
    saccades = pre.artreject_step(epochs,rejection_criteria={'eyegaze':pre.deg2pix(0.5),'eog':50},win=80,win_step=10)   # saccades in EOG
    steps = pre.artreject_step(epochs,rejection_criteria={'eeg':60e-6},win=250,win_step=20)                             # steps (saccade like) in EEG

    absolute_value=pre.artreject_value(epochs,rejection_criteria={'eyegaze':pre.deg2pix(1), 'eeg':100e-6, 'eog':300})   # absolute value rejection
    # NOTE(review): LINEAR_R2 from the config cell is not passed here — confirm that
    # artreject_linear uses the intended threshold by default.
    linear_fit = pre.artreject_linear(epochs)                                                                           # linear fit (drift) rejection
    flatline = pre.artreject_flatline(epochs,rejection_criteria={'eeg':0,'eog':0,'eyegaze':0},flatline_duration=200)    # check for flatlines


    # combine rejection reasons
    rej_electrodes = p2p | saccades | steps | absolute_value | linear_fit | flatline
    rej_reasons = np.char.array(np.full(rej_electrodes.shape,'', dtype="<U30"))  # NOTE: dtype is important, must be >= the max possible str length
    rej_reasons[p2p] = 'P2P '
    rej_reasons[saccades] = rej_reasons[saccades] + 'SAC '
    rej_reasons[steps] = rej_reasons[steps] + 'STEP '
    rej_reasons[absolute_value] = rej_reasons[absolute_value] + 'ABS '
    rej_reasons[linear_fit] = rej_reasons[linear_fit] + 'LIN '
    rej_reasons[flatline] = rej_reasons[flatline] + 'FLAT '


    # save files
    np.save(os.path.join(subject_dir,f'{sub}_rej.npy'),rej_electrodes)                                   # matrix of trials x electrodes by which were rejected
    np.save(os.path.join(subject_dir,f'{sub}_rej_reasons.npy'),rej_reasons)                              # trials x electrodes with rejection reasons
    epochs.save(os.path.join(subject_dir,f'{sub}_epo.fif'),overwrite=True)                               # save mne epochs object (for later)
    np.save(os.path.join(subject_dir,f'{sub}_epo'),epochs.get_data(copy=False))                          # save data as a npy for ease loading
    # NOTE(review): these labels come from the events BEFORE any epoch was dropped, so
    # they can be longer than the saved epochs when drops occurred; epochs.events[:,2]
    # would always line up — confirm what downstream code expects before changing.
    np.save(os.path.join(subject_dir,f'{sub}_conditions'),eeg_events[:,2])                               # condition labels
    pd.Series(epochs.ch_names).to_csv(os.path.join(subject_dir,f'{sub}_chan_labels.csv'),header=False)   # channel labels
    

Starting subject 026
Extracting parameters from ../data/raw_data/026/SCR_026.vhdr...
Setting channel info structure...
Reading 0 ... 8950019  =      0.000 ...  8950.019 secs...


  eegdata = mne.io.read_raw_brainvision(eegfile, eog=["HEOG", "VEOG"], misc=["StimTrak"], preload=True)  # read into mne.raw structure
  eegdata = mne.io.read_raw_brainvision(eegfile, eog=["HEOG", "VEOG"], misc=["StimTrak"], preload=True)  # read into mne.raw structure
  eegdata = mne.io.read_raw_brainvision(eegfile, eog=["HEOG", "VEOG"], misc=["StimTrak"], preload=True)  # read into mne.raw structure
['HEOG', 'VEOG', 'StimTrak']
Consider setting the channel types to be of EEG/sEEG/ECoG/DBS/fNIRS using inst.set_channel_types before calling inst.set_montage, or omit these channels when creating your montage.
  eegdata = mne.io.read_raw_brainvision(eegfile, eog=["HEOG", "VEOG"], misc=["StimTrak"], preload=True)  # read into mne.raw structure


Filtering raw data in 1 contiguous segment
Setting up low-pass filter at 80 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal lowpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Upper passband edge: 80.00 Hz
- Upper transition bandwidth: 20.00 Hz (-6 dB cutoff frequency: 90.00 Hz)
- Filter length: 165 samples (0.165 s)



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done  29 out of  31 | elapsed:   11.1s remaining:    0.8s
[Parallel(n_jobs=-1)]: Done  31 out of  31 | elapsed:   11.4s finished


Used Annotations descriptions: ['New Segment/', 'Stimulus/S  1', 'Stimulus/S  2', 'Stimulus/S  3', 'Stimulus/S  4', 'Stimulus/S 12', 'Stimulus/S 14', 'Stimulus/S 16', 'Stimulus/S 18', 'Stimulus/S 22', 'Stimulus/S 24', 'Stimulus/S 26', 'Stimulus/S 28', 'Stimulus/S 32', 'Stimulus/S 34', 'Stimulus/S 36', 'Stimulus/S 38', 'Stimulus/S 42', 'Stimulus/S 44', 'Stimulus/S 46', 'Stimulus/S 48']
Loading /Users/henryjones/Documents/research/banyan/preprocessing/../data/raw_data/026/et_temp.asc
Pixel coordinate data detected.Pass `scalings=dict(eyegaze=1e3)` when using plot method to make traces more legible.
Pupil-size area detected.
No saccades were found in this file. Not returning any info on saccades.
There are 1705 recording blocks in this file. Times between blocks will be annotated with BAD_ACQ_SKIP.
Used Annotations descriptions: ['ELCL_PCR_PARAM 5 3.0', 'ELCL_PROC CENTROID (3)', 'SYNC 1', 'SYNC 12', 'SYNC 14', 'SYNC 16', 'SYNC 18', 'SYNC 2', 'SYNC 22', 'SYNC 24', 'SYNC 26', 'SYNC 28', 'SY

  eye_epochs = mne.Epochs(eye,eye_events,pre.event_dict,tmin=pre.trial_start_t,tmax=pre.trial_end_t,


Overwriting existing file.
Overwriting existing file.


# Visualize and Confirm Artifacts

In [11]:
sub = '011'  # subject folder whose artifact flags are reviewed in the cells below

In [12]:
%matplotlib qt

REJ_CHANNELS_IGNORE=['HEOG','VEOG','Fp1','Fp2'] # exclude fp1 and fp2 here, they are dropped later


viz = preprocess_eegdata.Visualizer(sub,
                                    parent_dir = parent_dir,
                                    srate=SRATE,
                                    timelock=0.2,
                                    trial_start = TRIAL_START_TIME,
                                    trial_end = TRIAL_END_TIME,
                                    rejection_time=REJECTION_TIME,
                                    condition_dict = {v:k for k,v in event_dict.items()},
                                    downscale={'eyegaze':1e-6,'misc':1e-4,'eeg':1,'eog':1e-6},
                                    channels_drop=['StimTrak'],
                                    channels_ignore=REJ_CHANNELS_IGNORE)


rejection_sums = viz.rej_chans.sum(axis=0)
sort_ix = np.argsort(rejection_sums)[::-1]

for ichan,chan in enumerate(viz.chan_labels[sort_ix]):
    if rejection_sums[sort_ix][ichan] > 0:
        print(chan,rejection_sums[sort_ix][ichan])


Reading /Users/henryjones/Documents/research/banyan/preprocessing/../data/raw_data/011/011_epo.fif ...


    Found the data of interest:
        t =    -400.00 ...    1400.00 ms
        0 CTF compensation matrices available
Not setting metadata
1600 matching events found
No baseline correction applied
0 projection items activated
xpos_right 655
xpos_left 655
ypos_right 635
ypos_left 635
F8 14
Cz 2
FC2 1


In [13]:
# Run the Visualizer's plot preparation step, then open its figure for manual review.
# NOTE(review): the recorded output suggests the figure itself saves annotations to
# "<sub>_rej_FINAL.npy" — confirm in preprocess_eegdata.Visualizer.
viz.preprocess_data_for_plot()
viz.open_figure()

675/1600 trials rejected. Saving annotations as ".../011_rej_FINAL.npy"
674/1600 trials rejected. Saving annotations as ".../011_rej_FINAL.npy"


In [8]:
# Manual save of the current rejections, left disabled.
# NOTE(review): presumably only needed if the figure did not already save — confirm.
# viz.save_annotations()

key not recognized: s
saving current rejections as rej_FINAL.npy
