In [None]:
import mne
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append(os.path.join('..'))
from utils.preprocessing import preprocess

# THINGS-EEG


In [7]:
# Build (or load from cache) the THINGS-EEG dataset used downstream:
#   inputs - epoched EEG data of all included subjects, stacked along axis 0
#   labels - the "object" column of each subject's events file, stacked the same way
data_dir = os.path.join("..", "..", "data", "things-eeg")
inputs_path = os.path.join(data_dir, "inputs.npy")
labels_path = os.path.join(data_dir, "labels.npy")

# Keep only the participants that are not flagged for exclusion
subjects = pd.read_csv(os.path.join(data_dir, "participants.tsv"), sep="\t")
subjects = subjects.loc[subjects['exclude'] == 0, 'participant_id'].to_list()

# Check if inputs and labels are already created, if so, no need to process the data again
if os.path.exists(inputs_path) and os.path.exists(labels_path):
    inputs = np.load(inputs_path)
    # The labels come straight from a TSV column and may therefore have been
    # saved as an object-dtype array (e.g. strings), which np.load refuses to
    # read unless pickling is allowed. The file is a local cache written by
    # this very cell, so it is trusted.
    labels = np.load(labels_path, allow_pickle=True)

else:

    ch_names = ['Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9', 'CP5', 'CP1', 'Pz', 'P3', 'P7', 'O1', 'Oz', 'O2', 'P4', 'P8', 'TP10', 'CP6', 'CP2', 'C4', 'T8', 'FT10', 'FC6', 'FC2', 'F4', 'F8', 'Fp2', 'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', 'C1', 'C5', 'TP7', 'CP3', 'P1', 'P5', 'PO7', 'PO3', 'POz', 'PO4', 'PO8', 'P6', 'P2', 'CPz', 'CP4', 'TP8', 'C6', 'C2', 'FC4', 'FT8', 'F6', 'AF8', 'AF4', 'F2', 'FCz']

    # Per-subject arrays are collected here and concatenated once after the
    # loop; growing one array with np.append would re-copy everything that was
    # accumulated so far on every iteration.
    input_chunks, label_chunks = [], []

    for subject in subjects:
        print(f"\nProcessing {subject}...\n")

        eeg_dir = os.path.join(data_dir, subject, "eeg")
        preprocessed_path = os.path.join(eeg_dir, f"{subject}_task-rsvp_eeg_preprocessed.fif")
        epochs_path = os.path.join(eeg_dir, f"{subject}_task-rsvp_eeg_epochs.fif")

        # Check if the subject's EEG data is already preprocessed
        # If not, preprocess the raw data and save it
        if os.path.exists(preprocessed_path):
            print(f"Preprocessed data for {subject} already exists. Loading...\n")
            raw = mne.io.read_raw_fif(preprocessed_path, preload=True)
        else:
            raw = mne.io.read_raw_brainvision(os.path.join(eeg_dir, f"{subject}_task-rsvp_eeg.vhdr"), preload=True)
            # Strip the "EEG " prefix so the names can be compared with ch_names
            # and matched against the standard montage
            raw.rename_channels({ch: ch.replace("EEG ", "") for ch in raw.ch_names})
            raw.drop_channels([ch for ch in raw.ch_names if ch not in ch_names])
            montage = mne.channels.make_standard_montage("standard_1020")
            raw.set_montage(montage)
            raw = preprocess(raw)
            raw.save(preprocessed_path, overwrite=True)

        # Check if the subject's epochs are already created
        # If not, create epochs and save them
        if os.path.exists(epochs_path):
            print(f"\nEpochs for {subject} already exist. Loading...\n")
            epochs = mne.read_epochs(epochs_path)
        else:
            events, event_id = mne.events_from_annotations(raw, regexp="Event/E  1")
            # Use the mapping returned above instead of a hard-coded integer
            # code: the code MNE assigns to an annotation depends on how the
            # annotations are parsed, which may differ between a freshly read
            # BrainVision recording and a preprocessed FIF loaded from disk.
            epochs = mne.Epochs(raw, events, event_id=event_id, tmin=-0.001, tmax=0.051, baseline=(None, 0), preload=True)
            epochs.save(epochs_path, overwrite=True)

        # Collect the epochs and related objects for the inputs and labels arrays
        objects = pd.read_csv(os.path.join(eeg_dir, f"{subject}_task-rsvp_events.tsv"), sep="\t")["object"].to_numpy()
        subject_data = epochs.get_data()

        # Every epoch needs exactly one label; fail loudly instead of saving
        # inputs and labels that are silently shifted against each other.
        if len(subject_data) != len(objects):
            raise ValueError(
                f"{subject}: found {len(subject_data)} epochs but {len(objects)} rows in the events file; "
                "inputs and labels would be misaligned"
            )

        input_chunks.append(subject_data)
        label_chunks.append(objects)

    if not input_chunks:
        raise ValueError("No subjects left after applying the 'exclude' filter; nothing to process")

    inputs = np.concatenate(input_chunks, axis=0)
    labels = np.concatenate(label_chunks, axis=0)

    # Save the inputs and labels to disk
    np.save(inputs_path, inputs)
    np.save(labels_path, labels)


Processing sub-02...

Preprocessed data for sub-02 already exists. Loading...


Epochs for sub-02 already exist. Loading...


Processing sub-03...

Preprocessed data for sub-03 already exists. Loading...


Epochs for sub-03 already exist. Loading...


Processing sub-04...

Preprocessed data for sub-04 already exists. Loading...


Epochs for sub-04 already exist. Loading...


Processing sub-05...

Preprocessed data for sub-05 already exists. Loading...


Epochs for sub-05 already exist. Loading...


Processing sub-07...

Preprocessed data for sub-07 already exists. Loading...


Epochs for sub-07 already exist. Loading...


Processing sub-08...

Preprocessed data for sub-08 already exists. Loading...


Epochs for sub-08 already exist. Loading...


Processing sub-09...

Preprocessed data for sub-09 already exists. Loading...


Epochs for sub-09 already exist. Loading...


Processing sub-10...

Preprocessed data for sub-10 already exists. Loading...


Epochs for sub-10 already exist. Load

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.4s


EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 56 channels (please be patient, this may take a while)
Selecting by number: 6 components
Fitting ICA took 48.9s.
Using EOG channels: F3, F7, F4, F8, AF3, AF4
EOG channel index for this subject is: [ 2  3 28 29 32 60]
Filtering the data to remove DC offset to help distinguish blinks from saccades
Selecting channel F3 for blink detection
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 10000 samples (10.000 s)

Now detecting blinks and generating cor

  raw.save(os.path.join(data_dir, subject, "eeg", f"{subject}_task-rsvp_eeg_preprocessed.fif"), overwrite=True)


Closing s:\PolySecLabProjects\eeg-image-decode\code\notebooks\..\..\data\things-eeg\sub-49\eeg\sub-49_task-rsvp_eeg_preprocessed.fif
[done]
Used Annotations descriptions: [np.str_('Event/E  1')]
Not setting metadata
24648 matching events found
Setting baseline interval to [-0.001, 0.0] s
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 24648 events and 53 original time points ...
0 bad epochs dropped


  epochs.save(os.path.join(data_dir, subject, "eeg", f"{subject}_task-rsvp_eeg_epochs.fif"), overwrite=True)



Processing sub-50...

Extracting parameters from ..\..\data\things-eeg\sub-50\eeg\sub-50_task-rsvp_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 3270959  =      0.000 ...  3270.959 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.50 Hz (-6 dB cutoff frequency: 56.25 Hz)
- Filter length: 3301 samples (3.301 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.8s


EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 56 channels (please be patient, this may take a while)
Selecting by number: 6 components
Fitting ICA took 28.8s.
Using EOG channels: F3, F7, F4, F8, AF3, AF4
EOG channel index for this subject is: [ 2  3 28 29 32 60]
Filtering the data to remove DC offset to help distinguish blinks from saccades
Selecting channel F7 for blink detection
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 10000 samples (10.000 s)

Now detecting blinks and generating cor

  raw.save(os.path.join(data_dir, subject, "eeg", f"{subject}_task-rsvp_eeg_preprocessed.fif"), overwrite=True)


Closing s:\PolySecLabProjects\eeg-image-decode\code\notebooks\..\..\data\things-eeg\sub-50\eeg\sub-50_task-rsvp_eeg_preprocessed.fif
[done]
Used Annotations descriptions: [np.str_('Event/E  1')]
Not setting metadata
24648 matching events found
Setting baseline interval to [-0.001, 0.0] s
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 24648 events and 53 original time points ...
0 bad epochs dropped


  epochs.save(os.path.join(data_dir, subject, "eeg", f"{subject}_task-rsvp_eeg_epochs.fif"), overwrite=True)


In [None]:
# Sanity check: display the shapes of both arrays; their first dimensions should agree
inputs.shape, labels.shape

((46896, 63, 53), (46896,))