## NEvent Data Pre-Processing

NEvent Data Post-Processing: 

1. Introduction and overview of the dataset and objectives.
2. Apply minimal filtering: Steps for initial data cleaning and filtering.
3. Manually annotate bad data spans: Instructions and tools for marking poor-quality data.
4. Save the filtered+annotated+cropped data to a new file: Finalizing the preprocessing phase by saving the cleaned data.

In [None]:
# First, load the necessary packages
import os
import numpy as np
import glob as glob
import mne
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as spst
import pandas as pd
import sys
import time
import joblib

# Work out where the /mindstore/ezzyatlab server share is mounted on the
# machine running this notebook.
import socket

# Mount point per known machine. Any hostname not listed here falls back to
# the default mount point below.
_default_server_folder = '/Volumes/ezzyatlab/'
_server_folder_by_host = {
    'Youssefs-iMac.local': '/Volumes/ezzyatlab/',         # home office
    'Youssefs-MacBook-Air.local': '/Volumes/ezzyatlab/',  # laptop
    'yezzyat-21': '/Volumes/ezzyatlab/',                  # lab office
}
server_folder = _server_folder_by_host.get(socket.gethostname(),
                                           _default_server_folder)

# Make the lab's shared post-processing code importable from the server.
sys.path.extend([
    f'{server_folder}labutils/',
    f'{server_folder}labutils/scalpeeg/',
])

#exp_folder = server_folder + 'experiments/NEvent/'

#participants = pd.read_csv(exp_folder + 'participants.tsv',
#                           delimiter='\t')

#print(f'\nMNE-Python Version: {mne.__version__}\n')

# Subject code/number of the dataset to be processed
subject_code = 'sub-066'

# Experiment data folder and the BrainVision header file of this
# subject's raw recording
data_folder = f'{server_folder}experiments/NEvent/exp_eeg_v1/'
data_raw_file = os.path.join(
    data_folder, subject_code, 'eeg', 'raw',
    subject_code + '_NEvent-task.vhdr',
)

In [None]:


# Band-pass cutoffs (low, high) applied to the raw data
raw_l_freq, raw_h_freq = 0.5, 200

# Line-noise range handed to np.arange() when building the notch
# frequencies; the upper bound is exclusive there, hence 181 rather than 180.
linenoise_min, linenoise_max = 60, 181

# ICA parameters
ica_sfreq = 250            # Resampling frequency
ica_l_freq = 1.            # Filter cutoff: low
ica_h_freq = None          # Filter cutoff: high
ica_flat = {'eeg': 5e-6}   # Minimum channel amplitude for inclusion

# Set the channel locations montage
# montage = mne.channels.make_standard_montage('GSN-HydroCel-65_1.0')
# montage.ch_names[-1] = 'E65'

# # Rename the channels to 10-10 convention
# tenten_file = os.path.join(server_folder,'labdocs','scalp','10-10_vs_EGI.csv')
# #tenten = pd.read_excel(tenten_file)
# tenten = pd.read_csv(tenten_file)

# # Define old -> new mapping
# chan_name_map = dict(zip(tenten.Labels_EGI64,tenten.Labels_1010))

# Per-channel-type scale factors, passed to raw.plot()
scalings = {
    'mag': 1e-12,
    'grad': 4e-11,
    'eeg': 75e-6,
    'eog': 150e-6,
    'ecg': 5e-4,
    'emg': 1e-3,
    'ref_meg': 1e-12,
    'misc': 1e-3,
    'stim': 1,
    'resp': 1,
    'chpi': 1e-4,
    'whitened': 1e2,
}

In [None]:
# Read the BrainVision recording into memory. The 10-10 renaming and montage
# steps below are currently disabled, so channel names stay as recorded.
raw = mne.io.read_raw_brainvision(vhdr_fname=data_raw_file, preload=True)

# Apply new channels names to raw object
# raw.set_montage(montage)
# mne.rename_channels(raw.info,chan_name_map)

# # Change channel type for Cz
# raw.set_channel_types({'Cz': 'misc'})

# Every channel in the file is currently treated as a data channel
# (the slice below would instead exclude Cz, EOG, STIM, etc.)
#data_channels = raw.ch_names[0:60]
data_channels = raw.ch_names

# Optional sanity check: confirm the trace looks like raw data and that the
# channel names imported
#fig = raw.plot(start=0, duration=60, n_channels=30, scalings=scalings)

#print(f'{raw.n_times/(1000)} seconds')

## Apply minimal filtering
Filter the raw data to remove low-frequency drifts and high-frequency noise. Also apply a notch filter to remove the effects of 60 Hz line noise and its harmonics (120 and 180 Hz).

In [None]:
# Band-pass the recording between raw_l_freq and raw_h_freq.
# load_data() makes sure the samples are in memory before filtering.
raw.load_data().filter(l_freq=raw_l_freq, h_freq=raw_h_freq)

# Notch out line noise at the fundamental and each harmonic below
# linenoise_max. Harmonics are spaced by the fundamental itself, so the step
# follows linenoise_min instead of a hard-coded 60; with the current settings
# this still yields 60, 120 and 180.
raw.notch_filter(np.arange(linenoise_min, linenoise_max, linenoise_min))

## Manually annotate bad data spans

In [None]:
# Open the raw-data browser used for marking bad spans: begin at the start of
# the recording and show 60 s of 30 channels per page, using the per-type
# scale factors defined earlier.
fig = raw.plot(
    start=0,
    duration=60,
    n_channels=30,
    scalings=scalings,
)

In [None]:
# Save your annotations to a csv file in the 'postproc' folder that sits next
# to the folder holding the raw recording.
# The path is derived with os.path rather than substring replacement, so it
# does not depend on the exact '/raw/' or '.vhdr' text appearing only once,
# and the output folder is created if this is the first file written to it.
_annot_dir = os.path.join(os.path.dirname(os.path.dirname(data_raw_file)),
                          'postproc')
os.makedirs(_annot_dir, exist_ok=True)

# Same file name as the recording, with '-annotations.csv' in place of the
# extension
_annot_name = (os.path.splitext(os.path.basename(data_raw_file))[0]
               + '-annotations.csv')
raw.annotations.save(os.path.join(_annot_dir, _annot_name), overwrite=True)

In [None]:
# Inspect the recording's measurement info before deciding whether to crop.
# As the last expression in the cell, `raw.info` is shown by the notebook.
#
# To crop, identify the timepoint corresponding to the end of the session and
# set tmax to that value; this removes any post-experiment portion of the
# recording. Likewise, tmin can drop everything before the given timepoint.
# NOTE(review): the values below look subject-specific and are presumably in
# seconds; confirm against the recording before uncommenting.
raw.info
#raw.crop(tmax=5830)
#raw.crop(tmin=170)

In [None]:
# Blank out any electrode(s) that were marked as bad
if not raw.info['bads']:
    print("No bad channels identified.")
else:
    print("Bad channels:", raw.info['bads'])

    # Row index(es) of the bad channel(s) within the data array
    bad_channel_idxs = mne.pick_channels(raw.info['ch_names'], include=raw.info['bads'])

    # Overwrite every sample of those channels with NaN in one assignment
    raw._data[bad_channel_idxs, :] = np.nan

## Save the filtered+annotated+cropped data to a new file 

In [None]:
# Destination of the processed recording inside the subject's postproc folder
fname = os.path.join(
    data_folder, subject_code, 'eeg', 'postproc',
    subject_code + '_filtered_annot_crop.fif',
)

# Ensure the samples are in memory, then write the file, replacing any
# previous version
raw.load_data()
raw.save(fname, overwrite=True)