In [2]:

# Import some libraries
import os
import numpy as np
import scipy
import mne
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import h5io
from pyprep.prep_pipeline import PrepPipeline
import pyprep as ppr
from Path_Config import Raw_Data_Path as data_path
from BBO_Analysis_Functions import infer_rights, correct_drift
# matplotlib.use('Qt5Agg')

## The first set of cells is for ICA cleaning and channel selection

In [None]:
#%%############################################################################
# ERP / ICA preprocessing.
# For every selected recording this cell band-passes the raw data (0.3-35 Hz),
# rebuilds the event table, interpolates noisy channels, epochs the data, fits
# an ICA on the epochs (plus an optional channel selection) and saves the
# cleaned epochs, their time-frequency power and the ICA solution.
###############################################################################
N_TRIALS_PER_SIDE = 45    # events kept per condition; also the count asserted for 'Left'/'Right'
N_INVALID_TRIALS = 12     # count asserted for 'Invalid' events
N_LOGGED_TRIALS = 102     # p_port entries > 1 expected in the matching Psychopy log (presumably 45 + 45 + 12)
ICA_RANDOM_STATE = 97     # fixed seed so the ICA decomposition is reproducible across runs
Chan_sel = False          # set to True to run the optional channel selection

event_id = {'Left':10002,'Invalid':10003, 'Right':10004}

working_dir = data_path + "EEG/"
# NOTE(review): os.listdir returns names in arbitrary order; the participant-number
# assert further down is what guards the index -> file mapping.
EEGfiles = [f for f in os.listdir(working_dir) if f.endswith('.vhdr')]
events_log_path = os.path.join(working_dir,'events_in_eeg.npy')

for sub in range(26,30):
    index = sub
    file = EEGfiles[index]
    print(f'Working on file {file}')
    EEG_fpath = os.path.join(working_dir, file)
    data = mne.io.read_raw_brainvision(EEG_fpath, preload = True)

    # Filter data (already in memory because of preload=True)
    data.filter(l_freq = .3, h_freq = 35)
    data.set_eeg_reference(ref_channels = "average")

    # Extract events; only the event array is needed, not the annotation mapping
    events, _ = mne.events_from_annotations(data)

    # Split the events per condition and log how many were originally recorded
    event_dict = {}
    eventcount = {}
    for key, val in event_id.items():
        # Boolean indexing yields an (n, 3) array, i.e. an empty (0, 3) array
        # when the code never occurs, so no special case is needed for 0 events.
        temp = events[events[:,2] == val]
        numevents = len(temp)
        print(f'The original number of events for {key} is {numevents}')
        eventcount[key] = numevents
        # Conditions with surplus events keep only their last 45
        # (presumably the earlier ones are practice trials - TODO confirm).
        if numevents > N_TRIALS_PER_SIDE:
            temp = temp[-N_TRIALS_PER_SIDE:,:]
        event_dict[key] = temp

    # Import the csv file with psychopy data
    responsepath = data_path + "Psychopy/"
    # all .csv files of this participant, excluding the 'trial' files, in ascending order
    responsefiles = sorted(
        f for f in os.listdir(responsepath)
        if f.endswith('.csv') and f.startswith(str(index+1)+'_') and 'trial' not in f
    )
    if not responsefiles:
        raise FileNotFoundError(
            f'No Psychopy .csv file found for participant {index+1} in {responsepath}')

    # Try each file until one carries the expected number of port codes
    for filename in responsefiles:
        fpath = os.path.join(responsepath, filename)
        responses = pd.read_csv(fpath)
        if 'p_port' not in responses.keys():
            continue
        # Count the entries that are > 1; len() of the comparison would only
        # return the number of rows, regardless of their values.
        if (responses['p_port'] > 1).sum() == N_LOGGED_TRIALS:
            break
    else:
        # Same fallback as before (the last file read stays selected), but visible.
        print(f'WARNING: no response file with {N_LOGGED_TRIALS} p_port entries found, '
              f'falling back to {filename}')
    print(filename)

    # Assert that the EEG and the response file match in the participant number
    assert int(filename.split('_')[0]) == index+1 == int(file.split('_')[1]), 'EEG and Psychopy files dont match'

    # If there are no right events, infer the right indices and correct for drift
    # (infer_rights / correct_drift are project helpers from BBO_Analysis_Functions)
    if len(event_dict['Right']) == 0:
        print('No right events found. Inferring and correcting for drift...')
        srate = data.info['sfreq']
        left_idx = event_dict['Left'][:,0]
        right_idx = infer_rights(responses, srate, left_idx)
        right_idx = correct_drift(responses, srate, left_idx, right_idx)
        event_dict['Right'] = np.zeros((len(right_idx),3))
        event_dict['Right'][:,0] = right_idx
        event_dict['Right'][:,1] = 0
        event_dict['Right'][:,2] = 10004

    # Store the original event counts of this recording in the shared log.
    # The log is a pickled dict: allow_pickle=True is only acceptable because the
    # file is written by this notebook itself. Start a new log if none exists yet.
    if os.path.exists(events_log_path):
        events_in_eeg = np.load(events_log_path, allow_pickle=True).item()
    else:
        events_in_eeg = {}
    events_in_eeg[file] = eventcount
    np.save(events_log_path, events_in_eeg)

    # Now convert the dict back to an array, sorted by sample index
    events = np.concatenate(list(event_dict.values()), axis=0)
    del event_dict
    events = events.astype(int)
    events = events[events[:,0].argsort()]

    n_left = np.count_nonzero(events[:,2] == event_id['Left'])
    n_right = np.count_nonzero(events[:,2] == event_id['Right'])
    n_invalid = np.count_nonzero(events[:,2] == event_id['Invalid'])
    assert n_left == N_TRIALS_PER_SIDE == n_right, 'Events are not 45'
    assert n_invalid == N_INVALID_TRIALS, 'Invalid events are not 12'

    # Set Montage
    montage = mne.channels.make_standard_montage('easycap-M1')
    data.rename_channels({'M1': 'TP9', 'M2': 'TP10'})
    data.set_montage(montage)

    ###############################################################################
    # Remove and interpolate bad channels
    ###############################################################################

    # Instantiate NoisyChannels with Raw data
    noisy_chans = ppr.NoisyChannels(data, do_detrend=False)

    # Find bad by correlation
    noisy_chans.find_bad_by_correlation(correlation_secs=5.0, correlation_threshold=0.4, frac_bad=0.1)
    print(f'After correlation removal: {noisy_chans.get_bads()}')
    data.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # Find bad by deviation
    noisy_chans.find_bad_by_deviation()
    print(f'After deviation removal: {noisy_chans.bad_by_deviation}')
    data.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # Find bad by high-frequency noise
    noisy_chans.find_bad_by_hfnoise(HF_zscore_threshold=5.0)
    print(f'After HF noise removal: {noisy_chans.get_bads()}')
    data.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # The repeated extend() calls add the same channel several times: de-duplicate,
    # and sort so the saved bad-channel list has a stable order.
    bad_channels_ERPs = sorted(set(data.info['bads']))
    data.info['bads'] = list(bad_channels_ERPs)
    data.interpolate_bads(reset_bads=True)

    # # Run RANSAC to find additional bad channels
    # noisy_chans.find_bad_by_ransac(n_samples=50, 
    #                             sample_prop=0.25, 
    #                             corr_thresh=0.75, 
    #                             frac_bad=0.4, 
    #                             corr_window_secs=5.0, 
    #                             channel_wise=False, 
    #                             max_chunk_size=None)
    # print(f'After RANSAC removal: {noisy_chans.get_bads()}')
    # print(f'Bad channels before RANSAC: {data.info["bads"]}')
    # data.info['bads'].extend(noisy_chans.get_bads())  # Update bads
    # data.info['bads'] = list(set(data.info['bads']))
    # bad_channels_ERPs = list(set(data.info['bads']))
    # data.interpolate_bads(reset_bads=True)

    ###############################################################################
    # Epoch data
    ###############################################################################
    epstart = -.5
    epend = 1.5
    Selection_epochs = mne.Epochs(data,
                        events = events,
                        event_id = event_id,
                        tmin = epstart, tmax = epend,
                        proj = False, baseline = None,
                        preload = True, reject = None)
    Selection_epochs.set_eeg_reference(ref_channels = "average")

    ###############################################################################
    # Independent Components Analysis
    ###############################################################################

    ncomp = 30
    ica = mne.preprocessing.ICA(n_components = ncomp, random_state = ICA_RANDOM_STATE)
    ica.fit(Selection_epochs)
    # ica.plot_properties(Selection_epochs, picks=range(0, 20))
    # Look at the timecourse of the component
    ica.plot_sources(Selection_epochs)
    plt.show()

    # Pause and wait for user input (ica.exclude is read right after the pause)
    input("Press Enter to continue...")
    print(f'Channels to be excluded: {ica.exclude}')

    ###############################################################################
    # Channel selection - based on van Ede et al. 2011
    ###############################################################################
    if Chan_sel:
        print('Starting channel selection....')
        # plot erp
        # Selection_epochs["Left"].average().plot(picks = ['Cz','C3','C4'])
        # Selection_epochs["Right"].apply_baseline((None,0)).average().plot_topomap(times= np.linspace(0.1, 0.5, 5))

        left_chans = ['FC1', 'C3', 'CP1']
        right_chans = ['FC2', 'C4', 'CP2']

        # One Morlet wavelet per integer frequency from 1 to 30 Hz. The former
        # two-value override (a leftover of the Stockwell method) left only the
        # 30 Hz bin inside the 15-30 Hz window cropped below.
        freqs = np.linspace(1,30,30)
        power = Selection_epochs.compute_tfr(method = 'morlet', 
                                            freqs = freqs,
                                            n_cycles = freqs/2,
                                            output='power',
                                            average=False, 
                                            return_itc=False, 
                                            decim=2)

        # power.save(os.path.join(working_dir + 'Processed/', file[:6] + 'Chan_Sel-tfr.hdf5'), overwrite = True)
        left = power['Left'].average().crop(tmin = .1, tmax = .3, fmin = 15, fmax = 30).get_data()
        right = power['Right'].average().crop(tmin = .1, tmax = .3, fmin = 15, fmax = 30).get_data()

        # Normalised right-vs-left power difference per channel
        lmean = left.mean(axis=(1, 2))
        rmean = right.mean(axis=(1, 2))
        result = (rmean - lmean) / (rmean + lmean)

        # Get indices of the three largest and three smallest values
        ranking = np.argsort(result)
        indices_of_largest = ranking[-3:]
        indices_of_smallest = ranking[:3]

        # Get the channel names corresponding to these indices
        channel_names_of_largest = [power.info['ch_names'][i] for i in indices_of_largest]
        channel_names_of_smallest = [power.info['ch_names'][i] for i in indices_of_smallest]
        print(f'Channels with largest difference: {channel_names_of_largest}')
        print(f'Channels with smallest difference: {channel_names_of_smallest}')

        left_chans = channel_names_of_largest # Check if this is correct
        right_chans = channel_names_of_smallest # Check if this is correct

    # Apply ICA solution to the same epochs
    ica.apply(Selection_epochs)

    # Create folder for this participant in the processed folder
    Write_path = os.path.join(working_dir + 'Processed/', file[:6] + '/')
    if not os.path.exists(Write_path):
        os.makedirs(Write_path)
        print(f"Subfolder created at: {Write_path}")
    else:
        print(f"Subfolder already exists at: {Write_path}")

    # Save the ICA-cleaned epochs and the list of interpolated channels
    epo_fName = os.path.join(Write_path + file[:6] + '_ERPs-epo.fif')
    Selection_epochs.save(epo_fName, overwrite = True)
    with open(Write_path + file[:6] + 'bad_channels_ERP.txt', 'w') as f:
        for channel in bad_channels_ERPs:
            f.write(f"{channel}\n")

    # Single-trial time-frequency power of the baseline-corrected epochs
    Selection_epochs.apply_baseline((None,0))
    freqs = np.arange(.3, 35, 1)
    n_cycles = freqs / 2.0
    power = Selection_epochs.compute_tfr(method = 'morlet',
                                            freqs = freqs,
                                            n_cycles = n_cycles,
                                            output='power',
                                            average=False,
                                            return_itc=False,
                                            decim=1)

    power.save(os.path.join(Write_path + file[:6] + 'ERPs-tfr.hdf5'), overwrite = True)

    # Save the ICA solution that will be applied to the data
    compname = os.path.join(Write_path + file[:6] + '_weights-ica.fif')
    ica.save(fname = compname, overwrite = True)

    # Free the large objects before the next recording is loaded
    del Selection_epochs, data


In [None]:
#%%############################################################################
# BBO preprocessing (not the ERP part).
# For every recording this cell band-passes (3-50 Hz) and notch-filters the raw
# data, rebuilds the event table, interpolates noisy channels, epochs the data
# from -4 s to 1 s and saves the epochs and their time-frequency power.
###############################################################################
N_TRIALS_PER_SIDE = 45    # events kept per condition; also the count asserted for 'Left'/'Right'
N_INVALID_TRIALS = 12     # count asserted for 'Invalid' events
N_LOGGED_TRIALS = 102     # p_port entries > 1 expected in the matching Psychopy log (presumably 45 + 45 + 12)

event_id = {'Left':10002,'Invalid':10003, 'Right':10004}

working_dir = data_path + "EEG/"
# NOTE(review): os.listdir returns names in arbitrary order; the participant-number
# assert further down is what guards the index -> file mapping.
EEGfiles = [f for f in os.listdir(working_dir) if f.endswith('.vhdr')]
events_log_path = os.path.join(working_dir,'events_in_eeg.npy')

for sub in range(0,len(EEGfiles)):
    index = sub
    file = EEGfiles[index]
    print(f'Working on file {file}')
    EEG_fpath = os.path.join(working_dir, file)

    ###############################################################################
    # Import the original data for BBO
    ###############################################################################
    BBO = mne.io.read_raw_brainvision(EEG_fpath, preload = True)
    # Filter data (already in memory because of preload=True)
    BBO.filter(l_freq = 3, h_freq = 50)
    BBO.notch_filter(freqs=(50,100), filter_length='auto', notch_widths = 3)
    BBO.set_eeg_reference(ref_channels = "average")

    # Extract events; only the event array is needed, not the annotation mapping
    events, _ = mne.events_from_annotations(BBO)

    # Split the events per condition and log how many were originally recorded
    event_dict = {}
    eventcount = {}
    for key, val in event_id.items():
        # Boolean indexing yields an (n, 3) array, i.e. an empty (0, 3) array
        # when the code never occurs, so no special case is needed for 0 events.
        temp = events[events[:,2] == val]
        numevents = len(temp)
        print(f'The original number of events for {key} is {numevents}')
        eventcount[key] = numevents
        # Conditions with surplus events keep only their last 45
        # (presumably the earlier ones are practice trials - TODO confirm).
        if numevents > N_TRIALS_PER_SIDE:
            temp = temp[-N_TRIALS_PER_SIDE:,:]
        event_dict[key] = temp

    # Set Montage
    montage = mne.channels.make_standard_montage('easycap-M1')
    BBO.rename_channels({'M1': 'TP9', 'M2': 'TP10'})
    BBO.set_montage(montage)

    ###############################################################################
    # Import the csv file with psychopy data
    ###############################################################################
    responsepath = data_path + "Psychopy/"
    # all .csv files of this participant, excluding the 'trial' files, in ascending order
    responsefiles = sorted(
        f for f in os.listdir(responsepath)
        if f.endswith('.csv') and f.startswith(str(index+1)+'_') and 'trial' not in f
    )
    if not responsefiles:
        # Without this guard `filename`/`responses` would be undefined or left
        # over from the previous participant.
        raise FileNotFoundError(
            f'No Psychopy .csv file found for participant {index+1} in {responsepath}')

    # Try each file until one carries the expected number of port codes
    for filename in responsefiles:
        fpath = os.path.join(responsepath, filename)
        responses = pd.read_csv(fpath)
        if 'p_port' not in responses.keys():
            continue
        # Count the entries that are > 1; len() of the comparison would only
        # return the number of rows, regardless of their values.
        if (responses['p_port'] > 1).sum() == N_LOGGED_TRIALS:
            break
    else:
        # Same fallback as before (the last file read stays selected), but visible.
        print(f'WARNING: no response file with {N_LOGGED_TRIALS} p_port entries found, '
              f'falling back to {filename}')

    # Assert that the EEG and the response file match in the participant number
    assert int(filename.split('_')[0]) == index+1 == int(file.split('_')[1]), 'EEG and Psychopy files dont match'

    # If there are no right events, infer the right indices and correct for drift
    # (infer_rights / correct_drift are project helpers from BBO_Analysis_Functions)
    if len(event_dict['Right']) == 0:
        print('No right events found. Inferring and correcting for drift...')
        srate = BBO.info['sfreq']
        left_idx = event_dict['Left'][:,0]
        right_idx = infer_rights(responses, srate, left_idx)
        right_idx = correct_drift(responses, srate, left_idx, right_idx)
        event_dict['Right'] = np.zeros((len(right_idx),3))
        event_dict['Right'][:,0] = right_idx
        event_dict['Right'][:,1] = 0
        event_dict['Right'][:,2] = 10004

    # Store the original event counts of this recording in the shared log.
    # The log is a pickled dict: allow_pickle=True is only acceptable because the
    # file is written by this notebook itself. Start a new log if none exists yet.
    if os.path.exists(events_log_path):
        events_in_eeg = np.load(events_log_path, allow_pickle=True).item()
    else:
        events_in_eeg = {}
    events_in_eeg[file] = eventcount
    np.save(events_log_path, events_in_eeg)

    # Now convert the dict back to an array, sorted by sample index
    events = np.concatenate(list(event_dict.values()), axis=0)
    del event_dict
    events = events.astype(int)
    events = events[events[:,0].argsort()]

    n_left = np.count_nonzero(events[:,2] == event_id['Left'])
    n_right = np.count_nonzero(events[:,2] == event_id['Right'])
    n_invalid = np.count_nonzero(events[:,2] == event_id['Invalid'])
    assert n_left == N_TRIALS_PER_SIDE == n_right, 'Events are not 45'
    assert n_invalid == N_INVALID_TRIALS, 'Invalid events are not 12'

    ###############################################################################
    # Remove and interpolate bad channels
    ###############################################################################

    # Instantiate NoisyChannels with Raw data
    noisy_chans = ppr.NoisyChannels(BBO, do_detrend = False)

    # Find bad by correlation
    noisy_chans.find_bad_by_correlation(correlation_secs=5.0, correlation_threshold=0.4, frac_bad=0.1)
    print(f'After correlation removal: {noisy_chans.get_bads()}')
    BBO.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # Find bad by deviation
    noisy_chans.find_bad_by_deviation()
    print(f'After deviation removal: {noisy_chans.bad_by_deviation}')
    BBO.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # Find bad by high-frequency noise
    noisy_chans.find_bad_by_hfnoise(HF_zscore_threshold=5.0)
    print(f'After HF noise removal: {noisy_chans.get_bads()}')
    BBO.info['bads'].extend(noisy_chans.get_bads())  # Update bads

    # The repeated extend() calls add the same channel several times: de-duplicate
    # before interpolating (as the ERP cell does) and sort so the saved
    # bad-channel list has a stable order.
    bad_channels_Final = sorted(set(BBO.info['bads']))
    BBO.info['bads'] = list(bad_channels_Final)
    BBO.interpolate_bads(reset_bads=True)

    ###############################################################################
    # Epoch data
    ###############################################################################

    epstart = -4
    epend = 1
    real_epochs = mne.Epochs(BBO,
                        events = events,
                        event_id = event_id,
                        tmin = epstart, tmax = epend,
                        proj = False, baseline = None,
                        preload = True, reject = None)
    real_epochs.set_eeg_reference(ref_channels = "average")
    # (None, None) uses the whole epoch as baseline interval
    real_epochs.apply_baseline(baseline=(None, None))

    # Create folder for this participant in the processed folder
    Write_path = os.path.join(working_dir + 'Processed/', file[:6] + '/')
    if not os.path.exists(Write_path):
        os.makedirs(Write_path)
        print(f"Subfolder created at: {Write_path}")
    else:
        print(f"Subfolder already exists at: {Write_path}")

    # save the final version of the data and the list of interpolated channels
    epo_fName = os.path.join(Write_path + file[:6] + '_for_BBO-epo.fif')
    real_epochs.save(epo_fName, overwrite = True)
    with open(Write_path + file[:6] + 'bad_channels_Final.txt', 'w') as f:
        for channel in bad_channels_Final:
            f.write(f"{channel}\n")

    # Single-trial time-frequency power, one Morlet wavelet per Hz from 3 to 50 Hz
    freqs = np.arange(3, 51, 1)
    n_cycles = freqs / 2.0
    power = real_epochs.compute_tfr(method = 'morlet',
                                            freqs = freqs,
                                            n_cycles = n_cycles,
                                            output='power',
                                            average=False,
                                            return_itc=False,
                                            decim=1)

    power.save(os.path.join(Write_path + file[:6] + '-tfr.hdf5'), overwrite = True)
