# Preprocessing of sEEG data

### Modules

In [None]:
import mne 
from mne.preprocessing import compute_proj_ecg
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
np.set_printoptions(threshold=10000)
import pandas as pd
pd.set_option("display.max_rows", 999)
import os
import copy
import re
from importlib import reload

### Data loading

In [None]:
# Root folder under which the per-subject "preprocessed" output is written
data_path = "./"

# Subject identifiers and the index of the subject processed in this run
subj = ['xx']
sub_idx = 0

# Experimental conditions handled by the preprocessing loop
condition_list = [
    'produce_music',
    'perceive_music_produced',
    'perceive_music_new',
    'perceive_music_newrepetition',
    'produce_speech',
    'perceive_speech_produced',
    'perceive_speech_new',
    'perceive_speech_newrepetition',
]

D:\


In [None]:
# Function to load data

def load_data(condition, data_dir='D:/data_mne/', subject=None):
    ''' Load the BrainVision recording that belongs to one condition.

    The folder ``data_dir + subject + '/'`` is listed with ``os.listdir`` and the
    first entry whose name contains the file-name fragment of the requested
    condition is read with ``mne.io.read_raw_brainvision(..., preload=True)``.

    Argument:
        condition: name of the condition (possible arguments: produce_music,
            perceive_music_produced, perceive_music_new, perceive_music_newrepetition,
            produce_speech, perceive_speech_produced, perceive_speech_new,
            perceive_speech_newrepetition, rest)
        data_dir: root folder that contains one sub-folder per subject
            (must end with a path separator).
        subject: name of the subject sub-folder; when None the module-level
            ``subj[sub_idx]`` is used.

    Return:
        raw: the object returned by mne.io.read_raw_brainvision for the specified
            condition, or the string "NA" when the condition is unknown or no
            matching file exists in the subject folder (including an empty folder).

    '''

    # File-name fragment that identifies the .vhdr header of each condition.
    vhdr_patterns = {
        "produce_music": 'playmusique_corrected.vhdr',                      # playing
        "perceive_music_produced": 'ListenMusique_corrected.vhdr',
        "perceive_music_new": 'ListenMusiquePassive_1_corrected.vhdr',
        "perceive_music_newrepetition": 'ListenMusique_2_corrected.vhdr',
        # NOTE(review): this fragment embeds what looks like a subject-specific
        # prefix - confirm it is valid for every subject.
        "produce_speech": '22_BouNi_Reading_corrected.vhdr',                # reading
        "perceive_speech_produced": 'ListenReading_corrected.vhdr',
        "perceive_speech_new": 'ListenText_1_corrected.vhdr',
        # NOTE(review): 'ListeText' may be a typo for 'ListenText' - verify
        # against the actual file name on disk before changing it.
        "perceive_speech_newrepetition": 'ListeText_2_corrected.vhdr',
        "rest": 'Rest_corrected.vhdr',                                      # eyes closed
    }

    pattern = vhdr_patterns.get(condition)
    if pattern is None:
        print("No corresponding file found")
        return "NA"

    if subject is None:
        subject = subj[sub_idx]
    subject_dir = data_dir + subject + '/'

    # The first directory entry containing the fragment wins.
    for file_name in os.listdir(subject_dir):
        if pattern in file_name:
            return mne.io.read_raw_brainvision(subject_dir + file_name, preload=True)

    # Nothing matched; this also covers an empty folder, for which no result
    # would otherwise have been assigned at all.
    return "NA"



### Loop to preprocess data 

In [None]:
# The electrode localisation files do not depend on the condition, so they are
# read once before the loop instead of once per condition.
# NOTE(review): `mat73` is not imported in the visible import cell - make sure
# it is imported before this cell runs.
coord_path = 'D:/data_mne/Bou_Ni/freesurfer/Bou_Ni/elec_recon/'
elec2atlas = mat73.loadmat(coord_path + 'elec2atlas.mat')
elec_tissues = mat73.loadmat(coord_path + 'elec2tissues.mat')

# freesurfer directory
fs_path = 'D:/data_mne/Bou_Ni/freesurfer/'

# Conditions are split into two groups that use different event markers.
produce_conditions = ('produce_music', 'produce_speech')
perceive_conditions = ('perceive_music_produced', 'perceive_music_new', 'perceive_music_newrepetition',
                       'perceive_speech_produced', 'perceive_speech_new', 'perceive_speech_newrepetition')

for condition in condition_list:
    preprocessed_path = data_path + subj[sub_idx] + "/preprocessed/" + condition + "/"
    # figures and .fif files are written here, so the folder has to exist
    os.makedirs(preprocessed_path, exist_ok=True)

    raw = load_data(condition)
    if isinstance(raw, str):
        # load_data returns the string "NA" when no recording could be loaded
        print('No recording loaded for condition "' + condition + '", skipping')
        continue

    psd = raw.plot_psd(fmax=256, verbose=0)
    psd.savefig(preprocessed_path + condition + '_day1_psd_raw_data.jpg')

    # annotations
    events_from_annot, event_dict = mne.events_from_annotations(raw)
    print(events_from_annot)

    # time markers: sample index of the start event converted to seconds
    tmax = 300
    if condition in produce_conditions:
        # row 3 is used as the start marker (row 5 was read as an end marker
        # but is not needed because a fixed window of `tmax` seconds is kept)
        start = events_from_annot[3, 0]
        start_time = start / raw.info['sfreq']

        # replace all annotations by a single zero-duration marker at the start
        event_new = mne.Annotations([start_time], 0, ['Stimulus/S New'])
        raw.set_annotations(event_new)
    elif condition in perceive_conditions:
        start = events_from_annot[4, 0]
        start_time = start / raw.info['sfreq']
    else:
        # without a start marker the crop below would use an undefined or stale value
        print('No time markers defined for condition "' + condition + '", skipping')
        continue

    # cropping: first remove everything before the start, then keep `tmax`
    # seconds counted from the new beginning of the recording
    raw.crop(tmin=start_time)
    raw.crop(tmax=tmax)

    events_from_annot, event_dict = mne.events_from_annotations(raw)

    ###### channel localization ######
    # retrieve electrode position (coordinates are divided by 1000;
    # presumably mm -> m, TODO confirm units of elec_mni)
    label = [item for sublist in elec2atlas["cfg"]["elec_mni"]["label"] for item in sublist]
    elec_pos = elec2atlas["cfg"]["elec_mni"]["elecpos"]
    coord_mni_mono = {name: pos / 1000 for name, pos in zip(label, elec_pos)}

    # retrieve electrode info (in/out brain + tissue prob)
    centroid_bin = pd.DataFrame(elec_tissues["centroid_bin"], columns=['label', 'location'])
    centroid_prob = pd.DataFrame(elec_tissues["centroid_prob"], columns=list(elec_tissues["centroid_prob"].keys()))
    centroid_prob["label"] = centroid_prob["label"].apply(lambda x: x[0])

    ###### anatomical channel rejection ######
    verbose = False
    gray_thresh = 0.0

    # get list of channels in both raw data and location files
    chs = copy.copy(raw.ch_names)
    for ch_name in centroid_bin['label'].to_list():
        if ch_name not in chs:
            chs.append(ch_name)
    ch_rejection = pd.DataFrame({"label": chs})
    ch_rejection['kept for analysis'] = ''

    ch_rejection['not in data'] = ''
    for ch_name in centroid_bin['label'].to_list():  # remove channels info if not in data
        if ch_name not in raw.ch_names:
            ch_rejection.loc[ch_rejection["label"] == ch_name, "not in data"] = True
            centroid_bin = centroid_bin[centroid_bin["label"] != ch_name]
            centroid_prob = centroid_prob[centroid_prob["label"] != ch_name]

    # Each rejection pass iterates over a snapshot of the channel names because
    # channels are dropped from `raw` inside the loop.
    ch_rejection['no location info'] = ''
    located_labels = centroid_bin['label'].to_list()
    for ch_name in list(raw.ch_names):  # remove channels if no info about location
        if ch_name != 'ECG' and ch_name not in located_labels:
            raw.drop_channels(ch_name)
            if verbose:
                print('Channel "'+ch_name+ '" cound not be located and has been removed')
            ch_rejection.loc[ch_rejection["label"] == ch_name, "no location info"] = True

    ch_rejection['out of brain'] = ''
    for ch_name in list(raw.ch_names):  # remove channels if out of the brain
        # 'ECG' is tested first: it has no row in the location table, and the
        # truth value of the resulting empty comparison array is ambiguous.
        if ch_name != 'ECG' and centroid_bin.loc[(centroid_bin["label"] == ch_name)]["location"].values != "inside":
            centroid_bin = centroid_bin[centroid_bin["label"] != ch_name]
            centroid_prob = centroid_prob[centroid_prob["label"] != ch_name]
            raw.drop_channels(ch_name)
            if verbose:
                print('Channel "'+ch_name+ '" was not inside the brain and has been removed')
            ch_rejection.loc[ch_rejection["label"] == ch_name, "out of brain"] = True

    ch_rejection['<gray_thresh'] = ''
    for ch_name in list(raw.ch_names):  # remove channels based on tissue prob
        if ch_name != 'ECG' and centroid_prob.loc[(centroid_prob["label"] == ch_name)]["gray"].values < gray_thresh:
            centroid_bin = centroid_bin[centroid_bin["label"] != ch_name]
            centroid_prob = centroid_prob[centroid_prob["label"] != ch_name]
            raw.drop_channels(ch_name)
            if verbose:
                print('Channel "'+ch_name+ '" had less than '+str(gray_thresh)+' gray prob and has been removed')
            ch_rejection.loc[ch_rejection["label"] == ch_name, "<gray_thresh"] = True

    for ch_name in raw.ch_names:
        ch_rejection.loc[ch_rejection["label"] == ch_name, "kept for analysis"] = True
    ch_rejection.to_csv(preprocessed_path + condition +'_ch_rejection.csv')

    print(str(len(centroid_bin["label"])) + ' channels remaining')

    ####### montage #######
    montage_type = 'bipolar'  # 'monopolar' or 'bipolar'

    # collect, per shaft (letter part of the name), the contact numbers in
    # channel order; 'ECG' is not an electrode contact and is skipped
    ch_dict = dict()
    for ch in raw.ch_names:
        if ch == 'ECG':
            continue
        shaft = re.findall(r"[a-zA-Z']+", ch)[0]
        num = int(re.findall(r"\d+", ch)[0])
        ch_dict.setdefault(shaft, []).append(num)

    if montage_type == 'monopolar':
        # keep coordinates only for channels that are still in the data
        for ch in list(coord_mni_mono.keys()):
            if ch not in raw.ch_names:
                coord_mni_mono.pop(ch, None)
        montage = mne.channels.make_dig_montage(ch_pos=coord_mni_mono, coord_frame='mni_tal')
        montage.add_mni_fiducials(subjects_dir=fs_path)

        raw.set_montage(montage)

        montage_fig = montage.plot()
        montage_fig.savefig(preprocessed_path + '_day1_montage_2d_monopolar.jpg')
        print(str(len(raw.ch_names)) + " channels remaining")

    elif montage_type == 'bipolar':
        anodes = []
        cathodes = []
        coord_mni_bipol = dict()
        for shaft, contacts in ch_dict.items():
            # pair each contact with the next one only when their numbers are consecutive
            for first, second in zip(contacts, contacts[1:]):
                if first == second - 1:
                    an = shaft + str(first)
                    ca = shaft + str(second)
                    # the bipolar channel is placed midway between its two contacts
                    coord_mni_bipol[an+'-'+ca] = np.mean(np.array((coord_mni_mono[an], coord_mni_mono[ca])), axis=0)
                    anodes.append(an)
                    cathodes.append(ca)

        raw.load_data()
        raw = mne.set_bipolar_reference(raw, anode=anodes, cathode=cathodes)
        # remove channels that are not in bipolar montage
        non_bipolar = [ch for ch in raw.ch_names if '-' not in ch]
        if non_bipolar:
            raw.drop_channels(non_bipolar)

        montage = mne.channels.make_dig_montage(ch_pos=coord_mni_bipol, coord_frame='mni_tal')
        raw.set_montage(montage)
        montage_fig = montage.plot()
        montage_fig.savefig(preprocessed_path + '_day1_montage_2d_bipolar.jpg')
        print(str(len(raw.ch_names)) + " channels remaining")

    ###### drop channels #######
    bad_channel = ["TP'6-TP'7", "TP'7-TP'8"]
    # only drop the channels that are actually present, so a recording that
    # lacks one of them does not abort the whole loop
    present_bad = [ch for ch in bad_channel if ch in raw.ch_names]
    if len(present_bad) < len(bad_channel):
        print('Bad channels not present in data: ' + str([ch for ch in bad_channel if ch not in present_bad]))
    if present_bad:
        raw.drop_channels(present_bad)

    ###### filtering #######
    # notch filter for line noise
    raw.notch_filter(np.array([50, 150, 250]))

    # high-pass filter for low frequencies below 0.5 Hz
    raw.filter(l_freq=0.5, h_freq=None, n_jobs=-1)  # freq = 0.5, for TRF 1 Hz

    # plot for checking filtering
    psd = raw.plot_psd(fmax=256, verbose=0)
    psd.savefig(preprocessed_path + condition + '_day1_psd_post-filtering.jpg')
    plt.show()

    ###### epoching #######
    reject_criteria = None
    tw = [0, tmax]

    epochs = mne.Epochs(raw, events_from_annot, event_id=event_dict, tmin=tw[0], tmax=tw[1], baseline=None,
                        reject=reject_criteria, event_repeated='drop', preload=True,
                        picks=None, reject_by_annotation=False,
                        detrend=None)

    # keep only the epochs locked to the start marker of the condition group
    if condition in produce_conditions:
        epochs = epochs['Stimulus/S New']
    else:
        epochs = epochs['Stimulus/S  1']

    raw.save(preprocessed_path + condition + "_" + subj[sub_idx] + "_day1_bipolar_raw_preprocessed.fif", overwrite=True)
    epochs.save(preprocessed_path + condition + "_" + subj[sub_idx] + "_day1_bipolar_epochs_preprocessed.fif", overwrite=True)

    # release the (preloaded) data before the next condition is loaded
    del raw
    del epochs
    
