### Data2BraindecodeFormat
##### Read the info CSV table (as created by Read_HCI) and the EEG data from BDF files, and bring the EEG data into a format that is ready to be used by the Braindecode toolbox


In [39]:
import pandas as pd
import numpy as np
import pyedflib
import matplotlib.pyplot as plt
import mne
import pickle
import braindecode

from mne.io import concatenate_raws
from braindecode.datautil.signal_target import SignalAndTarget

In [6]:
# FUNCTIONS

def get_sig(fname, ch):
    
    '''
    Returns signal of the desired channel ch in BDF file fname
    
    fname: path of the file, opened with pyedflib.EdfReader
    ch: channel index handed to EdfReader.readSignal
    
    Returns whatever readSignal yields for that channel (the samples of
    the whole recording).
    '''
    
    # The context manager closes the file again, also when reading fails.
    # Only the samples are needed, so the signal header is not fetched.
    with pyedflib.EdfReader(fname) as f:
        return f.readSignal(ch)



import subprocess

def find_files(file_name):
    
    '''
    Ubuntu: looks up file_name with the `locate` command and returns a 
    list with one matching path per entry, in the order locate printed them.
    
    file_name: pattern handed to `locate`
    
    The list is empty when locate printed nothing.
    '''
    
    command = ['locate', file_name]

    # check=False: a non-zero exit status of locate is not turned into an
    # exception; whatever was printed (possibly nothing) is still returned.
    completed = subprocess.run(command, stdout=subprocess.PIPE, check=False)
    output = completed.stdout.decode()

    # Splitting on '\n' leaves an empty string behind the final newline 
    # (and a lone '' when there is no output at all); drop those entries
    # so that every element of the result is a real path.
    search_results = [path for path in output.split('\n') if path]

    return search_results



# DATASET-SPECIFIC FUNCTIONS

def subjTrial_Xy(subjID, EEGFile, subj_df, tPriorVideo, tAfterVideo, sFrq, decode):
    """
    Cut the video window of one trial out of its BDF recording and look up
    the trial's target value.
    
    subjID: subject identifier (not used inside this function, kept so that
        existing calls keep working)
    EEGFile: file name of the recording, as listed in the column
        'EEGFile according to xml' of subj_df
    subj_df: dataframe with the rows of the subject
    tPriorVideo: time prior to video start --> determines time window to be cut out from EEG data.
        It is multiplied by sFrq to get a number of samples, i.e. it is in
        seconds if sFrq is given in Hz.
    tAfterVideo: time after video end --> determines time window to be cut out from EEG data
        (same unit as tPriorVideo)
    sFrq: sampling frequency
    decode: name of column in df containing the target value (string)
    
    Returns array X in the form channels x timesteps (for trial that corresponds EEGFile;
    rows 0-31 are channels 0-31 of the file, row 32 is the status channel)
    and trial-corresponding target y (int) for subject subjID. 
    
    Raises FileNotFoundError if find_files reports no path for EEGFile and
    ValueError if subj_df does not hold exactly one row for EEGFile.
 
    """
    
    statusChIdx = 46   # index of the status channel in the recording
    nSigCh = 32        # number of signal channels copied into X
    
    # get filepath (first hit); blank entries are ignored so that a missing
    # file is reported here and not as an obscure reader error later on
    matches = [path for path in find_files(EEGFile) if path]
    if not matches:
        raise FileNotFoundError('No file found for ' + str(EEGFile))
    filepath = matches[0]
    
    # get status channel
    statusCh = get_sig(filepath, statusChIdx)
    
    # sample points at which the status channel is at its maximum value;
    # the first one is taken as video start, the last one as video end
    statusEvents = np.where(statusCh == np.max(statusCh))[0]
    spVideoStart = np.min(statusEvents)
    spVideoEnd = np.max(statusEvents)
    
    # get idx of sample point representing video start - tPriorVideo
    # (clamped to 0: a negative index would slice from the end of the signal)
    spPrior = max(int(spVideoStart - (sFrq*tPriorVideo)), 0)
    
    # get idx of sample point representing video end + tAfterVideo
    # (clamped to the recording length so that the slices below have winLen samples)
    spAfter = min(int(spVideoEnd + (sFrq*tAfterVideo)), len(statusCh))
    
    # get length (in sample points) of window 
    winLen = spAfter - spPrior
    
    # initialize empty array: signal channels plus one row for the status channel
    subjTrial_X = np.empty([nSigCh + 1, winLen])
    
    for i in range(nSigCh): 
        subjTrial_X[i,:] = get_sig(filepath, i)[spPrior:spAfter]
    
    subjTrial_X[nSigCh,:] = statusCh[spPrior:spAfter]
    
    # target value: exactly one row of subj_df has to belong to EEGFile
    target = subj_df.loc[subj_df['EEGFile according to xml'] == EEGFile, decode]
    if len(target) != 1:
        raise ValueError('Expected exactly one row for ' + str(EEGFile)
                         + ', found ' + str(len(target)))
    subjTrial_y = int(target.iloc[0])
    
    return subjTrial_X, subjTrial_y


def subj_Xy(subjID, df, decode, tPriorVideo = 0.5, tAfterVideo = 0, sFrq = 256):
    '''
    Collects the data of all trials of subject subjID whose target value is finite.
    
    subjID: ID that is compared with the column 'subjectID' of df
    df: dataframe with the info on the trials
    decode: name of column in df containing the target value (string)
    tPriorVideo: time prior to video start, forwarded unchanged to subjTrial_Xy
    tAfterVideo: time after video end, forwarded unchanged to subjTrial_Xy
    sFrq: sampling frequency, forwarded unchanged to subjTrial_Xy
    
    Returns the list subj_X with one array (channels x timesteps) per trial and 
    the list subj_y with the corresponding targets, both in the row order of df.
    
    '''
    
    # Rows that belong to the subject and whose target is not NaN / infinite
    keep = np.isfinite(df[decode]) & (df['subjectID'] == subjID)
    subj_df = df[keep]
    
    # One (X, y) pair per recording listed for the subject
    trials = [
        subjTrial_Xy(subjID, eegFile, subj_df, tPriorVideo, tAfterVideo, sFrq, decode)
        for eegFile in subj_df['EEGFile according to xml']
    ]
    
    subj_X = [trial_X for trial_X, _ in trials]
    subj_y = [trial_y for _, trial_y in trials]
    
    return subj_X, subj_y


In [19]:
# Name of the dataframe column that holds the target to be decoded
decode = 'vlncLevel'

# Load the info table (all subjects, all trials) into a dataframe
df = pd.read_csv('EmotionElicitation_Info')

# Keep only the rows in which the target column is finite (drops NaN);
# the result is stored as a global variable called '<decode>_df'
nameOfDF = '{}_df'.format(decode)
globals()[nameOfDF] = df.loc[np.isfinite(df[decode])]

# Write the filtered dataframe to disk as csv
globals()[nameOfDF].to_csv('EmotionElicitation_' + decode)

# Sorted IDs of all subjects that still have rows after filtering, and their count
subjIDs = np.sort(globals()[nameOfDF]['subjectID'].unique())
noSubj = subjIDs.size


In [21]:
# Settings for cutting the trial windows out of the recordings
sFrq = 256          # sampling frequency
tPriorVideo = 0.5   # time before video start that is included in the window
tAfterVideo = 0     # time after video end that is included in the window

# Create two dictionaries of lists: 
# 1. X: with subjID as key and the respective list subj_X as value
# 2. y: with subjID as key and the respective list subj_y as value
X = {}
y = {}

for subj in subjIDs: 
    
    # Hand over the settings defined above (instead of repeating the
    # literals) so that changing them there actually takes effect
    subj_X, subj_y = subj_Xy(subj, df, decode, tPriorVideo = tPriorVideo, tAfterVideo = tAfterVideo, sFrq = sFrq)
    X[subj] = subj_X
    y[subj] = subj_y
    

In [37]:
# TEST 
#for subj in subjIDs: 
    #print(len(X[subj]) == len(y[subj]))

In [41]:
# Save dictionaries as pickle-files
# (the with-blocks close the files even if pickle.dump raises)
with open("X.pkl", "wb") as f:
    pickle.dump(X, f)

with open('y.pkl', 'wb') as f:
    pickle.dump(y, f)

In [None]:
####### END ########

In [74]:
def subjTrial_Xy(subjID, EEGFile, subj_df, tPriorVideo, tAfterVideo, sFrq, decode):
    """
    Cut the video window of one trial out of its BDF recording and look up
    the trial's target value.
    
    subjID: subject identifier (not used inside this function, kept so that
        existing calls keep working)
    EEGFile: file name of the recording, as listed in the column
        'EEGFile according to xml' of subj_df
    subj_df: dataframe with the rows of the subject
    tPriorVideo: time prior to video start --> determines time window to be cut out from EEG data.
        It is multiplied by sFrq to get a number of samples, i.e. it is in
        seconds if sFrq is given in Hz.
    tAfterVideo: time after video end --> determines time window to be cut out from EEG data
        (same unit as tPriorVideo)
    sFrq: sampling frequency
    decode: name of column in df containing the target value (string)
    
    Returns array X in the form channels x timesteps (for trial that corresponds EEGFile;
    rows 0-31 are channels 0-31 of the file, row 32 is the status channel)
    and trial-corresponding target y (int) for subject subjID. 
    
    Raises FileNotFoundError if find_files reports no path for EEGFile and
    ValueError if subj_df does not hold exactly one row for EEGFile.
 
    """
    
    statusChIdx = 46   # index of the status channel in the recording
    nSigCh = 32        # number of signal channels copied into X
    
    # get filepath (first hit); blank entries are ignored so that a missing
    # file is reported here and not as an obscure reader error later on
    matches = [path for path in find_files(EEGFile) if path]
    if not matches:
        raise FileNotFoundError('No file found for ' + str(EEGFile))
    filepath = matches[0]
    
    # get status channel
    statusCh = get_sig(filepath, statusChIdx)
    
    # sample points at which the status channel is at its maximum value;
    # the first one is taken as video start, the last one as video end
    statusEvents = np.where(statusCh == np.max(statusCh))[0]
    spVideoStart = np.min(statusEvents)
    spVideoEnd = np.max(statusEvents)
    
    # get idx of sample point representing video start - tPriorVideo
    # (clamped to 0: a negative index would slice from the end of the signal)
    spPrior = max(int(spVideoStart - (sFrq*tPriorVideo)), 0)
    
    # get idx of sample point representing video end + tAfterVideo
    # (clamped to the recording length so that the slices below have winLen samples)
    spAfter = min(int(spVideoEnd + (sFrq*tAfterVideo)), len(statusCh))
    
    # get length (in sample points) of window 
    winLen = spAfter - spPrior
    
    # initialize empty array: signal channels plus one row for the status channel
    subjTrial_X = np.empty([nSigCh + 1, winLen])
    
    for i in range(nSigCh): 
        subjTrial_X[i,:] = get_sig(filepath, i)[spPrior:spAfter]
    
    # COMMENT OUT FOR REAL DECODING
    subjTrial_X[nSigCh,:] = statusCh[spPrior:spAfter]
    
    # target value: exactly one row of subj_df has to belong to EEGFile
    target = subj_df.loc[subj_df['EEGFile according to xml'] == EEGFile, decode]
    if len(target) != 1:
        raise ValueError('Expected exactly one row for ' + str(EEGFile)
                         + ', found ' + str(len(target)))
    subjTrial_y = int(target.iloc[0])
    
    return subjTrial_X, subjTrial_y




def subj_Xy(subjID, df, decode, tPriorVideo = 0.5, tAfterVideo = 0, sFrq = 256):
    '''
    Collects the data of all trials that df lists for subject subjID.
    
    subjID: ID that is compared with the column 'subjectID' of df
    df: dataframe with the info on the trials (it is not filtered for 
        missing target values here)
    decode: name of column in df containing the target value (string)
    tPriorVideo: time prior to video start, forwarded unchanged to subjTrial_Xy
    tAfterVideo: time after video end, forwarded unchanged to subjTrial_Xy
    sFrq: sampling frequency, forwarded unchanged to subjTrial_Xy
    
    Returns the list subj_X with one array (channels x timesteps) per trial and 
    the list subj_y with the corresponding targets, both in the row order of df.
    
    '''
    # Rows that belong to the subject
    subj_df = df.loc[df['subjectID'] == subjID]
    
    # One (X, y) pair per recording listed for the subject
    trials = [
        subjTrial_Xy(subjID, eegFile, subj_df, tPriorVideo, tAfterVideo, sFrq, decode)
        for eegFile in subj_df['EEGFile according to xml']
    ]
    
    subj_X = [trial_X for trial_X, _ in trials]
    subj_y = [trial_y for _, trial_y in trials]
    
    return subj_X, subj_y
    
    

In [42]:
# Load the info table into a dataframe
df = pd.read_csv('EmotionElicitation_Info')

# Column that holds the arousal target
decode = 'arslLevel'

# ----- AROUSAL -----
# Rows in which the arousal target is finite (i.e. not NaN)
arsl_df = df.loc[np.isfinite(df[decode])]

# Save the arousal dataframe
arsl_df.to_csv('EmotionElicitation_Arousal')

# Sorted IDs of all subjects that have rows in the arousal dataframe, and their count
arslSubjIDs = np.sort(arsl_df['subjectID'].unique())
noArslSubj = arslSubjIDs.size


# ----- VALENCE -----
# Rows in which the valence target is finite (i.e. not NaN)
vlnc_df = df.loc[np.isfinite(df['vlncLevel'])]

# Save the valence dataframe
vlnc_df.to_csv('EmotionElicitation_Valence')

# Sorted IDs of all subjects that have rows in the valence dataframe, and their count
vlncSubjIDs = np.sort(vlnc_df['subjectID'].unique())
noVlncSubj = vlncSubjIDs.size

In [68]:
# Debug output: first five columns of every row of subjTrial_X except row 0.
# NOTE(review): subjTrial_X is not assigned at top level in the visible cells --
# presumably a leftover from an interactive session; confirm before re-running.
print(subjTrial_X[1:, :5])
# Bare expression as last line of the cell: the notebook displays the valence dataframe
vlnc_df

[[ -7.08978378e+03  -7.09087752e+03  -7.09247127e+03  -7.09434627e+03
   -7.09631501e+03]
 [ -3.10744738e+03  -3.10722863e+03  -3.10735363e+03  -3.10925988e+03
   -3.11157238e+03]
 [ -1.71742026e+04  -1.71764214e+04  -1.71769526e+04  -1.71767026e+04
   -1.71746089e+04]
 [ -1.06053398e+04  -1.06040273e+04  -1.06040273e+04  -1.06064960e+04
   -1.06071523e+04]
 [  8.24357851e+02   8.25232849e+02   8.24857850e+02   8.22732854e+02
    8.20451608e+02]
 [ -2.20673030e+03  -2.20466780e+03  -2.20516780e+03  -2.20807405e+03
   -2.20904279e+03]
 [ -1.09769953e+04  -1.09695579e+04  -1.09747453e+04  -1.09847766e+04
   -1.09832766e+04]
 [ -6.29803524e+03  -6.29503524e+03  -6.29516024e+03  -6.29728524e+03
   -6.29622274e+03]
 [  1.71532495e+03   1.71598120e+03   1.71623120e+03   1.71607495e+03
    1.71601245e+03]
 [ -1.35804437e+03  -1.35726312e+03  -1.35560687e+03  -1.35451312e+03
   -1.35323188e+03]
 [ -8.92443664e+03  -8.92446789e+03  -8.91953039e+03  -8.91415540e+03
   -8.91315541e+03]
 [  1.3466

Unnamed: 0.1,Unnamed: 0,EEGFile according to xml,cutLenSec,cutNr,experimentType,feltArsl,feltCtrl,feltEmo,feltPred,feltVlnc,isStim,path,sessionID,subjectID,arslLevel,vlncLevel
3,3,Part_7_S_Trial15_emotion.bdf,154.8446,30,emotion elicitation,6.0,3.0,5.0,8.0,1.0,1,ion/HCI/Sessions/810,810,7,0.0,0.0
6,6,Part_5_S_Trial5_emotion.bdf,119.8279,10,emotion elicitation,7.0,6.0,4.0,1.0,8.0,1,ion/HCI/Sessions/530,530,5,1.0,2.0
16,16,Part_1_S_Trial13_emotion.bdf,108.3634,26,emotion elicitation,7.0,2.0,3.0,7.0,2.0,1,tion/HCI/Sessions/26,26,1,2.0,0.0
17,17,Part_11_S_Trial2_emotion.bdf,134.1462,4,emotion elicitation,7.0,9.0,0.0,9.0,3.0,1,on/HCI/Sessions/1304,1304,11,0.0,1.0
18,18,Part_3_S_Trial15_emotion.bdf,174.3458,30,emotion elicitation,1.0,8.0,6.0,2.0,6.0,1,ion/HCI/Sessions/290,290,3,2.0,1.0
24,24,Part_3_S_Trial7_emotion.bdf,90.8796,14,emotion elicitation,2.0,3.0,2.0,2.0,1.0,1,ion/HCI/Sessions/274,274,3,0.0,0.0
29,29,Part_16_S_Trial4_emotion.bdf,124.6335,8,emotion elicitation,1.0,3.0,0.0,4.0,5.0,1,on/HCI/Sessions/1958,1958,16,0.0,1.0
30,30,Part_9_S_Trial4_emotion.bdf,70.5256,8,emotion elicitation,6.0,4.0,4.0,9.0,1.0,1,on/HCI/Sessions/1048,1048,9,1.0,2.0
31,31,Part_2_S_Trial1_emotion.bdf,151.0231,2,emotion elicitation,3.0,5.0,11.0,4.0,5.0,1,ion/HCI/Sessions/132,132,2,1.0,2.0
34,34,Part_30_S_Trial3_emotion.bdf,124.6334,6,emotion elicitation,4.0,7.0,11.0,7.0,7.0,1,on/HCI/Sessions/3776,3776,30,1.0,2.0


In [None]:
# Load the info table into a dataframe
df = pd.read_csv('EmotionElicitation_Info')

# Column that holds the arousal target
decode = 'arslLevel'

# ----- AROUSAL -----
# Rows in which the arousal target is finite (i.e. not NaN)
arsl_df = df.loc[np.isfinite(df[decode])]

# Save the arousal dataframe
arsl_df.to_csv('EmotionElicitation_Arousal')

# Sorted IDs of all subjects that have rows in the arousal dataframe, and their count
arslSubjIDs = np.sort(arsl_df['subjectID'].unique())
noArslSubj = arslSubjIDs.size


# ----- VALENCE -----
# Rows in which the valence target is finite (i.e. not NaN)
vlnc_df = df.loc[np.isfinite(df['vlncLevel'])]

# Save the valence dataframe
vlnc_df.to_csv('EmotionElicitation_Valence')

# Sorted IDs of all subjects that have rows in the valence dataframe, and their count
vlncSubjIDs = np.sort(vlnc_df['subjectID'].unique())
noVlncSubj = vlncSubjIDs.size
