In [1]:
import os 
import re

import pyedflib
import numpy as np 
import matplotlib.pyplot as plt

import skfda
#from skfda.representation.grid import FDataGrid
#NOTE: the dataset needs to be downloaded and unzipped first

# Directory holding the extracted recordings; named once so the directory
# listing and the path join below cannot drift apart.
DATA_DIR = "../data/eeg-during-mental-arithmetic-tasks-1.0.0"

# Absolute paths of every EDF file in DATA_DIR. Sorted because os.listdir
# returns entries in arbitrary order.
eeg_signalfiles_names = sorted(
    os.path.abspath(os.path.join(DATA_DIR, entry))
    for entry in os.listdir(DATA_DIR)
    if entry.endswith(".edf")
)

# subject_id -> absolute path of that subject's EDF file, one dict per
# recording suffix ("_1.edf" / "_2.edf").
# NOTE(review): the dataset description on PhysioNet appears to say the "_2"
# recordings are taken *during* the arithmetic task rather than after it --
# confirm before interpreting `after_arith_task` literally.
before_arith_task = {}
after_arith_task = {}
dict_by_suffix = {"_1.edf": before_arith_task, "_2.edf": after_arith_task}

# process each file
for edf_file_name in eeg_signalfiles_names:
    file_name = os.path.basename(edf_file_name)

    # choose the target dict from the file-name suffix; EDF files with any
    # other suffix are ignored
    target = next(
        (d for suffix, d in dict_by_suffix.items() if file_name.endswith(suffix)),
        None,
    )
    if target is None:
        continue

    # the subject identifier is the first two-digit run in the file name
    id_match = re.search(r"\d\d", file_name)
    if id_match is None:
        raise ValueError(f"no two-digit subject id in file name: {file_name!r}")
    subject_id = int(id_match.group())
    target[subject_id] = edf_file_name

In [2]:
# report how many subjects were found for each of the two recordings
bat_count = len(before_arith_task)
aat_count = len(after_arith_task)
print("len of dict bat: ", bat_count)
print("len of dict aat: ", aat_count)

len of dict bat:  36
len of dict aat:  36


In [3]:
# EEG channel labels listed in signal-index order; each label's position in
# this list is the index stored for it in label_index_dict.
_eeg_labels_in_order = [
    "Fp1", "Fp2", "F3", "F4", "F7", "F8", "T3", "T4", "C3", "C4",
    "T5", "T6", "P3", "P4", "O1", "O2", "Fz", "Cz", "Pz",
]

# channel label -> signal index
label_index_dict = {label: position for position, label in enumerate(_eeg_labels_in_order)}

In [4]:
# Open subject 0's first recording. The context manager closes the reader
# even if one of the reads below raises.
with pyedflib.EdfReader(before_arith_task[0]) as f:
    # print edf file metadata: file_info_long() prints it itself and returns
    # None, so wrapping it in print() only added a stray "None" line.
    f.file_info_long()
    # Index the dict directly so an unknown label raises KeyError here
    # instead of handing None to readSignal.
    eeg_signal = f.readSignal(label_index_dict["P4"])
print()
print("eeg signal number 0: ", eeg_signal)

file name: /home/enki/Documents/THESIS/FDA-EEG/data/eeg-during-mental-arithmetic-tasks-1.0.0/Subject00_1.edf
signals in file: 21
label: EEG Fp1 fs: 500.0 nsamples 91000
label: EEG Fp2 fs: 500.0 nsamples 91000
label: EEG F3 fs: 500.0 nsamples 91000
label: EEG F4 fs: 500.0 nsamples 91000
label: EEG F7 fs: 500.0 nsamples 91000
label: EEG F8 fs: 500.0 nsamples 91000
label: EEG T3 fs: 500.0 nsamples 91000
label: EEG T4 fs: 500.0 nsamples 91000
label: EEG C3 fs: 500.0 nsamples 91000
label: EEG C4 fs: 500.0 nsamples 91000
label: EEG T5 fs: 500.0 nsamples 91000
label: EEG T6 fs: 500.0 nsamples 91000
label: EEG P3 fs: 500.0 nsamples 91000
label: EEG P4 fs: 500.0 nsamples 91000
label: EEG O1 fs: 500.0 nsamples 91000
label: EEG O2 fs: 500.0 nsamples 91000
label: EEG Fz fs: 500.0 nsamples 91000
label: EEG Cz fs: 500.0 nsamples 91000
label: EEG Pz fs: 500.0 nsamples 91000
label: EEG A2-A1 fs: 500.0 nsamples 91000
label: ECG ECG fs: 500.0 nsamples 91000
None

eeg signal number 0:  [7.08614378e+00 9.

In [6]:
def eeg_data_matrix(subject_idx, subject_edfsignal_dict, labels):
    """Read the requested EEG channels of one subject into a matrix.

    Parameters
    ----------
    subject_idx : hashable
        Key of the subject in ``subject_edfsignal_dict``.
    subject_edfsignal_dict : dict
        Maps a subject key to the path of that subject's EDF file.
    labels : iterable of str
        Channel labels to read (e.g. ``["Fp1", "P4"]``). Each one must be a
        key of the label-index map defined in this function.

    Returns
    -------
    numpy.matrix
        One row per requested label, in the order given; each row holds the
        samples returned by ``readSignal`` for that label.

    Raises
    ------
    KeyError
        If ``subject_idx`` is not in ``subject_edfsignal_dict`` or a label is
        not in the label-index map.
    """
    #label-index map
    label_index_dict = {"Fp1":0,"Fp2":1,"F3":2,"F4":3,"F7":4,"F8":5,"T3":6,"T4":7,"C3":8,
                    "C4":9,"T5":10,"T6":11,"P3":12,"P4":13,"O1":14,"O2":15,"Fz":16, "Cz":17,"Pz":18}

    #check if there's a edf file for a given subject index
    if subject_idx not in subject_edfsignal_dict:
        raise KeyError(f"no EDF file registered for subject {subject_idx!r}")

    # Resolve every label before touching the file, so a bad label fails
    # without opening the recording.
    signal_indices = []
    for label in labels:
        if label not in label_index_dict:
            raise KeyError(f"unknown EEG channel label: {label!r}")
        signal_indices.append(label_index_dict[label])

    #build data matrix; the with-block closes the reader, no explicit close()
    with pyedflib.EdfReader(subject_edfsignal_dict[subject_idx]) as f:
        eeg_signals_arr = [f.readSignal(idx) for idx in signal_indices]

    # NOTE(review): NumPy discourages np.matrix; it is kept here only so that
    # existing callers keep receiving the same type.
    return np.matrix(eeg_signals_arr)

In [7]:
# read every channel named in label_index_dict from subject 1's entry in
# before_arith_task
all_labels = [*label_index_dict]
data = eeg_data_matrix(1, before_arith_task, all_labels)
print("data : ", data)

data :  [[-6.34791221e+00 -5.88470922e+00 -5.16095454e+00 ...  1.39021195e-03
   1.39021195e-03  1.39021195e-03]
 [-6.71995401e+00 -6.25268698e+00 -5.50295629e+00 ...  5.77824979e-04
   5.77824979e-04  5.77824979e-04]
 [-5.61789152e+00 -5.75924681e+00 -5.68246122e+00 ...  1.41752346e-03
   1.41752346e-03  1.41752346e-03]
 ...
 [-3.57152858e+00 -3.90513513e+00 -4.15791191e+00 ... -3.21821012e-04
  -3.21821012e-04 -3.21821012e-04]
 [-5.23125354e+00 -6.30394179e+00 -7.29497765e+00 ... -6.97556420e-04
  -6.97556420e-04 -6.97556420e-04]
 [-6.41279333e+00 -6.48198043e+00 -6.30901266e+00 ...  1.84008240e-03
   1.84008240e-03  1.84008240e-03]]


In [None]:
# Wrap the raw EEG samples in a discretised functional-data object
# (FDataGrid): one curve per row of `data`, all sampled on the same time grid.
# No basis expansion happens here.
# NOTE(review): a B-spline representation would need a separate step
# (presumably fd.to_basis(...) with a B-spline basis -- confirm in the
# scikit-fda docs).
SAMPLING_RATE_HZ = 500  # matches "fs: 500.0" in the EDF header printed earlier

# `data` is an np.matrix, which is always 2-D; hand FDataGrid a plain ndarray.
eeg_samples = np.asarray(data)
n_samples = eeg_samples.shape[1]

fd = skfda.FDataGrid(
            data_matrix=eeg_samples,
            # One grid point per sample, in seconds. np.linspace(0, 91000)
            # would return only its default 50 points, which cannot match the
            # number of samples per channel.
            grid_points=np.arange(n_samples) / SAMPLING_RATE_HZ,
        )
