# Converting EDF files to CSV files so that they are easier to handle in the later stages.

In [1]:
import numpy as np
import pandas as pd
import pyedflib

In [None]:
f = pyedflib.EdfReader("E:\\EEG DATA\\v1.3.0\\edf\\train\\02_tcp_le\\006\\00000609\\s001_2003_10_22\\00000609_s001_t000.edf")

In [None]:
# Copy every signal of the open reader `f` into one 2-D array, one row per
# signal. The row width comes from the first signal's sample count, so the
# other signals are expected to have that same number of samples.
n = f.signals_in_file
signal_labels = f.getSignalLabels()
samples_per_signal = f.getNSamples()[0]
sigbufs = np.zeros((n, samples_per_signal))
for channel_idx in range(n):
    sigbufs[channel_idx, :] = f.readSignal(channel_idx)

In [None]:
len(sigbufs)
sigbufs.shape

In [None]:
data = pd.DataFrame(sigbufs)

In [None]:
data

In [None]:
# Inspect the reader returned by pyedflib.data.test_generator(): print the
# general header, the parameters of one channel, every annotation, and the
# first samples of that channel. The reader is closed once, at the very end.
f = pyedflib.data.test_generator()
print("\nlibrary version: %s" % pyedflib.version.version)

print("\ngeneral header:\n")

print("edfsignals: %i" % f.signals_in_file)
print("file duration: %i seconds" % f.file_duration)
# Fetch the start timestamp once instead of re-querying it for every field.
start = f.getStartdatetime()
print("startdate: %i-%i-%i" % (start.day, start.month, start.year))
print("starttime: %i:%02i:%02i" % (start.hour, start.minute, start.second))
print("patientcode: %s" % f.getPatientCode())
print("gender: %s" % f.getGender())
print("birthdate: %s" % f.getBirthdate())
print("patient_name: %s" % f.getPatientName())
print("patient_additional: %s" % f.getPatientAdditional())
print("admincode: %s" % f.getAdmincode())
print("technician: %s" % f.getTechnician())
print("equipment: %s" % f.getEquipment())
print("recording_additional: %s" % f.getRecordingAdditional())
print("datarecord duration: %f seconds" % f.getFileDuration())
print("number of datarecords in the file: %i" % f.datarecords_in_file)
print("number of annotations in the file: %i" % f.annotations_in_file)

channel = 3
print("\nsignal parameters for the %d.channel:\n\n" % channel)

print("label: %s" % f.getLabel(channel))
print("samples in file: %i" % f.getNSamples()[channel])
print("physical maximum: %f" % f.getPhysicalMaximum(channel))
print("physical minimum: %f" % f.getPhysicalMinimum(channel))
print("digital maximum: %i" % f.getDigitalMaximum(channel))
print("digital minimum: %i" % f.getDigitalMinimum(channel))
print("physical dimension: %s" % f.getPhysicalDimension(channel))
print("prefilter: %s" % f.getPrefilter(channel))
print("transducer: %s" % f.getTransducer(channel))
print("samplefrequency: %f" % f.getSampleFrequency(channel))

# readAnnotations() is indexed below as three parallel sequences:
# [0] onsets, [1] durations, [2] descriptions.
annotations = f.readAnnotations()
for k in range(f.annotations_in_file):
    print("annotation: onset is %f    duration is %s    description is %s" % (annotations[0][k],annotations[1][k],annotations[2][k]))

buf = f.readSignal(channel)
n = 200
print("\nread %i samples\n" % n)
# Format the first n samples in one pass and print the listing once.
# Slicing also avoids an IndexError if the signal holds fewer than n samples.
result = "".join("%.1f, " % sample for sample in buf[:n])
print(result)
# Close only after all reads are finished.
f._close()
del f

In [None]:
data.to_csv("E:\\EEG DATA\\test.csv", header=None)

## Conversion and saving by iterating over all the files

In [None]:
locations = pd.read_table("C:/Users/elonm/Desktop/search.txt", header=None)
locations.head()

In [None]:
locations[0] = locations[0].apply(lambda link: link[1:])
locations.head()

In [None]:
def reformat_links(link):
    """Return *link* prefixed with the tcp_le "Formatted" base folder.

    The prefix is joined by plain string concatenation, so *link* is
    expected to carry its own leading "/" separator.
    """
    base_folder = "E:/EEG DATA/train_ordered/02_tcp_le/Formatted"
    return base_folder + link

import tqdm

# Register `progress_apply` on pandas objects so the mapping below reports
# its progress, then rewrite every listed path with reformat_links.
tqdm.tqdm.pandas()
locations[0] = locations[0].progress_apply(reformat_links)

locations[0][0]

In [None]:
folder_link = "E:/EEG DATA/train_ordered/02_tcp_le/EDFS in CSVS/"

In [2]:
def edf_to_csv(link, folder_link, index):
    """Convert one EDF file to a CSV file named after *index*.

    Every signal of the file at *link* is read into one row of a 2-D array
    whose width is the sample count of the first signal; the array is then
    written, without a header row, to ``folder_link + str(index) + ".csv"``.

    Args:
        link: Path of the EDF file to read.
        folder_link: Output folder. It is concatenated directly with the
            file name, so it must end with a path separator.
        index: Value used as the CSV file name (converted with ``str``).

    Raises:
        Whatever the reader, the array row assignment or the CSV writer
        raise; the reader is closed before the exception propagates.
    """
    edf = pyedflib.EdfReader(link)
    try:
        n = edf.signals_in_file
        sigbufs = np.zeros((n, edf.getNSamples()[0]))
        for i in range(n):
            sigbufs[i, :] = edf.readSignal(i)
        pd.DataFrame(sigbufs).to_csv(folder_link + str(index) + ".csv", header=None)
    finally:
        # Always release the file handle, even when reading or writing
        # fails, so a failed conversion does not leave the EDF file open.
        edf._close()

In [None]:
#Just to test out the function:
edf_to_csv(locations[0][0], folder_link, 10)

# Yes. Our function is working fine. 
# We can now go ahead and convert all our edf files from both configurations to csv

In [3]:
# Load the path listings of both configurations (tcp_le and tcp_ar_a) and
# drop the first character of every listed path.
tcp_le_locations = pd.read_table("C:/Users/elonm/Desktop/search.txt", header=None)
tcp_ar_a_locations = pd.read_table("C:/Users/elonm/Desktop/search_ar_a.txt", header=None)
for listing in (tcp_le_locations, tcp_ar_a_locations):
    listing[0] = listing[0].apply(lambda rel_path: rel_path[1:])
tcp_ar_a_locations.head()

Unnamed: 0,0
0,/00000006/00000006_s004_t000.edf
1,/00000006/00000006_s004_t001.edf
2,/00000006/00000006_s004_t002.edf
3,/00000006/00000006_s005_t000.edf
4,/00000006/00000006_s006_t000.edf


In [50]:
le_folder = "E:/EEG DATA/train_ordered/02_tcp_le/Formatted"
ar_a_folder = "E:/EEG DATA/train_ordered/03_tcp_ar_a/Formatted"

import tqdm

# Turn the listed relative paths into full paths by prepending the folder of
# the matching configuration. NOTE: running this cell again prepends the
# folder a second time.
tqdm.tqdm.pandas()
for listing, base_folder in (
    (tcp_le_locations, le_folder),
    (tcp_ar_a_locations, ar_a_folder),
):
    # Bind the folder as a default argument so each lambda keeps its own prefix.
    listing[0] = listing[0].progress_apply(lambda link, prefix=base_folder: prefix + link)

100%|████████████████████████████████████████████████████████████████████████████████████████| 310/310 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████| 895/895 [00:00<?, ?it/s]


In [5]:
le_format_folder = "E:/EEG DATA/train_ordered/02_tcp_le/EDFS in CSVS/"
ar_a_format_folder = "E:/EEG DATA/train_ordered/03_tcp_ar_a/EDFS IN CSVS/"

# Convert every tcp_le recording; its position in the listing becomes the
# CSV file name and is printed as a progress marker.
for index1, edf_path in enumerate(tcp_le_locations[0]):
    edf_to_csv(edf_path, le_format_folder, index1)
    print(index1)
    


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

NameError: name 'tcp_lar_a_locations' is not defined

In [7]:
# Convert every tcp_ar_a recording. A file that fails is reported and
# skipped so that one bad recording does not abort the whole batch.
failed_indices = []  # listing positions whose conversion raised
for index2, edf_path in enumerate(tcp_ar_a_locations[0]):
    try:
        edf_to_csv(edf_path, ar_a_format_folder, index2)
    except Exception as err:
        # Catch Exception rather than everything so Ctrl-C still interrupts,
        # and show the cause so failures can be diagnosed from the output.
        print("Error on index", index2, "-", repr(err))
        failed_indices.append(index2)
    else:
        print(index2)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
Error on index 80
Error on index 81
Error on index 82
83
84
85
86
87
Error on index 88
Error on index 89
Error on index 90
Error on index 91
Error on index 92
Error on index 93
Error on index 94
Error on index 95
Error on index 96
Error on index 97
Error on index 98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
Error on index 171
Error on index 172
Error on index 173
Error on index 174
Error on index 175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
Error on index 201
20

In [None]:
#TESTING

In [4]:
tcp_ar_a_locations[0][10]

'/00000883/00000883_s003_t003.edf'

In [None]:
def edf_to_csv(link, folder_link, index):
    """Convert one EDF file to a CSV file named after *index*.

    Every signal of the file at *link* is read into one row of a 2-D array
    whose width is the sample count of the first signal; the array is then
    written, without a header row, to ``folder_link + str(index) + ".csv"``.

    Args:
        link: Path of the EDF file to read.
        folder_link: Output folder. It is concatenated directly with the
            file name, so it must end with a path separator.
        index: Value used as the CSV file name (converted with ``str``).

    Raises:
        Whatever the reader, the array row assignment or the CSV writer
        raise; the reader is closed before the exception propagates.
    """
    edf = pyedflib.EdfReader(link)
    try:
        n = edf.signals_in_file
        sigbufs = np.zeros((n, edf.getNSamples()[0]))
        for i in range(n):
            sigbufs[i, :] = edf.readSignal(i)
        pd.DataFrame(sigbufs).to_csv(folder_link + str(index) + ".csv", header=None)
    finally:
        # Always release the file handle, even when reading or writing
        # fails, so a failed conversion does not leave the EDF file open.
        edf._close()

In [5]:
f1 = pyedflib.EdfReader("E:/EEG DATA/train_ordered/03_tcp_ar_a/Formatted"+tcp_ar_a_locations[0][10])

In [11]:
f1.getSignalLabels()

['EEG FP1-REF',
 'EEG FP2-REF',
 'EEG F3-REF',
 'EEG F4-REF',
 'EEG C3-REF',
 'EEG C4-REF',
 'EEG P3-REF',
 'EEG P4-REF',
 'EEG O1-REF',
 'EEG O2-REF',
 'EEG F7-REF',
 'EEG F8-REF',
 'EEG T3-REF',
 'EEG T4-REF',
 'EEG T5-REF',
 'EEG T6-REF',
 'EEG FZ-REF',
 'EEG CZ-REF',
 'EEG PZ-REF',
 'EEG 20-REF',
 'EEG 21-REF',
 'EEG 22-REF',
 'EEG 23-REF',
 'EEG 24-REF',
 'EEG 25-REF',
 'EEG 26-REF',
 'EEG 27-REF',
 'EEG 28-REF',
 'EEG 29-REF',
 'EEG 30-REF',
 'EEG 31-REF',
 'EEG 32-REF']

In [7]:
f2 = pyedflib.EdfReader("E:/EEG DATA/train_ordered/03_tcp_ar_a/Formatted"+tcp_ar_a_locations[0][88])

In [13]:
f2.getSignalLabels()

['EEG FP1-REF',
 'EEG FP2-REF',
 'EEG F3-REF',
 'EEG F4-REF',
 'EEG C3-REF',
 'EEG C4-REF',
 'EEG P3-REF',
 'EEG P4-REF',
 'EEG O1-REF',
 'EEG O2-REF',
 'EEG F7-REF',
 'EEG F8-REF',
 'EEG T3-REF',
 'EEG T4-REF',
 'EEG T5-REF',
 'EEG T6-REF',
 'EEG T1-REF',
 'EEG T2-REF',
 'EEG FZ-REF',
 'EEG CZ-REF',
 'EEG PZ-REF',
 'EEG EKG1-REF',
 'EEG C3P-REF',
 'EEG C4P-REF',
 'EEG SP1-REF',
 'EEG SP2-REF',
 'EMG-REF',
 'EEG 29-REF',
 'EEG 30-REF',
 'EEG 31-REF',
 'EEG 32-REF',
 'IBI',
 'BURSTS',
 'SUPPR']

In [17]:
f3 = pyedflib.EdfReader("E:/EEG DATA/train_ordered/02_tcp_le/Formatted"+tcp_le_locations[0][5])

In [19]:
f3.getSignalLabels()

['EEG FP1-LE',
 'EEG FP2-LE',
 'EEG F3-LE',
 'EEG F4-LE',
 'EEG C3-LE',
 'EEG C4-LE',
 'EEG A1-LE',
 'EEG A2-LE',
 'EEG P3-LE',
 'EEG P4-LE',
 'EEG O1-LE',
 'EEG O2-LE',
 'EEG F7-LE',
 'EEG F8-LE',
 'EEG T3-LE',
 'EEG T4-LE',
 'EEG T5-LE',
 'EEG T6-LE',
 'EEG FZ-LE',
 'EEG CZ-LE',
 'EEG PZ-LE',
 'EEG OZ-LE',
 'EEG PG1-LE',
 'EEG PG2-LE',
 'EEG EKG-LE',
 'EEG 26-LE',
 'EEG 27-LE',
 'EEG 28-LE',
 'EEG 29-LE',
 'EEG 30-LE',
 'EEG 31-LE',
 'EEG 32-LE',
 'PHOTIC PH',
 'DC1-DC',
 'DC2-DC',
 'DC3-DC',
 'DC4-DC',
 'DC5-DC',
 'DC6-DC',
 'DC7-DC',
 'DC8-DC']

In [21]:
# Compare how many signal labels each of the three opened readers reports.
for caption, reader in (
    ("TCP_AR NORMAL", f1),
    ("TCP_AR PROBLEM", f2),
    ("TCP_LE", f3),
):
    print(caption, len(reader.getSignalLabels()))

TCP_AR NORMAL 32
TCP_AR PROBLEM 34
TCP_LE 41


In [45]:
# Read all but the last three signals of `f2` into one 2-D array, one row
# per signal; the row width is the sample count of the first signal.
n = f2.signals_in_file - 3
signal_labels = f2.getSignalLabels()
samples_per_signal = f2.getNSamples()[0]
sigbufs = np.zeros((n, samples_per_signal))
for channel_idx in range(n):
    sigbufs[channel_idx, :] = f2.readSignal(channel_idx)

In [44]:
n

34

In [25]:
100096/391

256.0

In [26]:
tcp_ar_a_locations[0][88]

'/00002991/00002991_s004_t000.edf'

In [27]:
f2.file_info_long()

file name: E:/EEG DATA/train_ordered/03_tcp_ar_a/Formatted/00002991/00002991_s004_t000.edf
signals in file: 34
label: EEG FP1-REF fs: 256 nsamples 100096
label: EEG FP2-REF fs: 256 nsamples 100096
label: EEG F3-REF fs: 256 nsamples 100096
label: EEG F4-REF fs: 256 nsamples 100096
label: EEG C3-REF fs: 256 nsamples 100096
label: EEG C4-REF fs: 256 nsamples 100096
label: EEG P3-REF fs: 256 nsamples 100096
label: EEG P4-REF fs: 256 nsamples 100096
label: EEG O1-REF fs: 256 nsamples 100096
label: EEG O2-REF fs: 256 nsamples 100096
label: EEG F7-REF fs: 256 nsamples 100096
label: EEG F8-REF fs: 256 nsamples 100096
label: EEG T3-REF fs: 256 nsamples 100096
label: EEG T4-REF fs: 256 nsamples 100096
label: EEG T5-REF fs: 256 nsamples 100096
label: EEG T6-REF fs: 256 nsamples 100096
label: EEG T1-REF fs: 256 nsamples 100096
label: EEG T2-REF fs: 256 nsamples 100096
label: EEG FZ-REF fs: 256 nsamples 100096
label: EEG CZ-REF fs: 256 nsamples 100096
label: EEG PZ-REF fs: 256 nsamples 100096
label

In [35]:
151200/f1.file_duration

400.0

In [34]:
f1.file_info_long()

file name: E:/EEG DATA/train_ordered/03_tcp_ar_a/Formatted/00000883/00000883_s003_t003.edf
signals in file: 32
label: EEG FP1-REF fs: 400 nsamples 151200
label: EEG FP2-REF fs: 400 nsamples 151200
label: EEG F3-REF fs: 400 nsamples 151200
label: EEG F4-REF fs: 400 nsamples 151200
label: EEG C3-REF fs: 400 nsamples 151200
label: EEG C4-REF fs: 400 nsamples 151200
label: EEG P3-REF fs: 400 nsamples 151200
label: EEG P4-REF fs: 400 nsamples 151200
label: EEG O1-REF fs: 400 nsamples 151200
label: EEG O2-REF fs: 400 nsamples 151200
label: EEG F7-REF fs: 400 nsamples 151200
label: EEG F8-REF fs: 400 nsamples 151200
label: EEG T3-REF fs: 400 nsamples 151200
label: EEG T4-REF fs: 400 nsamples 151200
label: EEG T5-REF fs: 400 nsamples 151200
label: EEG T6-REF fs: 400 nsamples 151200
label: EEG FZ-REF fs: 400 nsamples 151200
label: EEG CZ-REF fs: 400 nsamples 151200
label: EEG PZ-REF fs: 400 nsamples 151200
label: EEG 20-REF fs: 400 nsamples 151200
label: EEG 21-REF fs: 400 nsamples 151200
label

In [41]:
# Hand-typed positions in tcp_ar_a_locations[0] that are converted again in a
# later cell. The first entries match the "Error on index" lines printed by
# the tcp_ar_a conversion loop; presumably the rest were copied from the same
# output (its captured text is truncated here) - verify before reusing.
new = [80,81,82,88,89,90,91,92,93,94,95,96,97,98,171,172,173,174,175,201,208,209,210,211,212,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,272,273,274,275,278,279,280,281,282,283,284,285,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,354,355,356,361,362,363,364,365,366,367,368,369,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,408 ,409 ,410 ,411 ,412 ,413 ,414 ,415 ,416 ,417 ,418 ,419 ,420 ,421 ,422 ,423 ,424 ,425 ,426 ,427 ,428 ,429 ,430 ,431 ,432 ,433 ,434 ,435 ,436 ,437 ,438 ,439 ,440 ,441 ,442 ,443 ,444,445, 446]

In [43]:
for i in new:
    print(i)

80
81
82
88
89
90
91
92
93
94
95
96
97
98
171
172
173
174
175
201
208
209
210
211
212
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
272
273
274
275
278
279
280
281
282
283
284
285
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
354
355
356
361
362
363
364
365
366
367
368
369
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446


In [46]:
def edf_to_csv(link, folder_link, index, skip_last=3):
    """Convert one EDF file to CSV, leaving out its trailing signals.

    The first ``signals_in_file - skip_last`` signals of the file at *link*
    are read into the rows of a 2-D array whose width is the sample count of
    the first signal; the array is then written, without a header row, to
    ``folder_link + str(index) + ".csv"``.

    Args:
        link: Path of the EDF file to read.
        folder_link: Output folder. It is concatenated directly with the
            file name, so it must end with a path separator.
        index: Value used as the CSV file name (converted with ``str``).
        skip_last: Number of signals at the end of the file to leave out.
            Defaults to 3, the value this function previously hard-coded.
            NOTE(review): presumably chosen to drop the three trailing
            non-EEG signals of the 34-signal recordings - confirm.

    Raises:
        Whatever the reader, the array allocation or row assignment, or the
        CSV writer raise; the reader is closed before the exception
        propagates.
    """
    edf = pyedflib.EdfReader(link)
    try:
        n = edf.signals_in_file - skip_last
        sigbufs = np.zeros((n, edf.getNSamples()[0]))
        for i in range(n):
            sigbufs[i, :] = edf.readSignal(i)
        pd.DataFrame(sigbufs).to_csv(folder_link + str(index) + ".csv", header=None)
    finally:
        # Always release the file handle, even when reading or writing
        # fails, so a failed conversion does not leave the EDF file open.
        edf._close()

In [56]:
folder_link = "E:/EEG DATA/train_ordered/03_tcp_ar_a/EDFS IN CSVS/34_channels/"

# Convert the recordings at the positions listed in `new`, keeping each
# recording's position in the listing as its CSV file name.
ar_a_paths = tcp_ar_a_locations[0]
for listing_pos in new:
    edf_to_csv(ar_a_paths[listing_pos], folder_link, listing_pos)
    print(listing_pos)

80
81
82
88
89
90
91
92
93
94
95
96
97
98
171
172
173
174
175
201
208
209
210
211
212
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
272
273
274
275
278
279
280
281
282
283
284
285
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
354
355
356
361
362
363
364
365
366
367
368
369
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
