# Preprocessing data channels

In [1]:
import json
import pandas as pd
import numpy as np
import pyedflib
from pathlib import Path

### Pandas options

In [2]:
# Lift pandas' display limits so frames are rendered without truncating
# columns or rows.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

## File paths

In [7]:
# Directory layout, relative to the notebook's working directory.
root = Path()
info_path = root / 'info'                   # metadata files read by later cells
data_path = root / 'data'
raw_data_path = data_path / 'data_down'     # input folders scanned by the extraction cell
preproc_path = data_path / 'data_pre_proc'  # destination of the exported channel files
# parents=True: also create `data/` when it is missing, instead of raising
# FileNotFoundError on a fresh checkout.
preproc_path.mkdir(parents=True, exist_ok=True)

### Experiment info dict (loading)

In [4]:
# Load the experiment metadata: a dict keyed by the two-digit experiment code,
# each value holding the 'Experiment', 'Type' and 'Activity' fields.
exp_dict_file = info_path / 'experiment_dict.json'
# The `with` block already closes the file on exit, so no explicit close() is
# needed. JSON is read as UTF-8 rather than the platform's default encoding.
with open(exp_dict_file, encoding='utf-8') as f:
    exp_dict = json.load(f)
exp_dict

{'01': {'Experiment': 1, 'Type': 'Baseline', 'Activity': 'Eyes Open'},
 '02': {'Experiment': 2, 'Type': 'Baseline', 'Activity': 'Eyes Closed'},
 '03': {'Experiment': 3, 'Type': 'Real', 'Activity': 'Left Right'},
 '04': {'Experiment': 4, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '05': {'Experiment': 5, 'Type': 'Real', 'Activity': 'Top Down'},
 '06': {'Experiment': 6, 'Type': 'Imaginary', 'Activity': 'Top Down'},
 '07': {'Experiment': 7, 'Type': 'Real', 'Activity': 'Left Right'},
 '08': {'Experiment': 8, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '09': {'Experiment': 9, 'Type': 'Real', 'Activity': 'Top Down'},
 '10': {'Experiment': 10, 'Type': 'Imaginary', 'Activity': 'Top Down'},
 '11': {'Experiment': 11, 'Type': 'Real', 'Activity': 'Left Right'},
 '12': {'Experiment': 12, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '13': {'Experiment': 13, 'Type': 'Real', 'Activity': 'Top Down'},
 '14': {'Experiment': 14, 'Type': 'Imaginary', 'Activity': 'Top Down'}}

In [16]:
# Load the channel table: a list of records with 'Code', 'Simple_code' and
# 'Latex' fields.
cha_dict_file = info_path / 'channels_code_proc.json'
# The `with` block already closes the file on exit, so no explicit close() is
# needed. JSON is read as UTF-8 rather than the platform's default encoding.
with open(cha_dict_file, encoding='utf-8') as f:
    cha_dict_aux = json.load(f)
# Index the records by their raw code: code -> [simple code, LaTeX label].
cha_dict = {
    entry['Code']: [entry['Simple_code'], entry['Latex']]
    for entry in cha_dict_aux
}
cha_dict

{'Fc5.': ['Fc5', 'Fc_{5}'],
 'Fc3.': ['Fc3', 'Fc_{3}'],
 'Fc1.': ['Fc1', 'Fc_{1}'],
 'Fcz.': ['Fcz', 'Fc_{z}'],
 'Fc2.': ['Fc2', 'Fc_{2}'],
 'Fc4.': ['Fc4', 'Fc_{4}'],
 'Fc6.': ['Fc6', 'Fc_{6}'],
 'C5..': ['C5', 'C_{5}'],
 'C3..': ['C3', 'C_{3}'],
 'C1..': ['C1', 'C_{1}'],
 'Cz..': ['Cz', 'C_{z}'],
 'C2..': ['C2', 'C_{2}'],
 'C4..': ['C4', 'C_{4}'],
 'C6..': ['C6', 'C_{6}'],
 'Cp5.': ['Cp5', 'Cp_{5}'],
 'Cp3.': ['Cp3', 'Cp_{3}'],
 'Cp1.': ['Cp1', 'Cp_{1}'],
 'Cpz.': ['Cpz', 'Cp_{z}'],
 'Cp2.': ['Cp2', 'Cp_{2}'],
 'Cp4.': ['Cp4', 'Cp_{4}'],
 'Cp6.': ['Cp6', 'Cp_{6}'],
 'Fp1.': ['Fp1', 'Fp_{1}'],
 'Fpz.': ['Fpz', 'Fp_{z}'],
 'Fp2.': ['Fp2', 'Fp_{2}'],
 'Af7.': ['Af7', 'Af_{7}'],
 'Af3.': ['Af3', 'Af_{3}'],
 'Afz.': ['Afz', 'Af_{z}'],
 'Af4.': ['Af4', 'Af_{4}'],
 'Af8.': ['Af8', 'Af_{8}'],
 'F7..': ['F7', 'F_{7}'],
 'F5..': ['F5', 'F_{5}'],
 'F3..': ['F3', 'F_{3}'],
 'F1..': ['F1', 'F_{1}'],
 'Fz..': ['Fz', 'F_{z}'],
 'F2..': ['F2', 'F_{2}'],
 'F4..': ['F4', 'F_{4}'],
 'F6..': ['F6', 'F_{

## Extracting channels data

In [10]:
def _count_valid_samples(signals):
    """Return how many samples remain once trailing all-zero samples are dropped.

    Parameters
    ----------
    signals : numpy.ndarray
        2-D array laid out as (channels, samples).

    Returns
    -------
    int
        Index of the last sample in which at least one channel is non-zero,
        plus one; 0 when every sample is zero (or there are no samples).

    A sample is treated as padding only when *every* channel is exactly zero.
    The previous test (`row.all() == 0`) was true as soon as *any* channel was
    zero, so genuine samples that happened to contain a 0 were trimmed too.
    """
    non_padding = np.flatnonzero(signals.any(axis=0))
    return int(non_padding[-1]) + 1 if non_padding.size else 0


# Number of samples kept per channel when exporting.
cut_len = 15_742

# Columns of the per-experiment summary table. The names (including the
# leading spaces and the 'lenght' spelling) are kept as they are because they
# end up in the CSV header and are used by later cells.
col_list =['Subject', 'Experiment number', ' Experiment Activity', ' Experiment Type', 'Channels number', 'Channels lenght', 'Non-zero lenght']
# One row per processed EDF file; turned into a DataFrame once, after the
# loop, instead of concatenating a frame per iteration.
experiment_records = []

# Only directories are processed (stray files would otherwise get an empty
# output folder); sorted so the row order of the summary is reproducible.
edf_folders = sorted(p for p in raw_data_path.glob('*') if p.is_dir())

for folder in edf_folders:
    sub_proc_path = preproc_path / folder.stem
    sub_proc_path.mkdir(parents=True, exist_ok=True)
    edf_files = sorted(folder.glob("*.edf"))

    for file_path in edf_files:
        # Experiment code = text after the last 'R' in the file name.
        exp_num = file_path.stem.split('R')[-1]
        exp_info = exp_dict[exp_num]
        exp_path = sub_proc_path / '_'.join([folder.stem, exp_num])
        exp_path.mkdir(parents=True, exist_ok=True)

        # Read every signal of the file into a (channels, samples) array.
        f = pyedflib.EdfReader(str(file_path))
        try:
            n = f.signals_in_file
            signal_labels = f.getSignalLabels()
            # Sized from the first signal's sample count.
            sigbufs = np.zeros((n, f.getNSamples()[0]))
            for channel_idx in range(n):
                sigbufs[channel_idx, :] = f.readSignal(channel_idx)
        finally:
            # Release the file handle even when a read fails.
            f.close()

        # Signals to DataFrame: one column per channel, one row per sample.
        df = pd.DataFrame(sigbufs.T, columns=signal_labels)

        # Cut the trailing zero padding.
        df = df.iloc[:_count_valid_samples(sigbufs)]

        # Record the experiment info.
        experiment_records.append([folder.stem, exp_num, exp_info['Activity'], exp_info['Type'], sigbufs.shape[0], sigbufs.shape[1], df.shape[0]])

        # Print the experiment info.
        print(' Subject: {}, Array shape: {}, non-zero: {}, Experiment number: {}, Experiment info: {}'.format(  folder.stem, sigbufs.shape,  df.shape[0], exp_num, ' - '.join([ exp_info['Activity'], exp_info['Type']] ) ))

        # Truncate every series to the common length.
        df = df.head(cut_len)
        # Export each selected channel to its own single-column text file.
        for code, names in cha_dict.items():
            file_name = exp_path / ("_".join([folder.stem , exp_num , names[0]]) + '.txt')
            df[code].to_csv(file_name, header=False, index=False)

# Save the experiments info to csv (default RangeIndex, one row per file).
df_experients = pd.DataFrame(experiment_records, columns=col_list)
df_experients.to_csv(data_path / 'experiments_info.csv')


 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 03, Experiment info: Left Right - Real
 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 04, Experiment info: Left Right - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 05, Experiment info: Top Down - Real
 Subject: S001, Array shape: (64, 20000), non-zero: 19917, Experiment number: 06, Experiment info: Top Down - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 07, Experiment info: Left Right - Real
 Subject: S001, Array shape: (64, 20000), non-zero: 19916, Experiment number: 08, Experiment info: Left Right - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 09, Experiment info: Top Down - Real
 Subject: S001, Array shape: (64, 20000), non-zero: 19920, Experiment number: 10, Experiment info: Top Down - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero: 

In [9]:
# Experiments whose trimmed recording is shorter than the export length.
is_short = df_experients['Non-zero lenght'] < cut_len
df_smaller = df_experients[is_short]
print(df_smaller.shape)
# Display all experiments ordered from the shortest recording to the longest.
df_experients.sort_values(by='Non-zero lenght')

(1, 7)


Unnamed: 0,Subject,Experiment number,Experiment Activity,Experiment Type,Channels number,Channels lenght,Non-zero lenght
1262,S106,5,Top Down,Real,64,5920,5808
1197,S100,12,Left Right,Imaginary,64,15744,15742
1198,S100,13,Top Down,Real,64,15744,15743
1196,S100,11,Left Right,Real,64,15744,15743
1188,S100,3,Left Right,Real,64,15744,15743
1199,S100,14,Top Down,Imaginary,64,15744,15744
1195,S100,10,Top Down,Imaginary,64,15744,15744
1194,S100,9,Top Down,Real,64,15744,15744
1193,S100,8,Left Right,Imaginary,64,15744,15744
1192,S100,7,Left Right,Real,64,15744,15744


## Images interval


In [1]:
# Time between two consecutive samples, in seconds.
interval = 6.25 * 10**-3
print("taxa de amostragem em segundos: \t\t{}s".format(interval))
# Duration, in seconds, of 1000 and of 66 samples.
for n_samples in (1000, 66):
    print(n_samples * interval)

# Durations in seconds: 5808 samples is the shortest recording in the
# experiments table, 15742 samples is the cut length used for the export.
shortest_s = 5808 * interval
adopted_s = 15742 * interval
summary_template = (
    "Duração experimento mínimo (descartado): \t{:.3f}s\n"
    "Proporção de duração do experimento descartado: {}\n"
    "Duração (d) adotada após corte: \t\t{:.3f}s\n"
    "Proporção da duração adotada(d)\n"
    "em relação aos 120 segundos de\n"
    "duração estimada do experimento (d/120) \t{:.3f}"
)
# Each duration is also reported as a fraction of the 120 s estimated length.
print(summary_template.format(shortest_s, shortest_s / 120, adopted_s, adopted_s / 120))

taxa de amostragem em segundos: 		0.00625s
6.25
0.41250000000000003
Duração experimento mínimo (descartado): 	36.300s
Proporção de duração do experimento descartado: 0.30250000000000005
Duração (d) adotada após corte: 		98.388s
Proporção da duração adotada(d)
em relação aos 120 segundos de
duração estimada do experimento (d/120) 	0.820
