# Preprocessing data channels

In [1]:
import json
import pandas as pd
import numpy as np
import pyedflib
from pathlib import Path

### Pandas options

In [2]:
# Show every row and column when a DataFrame is displayed.
# The fully qualified option names are required: abbreviated patterns such as
# 'max_row' / 'max_columns' also match the 'styler.render.*' options on recent
# pandas releases, and an ambiguous pattern makes set_option raise OptionError.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

## Channels and experiments info

cha_dict = {'F3..' : '32', 'F6..': '37', 'P3..': '49', 'P6..' :'54'}

exp_dict = {'03' : ['D_E_',  'R' ], '04' : ['D_E_',  'I' ] , '05' : ['C_B_',  'R' ], '06' : ['C_B_',  'I' ]}

## File paths

In [3]:
# Project layout, all relative to the directory the notebook runs from.
root = Path()
info_path = root / 'info'                    # JSON metadata files
data_path = root / 'data'
raw_data_path = data_path / 'data_down'      # downloaded recordings, one folder per subject
preproc_path = data_path / 'data_pre_proc'   # destination of the exported channel files
# parents=True so this cell also works when the 'data' directory does not exist yet.
preproc_path.mkdir(parents=True, exist_ok=True)

### Experiment info dict (loading)

In [4]:
# Load the experiment metadata. The notebook indexes it by experiment-number
# string (e.g. '05') and reads the 'Type' and 'Activity' fields of each entry.
exp_dict_file = info_path / 'experiment_dict.json'
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(exp_dict_file, encoding='utf-8') as f:
    exp_dict = json.load(f)
exp_dict

{'01': {'Experiment': 1, 'Type': 'Baseline', 'Activity': 'Eyes Open'},
 '02': {'Experiment': 2, 'Type': 'Baseline', 'Activity': 'Eyes Closed'},
 '03': {'Experiment': 3, 'Type': 'Real', 'Activity': 'Left Right'},
 '04': {'Experiment': 4, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '05': {'Experiment': 5, 'Type': 'Real', 'Activity': 'Top Down'},
 '06': {'Experiment': 6, 'Type': 'Imaginary', 'Activity': 'Top Down'},
 '07': {'Experiment': 7, 'Type': 'Real', 'Activity': 'Left Right'},
 '08': {'Experiment': 8, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '09': {'Experiment': 9, 'Type': 'Real', 'Activity': 'Top Down'},
 '10': {'Experiment': 10, 'Type': 'Imaginary', 'Activity': 'Top Down'},
 '11': {'Experiment': 11, 'Type': 'Real', 'Activity': 'Left Right'},
 '12': {'Experiment': 12, 'Type': 'Imaginary', 'Activity': 'Left Right'},
 '13': {'Experiment': 13, 'Type': 'Real', 'Activity': 'Top Down'},
 '14': {'Experiment': 14, 'Type': 'Imaginary', 'Activity': 'Top Down'}}

In [5]:
# Build a "Type : Activity" label for one experiment as a quick sanity check
# of the loaded metadata.
exp_num = '05'
' : '.join(exp_dict[exp_num][field] for field in ('Type', 'Activity'))

'Real : Top Down'

In [6]:
# Load the list of channels to export and turn it into a
# {channel code: channel number} lookup.
cha_dict_file = info_path / 'channels_code.json'
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(cha_dict_file, encoding='utf-8') as f:
    cha_dict_aux = json.load(f)
# Each entry of the JSON list provides a 'Code' and a 'Number' field.
cha_dict = {entry['Code']: entry['Number'] for entry in cha_dict_aux}
cha_dict

{'F3..': 32, 'F6..': 37, 'P3..': 49, 'P6..': 54}

## Extracting channels data

In [37]:
def _rows_before_trailing_zeros(signals):
    """Return how many leading samples remain once trailing all-zero samples are dropped.

    Parameters
    ----------
    signals : np.ndarray
        2-D array laid out as (channels, samples).

    Returns
    -------
    int
        Position of the last sample in which at least one channel is non-zero,
        plus one; 0 when every sample is zero.
    """
    # A sample counts as padding only when *every* channel is zero there.
    nonzero_samples = np.flatnonzero((signals != 0).any(axis=0))
    return int(nonzero_samples[-1]) + 1 if nonzero_samples.size else 0


cut_len = 19_000  # maximum number of samples exported per channel

# Columns of the per-recording summary table. These labels end up as the CSV
# header, so their exact spelling is kept unchanged.
col_list =['Subject', 'Experiment number', ' Experiment Activity', ' Experiment Type', 'Channels number', 'Channels lenght', 'Non-zero lenght']

# Only sub-directories are processed (stray files in the download folder are
# skipped); sorting makes the processing order the same on every run.
edf_folders = sorted(path for path in raw_data_path.glob('*') if path.is_dir())

# One single-row frame per recording, concatenated once after the loop
# instead of growing the summary frame on every iteration.
experiment_frames = []

for folder in edf_folders:
    sub_proc_path = preproc_path / folder.stem
    sub_proc_path.mkdir(exist_ok=True)

    for file_path in sorted(folder.glob("*.edf")):
        # The experiment number is whatever follows the last 'R' in the file stem.
        exp_num = file_path.stem.split('R')[-1]
        exp_path = sub_proc_path / '_'.join([folder.stem, exp_num])
        exp_path.mkdir(exist_ok=True)

        # Read every signal of the recording into a (channels, samples) array.
        f = pyedflib.EdfReader(str(file_path))
        try:
            n = f.signals_in_file
            signal_labels = f.getSignalLabels()
            # The buffer is sized from the first signal's sample count.
            sigbufs = np.zeros((n, f.getNSamples()[0]))
            for i in np.arange(n):
                sigbufs[i, :] = f.readSignal(i)
        finally:
            # Always release the reader, even if reading a signal fails.
            f.close()

        # Signals to DataFrame: one column per signal label, one row per sample.
        df = pd.DataFrame(sigbufs.T, columns=signal_labels)

        # Cut the trailing zeros: drop the rows at the end of the recording in
        # which all channels are zero. (The previous test, `row.all() == 0`,
        # was true as soon as any single channel was zero, so it could also
        # discard valid samples.)
        df = df.iloc[:_rows_before_trailing_zeros(sigbufs)]

        # Record the experiment info for the summary table.
        exp_info = exp_dict[exp_num]
        experiment_frames.append(
            pd.DataFrame(
                [[folder.stem, exp_num, exp_info['Activity'], exp_info['Type'], sigbufs.shape[0], sigbufs.shape[1], df.shape[0]]],
                columns=col_list,
            )
        )

        # Print the experiment info.
        print(' Subject: {}, Array shape: {}, non-zero {}, Experiment number: {}, Experiment info: {}'.format(  folder.stem, sigbufs.shape,  df.shape[0], exp_num, ' - '.join([ exp_info['Activity'], exp_info['Type']] ) ))

        # Truncate the series to at most cut_len samples, then write each
        # selected channel to its own single-column text file.
        df = df.head(cut_len)
        for cha_code, cha_number in cha_dict.items():
            file_name = exp_path / ("_".join([folder.stem, exp_num, str(cha_number)]) + '.txt')
            df[cha_code].to_csv(file_name, header=False, index=False)

# Assemble the summary table. Every single-row frame carries index label 0, so
# reset_index() yields the same 'index' column as before in the CSV files.
if experiment_frames:
    df_experients = pd.concat(experiment_frames)
else:
    df_experients = pd.DataFrame(columns=col_list)
df_experients = df_experients.reset_index()

# Save the experiments info to CSV.
df_experients.to_csv(data_path / 'experiments_info.csv')

# Recordings with fewer valid samples than the export length.
df_smaller = df_experients[df_experients['Non-zero lenght'] < cut_len]

df_smaller.to_csv(data_path / 'experiments_smaller.csv')

 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 03, Experiment info: Left Right - Real
 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 04, Experiment info: Left Right - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 05, Experiment info: Top Down - Real
 Subject: S001, Array shape: (64, 20000), non-zero 19917, Experiment number: 06, Experiment info: Top Down - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 07, Experiment info: Left Right - Real
 Subject: S001, Array shape: (64, 20000), non-zero 19916, Experiment number: 08, Experiment info: Left Right - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 09, Experiment info: Top Down - Real
 Subject: S001, Array shape: (64, 20000), non-zero 19920, Experiment number: 10, Experiment info: Top Down - Imaginary
 Subject: S001, Array shape: (64, 20000), non-zero 19919, Ex

In [38]:
# Recordings with fewer valid samples than the export length. The threshold
# reuses cut_len so it cannot drift from the value used during extraction.
df_smaller = df_experients[df_experients['Non-zero lenght'] < cut_len]
df_smaller.shape

(38, 8)

In [39]:
# Display the summary table ordered from the shortest to the longest recording.
df_experients.sort_values(by='Non-zero lenght', ascending=True)

Unnamed: 0,index,Subject,Experiment number,Experiment Activity,Experiment Type,Channels number,Channels lenght,Non-zero lenght
1262,0,S106,5,Top Down,Real,64,5920,5808
1197,0,S100,12,Left Right,Imaginary,64,15744,15742
1198,0,S100,13,Top Down,Real,64,15744,15743
1196,0,S100,11,Left Right,Real,64,15744,15743
1188,0,S100,3,Left Right,Real,64,15744,15743
1199,0,S100,14,Top Down,Imaginary,64,15744,15744
1195,0,S100,10,Top Down,Imaginary,64,15744,15744
1194,0,S100,9,Top Down,Real,64,15744,15744
1193,0,S100,8,Left Right,Imaginary,64,15744,15744
1192,0,S100,7,Left Right,Real,64,15744,15744


In [32]:
# Small single-column frame with zeros at both ends and in the middle,
# printed as plain text and then shown with the notebook's rich display.
d = dict(x=[0, 0, 1, 2, 1, 0, 0, 4, 4, 0, 0])
df = pd.DataFrame(data=d)

print(df)

df

    x
0   0
1   0
2   1
3   2
4   1
5   0
6   0
7   4
8   4
9   0
10  0


Unnamed: 0,x
0,0
1,0
2,1
3,2
4,1
5,0
6,0
7,4
8,4
