# Dataset creation

This code performs several data processing tasks using pandas to manipulate a series of EEG (Electroencephalography) data files.

The main characteristics of this code are:

1. **Adjusting Size**: It finds the minimum number of rows among all the DataFrames and trims each DataFrame to that minimum length to ensure they all have the same number of rows.

2. **Dividing DataFrames**: It defines a function `divide_into_10` that divides a DataFrame into 10 equal parts and resets the indices of each part. Then, it applies this function to each trimmed DataFrame and stores the parts in a dictionary `divided_dataframes`.

3. **Processing DataFrames**: For each DataFrame in `divided_dataframes`, it applies the transposition function to each of the 10 parts, concatenates the transposed parts into a single DataFrame, and assigns a corresponding target.

4. **Final Combination**: It concatenates all the combined DataFrames with their respective targets into a single final DataFrame `final_combined_dataframe`.


In [1]:
import pandas as pd

In [2]:
column_names = ['timestamp', 'counter', 'eeg', 'attention', 'meditation', 'blinking']
baseline = pd.read_csv('data/baseline.dat', delimiter=' ', names=column_names)
exhalar = pd.read_csv('data/exhalar.dat', delimiter=' ', names=column_names)
golpes1 = pd.read_csv('data/golpes1.dat', delimiter=' ', names=column_names)
golpes2 = pd.read_csv('data/golpes2.dat', delimiter=' ', names=column_names)
cerrados = pd.read_csv('data/cerrados.dat', delimiter=' ', names=column_names)
mentalimagery = pd.read_csv('data/mentalimagery.dat', delimiter=' ', names=column_names)
pestaneos = pd.read_csv('data/pestaneos.dat', delimiter=' ', names=column_names)
inhalar = pd.read_csv('data/inhalar.dat', delimiter=' ', names=column_names)

## Dataset Filter

In [3]:
import numpy as np
from scipy.signal import butter, lfilter

In [4]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [5]:
mentalimagery['eeg'] = butter_bandpass_filter(mentalimagery['eeg'], 10, 50, 512, 5)
cerrados['eeg'] = butter_bandpass_filter(cerrados['eeg'], 5, 15, 512, 5)
inhalar['eeg'] = butter_bandpass_filter(inhalar['eeg'], 1, 15, 512, 5)
exhalar['eeg'] = butter_bandpass_filter(exhalar['eeg'], 1, 15, 512, 5)

### Dividir dataset

In [6]:
partition_number = 100

In [7]:
#dataframes = [baseline, exhalar, golpes1, golpes2, cerrados, mentalimagery, pestaneos, inhalar]
dataframes = [baseline, exhalar, cerrados, mentalimagery, pestaneos, inhalar]
min_length = min(df.shape[0] for df in dataframes)
dataframes_trimmed = [df.iloc[:min_length] for df in dataframes]

def divide_into(df):
    rows_per_df = len(df) // partition_number
    return [df.iloc[i*rows_per_df: (i+1)*rows_per_df].reset_index(drop=True) for i in range(partition_number)]

divided_dataframes = {}
for i, df in enumerate(dataframes_trimmed):
    divided_dataframes[f'dataframe_{i+1}'] = divide_into(df)

### Create dataset


In [8]:
def transpose_eeg_dataframe(df):
    transposed_df = df['eeg'].to_frame().T
    transposed_df.columns = [f'row_{i}' for i in df.index]
    return transposed_df

targets = {
    'dataframe_1': 'baseline',
    'dataframe_2': 'exhalar',
    #'dataframe_3': 'golpes1',
    #'dataframe_4': 'golpes2',
    'dataframe_3': 'cerrados',
    'dataframe_4': 'mentalimagery',
    'dataframe_5': 'pestaneos',
    'dataframe_6': 'inhalar'
}

all_combined_dataframes = []
for key, target in targets.items():
    dataframe_list = divided_dataframes[key]
    transposed_dataframes = []
    for df in dataframe_list:
        transposed_df = transpose_eeg_dataframe(df)
        transposed_dataframes.append(transposed_df)
    combined_dataframe = pd.concat(transposed_dataframes, ignore_index=True)
    combined_dataframe['target'] = target
    all_combined_dataframes.append(combined_dataframe)

final_combined_dataframe = pd.concat(all_combined_dataframes, ignore_index=True)

print(final_combined_dataframe)

            row_0         row_1         row_2         row_3         row_4  \
0    9.000000e+01  1.040000e+02  1.040000e+02  1.000000e+02  1.050000e+02   
1    3.000000e+00  1.100000e+01  2.400000e+01  3.200000e+01  1.800000e+01   
2   -3.420000e+02 -3.260000e+02 -3.390000e+02 -3.410000e+02 -3.310000e+02   
3   -6.000000e+00  2.100000e+01  4.100000e+01  4.500000e+01  5.100000e+01   
4    2.500000e+01  1.900000e+01  1.000000e+01  0.000000e+00 -5.000000e+00   
..            ...           ...           ...           ...           ...   
595 -1.348205e+07 -1.351810e+07 -1.355264e+07 -1.358568e+07 -1.361721e+07   
596  1.588049e+07  1.590177e+07  1.592128e+07  1.593901e+07  1.595495e+07   
597 -1.869657e+07 -1.869439e+07 -1.869011e+07 -1.868370e+07 -1.867518e+07   
598  2.167341e+07  2.164222e+07  2.160855e+07  2.157240e+07  2.153378e+07   
599 -2.479696e+07 -2.472516e+07 -2.465050e+07 -2.457298e+07 -2.449260e+07   

            row_5         row_6         row_7         row_8         row_9  

In [9]:
file_name = f'data/combined_dataset_filtered_{partition_number}_partitions.csv'
final_combined_dataframe.to_csv(file_name, index=False)