# Dataset creation

This code performs several data processing tasks using pandas to manipulate a series of EEG (Electroencephalography) data files.

The main characteristics of this code are:

1. **Adjusting Size**: It finds the minimum number of rows among all the DataFrames and trims each DataFrame to that minimum length to ensure they all have the same number of rows.

2. **Dividing DataFrames**: It defines a function `divide_into_10` that divides a DataFrame into 10 equal parts and resets the indices of each part. Then, it applies this function to each trimmed DataFrame and stores the parts in a dictionary `divided_dataframes`.

3. **Processing DataFrames**: For each DataFrame in `divided_dataframes`, it applies the transposition function to each of the 10 parts, concatenates the transposed parts into a single DataFrame, and assigns a corresponding target.

4. **Final Combination**: It concatenates all the combined DataFrames with their respective targets into a single final DataFrame `final_combined_dataframe`.


In [1]:
import pandas as pd

In [2]:
column_names = ['timestamp', 'counter', 'eeg', 'attention', 'meditation', 'blinking']
baseline = pd.read_csv('data/baseline.dat', delimiter=' ', names=column_names)
exhalar = pd.read_csv('data/exhalar.dat', delimiter=' ', names=column_names)
golpes1 = pd.read_csv('data/golpes1.dat', delimiter=' ', names=column_names)
golpes2 = pd.read_csv('data/golpes2.dat', delimiter=' ', names=column_names)
cerrados = pd.read_csv('data/cerrados.dat', delimiter=' ', names=column_names)
mentalimagery = pd.read_csv('data/mentalimagery.dat', delimiter=' ', names=column_names)
pestaneos = pd.read_csv('data/pestaneos.dat', delimiter=' ', names=column_names)
inhalar = pd.read_csv('data/inhalar.dat', delimiter=' ', names=column_names)

## Dataset Filter

In [3]:
import numpy as np
from scipy.signal import butter, lfilter

In [4]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [5]:
mentalimagery['eeg'] = butter_bandpass_filter(mentalimagery['eeg'], 10, 50, 512, 5)

In [6]:
mentalimagery['eeg']

0        -0.010588
1        -0.090561
2        -0.372605
3        -0.997763
4        -1.984062
           ...    
30855   -15.021058
30856   -16.187874
30857   -17.132227
30858   -17.406881
30859   -16.705947
Name: eeg, Length: 30860, dtype: float64

### Dividir dataset

In [7]:
partition_number = 100

In [8]:
dataframes = [baseline, exhalar, golpes1, golpes2, cerrados, mentalimagery, pestaneos, inhalar]
min_length = min(df.shape[0] for df in dataframes)
dataframes_trimmed = [df.iloc[:min_length] for df in dataframes]

def divide_into(df):
    rows_per_df = len(df) // partition_number
    return [df.iloc[i*rows_per_df: (i+1)*rows_per_df].reset_index(drop=True) for i in range(partition_number)]

divided_dataframes = {}
for i, df in enumerate(dataframes_trimmed):
    divided_dataframes[f'dataframe_{i+1}'] = divide_into(df)

### Create dataset


In [9]:
def transpose_eeg_dataframe(df):
    transposed_df = df['eeg'].to_frame().T
    transposed_df.columns = [f'row_{i}' for i in df.index]
    return transposed_df

targets = {
    'dataframe_1': 'baseline',
    'dataframe_2': 'exhalar',
    'dataframe_3': 'golpes1',
    'dataframe_4': 'golpes2',
    'dataframe_5': 'cerrados',
    'dataframe_6': 'mentalimagery',
    'dataframe_7': 'pestaneos',
    'dataframe_8': 'inhalar'
}

all_combined_dataframes = []
for key, target in targets.items():
    dataframe_list = divided_dataframes[key]
    transposed_dataframes = []
    for df in dataframe_list:
        transposed_df = transpose_eeg_dataframe(df)
        transposed_dataframes.append(transposed_df)
    combined_dataframe = pd.concat(transposed_dataframes, ignore_index=True)
    combined_dataframe['target'] = target
    all_combined_dataframes.append(combined_dataframe)

final_combined_dataframe = pd.concat(all_combined_dataframes, ignore_index=True)

print(final_combined_dataframe)

     row_0  row_1  row_2  row_3  row_4  row_5  row_6  row_7  row_8  row_9  \
0     90.0  104.0  104.0  100.0  105.0  114.0   89.0   58.0   36.0   33.0   
1      3.0   11.0   24.0   32.0   18.0   -6.0  -14.0   -2.0    3.0    3.0   
2   -342.0 -326.0 -339.0 -341.0 -331.0 -334.0 -347.0 -370.0 -387.0 -406.0   
3     -6.0   21.0   41.0   45.0   51.0   59.0   57.0   22.0  -21.0  -21.0   
4     25.0   19.0   10.0    0.0   -5.0   -1.0   16.0   24.0   35.0   45.0   
..     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
795   53.0   81.0   92.0   76.0   65.0   45.0   35.0   43.0   45.0   51.0   
796   51.0   39.0   23.0   34.0   57.0   40.0    6.0   -5.0   22.0   38.0   
797   22.0   68.0   21.0  -51.0  -75.0  -50.0  -50.0  -44.0  -12.0   23.0   
798   10.0    9.0   36.0   59.0   42.0   -2.0  -33.0  -34.0   -7.0   16.0   
799  -17.0    8.0   11.0   54.0   69.0   16.0   10.0   55.0   67.0   42.0   

     ...  row_299  row_300  row_301  row_302  row_303  row_304  row_305  \


In [10]:
file_name = f'data/combined_dataset_filtered_{partition_number}_partitions.csv'
final_combined_dataframe.to_csv(file_name, index=False)