# File Processing
## Fetching data from bucket

In [1]:
# Recursively (-r) copy the whole dataset bucket into the current directory;
# -m lets gsutil run the transfers in parallel.
!gsutil -m cp -r gs://dataset_eeg_cafe2022 ./

Copying gs://dataset_eeg_cafe2022/Formato dos dados.docx...
Copying gs://dataset_eeg_cafe2022/Y_All.xlsx...                                 
Copying gs://dataset_eeg_cafe2022/renameFiles.py...                             
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_11.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_18.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_17.csv...                  
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_1.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_19.csv...                  
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_14.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_10.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_12.csv...
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_16.csv...                  
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_7.csv...                   
Copying gs://dataset_eeg_cafe2022/RenamedFiles/alpha_3.csv...                

In [4]:
import os, sys
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import plotly.express as px

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [5]:
csv_files_path = './dataset_eeg_cafe2022/RenamedFiles'
# Match on the extension only: a substring test would also pick up names such
# as 'x.csv.bak'. Sorting makes the processing order reproducible, because
# os.listdir() returns entries in arbitrary order.
csv_files = sorted(
    file for file in os.listdir(csv_files_path) if file.endswith('.csv')
)

if not csv_files:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, whereas an exception stops "Run All" at this cell and
    # leaves a visible error.
    raise FileNotFoundError('Sem dados no diretório especificado.')

In [6]:
def load_dfs(path, files):
    """Read each named file under ``path`` as a tab-delimited table.

    Args:
        path: Directory prefix; it is joined to every file name with ``/``.
        files: Iterable of file names to read.

    Returns:
        A list with one DataFrame per entry of ``files``, in the same order.
    """
    return [pd.read_csv(f'{path}/{name}', delimiter='\t') for name in files]

## Filtering

In [7]:
def filter_dfs(df_list):
    """Subtract each frame's smallest ``Timestamp`` from its ``Timestamp`` column.

    The frames in ``df_list`` are modified in place and nothing is returned.

    Args:
        df_list: Iterable of DataFrames that each have a ``Timestamp`` column.
    """
    for frame in df_list:
        timestamps = frame['Timestamp']
        frame['Timestamp'] = timestamps - timestamps.min()

In [8]:
def remove_other_columns(df_list):
    """Return copies of the frames with the unwanted columns dropped.

    For every frame, the columns at positions 10 through 22 are removed,
    together with the columns named ``other.13``, ``Unnamed: 0`` and
    ``Timestamp (Formatted)``. The input frames are left untouched.

    NOTE(review): the positional slice presumably targets the remaining
    ``other*`` columns of the recordings; confirm against the data layout.

    Args:
        df_list: Iterable of DataFrames sharing that column layout.

    Returns:
        A new list of DataFrames, in the same order as ``df_list``.

    Raises:
        KeyError: If one of the named columns is missing from a frame.
    """
    named_extras = ['other.13', 'Unnamed: 0', 'Timestamp (Formatted)']
    return [
        frame.drop(columns=frame.columns[10:23].tolist() + named_extras)
        for frame in df_list
    ]

In [14]:
def overwrite_csv(df_list, path, file_names, sep=',', index=True):
    """Write each frame to ``{path}/{file_name}``, replacing any existing file.

    Frames and names are paired by position. With the default ``sep`` and
    ``index`` pandas writes comma-separated files that include the index as a
    leading unnamed column (read back as ``Unnamed: 0``). Note that ``load_dfs``
    reads tab-delimited input, so pass ``sep='\\t'`` (and usually
    ``index=False``) if the written files must be loadable by it again.

    Args:
        df_list: Iterable of DataFrames to write.
        path: Directory prefix; it is joined to every file name with ``/``.
        file_names: Sequence of target file names, at least as long as
            ``df_list``. Surplus names are ignored.
        sep: Field delimiter forwarded to ``DataFrame.to_csv``.
        index: Whether to write the frame index, forwarded to ``to_csv``.

    Raises:
        IndexError: If there are fewer file names than frames. The check runs
            before anything is written, so a mismatch never leaves the
            directory partially overwritten.
    """
    frames = list(df_list)
    if len(file_names) < len(frames):
        raise IndexError(
            f'got {len(frames)} DataFrames but only {len(file_names)} file names'
        )
    for frame, file_name in zip(frames, file_names):
        frame.to_csv(f'{path}/{file_name}', sep=sep, index=index)

## Execution

In [15]:
# Read every discovered CSV into memory, one DataFrame per file.
df_list = load_dfs(path=csv_files_path, files=csv_files)

In [16]:
# Rebase the Timestamp column of each frame on its own minimum (in place).
filter_dfs(df_list=df_list)

In [17]:
# Replace the frames with trimmed copies that no longer carry the unwanted columns.
df_list = remove_other_columns(df_list=df_list)

In [18]:
# Write the processed frames back over the files they were loaded from.
overwrite_csv(df_list=df_list, path=csv_files_path, file_names=csv_files)