In [1]:
import utils as u

import os

import time

import mne

### Paths

In [2]:
# Path to the main data folder where the raw EEG recordings can be found.
# Raw string: a Windows path in a normal literal relies on "\D" being an
# invalid escape (SyntaxWarning on Python 3.12+) and would silently break for
# folder names starting with e.g. "n" or "t".
DATA_FOLDER = r'E:\DATA_FOLDER'

# Path to the main folder where the preprocessed data will be stored
DATA_PREPROCESS = r'E:\DATA_PREPROCESSED'
# Create the destination folder if it does not exist yet
os.makedirs(DATA_PREPROCESS, exist_ok=True)

### Preprocessing

In [None]:
# Record the start time so the total preprocessing duration can be reported
t = time.time()

# Loop through the entries of the raw-data folder (one folder per subject is expected)
for subject in os.listdir(DATA_FOLDER):
    print(subject)

    # Destination folder for this subject's preprocessed segments
    subject_dir = os.path.join(DATA_PREPROCESS, subject)

    # Skip subjects whose output folder already exists (treated as already preprocessed)
    if os.path.exists(subject_dir):
        print(f"The files for the {subject} patient already exist. Going for the next patient.\n")
        continue

    # Full path to the subject folder; anything that is not a directory is skipped
    subject_path = os.path.join(DATA_FOLDER, subject)
    if not os.path.isdir(subject_path):
        print(f"There is no subject {subject}.")
        continue

    print(f"\nProcessing data for subject {subject}...")

    # Folder that holds the BrainVision files
    rseeg_folder = os.path.join(subject_path, 'RSEEG')
    if not os.path.exists(rseeg_folder):
        print(f"RSEEG folder not found for subject {subject}.")
        continue

    # Header files available for this subject; only the first one listed is used
    vhdr_files = [file for file in os.listdir(rseeg_folder) if file.endswith('.vhdr')]
    if not vhdr_files:
        print(f"No .vhdr file found for subject {subject}.")
        continue

    vhdr_path = os.path.join(rseeg_folder, vhdr_files[0])

    # Read the BrainVision files
    try:
        raw = mne.io.read_raw_brainvision(vhdr_path, eog=('VEOG',), preload=True, misc='auto', scale=1, verbose=None)
        raw.set_montage("standard_1020")
    except Exception as e:
        print(f"Error reading files for subject {subject}: {e}")
        # Skip this subject: without this, `raw` would be undefined or, worse,
        # still hold the PREVIOUS subject's recording, which would then be
        # preprocessed and saved under the current subject's name.
        continue

    # PREPROCESSING
    # Cleaning EEG: filtering and removing (ocular) artifacts
    cleaned_eeg = u.preprocessing(raw)
    # NOTE(review): `_data` is a private attribute of the returned object —
    # confirm whether a public accessor should be used instead.
    data = cleaned_eeg._data

    # Dictionary with data
    d = u.dictionary(cleaned_eeg)

    # Samples corresponding to the different events between "Closed Eyes" and "Open Eyes"
    start_samples_ce, start_samples_oe, end_samples_ce, end_samples_oe = u.segment_samples(d)

    # Segment division between "Closed Eyes" and "Open Eyes"
    segments_ce = u.division_segments(data, start_samples_ce, end_samples_ce)
    segments_oe = u.division_segments(data, start_samples_oe, end_samples_oe)

    # Gather the segments in a common list and transform them into DataFrames
    segments = u.segments_df(cleaned_eeg, segments_ce, segments_oe)

    # Create the subject's output folder only once its data has been processed,
    # so a failed subject is not mistaken for an already-preprocessed one
    os.makedirs(subject_dir, exist_ok=True)

    # Store each of the DataFrames
    for number, segment in enumerate(segments, start=1):
        # Odd positions are labelled "ce" and even positions "oe"
        # (assumes u.segments_df alternates closed/open-eyes segments — TODO confirm)
        condition = 'ce' if number % 2 != 0 else 'oe'
        file_name = subject + '_' + condition + '_' + str(number) + '.csv'

        # Path for the file
        file_path = os.path.join(subject_dir, file_name)

        # Never overwrite a file that already exists
        if not os.path.exists(file_path):
            segment.to_csv(file_path, index=False, sep='\t', header=False)

# Total elapsed preprocessing time, in seconds
pt = time.time() - t
print('Tiempo de preprocesado', pt)

### Check preprocessing

In [4]:
# Folder paths: reuse the constants defined in the "Paths" cell instead of
# repeating the literals, so this check cannot drift from the folders the
# preprocessing loop actually used (the repeated literals also contained the
# invalid "\D" escape sequence).
ruta_data_folder = DATA_FOLDER
ruta_data_preprocessed = DATA_PREPROCESS

# Get subfolder names in DATA_FOLDER
subfolders_data_folder = os.listdir(ruta_data_folder)

# Get subfolder names in DATA_PREPROCESSED
subfolders_data_preprocessed = os.listdir(ruta_data_preprocessed)

# Find missing subfolders (set lookup avoids rescanning the list for every entry)
preprocessed_names = set(subfolders_data_preprocessed)
subfolders_faltantes = [folder for folder in subfolders_data_folder if folder not in preprocessed_names]

# Print missing subfolders
print("The following subfolders are present in DATA_FOLDER but are missing in DATA_PREPROCESSED:")
for folder in subfolders_faltantes:
    print(folder)

The following subfolders are present in DATA_FOLDER but are missing in DATA_PREPROCESSED:
