RAW (MOABB) to CSV

This code converts the data sets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for bi2013a-AO


In [1]:
import numpy as np
import pandas as pd
import os
import shutil

In [4]:
def reorganize(source_folder, destination_folder, csv_name="2.csv"):
    """
    Copy one CSV recording per subject/session folder into a single flat folder.

    Every directory directly under ``source_folder`` whose name looks like
    ``subjectNN`` or ``subjectNN_sessionMM`` is scanned for sub-folders whose
    name starts with ``Session``. When such a sub-folder contains ``csv_name``,
    that file is copied to ``destination_folder`` under the name
    ``subject_NN_session_MM.csv`` (both numbers zero-padded to two digits).
    A folder named ``subjectNN`` without a session suffix is treated as
    session 1.

    Directories starting with "subject" whose subject/session numbers cannot
    be parsed are reported and skipped instead of aborting the whole run.

    Args:
        source_folder: Path to the folder containing all subject folders.
        destination_folder: Path to the folder where renamed files are copied;
            it is created (including parents) when missing.
        csv_name: Name of the file to pick inside each ``Session*`` folder.
            Defaults to "2.csv".
    """
    os.makedirs(destination_folder, exist_ok=True)

    # os.listdir gives no ordering guarantee; sort so the log is reproducible.
    for subject_folder in sorted(os.listdir(source_folder)):
        subject_path = os.path.join(source_folder, subject_folder)

        # Only sub-directories named "subject..." describe a recording.
        if not os.path.isdir(subject_path):
            continue
        if not subject_folder.startswith("subject"):
            continue

        # "subject01_session03" -> ("01", "_session", "03"); "subject08" -> ("08", "", "")
        subject_part, separator, session_part = subject_folder[len("subject"):].partition("_session")
        try:
            subject_num = int(subject_part)
            # No "_session" suffix means the subject has a single session.
            session_num = int(session_part) if separator else 1
        except ValueError:
            print(f"Skipped (unrecognized folder name): {subject_path}")
            continue

        new_name = f"subject_{subject_num:02d}_session_{session_num:02d}.csv"
        destination_path = os.path.join(destination_folder, new_name)

        # Look inside every "Session*" sub-folder for the requested file.
        # If several of them contain it, the last one copied wins.
        for item in sorted(os.listdir(subject_path)):
            if not item.startswith("Session"):
                continue

            csv_file = os.path.join(subject_path, item, csv_name)
            if os.path.exists(csv_file):
                # copy2 also preserves the file metadata (timestamps).
                shutil.copy2(csv_file, destination_path)
                print(f"Copied: {csv_file} -> {destination_path}")

    print(f"Reorganization completed. Files are in: {destination_folder}")

In [5]:
# Input of reorganize(): root folder holding one sub-folder per subject/session.
source = "C:\\Users\\doumif\\Downloads\\CSV zenodo bi2013a\\"
# Output of reorganize(): flat folder receiving the renamed subject_XX_session_YY.csv files.
# NOTE(review): both paths are absolute and machine-specific; adapt them before running elsewhere.
destination = "C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\" 


In [6]:
# Flatten the per-subject folder tree: one renamed CSV per subject/session is copied into `destination`.
reorganize(source, destination)

Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session01\Session1\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_01.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session02\Session2\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_02.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session03\Session3\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_03.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session04\Session4\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_04.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session05\Session5\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_05.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a\subject01_session06\Session6\2.csv -> C:\Users\doumif\Downloads\CSV bi2013a-AO\subject_01_session_06.csv
Copied: C:\Users\doumif\Downloads\CSV zenodo bi2013a

In [7]:
# Point `file_dir` at the folder that holds every .csv of the dataset.
# The Cattan files were downloaded from the Zenodo repository and then reorganized as shown above.
# They still need to be "cleaned" before being converted to npz.
file_dir = "C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\"
output_dir = "C:\\Users\\doumif\\work\\OfficeWork\\BCI Databases\\CSV\\P300\\bi2013a-AO"
# os.listdir returns entries in arbitrary order and also lists non-CSV entries
# (sub-folders, system files); keep only the .csv files and sort them so the
# batch run is deterministic.
all_files = sorted(
    os.path.join(file_dir, name)
    for name in os.listdir(file_dir)
    if name.lower().endswith(".csv")
)
all_files

['C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_01.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_02.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_03.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_04.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_05.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_06.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_07.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_08.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_02_session_01.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_02_session_02.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_02_session_03.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_02_session_04.csv',
 'C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_02_session_05.csv',
 'C:\\Users\

In [12]:
# One recording, used by the following cells to inspect column 17 before the batch run.
file = "C:\\Users\\doumif\\Downloads\\CSV bi2013a-AO\\subject_01_session_02.csv"

In [13]:
# header=0: the first line of the file is consumed as column labels, so it is not part of `data`.
df = pd.read_csv(file, header=0)
# Plain NumPy array of the rows, indexed positionally in the cells below.
data = np.array(df)

In [14]:
# np.count_nonzero counts the rows whose stim value (column 17) is non-zero;
# it is not the number of distinct values, so the label states what is measured.
print(f"Non-zero entries in stim column: {np.count_nonzero(data[:, 17])}\n")

Unique values in stim column: 168



In [15]:
# Tally how many rows carry each distinct value found in column 17 (the stim column).
unique_values, counts = np.unique(data[:, 17], return_counts=True)
for value, count in zip(unique_values, counts):
    # The printed label is French for "In data: number of <value> = <count>".
    print(f"Dans data : Nombre de {value} = {count}")

Dans data : Nombre de 0.0 = 104472
Dans data : Nombre de 33285.0 = 28
Dans data : Nombre de 33286.0 = 140


In [16]:
def process_eeg_file(file_path, output_dir, stim_col=17, target_code=33285, nontarget_code=33286):
    """
    Recode the stimulation markers of one EEG CSV file and save the result.

    Parameters:
    -----------
    file_path : str
        Path to the input CSV file
    output_dir : str
        Directory where the processed file will be saved; it is created
        (including parents) when it does not exist yet
    stim_col : int, optional
        Zero-based index of the stimulation column (default 17, i.e. the
        18th column)
    target_code : int, optional
        Marker value rewritten to 2 (default 33285)
    nontarget_code : int, optional
        Marker value rewritten to 1 (default 33286)

    Returns:
    --------
    str
        Path to the processed file

    The function:
    - Reads the CSV file without interpreting a header line
    - Rewrites ``target_code`` to 2 and ``nontarget_code`` to 1 in the
      stimulation column; every other value and every other column is kept
    - Saves the result in ``output_dir`` under the same file name as the
      input, without index and without header
    - Prints the file name, the array shape and the count of each distinct
      value left in the stimulation column
    """
    # The output keeps the input's base name.
    filename = os.path.basename(file_path)

    # header=None on purpose: the first line of the file is kept as an ordinary
    # row, so it passes through to the output (and shows up in the counts).
    df = pd.read_csv(file_path, header=None)
    data = np.array(df)

    # Build both masks from the untouched column before writing, so one
    # replacement can never feed the other.
    stim = data[:, stim_col]
    is_target = stim == target_code
    is_nontarget = stim == nontarget_code
    data[is_target, stim_col] = 2
    data[is_nontarget, stim_col] = 1

    # Convert back to DataFrame
    df_processed = pd.DataFrame(data)

    # Save processed file; make sure the destination exists first.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)
    df_processed.to_csv(output_path, index=False, header=False)

    print(f"Processed and saved: {filename}")
    print(f"Shape: {data.shape}")
    # Sanity check: count of each distinct value in the stimulation column.
    unique_values, counts = np.unique(data[:, stim_col], return_counts=True)
    for value, count in zip(unique_values, counts):
        print(f"Dans data : Nombre de {value} = {count}")

    return output_path


In [17]:
# process_eeg_file reads each file with header=None, so the first line of the file is kept
# as an ordinary row: the single 16.0 in the printed counts corresponds to that header line.
for i, file in enumerate(all_files, 1):
    print(f"File number {i} out of {len(all_files)}")
    # Path of the file just written; overwritten on every iteration.
    csv = process_eeg_file(file, output_dir)


File number 1 out of 73
Processed and saved: subject_01_session_01.csv
Shape: (125185, 18)
Dans data : Nombre de 0.0 = 124992
Dans data : Nombre de 1.0 = 160
Dans data : Nombre de 2.0 = 32
Dans data : Nombre de 16.0 = 1
File number 2 out of 73
Processed and saved: subject_01_session_02.csv
Shape: (104641, 18)
Dans data : Nombre de 0.0 = 104472
Dans data : Nombre de 1.0 = 140
Dans data : Nombre de 2.0 = 28
Dans data : Nombre de 16.0 = 1
File number 3 out of 73
Processed and saved: subject_01_session_03.csv
Shape: (101185, 18)
Dans data : Nombre de 0.0 = 101028
Dans data : Nombre de 1.0 = 130
Dans data : Nombre de 2.0 = 26
Dans data : Nombre de 16.0 = 1
File number 4 out of 73
Processed and saved: subject_01_session_04.csv
Shape: (97505, 18)
Dans data : Nombre de 0.0 = 97360
Dans data : Nombre de 1.0 = 120
Dans data : Nombre de 2.0 = 24
Dans data : Nombre de 16.0 = 1
File number 5 out of 73
Processed and saved: subject_01_session_05.csv
Shape: (114049, 18)
Dans data : Nombre de 0.0 = 113