RAW (MOABB) to CSV

This code converts the datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for BNCI2015003-train

In [89]:
import numpy as np
import pandas as pd
import moabb.datasets

In [90]:
# Load the database: instantiate the MOABB BNCI2015_003 dataset and fetch its recordings.
m_dataset = moabb.datasets.BNCI2015_003()
# The result is indexed later in this notebook as m_data[subject][session][run].
m_data = m_dataset.get_data()

In [83]:
# Pick one recording (subject 10, session '0', run '1test') and list all of its
# channel names (EEG, misc, stim...).
raws = m_data[10]['0']['1test']
print("Canal list :", raws.ch_names)

Canal list : ['Fz', 'Cz', 'P3', 'Pz', 'P4', 'PO7', 'Oz', 'PO8', 'Target', 'Flash']


In [84]:
# Find the index of the stim channel (it will be needed later for the CSV to NY conversion).
stim_channel_name = 'Target'
# Position of that channel within the recording's channel-name list.
stim_idx = raws.ch_names.index(stim_channel_name)
print(f"Canal index {stim_channel_name} is : {stim_idx}")


Canal index Target is : 8


In [85]:
# Tally how often each distinct value appears on the stim channel
# (1 = non-target, 2 = target; a 5-to-1 ratio is needed).
stim_data = raws.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)

# One report line per distinct value, in the sorted order returned by np.unique.
for stim_value, n_samples in zip(unique_vals, counts):
    print(f"Value : {stim_value}, Occurences count : {n_samples}")

(1, 25922)
Value : 0.0, Occurences count : 22322
Value : 1.0, Occurences count : 3000
Value : 2.0, Occurences count : 600


In [86]:

# Export every channel except 'Flash'. Selecting channels by name, instead of
# calling raws.drop_channels(), leaves `raws` (and the entry it shares with
# m_data) unmodified, so this cell can be re-run without raising.
kept_channels = [name for name in raws.ch_names if name != 'Flash']
data = raws.get_data(picks=kept_channels)  # shape: (n_channels, n_times_run)
dataT = data.T  # transposed: one row per time sample, one column per channel

In [87]:
# Build the export table: a sample counter first, then one column per channel.
n_times, n_channels = dataT.shape

# Column labels: an empty label for the sample counter, then "0", "1", ... per channel.
header = [""] + list(map(str, range(n_channels)))

# Sample counter 0..n_times-1, placed in front of the channel data.
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))

# column_stack promotes the counter to float; cast that column back to int
# so it carries no decimals.
df = pd.DataFrame(data_with_timestamp, columns=header).astype({"": int})

In [16]:
# Write the table to data.csv without the pandas index column.
# Note: the event clean-up cell further down writes to the same path again.
df.to_csv("data.csv", index=False)

In [88]:
stim_col = str(n_channels - 1)  # the last column holds the stimulation codes

# Keep only the first sample of each block of identical codes: any sample whose
# code equals the code of the sample just before it is set to 0. The first
# sample is compared against 0. The comparison uses the original (unmodified)
# values, and is done with a shifted Series instead of a per-row Python loop.
stim_values = df[stim_col]
is_repeat = stim_values == stim_values.shift(1, fill_value=0)
df[stim_col] = stim_values.mask(is_repeat, 0)

# Save the CSV file
df.to_csv("data.csv", index=False)

# Check the number of events that remain after the clean-up
events = df[stim_col].values
n_target = int(np.count_nonzero(events == 2))
n_non_target = int(np.count_nonzero(events == 1))
print(f"\nNombre de Target (2): {n_target}")
print(f"Nombre de Non-Target (1): {n_non_target}")
print(f"Ratio Non-Target/Target: {n_non_target/n_target if n_target > 0 else 'inf'}")



Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0


In [92]:
# Reload the recordings, then write one CSV file per subject and session.
m_data = m_dataset.get_data()
subject_list = list(m_data.keys())

for subject in subject_list:
    # Sessions are numbered 1, 2, ... in the sorted order of their keys.
    session_keys = sorted(m_data[subject].keys())
    for idx, session in enumerate(session_keys, start=1):
        # NOTE(review): the notebook header mentions "train", but the '1test'
        # run is the one exported here — confirm this is intended.
        raw_session = m_data[subject][session]['1test']

        # Select every channel except 'Flash' by name rather than dropping it
        # in place, so the objects held in m_data are left unmodified.
        kept_channels = [name for name in raw_session.ch_names if name != 'Flash']
        data = raw_session.get_data(picks=kept_channels)

        # Transpose to get dataT with shape (total_timesamples, n_channels)
        dataT = data.T
        n_times, n_channels = dataT.shape

        # Build the sample-counter column and the column labels
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        df[""] = df[""].astype(int)  # column_stack promoted the counter to float

        # Event clean-up: keep only the first sample of each block of identical
        # codes. A sample whose code equals the previous sample's original code
        # is set to 0; the first sample is compared against 0.
        stim_col = str(n_channels - 1)  # the last column holds the stimulation codes
        stim_values = df[stim_col]
        is_repeat = stim_values == stim_values.shift(1, fill_value=0)
        df[stim_col] = stim_values.mask(is_repeat, 0)

        # Name the file from the zero-padded subject and session numbers
        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        # Save the table
        df.to_csv(filename, index=False)

        # Report the event counts for this file
        events = df[stim_col].values
        n_target = int(np.count_nonzero(events == 2))
        n_non_target = int(np.count_nonzero(events == 1))
        print(f"\nFichier sauvegardé : {filename}")
        print(f"Nombre de Target (2): {n_target}")
        print(f"Nombre de Non-Target (1): {n_non_target}")
        print(f"Ratio Non-Target/Target: {n_non_target/n_target if n_target > 0 else 'inf'}")



Fichier sauvegardé : subject_01_session_01.csv
Nombre de Target (2): 75
Nombre de Non-Target (1): 2625
Ratio Non-Target/Target: 35.0

Fichier sauvegardé : subject_02_session_01.csv
Nombre de Target (2): 75
Nombre de Non-Target (1): 2625
Ratio Non-Target/Target: 35.0

Fichier sauvegardé : subject_03_session_01.csv
Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0

Fichier sauvegardé : subject_04_session_01.csv
Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0

Fichier sauvegardé : subject_05_session_01.csv
Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0

Fichier sauvegardé : subject_06_session_01.csv
Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0

Fichier sauvegardé : subject_07_session_01.csv
Nombre de Target (2): 150
Nombre de Non-Target (1): 750
Ratio Non-Target/Target: 5.0

Fichier sauvegardé : subject_08_session_01.csv
Nombre de Target (2