RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for the Weibo2014 dataset.



In [None]:
import numpy as np
import pandas as pd
import moabb.datasets
import os

In [None]:
# Load the Weibo2014 dataset through MOABB.
# get_data() is called without arguments; the nested mapping it returns is
# indexed by the later cells as m_data[subject][session]['0'].
m_dataset = moabb.datasets.Weibo2014()
m_data = m_dataset.get_data()

In [None]:
# List every channel name (EEG, misc, stim...) of the recording stored under
# keys '0' / '0' for subject 1. `raws` is reused by the exploratory cells
# that follow.
subject_one = m_data[1]
raws = subject_one['0']['0']
print("Canal list :", raws.ch_names)

In [None]:
# Find the position of the stim channel inside the channel list
# (it will be needed later for the CSV to NY conversion).
stim_channel_name = 'STIM014'
stim_idx = raws.ch_names.index(stim_channel_name)
print(f"Canal index {stim_channel_name} is : {stim_idx}")


In [None]:
# Tally how often each distinct value appears on the stim channel.
# NOTE(review): the previous comment described target / non-target codes with
# a 5:1 ratio, which looks copied from another script; the cells below treat
# the codes as hand / feet / rest classes - confirm.
stim_data = raws.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)

for code, n_occurrences in zip(unique_vals, counts):
    print(f"Value : {code}, Occurences count : {n_occurrences}")

In [None]:
# Remove the channels that must not end up in the CSV, then transpose the
# array so that rows are time samples and columns are channels.
dropped_channels = ['VEO', 'HEO', 'CB1', 'CB2']
raws.drop_channels(dropped_channels)  # return value unused: `raws` itself is updated
data = raws.get_data()
dataT = data.T
print(dataT.shape)

In [None]:
# Keep only the samples (rows) that contain at least one non-zero value.
# Rows are tested element-wise rather than through their sum, so a sample
# whose non-zero values happen to cancel out is not mistaken for an empty one.
# NOTE(review): all-zero rows are presumably padding between trials - confirm
# against the MOABB loader.
nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
dataT = dataT[nonzero_indices, :]
print("Forme de dataT :", dataT.shape)

In [None]:
# Take the last column of the transposed data (the stim channel) and print
# how many samples carry each distinct value.
stim_col = dataT[:, -1]
unique_vals, counts = np.unique(stim_col, return_counts=True)

for code, n_occurrences in zip(unique_vals, counts):
    print(f"Value : {code}, Occurrence count : {n_occurrences}")


In [None]:
# Build the CSV layout: an integer sample counter in an unnamed first column,
# followed by one column per channel named "0", "1", ...
n_times, n_channels = dataT.shape
header = ["", *(str(i) for i in range(n_channels))]
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))

# column_stack yields a single-dtype array, so the counter column is cast
# back to integers to keep decimals out of the timestamps.
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Renumber the event codes held in the stim column (the last column).
# The steps run in this order so that a code freed by one step can be reused
# by a later one:
#   5, 6 (hand/foot combinations) -> 0 (removed)
#   3 (both hands) -> 5
#   4 (feet)       -> 3
#   7 (rest)       -> 4
stim_col = str(n_channels - 1)
remap_steps = ({5: 0, 6: 0}, {3: 5}, {4: 3}, {7: 4})
for step in remap_steps:
    df[stim_col] = df[stim_col].replace(step)

In [None]:
# Report how many samples carry each of the remapped event codes.
events = df[stim_col].values
per_code = {code: len(events[events == code]) for code in (1, 2, 3, 4, 5)}
n_lh = per_code[1]   # left hand
n_rh = per_code[2]   # right hand
n_f = per_code[3]    # feet
rest = per_code[4]   # rest
hands = per_code[5]  # both hands
print(f"Nombre de Left hand (1): {n_lh}")
print(f"Nombre de Right hand (2): {n_rh}")
print(f"Nombre de feet (3): {n_f}")
print(f"Nombre de rest (4): {rest}")
print(f"Nombre de hands (5): {hands}")


In [None]:
# Write the exploratory DataFrame to disk to check the CSV layout.
output_dir = 'C:/Users/doumif/work/Prog/Weibo2014'
filename = "data.csv"
filepath = os.path.join(output_dir, filename)

# to_csv does not create missing folders, so make sure the target exists
# before writing (no error is raised if it is already there).
os.makedirs(output_dir, exist_ok=True)
df.to_csv(filepath, index=False)

In [None]:
# Load the dataset again before the batch export.
# NOTE(review): presumably needed because the exploratory cells above already
# dropped channels from subject 1's recording, and the export loop drops the
# same channels from every recording - confirm that get_data() returns fresh
# objects.
m_dataset = moabb.datasets.Weibo2014()
m_data = m_dataset.get_data()

In [None]:
# Export every session of every subject to its own CSV file.
# Each file holds an unnamed integer sample counter as first column, then one
# column per remaining channel named "0", "1", ...; the last column is
# expected to be the stim channel, whose event codes are renumbered.
# NOTE(review): only the entry stored under key '0' of each session is
# exported - confirm that no session holds more than one.
subject_list = list(m_data.keys())

# Loop-invariant settings, defined once instead of on every iteration.
output_dir = 'C:/Users/doumif/work/Prog/Weibo2014'
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create folders
excluded_channels = ['VEO', 'HEO', 'CB1', 'CB2']

# Event-code renumbering, applied in this order so that a code freed by one
# step can be reused by a later one:
#   5, 6 (hand/foot combinations) -> 0 (removed)
#   3 (both hands) -> 5, 4 (feet) -> 3, 7 (rest) -> 4
stim_remap_steps = ({5: 0, 6: 0}, {3: 5}, {4: 3}, {7: 4})

# (code, label) pairs used for the per-file summary printed after each export.
class_labels = (
    (1, "Left hand"),
    (2, "Right hand"),
    (3, "feet"),
    (4, "rest"),
    (5, "hands"),
)

for subject in subject_list:
    session_keys = sorted(m_data[subject].keys())
    # Sessions are numbered from 1 in the file names, in sorted key order.
    for idx, session in enumerate(session_keys, start=1):
        raw_session = m_data[subject][session]['0']
        raw_session.drop_channels(excluded_channels)
        data = raw_session.get_data()

        # Transpose to get dataT of shape (total_timesamples, n_channels).
        dataT = data.T

        # Keep only the samples with at least one non-zero value. Testing the
        # elements (instead of the row sum) avoids discarding a sample whose
        # non-zero values happen to cancel out.
        nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
        dataT = dataT[nonzero_indices, :]
        print("Forme de dataT :", dataT.shape)
        n_times, n_channels = dataT.shape

        # Prepend the sample counter and build the DataFrame; the counter is
        # cast back to int because column_stack yields a single-dtype array.
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        df[""] = df[""].astype(int)

        # Renumber the event codes in the stim column (the last column).
        stim_col = str(n_channels - 1)
        for step in stim_remap_steps:
            df[stim_col] = df[stim_col].replace(step)

        # Name and write the file.
        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        filepath = os.path.join(output_dir, filename)
        df.to_csv(filepath, index=False)

        # Summary: number of samples carrying each remapped event code.
        events = df[stim_col].values
        print(f"\nFichier sauvegardé : {filename}")
        for code, label in class_labels:
            print(f"Nombre de {label} ({code}): {len(events[events == code])}")
