MOABB to CSV / RAW to CSV

This code converts datasets from RAW format to CSV format.

It has been specifically conceived for BCI data.

This script is for PhysionetMI-T4



In [None]:
import numpy as np
import pandas as pd
import moabb.datasets
import os

In [None]:
# Instantiate the MOABB PhysionetMI dataset and load its recordings.
# The result is indexed further down as m_data[subject][session][run].
m_dataset = moabb.datasets.PhysionetMI()
m_data = m_dataset.get_data()

In [None]:
# List all channel names (EEG, misc, stim, ...) of one recording:
# subject 1, session '0', run '3'.
raw = m_data[1]['0']['3']
# Bare expression: a notebook only displays it when it is the last statement
# of the cell, so it presumably has no visible effect here — TODO confirm.
raw
print("Canal list :", raw.ch_names)

In [None]:
# Locate the index of the stim channel (needed later for the CSV to NY conversion).
stim_channel_name = 'STIM'
# Assuming ch_names is a list, .index() raises ValueError when the name is missing.
stim_idx = raw.ch_names.index(stim_channel_name)
print(f"Canal index {stim_channel_name} is : {stim_idx}")


In [None]:
# Count how many samples carry each distinct value of the stim channel.
# NOTE(review): the previous comment here described "1 = non-target, 2 = target,
# with a required ratio of 5 to 1"; the remapping cells of this file handle
# codes 1, 4 and 5 instead — confirm which coding applies to this dataset.
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)

# One output line per distinct value with its number of occurrences.
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Runs to merge for subject 1, session '0'.
runs = ['3', '4', '5']

# Signal array of every selected run; each one is (n_channels, n_times_run).
all_runs_data = [m_data[1]['0'][run].get_data() for run in runs]

# Join the runs along the time axis (axis=1) -> (n_channels, total_timesamples).
concatenated_data = np.concatenate(all_runs_data, axis=1)

# Flip to one row per time sample -> (total_timesamples, n_channels).
dataT = np.transpose(concatenated_data)

print("Forme de dataT :", dataT.shape)

In [None]:
# Keep only the time samples whose sum across channels is not zero.
nonzero_indices = np.flatnonzero(dataT.sum(axis=1))
dataT = dataT[nonzero_indices]
print("Forme de dataT :", dataT.shape)

In [None]:
# Build the timestamp column and the CSV header.
# The first column is a running sample index; its header is the empty string.
# The remaining columns are named "0" .. "n_channels - 1".
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
# column_stack promotes the integer timestamps to the dtype of dataT, so the
# first column comes out as floating point when dataT is.
data_with_timestamp = np.column_stack((timestamps, dataT))
header = [""] + [str(i) for i in range(n_channels)]

df = pd.DataFrame(data_with_timestamp, columns=header)
# Remove the decimals from the timestamps by replacing the whole column.
# Assigning through df.iloc[:, 0] writes into the existing float column
# (pandas >= 2.0), which would leave "0.0, 1.0, ..." in the CSV. Replacing the
# column by label, as the batch export loop does, really switches it to int.
df[""] = df[""].astype(int)

In [None]:
# Remap the stimulation codes stored in the last column.
# Net effect: 5 -> 3, 1 -> 4, 4 -> 5. Codes 6 and 7 only serve as temporary
# placeholders between the two passes (a pre-existing 6 or 7 would also end up
# as 4 or 5). The counts printed later in this file label 3 as Feet, 5 as
# Hands and 4 as Rest.
stim_col = str(n_channels - 1)  # the last column holds the stimulations
first_pass = {5: 3, 1: 6, 4: 7}
second_pass = {6: 4, 7: 5}
df[stim_col] = df[stim_col].replace(first_pass).replace(second_pass)

In [None]:
# Report how many samples carry each remapped stimulation code.
events = df[stim_col].to_numpy()
n_f = np.count_nonzero(events == 3)   # code 3, reported as Feet
n_hs = np.count_nonzero(events == 5)  # code 5, reported as Hands
rest = np.count_nonzero(events == 4)  # code 4, reported as Rest
print(f"Nombre de Feet (3): {n_f}")
print(f"Nombre de Hands (5): {n_hs}")
print(f"Nombre de Rest(4): {rest}")


In [None]:
# Write the single-subject DataFrame to disk to check the CSV output.
output_dir = 'C:/Users/doumif/work/Prog/PhysionetMI-T4'
filename = "data.csv"
# Create the destination folder if needed; to_csv does not create missing
# directories and would otherwise fail with an OSError.
os.makedirs(output_dir, exist_ok=True)
filepath = os.path.join(output_dir, filename)

# index=False: the timestamp column already serves as the row index.
df.to_csv(filepath, index=False)

In [None]:
# (Re)load the dataset so that this part of the notebook can run on its own,
# then collect the subject identifiers (the top-level keys of m_data).
m_dataset = moabb.datasets.PhysionetMI()
m_data = m_dataset.get_data()
subject_list = list(m_data.keys())

In [None]:
# Export one CSV file per (subject, session): runs '3', '4' and '5' are
# concatenated in time, zero-sum samples are dropped, a timestamp column is
# prepended and the stimulation codes are remapped.

# Loop-invariant settings, defined once instead of on every iteration.
run_keys = ['3', '4', '5']
output_dir = 'C:/Users/doumif/work/Prog/PhysionetMI-T4'
# Create the destination folder if needed; to_csv does not create missing
# directories and would otherwise fail with an OSError.
os.makedirs(output_dir, exist_ok=True)

for subject in subject_list:
    session_keys = sorted(m_data[subject].keys())
    # idx is the 1-based position of the session, used in the file name.
    for idx, session in enumerate(session_keys, start=1):
        # Signal array of every selected run; each one is (n_channels, n_times_run).
        all_runs_data = [
            m_data[subject][session][run].get_data() for run in run_keys
        ]

        # Join the runs along the time axis -> (n_channels, total_timesamples).
        concatenated_data = np.concatenate(all_runs_data, axis=1)

        # One row per time sample -> (total_timesamples, n_channels).
        dataT = concatenated_data.T
        # Drop the samples whose sum across channels is exactly zero.
        nonzero_indices = np.where(np.sum(dataT, axis=1) != 0)[0]
        dataT = dataT[nonzero_indices, :]
        print("Forme de dataT :", dataT.shape)
        n_times, n_channels = dataT.shape

        # Prepend a running sample index as the timestamp column (empty header).
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        # column_stack promoted the timestamps to float; replace the column
        # so that it is written without decimals.
        df[""] = df[""].astype(int)

        # Remap the stimulation codes of the last column.
        # Net effect: 5 -> 3, 1 -> 4, 4 -> 5; 6 and 7 are temporary placeholders
        # between the two passes.
        stim_col = str(n_channels - 1)  # the last column holds the stimulations
        df[stim_col] = df[stim_col].replace({5: 3, 1: 6, 4: 7})
        df[stim_col] = df[stim_col].replace({6: 4, 7: 5})

        # File name: zero-padded subject number and session position.
        subject_str = f"{int(subject):03d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        filepath = os.path.join(output_dir, filename)
        df.to_csv(filepath, index=False)

        # Report how many samples carry each remapped stimulation code.
        events = df[stim_col].values
        n_f = len(events[events == 3])   # code 3, reported as Feet
        n_hs = len(events[events == 5])  # code 5, reported as Hands
        rest = len(events[events == 4])  # code 4, reported as Rest
        print(f"Nombre de Feet (3): {n_f}")
        print(f"Nombre de Hands (5): {n_hs}")
        print(f"Nombre de Rest(4): {rest}")