RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for PhysionetMI-T2

In [1]:
import os

import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Load the PhysionetMI dataset through MOABB.
# WARNING: this warning was originally written for Lee2019MI; the same advice applies
# to any large dataset. If download time or memory is a concern, load only a few
# subjects at a time, e.g. m_dataset.get_data(subjects=m_dataset.subject_list[:5]).
# Only the first subject of the dataset is loaded here.
# The cells below index the result as m_data[subject][session][run].
m_dataset = datasets.PhysionetMI()
m_data = m_dataset.get_data(subjects = [m_dataset.subject_list[0]])

Used Annotations descriptions: ['left_hand', 'rest', 'right_hand']
Used Annotations descriptions: ['left_hand', 'rest', 'right_hand']
Used Annotations descriptions: ['left_hand', 'rest', 'right_hand']
Used Annotations descriptions: ['feet', 'hands', 'rest']
Used Annotations descriptions: ['feet', 'hands', 'rest']
Used Annotations descriptions: ['feet', 'hands', 'rest']


In [5]:
# List every channel name (EEG, misc, stim, ...) of run '0', session '0', subject 1
raw = m_data[1]['0']['0']
print(f"Canal list : {raw.ch_names}")

Canal list : ['FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6', 'Fp1', 'Fpz', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF4', 'AF8', 'F7', 'F5', 'F3', 'F1', 'Fz', 'F2', 'F4', 'F6', 'F8', 'FT7', 'FT8', 'T7', 'T8', 'T9', 'T10', 'TP7', 'TP8', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'P8', 'PO7', 'PO3', 'POz', 'PO4', 'PO8', 'O1', 'Oz', 'O2', 'Iz', 'STIM']


In [6]:
# Look up the position of the stimulation channel by its name;
# the next cell uses this index to pick the channel.
stm_name = 'STIM'
stim_idx = raw.ch_names.index(stm_name)
print("Canal index", stm_name, "is :", stim_idx)

Canal index STIM is : 64


In [7]:
# Tally how often each distinct value occurs on the stim channel
# (the set of values depends on the dataset)
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
for i in range(len(unique_vals)):
    print(f"Value : {unique_vals[i]}, Occurences count : {counts[i]}")

(1, 20000)
Value : 0.0, Occurences count : 19970
Value : 1.0, Occurences count : 15
Value : 2.0, Occurences count : 8
Value : 3.0, Occurences count : 7


In [8]:
# PhysionetMI Task 2 runs
run_keys = ['0', '1', '2']

# Collect one array per run, in run order; each has shape (n_channels, n_times_run).
# Subject 1 / session '0' is hard-coded here, matching the exploration cells above.
all_runs_data = [m_data[1]['0'][run].get_data() for run in run_keys]

# Join the runs end to end along the time axis -> shape (n_channels, total_timesamples)
concatenated_data = np.concatenate(all_runs_data, axis=1)

In [None]:
# Transpose so that each row is one time sample: shape (total_timesamples, n_channels)
dataT = concatenated_data.T
print("Shape of dataT before dropping rows:", dataT.shape)

# Remove the time samples (rows) whose channels are ALL exactly 0.
# The test is element-wise on purpose: checking the row sum instead would also
# discard rows whose non-zero values happen to cancel out to 0.
nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
dataT = dataT[nonzero_indices, :]
print("Shape of dataT after dropping rows:", dataT.shape)

Forme de dataT : (60000, 65)
Forme de dataT : (59759, 65)


In [10]:
# Build the timestamp column (0 .. n_times-1) and the CSV header.
# The header is an empty name for the timestamp column followed by the channel indices.
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))
header = [""] + [str(i) for i in range(n_channels)]

df = pd.DataFrame(data_with_timestamp, columns=header)

# Remove the decimals from the timestamps: column_stack promoted them to float.
# The column is replaced by name because `df.iloc[:, 0] = ...` writes into the
# existing float column and therefore keeps the float dtype.
df[""] = df[""].astype(int)

In [None]:
# Recode the stim column: 1 -> 4, 2 -> 1, 3 -> 2 (this is not a plain 1/2 swap).
# After recoding, 1 / 2 / 4 are the codes the export loop at the end of the notebook
# reports as left hand / right hand / rest.
stim_col = str(n_channels - 1)  # the last column holds the stimulation codes

# Recode from the untouched codes in dataT rather than from df itself, so that
# re-running this cell cannot remap values that were already recoded
# (e.g. 2 -> 1 on the first run, then 1 -> 4 on the second).
df[stim_col] = pd.Series(dataT[:, -1], index=df.index).replace({1: 4, 2: 1, 3: 2})

In [None]:
# Smoke test: write the frame built above to "data.csv" (relative to the current
# working directory, without the pandas index) so the CSV layout can be inspected.
df.to_csv("data.csv", index=False)

In [None]:


# Export every loaded subject/session to its own CSV file.

# Destination folder. This is a machine-specific absolute path: adjust it before
# running the notebook elsewhere.
output_dir = 'C:/Users/doumif/work/Prog/PhysionetMI-T2'
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing folders

# PhysionetMI Task 2 runs (same keys as in the single-session cells above)
run_keys = ['0', '1', '2']

# m_data is keyed by subject, so iterating over its keys visits exactly the
# subjects that were loaded (the name `subject_list` was never defined).
for subject in sorted(m_data.keys()):
    session_keys = sorted(m_data[subject].keys())
    for idx, session in enumerate(session_keys, start=1):
        # One array of shape (n_channels, n_times_run) per run, in run order
        all_runs_data = [m_data[subject][session][run].get_data() for run in run_keys]

        # Concatenate along the time axis (axis=1) -> (n_channels, total_timesamples)
        concatenated_data = np.concatenate(all_runs_data, axis=1)

        # Transpose to (total_timesamples, n_channels), then drop the rows whose
        # channels are all exactly 0. The test is element-wise: a row-sum test
        # would also drop non-zero rows whose values cancel out.
        dataT = concatenated_data.T
        nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
        dataT = dataT[nonzero_indices, :]
        print("Forme de dataT :", dataT.shape)
        n_times, n_channels = dataT.shape

        # Prepend the timestamp column (0 .. n_times-1); its header is the empty string
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        df[""] = df[""].astype(int)  # column_stack promoted the timestamps to float

        # Recode the stim column: 1 -> 4, 2 -> 1, 3 -> 2
        stim_col = str(n_channels - 1)  # the last column holds the stimulation codes
        df[stim_col] = df[stim_col].replace({1: 4, 2: 1, 3: 2})

        # File name: zero-padded subject id and 1-based session rank
        subject_str = f"{int(subject):03d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        filepath = os.path.join(output_dir, filename)
        df.to_csv(filepath, index=False)

        # Report how many samples carry each recoded stimulation code
        events = df[stim_col].values
        n_lh = int((events == 1).sum())
        n_rh = int((events == 2).sum())
        rest = int((events == 4).sum())
        print(f"Nombre de Left hand (1): {n_lh}")
        print(f"Nombre de Right hand (2): {n_rh}")
        print(f"Nombre de Rest(4): {rest}")
