RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script targets the Zhou2016 dataset, in its new version as of the latest MOABB update (Jul 28, 2025).

In [None]:
import numpy as np
import pandas as pd
import mne
from moabb import datasets

In [None]:
# Instantiate the Zhou2016 dataset and load the recordings of its first
# listed subject only.
m_dataset = datasets.Zhou2016()
first_subject = m_dataset.subject_list[0]
m_data = m_dataset.get_data(subjects=[first_subject])

In [None]:
# Pick subject 1 / session '0' / run '0' and list all of its channel names
# (EEG, misc, stim, ...).
first_session_runs = m_data[1]['0']
raw = first_session_runs['0']
print("Canal list :", raw.ch_names)

In [None]:
# Build a STIM channel from the annotations of `raw` and append it in place.

# Re-running this cell would otherwise try to add a second 'STIM' channel,
# so drop the one left over from a previous execution before rebuilding it.
if 'STIM' in raw.ch_names:
    raw.drop_channels(['STIM'])

# Use the dataset's own description -> code mapping; without an explicit
# event_id MNE assigns arbitrary integers to the annotation descriptions.
events_from_annot, event_dict = mne.events_from_annotations(
    raw, event_id=m_dataset.event_id
)

# One row, one column per time sample; 0 means "no event at this sample".
stim_data = np.zeros((1, len(raw.times)))

# Event sample numbers include raw.first_samp, so subtract it to obtain
# indices into the data array. If two events share a sample, the later wins.
stim_samples = events_from_annot[:, 0] - raw.first_samp
stim_data[0, stim_samples] = events_from_annot[:, 2]

# Wrap the row in a RawArray of type 'stim' and append it to the recording.
info = mne.create_info(['STIM'], raw.info['sfreq'], ['stim'])
stim_raw = mne.io.RawArray(stim_data, info)
raw.add_channels([stim_raw], force_update_info=True)

print("Canal list :", raw.ch_names)
print(raw.ch_names[-1])

In [None]:
# Look up the position of the stim channel within the channel list.
stim_name = 'STIM'
channel_names = raw.ch_names
stim_idx = channel_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Count how often each distinct value occurs in the stim channel of `raw`.
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)

unique_vals, counts = np.unique(stim_data, return_counts=True)
occurrences = dict(zip(unique_vals, counts))
for val, count in occurrences.items():
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Concatenate all runs of one session (here subject 1, session '0') into a
# single (total_timesamples, n_channels) array whose last column is the
# stim channel.
subject = 1
session = '0'

# Retrieve the list of runs in this session
run_keys = sorted(m_data[subject][session].keys())
print("Runs found in the session:", run_keys)

# One (n_channels, n_times_run) array per run
all_runs_data = []

for run in run_keys:
    raw_run = m_data[subject][session][run]

    # Use the dataset's own description -> code mapping; without an explicit
    # event_id MNE assigns arbitrary integers to the annotation descriptions.
    events_from_annot, event_dict = mne.events_from_annotations(
        raw_run, event_id=m_dataset.event_id
    )

    # Stim row: 0 everywhere except at event samples. Event sample numbers
    # include raw_run.first_samp, so subtract it to index the data array.
    stim_data = np.zeros((1, len(raw_run.times)))
    stim_samples = events_from_annot[:, 0] - raw_run.first_samp
    stim_data[0, stim_samples] = events_from_annot[:, 2]

    # Stack the stim row under the signal channels instead of adding a
    # channel to the Raw object: the recordings held in m_data stay
    # untouched, so the cell can be re-run, and a 'STIM' channel added by
    # another cell is ignored rather than duplicated.
    signal_names = [name for name in raw_run.ch_names if name != 'STIM']
    run_data = np.vstack([raw_run.get_data(picks=signal_names), stim_data])

    print("Canal list :", signal_names + ['STIM'])
    print('STIM')
    all_runs_data.append(run_data)  # shape: (n_channels, n_times_run)

# Concatenate the data along the time axis (axis=1)
concatenated_data = np.concatenate(all_runs_data, axis=1)  # shape: (n_channels, total_timesamples)

# Transpose to get an array of shape (total_timesamples, n_channels)
dataT = concatenated_data.T
print("Shape of dataT:", dataT.shape)

In [None]:
# The stim channel is the last column of dataT
stim_col = dataT[:, -1]

# Tally each distinct value found in that column
unique_vals, counts = np.unique(stim_col, return_counts=True)

# Report every value together with its number of occurrences
for idx, val in enumerate(unique_vals):
    count = counts[idx]
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# Prepend an integer sample counter to dataT and wrap the result in a
# DataFrame whose first column has an empty name and whose remaining
# columns are named "0", "1", ...
n_times, n_channels = dataT.shape
header = [""] + list(map(str, range(n_channels)))
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))

# column_stack promotes the counter to float; cast that column back to int
df = pd.DataFrame(data_with_timestamp, columns=header).astype({"": int})

In [None]:
# Write the DataFrame to "data.csv" (without the pandas index) as a quick
# check of the output format.
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Export every subject/session present in m_data to its own CSV file.
# Each file holds a sample counter, all signal channels and, as last column,
# a stim channel rebuilt from the annotations.
STIM_NAME = 'STIM'


def _stim_row(events, n_times, first_samp=0):
    """Return a (1, n_times) array with each event code at its sample.

    events is an (n_events, 3) integer array as returned by
    mne.events_from_annotations: column 0 is the sample number (including
    first_samp), column 2 the event code. Samples without an event stay 0;
    if two events share a sample, the later one wins.
    """
    stim = np.zeros((1, n_times))
    stim[0, events[:, 0] - first_samp] = events[:, 2]
    return stim


def _run_to_array(raw_run, event_id):
    """Return (channel_names, data) for one run, stim channel last.

    The run is not modified: the stim row is stacked under the signal
    channels instead of being added to the Raw object, so this can be
    called repeatedly on the same recording. A channel already named
    STIM_NAME (e.g. added by another cell) is ignored and rebuilt.
    """
    # Explicit event_id: otherwise MNE assigns arbitrary integers to the
    # annotation descriptions.
    events, _ = mne.events_from_annotations(raw_run, event_id=event_id)
    stim = _stim_row(events, len(raw_run.times), raw_run.first_samp)
    signal_names = [name for name in raw_run.ch_names if name != STIM_NAME]
    signals = raw_run.get_data(picks=signal_names)
    return signal_names + [STIM_NAME], np.vstack([signals, stim])


subject_list = list(m_data.keys())

for subject in subject_list:
    for session in sorted(m_data[subject].keys()):
        # Retrieve the list of runs in this session
        run_keys = sorted(m_data[subject][session].keys())

        # One (n_channels, n_times_run) array per run
        all_runs_data = []
        for run in run_keys:
            channel_names, run_data = _run_to_array(
                m_data[subject][session][run], m_dataset.event_id
            )
            print("Channel list:", channel_names)
            print("Last channel:", channel_names[-1])
            all_runs_data.append(run_data)

        # Concatenate along time, then transpose to (total_timesamples, n_channels)
        concatenated_data = np.concatenate(all_runs_data, axis=1)
        dataT = concatenated_data.T
        n_times, n_channels = dataT.shape

        # Channel columns are named "0", "1", ...; the sample counter goes
        # first under an empty column name and stays integer-typed.
        df = pd.DataFrame(dataT, columns=[str(i) for i in range(n_channels)])
        df.insert(0, "", np.arange(n_times, dtype=int))

        # File naming
        subject_str = f"{int(subject):02d}"
        # Session number is taken from the first character of the key
        # (e.g. '0train' or '1test') and made 1-based
        session_str = f"{int(session[0]) + 1:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        df.to_csv(filename, index=False)
        print(f"Saved file: {filename}")

        # Display info. NOTE(review): the labels assume m_dataset.event_id
        # maps left_hand=1, right_hand=2, feet=3 - confirm against the dataset.
        events = df.iloc[:, -1]
        n_lh = int((events == 1).sum())
        n_rh = int((events == 2).sum())
        n_f = int((events == 3).sum())
        print(f"Number of Left hand (1): {n_lh}")
        print(f"Number of Right hand (2): {n_rh}")
        print(f"Number of feet (3): {n_f}")