RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for BNCI2015003-train

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Instantiate the MOABB dataset and fetch the recordings of its first listed subject only
m_dataset = datasets.BNCI2015003()
first_subject = m_dataset.subject_list[0]
m_data = m_dataset.get_data(subjects=[first_subject])

In [None]:
# Select the reference recording (subject key 1, 'session_0', 'run_1')
# and display every channel name it holds (EEG, misc, stim...)
subject_sessions = m_data[1]
raw = subject_sessions['session_0']['run_1']
print("Canal list :", raw.ch_names)

In [None]:
# Locate the position of the stimulation channel inside the channel list
stim_name = 'Target'
channel_names = raw.ch_names
stim_idx = channel_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Extract the stim channel and tally how often each distinct value occurs
# (the set of values depends on the database)
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
for stim_value, n_occurrences in zip(unique_vals, counts):
    print(f"Value : {stim_value}, Occurences count : {n_occurrences}")

In [None]:
# Ensure stim_data is a 1D vector. flatten() always returns a fresh copy, so
# the array the values were read from is left untouched.
stim_data = np.asarray(stim_data).flatten()

# Keep only the first sample of each event block: a sample survives only if
# it differs from the sample right before it (the very first sample is
# compared against 0); every other sample is set to 0. This is done in one
# vectorized pass instead of a Python-level loop over every sample.
previous_samples = np.concatenate(([0], stim_data))[:-1]
stim_data = np.where(stim_data != previous_samples, stim_data, 0)

In [None]:
# Quick check: print the unique values of the stim channel, with their
# occurrence counts, after the transformation.
unique_vals, counts = np.unique(stim_data, return_counts=True)
for val, count in zip(unique_vals, counts):
    print(f"(Après) Valeur : {val}, Occurrences : {count}")

In [None]:
# Drop the Flash channel on a copy. drop_channels modifies the recording in
# place, and `raw` is the very object stored in m_data: dropping directly on
# it would make this cell fail when re-run and would make any later
# drop of 'Flash' on the same recording raise because the channel is gone.
raw_without_flash = raw.copy().drop_channels(['Flash'])
data = raw_without_flash.get_data()  # shape: (n_channels, n_times_run)
dataT = data.T  # shape: (n_times_run, n_channels)

In [None]:
# Build the table to export: a first, unnamed column holding the sample index,
# followed by one column per channel named "0", "1", ...
n_times, n_channels = dataT.shape
header = [""] + list(map(str, range(n_channels)))
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.hstack((timestamps[:, np.newaxis], dataT))
df = pd.DataFrame(data_with_timestamp, columns=header)

# Stacking promoted the timestamps to float; cast them back to integers
df[""] = df[""].astype(int)

# Overwrite the stim column with the filtered events (+1 skips the timestamp column)
df.iloc[:, stim_idx + 1] = stim_data

In [None]:
# Test to check csv file
df.to_csv("data.csv", index=False)

In [None]:
# Loop through all subjects
def _keep_event_onsets(stim):
    """Return a flat copy of *stim* keeping only the first sample of each event block.

    A sample is kept when it differs from the sample right before it (the
    first sample is compared against 0); every other sample is set to 0.
    """
    stim = np.asarray(stim).flatten()
    previous_samples = np.concatenate(([0], stim))[:-1]
    return np.where(stim != previous_samples, stim, 0)


subject_list = list(m_data.keys())

for subject in subject_list:
    raw_session = m_data[subject]['session_0']['run_1']

    # drop_channels works in place and raises on a missing channel. The
    # recording may already have lost 'Flash' (dropped from the same object
    # in an earlier cell, or this cell is being re-run), so only drop it
    # when it is still present.
    if 'Flash' in raw_session.ch_names:
        raw_session.drop_channels(['Flash'])

    # Look the stim channel up by name on this recording, after the drop, so
    # the index always matches the columns of the exported data.
    subject_stim_idx = raw_session.ch_names.index(stim_name)

    # Process the stimulation channel for this subject:
    # keep only the first sample of each event block
    stim_data = _keep_event_onsets(raw_session.get_data(picks=subject_stim_idx))

    # Transpose to obtain dataT with shape (total_timesamples, n_channels)
    data = raw_session.get_data()
    dataT = data.T
    n_times, n_channels = dataT.shape

    # Create the timestamps column. column_stack promotes it to float,
    # hence the cast back to int once the DataFrame is built.
    timestamps = np.arange(n_times, dtype=int)
    datacsv = np.column_stack((timestamps, dataT))
    header = [""] + [str(i) for i in range(n_channels)]
    df = pd.DataFrame(datacsv, columns=header)
    df[""] = df[""].astype(int)

    # Integrate the new stimulation channel into the DataFrame
    # (+1 skips the timestamp column placed in front of the channels)
    stim_column = subject_stim_idx + 1
    df.iloc[:, stim_column] = stim_data

    # Define the filename
    subject_str = f"{int(subject):02d}"
    filename = f"subject_{subject_str}_session_01.csv"

    # Save the file
    df.to_csv(filename, index=False)
    print(f"Saved file : {filename}")

    # Display the number of events of each class found in the stim column
    events = df.iloc[:, stim_column]
    n_t = int((events == 2).sum())
    n_nt = int((events == 1).sum())
    print(f"Number of Target (2): {n_t}")
    print(f"Number of Non-Target (1): {n_nt}")