RAW (MOABB) to CSV

This code converts the datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for BNCI2014004-Train

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Load the database: instantiate the MOABB BNCI2014004 dataset and fetch its data.
# The cells below index the result by subject, then session name, then '0'.
m_dataset = datasets.BNCI2014004()
m_data = m_dataset.get_data()

In [None]:
# Show every channel name (EEG, misc, stim...) of subject 1, session '0train'
first_subject = m_data[1]
raw = first_subject['0train']['0']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stim channel in the channel list
stim_name = 'stim'
channel_names = raw.ch_names
stim_idx = channel_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Tally how often each distinct value occurs in the stim channel
# (the set of values depends on the database)
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
report_lines = [
    f"Value : {val}, Occurences count : {count}"
    for val, count in zip(unique_vals, counts)
]
for line in report_lines:
    print(line)

In [None]:
# Drop EOG channels.
# drop_channels() modifies the recording in place, so operate on a copy: the
# object stored in m_data stays intact and this cell (or any later cell that
# drops the same channels) can be run repeatedly without failing on channels
# that were already removed.
raw_session = m_data[1]['0train']['0'].copy()
eog_present = [
    ch for ch in ('EOG1', 'EOG2', 'EOG3') if ch in raw_session.ch_names
]
if eog_present:
    raw_session.drop_channels(eog_present)
data = raw_session.get_data()  # shape: (n_channels, n_times_run)

In [None]:
# Transpose to get an array of shape (total_timesamples, n_channels)
dataT = data.T
# Keep only the time samples (rows) in which at least one channel is non-zero.
# Testing the row sum instead would also discard rows whose non-zero values
# happen to cancel each other out.
nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
dataT = dataT[nonzero_indices, :]
print("Shape of dataT:", dataT.shape)

In [None]:
# The stim channel is taken from the last column of the filtered array
stim_col = dataT[:, -1]

# Distinct stim values together with the number of samples carrying each one
unique_vals, counts = np.unique(stim_col, return_counts=True)

# Build one report line per distinct value, then print them in order
summary = [
    f"Value : {val}, Occurrence count : {count}"
    for val, count in zip(unique_vals, counts)
]
if summary:
    print("\n".join(summary))

In [None]:
# Prepend a sample counter to the data and label the columns:
# an empty name for the counter, then "0", "1", ... for the channels.
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.hstack((timestamps[:, np.newaxis], dataT))
header = ["", *map(str, range(n_channels))]

# Stacking promoted the counter to float; cast that column back to integers
df = pd.DataFrame(data_with_timestamp, columns=header)
df = df.astype({"": int})

In [None]:
# Write a sample file to inspect the CSV layout (row index is not written)
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Export every subject's training sessions to CSV, one file per
# (subject, session) pair, named subject_XX_session_YY.csv.
subject_list = list(m_data.keys())

# Sessions to process (here '0train' and '1train').
# NOTE: per the original author's remark, the third session exposed by MOABB
# is a test session, not a training one, so only the first two are listed.
sessions = ['0train', '1train']

# EOG channels are excluded from the exported files.
eog_channels = ('EOG1', 'EOG2', 'EOG3')

for subject in subject_list:
    for session in sessions:
        # drop_channels() works in place, so operate on a copy and only ask
        # for channels that are still present. This keeps m_data intact and
        # avoids an error on recordings whose EOG channels were already
        # removed (e.g. by an earlier cell or a previous run of this loop).
        raw_session = m_data[subject][session]['0'].copy()
        eog_present = [ch for ch in eog_channels if ch in raw_session.ch_names]
        if eog_present:
            raw_session.drop_channels(eog_present)
        data = raw_session.get_data()

        # Transpose to get dataT of shape (total_timesamples, n_channels)
        dataT = data.T

        # Keep only the rows in which at least one channel is non-zero.
        # A row-sum test would also drop rows whose values cancel out.
        dataT = dataT[np.any(dataT != 0, axis=1), :]

        n_times, n_channels = dataT.shape

        # First column is a sample counter with an empty header; the
        # remaining columns are numbered "0", "1", ...
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        # column_stack promoted the counter to float; store it as integers
        df[""] = df[""].astype(int)

        # Name the file: zero-padded subject id, and a 1-based session number
        # derived from the leading digit of '0train' / '1train'
        subject_str = f"{int(subject):02d}"
        session_str = f"{int(session[0]) + 1:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        df.to_csv(filename, index=False)
        print(f"Saved file : {filename}")

        # Display info: number of samples in the last column equal to 1 / 2
        events = df.iloc[:, -1]
        n_lh = int((events == 1).sum())
        n_rh = int((events == 2).sum())
        print(f"Number of left hand (1): {n_lh}")
        print(f"Number of Right hand (2): {n_rh}")