RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It is designed specifically for BCI data.

This script is for Shin2017A

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Load the dataset.
# get_data() returns a nested mapping that the cells below index as
# m_data[subject][session][run].
# NOTE(review): accept=True presumably acknowledges the dataset's licence
# terms - confirm in the MOABB documentation.
m_dataset = datasets.Shin2017A(accept=True)
m_data = m_dataset.get_data()

In [None]:
# Show all channel names (EEG, misc, stim...) of one example recording:
# subject 1, session '0imagery', run '0'.
# `raw` is a reference to the object stored in m_data, not a copy.
raw = m_data[1]['0imagery']['0']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stim channel in the channel list.
# The channel name is hard-coded for this dataset.
# NOTE(review): assuming ch_names is a plain list, .index() raises ValueError
# when the name is not present.
stim_name = 'Stim'
stim_idx = raw.ch_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Count how often each distinct value occurs on the stim channel
# (the set of values depends on the dataset).
stim_data = raw.get_data(picks=stim_idx)  # read only the stim channel
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Build the sample matrix without the EOG channels.
# The channels are left out through `picks` instead of raw.drop_channels():
# drop_channels() edits the recording in place, and `raw` is the very object
# stored in m_data. Dropping here would therefore make a re-run of this cell,
# or any later drop of 'VEOG'/'HEOG' on the same recording, fail because the
# channels no longer exist.
eog_channels = ('VEOG', 'HEOG')
keep_idx = [i for i, ch in enumerate(raw.ch_names) if ch not in eog_channels]
data = raw.get_data(picks=keep_idx)  # shape: (n_channels, n_times_run)
# Transpose so that rows are time samples and columns are channels.
dataT = data.T

In [None]:
# Extract the last column of dataT, expected to be the stim channel.
# NOTE(review): this relies on the stim channel being the last channel left in
# the data - confirm against the channel list if the channel selection changes.
stim_col = dataT[:, -1]

# Count the unique values
unique_vals, counts = np.unique(stim_col, return_counts=True)

# Loop through unique values and their counts to print the results
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# Assemble the table for one recording: a leading sample-index column
# (empty header "") followed by one column per channel headed "0", "1", ...
n_times, n_channels = dataT.shape
header = ["", *map(str, range(n_channels))]
timestamps = np.arange(n_times, dtype=int)

# Place the sample indices in front of the channel columns.
data_with_timestamp = np.hstack((timestamps[:, np.newaxis], dataT))

# The stacked array has a single dtype, so the indices may have been promoted
# alongside the samples; cast that column back to integers to strip decimals.
df = pd.DataFrame(data_with_timestamp, columns=header).astype({"": int})

In [None]:
# Sanity check: write the single-recording DataFrame to "data.csv" in the
# current working directory, without the pandas row index.
df.to_csv("data.csv", index=False)

In [None]:
# Export every subject/session recording to its own CSV file, named
# subject_<SS>_session_<NN>.csv and written to the current working directory.
# Each file has a leading sample-index column (empty header) followed by one
# column per retained channel, headed "0", "1", ...
eog_channels = ('VEOG', 'HEOG')  # ocular artifact channels (vertical / horizontal EOG)
subject_list = list(m_data.keys())

for subject in subject_list:
    session_keys = sorted(m_data[subject].keys())
    # Loop through all sessions for the current subject, starting the index at 1
    for idx, session in enumerate(session_keys, start=1):
        # Load the raw session data for run '0'
        raw_session = m_data[subject][session]['0']

        # Leave the EOG channels out via `picks` rather than drop_channels():
        # drop_channels() would modify the objects held in m_data in place and
        # raise if the channels were already removed (e.g. when this cell is
        # run a second time). Selecting by index keeps the cached data intact.
        keep_idx = [
            i for i, ch in enumerate(raw_session.ch_names)
            if ch not in eog_channels
        ]
        data = raw_session.get_data(picks=keep_idx)
        # Transpose to get dataT with shape (total_timesamples, n_channels)
        dataT = data.T
        n_times, n_channels = dataT.shape

        # Create the timestamps column (sample index 0 .. n_times - 1)
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        # Stacking put the indices in the same array as the samples; cast the
        # column back to integers so the CSV shows no decimals for it.
        df[""] = df[""].astype(int)

        # File naming: zero-padded subject number and 1-based session index
        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        # Export the DataFrame to a CSV file
        df.to_csv(filename, index=False)
        print(f"Saved file : {filename}")

        # Display information: count the samples of the last column (the stim
        # channel once EOG is excluded) that carry event code 1 or 2.
        events = df.iloc[:, -1]
        n_lh = int((events == 1).sum())
        n_rh = int((events == 2).sum())
        print(f"Number of Left hand (1): {n_lh}")
        print(f"Number of Right hand (2): {n_rh}")