RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for GrosseWentrup2009

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

# Import decimate 
import sys
import os
sys.path.append(os.path.abspath('..'))
from EEGtools import decimate

In [None]:
# WARNING: If you plan to use this script, know that the GrosseWentrup2009 database is quite extensive.
# Therefore, I recommend loading only part of the database at a time by passing a list of
# subjects, e.g. m_dataset.get_data(subjects=m_dataset.subject_list[:5]).
# Load the database; the result is indexed below as m_data[subject]['0']['0'].
m_dataset = datasets.GrosseWentrup2009()
m_data = m_dataset.get_data()

In [None]:
# See all channel names (EEG, misc, stim...)
# Subject 1 is used as a representative recording; the two '0' keys are
# presumably the session and run identifiers -- confirm against the MOABB version in use.
raw = m_data[1]['0']['0']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stim channel in the channel list.
# Assumes the stim channel is named 'STIM' in this dataset; the lookup
# fails if no channel carries that name.
stim_name = 'STIM'
stim_idx = raw.ch_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Downsampling of the single recording selected above.
# sfreq is presumably the original sampling rate in Hz and decimation_factor the
# ratio by which it is reduced -- confirm against EEGtools.decimate.
# NOTE(review): stim_name is passed so the helper can presumably treat the stim
# channel separately from the EEG channels -- verify in EEGtools.
sfreq = 500
decimation_factor = 2 
raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)

In [None]:
# Tally how often each distinct value occurs on the stim channel
# (which values appear depends on the database)
stim_data = raw_decimated.get_data(picks=stim_idx)
print(stim_data.shape)
for value, occurrences in zip(*np.unique(stim_data, return_counts=True)):
    print(f"Value : {value}, Occurences count : {occurrences}")

In [None]:
# Swap the two axes of the array returned by get_data() so that what were
# columns become rows (and vice versa), then show the resulting shape
data = raw_decimated.get_data()
dataT = np.transpose(data)
print(dataT.shape)

In [None]:
# Create the timestamp column and the CSV header.
# dataT has one row per time sample and one column per channel, so the
# timestamps are simply the row numbers 0..n_times-1.
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))
# The timestamp column gets an empty header; channels are labelled "0", "1", ...
header = [""] + [str(i) for i in range(n_channels)]

# Remove decimals from the timestamps: stacking them next to floating-point
# data promotes them to float, so they must be cast back to int.
# The column is replaced by label rather than assigned through .iloc, because
# with pandas 2.x an .iloc assignment writes into the existing float column
# and the timestamps would still be exported as 0.0, 1.0, ...
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Test export: write the single-recording DataFrame to data.csv (without the
# pandas index) so the CSV layout can be inspected before looping over subjects.
df.to_csv("data.csv", index=False)

In [None]:
# Export every loaded subject to its own CSV file
subject_list = list(m_data.keys())

# Downsampling parameters
sfreq = 500
decimation_factor = 2

for subject in subject_list:
    # Only the recording stored under keys '0' / '0' is exported
    raw_session = m_data[subject]['0']['0']

    # Downsample, then lay the samples out as (total_timesamples, n_channels)
    raw_decimated = decimate(raw_session, sfreq, decimation_factor, stim_name)
    data = raw_decimated.get_data()
    dataT = np.transpose(data)
    n_times, n_channels = dataT.shape

    # First column: integer sample counter under an empty header;
    # remaining columns: one per channel, labelled "0", "1", ...
    timestamps = np.arange(n_times, dtype=int)
    header = [""] + list(map(str, range(n_channels)))
    datacsv = np.column_stack((timestamps, dataT))
    df = pd.DataFrame(datacsv, columns=header).astype({"": int})

    # Write the CSV, named after the zero-padded subject number
    filename = f"subject_{int(subject):02d}_session_01.csv"
    df.to_csv(filename, index=False)

    # Report how many rows of the last column carry each class code
    # (the last column is assumed to be the stim channel)
    events = df.iloc[:, -1]
    n_lh = int((events == 1).sum())
    n_rh = int((events == 2).sum())
    print(f"\nFile saved: {filename}")
    print(f"Number of Left hand (1): {n_lh}")
    print(f"Number of Right hand (2): {n_rh}")