RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for Lee2019MI

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

# Import decimate 
import sys
import os
sys.path.append(os.path.abspath('..'))
from ConvTools import decimate

In [None]:
# WARNING: the Lee2019_MI database is quite extensive, so loading every
# subject in one call is heavy. Consider loading it in parts by passing an
# explicit list of subjects, e.g.
#     m_dataset.get_data(subjects=m_dataset.subject_list[:10])
# NOTE(review): `subjects=` is the keyword MOABB documents for get_data;
# confirm against the installed MOABB version.
# Load the database (as written: all subjects).
m_dataset = datasets.Lee2019_MI()
m_data = m_dataset.get_data()

In [None]:
# List every channel name of one recording (EEG, misc, stim, ...).
# m_data is indexed as m_data[subject][session][run]; this picks subject 1,
# session '0', run '1train'.
raw = m_data[1]['0']['1train']
print("Canal list :", raw.ch_names)

In [None]:
# Look up the position of the stimulation (marker) channel by its name.
# NOTE(review): assumes raw.ch_names is a plain list, in which case .index()
# raises ValueError when the channel is missing.
stim_name = 'STI 014'
stim_idx = raw.ch_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Downsample the recording with the project helper ConvTools.decimate.
# NOTE(review): sfreq is presumably the recording's original sampling rate in
# Hz and decimation_factor the integer reduction ratio (1000 / 5 = 200 Hz);
# the stim channel name is passed too, presumably so that markers are treated
# separately from the signal channels - confirm against ConvTools.decimate.
sfreq = 1000
decimation_factor = 5
raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)

In [None]:
# Tally how often each distinct value occurs in the stim channel (which
# values show up depends on the database).
# NOTE(review): stim_idx was looked up on `raw`; this assumes the decimated
# recording keeps the same channel order.
stim_data = raw_decimated.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
for position, val in enumerate(unique_vals):
    # counts is aligned element-wise with unique_vals.
    count = counts[position]
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Discard the EMG channels (in place on raw_decimated), then flip the array
# so that rows are time samples and columns are channels.
emg_channels = ['EMG1', 'EMG2', 'EMG3', 'EMG4']
raw_decimated.drop_channels(emg_channels)
data = raw_decimated.get_data()
# np.transpose returns a view, so dataT and data share the same memory.
dataT = np.transpose(data)
print(dataT.shape)

In [None]:
# Standardize labels in the stimulation channel (last column) by swapping
# markers 1 and 2.
# Both masks are computed BEFORE anything is written, so no temporary marker
# value is needed and a genuine marker 3 (if the data ever contains one) is
# left untouched instead of being relabelled to 1.
markers = dataT[:, -1]  # basic slice: a view, so writes land in dataT
was_one = markers == 1
was_two = markers == 2
markers[was_one] = 2
markers[was_two] = 1
print("Shape of dataT:", dataT.shape)

In [None]:
# Pull out the stim channel, which is taken to be the last column of dataT.
stim_col = dataT[:, -1]

# Distinct marker values together with how many samples carry each one.
unique_vals, counts = np.unique(stim_col, return_counts=True)
occurrences = dict(zip(unique_vals, counts))

# Report every value/count pair, in the sorted order np.unique produced.
for val, count in occurrences.items():
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# Build the sample-index ("timestamp") column and the CSV header.
# The header is an empty name for the timestamp column followed by the
# channel positions 0..n_channels-1 as strings.
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
# Stacking integer timestamps next to floating-point data yields one float
# array, so the timestamps become floats here (assuming dataT is float).
data_with_timestamp = np.column_stack((timestamps, dataT))
header = [""] + [str(i) for i in range(n_channels)]

# Removing decimals from timestamps.
# The column is REPLACED (df[""] = ...) rather than written through
# df.iloc[:, 0] = ...: on pandas >= 2.0 the iloc form sets the values into
# the existing float column, so the dtype stays float and the timestamps
# would still be written as 0.0, 1.0, ... in the CSV.
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Sanity check: write the single-recording DataFrame to "data.csv" (relative
# to the current working directory, without the index column) so the CSV
# layout can be inspected before exporting every subject.
df.to_csv("data.csv", index=False)

In [None]:
# Loop for all subjects: every session's '1train' run is downsampled,
# stripped of its EMG channels, relabelled and written to its own CSV file
# named subject_XX_session_YY.csv in the current working directory.
# Relies on `m_data`, `stim_name` and `decimate` defined in earlier cells.
subject_list = list(m_data.keys())

# Downsampling parameters.
# These match the values used for this same dataset in the single-recording
# cell (1000 Hz, factor 5). The previous 512 / 2 pair did not agree with it
# and looks like a leftover from another dataset's script.
# NOTE(review): confirm 1000 Hz against the Lee2019_MI documentation.
sfreq = 1000
decimation_factor = 5

# EMG channels are not exported; the list is loop-invariant.
emg_channels = ['EMG1', 'EMG2', 'EMG3', 'EMG4']

for subject in subject_list:
    session_keys = sorted(m_data[subject].keys())
    # Sessions are numbered from 1 in the output file names, following the
    # sorted order of their keys.
    for idx, session in enumerate(session_keys, start=1):
        # Load the specified run ('1train') for the current subject/session.
        raw_session = m_data[subject][session]['1train']

        # Downsampling
        raw_decimated = decimate(raw_session, sfreq, decimation_factor, stim_name)

        # Removing of EMG channels
        raw_decimated.drop_channels(emg_channels)
        data = raw_decimated.get_data()

        # Transpose to get dataT of shape (total_timesamples, n_channels)
        dataT = data.T
        n_times, n_channels = dataT.shape

        # Standardize labels in the stimulation channel by swapping markers
        # 1 and 2.
        # NOTE(review): the stim channel is assumed to be the last column
        # once the EMG channels are dropped.
        # Both masks are computed before any write, so no temporary marker
        # value is needed and an existing marker 3 would not be turned into 1.
        markers = dataT[:, -1]  # view: writes go straight into dataT
        was_one = markers == 1
        was_two = markers == 2
        markers[was_one] = 2
        markers[was_two] = 1
        print("Shape of dataT:", dataT.shape)

        # Create the timestamps column (plain sample indices) and the header:
        # an empty name for the timestamps, then the channel positions.
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        # column_stack produced floats; replace the column so it is integer.
        df[""] = df[""].astype(int)

        # Name the file
        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        # Export the DataFrame to CSV
        df.to_csv(filename, index=False)

        # Display information: number of samples carrying each marker.
        events = df.iloc[:, -1]
        n_lh = int((events == 1).sum())
        n_rh = int((events == 2).sum())
        print(f"\nFile saved: {filename}")
        print(f"Number of Left hand (1): {n_lh}")
        print(f"Number of Right hand (2): {n_rh}")