RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It has been designed specifically for BCI data.

This script is for EPFLP300-4 (Door run 4)



In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

# Import decimate 
import sys
import os
sys.path.append(os.path.abspath('..'))
from ConvTools import decimate

In [None]:
# Load the database: instantiate the MOABB EPFL P300 dataset and fetch its
# recordings. m_data is indexed in later cells as m_data[subject][session][run].
m_dataset = datasets.EPFLP300()
m_data = m_dataset.get_data()

In [None]:
# Pick one recording to inspect and list all of its channel names
# (EEG, misc, stim...).
# Keys are [subject][session][run]; '3' is presumably the zero-based index of
# run 4 — confirm against the MOABB key layout.
raw = m_data[1]['0']['3']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stimulation channel in the channel list.
# stim_name is reused by the downsampling step and stim_idx by the value count
# in later cells.
stim_name = 'STI'
stim_idx = raw.ch_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Count how often each distinct value appears on the stim channel
# (the set of values depends on the database).
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
# np.unique returns the distinct values and, with return_counts=True, how many
# times each one occurs
unique_vals, counts = np.unique(stim_data, return_counts=True)
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Downsample the inspected recording with the project helper `decimate`
# (imported from ConvTools).
# sfreq is hard-coded rather than read from `raw` — presumably the original
# sampling rate in Hz; confirm it matches the recording.
# NOTE(review): stim_name is passed to the helper, presumably so the stim
# channel can be handled separately from the signal channels — confirm in
# ConvTools.decimate.
sfreq = 2048
decimation_factor = 8 
raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)

In [None]:
# Find the positions of the two mastoid channels in the decimated recording.
# They are read in the next cell to build the reference signal.
ma1_name = 'MA1'
ma2_name = 'MA2'
ma1_idx = raw_decimated.ch_names.index(ma1_name)
ma2_idx = raw_decimated.ch_names.index(ma2_name)

In [None]:
# Build the reference column: the sample-wise mean of the two mastoid channels
MA1 = raw_decimated.get_data(picks=ma1_idx)
MA2 = raw_decimated.get_data(picks=ma2_idx)

# flatten() always returns a fresh 1D copy, so MA is an independent 1D vector
MA = ((MA1 + MA2) / 2).flatten()

In [None]:
# Drop the mastoid channels now that the reference signal has been extracted.
# The return value is not used, so this relies on drop_channels modifying
# raw_decimated in place.
raw_decimated.drop_channels(['MA1','MA2'])
data = raw_decimated.get_data()  # shape: (n_channels, n_times_run)

# Transpose so that rows are time samples and columns are channels
dataT = data.T

In [None]:
# Build the output table: a sample counter followed by one column per channel.
# The counter column has an empty name; channel columns are named "0", "1", ...
n_times, n_channels = dataT.shape
header = [""] + list(map(str, range(n_channels)))
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))

# Cast the counter column back to int so it carries no decimals
df = pd.DataFrame(data_with_timestamp, columns=header).astype({"": int})

# Referencing: subtract the mastoid mean from every column except the first
# (counter) and the last (presumably the stim channel — confirm)
signal_cols = df.iloc[:, 1:-1]
df.iloc[:, 1:-1] = signal_cols - MA.reshape(-1, 1)

In [None]:
# Write the single inspected recording to "data.csv" (without the DataFrame
# index) so the CSV layout can be checked before converting every recording.
df.to_csv("data.csv", index=False)

In [None]:
# Convert run '3' of every subject and session into its own CSV file.
# Relies on names defined in earlier cells: m_data, stim_name and decimate.
subject_list = list(m_data.keys())

# Decimation settings are identical for every recording, so set them once.
# sfreq is hard-coded — presumably the original sampling rate in Hz; confirm.
sfreq = 2048
decimation_factor = 8

for subject in subject_list:

    session_keys = sorted(m_data[subject].keys())
    # Loop through all sessions for the current subject; idx is the 1-based
    # position of the session in sorted order and is used in the file name
    for idx, session in enumerate(session_keys, start=1):
        raw_session = m_data[subject][session]['3']

        # Downsample the current session's recording. Using the inspection
        # recording `raw` here would write the same data to every file.
        raw_decimated = decimate(raw_session, sfreq, decimation_factor, stim_name)

        # Look up the mastoid channels in this recording rather than reusing
        # indices computed on another one
        ma1_idx = raw_decimated.ch_names.index('MA1')
        ma2_idx = raw_decimated.ch_names.index('MA2')

        # Preparing referencing col: sample-wise mean of the two mastoids,
        # flattened to a 1D vector (flatten() returns a copy)
        MA1 = raw_decimated.get_data(picks=ma1_idx)
        MA2 = raw_decimated.get_data(picks=ma2_idx)
        MA = ((MA1 + MA2) / 2).flatten()

        # Drop Mastoid channels (return value unused: relies on in-place drop)
        raw_decimated.drop_channels(['MA1', 'MA2'])
        data = raw_decimated.get_data()  # shape: (n_channels, n_times_run)

        # Transpose so rows are time samples and columns are channels
        dataT = data.T

        # Creating timestamps and header: an unnamed counter column followed
        # by channel columns named "0", "1", ...
        n_times, n_channels = dataT.shape
        timestamps = np.arange(n_times, dtype=int)
        data_with_timestamp = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]

        # Removing decimals from timestamps
        df = pd.DataFrame(data_with_timestamp, columns=header)
        df[""] = df[""].astype(int)

        # Referencing: every column except the first (counter) and the last
        # (presumably the stim channel — confirm) gets the mastoid mean removed
        df.iloc[:, 1:-1] = df.iloc[:, 1:-1] - MA[:, np.newaxis]

        # Define the filename
        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"

        # Save the file
        df.to_csv(filename, index=False)
        print(f"Saved file : {filename}")

        # Display information: count event codes in the last column
        events = df.iloc[:, -1]
        n_t = int((events == 2).sum())
        n_nt = int((events == 1).sum())
        print(f"Number of Target (2): {n_t}")
        print(f"Number of Non-Target (1): {n_nt}")