RAW (MOABB) to CSV

This code converts datasets from RAW format to CSV format using MOABB.

It is designed specifically for BCI data.

This script is for PhysionetMI-T2

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Load the PhysionetMI dataset through MOABB.
# m_data is indexed in the following cells as m_data[subject]['0'][run]
# (presumably subject -> session -> run; confirm against the MOABB documentation).
m_dataset = datasets.PhysionetMI()
m_data = m_dataset.get_data()

In [None]:
# List all channel names (EEG, misc, stim...) of one recording.
# The recording inspected is m_data[1]['0']['0'] (presumably subject 1, first
# session, first run; confirm against the MOABB layout).
raw = m_data[1]['0']['0']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stimulation channel within the channel names.
# NOTE(review): assuming ch_names is a list, .index raises ValueError when no
# channel is named 'STIM'.
stm_name = 'STIM'
stim_idx = raw.ch_names.index(stm_name)
print(f"Canal index {stm_name} is : {stim_idx}")

In [None]:
# Count how often each distinct value occurs in the stim channel
# (the set of values depends on the database).
stim_data = raw.get_data(picks=stim_idx)  # only the channel at stim_idx
print(stim_data.shape)
# unique_vals holds the distinct values, counts the matching occurrence counts
unique_vals, counts = np.unique(stim_data, return_counts=True)
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# PhysionetMI Task 2 runs
run_keys = ['0', '1', '2']

# Extract the data of each run for subject 1; every array has shape
# (n_channels, n_times_run)
all_runs_data = [m_data[1]['0'][run].get_data() for run in run_keys]

# Concatenate the runs along the time axis (axis=1)
concatenated_data = np.concatenate(all_runs_data, axis=1)  # shape: (n_channels, total_timesamples)

In [None]:
# Transpose to get dataT of shape (total_timesamples, n_channels)
dataT = concatenated_data.T
print("Shape of dataT before dropping rows:", dataT.shape)

# Remove every time sample (row) whose channels are all exactly 0.
# Each element is tested individually: a row sum of 0 would also match rows
# whose non-zero values happen to cancel out, and those must be kept.
nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
dataT = dataT[nonzero_indices, :]
print("Shape of dataT after dropping rows:", dataT.shape)

In [None]:
# Remap the markers stored in the stimulation channel (last column of dataT).
# The steps run in this order so that a value written by one step is never
# matched again by a later step.
# Marker 1 becomes 4
dataT[dataT[:, -1] == 1, -1] = 4
# Marker 2 becomes 1
dataT[dataT[:, -1] == 2, -1] = 1
# Marker 3 becomes 2
dataT[dataT[:, -1] == 3, -1] = 2
print("Shape of dataT:", dataT.shape)

In [None]:
# Extract the last column of dataT (the stim channel, remapped in the previous cell)
stim_col = dataT[:, -1]

# Count the distinct marker values and how many samples carry each one
unique_vals, counts = np.unique(stim_col, return_counts=True)

# Print every marker value next to its occurrence count
for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# Build the timestamp column (one integer sample index per row) and the header.
# The timestamp column has an empty name; channel columns are named "0".."n_channels-1".
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.column_stack((timestamps, dataT))
header = [""] + [str(i) for i in range(n_channels)]

# column_stack returns a single-dtype array, so the timestamps take the dtype
# shared with dataT (float when dataT is float). Convert them back to int.
# The column is replaced by name on purpose: assigning through df.iloc[:, 0]
# writes into the existing column in place on pandas >= 2.0, which keeps its
# dtype and leaves the decimals in the CSV.
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Write the DataFrame to disk (without the index) as a quick check of the CSV output
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Export one CSV file per subject: a leading integer sample index, then every
# channel as a column, with the remapped stim markers in the last column.
subject_list = list(m_data.keys())

# PhysionetMI Task 2 runs; the same keys are read for every subject
run_keys = ['0', '1', '2']

for subject in subject_list:
    # Data of each run, every array of shape (n_channels, n_times_run)
    all_runs_data = [m_data[subject]['0'][run].get_data() for run in run_keys]

    # Concatenate the data along the time axis (axis=1)
    concatenated_data = np.concatenate(all_runs_data, axis=1)  # (n_channels, total_timesamples)

    # Transpose to get dataT of shape (total_timesamples, n_channels)
    dataT = concatenated_data.T
    # Drop time samples (rows) in which every channel is exactly 0.
    # Each element is tested individually: a row sum of 0 would also match
    # rows whose non-zero values happen to cancel out, and those must be kept.
    nonzero_indices = np.where(np.any(dataT != 0, axis=1))[0]
    dataT = dataT[nonzero_indices, :]
    print("Shape of dataT:", dataT.shape)
    n_times, n_channels = dataT.shape

    # Standardize labels in the stimulation channel (last column).
    # The order matters: each step frees the value the next step writes.
    # Change marker 1 to 4 (Rest)
    dataT[:, -1] = np.where(dataT[:, -1] == 1, 4, dataT[:, -1])
    # Change marker 2 to 1 (Left Hand)
    dataT[:, -1] = np.where(dataT[:, -1] == 2, 1, dataT[:, -1])
    # Change marker 3 to 2 (Right Hand)
    dataT[:, -1] = np.where(dataT[:, -1] == 3, 2, dataT[:, -1])

    # Create the timestamps column (unnamed, first column) and the header
    timestamps = np.arange(n_times, dtype=int)
    datacsv = np.column_stack((timestamps, dataT))
    header = [""] + [str(i) for i in range(n_channels)]
    df = pd.DataFrame(datacsv, columns=header)
    # column_stack returns a single-dtype array; restore integer timestamps
    df[""] = df[""].astype(int)

    # Name the file after the zero-padded subject number
    subject_str = f"{int(subject):03d}"
    filename = f"subject_{subject_str}_session_01.csv"
    # Export the DataFrame to CSV
    df.to_csv(filename, index=False)

    # Display the number of samples per remapped marker value
    events = df.iloc[:, -1]  # last column holds the stim markers
    n_lh = int((events == 1).sum())
    n_rh = int((events == 2).sum())
    rest = int((events == 4).sum())
    print(f"Number of Left hand (1): {n_lh}")
    print(f"Number of Right hand (2): {n_rh}")
    print(f"Number of Rest (4): {rest}")