RAW (MOABB) to CSV

This code converts the datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for Weibo2014

In [None]:
import numpy as np
import pandas as pd
from moabb import datasets

In [None]:
# Load the database.
# Instantiate MOABB's Weibo2014 dataset object and fetch its recordings.
# The cells below index the result as m_data[subject]['0']['0'].
m_dataset = datasets.Weibo2014()
m_data = m_dataset.get_data()

In [None]:
# Show every channel name of one recording (EEG, misc, stim...).
# The recording stored under subject key 1 serves as the sample for the
# exploratory cells.
subject_one = m_data[1]
raw = subject_one['0']['0']
print("Canal list :", raw.ch_names)

In [None]:
# Find the position of the stimulation channel among the channel names.
# The lookup is by name; `.index` raises ValueError if 'STIM014' is not
# present in raw.ch_names.
stim_name = 'STIM014'
stim_idx = raw.ch_names.index(stim_name)
print(f"Canal index {stim_name} is : {stim_idx}")

In [None]:
# Tally how often each distinct value occurs in the stim channel
# (the set of values depends on the database).
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)
summary_lines = [
    f"Value : {val}, Occurences count : {count}"
    for val, count in zip(unique_vals, counts)
]
for line in summary_lines:
    print(line)

In [None]:
# Leave out the artifact/reference channels and transpose the samples.
# The channels are excluded through `picks` rather than raw.drop_channels():
# `raw` is the very object stored in m_data, so dropping in place would make
# any second drop of the same names (re-running this cell, or the per-subject
# loop) fail on the already-missing channels.
excluded_channels = ('VEO', 'HEO', 'CB1', 'CB2')
kept_indices = [
    idx for idx, name in enumerate(raw.ch_names)
    if name not in excluded_channels
]
data = raw.get_data(picks=kept_indices)
# Transpose so that each row is a time sample and each column a channel
dataT = data.T
print(dataT.shape)

In [None]:
# Remove the time samples (rows) that contain nothing but zeros.
# Each row is tested element-wise: a plain row sum would also discard rows
# whose non-zero values happen to cancel out to exactly 0.
nonzero_indices = np.flatnonzero(np.any(dataT != 0, axis=1))
dataT = dataT[nonzero_indices, :]
print("Shape of dataT after dropping rows:", dataT.shape)

In [None]:
# Standardize the labels held in the stimulation channel (last column).
# Every substitution is matched against a snapshot of the untouched labels,
# so one replacement can never feed into another:
#   5 -> 0 and 6 -> 0 (classes removed), 3 -> 5, 4 -> 3, 7 -> 4
stim_before = dataT[:, -1].copy()
for old_marker, new_marker in ((5, 0), (6, 0), (3, 5), (4, 3), (7, 4)):
    dataT[stim_before == old_marker, -1] = new_marker
print("Shape of dataT:", dataT.shape)

In [None]:
# Inspect the label distribution: take the stim channel (last column) and
# report how many samples carry each distinct value.
stim_col = dataT[:, -1]
unique_vals, counts = np.unique(stim_col, return_counts=True)
for position, val in enumerate(unique_vals):
    print(f"Value : {val}, Occurrence count : {counts[position]}")

In [None]:
# Build the table to export: a leading sample-index column (empty header)
# followed by one column per channel, headed "0", "1", ...
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
data_with_timestamp = np.hstack((timestamps[:, np.newaxis], dataT))
header = [""] + list(map(str, range(n_channels)))

# Stacking next to the float samples turned the indices into floats; cast
# that column back to int so it is written without decimals.
df = pd.DataFrame(data_with_timestamp, columns=header).astype({"": int})

In [None]:
# Sanity check: write the single-recording table to "data.csv" (relative
# path, pandas index omitted) so the resulting file can be inspected.
df.to_csv("data.csv", index=False)

In [None]:
# Convert every subject of the dataset to its own CSV file.
# For each subject: take the recording at ['0']['0'], leave out the
# artifact/reference channels, discard all-zero samples, remap the stim
# labels, prepend a sample-index column, save the CSV and print label counts.
subject_list = list(m_data.keys())

# Artifact and reference channels left out of the export.
excluded_channels = ('VEO', 'HEO', 'CB1', 'CB2')
# Original stim marker -> standardized label (0 means "class removed").
label_remap = ((5, 0), (6, 0), (3, 5), (4, 3), (7, 4))
# Display name and standardized label of each class reported after export.
reported_classes = (
    ("Left hand", 1),
    ("Right hand", 2),
    ("feet", 3),
    ("rest", 4),
    ("Both hands", 5),
)

for subject in subject_list:
    # Access the raw data for the specific subject and session
    raw_session = m_data[subject]['0']['0']
    # Select the kept channels through `picks` instead of drop_channels():
    # this neither mutates the recording stored in m_data nor fails when the
    # channels were already removed from it by an earlier cell.
    kept_indices = [
        idx for idx, name in enumerate(raw_session.ch_names)
        if name not in excluded_channels
    ]
    data = raw_session.get_data(picks=kept_indices)

    # Transpose to get dataT with shape (total_timesamples, n_channels)
    dataT = data.T
    # Keep only the time samples holding at least one non-zero value; testing
    # element-wise avoids dropping rows whose values merely sum to 0.
    nonzero_indices = np.flatnonzero(np.any(dataT != 0, axis=1))
    dataT = dataT[nonzero_indices, :]
    print("Shape of dataT before labeling:", dataT.shape)
    n_times, n_channels = dataT.shape

    # Standardize labels in the stimulation channel (last column). Matching
    # against a snapshot keeps the substitutions independent of their order.
    original_labels = dataT[:, -1].copy()
    for old_marker, new_marker in label_remap:
        dataT[original_labels == old_marker, -1] = new_marker
    print("Shape of dataT after labeling:", dataT.shape)

    # Create the timestamps column (sample index, written without decimals)
    timestamps = np.arange(n_times, dtype=int)
    datacsv = np.column_stack((timestamps, dataT))
    header = [""] + [str(i) for i in range(n_channels)]
    df = pd.DataFrame(datacsv, columns=header)
    df[""] = df[""].astype(int)

    # Name the file after the zero-padded subject number
    subject_str = f"{int(subject):02d}"
    filename = f"subject_{subject_str}_session_01.csv"
    # Export the DataFrame to CSV
    df.to_csv(filename, index=False)

    # Display how many samples carry each standardized label
    events = df.iloc[:, -1]
    print(f"\nFile saved: {filename}")
    for class_name, label in reported_classes:
        n_samples = int((events == label).sum())
        print(f"Number of {class_name} ({label}): {n_samples}")