MOABB to CSV / RAW to CSV

This code converts datasets from RAW format to CSV format.

It has been specifically conceived for BCI data.

This script targets the Schirrmeister2017 dataset.


In [None]:
import numpy as np
import pandas as pd
import moabb.datasets
import mne
import os

In [None]:
def _print_gap_stats(label, gaps):
    """Print min/max/mean/std of inter-event gaps, or a notice when there are none."""
    print(label)
    if gaps.size == 0:
        # np.min / np.max raise ValueError on empty arrays
        print("  Not enough events to compute gaps")
        return
    print(f"  Min: {np.min(gaps):.3f}s")
    print(f"  Max: {np.max(gaps):.3f}s")
    print(f"  Mean: {np.mean(gaps):.3f}s")
    print(f"  Standard deviation: {np.std(gaps):.3f}s")


def decimate(raw, sfreq, decimation_factor):
    """
    Low-pass filter and downsample Raw data, printing information for validation.

    The input object is left untouched: filtering and resampling are applied
    to a copy.

    Parameters:
    -----------
    raw : mne Raw object
        EEG data. It must contain a stim channel named 'STIM', which is used
        to compare the events before and after decimation.
    sfreq : int
        Base sampling rate or frequency (Hz). It is expected to match
        raw.info['sfreq']; the new frequency is computed from this argument.
    decimation_factor : int
        Decimation factor, must be an integer and the result
        of the new frequency needs to be an integer too

    Returns:
    --------
    raw_decimated : mne Raw object
        Filtered and resampled copy of `raw`
    """
    print(f"Original sampling frequency : {raw.info['sfreq']} Hz")
    new_freq = sfreq / decimation_factor
    print(f"New sampling frequency will be : {new_freq} Hz")

    h_freq = int((new_freq / 3) - 2)  # h_freq needs to be lower than 1/3 of new_freq

    # 1. Low-pass filter (applied to a copy so the caller's data is preserved)
    print("\n=== Application du filtre passe-bas ===")
    raw_filtered = raw.copy().filter(
        l_freq=None,
        h_freq=h_freq,
        method='iir',
        iir_params=dict(
            order=4,
            ftype='butter'
        ),
        phase='zero'    # forward-backward filtering
    )

    # 2. Decimation. raw_filtered is already a private copy, so it can be
    #    resampled directly without duplicating the data a second time.
    print("\n=== Data decimation===")
    raw_decimated = raw_filtered.resample(new_freq)

    print("\n=== EVENTS ===")
    events_orig = mne.find_events(raw, stim_channel='STIM')
    events_dec = mne.find_events(raw_decimated, stim_channel='STIM')
    if len(events_orig) != len(events_dec):
        print(f"WARNING: {len(events_orig)} events before decimation, "
              f"{len(events_dec)} after")

    # 3. Count the non-zero stim values before and after decimation
    print("\n=== Labels Check ===")
    stim_data = raw.get_data(picks='stim')
    stim_data_d = raw_decimated.get_data(picks='stim')
    unique_vals, counts = np.unique(stim_data[stim_data != 0], return_counts=True)
    unique_valsd, countsd = np.unique(stim_data_d[stim_data_d != 0], return_counts=True)
    print("Original:")
    for val, count in zip(unique_vals, counts):
        print(f"Value : {val}, Occurences count : {count}")
    print("Decimated:")
    for val, count in zip(unique_valsd, countsd):
        print(f"Value : {val}, Occurences count : {count}")

    # 4. Validation: the time between consecutive events should be preserved
    print("\n=== Checking discrepancies between events ===")

    # Gaps between consecutive events, converted from samples to seconds
    gaps_orig = np.diff(events_orig[:, 0]) / raw.info['sfreq']
    gaps_dec = np.diff(events_dec[:, 0]) / raw_decimated.info['sfreq']

    print("\nTime between events (seconds):")
    _print_gap_stats("Original:", gaps_orig)
    print()
    _print_gap_stats("\nDecimated:", gaps_dec)

    # Side-by-side view of the first 5 gaps. zip() stops at the shorter
    # sequence, so a different number of events cannot cause an IndexError.
    print("\nComparison of the first 5 gaps:")
    print("N° | Original (s) | Decimated (s) | Diff (ms)")
    print("-" * 45)
    for i, (gap_o, gap_d) in enumerate(zip(gaps_orig[:5], gaps_dec[:5]), start=1):
        diff_ms = (gap_o - gap_d) * 1000
        print(f"{i:2d} | {gap_o:11.3f} | {gap_d:10.3f} | {diff_ms:14.3f}")

    return raw_decimated

In [None]:
# Instantiate the MOABB Schirrmeister2017 dataset and fetch its recordings.
# The result is indexed below as m_data[subject][session][run name].
m_dataset = moabb.datasets.Schirrmeister2017()
m_data = m_dataset.get_data()

In [None]:
# Pick the train and test recordings of subject 1, session '0'
raw_tr = m_data[1]['0']['0train']
raw_te = m_data[1]['0']['1test']


In [None]:
# Join the train and test recordings into a single Raw object.
# mne.concatenate_raws modifies the first element of the list in place and
# returns it, so a copy is passed: otherwise `raw` would be the very object
# stored in m_data, and every later change (e.g. adding a channel) would also
# alter the cached dataset.
raw = mne.concatenate_raws([raw_tr.copy(), raw_te])
# Show all channel names (EEG, misc, stim...)
print("Canal list :", raw.ch_names)

In [None]:
# Get events from annotations
events_from_annot, event_dict = mne.events_from_annotations(raw)

# Create an empty stim channel: one row, one column per sample
stim_data = np.zeros((1, len(raw.times)))

# Place the code of each event at its sample position in the stim channel.
# Event sample numbers are offset by raw.first_samp whereas the array starts
# at index 0, so the offset is removed before indexing.
event_positions = events_from_annot[:, 0] - raw.first_samp
stim_data[0, event_positions] = events_from_annot[:, 2]

# Add the stim channel to the raw object
info = mne.create_info(['STIM'], raw.info['sfreq'], ['stim'])
stim_raw = mne.io.RawArray(stim_data, info)
raw.add_channels([stim_raw], force_update_info=True)

print("Canal list :", raw.ch_names)
print(raw.ch_names[-1])

In [None]:
# Extract events from the stim channel (named explicitly, as in decimate())
events_from_stim = mne.find_events(raw, stim_channel='STIM')

# Verify that the stim channel holds the same events as the annotations
print("Number of events from annonations :", len(events_from_annot))
print("Number of events from STIM canal :", len(events_from_stim))
# np.array_equal gives a single True/False answer and also copes with arrays
# of different shapes, which an elementwise `==` does not.
print(np.array_equal(events_from_annot, events_from_stim))

In [None]:
# Find the index of the stim channel (needed later for the CSV to NY conversion)
stim_channel_name = 'STIM'
stim_idx = raw.ch_names.index(stim_channel_name)
print(f"Canal index {stim_channel_name} is : {stim_idx}")


In [None]:
# Count how many samples of the stim channel hold each distinct value
# (zeros, i.e. samples without an event, are counted too)
stim_data = raw.get_data(picks=stim_idx)
print(stim_data.shape)
unique_vals, counts = np.unique(stim_data, return_counts=True)

for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurences count : {count}")

In [None]:
# Read the sampling rate from the recording itself rather than hard-coding it,
# so the target frequency computed in decimate() always matches the data.
sfreq = raw.info['sfreq']
decimation_factor = 2
raw_decimated = decimate(raw, sfreq, decimation_factor)

In [None]:
# Transpose the decimated data so that rows are time samples and columns are
# channels (the shape is unpacked as (n_times, n_channels) further down)
data = raw_decimated.get_data()
dataT = data.T
print(dataT.shape)

In [None]:
# Create the timestamps (one integer per sample) and the header: an empty name
# for the timestamp column, then the channel indices as strings
n_times, n_channels = dataT.shape
timestamps = np.arange(n_times, dtype=int)
# column_stack produces a single float array, so the timestamps become floats
data_with_timestamp = np.column_stack((timestamps, dataT))
header = [""] + [str(i) for i in range(n_channels)]

# Remove decimals from timestamps. The column is replaced as a whole:
# assigning through df.iloc[:, 0] writes into the existing float column with
# recent pandas versions and would leave the timestamps as 0.0, 1.0, ...
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Remap the event codes stored in the stim channel
stim_col = str(n_channels - 1)  # the last column holds the stimulations

# Replacements are applied one after the other, in this exact order; code 5 is
# only a temporary slot so that "feet" is not overwritten while the other
# codes are being moved.
label_moves = (
    (1, 5),  # feet -> 5 (temporary)
    (2, 1),  # left hand -> 1
    (4, 2),  # right hand -> 2
    (3, 4),  # rest -> 4
    (5, 3),  # feet -> 3
)
for old_code, new_code in label_moves:
    df[stim_col] = df[stim_col].replace({old_code: new_code})

In [None]:
# Display the number of samples carrying each (swapped) event code
events = df[stim_col].values
n_lh, n_rh, n_f, rest = (len(events[events == code]) for code in (1, 2, 3, 4))
print(f"Nombre de Left hand (1): {n_lh}")
print(f"Nombre de Right hand (2): {n_rh}")
print(f"Nombre de feet (3): {n_f}")
print(f"Nombre de rest (4): {rest}")


In [None]:
# Write a single CSV file to check the output format
output_dir = 'C:/Users/doumif/work/Prog/Schirrmeister2017'
filename = "data.csv"
filepath = os.path.join(output_dir, filename)

# to_csv does not create missing folders, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
df.to_csv(filepath, index=False)

In [None]:
# Convert every subject/session of the dataset to one CSV file each
subject_list = m_dataset.subject_list
sfreq = 500
decimation_factor = 2

# The output folder is the same for all files: define and create it once
output_dir = 'C:/Users/doumif/work/Prog/Schirrmeister2017'
os.makedirs(output_dir, exist_ok=True)

for subject in subject_list:
    session_keys = sorted(m_data[subject])
    for idx, session in enumerate(session_keys, start=1):
        # Read the runs of the session being iterated (not a fixed session key)
        raw_tr = m_data[subject][session]['0train']
        raw_te = m_data[subject][session]['1test']
        # mne.concatenate_raws modifies the first element of the list in place;
        # a copy keeps the recordings stored in m_data unchanged, so this cell
        # can be run again without stacking data or channels.
        raw_session = mne.concatenate_raws([raw_tr.copy(), raw_te])

        # Get events from annotations
        events_from_annot, event_dict = mne.events_from_annotations(raw_session)
        # Create an empty stim channel: one row, one column per sample
        stim_data = np.zeros((1, len(raw_session.times)))
        # Place the code of each event at its sample position. Event sample
        # numbers are offset by first_samp whereas the array starts at index 0.
        event_positions = events_from_annot[:, 0] - raw_session.first_samp
        stim_data[0, event_positions] = events_from_annot[:, 2]
        # Add the stim channel to the raw object
        info = mne.create_info(['STIM'], raw_session.info['sfreq'], ['stim'])
        stim_raw = mne.io.RawArray(stim_data, info)
        raw_session.add_channels([stim_raw], force_update_info=True)

        raw_decimated = decimate(raw_session, sfreq, decimation_factor)
        data = raw_decimated.get_data()

        # Transpose to get dataT with shape (total_timesamples, n_channels)
        dataT = data.T
        n_times, n_channels = dataT.shape

        # Build the timestamp column and the header (empty name for the
        # timestamps, then the channel indices as strings)
        timestamps = np.arange(n_times, dtype=int)
        datacsv = np.column_stack((timestamps, dataT))
        header = [""] + [str(i) for i in range(n_channels)]
        df = pd.DataFrame(datacsv, columns=header)
        # column_stack turned the timestamps into floats: store them as integers
        df[""] = df[""].astype(int)

        # Swap the values of the stim channel (last column). Replacements are
        # applied in order; code 5 is a temporary slot for "feet".
        stim_col = str(n_channels - 1)
        label_moves = (
            (1, 5),  # feet -> 5 (temporary)
            (2, 1),  # left hand -> 1
            (4, 2),  # right hand -> 2
            (3, 4),  # rest -> 4
            (5, 3),  # feet -> 3
        )
        for old_code, new_code in label_moves:
            df[stim_col] = df[stim_col].replace({old_code: new_code})

        subject_str = f"{int(subject):02d}"
        session_str = f"{idx:02d}"
        filename = f"subject_{subject_str}_session_{session_str}.csv"
        filepath = os.path.join(output_dir, filename)
        df.to_csv(filepath, index=False)

        # Display the number of samples carrying each event code
        events = df[stim_col].values
        n_lh = len(events[events == 1])
        n_rh = len(events[events == 2])
        n_f = len(events[events == 3])
        rest = len(events[events == 4])
        print(f"Nombre de Left hand (1): {n_lh}")
        print(f"Nombre de Right hand (2): {n_rh}")
        print(f"Nombre de feet (3): {n_f}")
        print(f"Nombre de rest (4): {rest}")

