RAW (MAT) to CSV

This code converts the datasets from RAW (MAT) format to CSV format.

It was designed specifically for BCI data.

This script was written for SPSM2025.


In [None]:
import numpy as np
import pandas as pd
import os
from scipy.io import loadmat

In [None]:
# Data treatment for 1 run: load one .mat recording, rescale the signal and
# append a "stim" column carrying the class label at each trial index.

filepath = "C:\\Users\\doumif\\Downloads\\Calibration Phase Data\\A01\\A01_run1"
# struct_as_record=False exposes the fields of the "data" struct as attributes;
# squeeze_me=True drops singleton dimensions from the loaded arrays.
mat = loadmat(filepath, struct_as_record=False, squeeze_me=True)["data"]

X = mat.X
y = mat.y
trial = mat.trial
classes = mat.classes  # not used below; kept available for inspection

Xn = np.float64(X * 1e-6)  # µV to V and conversion to float64
yn = np.where(y == 3, 4, y)  # standardization of classes (here rest 3 ==> 4)
stim = np.zeros(X.shape[0])  # stim column: one entry per row of X, zero by default

# Fill the stim column with the label of each trial at that trial's index.
# np.atleast_1d: with squeeze_me=True a run holding a single trial yields
# scalars for y/trial, which zip() cannot iterate.
# NOTE(review): trial values are used directly as 0-based row indices; if the
# file stores MATLAB-style 1-based indices the labels land one sample late -
# confirm against the dataset description.
for label, time_idx in zip(np.atleast_1d(yn), np.atleast_1d(trial)):
    stim[time_idx] = label

run = np.column_stack([Xn, stim])  # concatenate X + stim (stim is the last column)
run = run[1:, :]  # drop the first row (per the original note, it is filled with zeros)

In [None]:
# Loop over ALL subjects: for each subject folder, convert its three runs,
# stack them into one session and save that session as a CSV file in the
# current working directory.

subjects_folder = "C:\\Users\\doumif\\Downloads\\Calibration Phase Data\\"
# Subject folders are the directories whose name starts with 'A'.
# sorted(): os.listdir returns entries in arbitrary order, so sort to process
# subjects in a reproducible order.
subjects = sorted(
    d for d in os.listdir(subjects_folder)
    if d.startswith('A') and os.path.isdir(os.path.join(subjects_folder, d))
)

for subject in subjects:
    print(f"\n Treating subject {subject} ...")

    subject_path = os.path.join(subjects_folder, subject)
    runs = []  # converted runs of this subject, in run order

    # loop for the 3 runs
    for run_num in [1, 2, 3]:
        print(f"Traitement de {subject}_run{run_num}...")

        # os.path.join keeps the separator handling consistent with subject_path.
        filepath = os.path.join(subject_path, f"{subject}_run{run_num}")
        # struct_as_record=False exposes the fields of the "data" struct as
        # attributes; squeeze_me=True drops singleton dimensions.
        mat = loadmat(filepath, struct_as_record=False, squeeze_me=True)["data"]

        X = mat.X
        y = mat.y
        trial = mat.trial
        classes = mat.classes  # not used below; kept available for inspection

        # Data treatment for 1 run
        Xn = np.float64(X * 1e-6)  # µV to V and conversion to float64
        yn = np.where(y == 3, 4, y)  # standardization of classes (here rest 3 ==> 4)
        stim = np.zeros(X.shape[0])  # stim column: one entry per row of X, zero by default

        # Fill the stim column with the label of each trial at that trial's index.
        # np.atleast_1d: with squeeze_me=True a run holding a single trial
        # yields scalars for y/trial, which zip() cannot iterate.
        # NOTE(review): trial values are used directly as 0-based row indices;
        # if the file stores MATLAB-style 1-based indices the labels land one
        # sample late - confirm against the dataset description.
        for label, time_idx in zip(np.atleast_1d(yn), np.atleast_1d(trial)):
            stim[time_idx] = label

        run = np.column_stack([Xn, stim])  # concatenate X + stim (stim is the last column)
        run = run[1:, :]  # drop the first row (per the original note, it is filled with zeros)

        print(f"  {subject}_run{run_num} shape: {run.shape}")

        runs.append(run)

    # Concatenate the runs into 1 session with a single stacking call instead
    # of re-copying the growing array after every run.
    session = np.vstack(runs)
    print(f"Session shape: {session.shape}")

    # Creating timestamps and header. The "timestamps" are consecutive row
    # indices; n_channels counts every column of the session, stim included.
    n_times, n_channels = session.shape
    timestamps = np.arange(n_times, dtype=int)
    header = [""] + [str(i) for i in range(n_channels)]

    # Insert the integer timestamps as the first (unnamed) column directly, so
    # they never pass through a float array and need no cast back to int.
    df = pd.DataFrame(session, columns=header[1:])
    df.insert(0, header[0], timestamps)

    subject_num = subject[1:]  # removing 'A'
    filename = f"subject_{subject_num}_session_01.csv"
    df.to_csv(filename, index=False)

    print(f"File saved: {filename}")
