RAW (Base repo) to CSV

This code converts the datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for bi2013a-AO

---

**Important Note: bi2013a**

The original **bi2013a** files are organized with one folder per subject/session. Each folder contains 4 CSV files (1.csv to 4.csv).

**File Mapping:**
To standardize the data, a function splits these files into four distinct databases:

* **bi2013a-AT**: uses `1.csv` (Adaptive - Training)
* **bi2013a-AO**: uses `2.csv` (Adaptive - Online)
* **bi2013a-NAT**: uses `3.csv` (Non-Adaptive - Training)
* **bi2013a-NAO**: uses `4.csv` (Non-Adaptive - Online)

After this separation, the processing is the same as for the other datasets.

In [1]:
import numpy as np
import pandas as pd
import os

# Import decimate 
import sys
import os
sys.path.append(os.path.abspath('..'))
from ConvTools import decimate, rearrange, df_to_mne

In [None]:
# Build the bi2013a-AO database from the original bi2013a export.
# csv_num = 2 selects `2.csv` (Adaptive - Online) from each subject/session folder.
csv_num = 2

# Input: root of the original bi2013a CSV tree (one folder per subject/session).
source = "D:\\Travail\\backupPCgipsa\\taf\\officework\\gipsa bases\\CSV zenodo bi2013a\\"
# Output: directory that receives the bi2013a-AO files; the later cells read from it.
file_dir = "D:\\Travail\\backupPCgipsa\\taf\\officework\\gipsa bases\\CSV bi2013a-AO\\"

# `rearrange` comes from ConvTools; presumably it writes one CSV per
# subject/session into `file_dir` -- confirm against ConvTools.
rearrange(csv_num, source, file_dir)

In [2]:
# Single recording used to try the conversion steps before running the batch.
temp_file = (
    "D:\\Travail\\backupPCgipsa\\taf\\officework\\gipsa bases\\"
    "CSV bi2013a-AO\\subject_01_session_01.csv"
)

In [None]:
# Load the test recording (first row is the header) as a plain NumPy array.
data = np.array(pd.read_csv(temp_file, header=0))

# Label standardization on the stim column (the last column):
#   33285 -> 2 (counted as Target in the batch summary)
#   33286 -> 1 (counted as Non-Target in the batch summary)
data[data[:, -1] == 33285, -1] = 2
data[data[:, -1] == 33286, -1] = 1

# Back to a DataFrame (default integer column labels) for df_to_mne.
df = pd.DataFrame(data)

In [None]:
# Downsampling parameters
stim_name = 'STI'        # stim channel name handed to `decimate`
decimation_factor = 2    # factor handed to `decimate`
sfreq = 512              # sampling frequency of the input (presumably Hz -- confirm)

# Wrap the DataFrame in the object returned by df_to_mne, then decimate it.
raw = df_to_mne(df, sfreq)
raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)

# get_data() presumably returns (channels, samples); transposing gives one
# row per sample, which is the layout the following cells work with.
data = raw_decimated.get_data()
dataT = data.transpose()

In [None]:
# Sanity check on the stim channel (last column of the transposed data):
# list every distinct value together with how many samples carry it.
stim_col = dataT[..., -1]
unique_vals, counts = np.unique(stim_col, return_counts=True)

for val, count in zip(unique_vals, counts):
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# One row per sample, one column per channel (the stim column included).
n_times, n_channels = dataT.shape

# Timestamps are simply the sample index 0..n_times-1.
timestamps = np.arange(n_times, dtype=int)

# Header: an empty name for the timestamp column, then "0", "1", ... per channel.
header = [""] + list(map(str, range(n_channels)))

# Prepend the timestamp column to the signal matrix.
data_with_timestamp = np.concatenate((timestamps[:, np.newaxis], dataT), axis=1)

# Stacking promotes the timestamps to the matrix dtype, so cast that column
# back to int to drop the decimals.
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Write the single-file result to "data.csv" in the current working directory
# (no index column) so the output format can be inspected by hand.
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Batch conversion of every recording found in `file_dir`.
#
# Only regular *.csv files are kept, and they are processed in sorted order:
# os.listdir returns entries in arbitrary order and also lists sub-directories
# and stray files (e.g. desktop.ini), which would make pd.read_csv fail
# part-way through the batch.
subject_list = sorted(
    os.path.join(file_dir, file)
    for file in os.listdir(file_dir)
    if file.lower().endswith(".csv")
    and os.path.isfile(os.path.join(file_dir, file))
)

# Signal Processing Parameters
sfreq = 512              # sampling frequency of the input files
decimation_factor = 2    # factor handed to `decimate`
stim_name = 'STI'        # stim channel name handed to `decimate`

for subject in subject_list:
    # Read the data (first row is the header) as a plain NumPy array
    data = np.array(pd.read_csv(subject, header=0))

    # Label standardization on the stim column (last column):
    # 33285 -> 2 (Target) and 33286 -> 1 (Non-Target)
    data[data[:, -1] == 33285, -1] = 2
    data[data[:, -1] == 33286, -1] = 1

    # Format as DataFrame
    df = pd.DataFrame(data)

    # Convert the DataFrame with df_to_mne and apply the decimation
    raw = df_to_mne(df, sfreq)
    raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)
    data = raw_decimated.get_data()

    # Transpose data to (time_samples, channels) format
    dataT = data.T

    # Generate integer timestamps (the sample index) and prepare the CSV header:
    # an empty name for the timestamp column, then "0", "1", ... per channel
    n_times, n_channels = dataT.shape
    timestamps = np.arange(n_times, dtype=int)
    data_with_timestamp = np.column_stack((timestamps, dataT))
    header = [""] + [str(i) for i in range(n_channels)]

    # Stacking promotes the timestamps to the signal dtype, so cast that
    # column back to int after building the DataFrame
    df = pd.DataFrame(data_with_timestamp, columns=header)
    df[""] = df[""].astype(int)

    # The output keeps the input's base name and is written to the current
    # working directory.
    # NOTE(review): if the working directory is `file_dir`, this overwrites
    # the input file -- confirm the notebook is run from elsewhere.
    filename = os.path.basename(subject)

    # Export the processed DataFrame to CSV
    df.to_csv(filename, index=False)
    print(f"Saved file: {filename}")

    # Display the number of events of each class left in the stim column
    events = df.iloc[:, -1]
    n_nt = int((events == 1).sum())
    n_t = int((events == 2).sum())
    print(f"Number of Non-Target (1): {n_nt}")
    print(f"Number of Target (2): {n_t}")