RAW (Base repo) to CSV

This code converts the data sets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for bi2012-O

Important note : 

The files were moved manually from nested folders (subject_XX/subject_XX/) directly into CSV bi2012/subject_XX/. 

Each subject folder now contains the online and training files in CSV format.

I also deleted the __MACOSX folders and empty subdirectories to keep the directory clean.

In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Single-file trial run: the 'online' recording of subject 01.
# NOTE: absolute, machine-specific Windows path; adjust it to your own setup.
temp_file = r'D:\Travail\backupPCgipsa\taf\officework\gipsa bases\CSV bi2012\subject_01\online.csv'

In [None]:
# Load the header-less CSV into a NumPy array
df = pd.read_csv(temp_file, header=None)
data = np.array(df)

# Keep only the rows in which no value is NaN
rows_with_nan = np.isnan(data).any(axis=1)
mask_valid_rows = ~rows_with_nan
data = data[mask_valid_rows]

In [None]:
# Merge the two event columns (18 and 19) into a single stim column.
# Doubling column 19 turns every entry equal to 1 into 2.
data[:, 19] = 2 * data[:, 19]

# Wherever column 19 now holds a 2, write that 2 into column 18 as well,
# so column 18 alone carries both event codes.
mask = data[:, 19] == 2
data[:, 18] = np.where(mask, 2, data[:, 18])

# Column 19 is redundant after the merge
data = np.delete(data, 19, axis=1)

# Scale every column between the first (timestamp) and the last (stim) by 1e-6.
# NOTE(review): the previous comment said "convert to µvolt"; a 1e-6 factor
# looks more like µV -> V. Confirm the intended unit.
data[:, 1:-1] = 1e-6 * data[:, 1:-1]

# Drop the original timestamp column (index 0)
data = np.delete(data, 0, axis=1)

In [None]:
# The stim channel is the last column of the processed array
stim_col = data[:, -1]

# Distinct stim values together with how often each one occurs
unique_vals, counts = np.unique(stim_col, return_counts=True)

# Build one report line per distinct value, then print them in order
report_lines = [
    f"Value : {val}, Occurrence count : {count}"
    for val, count in zip(unique_vals, counts)
]
for line in report_lines:
    print(line)

In [None]:
# Build a fresh sample index (0, 1, 2, ...) to serve as the timestamp column
n_times, n_channels = data.shape
timestamps = np.arange(n_times, dtype=int)

# Header: an empty name for the timestamp column, then "0", "1", ... per data column
header = [""]
header.extend(str(i) for i in range(n_channels))

# Prepend the timestamps to the data; stacking with float data makes them
# floats, so the timestamp column is cast back to int afterwards
data_with_timestamp = np.column_stack((timestamps, data))
df = pd.DataFrame(data_with_timestamp, columns=header)
df[""] = df[""].astype(int)

In [None]:
# Write the single-file result to the working directory for a visual check
# (the DataFrame index is not written)
test_output_path = "data.csv"
df.to_csv(test_output_path, index=False)

In [None]:
# Batch conversion of every subject folder
# Path to the directory containing one sub-folder per subject
file_dir = "D:\\Travail\\backupPCgipsa\\taf\\officework\\gipsa bases\\CSV bi2012"


def convert_raw_array(raw):
    """Turn one raw recording table into the exported sample matrix.

    Steps, in order:
      1. drop every row that contains a NaN,
      2. merge the two event columns (18 and 19) into column 18, using
         code 2 wherever column 19 was 1, then delete column 19,
      3. scale the columns between the first and the last one by 1e-6,
      4. delete the original timestamp column (index 0).

    NOTE(review): the original comment called step 3 "convert to µvolt";
    a 1e-6 factor looks more like µV -> V. Confirm the intended unit.

    Parameters
    ----------
    raw : array-like or DataFrame, shape (n_samples, n_columns)
        Numeric table with at least 20 columns. The code assumes column 0
        is a timestamp and columns 18/19 are the event columns.

    Returns
    -------
    numpy.ndarray
        Float array with two columns fewer than ``raw``.
    """
    # Work on a float copy: the caller's data is never modified, and the
    # scaled values are not truncated when the input holds integers.
    data = np.array(raw, dtype=float)

    # Remove rows containing NaN values
    data = data[~np.isnan(data).any(axis=1)]

    # Multiply column 19 by 2 (1 becomes 2), copy those 2s into column 18,
    # then drop the now redundant column 19
    data[:, 19] = data[:, 19] * 2
    mask = data[:, 19] == 2
    data[mask, 18] = 2
    data = np.delete(data, [19], axis=1)

    # Scale everything between the timestamp (first) and stim (last) column
    data[:, 1:-1] = data[:, 1:-1] * 1e-6

    # Delete the original timestamp column (index 0)
    return np.delete(data, [0], axis=1)


def add_timestamps(data):
    """Return ``data`` as a DataFrame with a leading integer sample index.

    The index column is named "" and counts 0, 1, 2, ...; the remaining
    columns are named "0", "1", ... in their original order.
    """
    n_times, n_channels = data.shape
    timestamps = np.arange(n_times, dtype=int)
    header = [""] + [str(i) for i in range(n_channels)]

    # Stacking with float data turns the timestamps into floats, so the
    # column is cast back to int to avoid decimals in the CSV
    df = pd.DataFrame(np.column_stack((timestamps, data)), columns=header)
    df[""] = df[""].astype(int)
    return df


def process_subject(subject_dir, output_dir="."):
    """Convert ``online.csv`` of one subject folder and write the result.

    The output file is named ``subject_XX_session_01.csv``, where ``XX`` is
    the part of the folder name after the first underscore. Folders without
    such a part (for example ``__MACOSX``) or without an ``online.csv`` are
    skipped with a message instead of aborting the whole batch.

    Parameters
    ----------
    subject_dir : str
        Path of the subject folder.
    output_dir : str, optional
        Directory the CSV is written to; defaults to the working directory.

    Returns
    -------
    str or None
        Path of the written file, or ``None`` when the folder was skipped.
    """
    # Extract 'XX' from 'subject_XX'
    subject_folder = os.path.basename(subject_dir)
    name_parts = subject_folder.split('_')
    if len(name_parts) < 2 or not name_parts[1]:
        print(f"Skipping {subject_folder}: folder name is not of the form subject_XX")
        return None
    subject_num = name_parts[1]

    # Path to the session 1 CSV file within the subject folder
    csv_file_path = os.path.join(subject_dir, "online.csv")
    if not os.path.isfile(csv_file_path):
        print(f"Skipping {subject_folder}: online.csv not found")
        return None

    # Read, convert and index the recording
    data = convert_raw_array(pd.read_csv(csv_file_path, header=None))
    df = add_timestamps(data)

    # Export the processed DataFrame to CSV
    filename = f"subject_{subject_num}_session_01.csv"
    output_path = os.path.join(output_dir, filename)
    df.to_csv(output_path, index=False)
    print(f"Saved file: {filename}")

    # Display how many events of each code ended up in the stim column
    events = df.iloc[:, -1]
    n_nt = int((events == 1).sum())
    n_t = int((events == 2).sum())
    print(f"Number of Non-Target (1): {n_nt}")
    print(f"Number of Target (2): {n_t}")
    return output_path


# In a notebook or when run as a script __name__ is "__main__", so the batch
# still runs; the guard only keeps it from running when the code is imported.
if __name__ == "__main__":
    # Sorted so that subjects are processed in a reproducible order
    subject_list = sorted(
        os.path.join(file_dir, entry)
        for entry in os.listdir(file_dir)
        if os.path.isdir(os.path.join(file_dir, entry))
    )

    for subject in subject_list:
        process_subject(subject)