RAW (Base repo) to CSV

This code converts the datasets from RAW format to CSV format using MOABB.

It has been specifically conceived for BCI data.

This script is for bi2015a-2


In [None]:
import numpy as np
import pandas as pd
import os

# Import decimate 
import sys
import os
sys.path.append(os.path.abspath('..'))
from ConvTools import decimate, df_to_mne

In [None]:
# Single-file test: session 2 recording of subject 01
temp_file = r"D:\Travail\backupPCgipsa\taf\officework\gipsa bases\CSV bi2015a\subject_01_csv\subject_01_session_02.csv"

In [None]:
# Load the headerless CSV straight into a NumPy array
data = np.array(pd.read_csv(temp_file, header=None))

# Merge the two stimulation columns into a single event column.
# Indices are 0-based: 33 is the non-target column, 34 the target column.
# Target flags are doubled so they read 2, copied onto column 33, and the
# now redundant column 34 is removed.
data[:, 34] *= 2
target_rows = data[:, 34] == 2
data[target_rows, 33] = 2
data = np.delete(data, 34, axis=1)

# Convert to Volts: scale every column except the first and the last by 1e-6
# (presumably the signal is stored in microvolts -- confirm against the dataset)
data[:, 1:-1] = 1e-6 * data[:, 1:-1]

df = pd.DataFrame(data)

In [None]:
# Downsampling settings: sampling frequency, decimation factor, stim channel name
sfreq, decimation_factor, stim_name = 512, 2, 'STI'

# Hand the DataFrame to the ConvTools helpers (presumably building an MNE
# object and decimating it -- see ConvTools), then pull the samples back out
raw = df_to_mne(df, sfreq)
raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)
data = raw_decimated.get_data()

# Swap the axes of the array returned by get_data()
dataT = np.transpose(data)

In [None]:
# The last column is the stim channel
stim_col = dataT[:, -1]

# Report how many times each distinct stim value occurs
for val, count in zip(*np.unique(stim_col, return_counts=True)):
    print(f"Value : {val}, Occurrence count : {count}")

In [None]:
# Header: an unnamed first column for the sample index, then one numbered
# column per channel
n_times, n_channels = dataT.shape
header = [""] + list(map(str, range(n_channels)))

# Prepend a running sample index (0, 1, 2, ...) to the data
timestamps = np.arange(n_times, dtype=int)
df = pd.DataFrame(np.column_stack((timestamps, dataT)), columns=header)

# Stacking the index next to the data stores it with the data's dtype
# (presumably float); cast it back so it is written without decimals
df = df.astype({"": int})

In [None]:
# Sanity check: write the single-file result to disk so it can be inspected
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Batch conversion: process the session 2 recording of every subject.
# Directory holding one `subject_XX_csv` folder per subject
file_dir = "D:\\Travail\\backupPCgipsa\\taf\\officework\\gipsa bases\\CSV bi2015a\\"

# os.listdir() returns every entry of the directory in arbitrary order, so
# keep only the subject folders (a stray file or folder would break the name
# parsing below) and sort them to process subjects in a reproducible order.
subject_list = sorted(
    os.path.join(file_dir, entry)
    for entry in os.listdir(file_dir)
    if entry.startswith("subject_")
    and os.path.isdir(os.path.join(file_dir, entry))
)

# parameters
sfreq = 512
decimation_factor = 2
stim_name = 'STI'

for subject in subject_list:

    # Extract the subject number 'XX' from the folder name 'subject_XX_csv'
    subject_folder = os.path.basename(subject)
    sub_num = subject_folder.split('_')[1]

    # Name of the session 2 file; it is used both for the input file inside
    # the subject folder and for the output file in the working directory
    filename = f"subject_{sub_num}_session_02.csv"
    csv_file_path = os.path.join(subject, filename)

    # Read the data (no header row) into a NumPy array
    data = pd.read_csv(csv_file_path, header=None)
    data = np.array(data)

    # Merge the two stimulation columns into a single event column.
    # Indices are 0-based: 33 is the non-target column, 34 the target column.
    # Target flags are doubled so they read 2, copied onto column 33, and the
    # now redundant column 34 is removed.
    data[:, 34] = data[:, 34] * 2
    mask = data[:, 34] == 2
    data[mask, 33] = 2
    data = np.delete(data, [34], axis=1)

    # Convert to Volts: scale every column except the first and the last
    data[:, 1:-1] = data[:, 1:-1] * 1e-6
    df = pd.DataFrame(data)

    # Downsampling through the ConvTools helpers
    raw = df_to_mne(df, sfreq)
    raw_decimated = decimate(raw, sfreq, decimation_factor, stim_name)
    data = raw_decimated.get_data()

    # Transpose
    dataT = data.T

    # Prepend a running sample index and build the header: an unnamed first
    # column followed by one numbered column per channel
    n_times, n_channels = dataT.shape
    timestamps = np.arange(n_times, dtype=int)
    data_with_timestamp = np.column_stack((timestamps, dataT))
    header = [""] + [str(i) for i in range(n_channels)]

    # Cast the index column back to int so it is written without decimals
    df = pd.DataFrame(data_with_timestamp, columns=header)
    df[""] = df[""].astype(int)

    # Export the processed DataFrame to CSV (written to the working directory)
    df.to_csv(filename, index=False)
    print(f"Saved file: {filename}")

    # Display the number of events of each class found in the last column
    events = df.iloc[:, -1]
    n_nt = int((events == 1).sum())
    n_t = int((events == 2).sum())
    print(f"Number of Non-Target (1): {n_nt}")
    print(f"Number of Target (2): {n_t}")
