# Importing data

In [1]:
from glob import glob
import re, mne, numpy as np, pandas as pd
import torch, argparse

In [2]:
#Your path to the data
main_path = "../BCI_Database_Processed/*"

In [3]:
def df_attributes():
    
    #Retrieving attributes of all participants so we can separate them based on specific conditions
    attributes_path = glob("../BCI_Database/*")[3]
    print("Path :", attributes_path)

    attributes = pd.read_excel(attributes_path, header = None)

    attributes.columns = attributes.iloc[2]
    attributes = attributes.iloc[3:63].reset_index(drop=True)

    names = attributes.columns.tolist()
    print("Columns :", names)

    #Using a dictionnary to fill NA values for every column that contains Na
    D = {None : "Nothing/Unknown"}
    attributes = attributes.replace(D)   
    
    #Reject every participant that had some issues in the EEG data acquisition
    rejected = attributes[attributes['COMMENTS'] != "Nothing/Unknown"]['SUJ_ID']
    rejected = rejected.tolist()
    
    return rejected

In [4]:
rejected = df_attributes()
rejected.append("A40")
rejected.append("A59")

Path : ../BCI_Database\Perfomances.xlsx
Columns : ['SUJ_ID', 'SUJ_gender', 'EXP_gender', 'COMMENTS', 'Perf_RUN_3', 'Perf_RUN_4', 'Perf_RUN_5', 'Perf_RUN_6', 'Birth_year', 'Vision', 'Vision_assistance', 'Symptoms', 'Level of study', 'Level_knowledge neuro', 'Meditation practice', 'Laterality answered', 'Manual activity', 'Manual activity_TXT', 'score', 'time_1', 'time_2', 'PRE_Mood', 'PRE_Mindfulness', 'PRE_Motivation', 'PRE_Hours_sleep_last_night', 'PRE_Usual_sleep', 'PRE_Level_of_alertness', 'PRE_Stimulant_doses_12h', 'PRE_Stimulant_doses_2h', 'PRE_Stim_normal', 'PRE_Tabacco', 'PRE_Tabacco_normal', 'PRE_Alcohol', 'PRE_Last_meal', 'PRE_Last_pills', 'PRE_Pills_TXT', 'POST_Mood', 'POST_Mindfulness', 'POST_Motivation', 'POST_Cognitive load', 'POST_Agentivity', 'POST_Expectations_filled', 'active', 'reflexive', 'sensory', 'intuitive', 'visual', 'verbal', 'sequential', 'global', 'A', 'B', 'C', 'E', 'F', 'G', 'H', 'I', 'L', 'M', 'N', 'O', 'Q1', 'Q2', 'Q3', 'Q4', 'IM', 'EX', 'AX', 'TM', 'IN',

  attributes = attributes.replace(D)


In [5]:
def files(main_path, baseline = False, acquisition = False, MI = True, rejected = []):
    
    #Copying the list since we will remove some subjects in the list
    reject = rejected.copy()

    single_digit = []
    for suj in rejected:
        
        if len(suj) == 2:
            single_digit.append(suj)
            reject.remove(suj)
        
        #Win some time cause 2 digits subjects are after single digit ones
        else:
            break
    
    #Until -1 to avoid the input_parameters folder
    all_file_path = glob(main_path)[:-1]

    print(f"Number of participants : {len(all_file_path)}\n")

    all_file_path = [path for path in all_file_path if not any(substring in path.split('BCI_Database_Processed')[1] for substring in reject)]
    
    #Remove A1 because of the acquisition which is shorter than other acquisition files
    all_file_path.remove("../BCI_Database_Processed\\A1")

    #Removing the subject with single digit
    for path in single_digit:
        all_file_path.remove(f"../BCI_Database_Processed\\{path}")

    print("Number of participants retained :" , len(all_file_path))

    all_baseline_path = []
    if baseline :
        all_baseline_path = []
        for path in all_file_path:
            path = glob(path + '/*baseline*')
            all_baseline_path.append(path)
    
    all_acquisition_path = []
    if acquisition :
        for path in all_file_path:
            path = glob(path + '/*acquisition*')
            all_acquisition_path.append(path)

    all_MI_path = []
    if MI :
        for path in all_file_path:
            path = glob(path + '/*online*')
            all_MI_path.append(path)

    if baseline:
        n = len(all_baseline_path)
        
    elif acquisition:
        n = len(all_acquisition_path)

    elif MI:
        n = len(all_MI_path)

    #Creating final list with empty sublists
    L = [[] for _ in range(n)]

    for i in range(n):

        if baseline:
            L[i].extend(all_baseline_path[i])

        if acquisition:
            L[i].extend(all_acquisition_path[i])

        if MI:
            L[i].extend(all_MI_path[i])
    
    #Find participants selected
    pattern = r'A\d+'

    return L

In [6]:
all_files = files(main_path, baseline=False, acquisition=True, MI=True, rejected=rejected)
print(f"\nNumber of files per participant : {len(all_files[0])}\nFiles for participant A10 : {all_files[0]}")

Number of participants : 60

Number of participants retained : 39

Number of files per participant : 6
Files for participant A10 : ['../BCI_Database_Processed\\A10\\A10_R1_acquisition.set', '../BCI_Database_Processed\\A10\\A10_R2_acquisition.set', '../BCI_Database_Processed\\A10\\A10_R3_onlineT.set', '../BCI_Database_Processed\\A10\\A10_R4_onlineT.set', '../BCI_Database_Processed\\A10\\A10_R5_onlineT.set', '../BCI_Database_Processed\\A10\\A10_R6_onlineT.set']


In [7]:
baseline_files = files(main_path, baseline=True, acquisition=False, MI=False, rejected=rejected)
print(f"\nNumber of files per participant : {len(baseline_files[0])} \nFiles for participant A10 : {baseline_files[0]}")

Number of participants : 60

Number of participants retained : 39

Number of files per participant : 2 
Files for participant A10 : ['../BCI_Database_Processed\\A10\\A10_CE_baseline.set', '../BCI_Database_Processed\\A10\\A10_OE_baseline.set']


In [8]:
#Reading data
def read_data(file_path):

    #Data are already preprocessed so we just load them

    # Importing data
    data = mne.io.read_raw_eeglab(file_path, eog='auto', preload=True)

    #Creating events based on the article annotation (768 = begin of the trial, 769 = left-hand MI, 770 = right-hand MI)
    events = mne.events_from_annotations(data, event_id={'Start of Trial, Trigger at t=0s' : 0, 'class1, Left hand\t- cue onset (BCI experiment)' : 1, 'class2, Right hand\t- cue onset (BCI experiment)': 2})[0]
    
    #Creating epochs for every trial
    epochs = mne.Epochs(data, events, event_id={'start': 0}, tmin=0, tmax=8, baseline=(0, 2), preload=True).drop_channels(['EOG1', 'EOG2', 'EOG3', 'EMGg', 'EMGd'])
    
    array = epochs['start'].get_data(copy=True)
    
    #Retrieving the sampling rate
    global sfreq
    sfreq = int(data.info['sfreq'])

    #Retrieving channel names
    global channel_names
    channel_names = epochs.ch_names

    print(f"\nSampling rate : {sfreq}")
    return array, events

def read_baseline(file_path):

    # Importing data
    data = mne.io.read_raw_eeglab(file_path, eog='auto', preload=True)

    #Creating epochs (same # of epochs as the old one)
    epochs = mne.make_fixed_length_epochs(data, duration = 4.6, overlap = 0, preload=True).drop_channels(['EOG1', 'EOG2', 'EOG3', 'EMGg', 'EMGd'])

    array = epochs.get_data()

    return array

In [9]:
def read_all_data(all_files):

    #Label array to store every ID for each participant
    label_subject_acquisition = []
    label_subject_MI = []

    #Regex pattern to find the ID of the subject
    pattern = r'A\d+'

    data_acquisition_list = []  #Metadata of the acquisition
    data_MI_list = []           #Metadata of the MI

    events_acquisition = []            #Storing the events for every signal
    events_MI = []            #Storing the events for every signal

    for subject_files in all_files:
        
        #Storing acquisition for each subject (each signal selected)
        data_subject_acquisition = []

        #Storing MI for each subject
        data_subject_MI = []
        label = re.findall(pattern, subject_files[0])[0]

        for j in subject_files:

            data, event = read_data(j)

            if 'acquisition' in j:
                print(j)
                data_subject_acquisition.append(data)
                events_acquisition.append(event)

            elif 'onlineT' in j:
                data_subject_MI.append(data)
                events_MI.append(event)

        
        #Creating the array and reshaping it (#files, # epochs, #channels, resolution) -> (#epochs * #files, #channels, #resolution)
        data_subject_acquisition = np.array(data_subject_acquisition)

        #Reshaping acquisition matrix
        files_acquisition, epochs_acquisition, channels_acquisition, resolution_acquisition = data_subject_acquisition.shape
        data_subject_acquisition = data_subject_acquisition.reshape((files_acquisition*epochs_acquisition, channels_acquisition, resolution_acquisition))
        
        #Adding the labels to the arrays
        label_subject_acquisition.append([label] *  (files_acquisition*epochs_acquisition))

        #Adding it to the metadata
        data_acquisition_list.append(data_subject_acquisition)


        #Same for motor imagery
        data_subject_MI = np.array(data_subject_MI)

        #Reshaping motor imagery matrix
        files_MI, epochs_MI, channels_MI, resolution_MI = data_subject_MI.shape
        data_subject_MI = data_subject_MI.reshape((files_MI*epochs_MI, channels_MI, resolution_MI))

        #Adding the labels to the arrays
        label_subject_MI.append([label] *  (files_MI*epochs_MI))

        #Adding it to the metadata
        data_MI_list.append(data_subject_MI)
        
    #Creating the array and reshaping it to the good shape
    events_acquisition = np.array(events_acquisition)
    events_acquisition = events_acquisition.reshape(-1, 3)

    events_MI = np.array(events_MI)
    events_MI = events_MI.reshape(-1, 3)

    return np.array(data_acquisition_list), np.array(data_MI_list), np.array(label_subject_acquisition), np.array(label_subject_MI), events_acquisition, events_MI

In [10]:
%%capture
#Execution takes around 6mins
data_array_acquisition, data_array_MI, label_array_acquisition, label_array_MI, events_acquisition, events_MI = read_all_data(all_files)

In [11]:
data_array_acquisition.shape

(39, 80, 27, 4097)

# Anonymizing the dataset

In [12]:
import sys
import os

# Obtenez le chemin du répertoire Scripts
scripts_dir = r'C:\Users\gricih01\OneDrive - Université du Québec en Outaouais\Documents\MI BCI Database and scripts\Scripts\MI_BCI_Anonymizing_data'
print(scripts_dir)

# Obtenez le chemin du répertoire unlearnable-privacy-master
parent_dir = r'C:\Users\gricih01\OneDrive - Université du Québec en Outaouais\Documents\MI BCI Database and scripts\Scripts'
unlearnable_privacy_dir = r'C:\Users\gricih01\OneDrive - Université du Québec en Outaouais\Documents\MI BCI Database and scripts\Scripts\unlearnable_privacy-master'

# Ajoutez le répertoire unlearnable-privacy-master au chemin système
sys.path.append(unlearnable_privacy_dir)

# Vérifiez si le chemin est correct
print(sys.path, '\n\nDone')

C:\Users\gricih01\OneDrive - Université du Québec en Outaouais\Documents\MI BCI Database and scripts\Scripts\MI_BCI_Anonymizing_data
['C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\python311.zip', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\DLLs', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0', '', 'C:\\Users\\gricih01\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages', 'C:\\Users\\gricih01\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\win32', 'C:\\Users\\gricih01\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCa

In [13]:
x_train = data_array_acquisition

y_train = events_acquisition[(events_acquisition[:, 2] == 1) | (events_acquisition[:, 2] == 2), -1]
y_train = y_train.reshape(39, 80)

s_train = label_array_acquisition 

In [14]:
parser = argparse.ArgumentParser(description='Model train')
parser.add_argument('--gpu_id', type=str, default='1')
parser.add_argument('--dataset', type=str, default='EPFLnoClip')

parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--epochs', type=int, default=150)
parser.add_argument('--optim', type=str, default='Adam')
parser.add_argument('--lr', type=float, default=0.01)

#We comment one of these two lines since they do not work (Jupyter conflict)
#parser.add_argument('--feature_c', type=str, default='EEGNet')
parser.add_argument('--feature_d', type=str, default='EEGNet')

parser.add_argument('--subject_wise', type=bool, default=False)
parser.add_argument('--alpha', type=float, default=1.0) 
parser.add_argument('--log', type=str, default='')

args = parser.parse_args()
args.device = f'cuda:{args.gpu_id}' if torch.cuda.is_available() else 'cpu'

In [15]:
args.feature_c = 'EEGNet'
args.feature_d = 'EEGNet'

for arg in vars(args):
    print(f"{arg}: {getattr(args, arg)}")

gpu_id: 1
dataset: EPFLnoClip
batch_size: 128
epochs: 150
optim: Adam
lr: 0.01
feature_d: EEGNet
subject_wise: False
alpha: 1.0
log: 
device: cpu
feature_c: EEGNet


In [16]:
def str_to_int(S):
    #To convert string labels into int since
    x, y = S.shape
    S_int = np.zeros((x, y))

    for i in range(x):
        for j in range(y):
            
            #Recuperating the integer of the subject
            subject = S[i, j][1:]

            S_int[i, j] = subject

    return S_int

In [17]:
s_train = str_to_int(s_train)
print(s_train[0, :], s_train.shape)

[10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.
 10. 10. 10. 10. 10. 10. 10. 10.] (39, 80)


In [18]:
#s_train = np.hstack(s_train)

#x_train have to be 4-dimensionals (otherwise the code below will not work)
print(x_train.shape, s_train.shape, y_train.shape)

(39, 80, 27, 4097) (39, 80) (39, 80)


In [19]:
x_train = torch.FloatTensor(x_train)
x_merged = x_train.view(39 * 80, 27, 4097)
print(f"shape of x_train : {x_train.shape}")
print(f"Type of x_merged : {type(x_train)}\n")


y_train = torch.IntTensor(y_train)
y_sorted = y_train -1
print(f"shape of y_sorted : {y_sorted.shape}")
print(f"Type of y_sorted : {type(y_sorted)}\n")


s_train = torch.IntTensor(s_train)
print(f"shape of s_train : {s_train.shape}")
print(f"Type of s_train : {type(s_train)}")

shape of x_train : torch.Size([39, 80, 27, 4097])
Type of x_merged : <class 'torch.Tensor'>

shape of y_sorted : torch.Size([39, 80])
Type of y_sorted : <class 'torch.Tensor'>

shape of s_train : torch.Size([39, 80])
Type of s_train : <class 'torch.Tensor'>


In [20]:
#Since we're modifying their code, we need to reimport the library for our tests
import importlib
import unlearnable_gen
import utils.pytorch_utils
from models import LoadModel, Classifier, Discriminator, CalculateOutSize

importlib.reload(unlearnable_gen)
importlib.reload(utils.pytorch_utils)

<module 'utils.pytorch_utils' from 'C:\\Users\\gricih01\\OneDrive - Université du Québec en Outaouais\\Documents\\MI BCI Database and scripts\\Scripts\\unlearnable_privacy-master\\utils\\pytorch_utils.py'>

In [21]:
print(f"Shape of x_merged : {x_merged.shape}")

tensors = [x_train, y_sorted, s_train]
for tensor in tensors:
    print(tensors[0].size(0), tensor.size(0))



assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors), "Problème de tailles"

# Assurez-vous que x_train est de la forme (3120, 1, 27, 4097)
x_merged = x_merged.reshape(-1, 1, 27, 4097)


chans, samples = x_merged.shape[1], x_merged.shape[0]

feature_ext = LoadModel(model_name=args.feature_d, Chans=chans, Samples=samples)

#feature_ext

classifier = Classifier(
    input_dim=CalculateOutSize(feature_ext, chans, samples),
    n_classes=len(np.unique(y_sorted.numpy()))).to(args.device)
discriminator = Discriminator(
    input_dim=CalculateOutSize(feature_ext, chans, samples),
    n_subjects=len(np.unique(s_train.numpy()))).to(args.device)

classifier

Shape of x_merged : torch.Size([3120, 27, 4097])
39 39
39 39
39 39


Classifier(
  (block): Sequential(
    (0): Linear(in_features=776, out_features=2, bias=True)
  )
)

In [27]:
print(y_sorted.shape)
print(y_sorted)
print(y_train)

torch.Size([39, 80])
tensor([[0, 1, 0,  ..., 1, 0, 0],
        [1, 0, 1,  ..., 0, 1, 0],
        [0, 0, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 0, 0],
        [1, 1, 1,  ..., 1, 1, 0],
        [1, 0, 0,  ..., 1, 1, 0]], dtype=torch.int32)
tensor([[1, 2, 1,  ..., 2, 1, 1],
        [2, 1, 2,  ..., 1, 2, 1],
        [1, 1, 2,  ..., 1, 1, 1],
        ...,
        [2, 2, 2,  ..., 2, 1, 1],
        [2, 2, 2,  ..., 2, 2, 1],
        [2, 1, 1,  ..., 2, 2, 1]], dtype=torch.int32)


In [None]:
for ten
    count = [0.0] * len(np.unique(y.numpy()))
    for label in y:
        count[label] += 1.0
    count = [len(y) / x for x in count]
    weight = [0.0] * len(y)
    for idx, label in enumerate(y):
        weight[idx] = count[label]

In [22]:
# Import function from .py files

# x_train: raw EEG training data, y_sorted: task labels, s_train: identity labels
# generated by sample-wise noise
#u_x_train = unlearnable_gen.unlearnable(x_train, y_train, s_train, args)

# generated by subject-wise noise
u_x_train = unlearnable_gen.unlearnable_optim(x_train, y_sorted, s_train, args)

TypeError: only integer tensors of a single element can be converted to an index

: 