In [1]:
import sys
import mne
import scipy.io as sp
import numpy as np
import random
import pandas as pd
import multiprocessing as mp
import concurrent.futures
from mne.decoding import CSP
import pymrmr
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import logging
from scipy.io import loadmat
from scipy.signal import hamming
from scipy.signal import hann
from scipy.signal import blackman
from scipy.signal import kaiser
from scipy.signal import gaussian
from sklearn.decomposition import FastICA
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier

  from pandas import MultiIndex, Int64Index


In [2]:
# Remove every pandas display limit so frames are rendered without
# row/column truncation or line wrapping.
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

# Print NumPy arrays in full instead of summarising them.
np.set_printoptions(threshold=np.inf)

In [3]:
# --- Windowing parameters ---------------------------------------------
# Durations are multiplied by SAMPLING_RATE to obtain sample counts.
SAMPLING_RATE = 250
NUMBER_OF_CHANNELS = 64
WINDOW_TIME_LENGTH = 4
SLIDING_TIME = 4
TR_SLIDING_WINDOW_TIME = 2

WINDOW_SAMPLE_LENGTH = WINDOW_TIME_LENGTH * SAMPLING_RATE
SLIDING_POINTS = SLIDING_TIME * SAMPLING_RATE
TR_SLIDING_POINTS = TR_SLIDING_WINDOW_TIME * SAMPLING_RATE

# Second argument passed to the Kaiser window applied to every epoch.
beta = 1.5

# --- Lower-case aliases used by the feature-extraction helpers --------
num_channels = NUMBER_OF_CHANNELS
epoch_length = WINDOW_SAMPLE_LENGTH
sampling_freq = SAMPLING_RATE

# --- Experiment settings ----------------------------------------------
number_of_runs = 10
number_of_components = 10
number_of_selected_features = 10
number_of_processes = 10
number_of_bands = 9

column_names = [
    'participant', 'class1', 'class2', 'running_time', 'test_acc',
    'train_acc', 'test_size', 'train_size', 'train_block', 'test_block',
]

# trial_order[b] lists the task names of block b in trial order; the
# position of a task in the row is used as its trial index.
trial_order = [
    ['Tongue', 'Feet', 'Mis', 'Hand'],
    ['Feet', 'Mis', 'Hand', 'Tongue'],
    ['Hand', 'Feet', 'Tongue', 'Mis'],
    ['Tongue', 'Mis', 'Hand', 'Feet'],
    ['Mis', 'Feet', 'Hand', 'Tongue'],
    ['Feet', 'Hand', 'Tongue', 'Mis'],
    ['Hand', 'Tongue', 'Mis', 'Feet'],
    ['Tongue', 'Feet', 'Mis', 'Hand'],
    ['Mis', 'Tongue', 'Hand', 'Feet'],
]


In [4]:
def get_task_rest_times(b_num):
    """Return the task and rest duration tables of one recording block.

    Parameters
    ----------
    b_num : int
        Zero-based block number, 0 to 8 inclusive.

    Returns
    -------
    (task_time, rest_time) : tuple of list[list[int]]
        Two 4x4 tables. Row ``t`` holds the four durations of trial ``t``;
        callers multiply them by ``SAMPLING_RATE`` to get sample counts.
        Fresh lists are built on every call, so callers may mutate them.

    Raises
    ------
    ValueError
        If ``b_num`` is not a known block number. (Previously the function
        only printed a message and then failed with ``UnboundLocalError``.)
    """
    # The table is rebuilt on each call so that no list is shared between
    # callers, matching the original behaviour of returning new lists.
    schedules = {
        0: ([[12, 16, 20, 8],
             [16, 12, 20, 8],
             [20, 16, 8, 12],
             [20, 12, 8, 16]],
            [[20, 8, 16, 12],
             [16, 20, 8, 12],
             [12, 20, 16, 8],
             [20, 12, 8, 16]]),
        1: ([[12, 8, 20, 16],
             [16, 20, 8, 12],
             [8, 20, 16, 12],
             [8, 12, 20, 16]],
            [[16, 12, 8, 20],
             [8, 20, 12, 16],
             [20, 16, 8, 12],
             [12, 16, 20, 8]]),
        2: ([[16, 8, 12, 20],
             [20, 16, 12, 8],
             [12, 20, 8, 16],
             [8, 12, 16, 20]],
            [[8, 20, 16, 12],
             [12, 8, 20, 16],
             [16, 12, 20, 8],
             [8, 12, 20, 16]]),
        3: ([[12, 16, 20, 8],
             [16, 12, 20, 8],
             [20, 16, 8, 12],
             [20, 12, 8, 16]],
            [[20, 8, 16, 12],
             [16, 20, 8, 12],
             [12, 20, 16, 8],
             [20, 12, 8, 16]]),
        4: ([[16, 8, 20, 12],
             [12, 16, 8, 20],
             [20, 8, 12, 16],
             [8, 20, 12, 16]],
            [[8, 12, 16, 20],
             [16, 20, 12, 8],
             [12, 16, 8, 20],
             [20, 8, 12, 16]]),
        5: ([[16, 12, 8, 20],
             [20, 16, 12, 8],
             [8, 16, 20, 12],
             [12, 8, 16, 20]],
            [[12, 8, 16, 20],
             [16, 8, 20, 12],
             [20, 12, 16, 8],
             [8, 16, 12, 20]]),
        6: ([[16, 8, 12, 20],
             [20, 8, 16, 12],
             [8, 16, 12, 20],
             [16, 20, 12, 8]],
            [[16, 8, 12, 20],
             [12, 20, 8, 16],
             [20, 16, 12, 8],
             [8, 16, 20, 12]]),
        7: ([[12, 8, 20, 16],
             [16, 20, 8, 12],
             [8, 20, 16, 12],
             [8, 12, 20, 16]],
            [[16, 12, 8, 20],
             [8, 20, 12, 16],
             [20, 16, 8, 12],
             [12, 16, 20, 8]]),
        8: ([[16, 8, 12, 20],
             [20, 16, 12, 8],
             [12, 20, 8, 16],
             [8, 12, 16, 20]],
            [[8, 20, 16, 12],
             [12, 8, 20, 16],
             [16, 12, 20, 8],
             [8, 12, 20, 16]]),
    }

    try:
        task_time, rest_time = schedules[b_num]
    except KeyError:
        raise ValueError(
            f"Error in block number: {b_num!r} (expected an integer from 0 to 8)"
        ) from None

    return task_time, rest_time


In [5]:
def trial_times_genertor(task_times,rest_times):
    """Interleave task and rest durations: [task0, rest0, task1, rest1, ...].

    Pairing stops at the shorter of the two inputs.
    """
    block_times = []
    for task_duration, rest_duration in zip(task_times, rest_times):
        block_times.append(task_duration)
        block_times.append(rest_duration)
    return block_times
    

In [6]:
def calc_csp(x_train, y_train, x_test):
    """Fit CSP on the training epochs and project both sets with it.

    The decomposition uses ``number_of_components`` components and is
    fitted on the training data only; the same fitted filters are then
    applied to the training and the test epochs.

    Returns
    -------
    (train_feat, test_feat)
        CSP features of ``x_train`` and ``x_test`` respectively.
    """
    spatial_filters = CSP(number_of_components).fit(x_train, y_train)
    return spatial_filters.transform(x_train), spatial_filters.transform(x_test)

In [7]:
def class_extractor(number_of_epochs, class_1, class_2, data, labels):
    """Keep only the epochs whose label is ``class_1`` or ``class_2``.

    Parameters
    ----------
    number_of_epochs : int
        How many leading epochs of ``data``/``labels`` are scanned.
    class_1, class_2
        Label values to keep.
    data
        Epoch array indexed as ``data[epoch, :, :]``.
    labels
        2-D array; the label of each epoch is read from column 0.

    Returns
    -------
    (dataset, Final_labels)
        ``dataset`` is a zero-initialised array of shape
        ``(size, num_channels, epoch_length)`` filled in scan order, and
        ``Final_labels`` the matching ``(size, 1)`` integer labels, where
        ``size`` counts the matching entries over *all* of ``labels``.
    """
    label_column = labels[:, 0]
    size = sum(label_column == class_1) + sum(label_column == class_2)

    dataset = np.zeros((size, num_channels, epoch_length))
    Final_labels = np.zeros((size, 1)).astype(int)

    write_pos = 0
    for epoch in range(number_of_epochs):
        current = labels[epoch, 0]
        if not (current == class_1 or current == class_2):
            continue
        dataset[write_pos] = data[epoch]
        Final_labels[write_pos, 0] = current
        write_pos += 1

    return dataset, Final_labels

In [8]:
def feature_extractor(dataset, labels, number_of_bands, test_data):
    """Build filter-bank CSP features for the training and test epochs.

    For each of ``number_of_bands`` consecutive 4-wide frequency bands
    (4-8, 8-12, ... ) both epoch sets are band-pass filtered, CSP is fitted
    on the filtered training epochs, and the resulting features of all
    bands are concatenated along axis 1.

    Parameters
    ----------
    dataset
        Training epochs passed to ``mne.filter.filter_data``.
    labels
        2-D label array; column 0 is used as the CSP target.
    number_of_bands : int
        Number of frequency bands to extract.
    test_data
        Test epochs, filtered with the same bands.

    Returns
    -------
    (train_features, test_features)
        Per-band CSP features concatenated along axis 1.
    """
    # Silence MNE once instead of on every band.
    logging.getLogger('mne').setLevel(logging.WARNING)

    band_width = 4
    train_blocks = []
    test_blocks = []

    for band in range(number_of_bands):
        low_cutoff = band_width * (band + 1)
        high_cutoff = low_cutoff + band_width

        # copy=True makes filter_data work on its own copy, so the inputs
        # stay untouched without the extra manual .copy() of both arrays
        # (one of which was never used).
        filtered_train = mne.filter.filter_data(
            dataset, sampling_freq, low_cutoff, high_cutoff,
            copy=True, verbose=False, n_jobs=4)
        filtered_test = mne.filter.filter_data(
            test_data, sampling_freq, low_cutoff, high_cutoff,
            copy=True, verbose=False, n_jobs=4)

        train_feats, test_feats = calc_csp(filtered_train, labels[:, 0], filtered_test)
        train_blocks.append(train_feats)
        test_blocks.append(test_feats)

    # Concatenate once at the end rather than re-allocating on every band.
    train_features = np.concatenate(train_blocks, axis=1)
    test_features = np.concatenate(test_blocks, axis=1)

    return train_features, test_features

In [9]:
def feature_selector(train_features, labels, number_of_selected_features):
    """Pick feature columns with pymrmr's mRMR using the 'MID' scheme.

    The labels are placed in front of the feature columns, all column
    names are converted to strings, and the names returned by
    ``pymrmr.mRMR`` are converted back to integers.

    Returns
    -------
    list[int]
        The selected column names as integers, in the order returned by
        pymrmr (used by callers to index ``train_features`` columns).
    """
    target_frame = pd.DataFrame(labels)
    feature_frame = pd.DataFrame(train_features)

    table = pd.concat([target_frame, feature_frame], axis = 1)
    table.columns = table.columns.astype(str)

    chosen_names = pymrmr.mRMR(table, 'MID', number_of_selected_features)
    return [int(name) for name in chosen_names]

In [10]:
def data_reader(path,p_num,block_list):
    """Load the requested blocks of one participant from ``.mat`` files.

    Each file is expected at ``<path>P<p_num>B<b_num>.mat``; its ``'Data'``
    variable is wrapped in a DataFrame. The block number is printed as
    each file is read.

    Returns
    -------
    dict
        Maps every block number in ``block_list`` to its DataFrame.
    """
    data_dict = {}
    for b_num in block_list:
        print(b_num)
        mat_file = f"{path}P{p_num}B{b_num}.mat"
        contents = loadmat(
            mat_file,
            chars_as_strings=True,
            mat_dtype=True,
            squeeze_me=True,
            struct_as_record=False,
            verify_compressed_data_integrity=False,
            variable_names=None,
        )
        data_dict[b_num] = pd.DataFrame(contents['Data'])
    return data_dict


In [11]:
def extra_samples_counter(df,class_1,class_2):
    """Print the lengths of the alternating class runs in column 64.

    The labels in column 64 are expected to alternate between runs of
    ``class_1`` and ``class_2``, starting with ``class_1``. The printed
    list holds one count per run; if the data starts with ``class_2`` the
    first count is 0.

    Raises
    ------
    ValueError
        If a label is neither ``class_1`` nor ``class_2``. The previous
        implementation never advanced past such a row and looped forever
        while growing the result list.
    """
    expected, other = class_1, class_2
    sampleList = []
    run_length = 0

    # Walk the label column once instead of indexing the frame per row.
    for position, label in enumerate(df.iloc[:, 64]):
        if label != expected:
            if label != other:
                raise ValueError(
                    f"Unexpected label {label!r} at row {position}: "
                    f"expected {class_1!r} or {class_2!r}"
                )
            # The run of `expected` ended here; start counting the other class.
            sampleList.append(run_length)
            run_length = 0
            expected, other = other, expected
        run_length += 1

    sampleList.append(run_length)
    print(sampleList)
    

In [76]:
def extra_samples_block_counter(df,trial_order,b_num):
    """Append per-trial sample surpluses of one block to ``sampleList.txt``.

    ``df`` is modified in place: rows labelled 'Begin' or 'End' in column
    64 are removed, the index is reset, and a 'group' column numbering the
    consecutive label runs is added. For every task name in
    ``trial_order`` the first four task runs and the matching four 'Rest'
    runs are compared with the scheduled durations (times
    ``SAMPLING_RATE``); the differences are written as one line per trial.
    """
    marker_mask = df.iloc[:,64].isin(['Begin', 'End'])
    df.drop(df[marker_mask].index, inplace=True)
    df.reset_index(drop=True, inplace=True)
    print('hi')

    # A new group id starts whenever the label differs from the row above.
    label_column = df.iloc[:,64]
    df['group'] = (label_column != label_column.shift(1)).cumsum()

    group_counts_Rest = df[label_column == 'Rest'].groupby('group').size()

    with open('sampleList.txt', 'a') as file:
        file.write(f'block {b_num+1} '+'\n')
        for trial_num, trial_name in enumerate(trial_order):
            print(trial_name)
            task_times,rest_times = get_task_rest_times(b_num)
            trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
            expected_samples = [duration*SAMPLING_RATE for duration in trial_times]
            group_counts_task = df[label_column == trial_name].groupby('group').size()

            sampleList = []
            for run in range(4):
                task_count = group_counts_task.iloc[run]
                rest_count = group_counts_Rest.iloc[4*trial_num+run]
                sampleList.extend((task_count, rest_count))

            extra_samples = [observed-expected for observed,expected in zip(sampleList,expected_samples)]
            file.write(', '.join(map(str, extra_samples)) + f' trial={trial_name} '+'\n')
            print(sampleList)
        file.write('\n\n')


    # print(group_counts_Tongue)
    # print(group_counts_Feet)
    # print(group_counts_Hand)
    # print(group_counts_Mis)
    # print(group_counts_Rest)

    # print(group_counts_b.index[0])
    # print(group_counts_b.iloc[0])
    # print(group_counts)


    # for j in range(len(trial_order)):
    #     print(j)
    #     class_2 = 'Rest'
    #     class_1 = trial_order[j]
    #     sampleList = []
    #     x=0
    #     i=0


    #     while i<len(df):
    #         print(i)
    #         if (df.iloc[i,64]!=class_1):
    #             x+=1
    #         else:
    #             i-=1
    #             sampleList.append(x)
    #             x=0
    #             class_1,class_2 = class_2,class_1
    #         i+=1
    #     sampleList.append(x)
    #     df.drop(df.index[0:sum(sampleList)], inplace=True)
    #     df.reset_index(drop=True, inplace=True)
    #     print(sampleList)
        # with open('sampleList.txt', 'w') as file:
        #     # for item in sampleList:
        #     file.write(f"{sampleList}\n")
    

In [13]:

def data_cleaner(df,class_1,class_2,tasks_time):
    """Trim every task/rest segment of a trial to its scheduled length.

    ``tasks_time`` lists the scheduled durations of the alternating
    segments, starting with ``class_1``. For each one, the first
    ``duration * SAMPLING_RATE`` rows of the remaining data are kept and
    everything up to the first row of the next class is discarded. A
    segment is skipped (nothing kept, no class switch) when the row at
    position ``duration * SAMPLING_RATE + 1`` does not carry the expected
    label in column 64.

    Returns
    -------
    pandas.DataFrame
        The kept rows of all segments, concatenated with a fresh index.
        ``df`` itself is not modified.
    """
    current_label, upcoming_label = class_1, class_2
    new_df = pd.DataFrame()
    remaining = df.copy()
    print(tasks_time)

    last_position = len(tasks_time) - 1
    for position, duration in enumerate(tasks_time):
        sample_point = duration*SAMPLING_RATE
        if remaining.iloc[sample_point+1,64] != current_label:
            continue

        segment = remaining.iloc[:sample_point,:]
        has_next_segment = position != last_position

        if has_next_segment:
            # Discard everything before the first row of the next class.
            next_task_idx = remaining[remaining.iloc[:, 64] == upcoming_label].index
            remaining.drop(remaining.index[0:next_task_idx[0]], inplace=True)
            remaining.reset_index(drop=True, inplace=True)

        new_df = pd.concat([new_df, segment], axis=0)
        new_df.reset_index(drop=True, inplace=True)

        if has_next_segment:
            current_label, upcoming_label = upcoming_label, current_label

    return new_df

In [14]:
def class_seperator(cleaned_df,class_1,class_2):
    """Return the rows regrouped so ``class_1`` rows precede ``class_2`` rows.

    The label is read from column position 64. Rows are ordered by class
    (``class_1`` first, then ``class_2``; rows with any other label sort
    last) and the index is reset.

    The input frame is left untouched. The previous implementation sorted
    and re-indexed the caller's frame in place and temporarily added a
    'sorting_order' column to it.

    Returns
    -------
    pandas.DataFrame
        A new, re-indexed frame with the same columns as ``cleaned_df``.
    """
    label_column = cleaned_df.columns[64]
    class_rank = cleaned_df.iloc[:, 64].map({class_1: 0, class_2: 1})

    return (
        cleaned_df
        .assign(sorting_order=class_rank)
        .sort_values(by=['sorting_order', label_column])
        .drop('sorting_order', axis=1)
        .reset_index(drop=True)
    )

In [15]:
def shuffler(dataset,labels):
    """Shuffle epochs and labels with the same fixed permutation.

    Prints both shapes, re-seeds NumPy's global random generator with 42
    and applies one permutation of ``len(dataset)`` indices to both
    arrays, so every call produces the same order.
    """
    for array in (dataset, labels):
        print(array.shape)

    np.random.seed(42)
    order = np.random.permutation(len(dataset))
    return dataset[order], labels[order]
    

In [16]:
def data_label_attacher(cleaned_df,class_1,class_2,random_flag,class_seperator_flag):
    """Cut a cleaned trial into Kaiser-windowed epochs and label them.

    Parameters
    ----------
    cleaned_df : pandas.DataFrame
        Signal columns followed by a last column that is dropped before
        windowing; labels are read from column position 64.
    class_1, class_2
        Label values mapped to 0 and 1.
    random_flag
        Accepted for backward compatibility; not used.
    class_seperator_flag : bool
        * True  - rows are first regrouped with ``class_seperator``; windows
          of ``WINDOW_SAMPLE_LENGTH`` samples are taken every
          ``TR_SLIDING_POINTS`` samples. Windows ending in the first half
          of the data are labelled 0, the windows of the second half 1
          (this presumes both classes contribute the same number of rows -
          TODO confirm against callers).
        * False - windows are taken every ``SLIDING_POINTS`` samples and
          labelled from the row at the window start: 0 for ``class_1``,
          1 for ``class_2``, 2 otherwise.

    Returns
    -------
    (dataset, labels)
        ``dataset`` has shape ``(number_of_epochs, NUMBER_OF_CHANNELS,
        WINDOW_SAMPLE_LENGTH)``; ``labels`` has shape
        ``(number_of_epochs, 1)`` and integer dtype.

    Notes
    -----
    The window is applied to a *copy* of each slice. Previously the slice
    (a view of the signal matrix) was multiplied in place, so with
    overlapping windows the shared samples had already been tapered by the
    preceding window before being tapered again.
    """
    source_df = class_seperator(cleaned_df,class_1,class_2) if class_seperator_flag else cleaned_df

    # Drop the trailing column and arrange the signal as (column, sample).
    signal_df = source_df.drop(source_df.columns[-1], axis=1)
    X = np.transpose(signal_df.to_numpy())
    total_samples = len(signal_df)

    if class_seperator_flag:
        number_of_epochs = int((total_samples-WINDOW_SAMPLE_LENGTH)/TR_SLIDING_POINTS)
        print(number_of_epochs)
    else:
        number_of_epochs = int(total_samples/WINDOW_SAMPLE_LENGTH)

    dataset = np.zeros((number_of_epochs,NUMBER_OF_CHANNELS,WINDOW_SAMPLE_LENGTH))
    labels = np.zeros((number_of_epochs,1)).astype(int)

    # The taper is identical for every epoch, so compute it once.
    kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)

    if class_seperator_flag:
        epoch = 0
        start_idx = 0
        end_idx = WINDOW_SAMPLE_LENGTH

        # First half of the regrouped data -> label 0.
        while end_idx <= total_samples/2:
            dataset[epoch, :, :] = X[:, start_idx:end_idx] * kaiser_window
            labels[epoch,0] = 0
            start_idx += TR_SLIDING_POINTS
            end_idx += TR_SLIDING_POINTS
            epoch += 1

        # Restart right after the last window end that still fitted into
        # the first half, so no window straddles the two halves.
        start_idx = end_idx-TR_SLIDING_POINTS
        end_idx = start_idx+WINDOW_SAMPLE_LENGTH
        print(epoch, "j is this")

        # Second half -> label 1.
        while end_idx <= total_samples:
            dataset[epoch, :, :] = X[:, start_idx:end_idx] * kaiser_window
            labels[epoch,0] = 1
            start_idx += TR_SLIDING_POINTS
            end_idx += TR_SLIDING_POINTS
            epoch += 1
        print(epoch, "j is this")

    else:
        epoch = 0
        start_idx = 0
        end_idx = WINDOW_SAMPLE_LENGTH
        while end_idx <= total_samples:
            dataset[epoch, :, :] = X[:, start_idx:end_idx] * kaiser_window

            # The label of the first sample decides the label of the window.
            start_label = cleaned_df.iloc[start_idx, 64]
            if start_label == class_1:
                labels[epoch,0] = 0
            elif start_label == class_2:
                labels[epoch,0] = 1
            else:
                labels[epoch,0] = 2

            start_idx += SLIDING_POINTS
            end_idx += SLIDING_POINTS
            epoch += 1

    return dataset,labels




In [17]:
def trial_cutter(data, class_1):
    """Return the rows strictly between ``Begin_<class_1>`` and ``End_<class_1>``.

    The triggers are looked up in column position 64; the first occurrence
    of each is used and the index values found are used as row positions.
    The result has a fresh 0..n-1 index and ``data`` is left unmodified.

    The slice is taken directly from ``data`` instead of from a full copy
    of it, and is re-indexed without an in-place operation on a slice.

    Raises
    ------
    IndexError
        If either trigger is missing from column 64.
    """
    trigger_column = data.iloc[:, 64]
    begin_rows = data.index[(trigger_column == "Begin" + "_" + class_1).to_numpy()]
    end_rows = data.index[(trigger_column == "End" + "_" + class_1).to_numpy()]

    if len(begin_rows) == 0 or len(end_rows) == 0:
        raise IndexError(
            f"Begin/End trigger for {class_1!r} not found in column 64"
        )

    return data.iloc[begin_rows[0]+1:end_rows[0], :].reset_index(drop=True)

In [18]:
def Begin_End_trigger_modifier(data):
    """Return a copy whose 'Begin'/'End' triggers carry the task name.

    Every 'Begin' marker in column 64 is paired with the 'End' marker of
    the same rank. The label found in the row right after 'Begin' is
    appended to both, giving e.g. 'Begin_Feet' / 'End_Feet'. If the number
    of 'Begin' and 'End' markers differs, a message is printed and the
    copy is returned unchanged.
    """
    df = data.copy()
    triggers = df.iloc[:, 64]
    begin_rows = df[triggers == 'Begin'].index
    end_rows = df[triggers == 'End'].index

    if len(begin_rows) != len(end_rows):
        print("Trigger seinding Exception")
        return df

    for begin_row, end_row in zip(begin_rows, end_rows):
        # The row following 'Begin' holds the name of the task.
        task_name = str(df.iloc[begin_row + 1, 64])
        df.iloc[begin_row, 64] = "Begin_" + task_name
        df.iloc[end_row, 64] = "End_" + task_name

    return df

In [19]:
def preprocessor(data_,class_1,class_2,tasks_time,set_type,class_seperator_flag):
    """Turn one raw block into windowed epochs and labels for one task.

    Steps: tag the Begin/End triggers with their task name, cut out the
    trial of ``class_1``, trim its segments to the durations in
    ``tasks_time``, then window and label the result. The shape of each
    intermediate result is printed.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Raw block data; it is not modified (the trigger-tagging step works
        on its own copy, so no extra copy is taken here).
    class_1, class_2
        Task label and the label it alternates with.
    tasks_time
        Scheduled durations of the alternating segments.
    set_type : str
        'TRAIN' or 'TEST'.
    class_seperator_flag : bool
        Forwarded to ``data_label_attacher``.

    Returns
    -------
    (final_data, final_labels)

    Raises
    ------
    ValueError
        If ``set_type`` is neither 'TRAIN' nor 'TEST'. This is checked
        before any processing; previously a message was printed after the
        expensive steps and the call then failed with UnboundLocalError.
    """
    if set_type == "TRAIN":
        random_flag = True
    elif set_type == "TEST":
        random_flag = False
    else:
        raise ValueError(
            f"Error in set type: {set_type!r} (expected 'TRAIN' or 'TEST')"
        )

    modified_df = Begin_End_trigger_modifier(data_)
    trial_df = trial_cutter(modified_df,class_1)
    print(trial_df.shape,"trial_df")
    cleaned_df = data_cleaner(trial_df,class_1,class_2,tasks_time)
    print(cleaned_df.shape,"cleaned_df")

    final_data, final_labels = data_label_attacher(cleaned_df,class_1,class_2,random_flag,class_seperator_flag)
    print(final_data.shape,"final_data shape")
    print(final_labels.shape,"final_labels shape")

    return final_data,final_labels

In [20]:
def trials_set_builder(data_dict,blocks_set,set_label,class_1,class_2,class_seperator_flag):
    """Build one epoch/label set from the ``class_1`` trial of several blocks.

    For every block in ``blocks_set`` the trial index of ``class_1`` is
    looked up in ``trial_order``, the matching task/rest schedule is
    interleaved, and the block is run through ``preprocessor``. The
    per-block results are stacked along the first axis.

    Parameters
    ----------
    data_dict : dict
        Maps block numbers to their raw DataFrames (not modified here).
    blocks_set
        Iterable of block numbers to include.
    set_label : str
        'TRAIN' or 'TEST', forwarded to ``preprocessor``.
    class_1, class_2
        Task label and the label it alternates with.
    class_seperator_flag : bool
        Forwarded to ``preprocessor``.

    Returns
    -------
    (final_data, final_labels)

    Raises
    ------
    ValueError
        If ``blocks_set`` is empty (previously an UnboundLocalError), or
        if ``class_1`` is not part of a block's trial order.
    """
    block_numbers = list(blocks_set)
    if not block_numbers:
        raise ValueError("blocks_set must contain at least one block number")

    data_parts = []
    label_parts = []

    for b_num in block_numbers:
        trial_num = trial_order[b_num].index(class_1)
        task_times,rest_times = get_task_rest_times(b_num)
        print(task_times[trial_num],rest_times[trial_num])
        trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
        print(trial_times)

        # The block frame is passed as is: preprocessor does not modify it.
        dataset,labels = preprocessor(data_dict[b_num],class_1,class_2,trial_times,set_label,class_seperator_flag)
        data_parts.append(dataset)
        label_parts.append(labels)

    # Stack once at the end instead of re-allocating after every block.
    final_data = np.vstack(data_parts)
    final_labels = np.vstack(label_parts)
    print("After concatenation - final_data shape:", final_data.shape, "final_labels shape:", final_labels.shape)

    return final_data,final_labels

In [108]:
# PATH = '../../Participants/P1/'
# P_NUM = 1
# B_NUM = 1
# CLASS_1 = "Feet"
# CLASS_2 = "Rest"
# data_tr_ = data_reader(PATH+'P'+str(P_NUM)+'B'+str(B_NUM)+'.mat')
# data_tr2_ = data_reader(PATH+'P'+str(P_NUM)+'B'+str(4)+'.mat')
# data_te_ = data_reader(PATH+'P'+str(P_NUM)+'B'+str(6)+'.mat')





# for i in range(number_of_epochs):
#     data[i,:,:] = X[:, randomlist[i]*epoch_length:(randomlist[i] + 1)*epoch_length]
#     if (df['condition'][randomlist[i]*epoch_length] == 'Left'):
#         labels[i,0] = 0
#     elif(df['condition'][randomlist[i]*epoch_length] == 'Right'):
#         labels[i,0] = 1
#     elif(df['condition'][randomlist[i]*epoch_length] == 'Feet'):
#         labels[i,0] = 2
#     elif(df['condition'][randomlist[i]*epoch_length] == 'Tongue'):
#         labels[i,0] = 3
#     elif(df['condition'][randomlist[i]*epoch_length] == 'Mis'):
#         labels[i,0] = 4
#     elif(df['condition'][randomlist[i]*epoch_length] == 'Si'):
#         labels[i,0] = 5
#     else:
#         labels[i,0] = 6



# preprocessor(data_tr,data_te)

# X_tr_raw,X_te_raw,y_tr_raw,y_te_raw,number_of_epochs_tr,number_of_epochs_te = preprocessor(X_train,X_test,data1)
# [X_tr, y_tr] = class_extraction(number_of_epochs_tr, class_1, class_2, X_tr_raw, y_tr_raw)
# [X_te, y_te] = class_extraction(number_of_epochs_te, class_1, class_2, X_te_raw, y_te_raw) 
# print(X_te.shape,"X_te.shape")


In [100]:
# Load the listed blocks of participant 1 (block 2 is not in the list).
p_num = 1
blcok_list = [0, 1, 3, 4, 5, 6, 7, 8]
data_dict = data_reader(
    path='../../Participants/P1/',
    p_num=p_num,
    block_list=blcok_list,
)

0


MemoryError: 

In [21]:
# Load blocks 0-6 of participant `p_num`.
# NOTE(review): the result is stored as `data_dict_3` although p_num is 4 -
# confirm which participant is intended.
p_num = 4
blcok_list = list(range(7))
data_dict_3 = data_reader(
    path=f'../../Participants/P{p_num}/',
    p_num=p_num,
    block_list=blcok_list,
)

0
1
2
3
4
5
6


In [91]:


# --- Experiment configuration ------------------------------------------
# NOTE(review): PATH, b_num and p_num are assigned but not used below.
PATH = '../../Participants/P3/'
p_num = 3
b_num = 0
class_1, class_2 = 'Feet', 'Rest'
train_blocks_set = list(range(5))
test_blocks_set = [5, 6]

# --- Build the train / test epoch sets -----------------------------------
X_tr, Y_tr = trials_set_builder(data_dict_3, train_blocks_set, 'TRAIN', class_1, class_2, False)
X_te, Y_te = trials_set_builder(data_dict_3, test_blocks_set, 'TEST', class_1, class_2, False)

print(X_tr.shape,Y_tr.shape,"train shape")
print(X_te.shape,Y_te.shape,"test shape")

# --- Filter-bank CSP features and mRMR selection (training data only) ----
train_features, test_features = feature_extractor(X_tr, Y_tr, number_of_bands, X_te)
selected_features = feature_selector(train_features, Y_tr, number_of_selected_features)

# --- Fit and score the classifier ----------------------------------------
clf = XGBClassifier()
train_acc_list, test_acc_list = [], []

for r in range(1):
    clf.fit(train_features[:, selected_features], Y_tr[:,0])

    y_pr_te = clf.predict(test_features[:, selected_features])
    y_pr_tr = clf.predict(train_features[:, selected_features])

    accuracy_te = accuracy_score(Y_te, y_pr_te)
    accuracy_tr = accuracy_score(Y_tr, y_pr_tr)

    test_acc_list.append(accuracy_te)
    train_acc_list.append(accuracy_tr)

print(train_acc_list,"train")
print(test_acc_list,"test")


    



# block_order_tr = ['Tongue','Feet','Mis','Hand']
# block_order_tr2 = ['Tongue','Mis','Hand','Feet']
# block_order_te = ['Feet','Hand','Tongue','Mis']
# CLASS_1 = "Hand"
# CLASS_2 = "Rest"
# tasks_time_tr = [16,16,12,20,20,8,8,12]
# tasks_time_tr2 = [20,20,12,12,8,8,16,16]
# tasks_time_te = [16,12,12,8,8,16,20,20]

# df_tr = data_tr_.copy()
# df_tr2 = data_tr2_.copy()
# df_te = data_te_.copy()
# data_tr,labels_tr = preprocessor(df_tr,CLASS_1,CLASS_2,tasks_time_tr,"TRAIN")
# data_tr2,labels_tr2 = preprocessor(df_tr2,CLASS_1,CLASS_2,tasks_time_tr2,"TRAIN")
# data_te,labels_te = preprocessor(df_te,CLASS_1,CLASS_2,tasks_time_te,"TEST")
# data_tr = np.vstack((data_tr, data_tr2))
# labels_tr = np.vstack((labels_tr, labels_tr2))
# print(data_tr.shape)
# print(labels_tr.shape)
# print(data_te.shape)
# print(labels_te.shape)





# print(data_tr.shape,labels_tr.shape)
# print(data_te.shape,labels_te.shape)
# print(labels_te)
# print(indexes)
# print(Begin_indexes)
# print(End_indexes)
# print(df.iloc[1,64])


    




[16, 12, 20, 8] [16, 20, 8, 12]
[16, 16, 12, 20, 20, 8, 8, 12]
(28473, 65) trial_df
[4078, 4078, 3089, 5079, 5078, 2032, 2031, 3008]
[16, 16, 12, 20, 20, 8, 8, 12]
(28000, 65) cleaned_df
(28, 64, 1000) final_data shape
(28, 1) final_labels shape
Before concatenation - final_data shape: (28, 64, 1000) dataset shape: (28, 64, 1000)
[12, 8, 20, 16] [16, 12, 8, 20]
[12, 16, 8, 12, 20, 8, 16, 20]
(28491, 65) trial_df
[3089, 4081, 2030, 3090, 5077, 2034, 4082, 5008]
[12, 16, 8, 12, 20, 8, 16, 20]
(28000, 65) cleaned_df
(28, 64, 1000) final_data shape
(28, 1) final_labels shape
After concatenation - final_data shape: (56, 64, 1000) final_labels shape: (56, 1)
[20, 16, 12, 8] [12, 8, 20, 16]
[20, 12, 16, 8, 12, 20, 8, 16]
(28486, 65) trial_df
[5078, 3088, 4078, 2030, 3091, 5082, 2032, 4007]
[20, 12, 16, 8, 12, 20, 8, 16]
(28000, 65) cleaned_df
(28, 64, 1000) final_data shape
(28, 1) final_labels shape
After concatenation - final_data shape: (84, 64, 1000) final_labels shape: (84, 1)
[20, 12, 8

3782.88s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
3783.05s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
3783.21s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
3783.38s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
0.62s - Expected: /home/mahdi146/jupyter2/lib/python3.8/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/attach_linux_amd64.so to exist.
0.52s - Expected: /home/mahdi146/jupyter2/lib/python3.8/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/attach_linux_amd64.so to exist.
0.55s - Expected: /home/mahdi146/jupyter2/lib/python3.8/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/attach_linux_amd64.so to exist.
0.57s - Expected: /home/mahdi146/jupyter2/lib/python3.8/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/attach_linux_amd64.so to exist.




 *** This program and the respective minimum Redundancy Maximum Relevance (mRMR) 
     algorithm were developed by Hanchuan Peng <hanchuan.peng@gmail.com>for
     the paper 
     "Feature selection based on mutual information: criteria of 
      max-dependency, max-relevance, and min-redundancy,"
      Hanchuan Peng, Fuhui Long, and Chris Ding, 
      IEEE Transactions on Pattern Analysis and Machine Intelligence,
      Vol. 27, No. 8, pp.1226-1238, 2005.


*** MaxRel features ***
Order 	 Fea 	 Name 	 Score
1 	 31 	 30 	 0.470
2 	 34 	 33 	 0.414
3 	 22 	 21 	 0.387
4 	 44 	 43 	 0.387
5 	 55 	 54 	 0.376
6 	 14 	 13 	 0.368
7 	 13 	 12 	 0.346
8 	 21 	 20 	 0.345
9 	 11 	 10 	 0.342
10 	 69 	 68 	 0.331

*** mRMR features *** 
Order 	 Fea 	 Name 	 Score
1 	 31 	 30 	 0.470
2 	 58 	 57 	 0.120
3 	 10 	 9 	 0.110
4 	 14 	 13 	 0.150
5 	 69 	 68 	 0.143
6 	 39 	 38 	 0.115
7 	 22 	 21 	 0.126
8 	 13 	 12 	 0.118
9 	 17 	 16 	 0.102
10 	 34 	 33 	 0.117
[1.0] train
[0.8928571428571429] 

In [24]:
# Scratch cell: manual walk-through of the trial-cutting / cleaning steps.
# NOTE(review): this cell reads `df` and `block_order`, neither of which is
# assigned in the cells above; the recorded output of this cell is
# "NameError: name 'df' is not defined".

# Row of the first 'End_Tongue' trigger in column 64.
idx = df[df.iloc[:, 64] == 'End_Tongue'].index
print(idx[0])

# Build the Begin/End trigger names of the first task of the block.
CLASS_1 = block_order[0]
CLASS_2 = 'Rest'
Begin_trigger = "Begin" + "_" + CLASS_1
End_trigger = "End" + "_" + CLASS_1

Begin_idx = df[df.iloc[:, 64] == Begin_trigger].index
End_idx = df[df.iloc[:, 64] == End_trigger].index
print(Begin_idx[0],End_idx[0])

# Rows strictly between the two triggers (same slice as trial_cutter takes).
trial_df = df.iloc[Begin_idx[0]+1:End_idx[0],:]
# trial_df.tail()

idxx = trial_df[trial_df.iloc[:, 64] == 'Rest'].index
idxx2 = trial_df[trial_df.iloc[:, 64] == 'Feet'].index
# print(idxx,len(idxx))
# print(idxx2,len(idxx2))

trial_df2 = trial_df.copy()


# sample_point = tasks_time[0]*SAMPLING_RATE
# if(trial_df2.iloc[sample_point+1,64] == class_x ):
#     temp_df = trial_df2.iloc[:sample_point,:]
#     next_task_idx = trial_df2[trial_df2.iloc[:, 64] == class_y].index
#     trial_df2.drop(trial_df2.index[0:next_task_idx[0]], inplace=True)
#     trial_df2.reset_index(drop=True, inplace=True)
#     new_df = pd.concat([new_df, temp_df], axis=0)

# sample_point = tasks_time[1]*SAMPLING_RATE
# if(trial_df2.iloc[sample_point+1,64] == class_y ):
#     temp_df2 = trial_df2.iloc[:sample_point,:]
#     next_task_idx = trial_df2[trial_df2.iloc[:, 64] == class_x].index
#     trial_df2.drop(trial_df2.index[0:next_task_idx[0]], inplace=True)
#     trial_df2.reset_index(drop=True, inplace=True)

# new_df = pd.concat([temp_df, temp_df2], axis=0)
# new_df.reset_index(drop=True, inplace=True)
# new_df.tail()

# NOTE(review): data_cleaner is defined in this notebook with four
# parameters (df, class_1, class_2, tasks_time); this call passes three.
cleaned_df = data_cleaner(trial_df,CLASS_1,CLASS_2)
cleaned_df.head()

# NOTE(review): data_label_attacher is defined in this notebook with five
# parameters (..., random_flag, class_seperator_flag); this call passes three.
data, labels = data_label_attacher(cleaned_df,CLASS_1,CLASS_2)
print(data.shape,labels.shape)
print(labels)
# for i in range(len(labels)):
#     if labels[i] == 1 :
#         print("hi")










NameError: name 'df' is not defined

In [70]:
# Show the first 86277 rows of the frame (a very large display request).
df.iloc[:86277]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
0,-6007.68457,12510.444336,7844.45166,-4246.978027,-3228.525146,-103.249702,8590.283203,-5608.668945,15386.517578,-11876.93457,...,-2211.25415,-7136.249023,43.948391,7693.819824,2708.186035,-10187.832031,10895.167969,7435.178711,15664.952148,Begin_Feet
1,-6012.343262,12505.867188,7834.443359,-4234.376953,-3221.28418,-58.525509,8586.755859,-5612.939453,15399.620117,-11870.067383,...,-2209.654785,-7135.818359,42.244194,7694.793945,2700.129883,-10194.820312,10893.699219,7429.57959,15661.667969,Feet
2,-6004.281738,12508.5,7836.693359,-4235.759766,-3225.619385,-89.742882,8589.950195,-5611.961426,15399.019531,-11871.919922,...,-2206.152344,-7133.556641,45.26141,7695.370605,2701.121582,-10193.835938,10892.120117,7430.616699,15662.744141,Feet
3,-6006.712891,12511.026367,7839.393555,-4254.118164,-3237.662842,-97.241196,8592.822266,-5606.171387,15382.796875,-11880.046875,...,-2205.669434,-7130.776367,46.148624,7693.93457,2701.351074,-10195.214844,10894.549805,7432.617188,15667.950195,Feet
4,-6015.308594,12510.445312,7840.519043,-4254.352539,-3237.874268,-128.30748,8591.375,-5594.76123,15381.959961,-11880.235352,...,-2208.33374,-7131.682617,45.585205,7698.09082,2702.700195,-10194.633789,10893.917969,7433.499512,15667.558594,Feet
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86272,-7947.979492,12310.938477,7300.790527,-4155.20166,-3016.442627,143.286392,8194.839844,-5476.609375,14971.160156,-11585.59082,...,-1267.154785,-8254.25,-468.513611,7737.567383,2609.070557,-10310.354492,11188.512695,7139.75,15612.225586,Rest
86273,-7956.728516,12311.724609,7300.694336,-4157.084473,-3019.311035,146.885925,8196.986328,-5477.582031,14968.926758,-11585.797852,...,-1267.393799,-8251.016602,-463.748169,7735.005371,2611.488525,-10310.40625,11187.453125,7139.99707,15614.25,Rest
86274,-7955.460449,12314.120117,7301.058594,-4155.061523,-3020.093262,138.646683,8201.264648,-5476.589355,14971.043945,-11588.185547,...,-1264.507202,-8247.374023,-463.993561,7735.862793,2611.591064,-10316.472656,11188.148438,7143.306152,15617.170898,Rest
86275,-7945.963867,12318.460938,7304.883789,-4156.449707,-3019.900879,141.031204,8205.243164,-5473.765137,14969.541992,-11589.478516,...,-1268.526611,-8244.618164,-459.132599,7734.895508,2614.547363,-10317.243164,11189.810547,7147.561035,15620.617188,End_Hand


In [1]:
# Demo: interleave two equally long lists element by element.
a = [1, 2, 3, 4]
b = [5, 6, 7, 8]

c = []
for left, right in zip(a, b):
    c.extend((left, right))
print(c)

[1, 5, 2, 6, 3, 7, 4, 8]


In [50]:
# Check that every trial row of every block sums to 56, for both the task
# and the rest schedules.
# The sums of all nine blocks are collected before checking; previously
# the lists were reset on every iteration, so only block 8 was verified.
task_list = []
rest_list = []
for i in range(9):
    task, rest = get_task_rest_times(i)
    task_list.extend(np.sum(item) for item in task)
    rest_list.extend(np.sum(item) for item in rest)

all_equal_rest = all(element == 56 for element in rest_list)
all_equal_task = all(element == 56 for element in task_list)

if all_equal_rest:
    print("Yes")
if all_equal_task:
    print("Yes")

Yes
Yes


In [100]:
# Demo: np.random.shuffle reorders an array along its first axis only.
seed_value = 42
np.random.seed(seed_value)
arr = np.random.rand(4, 3, 2)

print("Original array:", arr, sep="\n")

# In-place shuffle: the 3x2 sub-arrays are moved as whole units.
np.random.shuffle(arr)

print("\nShuffled array along the first axis:", arr, sep="\n")

Original array:
[[[0.37454012 0.95071431]
  [0.73199394 0.59865848]
  [0.15601864 0.15599452]]

 [[0.05808361 0.86617615]
  [0.60111501 0.70807258]
  [0.02058449 0.96990985]]

 [[0.83244264 0.21233911]
  [0.18182497 0.18340451]
  [0.30424224 0.52475643]]

 [[0.43194502 0.29122914]
  [0.61185289 0.13949386]
  [0.29214465 0.36636184]]]

Shuffled array along the first axis:
[[[0.83244264 0.21233911]
  [0.18182497 0.18340451]
  [0.30424224 0.52475643]]

 [[0.37454012 0.95071431]
  [0.73199394 0.59865848]
  [0.15601864 0.15599452]]

 [[0.43194502 0.29122914]
  [0.61185289 0.13949386]
  [0.29214465 0.36636184]]

 [[0.05808361 0.86617615]
  [0.60111501 0.70807258]
  [0.02058449 0.96990985]]]


In [25]:
import pandas as pd

# Toy frame whose third column ('label') alternates between runs of 'a' and 'b'.
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes',
             'in', 'this', 'case', 'it', 'does', 'not matter'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b'],
}

df = pd.DataFrame(data)

print(df)
print("his")

# Rank assigned to each label value; rows are grouped by this rank.
sorting_order = {'a': 0, 'b': 1}
label_column = df.columns[2]

# Attach the rank as a temporary column, sort on (rank, label), then remove
# the helper column and renumber the rows from 0.
df = (
    df.assign(sorting_order=df[label_column].map(sorting_order))
      .sort_values(by=['sorting_order', label_column])
      .drop(columns='sorting_order')
      .reset_index(drop=True)
)

# Show the regrouped frame.
print(df)





    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      4         for     b
4      5     example     b
5      6    purposes     b
6      7          in     a
7      8        this     a
8      9        case     a
9     10          it     b
10    11        does     b
11    12  not matter     b
his
    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      7          in     a
4      8        this     a
5      9        case     a
6      4         for     b
7      5     example     b
8      6    purposes     b
9     10          it     b
10    11        does     b
11    12  not matter     b


In [54]:
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes',
             'in', 'this', 'case', 'it', 'does', 'not matter', 'b'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
}

df = pd.DataFrame(data)

# Walk the label column (position 2) and record the length of each run,
# alternating which label is expected: class_1 is the label of the run
# currently being counted, class_2 is the other one.
x = 0  # rows counted in the current run
i = 0  # current row position
class_1 = 'a'
class_2 = 'b'
sampleList = []
while i < len(df):
    if df.iloc[i, 2] != class_1:
        # The run ended: store its length, start a new count, and swap the
        # expected labels. The row index is not advanced, so this same row is
        # examined again against the new expected label.
        sampleList.append(x)
        x = 0
        class_1, class_2 = class_2, class_1
        continue
    x += 1
    i += 1
# The final run is still open when the loop ends.
sampleList.append(x)
print(sampleList)

[3, 3, 3, 4]


In [43]:
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes',
             'in', 'this', 'case', 'it', 'does', 'not matter', 'c'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
}

df = pd.DataFrame(data)

# Number every run of identical consecutive labels: a row opens a new run
# whenever its label differs from the previous row's label (in either
# direction), and the cumulative sum of those change flags is the run id.
label_changed = df['label'].ne(df['label'].shift(1))
df['group'] = label_changed.cumsum()

# Run lengths, indexed by run id, for the rows labelled 'a' ...
rows_a = df[df['label'].eq('a')]
group_counts = rows_a.groupby('group').size()

# ... and for the rows labelled 'b'.
rows_b = df[df['label'].eq('b')]
group_counts_b = rows_b.groupby('group').size()

# Print the 'b' run lengths, the id and length of the first 'b' run, then the
# 'a' run lengths, one per line.
print(group_counts_b, group_counts_b.index[0], group_counts_b.iloc[0], group_counts, sep="\n")

group
2    3
4    4
dtype: int64
2
3
group
1    3
3    3
dtype: int64


In [58]:
# Load one recording from a MATLAB file and wrap its 'Data' variable in a
# DataFrame.
# NOTE(review): p_num / b_num look like participant and block numbers, judging
# by the P{p}/P{p}B{b}.mat path layout — confirm.
p_num = 4
b_num = 7
path = f'../../Participants/P{p_num}/'
mat = loadmat(
    f'{path}P{p_num}B{b_num}.mat',
    chars_as_strings=True,
    mat_dtype=True,
    squeeze_me=True,
    struct_as_record=False,
    verify_compressed_data_integrity=False,
    variable_names=None,
)
df_1 = pd.DataFrame(mat['Data'])


In [59]:
# Run the block counter on df_1 (defined in an earlier cell) with the first
# entry of trial_order.
# NOTE(review): another cell in this notebook calls this function with a third
# positional argument; confirm the current definition still accepts two.
first_trial_order = trial_order[0]
extra_samples_block_counter(df_1, first_trial_order)

hi
[6191, 10157, 8157, 4065, 10161, 8156, 4060, 6014]
[8156, 8156, 6178, 10157, 10156, 4064, 4063, 6015]
[10158, 6176, 8165, 10155, 4073, 8156, 6184, 4016]
[10165, 10155, 6183, 6177, 4060, 4062, 8162, 8016]


In [77]:
# Run the block counter for indices 0..6, pairing each entry of data_dict_3
# with the trial_order entry at the same index.
# NOTE(review): 7 is hard-coded; presumably the number of blocks — confirm.
for b in range(7):
    block_data = data_dict_3[b]
    block_trial_order = trial_order[b]
    extra_samples_block_counter(block_data, block_trial_order, b)

hi
Tongue
[3096, 5078, 4079, 2032, 5081, 4078, 2030, 3007]
Feet
[4078, 4078, 3089, 5079, 5078, 2032, 2031, 3008]
Mis
[5079, 3088, 4082, 5078, 2036, 4078, 3092, 2008]
Hand
[5082, 5078, 3091, 3089, 2030, 2031, 4081, 4008]
hi
Feet
[3089, 4081, 2030, 3090, 5077, 2034, 4082, 5008]
Mis
[4080, 2030, 5078, 5078, 2036, 3088, 3089, 4007]
Hand
[2035, 5077, 5078, 4077, 4081, 2031, 3091, 3008]
Tongue
[2032, 3092, 3092, 4078, 5076, 5079, 4082, 2008]
hi
Hand
[4082, 2033, 2032, 5078, 3090, 4079, 5077, 3007]
Feet
[5078, 3088, 4078, 2030, 3091, 5082, 2032, 4007]
Tongue
[3085, 4079, 5076, 3094, 2029, 5078, 4078, 2007]
Mis
[2032, 2032, 3088, 3089, 4077, 5081, 5079, 4008]
hi
Tongue
[3089, 5077, 4077, 2032, 5077, 4079, 2031, 3008]
Mis
[4077, 4078, 3089, 5162, 5079, 2131, 2027, 3007]
Hand
[5109, 3095, 4111, 5081, 2063, 4081, 3135, 2006]
Feet
[5077, 5111, 3089, 3119, 2027, 2043, 4078, 4007]
hi
Mis
[4080, 2036, 2066, 3090, 5116, 4078, 3177, 5008]
Feet
[3130, 4082, 4088, 5132, 2029, 3144, 5079, 2007]
Hand
[5079

In [125]:
! pip install xgboost

Looking in links: /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/xgboost-1.0.2+computecanada-py3-none-any.whl
Installing collected packages: xgboost
Successfully installed xgboost-1.0.2+computecanada


In [40]:
! pip install lightgbm

Looking in links: /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/avx2/lightgbm-4.0.0+computecanada-py3-none-linux_x86_64.whl
Installing collected packages: lightgbm
Successfully installed lightgbm-4.0.0+computecanada


In [43]:
! pip install catboost

Looking in links: /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/avx2, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2020/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/catboost-1.0.6+computecanada-cp38-none-linux_x86_64.whl
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/plotly-5.18.0+computecanada-py3-none-any.whl
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/graphviz-0.20.1+computecanada-py3-none-any.whl
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/tenacity-8.2.3+computecanada-py3-none-any.whl
Installing collected packages: tenacity, graphviz, plotly, catboost
Successfully installed catboost-1.0.6+computecanada graphviz-0.20.1+computecanada pl