In [3]:
import sys
import mne
import scipy.io as sp
from scipy import interpolate
import numpy as np
import random
import pandas as pd
import multiprocessing as mp
import concurrent.futures
from mne.decoding import CSP
import pymrmr
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import logging
from scipy.io import loadmat
from scipy.signal import hamming
from scipy.signal import hann
from scipy.signal import blackman
from scipy.signal import kaiser
from scipy.signal import gaussian
from sklearn.decomposition import FastICA
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.impute import KNNImputer
from sklearn.decomposition import PCA
from pyriemann.estimation import Covariances
from pyriemann.tangentspace import TangentSpace
from pyriemann.classification import MDM
import medusa
import medusa.bci.mi_paradigms



In [4]:
# Lift the pandas display limits so DataFrames are shown in full
# (None disables the corresponding row / column / width limit).
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

# Print NumPy arrays in full instead of abbreviating them with "...".
np.set_printoptions(threshold=np.inf)

In [5]:
# --- Acquisition / windowing parameters -------------------------------------
# Sample counts are derived from the time values below, so only the time
# values and the sampling rate need to be edited.
WINDOW_TIME_LENGTH = 4          # epoch length (presumably seconds -- confirm)
SAMPLING_RATE = 250             # samples per second (passed to MNE as `sfreq`)
TR_SLIDING_WINDOW_TIME = 2      # hop between overlapping windows (same unit as above)
WINDOW_SAMPLE_LENGTH = WINDOW_TIME_LENGTH * SAMPLING_RATE
NUMBER_OF_CHANNELS = 64
SLIDING_TIME = 4                # hop between non-overlapping windows
SLIDING_POINTS = SLIDING_TIME * SAMPLING_RATE
TR_SLIDING_POINTS = TR_SLIDING_WINDOW_TIME * SAMPLING_RATE
beta = 1.5                      # shape parameter handed to the Kaiser window

# --- Lower-case aliases used by the feature-extraction helpers --------------
# Derived from the constants above so the two naming schemes cannot drift apart.
num_channels = NUMBER_OF_CHANNELS
epoch_length = WINDOW_SAMPLE_LENGTH
sampling_freq = SAMPLING_RATE

# --- Experiment / model settings ---------------------------------------------
number_of_runs = 10
# number_of_splits = 10
number_of_components = 10           # component count handed to CSP
number_of_selected_features = 10
number_of_processes = 10
number_of_bands = 9
# rf = pd.DataFrame()
column_names = ['participant', 'class1', 'class2', 'running_time', 'test_acc',
                'train_acc', 'test_size', 'train_size', 'train_block', 'test_block']
# rf = rf.reindex(columns=column_names)

# trial_order[b] lists the task names of block b in presentation order; the
# position of a task in that list selects its row in the timing tables.
trial_order = [['Tongue', 'Feet', 'Mis', 'Hand'],
               ['Feet', 'Mis', 'Hand', 'Tongue'],
               ['Hand', 'Feet', 'Tongue', 'Mis'],
               ['Tongue', 'Mis', 'Hand', 'Feet'],
               ['Mis', 'Feet', 'Hand', 'Tongue'],
               ['Feet', 'Hand', 'Tongue', 'Mis'],
               ['Hand', 'Tongue', 'Mis', 'Feet'],
               ['Tongue', 'Feet', 'Mis', 'Hand'],
               ['Mis', 'Tongue', 'Hand', 'Feet']]


In [6]:
def get_task_rest_times(b_num):
    """Return the task and rest duration tables of one recording block.

    Parameters
    ----------
    b_num : int
        Zero-based block number; blocks 0 to 8 are defined.

    Returns
    -------
    tuple(list[list[int]], list[list[int]])
        ``(task_time, rest_time)``. Each is a 4x4 table; callers index the
        first axis with the trial number and multiply the entries by the
        sampling rate to obtain sample counts.

    Raises
    ------
    ValueError
        If ``b_num`` is not a known block number. (The previous version
        printed a message and then failed with ``UnboundLocalError``.)
    """
    # The table is rebuilt on every call so callers always receive fresh
    # lists, exactly as before. Blocks 0/3, 1/7 and 2/8 share the same timing.
    schedules = {
        0: ([[12, 16, 20, 8],
             [16, 12, 20, 8],
             [20, 16, 8, 12],
             [20, 12, 8, 16]],
            [[20, 8, 16, 12],
             [16, 20, 8, 12],
             [12, 20, 16, 8],
             [20, 12, 8, 16]]),
        1: ([[12, 8, 20, 16],
             [16, 20, 8, 12],
             [8, 20, 16, 12],
             [8, 12, 20, 16]],
            [[16, 12, 8, 20],
             [8, 20, 12, 16],
             [20, 16, 8, 12],
             [12, 16, 20, 8]]),
        2: ([[16, 8, 12, 20],
             [20, 16, 12, 8],
             [12, 20, 8, 16],
             [8, 12, 16, 20]],
            [[8, 20, 16, 12],
             [12, 8, 20, 16],
             [16, 12, 20, 8],
             [8, 12, 20, 16]]),
        3: ([[12, 16, 20, 8],
             [16, 12, 20, 8],
             [20, 16, 8, 12],
             [20, 12, 8, 16]],
            [[20, 8, 16, 12],
             [16, 20, 8, 12],
             [12, 20, 16, 8],
             [20, 12, 8, 16]]),
        4: ([[16, 8, 20, 12],
             [12, 16, 8, 20],
             [20, 8, 12, 16],
             [8, 20, 12, 16]],
            [[8, 12, 16, 20],
             [16, 20, 12, 8],
             [12, 16, 8, 20],
             [20, 8, 12, 16]]),
        5: ([[16, 12, 8, 20],
             [20, 16, 12, 8],
             [8, 16, 20, 12],
             [12, 8, 16, 20]],
            [[12, 8, 16, 20],
             [16, 8, 20, 12],
             [20, 12, 16, 8],
             [8, 16, 12, 20]]),
        6: ([[16, 8, 12, 20],
             [20, 8, 16, 12],
             [8, 16, 12, 20],
             [16, 20, 12, 8]],
            [[16, 8, 12, 20],
             [12, 20, 8, 16],
             [20, 16, 12, 8],
             [8, 16, 20, 12]]),
        7: ([[12, 8, 20, 16],
             [16, 20, 8, 12],
             [8, 20, 16, 12],
             [8, 12, 20, 16]],
            [[16, 12, 8, 20],
             [8, 20, 12, 16],
             [20, 16, 8, 12],
             [12, 16, 20, 8]]),
        8: ([[16, 8, 12, 20],
             [20, 16, 12, 8],
             [12, 20, 8, 16],
             [8, 12, 16, 20]],
            [[8, 20, 16, 12],
             [12, 8, 20, 16],
             [16, 12, 20, 8],
             [8, 12, 20, 16]]),
    }

    try:
        task_time, rest_time = schedules[b_num]
    except (KeyError, TypeError):
        # Fail loudly here instead of crashing on an unbound local below.
        raise ValueError(
            f"Error in block number: {b_num!r} (expected an integer from 0 to 8)"
        ) from None

    return task_time, rest_time


In [7]:
def find_zero_order(matrix_3d):
    """Debug helper: print the determinant of every leading square sub-block.

    ``matrix_3d`` must have exactly three axes. For each order from 1 up to
    ``min(rows, cols)`` the top-left ``order x order`` corner of every matrix
    in the stack is taken and its determinant printed. Nothing is returned.
    """
    print(matrix_3d.shape,'matrix shape')
    _, n_rows, n_cols = matrix_3d.shape  # unpacking enforces a 3-axis input

    for order in range(1, min(n_rows, n_cols) + 1):
        print(order - 1,'i isssssssssssss')
        leading_block = matrix_3d[:, :order, :order]
        # np.linalg.det works on the last two axes, giving one value per matrix.
        print("det",np.linalg.det(leading_block),order)

    #     if determinant == 0:
    #         return i + 1  # Return the order where the leading minor becomes zero
    
    # return -1  # Return -1 if all leading minors are non-zero

In [8]:
def trial_times_genertor(task_times,rest_times):
    """Interleave task and rest durations: [task0, rest0, task1, rest1, ...].

    Pairs are formed with ``zip``, so the result stops at the shorter input.
    """
    block_times = []
    for task_duration, rest_duration in zip(task_times, rest_times):
        block_times.extend((task_duration, rest_duration))
    return block_times
    

In [9]:

def fill_zeros_with_average(matrix):
    """Replace zeros in a 3-D array by the mean of their non-zero face neighbours.

    The array is modified IN PLACE and also returned. Cells are visited in
    row-major order, so a value filled earlier in the sweep counts as a
    non-zero neighbour for cells visited later. A zero with no non-zero
    neighbour is left as zero.
    """
    extent = (matrix.shape[0], matrix.shape[1], matrix.shape[2])
    # Neighbour offsets, in the order their values enter the average.
    steps = ((-1, 0, 0), (1, 0, 0),
             (0, -1, 0), (0, 1, 0),
             (0, 0, -1), (0, 0, 1))

    for i, j, k in np.ndindex(*extent):
        if matrix[i, j, k] != 0:
            continue

        neighbors = []
        for di, dj, dk in steps:
            ni, nj, nk = i + di, j + dj, k + dk
            inside = (0 <= ni < extent[0]
                      and 0 <= nj < extent[1]
                      and 0 <= nk < extent[2])
            if inside and matrix[ni, nj, nk] != 0:
                neighbors.append(matrix[ni, nj, nk])

        if neighbors:
            matrix[i, j, k] = sum(neighbors) / len(neighbors)

    return matrix

In [10]:
def fill_zeros_with_interpolation(arr):
    """Fill zeros in each row of a 2-D array by 1-D linear interpolation.

    For every row the zero entries are replaced with ``np.interp`` values
    computed from that row's non-zero entries, using the column index as the
    x coordinate. Zeros before the first or after the last non-zero sample
    take the nearest non-zero value, which is how ``np.interp`` treats points
    outside its range.

    Rows that contain no zeros, or no non-zero samples at all, are returned
    unchanged. Previously an all-zero row made ``np.interp`` raise.

    Parameters
    ----------
    arr : numpy.ndarray
        2-D array; it is not modified.

    Returns
    -------
    numpy.ndarray
        A filled copy of ``arr``.
    """
    filled_arr = arr.copy()  # work on a copy so the caller's array is untouched
    for row_idx in range(len(arr)):
        row = arr[row_idx]
        zero_indices = np.where(row == 0)[0]
        non_zero_indices = np.where(row != 0)[0]

        # Nothing to fill, or nothing to interpolate from: leave the row alone.
        if zero_indices.size == 0 or non_zero_indices.size == 0:
            continue

        filled_arr[row_idx, zero_indices] = np.interp(
            zero_indices, non_zero_indices, row[non_zero_indices]
        )

    return filled_arr

In [11]:
def leading_minor_order_13(matrix_3d):
    """Return the order-13 leading principal minor of every matrix in a stack.

    The top-left 13x13 corner of each matrix along the first axis is taken
    and its determinant computed; the result holds one value per matrix.
    """
    minor_order = 13
    top_left = matrix_3d[:, :minor_order, :minor_order]
    return np.linalg.det(top_left)

In [12]:
def calc_csp_v3(x_train,y_train,x_test):
    """Fit a 10-component ``medusa.CSP`` on the training set and project both sets.

    Returns ``(train_feat, test_feat)``, the transformed training and test data.

    NOTE(review): confirm that ``medusa.CSP`` exists at the package top level
    and exposes ``fit`` / ``transform`` in the installed MEDUSA version.
    """
    fitted_filter = medusa.CSP(10).fit(x_train, y_train)
    return fitted_filter.transform(x_train), fitted_filter.transform(x_test)
    

In [13]:
def calc_csp_v2(x_train, y_train, x_test):
    """Tangent-space features from regularized covariance matrices.

    Both the training and the test epochs are converted to covariance
    matrices, a small multiple of the identity is added to each, and the
    result is mapped with a pyRiemann ``TangentSpace`` fitted on the
    training covariances.

    Changes from the previous version:

    * ``x_test`` used to be passed to ``transform`` as raw epochs although
      the mapper was fitted on covariance matrices; it now goes through the
      same covariance + regularization step as the training data.
    * ``TangentSpace`` was built with ``n_components=...``, which is not a
      parameter of its documented constructor (``metric``, ``tsupdate``);
      the keyword has been removed. NOTE(review): add an explicit reduction
      step afterwards if a fixed number of output features is required.

    Parameters
    ----------
    x_train, x_test : array-like of epochs accepted by ``Covariances``
        (presumably ``(n_epochs, n_channels, n_samples)`` -- confirm).
    y_train : array-like
        Training labels, forwarded to ``fit``.

    Returns
    -------
    tuple
        ``(train_feat, test_feat)``.
    """
    epsilon = 0.001  # small diagonal loading added to every covariance matrix

    cov_estimator = Covariances()
    train_cov = cov_estimator.fit_transform(x_train)
    test_cov = cov_estimator.transform(x_test)

    # Same regularization on both sets so they live in the same space.
    loading = epsilon * np.eye(train_cov.shape[1])
    train_cov = train_cov + loading
    test_cov = test_cov + loading

    tangent_space = TangentSpace(metric='euclid')
    tangent_space.fit(train_cov, y_train)
    train_feat = tangent_space.transform(train_cov)
    test_feat = tangent_space.transform(test_cov)

    return train_feat, test_feat
    

In [14]:
def calc_csp(x_train, y_train, x_test):
    """Fit MNE's CSP on the training epochs and transform both sets.

    Before fitting, two diagnostics about ``x_train`` are printed: the
    number of NaN entries and the number of entries equal to zero.

    The previous version also computed ``np.where(x_train == 0)`` into a
    variable that was never read and carried several commented-out
    experiments; both have been removed. Printed output and return values
    are unchanged.

    Parameters
    ----------
    x_train, x_test : numpy.ndarray
        Epoch arrays handed to ``CSP.fit`` / ``CSP.transform``.
    y_train : array-like
        Class label of every training epoch.

    Returns
    -------
    tuple
        ``(train_feat, test_feat)`` as returned by ``CSP.transform``.
    """
    # csp = CSP(n_components=number_of_components, reg='ledoit_wolf', log=True)
    csp = CSP(number_of_components)

    # Diagnostics: NaNs or exact zeros in the input are worth knowing about
    # before the covariance-based fit.
    nan_count = np.isnan(x_train).sum()
    print("Number of NaN values:", nan_count)

    empty_field_count = np.count_nonzero(x_train == 0)
    print("Number of empty fields:", empty_field_count)

    csp_fit = csp.fit(x_train, y_train)
    train_feat = csp_fit.transform(x_train)
    test_feat = csp_fit.transform(x_test)
    return train_feat, test_feat

In [15]:
def class_extractor(number_of_epochs, class_1, class_2, data, labels):
    """Keep only the epochs whose label is ``class_1`` or ``class_2``.

    The output buffers are sized from the label counts over ALL rows of
    ``labels``, while only the first ``number_of_epochs`` epochs are scanned
    and copied, in their original order.

    Returns ``(dataset, Final_labels)`` with shapes
    ``(size, num_channels, epoch_length)`` and ``(size, 1)``.
    """
    label_column = labels[:, 0]
    size = sum(label_column == class_1) + sum(label_column == class_2)

    Final_labels = np.zeros((size, 1)).astype(int)
    dataset = np.zeros((size, num_channels, epoch_length))

    write_pos = 0
    for epoch_idx in range(number_of_epochs):
        current = labels[epoch_idx, 0]
        if current != class_1 and current != class_2:
            continue  # epoch belongs to neither requested class
        dataset[write_pos, :, :] = data[epoch_idx, :, :]
        Final_labels[write_pos, 0] = current
        write_pos += 1

    return dataset, Final_labels

In [16]:
def feature_extractor(dataset, labels, number_of_bands, test_data):
    """Filter-bank CSP features for a training and a test set.

    For band ``b`` (0-based) both sets are band-pass filtered between
    ``4 * (b + 1)`` and ``4 * (b + 2)`` (the first band is 4-8, the second
    8-12, and so on). ``calc_csp`` is fitted on the filtered training data
    and applied to both sets; the per-band features are concatenated along
    axis 1.

    Changes from the previous version (results are unchanged):

    * the per-band ``.copy()`` of both inputs is gone -- the test copy was
      never used, and ``mne.filter.filter_data`` copies its input by default;
    * the MNE logger level is set once instead of once per band;
    * features are gathered in lists and concatenated once;
    * ``number_of_bands < 1`` now raises ``ValueError`` instead of failing
      with ``UnboundLocalError`` at the ``return``.

    Parameters
    ----------
    dataset, test_data : numpy.ndarray
        Epoch arrays to filter (time on the last axis, as ``filter_data``
        expects).
    labels : numpy.ndarray
        2-D label array; column 0 is used as the class vector.
    number_of_bands : int
        Number of consecutive 4-wide bands, starting at 4.

    Returns
    -------
    tuple(numpy.ndarray, numpy.ndarray)
        ``(train_features, test_features)``.
    """
    if number_of_bands < 1:
        raise ValueError("number_of_bands must be at least 1")

    logging.getLogger('mne').setLevel(logging.WARNING)

    band_width = 4
    train_blocks = []
    test_blocks = []

    for b in range(number_of_bands):
        low_cutoff = band_width * (b + 1)
        high_cutoff = low_cutoff + band_width

        filtered_data = mne.filter.filter_data(
            dataset, sampling_freq, low_cutoff, high_cutoff, verbose=False, n_jobs=4
        )
        filtered_data_test = mne.filter.filter_data(
            test_data, sampling_freq, low_cutoff, high_cutoff, verbose=False, n_jobs=4
        )

        train_feats, test_feats = calc_csp(filtered_data, labels[:, 0], filtered_data_test)
        train_blocks.append(train_feats)
        test_blocks.append(test_feats)

    train_features = np.concatenate(train_blocks, axis=1)
    test_features = np.concatenate(test_blocks, axis=1)

    return train_features, test_features

In [17]:
def feature_selector(train_features, labels, number_of_selected_features):
    """Pick features with pymrmr's mRMR ('MID' scheme).

    A DataFrame is built with the labels as the first column followed by
    the feature columns; all column names are converted to strings before
    being handed to ``pymrmr.mRMR``. The selected names are converted back
    to integers and returned as a list.
    """
    table = pd.concat([pd.DataFrame(labels), pd.DataFrame(train_features)], axis=1)
    table.columns = table.columns.astype(str)

    chosen_names = pymrmr.mRMR(table, 'MID', number_of_selected_features)
    return [int(name) for name in chosen_names]

In [18]:
def data_reader(path,p_num,block_list):
    """Load the 'Data' variable of every requested block file into a DataFrame.

    File names follow ``<path>P<p_num>B<b_num>.mat``. Each block number is
    printed as it is read. Returns a dict mapping block number to DataFrame.
    """
    loader_options = dict(
        chars_as_strings=True,
        mat_dtype=True,
        squeeze_me=True,
        struct_as_record=False,
        verify_compressed_data_integrity=False,
        variable_names=None,
    )

    data_dict = {}
    for b_num in block_list:
        print(b_num)
        file_name = path + f"P{p_num}B{b_num}.mat"
        contents = loadmat(file_name, **loader_options)
        # ddf = dd.from_pandas(df, npartitions=10)
        data_dict[b_num] = pd.DataFrame(contents['Data'])
    return data_dict


In [19]:
def interpolate_zeros(matrix):
    """Return a copy of a 2-D array with its zeros filled by 2-D interpolation.

    Non-zero entries are treated as known samples at their (row, col)
    position. Each zero entry is estimated by linear interpolation over
    those scattered samples (``scipy.interpolate.griddata``). Zeros outside
    the convex hull of the known samples, or every zero when a linear
    interpolant cannot be built (for example fewer than three known points),
    fall back to the value of the nearest known sample.

    Changes from the previous version:

    * the filled copy is now returned (the function used to return ``None``);
    * the two ``interp2d`` interpolants are replaced by a single
      ``griddata`` call -- the second interpolant was built from the same
      1-D value vector (``.T`` is a no-op on it) and evaluated with swapped
      coordinates, and ``interp2d`` is not meant for scattered samples.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array; it is not modified.

    Returns
    -------
    numpy.ndarray
        Copy of ``matrix`` with zeros replaced. The copy is returned
        unchanged if there are no zeros or no non-zero entries.
    """
    matrix_copy = matrix.copy()

    zero_mask = matrix == 0
    if not zero_mask.any() or zero_mask.all():
        return matrix_copy  # nothing to fill, or nothing to fill from

    known_points = np.argwhere(~zero_mask)
    known_values = matrix[~zero_mask]       # same row-major order as argwhere
    missing_points = np.argwhere(zero_mask)

    try:
        estimates = interpolate.griddata(
            known_points, known_values, missing_points, method='linear'
        )
    except (RuntimeError, ValueError):
        # Triangulation failed (too few or collinear samples): use the
        # nearest-neighbour fallback for every missing point.
        estimates = np.full(len(missing_points), np.nan)

    estimates = np.asarray(estimates, dtype=float)
    unresolved = np.isnan(estimates)
    if unresolved.any():
        estimates[unresolved] = interpolate.griddata(
            known_points, known_values, missing_points[unresolved], method='nearest'
        )

    matrix_copy[zero_mask] = estimates
    return matrix_copy


In [20]:
def extra_samples_counter(df,class_1,class_2):
    x=0
    i=0
    sampleList = []
    while i<len(df):
        if (df.iloc[i,64]==class_1):
            x+=1
        else:
            i-=1
            sampleList.append(x)
            x=0
            class_1,class_2 = class_2,class_1
        i+=1
    sampleList.append(x)
    print(sampleList)
    

In [21]:
def extra_samples_block_counter(df,trial_order,b_num):
    """Append the run lengths (in samples) of one block to 'sampleList.txt'.

    ``df`` is modified IN PLACE: rows labelled 'Begin' or 'End' in column 64
    are dropped, the index is reset and a 'group' column is added that
    numbers consecutive runs of identical labels.

    For every task name in ``trial_order`` the first four task runs are
    paired with the matching four 'Rest' runs (rest run ``4 * j + i`` for
    task ``j``, repetition ``i``); the eight lengths are printed and written
    as one line of the file.
    """
    boundary_rows = df[df.iloc[:,64].isin(['Begin', 'End'])].index
    df.drop(boundary_rows, inplace=True)
    df.reset_index(drop=True, inplace=True)
    print('hi')

    # A new group starts wherever the label differs from the previous row.
    label_changed = df.iloc[:,64] != df.iloc[:,64].shift(1)
    df['group'] = label_changed.cumsum()

    rest_run_sizes = df[df.iloc[:,64] == 'Rest'].groupby('group').size()

    with open('sampleList.txt', 'a') as file:
        file.write(f'block {b_num+1} '+'\n')
        for trial_num, task_name in enumerate(trial_order):
            print(task_name)
            task_times,rest_times = get_task_rest_times(b_num)
            trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
            # Nominal sample counts; only needed by the disabled comparison below.
            trial_samples = [item*SAMPLING_RATE for item in trial_times]
            task_run_sizes = df[df.iloc[:,64] == task_name].groupby('group').size()

            sampleList = []
            for repetition in range(4):
                task = task_run_sizes.iloc[repetition]
                rest = rest_run_sizes.iloc[4*trial_num+repetition]
                sampleList.extend((task, rest))
            # extra_samples = [x-y for x,y in zip(sampleList,trial_samples)]
            file.write(', '.join(map(str, sampleList)) + f' trial={task_name} '+'\n')
            print(sampleList)
        file.write('\n\n')


    # print(group_counts_Tongue)
    # print(group_counts_Feet)
    # print(group_counts_Hand)
    # print(group_counts_Mis)
    # print(group_counts_Rest)

    # print(group_counts_b.index[0])
    # print(group_counts_b.iloc[0])
    # print(group_counts)


    # for j in range(len(trial_order)):
    #     print(j)
    #     class_2 = 'Rest'
    #     class_1 = trial_order[j]
    #     sampleList = []
    #     x=0
    #     i=0


    #     while i<len(df):
    #         print(i)
    #         if (df.iloc[i,64]!=class_1):
    #             x+=1
    #         else:
    #             i-=1
    #             sampleList.append(x)
    #             x=0
    #             class_1,class_2 = class_2,class_1
    #         i+=1
    #     sampleList.append(x)
    #     df.drop(df.index[0:sum(sampleList)], inplace=True)
    #     df.reset_index(drop=True, inplace=True)
    #     print(sampleList)
        # with open('sampleList.txt', 'w') as file:
        #     # for item in sampleList:
        #     file.write(f"{sampleList}\n")
    

In [22]:

def data_cleaner(df,class_1,class_2,tasks_time):
    """Trim every task/rest segment of a trial to its nominal length.

    ``tasks_time`` holds the nominal duration of each consecutive segment,
    alternating between ``class_1`` and ``class_2`` and starting with
    ``class_1``. For each segment the first ``duration * SAMPLING_RATE``
    rows of the remaining data are kept, then the remaining data is advanced
    to the first row of the other class. A segment is kept only if the row
    at position ``sample_point + 1`` still carries the expected label;
    otherwise it is skipped and the remaining data is not advanced.

    ``df`` itself is not modified. Returns the kept rows with a fresh index.
    """
    # extra_samples_counter(df,class_1,class_2)
    # sys.exit() 
    expected_label, upcoming_label = class_1, class_2
    new_df = pd.DataFrame()
    trial_df = df.copy()
    print(tasks_time)

    last_position = len(tasks_time) - 1
    for position, duration in enumerate(tasks_time):
        sample_point = duration * SAMPLING_RATE
        if trial_df.iloc[sample_point + 1, 64] != expected_label:
            continue

        segment = trial_df.iloc[:sample_point, :]

        if position != last_position:
            # Discard everything before the first row of the next class.
            upcoming_rows = trial_df[trial_df.iloc[:, 64] == upcoming_label].index
            trial_df.drop(trial_df.index[0:upcoming_rows[0]], inplace=True)
            trial_df.reset_index(drop=True, inplace=True)
            expected_label, upcoming_label = upcoming_label, expected_label

        new_df = pd.concat([new_df, segment], axis=0)
        new_df.reset_index(drop=True, inplace=True)

    return new_df

In [23]:
def class_seperator(cleaned_df,class_1,class_2):
    """Return the rows regrouped so all ``class_1`` rows precede ``class_2`` rows.

    Rows are ordered by a rank derived from the label column (position 64):
    ``class_1`` -> 0, ``class_2`` -> 1, with the label itself as a secondary
    key. Rows with any other label get no rank and therefore sort last. The
    index of the result is reset.

    Unlike the previous version, ``cleaned_df`` is NOT modified: the helper
    column, the sort and the index reset are applied to a new frame, so the
    caller's data keeps its original order.

    Parameters
    ----------
    cleaned_df : pandas.DataFrame
        Samples with the class label in column position 64.
    class_1, class_2 : hashable
        Label values of the first and second class.

    Returns
    -------
    pandas.DataFrame
        A new, reordered frame with the same columns as ``cleaned_df``.
    """
    label_column = cleaned_df.columns[64]
    sorting_order = {class_1: 0, class_2: 1}
    rank = cleaned_df.iloc[:, 64].map(sorting_order)

    separated_df = (
        cleaned_df
        .assign(sorting_order=rank)
        .sort_values(by=['sorting_order', label_column])
        .drop('sorting_order', axis=1)
        .reset_index(drop=True)
    )
    return separated_df

In [24]:
def shuffler(dataset,labels):
    """Shuffle ``dataset`` and ``labels`` together with a fixed permutation.

    Both shapes are printed first. NumPy's global random generator is
    re-seeded with 42 on every call, so the same permutation is produced
    each time for a given length (and the global random state is reset as a
    side effect).
    """
    for array in (dataset, labels):
        print(array.shape)

    np.random.seed(42)
    order = np.random.permutation(len(dataset))
    return dataset[order], labels[order]
    

In [33]:
def data_label_attacher(cleaned_df,class_1,class_2,random_flag,class_seperator_flag):
    """Cut the cleaned recording into Kaiser-windowed epochs and label them.

    Two modes are selected by ``class_seperator_flag``:

    * True -- rows are first regrouped with ``class_seperator`` so one class
      precedes the other. Overlapping windows (hop ``TR_SLIDING_POINTS``)
      are cut from the first half of the samples and labelled 0, then from
      the point where the first pass stopped to the end and labelled 1.
      NOTE(review): this assumes both classes contribute the same number of
      samples, so the class boundary sits exactly at the midpoint -- confirm.
    * False -- windows with hop ``SLIDING_POINTS`` are cut in recording
      order; each is labelled from the row at its start: 0 for ``class_1``,
      1 for ``class_2``, 2 otherwise.

    Bug fix: the window used to be applied with ``slice_X *= kaiser_window``
    on a slice of ``X``. That slice is a view, so the taper was written back
    into ``X`` and every sample shared by two overlapping windows was
    tapered twice. Each epoch is now computed into a fresh array and ``X``
    is left untouched. The Kaiser window is also built once instead of once
    per epoch.

    Parameters
    ----------
    cleaned_df : pandas.DataFrame
        Samples in rows; the last column (position 64) holds the label.
    class_1, class_2 :
        Label values of the two classes.
    random_flag : bool
        Currently unused; kept for interface compatibility.
    class_seperator_flag : bool
        Selects the mode described above.

    Returns
    -------
    tuple(numpy.ndarray, numpy.ndarray)
        ``dataset`` of shape
        ``(number_of_epochs, NUMBER_OF_CHANNELS, WINDOW_SAMPLE_LENGTH)`` and
        integer ``labels`` of shape ``(number_of_epochs, 1)``.
    """
    if class_seperator_flag:
        source_df = class_seperator(cleaned_df,class_1,class_2)
    else:
        source_df = cleaned_df

    # Drop the label column and put channels on the first axis.
    new_df_ = source_df.copy()
    new_df_.drop(source_df.columns[-1], axis=1, inplace=True)
    X = np.transpose(new_df_.to_numpy())

    empty_field_count = np.count_nonzero(X == 0)
    print("Number of empty fields in X:", empty_field_count)

    total_samples = int(len(new_df_))
    if class_seperator_flag:
        number_of_epochs = int((total_samples-WINDOW_SAMPLE_LENGTH)/TR_SLIDING_POINTS)
        print(number_of_epochs)
    else:
        number_of_epochs = int(total_samples/WINDOW_SAMPLE_LENGTH)

    dataset = np.zeros((number_of_epochs,NUMBER_OF_CHANNELS,WINDOW_SAMPLE_LENGTH))
    labels = np.zeros((number_of_epochs,1)).astype(int)

    # Same taper for every epoch, so build it once.
    kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)

    if class_seperator_flag:
        # Pass 1: windows that end within the first half -> label 0.
        i = 0
        startIdx = 0
        endIdx = WINDOW_SAMPLE_LENGTH
        while endIdx <= total_samples/2:
            # Multiply into a new array; an in-place `*=` would alter X.
            dataset[i, :, :] = X[:, startIdx:endIdx] * kaiser_window
            labels[i,0] = 0
            startIdx += TR_SLIDING_POINTS
            endIdx += TR_SLIDING_POINTS
            i += 1

        # Pass 2: continue from the end of the last window of pass 1 -> label 1.
        j = i
        startIdx = endIdx-TR_SLIDING_POINTS
        endIdx = startIdx+WINDOW_SAMPLE_LENGTH
        print(j, "j is this")
        while endIdx <= total_samples:
            dataset[j, :, :] = X[:, startIdx:endIdx] * kaiser_window
            labels[j,0] = 1
            startIdx += TR_SLIDING_POINTS
            endIdx += TR_SLIDING_POINTS
            j += 1
        print(j, "j is this")
        # dataset,labels = shuffler(dataset,labels)

    else:
        i = 0
        start_idx = 0
        end_idx = WINDOW_SAMPLE_LENGTH
        while end_idx <= total_samples:
            dataset[i, :, :] = X[:, start_idx:end_idx] * kaiser_window

            # The label is taken from the first sample of the window.
            start_label = cleaned_df.iloc[start_idx, 64]
            if start_label == class_1:
                labels[i,0] = 0
            elif start_label == class_2:
                labels[i,0] = 1
            else:
                labels[i,0] = 2
            start_idx += SLIDING_POINTS
            end_idx += SLIDING_POINTS
            i += 1
        # dataset,labels = shuffler(dataset,labels)

    return dataset,labels




In [26]:
def trial_cutter(data, class_1):
    """Return the rows strictly between the 'Begin_<class_1>' and 'End_<class_1>' markers.

    Markers are looked up in column position 64 and the first occurrence of
    each is used. Their index labels are used as row positions, so ``data``
    is expected to carry a default 0..n-1 index. The result has a fresh
    index and ``data`` is left unchanged.
    """
    trigger_column = data.iloc[:, 64]
    begin_rows = data[trigger_column == "Begin" + "_" + class_1].index
    end_rows = data[trigger_column == "End" + "_" + class_1].index

    first_row = begin_rows[0] + 1   # skip the Begin marker itself
    stop_row = end_rows[0]          # End marker is excluded by the slice
    return data.iloc[first_row:stop_row, :].reset_index(drop=True)

In [27]:
def Begin_End_trigger_modifier(data):
    """Tag every 'Begin' / 'End' marker pair with the task that follows the 'Begin'.

    Works on a copy of ``data``. The i-th 'Begin' row and the i-th 'End' row
    (column position 64) are renamed to 'Begin_<task>' and 'End_<task>',
    where <task> is the label found on the row right after the 'Begin'.
    If the numbers of 'Begin' and 'End' markers differ, a message is printed
    and the copy is returned unmodified.
    """
    df = data.copy()
    marker_column = df.iloc[:, 64]
    begin_rows = df[marker_column == 'Begin'].index
    end_rows = df[marker_column == 'End'].index

    if len(begin_rows) != len(end_rows):
        print("Trigger seinding Exception")
        return df

    for begin_row, end_row in zip(begin_rows, end_rows):
        task_name = str(df.iloc[begin_row + 1, 64])
        df.iloc[begin_row, 64] = "Begin" + "_" + task_name
        df.iloc[end_row, 64] = "End" + "_" + task_name

    return df

In [28]:
def preprocessor(data_,class_1,class_2,tasks_time,set_type,overlap):
    """Turn one raw block into labelled epochs for a pair of classes.

    Steps: tag the Begin/End markers with their task name, cut out the
    ``class_1`` trial, trim every segment to its nominal length and slice
    the result into labelled windows. Intermediate shapes are printed along
    the way.

    ``set_type`` is now validated before any work is done. Previously an
    unknown value only printed "Error in set type" and the function crashed
    later with ``UnboundLocalError`` on ``random_flag``.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Raw block data with the trigger/label in column position 64; it is
        not modified (the marker-tagging step works on its own copy).
    class_1, class_2 :
        Labels of the two classes; the trial is selected by ``class_1``.
    tasks_time : list
        Nominal durations of the consecutive segments of the trial.
    set_type : str
        "TRAIN" or "TEST".
    overlap : bool
        Forwarded to ``data_label_attacher`` as ``class_seperator_flag``.

    Returns
    -------
    tuple
        ``(final_data, final_labels)`` from ``data_label_attacher``.

    Raises
    ------
    ValueError
        If ``set_type`` is neither "TRAIN" nor "TEST".
    """
    if set_type == "TRAIN":
        random_flag = True
    elif set_type == "TEST":
        random_flag = False
    else:
        raise ValueError(
            f"Error in set type: expected 'TRAIN' or 'TEST', got {set_type!r}"
        )

    modified_df = Begin_End_trigger_modifier(data_)
    trial_df = trial_cutter(modified_df,class_1)
    print(trial_df.shape,"trial_df")
    cleaned_df = data_cleaner(trial_df,class_1,class_2,tasks_time)
    print(cleaned_df.shape,"cleaned_df")

    final_data, final_labels = data_label_attacher(cleaned_df,class_1,class_2,random_flag,overlap)
    print(final_data.shape,"final_data shape")
    print(final_labels.shape,"final_labels shape")

    return final_data,final_labels

In [29]:
def trials_set_builder(data_dict,blocks_set,set_label,class_1,class_2,overlap):
    """Build one stacked data/label set from several recording blocks.

    For every block number in ``blocks_set`` the position of ``class_1`` in
    ``trial_order[b_num]`` selects the matching task/rest timings, the block
    is run through ``preprocessor`` and the per-block arrays are stacked
    along the first axis with ``np.vstack``.

    Parameters
    ----------
    data_dict : mapping
        Block number -> raw block, indexed as ``data_dict[b_num]``.
    blocks_set : iterable
        Block numbers to include, in stacking order.
    set_label : str
        ``"TRAIN"`` or ``"TEST"``; forwarded to ``preprocessor``.
    class_1, class_2 : str
        The two class names; forwarded to ``preprocessor``.
    overlap
        Forwarded unchanged to ``preprocessor``.

    Returns
    -------
    tuple
        ``(final_data, final_labels)``, the per-block results stacked in
        ``blocks_set`` order.

    Raises
    ------
    ValueError
        If ``blocks_set`` yields no block (previously an UnboundLocalError).
    """
    final_data = None
    final_labels = None

    for b_num in blocks_set:
        trial_num = trial_order[b_num].index(class_1)
        task_times,rest_times = get_task_rest_times(b_num)
        print(task_times[trial_num],rest_times[trial_num])
        trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
        print(trial_times)

        # An experimental MNE ICA decomposition of the block was prototyped at
        # this point and later disabled; the raw block is used as-is.
        # No defensive copy is needed here: the preprocessing chain copies the
        # block before modifying it.
        dataset,labels = preprocessor(data_dict[b_num],class_1,class_2,trial_times,set_label,overlap)

        if final_data is None:
            final_data = dataset
            final_labels = labels
            print("Before concatenation - final_data shape:", final_data.shape, "dataset shape:", dataset.shape)
        else:
            final_data = np.vstack((final_data, dataset))
            final_labels = np.vstack((final_labels, labels))
            print("After concatenation - final_data shape:", final_data.shape, "final_labels shape:", final_labels.shape)

    if final_data is None:
        raise ValueError("blocks_set is empty: there is no block to build a trial set from")

    return final_data,final_labels

In [70]:
# Load blocks 0-6 of participant 8 through data_reader, using a directory
# relative to the notebook. One result per participant is appended to
# data_dicts_list.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# '../../Participants/P8/P8B0.mat'. The next cell performs the same load for
# participant 9 from an absolute path and rebinds the same variables, so this
# cell looks like a leftover - confirm before relying on it.
block_list = [0,1,2,3,4,5,6]
p_num_list = [8]
data_dicts_list = []
for p_num in p_num_list:
    print(f'reading P{p_num}')
    data_dict = data_reader(f'../../Participants/P{p_num}/', p_num, block_list)
    data_dicts_list.append(data_dict)

reading P8
0


FileNotFoundError: [Errno 2] No such file or directory: '../../Participants/P8/P8B0.mat'

In [30]:
# Load blocks 0-6 of participant 9 through data_reader. data_dicts_list ends
# up with one entry per participant, in p_num_list order; the training cell
# further down indexes it by position, so both lists must stay aligned.
# NOTE(review): the participant directory is a hard-coded absolute path on a
# specific machine; consider a single configurable base directory.
block_list = [0,1,2,3,4,5,6]
p_num_list = [9]
data_dicts_list = []
for p_num in p_num_list:
    print(f'reading P{p_num}')
    data_dict = data_reader(f'/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/Participants/P{p_num}/',p_num,block_list)
    data_dicts_list.append(data_dict)


reading P9
0
1
2
3
4
5
6


In [30]:
# Sanity check: shape of the entry stored under key 2 for the first loaded
# participant.
# NOTE(review): the recorded output is `KeyError: 2`, so in that session the
# first entry of data_dicts_list had no key 2 - verify how data_reader keys
# its result and that the load cell above was run first.
print(data_dicts_list[0][2].shape)

KeyError: 2

In [49]:
# Frame maker: copy the template 'frame.csv' to one 'P<n>.csv' results file
# per participant listed in p_num_list.
# NOTE(review): to_csv overwrites an existing 'P<n>.csv', so re-running this
# cell would replace any results already stored for that participant.
# This cell also rebinds the notebook-wide names PATH, df and p_num_list.
PATH = '/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/EEG/Results/XGBoost/'
df = pd.read_csv(PATH+'frame.csv')
p_num_list = [3]
for p_num in p_num_list:
    df.to_csv(PATH+'P'+str(p_num)+'.csv',index=False)



In [34]:
# Train and evaluate one "<class_1> vs Rest" XGBoost classifier per class and
# participant: build train/test sets from fixed block splits, extract and
# select features, fit, and report train/test accuracy.
import time  # hoisted out of the participant loop (was re-imported every iteration)

PATH = '/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/EEG/Results/XGBoost/'
class_1_list = ['Hand','Feet','Tongue','Mis']
class_2 = 'Rest'
p_num_list = [9]
train_blocks_set = [0,1,2,3,4]
test_blocks_set = [5,6]
# NOTE(review): these two settings are never used; the calls below pass the
# literals True (train) and False (test) as `overlap` instead - confirm which
# values are intended.
Train_overlap = 0
Test_overlap = 0


# `i` is the position of the current participant in data_dicts_list, which is
# assumed to have been loaded in the same order as p_num_list.
i = 0
for p_num in p_num_list:
    for class_1 in class_1_list:
        # Start the clock per class. It used to be started once per
        # participant, so every class's running_time also included the time
        # spent on all previously processed classes.
        start_time = time.time()

        X_tr, Y_tr = trials_set_builder(data_dicts_list[i],train_blocks_set,'TRAIN',class_1,class_2,True)
        X_te, Y_te = trials_set_builder(data_dicts_list[i],test_blocks_set,'TEST',class_1,class_2,False)

        print(X_tr.shape,Y_tr.shape,"train shape")
        print(X_te.shape,Y_te.shape,"test shape")

        [train_features, test_features] = feature_extractor(X_tr, Y_tr, number_of_bands, X_te)
        selected_features = feature_selector(train_features, Y_tr, number_of_selected_features)

        train_acc_list = []
        test_acc_list = []

        # NOTE(review): no random_state is set, so results may differ between
        # runs if the classifier configuration involves randomness.
        clf = XGBClassifier()
        # Single repetition; the loop is kept so the repeat count can be raised.
        for r in range(1):
            clf.fit(train_features[:, selected_features], Y_tr[:,0])

            y_pr_te = clf.predict(test_features[:, selected_features])
            y_pr_tr = clf.predict(train_features[:,selected_features])

            accuracy_te = accuracy_score(Y_te, y_pr_te)
            test_acc_list.append(accuracy_te)

            accuracy_tr = accuracy_score(Y_tr,y_pr_tr)
            train_acc_list.append(accuracy_tr)

        end_time = time.time()
        running_time = end_time-start_time

        # Fields of one results row (see the disabled logging code below).
        participant = p_num
        class1 = class_1
        class2 = class_2
        test_acc = np.average(test_acc_list)
        train_acc = np.average(train_acc_list)
        test_size = X_te.shape
        train_size = X_tr.shape
        # Derived from the block lists so they cannot drift apart
        # ('01234' and '56' for the current configuration).
        train_block = ''.join(str(b) for b in train_blocks_set)
        test_block = ''.join(str(b) for b in test_blocks_set)

        # new_row = [participant, class1, class2,running_time,test_acc,train_acc,test_size,train_size,train_block,test_block]

        # new_row_df = pd.DataFrame([new_row], columns=column_names)
        # rf = pd.read_csv(PATH +'P'+str(p_num)+'.csv')
        # cf = pd.concat([rf, new_row_df], ignore_index=True)
        # cf.to_csv(PATH +'P'+str(p_num)+'.csv',index=False)

        print(train_acc_list,"train")
        print(test_acc_list,"test")

    i+=1

        



# block_order_tr = ['Tongue','Feet','Mis','Hand']
# block_order_tr2 = ['Tongue','Mis','Hand','Feet']
# block_order_te = ['Feet','Hand','Tongue','Mis']
# CLASS_1 = "Hand"
# CLASS_2 = "Rest"
# tasks_time_tr = [16,16,12,20,20,8,8,12]
# tasks_time_tr2 = [20,20,12,12,8,8,16,16]
# tasks_time_te = [16,12,12,8,8,16,20,20]

# df_tr = data_tr_.copy()
# df_tr2 = data_tr2_.copy()
# df_te = data_te_.copy()
# data_tr,labels_tr = preprocessor(df_tr,CLASS_1,CLASS_2,tasks_time_tr,"TRAIN")
# data_tr2,labels_tr2 = preprocessor(df_tr2,CLASS_1,CLASS_2,tasks_time_tr2,"TRAIN")
# data_te,labels_te = preprocessor(df_te,CLASS_1,CLASS_2,tasks_time_te,"TEST")
# data_tr = np.vstack((data_tr, data_tr2))
# labels_tr = np.vstack((labels_tr, labels_tr2))
# print(data_tr.shape)
# print(labels_tr.shape)
# print(data_te.shape)
# print(labels_te.shape)





# print(data_tr.shape,labels_tr.shape)
# print(data_te.shape,labels_te.shape)
# print(labels_te)
# print(indexes)
# print(Begin_indexes)
# print(End_indexes)
# print(df.iloc[1,64])


    




[20, 12, 8, 16] [20, 12, 8, 16]
[20, 20, 12, 12, 8, 8, 16, 16]
(28483, 65) trial_df
[20, 20, 12, 12, 8, 8, 16, 16]
(28000, 65) cleaned_df
Number of empty fields in X: 0
54
27 j is this
54 j is this
(54, 64, 1000) final_data shape
(54, 1) final_labels shape
Before concatenation - final_data shape: (54, 64, 1000) dataset shape: (54, 64, 1000)
[8, 20, 16, 12] [20, 16, 8, 12]
[8, 20, 20, 16, 16, 8, 12, 12]
(28472, 65) trial_df
[8, 20, 20, 16, 16, 8, 12, 12]
(28000, 65) cleaned_df
Number of empty fields in X: 0
54
27 j is this
54 j is this
(54, 64, 1000) final_data shape
(54, 1) final_labels shape
After concatenation - final_data shape: (108, 64, 1000) final_labels shape: (108, 1)
[16, 8, 12, 20] [8, 20, 16, 12]
[16, 8, 8, 20, 12, 16, 20, 12]
(28472, 65) trial_df
[16, 8, 8, 20, 12, 16, 20, 12]
(28000, 65) cleaned_df
Number of empty fields in X: 0
54
27 j is this
54 j is this
(54, 64, 1000) final_data shape
(54, 1) final_labels shape
After concatenation - final_data shape: (162, 64, 1000) fi

Unnamed: 0,participant,class1,class2,running_time,test_acc,train_acc,test_size,train_size,train_block,test_block
7,5,Mis,Rest,111.92424,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
8,6,Hand,Rest,28.528704,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
9,6,Feet,Rest,56.803421,0.767857,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
10,6,Tongue,Rest,84.431607,0.875,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
11,6,Mis,Rest,112.289006,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56


In [32]:
# Merge the per-participant result files into 'ResultsOfAll.csv', then write
# the mean test accuracy per class to 'AverageAcc.csv'.
p_num_list = [3,4,5,6,7,9]

# Read every file first and concatenate once, instead of growing a frame with
# pd.concat inside the loop starting from an empty frame.
participant_frames = []
for p_num in p_num_list:
    participant_frames.append(pd.read_csv(PATH + "P" + str(p_num) + ".csv"))
vf = pd.concat(participant_frames, ignore_index=True)
# Same column layout as before: the expected result columns first (added as
# empty columns if a file lacks them), then any extra columns from the files.
extra_columns = [col for col in vf.columns if col not in column_names]
vf = vf.reindex(columns=list(column_names) + extra_columns)
vf.to_csv(PATH+ 'ResultsOfAll.csv', index=False)

# Work on the table as read back from disk, as before.
vf = pd.read_csv(PATH +"ResultsOfAll.csv")
df = vf

columnNames = ['class','b1234']
class_list=['Hand','Feet','Tongue','Mis']
# NOTE(review): the training cell builds train_block as the string '01234',
# while the stored results show 1234 - presumably read_csv parses it as an
# integer and drops the leading zero. Confirm this filter matches the rows
# that are actually intended.
blk_list = [1234]

# Collect all rows and write 'AverageAcc.csv' once, instead of re-reading and
# re-writing the file for every class.
average_rows = []
for class_ in class_list:
    avg_list = []
    for blk in blk_list:
        gf = df[(df['train_block'] == blk) & (df['class1'] == class_)]
        avg_list.append(gf['test_acc'].mean())
    print(avg_list)
    average_rows.append([class_, avg_list[0]])

kf = pd.DataFrame(average_rows, columns=columnNames)
kf.to_csv(PATH+'AverageAcc.csv',index=False)
kf = pd.read_csv(PATH +'AverageAcc.csv')
kf.head()

[0.8571428571428571]
[0.8065476190476191]
[0.7767857142857143]
[0.75]


Unnamed: 0,class,b1234
0,Hand,0.857143
1,Feet,0.806548
2,Tongue,0.776786
3,Mis,0.75


In [25]:
import pandas as pd

# Toy example: regroup the rows of a frame by the value in its third column
# so that all 'a' rows come before all 'b' rows.
data = {'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter'],
        'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b']}

df = pd.DataFrame(data)

print(df)
print("his")

# Rank assigned to each label; lower ranks are placed first.
sorting_order = {'a': 0, 'b': 1}
label_column = df.columns[2]

# One pipeline instead of a series of in-place edits: attach a temporary rank
# column, sort on (rank, label), drop the rank again and renumber the rows.
df = (
    df.assign(sorting_order=df[label_column].map(sorting_order))
      .sort_values(by=['sorting_order', label_column])
      .drop(columns='sorting_order')
      .reset_index(drop=True)
)

# Show the regrouped frame.
print(df)





    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      4         for     b
4      5     example     b
5      6    purposes     b
6      7          in     a
7      8        this     a
8      9        case     a
9     10          it     b
10    11        does     b
11    12  not matter     b
his
    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      7          in     a
4      8        this     a
5      9        case     a
6      4         for     b
7      5     example     b
8      6    purposes     b
9     10          it     b
10    11        does     b
11    12  not matter     b


In [54]:
# Toy example: lengths of the consecutive runs in the 'label' column,
# assuming the runs alternate between class_1 and class_2.
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter','b'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b','b']
}

df = pd.DataFrame(data)

class_1 = 'a'
class_2 = 'b'
sampleList = []
labels = df.iloc[:, 2].tolist()

x = 0   # length of the run currently being counted
i = 0   # row currently being examined
while i < len(labels):
    if labels[i] != class_1:
        # The run ended: store its length, swap the expected class and look
        # at the same row again without advancing.
        sampleList.append(x)
        x = 0
        class_1, class_2 = class_2, class_1
        continue
    x += 1
    i += 1

# The last run is not followed by a label change, so store it explicitly.
sampleList.append(x)
print(sampleList)

[3, 3, 3, 4]


In [43]:
# Toy example: sizes of the consecutive runs of each label, vectorised.
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter','c'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b','b']
}

df = pd.DataFrame(data)

# A new group id starts whenever 'label' differs from the previous row (in
# either direction), so every consecutive run gets its own id.
label_changed = df['label'].ne(df['label'].shift())
df['group'] = label_changed.cumsum()

# Run sizes, split by label.
rows_a = df.loc[df['label'].eq('a')]
rows_b = df.loc[df['label'].eq('b')]
group_counts = rows_a.groupby('group').size()
group_counts_b = rows_b.groupby('group').size()

print(group_counts_b)
print(group_counts_b.index[0])
print(group_counts_b.iloc[0])
print(group_counts)

group
2    3
4    4
dtype: int64
2
3
group
1    3
3    3
dtype: int64


In [58]:
# Load a single recording block (participant 6, block 7) straight from its
# .mat file and wrap the 'Data' variable in a DataFrame.
p_num = 6
b_num = 7
path = f'../../Participants/P{p_num}/'
mat = loadmat(
    f'{path}P{p_num}B{b_num}.mat',
    chars_as_strings=True,
    mat_dtype=True,
    squeeze_me=True,
    struct_as_record=False,
    verify_compressed_data_integrity=False,
    variable_names=None,
)
df_1 = pd.DataFrame(mat['Data'])


In [59]:
# Run extra_samples_block_counter on the single block loaded in the previous
# cell, using the trial order of block 0.
# NOTE(review): the previous cell loads block 7 (b_num = 7) while
# trial_order[0] is passed here - confirm the pairing is intended. A later
# cell calls the same function with a third argument (the block number).
extra_samples_block_counter(df_1,trial_order[0])

hi
[6191, 10157, 8157, 4065, 10161, 8156, 4060, 6014]
[8156, 8156, 6178, 10157, 10156, 4064, 4063, 6015]
[10158, 6176, 8165, 10155, 4073, 8156, 6184, 4016]
[10165, 10155, 6183, 6177, 4060, 4062, 8162, 8016]


In [82]:
# Run extra_samples_block_counter for blocks 0-6 of the last entry in
# data_dicts_list, passing each block's own trial order and block number.
for b in range(7):
    extra_samples_block_counter(data_dicts_list[-1][b],trial_order[b],b)

hi
Tongue
[3093, 5078, 4078, 2030, 5078, 4079, 2032, 3007]
Feet
[4078, 4078, 3087, 5079, 5078, 2030, 2035, 3007]
Mis
[5080, 3089, 4076, 5079, 2036, 4077, 3093, 2007]
Hand
[5080, 5078, 3092, 3088, 2035, 2030, 4077, 4007]
hi
Feet
[3091, 4082, 2034, 3093, 5082, 2034, 4079, 5007]
Mis
[4083, 2033, 5079, 5082, 2031, 3090, 3087, 4007]
Hand
[2033, 5078, 5079, 4078, 4079, 2036, 3090, 3007]
Tongue
[2033, 3089, 3092, 4078, 5082, 5082, 4079, 2008]
hi
Hand
[4078, 2035, 2036, 5082, 3089, 4083, 5083, 3008]
Feet
[5077, 3088, 4078, 2035, 3088, 5077, 2033, 4007]
Tongue
[3088, 4082, 5082, 3087, 2031, 5079, 4077, 2007]
Mis
[2037, 2035, 3093, 3091, 4076, 5079, 5081, 4007]
hi
Tongue
[3087, 5081, 4082, 2035, 5077, 4077, 2031, 3008]
Mis
[4082, 4083, 3089, 5156, 5078, 2111, 2026, 3007]
Hand
[5105, 3088, 4106, 5077, 2065, 4076, 3122, 2007]
Feet
[5077, 5116, 3090, 3121, 2026, 2049, 4079, 4008]
hi
Mis
[4077, 2034, 2061, 3092, 5171, 4082, 3165, 5008]
Feet
[3201, 4078, 4077, 5177, 2030, 3166, 5083, 2007]
Hand
[5079