In [4]:
import sys
import mne
import scipy.io as sp
from scipy import interpolate
import numpy as np
import random
import pandas as pd
import multiprocessing as mp
import concurrent.futures
from mne.decoding import CSP
import pymrmr
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import logging
from scipy.io import loadmat
from scipy.signal import hamming
from scipy.signal import hann
from scipy.signal import blackman
from scipy.signal import kaiser
from scipy.signal import gaussian
from sklearn.decomposition import FastICA
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.impute import KNNImputer
from sklearn.decomposition import PCA
from pyriemann.estimation import Covariances
from pyriemann.tangentspace import TangentSpace
from pyriemann.classification import MDM
import medusa
import medusa.bci.mi_paradigms



In [5]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)


# Set display options for NumPy
np.set_printoptions(threshold=np.inf)

In [43]:
WINDOW_TIME_LENGTH = 4
SAMPLING_RATE = 250
WINDOW_SAMPLE_LENGTH = WINDOW_TIME_LENGTH*SAMPLING_RATE
NUMBER_OF_CHANNELS = 64
beta = 1.5

num_channels = 64
epoch_length = 1000
sampling_freq = 250
number_of_runs = 10
# number_of_splits = 10
number_of_components = 10
number_of_selected_features = 10
number_of_processes = 10
number_of_bands = 9
# rf = pd.DataFrame()
column_names = ['participant', 'class1', 'class2','running_time','test_acc','train_acc','test_size','train_size','train_block','test_block']
# rf = rf.reindex(columns=column_names)

trial_order=[['Tongue','Feet','Mis','Hand'],
            ['Feet','Mis','Hand','Tongue'],
            ['Hand','Feet','Tongue','Mis'],
            ['Tongue','Mis','Hand','Feet'],
            ['Mis','Feet','Hand','Tongue'],
            ['Feet','Hand','Tongue','Mis'],
            ['Hand','Tongue','Mis','Feet'],
            ['Tongue','Feet','Mis','Hand'],
            ['Mis','Tongue','Hand','Feet']]


In [7]:
def get_task_rest_times(b_num):
    if b_num == 0:
        task_time = [[12, 16, 20, 8],
                    [16, 12, 20, 8],
                    [20, 16, 8, 12],
                    [20, 12, 8, 16]]
        
        rest_time = [[20, 8, 16, 12],
                    [16, 20, 8, 12],
                    [12, 20, 16, 8],
                    [20, 12, 8, 16]]
        
    elif b_num == 1:
        task_time = [[12, 8, 20, 16],
                    [16, 20, 8, 12],
                    [8, 20, 16, 12],
                    [8, 12, 20, 16]]
        
        rest_time = [[16, 12, 8, 20],
                    [8, 20, 12, 16],
                    [20, 16, 8, 12],
                    [12, 16, 20, 8]]
        
    elif b_num == 2:
        task_time = [[16, 8, 12, 20],
                    [20, 16, 12, 8],
                    [12, 20, 8, 16],
                    [8, 12, 16, 20]]
        
        rest_time = [[8, 20, 16, 12],
                    [12, 8, 20, 16],
                    [16, 12, 20, 8],
                    [8, 12, 20, 16]]
        
    elif b_num == 3:
        task_time = [[12, 16, 20, 8],
                    [16, 12, 20, 8],
                    [20, 16, 8, 12],
                    [20, 12, 8, 16]]
        
        rest_time = [[20, 8, 16, 12],
                    [16, 20, 8, 12],
                    [12, 20, 16, 8],
                    [20, 12, 8, 16]]
        
    elif b_num == 4:
        task_time = [[16, 8, 20, 12],
                    [12, 16, 8, 20],
                    [20, 8, 12, 16],
                    [8, 20, 12, 16]]
        
        rest_time = [[8, 12, 16, 20],
                    [16, 20, 12, 8],
                    [12, 16, 8, 20],
                    [20, 8, 12, 16]]
        
    elif b_num == 5:
        task_time = [[16, 12, 8, 20],
                    [20, 16, 12, 8],
                    [8, 16, 20, 12],
                    [12, 8, 16, 20]]

        rest_time = [[12, 8, 16, 20],
                    [16, 8, 20, 12],
                    [20, 12, 16, 8],
                    [8, 16, 12, 20]]
        
    elif b_num == 6:
        task_time = [[16, 8, 12, 20],
                    [20, 8, 16, 12],
                    [8, 16, 12, 20],
                    [16, 20, 12, 8]]

        rest_time = [[16, 8, 12, 20],
                    [12, 20, 8, 16],
                    [20, 16, 12, 8],
                    [8, 16, 20, 12]]     
    elif b_num ==7:
        task_time = [[12, 8, 20, 16],
                    [16, 20, 8, 12],
                    [8, 20, 16, 12],
                    [8, 12, 20, 16]]   
               
        rest_time = [[16, 12, 8, 20],
                    [8, 20, 12, 16],
                    [20, 16, 8, 12],
                    [12, 16, 20, 8]]  
    
    elif b_num == 8:
        task_time = [[16, 8, 12, 20],
                    [20, 16, 12, 8],
                    [12, 20, 8, 16],
                    [8, 12, 16, 20]]
        
        rest_time = [[8, 20, 16, 12],
                    [12, 8, 20, 16],
                    [16, 12, 20, 8],
                    [8, 12, 20, 16]]
        
    else:
        print("Error in block number")

    return task_time,rest_time


In [8]:
def find_zero_order(matrix_3d):
    print(matrix_3d.shape,'matrix shape')
    depth, rows, cols = matrix_3d.shape
    
    for i in range(min(rows, cols)):
        print(i,'i isssssssssssss')
        sub_matrix = matrix_3d[:, :i+1, :i+1]
        determinant = np.linalg.det(sub_matrix)
        print("det",determinant,i+1)
        
    #     if determinant == 0:
    #         return i + 1  # Return the order where the leading minor becomes zero
    
    # return -1  # Return -1 if all leading minors are non-zero

In [9]:
def trial_times_genertor(task_times,rest_times):
    block_times = [item for pair in zip(task_times, rest_times) for item in pair]
    return block_times
    

In [10]:

def fill_zeros_with_average(matrix):
    # Iterate through the matrix
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            for k in range(matrix.shape[2]):
                if matrix[i, j, k] == 0:
                    # Find the neighboring non-zero elements
                    neighbors = []
                    if i > 0 and matrix[i - 1, j, k] != 0:
                        neighbors.append(matrix[i - 1, j, k])
                    if i < matrix.shape[0] - 1 and matrix[i + 1, j, k] != 0:
                        neighbors.append(matrix[i + 1, j, k])
                    if j > 0 and matrix[i, j - 1, k] != 0:
                        neighbors.append(matrix[i, j - 1, k])
                    if j < matrix.shape[1] - 1 and matrix[i, j + 1, k] != 0:
                        neighbors.append(matrix[i, j + 1, k])
                    if k > 0 and matrix[i, j, k - 1] != 0:
                        neighbors.append(matrix[i, j, k - 1])
                    if k < matrix.shape[2] - 1 and matrix[i, j, k + 1] != 0:
                        neighbors.append(matrix[i, j, k + 1])

                    # Fill the zero with the average of neighboring non-zero values
                    if neighbors:
                        matrix[i, j, k] = sum(neighbors) / len(neighbors)

    return matrix

In [11]:
def fill_zeros_with_interpolation(arr):
    filled_arr = arr.copy()  # Create a copy to avoid modifying the original array
    for i in range(len(arr)):
        non_zero_indices = np.where(arr[i] != 0)[0]
        zero_indices = np.where(arr[i] == 0)[0]

        # Interpolate zero values based on surrounding non-zero values
        filled_arr[i, zero_indices] = np.interp(zero_indices, non_zero_indices, arr[i, non_zero_indices])

    return filled_arr

In [12]:
def leading_minor_order_13(matrix_3d):
    order = 13  # Set the order of the leading minor
    
    # Extract the submatrix of order 13 from the top-left corner
    leading_submatrix = matrix_3d[:, :order, :order]
    
    # Calculate the determinant of the submatrix
    determinant = np.linalg.det(leading_submatrix)
    
    
    return determinant

In [13]:
def calc_csp_v3(x_train,y_train,x_test):
    csp = medusa.CSP(10)
    csp_fit = csp.fit(x_train,y_train)
    train_feat = csp_fit.transform(x_train)
    test_feat = csp_fit.transform(x_test)
    return train_feat, test_feat
    

In [14]:
def calc_csp_v2(x_train, y_train, x_test):

    cov_matrices = Covariances().fit_transform(x_train)
    epsilon = 0.001  # Small regularization parameter
    cov_matrices_regularized = cov_matrices + epsilon * np.eye(cov_matrices.shape[1])

    x_train = cov_matrices_regularized

    csp = TangentSpace(metric='euclid', n_components=number_of_components)
    csp_fit = csp.fit(x_train, y_train)
    train_feat = csp_fit.transform(x_train)
    test_feat = csp_fit.transform(x_test)

    return train_feat, test_feat
    

In [15]:
def calc_csp(x_train, y_train, x_test):
    # csp = CSP(n_components=number_of_components, reg='ledoit_wolf', log=True)
    csp = CSP(number_of_components)



    # reshaped_matrix = np.reshape(matrix_3d, (matrix_3d.shape[0], -1))

    # # Calculate the rank of the reshaped 2D matrix
    # rank = np.linalg.matrix_rank(reshaped_matrix)

    # # Determine if the matrix is full rank
    # if rank == min(reshaped_matrix.shape):
    #     print("The 3D matrix is full rank.")
    # else:
    #     print("The 3D matrix is not full rank.")
    
    # find_zero_order(x_train)
    # print("det",np.linalg.det(x_train))

    
    # data = x_train
    # for i in range(data.shape[1]):
    #     for j in range(data.shape[2]):
    #         nonzero_indices = np.where(data[:, i, j] != 0)[0]
    #         zero_indices = np.where(data[:, i, j] == 0)[0]
    #         if len(nonzero_indices) > 1:  # Interpolate only if there are non-zero values
    #             data[zero_indices, i, j] = np.interp(zero_indices, nonzero_indices, data[nonzero_indices, i, j])
    
    # x_train = data

    
    # x_train = fill_zeros_with_average(x_train)
    # x_train = np.add(x_train, 0.000001)



    nan_count = np.isnan(x_train).sum()
    print("Number of NaN values:", nan_count)

    empty_field_count = np.count_nonzero(x_train == 0)
    print("Number of empty fields:", empty_field_count)

    zeros_locations_3d = np.where(x_train == 0)
    # print("Locations of zeros:", zeros_locations)
    

# Printing indices and corresponding values
    # for depth_idx, row_idx, col_idx in zip(zeros_locations_3d[0], zeros_locations_3d[1], zeros_locations_3d[2]):
    #     value_at_zero_location = x_train[depth_idx, row_idx, col_idx]
    #     print(f"Zero found at position ({depth_idx}, {row_idx}, {col_idx}) with value {value_at_zero_location}")


    csp_fit = csp.fit(x_train, y_train)
    train_feat = csp_fit.transform(x_train)
    test_feat = csp_fit.transform(x_test)
    return train_feat, test_feat

In [16]:
def class_extractor(number_of_epochs, class_1, class_2, data, labels):
    size = sum(labels[:,0] == class_1) + sum(labels[:,0] == class_2)
    Final_labels = np.zeros((size,1)).astype(int)
    dataset = np.zeros((size,num_channels, epoch_length))
    index = 0
    for i in range(number_of_epochs):
        if labels[i,0] == class_1 or labels[i,0] == class_2:
            dataset[index,:,:] = data[i,:,:]
            Final_labels[index,0] = labels[i,0]
            index = index + 1
        else:
            continue
            
    return dataset, Final_labels

In [17]:
def feature_extractor(dataset, labels, number_of_bands, test_data):

    low_cutoff = 0
    
    for b in range(number_of_bands):
        logging.getLogger('mne').setLevel(logging.WARNING)
        low_cutoff += 4
        data = dataset.copy()
        data_test = test_data.copy()

        # empty_field_count = np.count_nonzero(data == 0)
        # print("Number of empty fields in data:", empty_field_count,"data shape",data.shape)   

        # empty_field_count = np.count_nonzero(data_test == 0)
        # print("Number of empty fields in data_test:", empty_field_count,"data_test shape",data_test.shape)    

        filtered_data = mne.filter.filter_data(data, sampling_freq, low_cutoff, low_cutoff + 4, verbose = False, n_jobs = 4)
        filtered_data_test = mne.filter.filter_data(test_data, sampling_freq, low_cutoff, low_cutoff + 4, verbose = False, n_jobs = 4)

        # empty_field_count = np.count_nonzero(filtered_data == 0)
        # print("Number of empty fields in filtered_data:", empty_field_count,"filtered_data shape",filtered_data.shape)   

        # empty_field_count = np.count_nonzero(filtered_data_test == 0)
        # print("Number of empty fields in filtered_data_test:", empty_field_count,"filtered_data_test shape",filtered_data_test.shape)  



        # # Reshape data to (samples, features)
        # num_samples_train, num_rows, num_cols = filtered_data.shape
        # num_samples_test, _, _ = filtered_data_test.shape
        # flattened_train_data = filtered_data.reshape(num_samples_train, -1)
        # flattened_test_data = filtered_data_test.reshape(num_samples_test, -1)
        
        # # Apply PCA
        # pca = PCA(n_components=10)
        # filtered_data_pca = pca.fit_transform(flattened_train_data)
        # filtered_data_test_pca = pca.transform(flattened_test_data)      
        # filtered_data_pca_3d = filtered_data_pca.reshape(num_samples_train, num_rows, num_cols)
        # filtered_data_test_pca_3d = filtered_data_test_pca.reshape(num_samples_test, num_rows, num_cols)

        # filtered_data = filtered_data_pca_3d
        # filtered_data_test = filtered_data_test_pca_3d


        #PCA
        # from mne.decoding import UnsupervisedSpatialFilter
        # from sklearn.decomposition import PCA, FastICA

        # pca = UnsupervisedSpatialFilter(PCA(64), average=False)
        # pca_fit = pca.fit(filtered_data)
        # filtered_data = pca_fit.transform(filtered_data)
        # filtered_data_test = pca_fit.transform(filtered_data_test)
        # train_feats = filtered_data
        # test_feats = filtered_data_test

        # filtered_data = data
        # filtered_data_test = data_test
        
        [train_feats, test_feats] = calc_csp(filtered_data, labels[:,0], filtered_data_test)
        if b == 0:
            train_features = train_feats
            test_features = test_feats
        else:
            train_features = np.concatenate((train_features, train_feats), axis = 1)
            test_features = np.concatenate((test_features, test_feats), axis = 1)
    
    return train_features, test_features

In [18]:
def feature_selector(train_features, labels, number_of_selected_features):
    X = pd.DataFrame(train_features)
    y = pd.DataFrame(labels)
    K = number_of_selected_features
    
    df = pd.concat([y,X], axis = 1)
    df.columns = df.columns.astype(str)
        
    selected_features = list(map(int, pymrmr.mRMR(df, 'MID', K)))
    return selected_features

In [19]:
def data_reader(path,p_num,block_list):
    data_dict = {}
    for b_num in block_list:
        print(b_num)
        mat = loadmat(path+'P'+str(p_num)+'B'+str(b_num)+'.mat', chars_as_strings=True, mat_dtype=True, squeeze_me=True, struct_as_record=False, verify_compressed_data_integrity=False, variable_names=None)
        df = pd.DataFrame(mat['Data'])
        # ddf = dd.from_pandas(df, npartitions=10)
        data_dict[b_num] = df
    return data_dict


In [20]:
def get_group_start_indices(dataframe):
    group_indices = []
    current_label = None

    for idx, row in dataframe.iterrows():
        if row.iloc[64] != current_label:
            group_indices.append(idx)
            current_label = row.iloc[64]

    return group_indices

In [21]:
def interpolate_zeros(matrix):
    # Create a copy of the matrix to work with
    matrix_copy = matrix.copy()

    # Create indices meshgrid for non-zero elements
    nonzero_indices = np.argwhere(matrix != 0)
    nonzero_rows, nonzero_cols = nonzero_indices[:, 0], nonzero_indices[:, 1]

    # Create interpolation function for rows and columns separately
    f_rows = interpolate.interp2d(nonzero_cols, nonzero_rows, matrix[nonzero_rows, nonzero_cols], kind='linear')
    f_cols = interpolate.interp2d(nonzero_cols, nonzero_rows, matrix[nonzero_rows, nonzero_cols].T, kind='linear')

    # Find zero indices
    zero_indices = np.argwhere(matrix == 0)

    for idx in zero_indices:
        row, col = idx
        # Interpolate zero values using the interpolation functions
        matrix_copy[row, col] = (f_rows(col, row) + f_cols(row, col)) / 2


In [22]:
def extra_samples_counter(df,class_1,class_2):
    x=0
    i=0
    sampleList = []
    while i<len(df):
        if (df.iloc[i,64]==class_1):
            x+=1
        else:
            i-=1
            sampleList.append(x)
            x=0
            class_1,class_2 = class_2,class_1
        i+=1
    sampleList.append(x)
    print(sampleList)
    

In [23]:
def extra_samples_block_counter(df,trial_order,b_num):

    df.drop(df[df.iloc[:,64].isin(['Begin', 'End'])].index, inplace=True)
    df.reset_index(drop=True, inplace=True)
    print('hi')
    
    df['group'] = (df.iloc[:,64] != df.iloc[:,64].shift(1)).cumsum()
    # group_counts_Tongue = df[df.iloc[:,64] == 'Tongue'].groupby('group').size()
    # group_counts_Feet = df[df.iloc[:,64] == 'Feet'].groupby('group').size()
    # group_counts_Hand = df[df.iloc[:,64] == 'Hand'].groupby('group').size()
    # group_counts_Mis = df[df.iloc[:,64] == 'Mis'].groupby('group').size()
    # group_counts_Rest = df[df.iloc[:,64] == 'Rest'].groupby('group').size()

    
    group_counts_Rest = df[df.iloc[:,64] == 'Rest'].groupby('group').size()
    with open('sampleList.txt', 'a') as file:
        file.write(f'block {b_num+1} '+'\n')
        for j in range (len(trial_order)):
            print(trial_order[j])
            trial_num = j
            task_times,rest_times = get_task_rest_times(b_num)
            trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
            trial_samples = [item*SAMPLING_RATE for item in trial_times]
            group_counts_task = df[df.iloc[:,64] == trial_order[j]].groupby('group').size()
            sampleList = []
            for i in range(4):
                task = group_counts_task.iloc[i]
                rest = group_counts_Rest.iloc[4*j+i]
                sampleList.append(task)
                sampleList.append(rest)
            # extra_samples = [x-y for x,y in zip(sampleList,trial_samples)]
            file.write(', '.join(map(str, sampleList)) + f' trial={trial_order[j]} '+'\n')
            print(sampleList)
        file.write('\n\n')


    # print(group_counts_Tongue)
    # print(group_counts_Feet)
    # print(group_counts_Hand)
    # print(group_counts_Mis)
    # print(group_counts_Rest)

    # print(group_counts_b.index[0])
    # print(group_counts_b.iloc[0])
    # print(group_counts)


    # for j in range(len(trial_order)):
    #     print(j)
    #     class_2 = 'Rest'
    #     class_1 = trial_order[j]
    #     sampleList = []
    #     x=0
    #     i=0


    #     while i<len(df):
    #         print(i)
    #         if (df.iloc[i,64]!=class_1):
    #             x+=1
    #         else:
    #             i-=1
    #             sampleList.append(x)
    #             x=0
    #             class_1,class_2 = class_2,class_1
    #         i+=1
    #     sampleList.append(x)
    #     df.drop(df.index[0:sum(sampleList)], inplace=True)
    #     df.reset_index(drop=True, inplace=True)
    #     print(sampleList)
        # with open('sampleList.txt', 'w') as file:
        #     # for item in sampleList:
        #     file.write(f"{sampleList}\n")
    

In [24]:

def data_cleaner(df,class_1,class_2,tasks_time):
    # extra_samples_counter(df,class_1,class_2)
    # sys.exit() 
    class_x = class_1
    class_y = class_2
    new_df = pd.DataFrame()
    trial_df = df.copy() 
    print(tasks_time)
    for i in range(len(tasks_time)):
        sample_point = tasks_time[i]*SAMPLING_RATE
        if(trial_df.iloc[sample_point+1,64] == class_x ):
            if(i==len(tasks_time)-1):
                temp_df = trial_df.iloc[:sample_point,:]
                new_df = pd.concat([new_df, temp_df], axis=0)
                new_df.reset_index(drop=True, inplace=True)
            else:    
                temp_df = trial_df.iloc[:sample_point,:]
                next_task_idx = trial_df[trial_df.iloc[:, 64] == class_y].index
                trial_df.drop(trial_df.index[0:next_task_idx[0]], inplace=True)
                trial_df.reset_index(drop=True, inplace=True)
                new_df = pd.concat([new_df, temp_df], axis=0)
                new_df.reset_index(drop=True, inplace=True)
                class_x,class_y = class_y,class_x

    return new_df

In [25]:
def class_seperator(cleaned_df,class_1,class_2):
    # df = cleaned_df.sort_values(by=cleaned_df.columns[64]).reset_index(drop=True)
    # print(seperated_df.head(14003))
    # print(cleaned_df.head(5003))

    df = cleaned_df
    sorting_order = {class_1: 0, class_2: 1}

    df['sorting_order'] = df.iloc[:, 64].map(sorting_order)
    df.sort_values(by=['sorting_order', df.columns[64]], inplace=True)
    df.drop('sorting_order', axis=1, inplace=True)
    df.reset_index(drop=True, inplace=True)

    return df

In [26]:
def shuffler(dataset,labels):
    print(dataset.shape)
    print(labels.shape)
    np.random.seed(42)
    indices = np.random.permutation(len(dataset))
    shuffled_dataset = dataset[indices]
    shuffled_labels = labels[indices]
    return shuffled_dataset,shuffled_labels
    

In [27]:
def cal_epoch(df_len,sliding_len,window_len):
    print(window_len,sliding_len,df_len)
    number_of_epochs = int((int(df_len-window_len)/sliding_len)) +1
    # if((df_len-window_len)%sliding_len==0):
    #     number_of_epochs = int((int(df_len-window_len)/sliding_len)) +1
    # else:
    #     number_of_epochs = int((int(df_len-window_len)/sliding_len)) +2


    
    # a = window_len
    # d = sliding_len
    # n = df_len
    # k = int((n - a) / d)
    # print(k)
    # epoch = float((k * (a - d) + n) / a)
    return number_of_epochs

In [28]:
print(cal_epoch(3,2,10))

10 2 3
-2


In [29]:
def data_label_attacher(cleaned_df,class_1,class_2,random_flag,class_seperator_flag,sliding_time):
    SLIDING_POINTS = sliding_time*SAMPLING_RATE
    window_time = WINDOW_TIME_LENGTH
    new_df_ = cleaned_df.copy()
    new_df_.drop(cleaned_df.columns[-1], axis=1, inplace=True)
    X = new_df_.to_numpy()
    X = np.transpose(X)
    number_of_epochs = cal_epoch(int(int(len(cleaned_df)/SAMPLING_RATE)),sliding_time,window_time)
    print(number_of_epochs)
    dataset = np.zeros((number_of_epochs,NUMBER_OF_CHANNELS,WINDOW_SAMPLE_LENGTH))
    labels = np.zeros((number_of_epochs,1)).astype(int)

    index = get_group_start_indices(cleaned_df)
    index.append(len(cleaned_df))
    k = 0  
    startIdx = k * WINDOW_SAMPLE_LENGTH
    endIdx = (k+1) * WINDOW_SAMPLE_LENGTH 
    l = 0
    label = 1
    for i in range(number_of_epochs):
        print(i,"i is")
        print(l,"l is")
        if(startIdx>=index[l] and endIdx<+index[l+1]):
            print(startIdx,endIdx,"start and end in if")
            slice_X = X[:, startIdx:endIdx]

            kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)
            slice_X *= kaiser_window

            dataset[i, :, :] = slice_X
            labels[i,0] = label

        else:
            
            temp = endIdx-index[l+1]
            print(temp,endIdx,index[l+1],"temp,end,index l+1")
            slice_X = X[:, startIdx:endIdx]
            kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)
            slice_X *= kaiser_window
            dataset[i, :, :] = slice_X

            if(temp<=WINDOW_SAMPLE_LENGTH/2):

                labels[i,0] = label
            else:
                labels[i,0] = not(label)

            if(startIdx>=index[l+1]):
                l+=1
                label = not(label)

                

            

        startIdx+=SLIDING_POINTS
        endIdx+=SLIDING_POINTS
    








        # a = df_len - wdinow_len
        # a/sliding_len
        # b = a%sliding_len





####################################################


    # new_df_ = cleaned_df.copy()
    # new_df_.drop(cleaned_df.columns[-1], axis=1, inplace=True)
    # X = new_df_.to_numpy()
    # X = np.transpose(X)
    # number_of_epochs = int(len(new_df_)/WINDOW_SAMPLE_LENGTH)
    # number_of_epochs = int((int(len(new_df_))-WINDOW_SAMPLE_LENGTH)/SLIDING_POINTS) +1

    
    # dataset = np.zeros((number_of_epochs,NUMBER_OF_CHANNELS,WINDOW_SAMPLE_LENGTH))
    # labels = np.zeros((number_of_epochs,1)).astype(int)
















    
    # #Initialization
    # if class_seperator_flag:
    #     seperated_class_df = class_seperator(cleaned_df,class_1,class_2)
    #     new_df_ = seperated_class_df.copy()
    #     new_df_.drop(seperated_class_df.columns[-1], axis=1, inplace=True)
    #     X = new_df_.to_numpy()
    #     X = np.transpose(X)
    #     empty_field_count = np.count_nonzero(X == 0)
    #     print("Number of empty fields in X:", empty_field_count)
    #     # zero_indices = np.where(X == 0)
    #     # print("befor filling",len(zero_indices[0]))
    #     # X[zero_indices] += 0.001
    #     # zero_indices = np.where(X == 0)
    #     # print("after filling",len(zero_indices[0]))
    #     number_of_epochs = int((int(len(new_df_))-WINDOW_SAMPLE_LENGTH)/TR_SLIDING_POINTS)
    #     print(number_of_epochs)
    # else :  
    #     new_df_ = cleaned_df.copy()
    #     new_df_.drop(cleaned_df.columns[-1], axis=1, inplace=True)
    #     X = new_df_.to_numpy()
    #     X = np.transpose(X)
    #     empty_field_count = np.count_nonzero(X == 0)
    #     print("Number of empty fields in X:", empty_field_count)
    #     # zero_indices = np.where(X == 0)
    #     # print("befor filling",len(zero_indices[0]))
    #     # X[zero_indices] += 0.001
    #     # zero_indices = np.where(X == 0)
    #     # print("after filling",len(zero_indices[0]))

    #     number_of_epochs = int(len(new_df_)/WINDOW_SAMPLE_LENGTH)

    # dataset = np.zeros((number_of_epochs,NUMBER_OF_CHANNELS,WINDOW_SAMPLE_LENGTH))
    # labels = np.zeros((number_of_epochs,1)).astype(int)

    # if class_seperator_flag:
    #     i = 0  
    #     startIdx = i * WINDOW_SAMPLE_LENGTH
    #     endIdx = (i+1) * WINDOW_SAMPLE_LENGTH 
    #     while(endIdx<=int(len(new_df_))/2):
    #         slice_X = X[:, startIdx:endIdx]

    #         kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)
    #         slice_X *= kaiser_window

    #         dataset[i, :, :] = slice_X
    #         labels[i,0] = 0
    #         # if (seperated_class_df.iloc[startIdx, 64] == class_1):
    #         #     labels[i,0] = 0
    #         # elif(seperated_class_df.iloc[startIdx, 64] == class_2):
    #         #     labels[i,0] = 1
    #         # else:
    #         #     labels[i,0] = 2
    #         startIdx+=TR_SLIDING_POINTS
    #         endIdx+=TR_SLIDING_POINTS
    #         i+=1
    #     # print(int(len(new_df_))/2,"len")    
    #     # print(endIdx,"endIdx")    
    #     # print(seperated_class_df.iloc[endIdx-2:endIdx+2,64])
       
    #     j = i
        
    #     startIdx = endIdx-TR_SLIDING_POINTS
    #     endIdx = startIdx+WINDOW_SAMPLE_LENGTH
    #     print(j, "j is this")
    #     while(endIdx<=int(len(new_df_))):
    #         slice_X = X[:, startIdx:endIdx]

    #         kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)
    #         slice_X *= kaiser_window

    #         dataset[j, :, :] = slice_X
    #         labels[j,0] = 1
    #         # if (cleaned_df.iloc[startIdx, 64] == class_1):
    #         #     labels[j,0] = 0
    #         # elif(cleaned_df.iloc[startIdx, 64] == class_2):
    #         #     labels[j,0] = 1
    #         # else:
    #         #     labels[j,0] = 2
    #         startIdx+=TR_SLIDING_POINTS
    #         endIdx+=TR_SLIDING_POINTS
    #         j+=1
    #     print(j, "j is this")
    #     # dataset,labels = shuffler(dataset,labels)

    # else:
    #     i = 0  
    #     start_idx = i * WINDOW_SAMPLE_LENGTH
    #     end_idx = (i+1) * WINDOW_SAMPLE_LENGTH 
    #     while (end_idx<=int(len(new_df_))):
    #         slice_X = X[:, start_idx:end_idx]

    #         kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,beta)
    #         slice_X *= kaiser_window
            
    #         dataset[i, :, :] = slice_X
    #         if (cleaned_df.iloc[start_idx, 64] == class_1):
    #             labels[i,0] = 0
    #         elif(cleaned_df.iloc[start_idx, 64] == class_2):
    #             labels[i,0] = 1
    #         else:
    #             labels[i,0] = 2
    #         start_idx+=SLIDING_POINTS
    #         end_idx+=SLIDING_POINTS
    #         i+=1
    #     # dataset,labels = shuffler(dataset,labels)



#####################################################













    #For training and test purpose
    # if random_flag:
    #     randomlist = random.sample(range(number_of_epochs), number_of_epochs)
    # else:
    #     randomlist = list(range(number_of_epochs))
    #Labeling the data



    # for i in range(number_of_epochs):
    #     start_idx = randomlist[i] * WINDOW_SAMPLE_LENGTH + SLIDING_POINTS
    #     end_idx = (randomlist[i] + 1) * WINDOW_SAMPLE_LENGTH
    #     slice_X = X[:, start_idx:end_idx]

    #     # hamming_window = hamming(WINDOW_SAMPLE_LENGTH)
    #     # slice_X *= hamming_window

    #     # hanning_window = hann(WINDOW_SAMPLE_LENGTH)
    #     # slice_X *= hanning_window

    #     # blackman_window = blackman(WINDOW_SAMPLE_LENGTH)
    #     # slice_X *= blackman_window

    #     # kaiser_window = kaiser(WINDOW_SAMPLE_LENGTH,0.5)
    #     # slice_X *= kaiser_window

    #     # gaussian_window = gaussian(WINDOW_SAMPLE_LENGTH,0.5)
    #     # slice_X *= gaussian_window


    #     dataset[i, :, :] = slice_X
    #     if (cleaned_df.iloc[randomlist[i] * WINDOW_SAMPLE_LENGTH, 64] == class_1):
    #         labels[i,0] = 0
    #     elif(cleaned_df.iloc[randomlist[i] * WINDOW_SAMPLE_LENGTH, 64] == class_2):
    #         labels[i,0] = 1
    #     else:
    #         labels[i,0] = 2
    
    # empty_field_count = np.count_nonzero(dataset == 0)
    # print("Number of empty fields in dataset:", empty_field_count,"dataset shape",dataset.shape)
    print(labels)
    return dataset,labels




In [30]:
def trial_cutter(data, class_1):
    df = data.copy()
    Begin_trigger = "Begin" + "_" + class_1
    End_trigger = "End" + "_" + class_1
    Begin_idx = df[df.iloc[:, 64] == Begin_trigger].index
    End_idx = df[df.iloc[:, 64] == End_trigger].index
    trial_df = df.iloc[Begin_idx[0]+1:End_idx[0],:]
    trial_df.reset_index(drop=True, inplace=True)
    trial_df.head()
    return trial_df

In [31]:
def Begin_End_trigger_modifier(data):
    df = data.copy()
    Begin_indexes = df[df.iloc[:, 64] == 'Begin'].index
    End_indexes = df[df.iloc[:, 64] == 'End'].index
    if(len(Begin_indexes)==len(End_indexes)):
        for i in range(len(Begin_indexes)):
            index = Begin_indexes[i]+1
            val = df.iloc[index,64]
            df.iloc[Begin_indexes[i],64] = "Begin" + "_" + str(val)
            df.iloc[End_indexes[i],64]   =  "End" + "_" + str(val)
    else:
        print("Trigger seinding Exception")
    
    return df

In [32]:
def preprocessor(data_,class_1,class_2,tasks_time,set_type,clean_flag,sliding_time):
    CLASS_1 = class_1
    CLASS_2 = class_2
    df = data_.copy()
    modified_df = Begin_End_trigger_modifier(df)
    trial_df = trial_cutter(modified_df,CLASS_1)
    print(trial_df.shape,"trial_df")
    indexes = get_group_start_indices(trial_df)
    print(indexes,'tasks index starting point')
    if clean_flag:
        cleaned_df = data_cleaner(trial_df,CLASS_1,CLASS_2,tasks_time)
        final_df = cleaned_df.copy()
    else:
        final_df = trial_df.copy()
    print(final_df.shape,"final_df")

    if set_type =="TRAIN":
        random_flag = True
    elif set_type =="TEST":
        random_flag = False
    else:
        print("Error in set type")

  
    final_data, final_labels = data_label_attacher(final_df,CLASS_1,CLASS_2,random_flag,clean_flag,sliding_time)
      
    print(final_data.shape,"final_data shape")
    print(final_labels.shape,"final_labels shape")
    
    return final_data,final_labels

In [33]:
def trials_set_builder(data_dict,blocks_set,set_label,class_1,class_2,clean_flag,sliding_time):
    counter = 0

    for b_num in blocks_set:
        trial_num = trial_order[b_num].index(class_1)
        task_times,rest_times = get_task_rest_times(b_num)
        print(task_times[trial_num],rest_times[trial_num])
        trial_times = trial_times_genertor(task_times[trial_num],rest_times[trial_num])
        print(trial_times)
        data = data_dict[b_num]
        df = data.copy()
        # last_column = df.pop(df.columns[-1])
        # df.drop(df.columns[-1], axis=1, inplace=True)
        # eeg_data = df.to_numpy().T  # Transpose to have channels in columns

        # channel_names = [f'Ch{i+1}' for i in range(63)]

        # # Create MNE-Python RawArray object
        # info = mne.create_info(ch_names=channel_names, sfreq=sampling_freq, ch_types='eeg')
        # raw = mne.io.RawArray(eeg_data, info)

        # # Apply ICA
        # ica = mne.preprocessing.ICA(n_components=20, random_state=97, max_iter=800)
        # ica.fit(raw)
        # ica_components = ica.get_components()

        # # Convert the ICA components to a DataFrame
        # df2 = pd.DataFrame(data=ica_components.T, columns=channel_names)
        # df2 = df2.assign(LastColumn=last_column)
        # # df = data.copy(deep=False)
        dataset,labels = preprocessor(df,class_1,class_2,trial_times,set_label,clean_flag,sliding_time)
        # print(dataset.shape)

        if counter == 0 :
            final_data = dataset
            final_labels = labels
            print("Before concatenation - final_data shape:", final_data.shape, "dataset shape:", dataset.shape)
        else:
            final_data = np.vstack((final_data, dataset))
            final_labels = np.vstack((final_labels, labels))
            print("After concatenation - final_data shape:", final_data.shape, "final_labels shape:", final_labels.shape)

        counter+=1 
    # empty_field_count = np.count_nonzero(final_data == 0)
    # print("Number of empty fields in final_data:", empty_field_count,"final_data shape",final_data.shape)
    return final_data,final_labels

In [34]:
block_list = [0,1,2,3,4,5,6]
p_num_list = [9]
data_dicts_list = []
for p_num in p_num_list:
    print(f'reading P{p_num}')
    data_dict = data_reader(f'../../Participants/P{p_num}/', p_num, block_list)
    data_dicts_list.append(data_dict)

reading P3
0


FileNotFoundError: [Errno 2] No such file or directory: '../../Participants/P3/P3B0.mat'

In [97]:
block_list = [0,1,2,3,4,5,6]
p_num_list = [3,4,5,6,7,8,9,10]
data_dicts_list = []
for p_num in p_num_list:
    print(f'reading P{p_num}')
    data_dict = data_reader(f'/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/Participants/P{p_num}/',p_num,block_list)
    data_dicts_list.append(data_dict)


reading P3
0
1
2
3
4
5
6
reading P4
0
1
2
3
4
5
6
reading P5
0
1
2
3
4
5
6
reading P6
0
1
2
3
4
5
6
reading P7
0
1
2
3
4
5
6
reading P8
0
1
2
3
4
5
6
reading P9
0
1
2
3
4
5
6
reading P10
0
1
2
3
4
5
6


In [95]:

data_pd.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
0,-11021.563477,2011.709229,-55.761044,520.408569,-20684.441406,-1321.571045,2844.5,1897.293945,3873.412109,-3390.763916,6468.262695,3895.379639,3740.709229,2350.338623,3639.767578,518.56427,1706.194214,239.164932,-1847.359497,1909.812256,-2326.628418,4100.845215,-2881.223389,2378.157715,6946.108887,-4292.529297,-22793.576172,-2901.924805,2222.550293,5265.567383,-1127.241821,-20955.636719,-2246.488525,4819.799805,2204.847168,5731.722168,3753.692871,-363.762787,5798.85791,5950.319336,1352.352661,3827.741211,3054.537598,82.662178,-2538.405518,-2058.649414,-503.768433,1303.559082,936.725891,-1209.23877,-2432.506592,2192.124268,3957.295898,3311.276611,5188.168457,1361.085571,-5259.562012,2357.348145,-4053.833008,2856.387451,-9436.401367,-2186.429443,3584.55127,7735.489258,Begin
1,-11019.81543,2019.473389,-45.985691,522.216309,-20692.701172,-1314.602661,2853.969971,1901.456787,3874.744385,-3404.037354,6471.873535,3898.243408,3740.831055,2349.407715,3638.776123,513.649597,1693.09436,223.630219,-1854.504761,1899.594482,-2330.757812,4096.545898,-2880.017334,2384.291992,6945.195801,-4302.51123,-22803.544922,-2898.099609,2227.588135,5274.783203,-1128.816406,-20959.650391,-2235.976807,4827.412109,2209.282471,5740.789551,3760.789795,-360.542023,5806.066895,5954.19873,1354.140991,3832.538818,3055.498779,82.907608,-2540.127197,-2055.697998,-504.444061,1296.86731,923.52417,-1216.108276,-2442.834229,2187.369873,3960.153076,3309.221924,5181.779785,1353.817749,-5254.267578,2362.797363,-4062.058594,2854.632568,-9444.953125,-2170.125732,3593.073975,7745.953613,Feet
2,-11018.658203,2024.80957,-39.534775,530.256836,-20686.820312,-1310.782837,2858.879883,1904.300537,3876.077148,-3399.990234,6470.980957,3898.312256,3738.900879,2348.226562,3631.036621,515.042114,1696.437378,225.450516,-1856.909302,1893.876953,-2354.367676,4094.231689,-2880.945801,2385.681396,6943.491699,-4314.899414,-22802.847656,-2901.995605,2230.090332,5274.411621,-1128.672852,-20954.513672,-2233.33667,4832.188965,2213.082275,5745.81543,3766.33667,-356.443665,5810.341797,5957.78125,1356.104614,3830.15332,3055.314697,81.365891,-2541.248535,-2055.880127,-503.891418,1296.928589,923.163208,-1219.266968,-2446.919678,2186.41333,3959.554688,3307.647705,5169.665039,1355.814209,-5254.508301,2360.286865,-4065.75,2853.093018,-9444.794922,-2175.619141,3596.796631,7750.266113,Feet
3,-11012.609375,2030.284668,-37.763271,530.58429,-20690.123047,-1309.931274,2862.270508,1906.595947,3872.74292,-3399.393311,6471.085449,3897.111084,3738.003418,2346.662598,3628.071289,519.368469,1695.479858,219.108231,-1857.56604,1895.371826,-2362.322021,4093.126953,-2882.567871,2387.009277,6942.941406,-4313.024414,-22810.306641,-2905.151855,2233.378174,5279.085449,-1130.351196,-20958.335938,-2235.542236,4833.381836,2215.358887,5750.805664,3767.092773,-359.498108,5813.189453,5958.614258,1355.983398,3831.019287,3056.532227,80.957245,-2543.448486,-2053.35083,-505.899872,1296.914795,919.536926,-1225.525513,-2447.840576,2184.288574,3958.900391,3305.175537,5166.929199,1356.535034,-5256.093262,2360.536133,-4052.152344,2850.23584,-9439.833008,-2176.58252,3601.0,7753.942383,Feet
4,-11018.163086,2029.341431,-36.43491,528.657288,-20691.441406,-1306.457764,2863.756836,1908.372437,3872.261475,-3406.937012,6470.541504,3904.807129,3736.581787,2349.560547,3647.142578,511.59903,1696.021362,223.772003,-1857.920654,1895.750977,-2358.227051,4093.004395,-2886.643311,2386.82959,6941.325195,-4315.922852,-22809.181641,-2911.917725,2231.532471,5273.249023,-1130.122803,-20960.558594,-2237.228027,4832.560547,2215.146729,5751.047363,3767.958984,-353.559998,5815.068359,5959.008301,1362.24585,3839.688721,3059.326416,81.026413,-2535.724121,-2060.143066,-508.467041,1295.243042,920.668091,-1230.667847,-2444.880859,2182.814697,3955.112305,3303.873291,5171.141602,1356.511353,-5259.299805,2359.301025,-4052.409424,2844.880127,-9443.042969,-2173.979492,3598.565918,7754.059082,Feet
5,-11021.588867,2028.514282,-35.685005,526.568115,-20696.210938,-1308.647217,2865.092773,1907.183228,3871.609863,-3408.464355,6470.27002,3900.873779,3736.827393,2348.731689,3642.998047,510.306274,1696.633545,228.145447,-1855.187012,1901.593872,-2360.911133,4097.163574,-2884.800781,2389.007568,6942.991211,-4315.833984,-22807.492188,-2911.4104,2232.099854,5268.495117,-1131.218872,-20964.613281,-2239.107666,4830.282227,2214.878418,5751.216309,3767.312012,-358.446869,5815.206055,5960.952148,1359.625732,3838.444092,3056.155273,79.770828,-2538.739014,-2059.480957,-507.202301,1296.049194,925.592651,-1220.195679,-2439.039795,2183.442871,3955.188721,3306.850586,5177.568359,1361.01062,-5256.822266,2361.042725,-4064.005615,2847.151367,-9448.557617,-2171.188721,3598.11377,7753.884277,Feet
6,-11021.239258,2030.351196,-33.54747,525.322632,-20696.494141,-1310.082886,2866.712891,1906.408325,3869.309082,-3398.438965,6472.456055,3894.569092,3736.108154,2344.918213,3625.544678,513.878601,1688.326782,211.840073,-1854.015015,1896.704712,-2347.615723,4100.342773,-2880.659424,2393.116455,6948.391113,-4312.525391,-22804.660156,-2903.724609,2236.131104,5275.773926,-1126.443115,-20970.574219,-2242.204834,4829.734375,2212.458252,5752.037598,3767.03418,-364.721375,5815.763184,5962.640137,1354.452026,3829.649414,3055.492188,78.670891,-2546.6875,-2062.493164,-510.106964,1293.689697,922.652222,-1226.171387,-2440.188477,2185.425049,3959.126709,3309.771973,5183.16748,1362.749756,-5252.098145,2366.099121,-4055.411865,2856.59375,-9456.298828,-2175.10498,3599.950439,7758.121582,Feet
7,-11028.858398,2026.954956,-36.283092,521.142639,-20694.34375,-1311.525391,2864.979736,1907.063599,3871.974609,-3402.130371,6474.251465,3898.567383,3738.499756,2347.592041,3635.621582,512.206787,1695.558105,227.058838,-1854.790283,1900.865356,-2344.624268,4098.453125,-2880.065674,2392.516846,6948.717773,-4315.109375,-22805.537109,-2905.564209,2234.203369,5271.210449,-1130.13501,-20964.382812,-2245.435303,4826.760254,2207.002441,5748.532227,3764.275391,-362.343109,5814.562988,5961.316895,1355.639038,3833.016846,3057.807617,80.482437,-2542.920654,-2060.337158,-508.818085,1296.38208,922.75946,-1220.531616,-2441.400146,2187.358643,3960.137695,3311.175293,5183.144043,1361.956909,-5253.34375,2363.436279,-4061.248291,2851.373779,-9453.235352,-2183.147705,3596.514893,7755.045898,Feet
8,-11033.777344,2022.609619,-41.882507,515.625549,-20698.496094,-1315.903809,2859.542725,1904.191162,3870.384521,-3404.43457,6474.39209,3894.563965,3742.819336,2348.259521,3629.978027,511.294617,1692.123779,222.283936,-1848.376465,1920.04248,-2345.191406,4103.651855,-2875.111572,2393.317139,6952.275391,-4316.085938,-22810.378906,-2899.106201,2232.75708,5271.835938,-1130.607666,-20962.878906,-2245.28418,4823.541992,2204.633789,5745.268555,3759.312012,-366.614807,5809.83252,5959.382812,1352.152344,3835.34668,3055.255615,82.855919,-2544.322754,-2059.819824,-507.203491,1299.00708,928.473328,-1227.761475,-2430.050049,2192.797363,3962.813477,3316.774902,5185.53125,1366.114624,-5250.037109,2364.836426,-4062.059326,2848.752197,-9440.193359,-2184.793457,3594.496826,7751.23877,Feet
9,-11032.90332,2021.869385,-43.776783,518.573914,-20695.046875,-1314.146606,2858.921631,1905.773804,3872.912354,-3399.586182,6477.310547,3900.135254,3744.781494,2350.756836,3639.01416,503.699402,1682.859619,213.929276,-1851.517578,1899.34082,-2341.468994,4100.182129,-2873.711426,2396.922119,6952.979492,-4320.27832,-22800.259766,-2895.920166,2233.816162,5274.644531,-1133.300781,-20965.533203,-2244.135498,4821.406738,2203.223389,5743.669922,3761.132812,-364.820587,5809.099609,5961.008301,1355.259888,3833.570801,3057.627197,83.597267,-2539.522705,-2067.135498,-509.555603,1294.943604,922.510376,-1217.959473,-2441.056396,2192.172607,3966.391357,3315.269531,5180.855469,1362.603027,-5246.946289,2364.776123,-4051.816406,2850.304688,-9443.542969,-2178.133789,3592.844238,7751.393066,Feet


In [98]:
def find_duplicates(data_list):
    counted_values = Counter(data_list)
    duplicate_values = {value: count for value, count in counted_values.items() if count > 1}
    return duplicate_values

In [108]:
with open('/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/EEG/Classification/Statistics.txt', 'w') as file:
    for p in range(len(p_num_list)):
        file.write(f'Particpant: {p+3} '+'\n')
        for b in range(7):
            file.write(f'Block: {b+1} '+'\n')
            data_pd = data_dicts_list[p][b]
            data = data_pd.iloc[:, :-1]
            data_np = data.values
            eeg_data = data_np
            print("Data type:", type(eeg_data))
            print("Shape:", eeg_data.shape)
            eeg_data = np.array(eeg_data)
            mean_values = np.mean(eeg_data, axis=0)
            variance_values = np.var(eeg_data, axis=0)
            std_deviation_values = []
            
            for i in range(num_channels):
                print(f"Channel {i + 1}:")
                print(f"Mean: {mean_values[i]}")
                print(f"Variance: {variance_values[i]}")
                std_deviation_values.append(np.sqrt(variance_values[i]))
                print(f"Standard Deviation: {std_deviation_values[i]}")
                print()
                file.write(f'Channel {i+1}: '+'\n')
                file.write(f"Mean: {mean_values[i]}"+"\n")
                file.write(f"Variance: {variance_values[i]}"+"\n")
                file.write(f"Standard Deviation: {std_deviation_values[i]}"+"\n\n")
            
            lists_to_check = {
            'mean_values': mean_values,
            'variance_values': variance_values,
            'std_deviation_values': std_deviation_values
            }
            for list_name, data_list in lists_to_check.items():
                duplicate_values = find_duplicates(data_list)
                if duplicate_values:
                    print(f"Duplicate values and their counts for {list_name}:")
                    file.write(f"Duplicate values and their counts for {list_name}:"+"\n")
                    for value, count in duplicate_values.items():
                        print(f"Value: {value}, Count: {count}")
                        file.write(f"Value: {value}, Count: {count}"+"\n")
                else:
                    print(f"No duplicate values found in the {list_name} list.")
                    file.write(f"No duplicate values found in the {list_name} list."+"\n")





            # my_list = mean_values
            # counted_values = Counter(my_list)
            # duplicate_values = {value: count for value, count in counted_values.items() if count > 1}
            # if duplicate_values:
            #     print("Duplicate values and their counts for mean_values:")
            #     for value, count in duplicate_values.items():
            #         print(f"Value: {value}, Count: {count}")
            # else:
            #     print("No duplicate values found in the mean_values list.")

            # my_list = variance_values
            # counted_values = Counter(my_list)
            # duplicate_values = {value: count for value, count in counted_values.items() if count > 1}
            # if duplicate_values:
            #     print("Duplicate values and their counts for variance_values:")
            #     for value, count in duplicate_values.items():
            #         print(f"Value: {value}, Count: {count}")
            # else:
            #     print("No duplicate values found in the variance_values list.")

            # my_list = std_deviation_values
            # counted_values = Counter(my_list)
            # duplicate_values = {value: count for value, count in counted_values.items() if count > 1}
            # if duplicate_values:
            #     print("Duplicate values and their counts for std_deviation_values:")
            #     for value, count in duplicate_values.items():
            #         print(f"Value: {value}, Count: {count}")
            # else:
            #     print("No duplicate values found in the std_deviation_values list.")

Data type: <class 'numpy.ndarray'>
Shape: (114958, 64)
Channel 1:
Mean: -17548.71658643809
Variance: 7267.42358031779
Standard Deviation: 85.24918521791156

Channel 2:
Mean: -2221.9437203689763
Variance: 14911.689020524316
Standard Deviation: 122.11342686422454

Channel 3:
Mean: 10699.062119222346
Variance: 13485.809771887327
Standard Deviation: 116.12841931192953

Channel 4:
Mean: -10900.610740223468
Variance: 2808.2122744348057
Standard Deviation: 52.99256810567691

Channel 5:
Mean: -2730.7548966720115
Variance: 7405.711014389097
Standard Deviation: 86.05644086521994

Channel 6:
Mean: 7695.596650893865
Variance: 10404.94762221129
Standard Deviation: 102.00464510114865

Channel 7:
Mean: 1013.6323289562777
Variance: 9197.708332281389
Standard Deviation: 95.9046835784436

Channel 8:
Mean: -2548.8553844078856
Variance: 8782.470843392251
Standard Deviation: 93.71483790410274

Channel 9:
Mean: -68.78262747418854
Variance: 13462.559316564983
Standard Deviation: 116.02826947156018

Channel 1

In [49]:
# #Frame Maker
PATH = '/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/EEG/Results/XGBoost/'
df = pd.read_csv(PATH+'frame.csv')
p_num_list = [3]
for p_num in p_num_list:
    df.to_csv(PATH+'P'+str(p_num)+'.csv',index=False)



In [51]:
PATH = '/home/mahdi146/projects/def-b09sdp/mahdi146/Cedar/Classification/EEG/Results/XGBoost/'
class_1_list = ['Hand','Feet','Tongue','Mis']
class_2 = 'Rest'
p_num_list = [8]
train_blocks_set = [0,1,2,3,4]
test_blocks_set = [5,6]
sliding_time_tr = 4
sliding_time_te = 4
vote_window = 4
params = {
    'max_depth': 5,
    'min_child_weight': 1,
    'gamma': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'learning_rate': 0.1,
}


p = 0
for p_num in p_num_list:
    import time
    start_time = time.time()
    for class_1 in class_1_list:
        X_tr, Y_tr = trials_set_builder(data_dicts_list[p],train_blocks_set,'TRAIN',class_1,class_2,True,sliding_time_tr)
        X_te, Y_te = trials_set_builder(data_dicts_list[p],test_blocks_set,'TEST',class_1,class_2,True,sliding_time_te)

        print(X_tr.shape,Y_tr.shape,"train shape")
        print(X_te.shape,Y_te.shape,"test shape")

        [train_features, test_features] = feature_extractor(X_tr, Y_tr, number_of_bands, X_te)
        selected_features = feature_selector(train_features, Y_tr, number_of_selected_features)

        train_acc_list = []
        test_acc_list = []

        clf = XGBClassifier()
        for r in range(1):
            clf.fit(train_features[:, selected_features], Y_tr[:,0])

            y_pr_te = clf.predict(test_features[:, selected_features])
            y_pr_tr = clf.predict(train_features[:,selected_features])

            accuracy_te = accuracy_score(Y_te, y_pr_te)
            test_acc_list.append(accuracy_te)
            for i in range(len(Y_te)):
                print(f"Test_Real: {Y_te[i][0]}   Test_Predication: {y_pr_te[i]}")
            # print(Y_te.shape,y_pr_te.shape,"shape ")
            y_pr_te_Vote = majority_vote_sliding_with_prev_v2(y_pr_te,vote_window)
            Y_te_Vote = majority_vote_sliding_with_prev_v2(Y_te.reshape(-1),vote_window)

            for i in range(len(Y_te)):
                print(f"Test_Real_Vote: {Y_te_Vote[i]}   Test_Predication_vote: {y_pr_te_Vote[i]}")


            acc, num_of_mismatches ,mismatches_list = custom_accuracy(Y_te_Vote,y_pr_te_Vote)
            print(acc,num_of_mismatches,mismatches_list, "acc, num_of_mismatches ,mismatches_list",class_1)

            accuracy_tr = accuracy_score(Y_tr,y_pr_tr)
            train_acc_list.append(accuracy_tr)



        end_time = time.time()
        running_time = end_time-start_time
        participant = p_num
        class1 = class_1
        class2 = class_2
        running_time = running_time
        test_acc = np.average(test_acc_list)
        train_acc = np.average(train_acc_list)
        test_size = X_te.shape
        train_size = X_tr.shape
        train_block = '01234'
        test_block = '56'





        # new_row = [participant, class1, class2,running_time,test_acc,train_acc,test_size,train_size,train_block,test_block]

        # new_row_df = pd.DataFrame([new_row], columns=column_names)
        # rf = pd.read_csv(PATH +'P'+str(p_num)+'.csv')
        # cf = pd.concat([rf, new_row_df], ignore_index=True)
        # cf.to_csv(PATH +'P'+str(p_num)+'.csv',index=False)



        print(train_acc_list,"train",class_1)
        print(test_acc_list,"test",class_1)
        
    i+=1

        



# block_order_tr = ['Tongue','Feet','Mis','Hand']
# block_order_tr2 = ['Tongue','Mis','Hand','Feet']
# block_order_te = ['Feet','Hand','Tongue','Mis']
# CLASS_1 = "Hand"
# CLASS_2 = "Rest"
# tasks_time_tr = [16,16,12,20,20,8,8,12]
# tasks_time_tr2 = [20,20,12,12,8,8,16,16]
# tasks_time_te = [16,12,12,8,8,16,20,20]

# df_tr = data_tr_.copy()
# df_tr2 = data_tr2_.copy()
# df_te = data_te_.copy()
# data_tr,labels_tr = preprocessor(df_tr,CLASS_1,CLASS_2,tasks_time_tr,"TRAIN")
# data_tr2,labels_tr2 = preprocessor(df_tr2,CLASS_1,CLASS_2,tasks_time_tr2,"TRAIN")
# data_te,labels_te = preprocessor(df_te,CLASS_1,CLASS_2,tasks_time_te,"TEST")
# data_tr = np.vstack((data_tr, data_tr2))
# labels_tr = np.vstack((labels_tr, labels_tr2))
# print(data_tr.shape)
# print(labels_tr.shape)
# print(data_te.shape)
# print(labels_te.shape)





# print(data_tr.shape,labels_tr.shape)
# print(data_te.shape,labels_te.shape)
# print(labels_te)
# print(indexes)
# print(Begin_indexes)
# print(End_indexes)
# print(df.iloc[1,64])


    




[20, 12, 8, 16] [20, 12, 8, 16]
[20, 20, 12, 12, 8, 8, 16, 16]
(28491, 65) trial_df
[0, 5081, 10160, 13248, 16335, 18368, 20404, 24484] tasks index starting point
[20, 20, 12, 12, 8, 8, 16, 16]
(28000, 65) final_df
4 4 112
28
0 i is
0 l is
0 1000 start and end in if
1 i is
0 l is
1000 2000 start and end in if
2 i is
0 l is
2000 3000 start and end in if
3 i is
0 l is
3000 4000 start and end in if
4 i is
0 l is
0 5000 5000 temp,end,index l+1
5 i is
0 l is
1000 6000 5000 temp,end,index l+1
6 i is
1 l is
6000 7000 start and end in if
7 i is
1 l is
7000 8000 start and end in if
8 i is
1 l is
8000 9000 start and end in if
9 i is
1 l is
0 10000 10000 temp,end,index l+1
10 i is
1 l is
1000 11000 10000 temp,end,index l+1
11 i is
2 l is
11000 12000 start and end in if
12 i is
2 l is
0 13000 13000 temp,end,index l+1
13 i is
2 l is
1000 14000 13000 temp,end,index l+1
14 i is
3 l is
14000 15000 start and end in if
15 i is
3 l is
0 16000 16000 temp,end,index l+1
16 i is
3 l is
1000 17000 16000 temp,

LinAlgError: The leading minor of order 13 of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.

In [37]:
def custom_accuracy(y_true, y_pred):
    mismatches = []
    total = len(y_true)
    mismatch_count = 0
    
    for i, (true_label, pred_label) in enumerate(zip(y_true, y_pred)):
        if true_label != pred_label:
            mismatches.append(i)
            mismatch_count += 1
            
    accuracy = 1 - (mismatch_count / total)  # Accuracy calculation
    
    return accuracy, mismatch_count, mismatches

In [38]:
from collections import Counter

def majority_vote_sliding_with_next(prediction_list, window_size=3):
    majority_votes = []
    
    for i in range(len(prediction_list) - window_size + 1):
        window = prediction_list[i:i+window_size]
        window_tuple = tuple(window)
        counts = Counter(window_tuple)
        majority = counts.most_common(1)[0][0]
        majority_votes.append(majority)
        
    return majority_votes


In [39]:
def majority_vote_sliding_with_prev(prediction_list, window_size=3):
    majority_votes = []
    
    for i in range(len(prediction_list)):
        if i >= window_size - 1:
            start_index = i - window_size + 1
            window = prediction_list[start_index:i+1]
            counts = Counter(window)
            majority = counts.most_common(1)[0][0]
            majority_votes.append(majority)
        
    return majority_votes

In [40]:
def majority_vote_sliding_with_prev_v2(prediction_list, window_size=3):
    majority_votes = []
    
    for i in range(len(prediction_list)):
        start_index = max(0, i - window_size + 1)
        window = prediction_list[start_index:i+1]
        counts = Counter(window)
        majority = counts.most_common(1)[0][0]
        majority_votes.append(majority)
        
    return majority_votes

In [41]:
prediction_list = [1, 1, 1, 1, 0, 0, 1, 1, 0, 0]  # Replace with your actual prediction list

result = majority_vote_sliding_with_next(prediction_list)
result2 = majority_vote_sliding_with_prev_v2(prediction_list)
print("Majority Votes:", result)
print("Majority Votes Previous:", result2)

Majority Votes: [1, 1, 1, 0, 0, 1, 1, 0]
Majority Votes Previous: [1, 1, 1, 1, 1, 0, 0, 1, 1, 0]


Unnamed: 0,participant,class1,class2,running_time,test_acc,train_acc,test_size,train_size,train_block,test_block
7,5,Mis,Rest,111.92424,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
8,6,Hand,Rest,28.528704,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
9,6,Feet,Rest,56.803421,0.767857,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
10,6,Tongue,Rest,84.431607,0.875,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56
11,6,Mis,Rest,112.289006,0.785714,1.0,"(56, 64, 1000)","(140, 64, 1000)",1234,56


In [32]:
p_num_list = [3,4,5,6,7,9]
vf = pd.DataFrame(columns=column_names) 
for p_num in p_num_list:
    rf = pd.read_csv(PATH + "P" + str(p_num) + ".csv")
    vf = pd.concat([vf, rf], ignore_index=True)
vf.to_csv(PATH+ 'ResultsOfAll.csv', index=False)
# vf.tail()
    
columnNames = ['class','b1234']
kf = pd.DataFrame(columns=columnNames)
kf.to_csv(PATH+'AverageAcc.csv',index=False)

vf = pd.read_csv(PATH +"ResultsOfAll.csv")
df = vf

class_list=['Hand','Feet','Tongue','Mis']
blk_list = [1234]
for class_ in class_list:
    avg_list = []
    for blk in blk_list:
        gf = df[(df['train_block'] == blk) & (df['class1'] == class_)]
        avg = gf['test_acc'].mean()
        avg_list.append(avg)
    print(avg_list)    
    new_row = [class_, avg_list[0]] 
    new_row_df = pd.DataFrame([new_row], columns=columnNames)
    rf = pd.read_csv(PATH + 'AverageAcc.csv')
    cf = pd.concat([rf, new_row_df], ignore_index=True)
    cf.to_csv(PATH +'AverageAcc.csv',index=False)  
kf = pd.read_csv(PATH +'AverageAcc.csv') 
kf.head()

[0.8571428571428571]
[0.8065476190476191]
[0.7767857142857143]
[0.75]


Unnamed: 0,class,b1234
0,Hand,0.857143
1,Feet,0.806548
2,Tongue,0.776786
3,Mis,0.75


In [25]:
import pandas as pd

# Assuming df is your DataFrame with the last column named 'label'
data = {'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter'],
        'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b']}

df = pd.DataFrame(data)

print(df)
print("his")
# Define a custom sorting order based on the desired grouping
sorting_order = {'a': 0, 'b': 1}

# Create a new column with the sorting order
df['sorting_order'] = df.iloc[:, 2].map(sorting_order)

# Sort the DataFrame based on the new column and the original order within each group
df.sort_values(by=['sorting_order', df.columns[2]], inplace=True)

# Drop the temporary sorting column
df.drop('sorting_order', axis=1, inplace=True)

# Optional: Reset the index if needed
df.reset_index(drop=True, inplace=True)

# Display the sorted DataFrame
print(df)





    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      4         for     b
4      5     example     b
5      6    purposes     b
6      7          in     a
7      8        this     a
8      9        case     a
9     10          it     b
10    11        does     b
11    12  not matter     b
his
    col1        col2 label
0      1        some     a
1      2      random     a
2      3        data     a
3      7          in     a
4      8        this     a
5      9        case     a
6      4         for     b
7      5     example     b
8      6    purposes     b
9     10          it     b
10    11        does     b
11    12  not matter     b


In [54]:
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter','b'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b','b']
}

df = pd.DataFrame(data)

x=0
i=0
class_1 = 'a'
class_2 = 'b'
sampleList = []
while i<len(df):
    if (df.iloc[i,2]==class_1):
        x+=1
    else:
        i-=1
        sampleList.append(x)
        x=0
        class_1,class_2 = class_2,class_1
    i+=1
sampleList.append(x)
print(sampleList)

[3, 3, 3, 4]


In [52]:
import pandas as pd

data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'it', 'does', 'not matter'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'b', 'b', 'b']
}
df = pd.DataFrame(data)

print(get_group_start_indices(df))


[0, 3, 6, 8]


In [43]:
data = {
    'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13],
    'col2': ['some', 'random', 'data', 'for', 'example', 'purposes', 'in', 'this', 'case', 'it', 'does', 'not matter','c'],
    'label': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', 'b', 'b', 'b','b']
}

df = pd.DataFrame(data)

# Identify consecutive groups of 'a's by creating a new group ID each time 'label' changes from 'b' to 'a'
df['group'] = (df['label'] != df['label'].shift(1)).cumsum()

# Count occurrences of 'a' within each group
group_counts = df[df['label'] == 'a'].groupby('group').size()

group_counts_b = df[df['label'] == 'b'].groupby('group').size()
print(group_counts_b)
print(group_counts_b.index[0])
print(group_counts_b.iloc[0])
print(group_counts)

group
2    3
4    4
dtype: int64
2
3
group
1    3
3    3
dtype: int64


In [58]:
p_num = 6
b_num = 7
path = f'../../Participants/P{p_num}/'
mat = loadmat(path+'P'+str(p_num)+'B'+str(b_num)+'.mat', chars_as_strings=True, mat_dtype=True, squeeze_me=True, struct_as_record=False, verify_compressed_data_integrity=False, variable_names=None)
df_1 = pd.DataFrame(mat['Data'])


In [59]:
extra_samples_block_counter(df_1,trial_order[0])

hi
[6191, 10157, 8157, 4065, 10161, 8156, 4060, 6014]
[8156, 8156, 6178, 10157, 10156, 4064, 4063, 6015]
[10158, 6176, 8165, 10155, 4073, 8156, 6184, 4016]
[10165, 10155, 6183, 6177, 4060, 4062, 8162, 8016]


In [82]:
for b in range(7):
    extra_samples_block_counter(data_dicts_list[-1][b],trial_order[b],b)

hi
Tongue
[3093, 5078, 4078, 2030, 5078, 4079, 2032, 3007]
Feet
[4078, 4078, 3087, 5079, 5078, 2030, 2035, 3007]
Mis
[5080, 3089, 4076, 5079, 2036, 4077, 3093, 2007]
Hand
[5080, 5078, 3092, 3088, 2035, 2030, 4077, 4007]
hi
Feet
[3091, 4082, 2034, 3093, 5082, 2034, 4079, 5007]
Mis
[4083, 2033, 5079, 5082, 2031, 3090, 3087, 4007]
Hand
[2033, 5078, 5079, 4078, 4079, 2036, 3090, 3007]
Tongue
[2033, 3089, 3092, 4078, 5082, 5082, 4079, 2008]
hi
Hand
[4078, 2035, 2036, 5082, 3089, 4083, 5083, 3008]
Feet
[5077, 3088, 4078, 2035, 3088, 5077, 2033, 4007]
Tongue
[3088, 4082, 5082, 3087, 2031, 5079, 4077, 2007]
Mis
[2037, 2035, 3093, 3091, 4076, 5079, 5081, 4007]
hi
Tongue
[3087, 5081, 4082, 2035, 5077, 4077, 2031, 3008]
Mis
[4082, 4083, 3089, 5156, 5078, 2111, 2026, 3007]
Hand
[5105, 3088, 4106, 5077, 2065, 4076, 3122, 2007]
Feet
[5077, 5116, 3090, 3121, 2026, 2049, 4079, 4008]
hi
Mis
[4077, 2034, 2061, 3092, 5171, 4082, 3165, 5008]
Feet
[3201, 4078, 4077, 5177, 2030, 3166, 5083, 2007]
Hand
[5079