In [1]:
import glob
import pandas as pd
import numpy as np
import os


## Select Relevant Files for 5x5 Confusion Matrix

In [2]:
data_path = '../DATA/Jun14_data/'

arb_files_5x5 = glob.glob(data_path + 'arb/sub-ar*/sess5/arb/*ArmGame*.csv')
bio_files_5x5 = glob.glob(data_path + 'bio/sub-bi*/sess5/bio/*ArmGame*.csv')

file_group = [arb_files_5x5,bio_files_5x5]

# for file_group in [arb_files_5x5,bio_files_5x5]:
#     print(file_group)
#     print(len(file_group))

## Load ArmGames Files

In [3]:
def get_bits(path):
    split = path.split('/')
    group = split[3]
    sub = split[4]
    return group,sub


goals = [0,17,18,0,40,38] # Rest, open, close, rest, pinch, tripod


for group in file_group:
    for file in group:
        group, sub = get_bits(file)

        armgame_df = pd.read_csv(file)

        class_df = armgame_df[['entryID','entryType','class', 'emgChan1','emgChan2','emgChan3',	'emgChan4','emgChan5','emgChan6','emgChan7','emgChan8']]
        class_df['goal'] = -1
        class_df['chunk'] = -1

        total_tp = len(class_df)
        curr = 0
        which_chunk = 0

        while curr < total_tp and which_chunk < len(goals):
            if class_df.at[curr,'entryType'] == 3 or class_df.at[curr,'entryType'] == 0:
                start = curr

                while curr < total_tp and class_df.at[curr,'entryType'] == 3:
                    curr += 1
                
                while curr < total_tp and class_df.at[curr,'entryType'] == 0:
                    curr += 1

                class_df.loc[start:curr-1,'goal'] = goals[which_chunk]
                class_df.loc[start:curr-1,'chunk'] = which_chunk

                which_chunk += 1

            else:
                while curr < total_tp and class_df.at[curr,'entryType'] != 0:
                    curr += 1

        save_path = '5x5_ArmGameData/' + group + '/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        s = '_'
        file_name = s.join([sub, 'sess5', '5x5'])
        class_df.to_csv(save_path + file_name + '.csv', index = False)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_df['goal'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_df['chunk'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [4]:
gesture_dict = {0:'rest',17:'open',18:'close',38:'tripod',40:'pinch'}

def prep_cm_5x5(file):
    old_df = pd.read_csv(file)
    new_df = pd.DataFrame
    split_chunk_df = pd.DataFrame()

    for which_chunk in range(0,6):
        curr_chunk = old_df.loc[(old_df['chunk'] == which_chunk) & (old_df['entryType'] == 0)]
        curr_chunk = curr_chunk.reset_index(drop=True)
        # chunk_acc = np.where(curr_chunk['class'] == curr_chunk['goal'], 1, 0)
        # npArray = np.array(chunk_acc)
        # val = np.argmax(npArray > 0)
        # print(val)

        # curr_chunk = curr_chunk[val:-1]

        # curr_chunk = curr_chunk.reset_index(drop=True)

        col_name = str(gesture_dict[goals[which_chunk]]) + '_' + str(which_chunk)
        
        split_chunk_df[col_name + '_class'] = curr_chunk['class']
        split_chunk_df[col_name + '_goal'] = curr_chunk['goal']

        curr_chunk['acc'] = np.where(curr_chunk['class'] == curr_chunk['goal'], 1, 0)
        split_chunk_df[col_name + '_acc'] = curr_chunk['acc']



        if which_chunk == 0:
            new_df = curr_chunk

        else: 
            new_df = new_df.append(curr_chunk)


    new_df.to_csv(file)

    split_chunk_path = file.replace('bio','split/bio')
    split_chunk_path = split_chunk_path.replace('arb','split/arb')

    s = '/'
    folder = s.join(split_chunk_path.split('/')[:-1])

    if not os.path.exists(folder):
        os.makedirs(folder)

    split_chunk_df.to_csv(split_chunk_path)
    


prepped_bio_5x5 = glob.glob('5x5_ArmGameData/bio/sub-bi*.csv')
prepped_arb_5x5 = glob.glob('5x5_ArmGameData/arb/sub-ar*.csv')

prepped_file_groups = [prepped_bio_5x5,prepped_arb_5x5]

for prepped_file_group in prepped_file_groups:
    for file in prepped_file_group:
        prep_cm_5x5(file)


In [9]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils.multiclass import unique_labels
import seaborn as sn
import matplotlib.pyplot as plt

def plot_cmn(cmn, group, tp_window):

    save_folder = 'cm_figs/5x5/'
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    save_path = save_folder + group + '_' + tp_window + '_cm'
    plt.figure(figsize=(10,8))
    sn.heatmap(cmn, annot=True,fmt='.2f',xticklabels=['rest','open','close','pinch','tripod'],yticklabels=['rest','open','close','pinch','tripod'], vmin=0, vmax=1)

    plt.title(group + '_' + tp_window)
    plt.ylabel('Classifier Output')
    plt.xlabel('Goal Output')
    
    plt.savefig(save_path)
    plt.close()


bio_split_files = glob.glob('5x5_ArmGameData/split/bio/sub-bi*.csv')
arb_split_files = glob.glob('5x5_ArmGameData/split/arb/sub-ar*.csv')

bio_agg_5x5 = pd.DataFrame(columns=['class','goal'])
arb_agg_5x5 = pd.DataFrame(columns=['class','goal'])

split_file_groups = [(bio_split_files,bio_agg_5x5,'bio'),(arb_split_files,arb_agg_5x5,'arb')]

time_windows = [(0,99),(50,149),(100,199)]

acc_df = pd.DataFrame(columns=['tp_win','group','pinch','tripod'])

for window in time_windows:
    start = window[0]
    end = window[1]

    num_tp = end-start 
    tp_range = str(start) + '-' + str(end) + '_ticks'

    for file_group_5x5 in split_file_groups:

        tp_window_cm_df = pd.DataFrame(columns=['subj','sess','VAL1','VAL2','VAL3','VAL4','VAL5','VAL6','VAL7','VAL8','VAL9','VAL10','VAL11','VAL12','VAL13','VAL14','VAL15','VAL16','VAL17','VAL18','VAL19','VAL20','VAL21','VAL22','VAL23','VAL24','VAL25'])

        split_files = file_group_5x5[0]

        agg_class = []
        agg_goal = []

        group = file_group_5x5[2]
        for file in split_files:
            hold = pd.read_csv(file)

            file_name = file.split('/')[-1]
            bits = file_name.split('_')
            subj = bits[0]
            sess = bits[1]

            class_df = []
            goal_df = []

            for col in hold.columns:
                temp_list = hold.loc[start:end,col].tolist()
                temp_list = [item for item in temp_list if not pd.isna(item)]

                if not temp_list:
                    continue

                if 'class' in col:
                    class_df.extend(temp_list)
                    agg_class.extend(temp_list)
                elif 'goal' in col:
                    goal_df.extend(temp_list)
                    agg_goal.extend(temp_list)

            if len(class_df) != len(goal_df):
                print('mismatch: ' + len(class_df) + ' vs. ' + len(goal_df))
            
            if not all(x in class_df for x in [0,17,18,38,40]):
                print(subj,sess,' missing some fvalue')
                continue

            labels = unique_labels(class_df, goal_df)

            cm = confusion_matrix(class_df,goal_df,labels=[0.,17.,18.,38.,40.])
            cmn = cm.astype('float') / cm.sum(axis=0)

            if 5 != np.sum(np.sum(cmn,0)):
                print('bad stuff')
                print(group, subj, window)

            # plot_cmn(cmn, group, tp_range)

            row = [subj,sess]

            for item in cmn:
                row.extend(item)

            tp_window_cm_df.loc[len(tp_window_cm_df)] = row

        cm = confusion_matrix(agg_class,agg_goal,labels=[0.,17.,18.,38.,40.])
        cmn = cm.astype('float') / cm.sum(axis=0)

        if 5 != np.sum(np.sum(cmn,0)):
            print('bad stuff BIG')
            print(group, subj, window)

        plot_cmn(cmn, group, tp_range)
        
        tp_window_cm_df = tp_window_cm_df.sort_values(by=['subj','sess'])

        out_folder = '5x5_ArmGameData/cm_dfs/' + str(num_tp+1) + 'tp/'

        if not os.path.exists(out_folder):
            os.makedirs(out_folder)

        out_path = out_folder + group + '_' + tp_range + '.csv'

        tp_window_cm_df.to_csv(out_path,index=False)

sub-ar13 sess5  missing some fvalue


  cmn = cm.astype('float') / cm.sum(axis=0)


bad stuff
bio sub-bi18 (50, 149)
sub-ar13 sess5  missing some fvalue


  cmn = cm.astype('float') / cm.sum(axis=0)
  cmn = cm.astype('float') / cm.sum(axis=0)


bad stuff
bio sub-bi18 (100, 199)
bad stuff
bio sub-bi11 (100, 199)
sub-ar13 sess5  missing some fvalue
bad stuff
arb sub-ar20 (100, 199)


  cmn = cm.astype('float') / cm.sum(axis=0)
