In [1]:
import importlib
import ProCtrlDataLoaderV2 as DataLoader
import pandas as pd
import numpy as np
import glob
import logging

In [None]:
# Notebook-level logger that writes WARNING-and-above records to logging.log.
logger = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(lineno)s - %(levelname)s - %(message)s')
logger.setLevel(logging.WARNING)

# getLogger() hands back the same object every time this cell is executed, so
# handlers attached by earlier runs are still present. Detach and close them
# first; otherwise each re-run adds another FileHandler, every record is
# written once per handler, and the old file handles are never released.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

fhandler = logging.FileHandler(filename='logging.log', mode='w')  # mode='w' truncates the log on every run
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)

# NOTE(review): the other cells call logging.info(...) on the root logger, not
# this `logger`, and INFO is below the WARNING threshold set here, so those
# messages never reach logging.log -- confirm whether that is intended.

In [2]:
# Re-import the loader module so the current version of its source is used.
importlib.reload(DataLoader)

# Put path to head of raw data file tree here.
start_path = './Jun14_data/'

# The three list arguments delineate which subsets of data are considered in
# the current calculations, in the order [Groups], [Subjects], [Sessions].
loadTest = DataLoader.ProCtrlDataLoader(
    start_path,
    ['bio', 'arb', 'control'],
    [],
    ['sess2', 'sess5', 'sess6'],
)

# Build an object which holds all of the data for each participant within the
# selected tags, then print it to confirm the correct subjects were loaded.
loadTest.loaddata()
loadTest.printDict()

'bio'
'arb'
'control'


In [3]:
def prep_cm_dfs(group,subject,sess): 
    """Write per-trial goal/class/accuracy traces for one session to CSV.

    For every entry of the selected session whose key contains 'trained', one
    CSV is written to ``ind_sub_acc_prep/<subject>_<sess>_<pre|post>_cm_prep.csv``
    ('post' when the key contains 'post', otherwise 'pre'). The CSV holds three
    columns per trial, named ``<mc>_<j>_<key>_class``, ``..._goal`` and
    ``..._acc`` where ``j`` is the 1-based position of the trial within its
    ``chunk_bounds[mc]`` list. ``acc`` is 1 where goal == class and 0 otherwise.
    Each trial is trimmed to start at its first correct classification, and the
    frame has at least 400 rows (more if a trial is longer).

    Requires that the notebook-level ``loadTest`` object has been initialised
    and loaded (done in the prior cell).

    Args:
        group: key into ``loadTest.data_dict``.
        subject: key into ``loadTest.data_dict[group]``.
        sess: key into ``loadTest.data_dict[group][subject]``.

    Returns:
        None. The output is the CSV file(s) written to disk.
    """
    import os  # function-scope import: the notebook's first import cell does not import os

    session = loadTest.data_dict[group][subject][sess] # Load data for selected session (dict containing pre+post individually)

    for pre_post in session.keys():
        if 'trained' not in pre_post: # Only build csvs for pre/post, ignore bio/arb sections
            continue

        logging.info(pre_post)
        df = session[pre_post] # object holding the raw data for this pre/post block

        pre_post_string = 'post' if 'post' in pre_post else 'pre'
        logging.info(pre_post_string)

        # Collect every trial's columns first and build the frame once at the
        # end; this replaces inserting columns one at a time into a growing
        # frame (and the manual resize that needed).
        trial_columns = {}
        n_rows = 400 # minimum number of rows in the output frame

        for mc in df.chunk_bounds: # chunks are defined by trials (each gesture), iterate through all trials in session
            for j, bounds in enumerate(df.chunk_bounds[mc], start=1):
                start = bounds[0]
                end = bounds[1]

                chunk_goal = df.armgame_df.loc[start:end, 'goal'] # Get goal data
                chunk_class = df.armgame_df.loc[start:end, 'class'] # Get classification data
                chunk_acc = np.where(chunk_goal == chunk_class, 1, 0) # calculate acc at each tp

                if chunk_acc.size == 0:
                    # np.argmax raises on an empty array; skip the trial instead of aborting the session.
                    logging.warning('%s %s %s: empty chunk for %s trial %d, skipped',
                                    subject, sess, pre_post, mc, j)
                    continue

                # Position of the first correct classification.
                # NOTE(review): argmax returns 0 when no sample is correct, so a
                # trial that is never classified correctly is kept in full -- confirm intended.
                val = int(np.argmax(chunk_acc > 0))

                # Drop values prior to first correct classification.
                # NOTE(review): the -1 stop also drops the final sample of every
                # trial; kept as in the original -- confirm intended.
                chunk_goal = chunk_goal.iloc[val:-1].reset_index(drop=True)
                chunk_class = chunk_class.iloc[val:-1].reset_index(drop=True)
                chunk_acc = pd.Series(chunk_acc[val:-1])

                # Grow the frame if this trial is longer than anything seen so far
                n_rows = max(n_rows, len(chunk_class.index))

                # add columns for current trial to session df
                col_string = mc + '_' + str(j) + '_' + pre_post + '_'
                trial_columns[col_string + 'class'] = chunk_class
                trial_columns[col_string + 'goal'] = chunk_goal
                trial_columns[col_string + 'acc'] = chunk_acc

        # Every Series is aligned to the common 0..n_rows-1 index; shorter trials are padded with NaN.
        hold = pd.DataFrame(trial_columns, index=range(n_rows))

        # Make sure the output directory exists; to_csv does not create it.
        os.makedirs('ind_sub_acc_prep', exist_ok=True)
        file_name = 'ind_sub_acc_prep/' + subject +'_' + sess + '_' + pre_post_string + '_cm_prep.csv'
        hold.to_csv(file_name)

# Build the cm CSVs for every (group, subject, session) combination held in loadTest.
for group, subj, sess in (
    (group_key, subj_key, sess_key)
    for group_key in loadTest.data_dict
    for subj_key in loadTest.data_dict[group_key]
    for sess_key in loadTest.data_dict[group_key][subj_key]
):
    prep_cm_dfs(group, subj, sess)

In [6]:
from fileinput import filename
from hashlib import new
import os

from sklearn import preprocessing


# Aggregate the per-trial accuracy CSVs in ./ind_sub_acc_prep/ into one row per
# file, holding the mean classification accuracy for every (gesture, time
# window) combination, and write the result to <outpath>class_avg_prep.csv.

acc_prep_files = sorted(glob.glob('./ind_sub_acc_prep/*.csv')) # Load in previously prepped cm files (sorted: glob order is arbitrary)

time_windows = [(0,49),(50,99),(100,149),(150,199)] # Set time windows of interest (row labels, both ends inclusive)

gestures = ['rest','open','close'] # Set gestures of interest

# Defined before the loop so that later cells can rely on it even when no input files are found.
outpath = 'LearningMeasureHunter/'

new_cols = ['subj', 'group', 'sess', 'pre_post'] # Set tag column names 

# Populate other columns of interest (for each tp window + gesture combo)
for gesture in gestures:
    for window in time_windows:
        new_cols.append(gesture + '_' + str(window[0]) + '-' + str(window[1]) + 'tp')

logging.info(new_cols)

# One dict per input file; the frame is built once at the end instead of
# concatenating a one-row frame on every iteration.
records = []

for file in acc_prep_files:
    # File names look like <subject>_<sess>_<pre|post>_cm_prep.csv.
    # basename handles both '/' and '\\' separators.
    name_parts = os.path.basename(file).split('_')
    logging.info(name_parts)

    subj = name_parts[0].split('-')[-1]

    # Group is inferred from the subject tag; 'bio' is the fallback
    group = 'bio'
    if 'ar' in subj:
        group = 'arb'
    if 'co' in subj:
        group = 'control'
    sess = name_parts[1]
    pre_post = name_parts[2]

    # logging treats extra positional arguments as %-format arguments, so a format string is required.
    logging.info('%s %s %s', subj, sess, pre_post)

    curr_file = pd.read_csv(file)

    acc_cols = [x for x in curr_file.columns if 'acc' in x]

    record = {'subj': subj, 'group': group, 'sess': sess, 'pre_post': pre_post}

    for gesture in gestures:
        gesture_cols = [x for x in acc_cols if gesture in x]

        for window in time_windows:
            # Mean accuracy of each trial (column) inside the window
            trial_means = curr_file.loc[window[0]:window[1], gesture_cols].mean()

            # A NaN mean means the trial has no samples in this window.
            # (The previous truthiness test fired on a genuine 0.0 accuracy and never on NaN.)
            for column in trial_means.index[trial_means.isna()]:
                logging.info(subj + sess + 'no chunk?' + column)

            # Plain mean over the trials that have data in the window. The
            # previous running (a + b) / 2 update weighted later trials more
            # heavily, and one NaN trial voided the whole window.
            valid_means = trial_means.dropna()
            col = gesture + '_' + str(window[0]) + '-' + str(window[1]) + 'tp'
            record[col] = valid_means.mean() if len(valid_means) else np.nan

    records.append(record)

agg_windowed_class_acc = pd.DataFrame(records, columns=new_cols) # df with the tag + window columns

if records:
    # Sort and write once, after all files have been processed
    agg_windowed_class_acc = (
        agg_windowed_class_acc
        .sort_values(by=['group','subj','sess'])
        .reset_index(drop=True)
    )

    os.makedirs(outpath, exist_ok=True)

    agg_windowed_class_acc.to_csv(outpath + 'class_avg_prep.csv')
else:
    # Nothing to aggregate: leave any existing output file untouched
    logging.warning('no prepped accuracy files found in ./ind_sub_acc_prep/; class_avg_prep.csv not written')

In [10]:
# Reshape the wide table in LearningMeasureHunter/class_avg_prep.csv (one row
# per subject/session/pre-post, one column per gesture+window) into a long
# table indexed by (subj, sess, pre_post, gesture, time_window) with columns
# 'group' and 'class_acc', and write it to <outpath>raw_classacc_avgs.csv.
# Relies on time_windows, gestures and outpath defined in the previous cell.
path = os.getcwd()
path = path + '/LearningMeasureHunter/'

prepped_lm_files = glob.glob(path + 'class_avg_prep.csv')
if not prepped_lm_files:
    # Fail with the missing path rather than an IndexError on [0].
    raise FileNotFoundError(path + 'class_avg_prep.csv')

# Read subj as text so the substring-based group lookup below always sees a string.
prepped_file = pd.read_csv(prepped_lm_files[0], dtype={'subj': str})

# Window labels exactly as they appear in the wide column names, e.g. '0-49tp'
win_codes = [str(window[0]) + '-' + str(window[1]) + 'tp' for window in time_windows]

logging.info(prepped_file['subj'].unique())
logging.info(win_codes)

# NOTE(review): sessions, pre/post and gestures are hard-coded here; the session
# list matches the one passed to the data loader earlier in the notebook.
arrays = [
    prepped_file['subj'].unique(),
    ['sess2','sess5','sess6'],
    ['pre','post'],
    ['rest','open','close'],
    win_codes
]

# Full product of all tag values; combinations without data keep NaN.
index = pd.MultiIndex.from_product(arrays, names=['subj','sess','pre_post','gesture','time_window'])

df = pd.DataFrame(index=index,columns=['group','class_acc'])

idx = pd.IndexSlice

for i in range(len(prepped_file)):
    subj = prepped_file.loc[i,'subj']
    sess = prepped_file.loc[i,'sess']
    pre_post = prepped_file.loc[i,'pre_post']

    # Group depends only on the subject, so compute it once per row
    group = 'bio'
    if 'ar' in subj:
        group = 'arb'
    if 'co' in subj:
        group = 'control'

    for gesture in gestures:
        for window in win_codes:
            # Exact column name as written by the aggregation cell. A substring
            # match could pick up the wrong column if one window label is
            # contained in another.
            column = gesture + '_' + window
            if column not in prepped_file.columns:
                continue

            row_key = idx[subj,sess,pre_post,gesture,window]
            df.loc[row_key, 'class_acc'] = prepped_file.loc[i,column]
            df.loc[row_key, 'group'] = group

df = df.sort_values(['subj','sess','pre_post','gesture','time_window'])
df.to_csv(outpath + 'raw_classacc_avgs.csv')

In [11]:
# Re-save the long-format averages keeping only the rows that have a class_acc value.
# NOTE(review): every file matched by the pattern is written to the same output
# name, so with several matches only the last one survives.
re_save = glob.glob(outpath + 'raw*avgs*')

for file in re_save:
    tagged_rows = pd.read_csv(file).dropna(subset=['class_acc'])
    tagged_rows.to_csv(outpath + 'tagged_classacc_avgs.csv')