In [None]:
import sys
import os
from os.path import join, dirname, realpath, exists
import json
import glob
import inspect
import time
from tqdm.notebook import tqdm, trange

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import signal
from scipy import stats
from sklearn import preprocessing
import xarray as xr

# Load dataset and montage setting

In [None]:
measurements_dir = './data_Sorted/' # directory with all measurements
preprocess_dir = './preprocessed_data/' # directory to save preprocessed behavioral files

# glob.glob returns paths in an arbitrary, filesystem-dependent order. Sort them so the
# subject order (and therefore the 'subject' axis of every aggregated array) is reproducible.
subject_folders = sorted(glob.glob(measurements_dir + 'pongFac23*'))

# The subject label is whatever follows the last underscore in the folder path
subjects = [subj.split('_')[-1] for subj in subject_folders]

In [None]:
def get_default_args(func):
    """Collect the default argument values declared by a callable.

    Parameters
    ----------
    func : callable
        Any object accepted by ``inspect.signature``.

    Returns
    -------
    dict
        Maps parameter name -> default value, in signature order, for every
        parameter that declares a default. Parameters without a default are omitted.
    """
    defaults = {}
    for name, param in inspect.signature(func).parameters.items():
        # inspect.Parameter.empty is the sentinel for "no default declared"
        if param.default is inspect.Parameter.empty:
            continue
        defaults[name] = param.default
    return defaults

In [None]:
def find_nearest(array, values):
    """Find, for each query value, the nearest element of a sorted array.

    Parameters
    ----------
    array : array-like
        Reference values (e.g. timestamps). Must be sorted in ascending order,
        because the lookup relies on ``np.searchsorted``.
    values : scalar or array-like
        Query value(s). A scalar is accepted and produces scalar outputs.

    Returns
    -------
    nearest : numpy scalar or ndarray
        Element(s) of ``array`` closest to each query value.
    idxs : integer scalar or ndarray
        Index (indices) of those elements in ``array``.

    Notes
    -----
    When a query is exactly halfway between two neighbours, the later
    (higher-index) neighbour is returned.
    """
    array = np.asarray(array)
    values = np.asarray(values)
    scalar_input = values.ndim == 0
    queries = np.atleast_1d(values)

    # Insert positions: array[idx - 1] < query <= array[idx]
    idxs = np.searchsorted(array, queries, side="left")

    # Candidate neighbours on either side, clipped to valid indices
    prev_idxs = np.maximum(idxs - 1, 0)
    next_idxs = np.minimum(idxs, len(array) - 1)

    # Step back when the query lies past the end of the array,
    # or when the previous element is strictly closer than the next one
    prev_is_closer = (idxs == len(array)) | (
        np.fabs(queries - array[prev_idxs]) < np.fabs(queries - array[next_idxs])
    )
    idxs = np.where(prev_is_closer, idxs - 1, idxs)

    if scalar_input:
        return array[idxs[0]], idxs[0]
    return array[idxs], idxs

In [None]:
def formatData(data, time, timeLocks, binSize, maxLead, maxLag, sr = 120, isComplex = False):
    """Time-lock continuous data to events and cut it into equally shaped trials.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_channels)
        Continuous data; must be 2-D (it is sliced as ``data[a:b, :]``).
    time : array-like, length n_samples
        Timestamp of every row of ``data``. Must be sorted ascending
        (requirement of the nearest-timestamp lookup).
    timeLocks : array-like, length n_trials
        Event times to lock to, expressed on the same clock as ``time``.
    binSize : any
        Currently unused; kept so existing callers keep working.
    maxLead, maxLag : float
        Window extent before / after each event. Multiplied by ``sr`` to get
        a number of samples.
    sr : float, default 120
        Samples per unit of ``maxLead`` / ``maxLag``.
    isComplex : bool, default False
        Allocate a complex output instead of a float one.

    Returns
    -------
    ndarray, shape (n_lead + n_lag, n_channels, n_trials)
        Trial ``k`` holds samples ``[nearest_k - n_lead, nearest_k + n_lag)``,
        where ``nearest_k`` is the sample whose timestamp is closest to
        ``timeLocks[k]``. Any part of a window that falls outside the recording
        is left as zeros, so every sample keeps its position relative to the event.

    Raises
    ------
    ValueError
        If ``data`` and ``time`` have different lengths.
    """
    if len(data) != len(time):
        raise ValueError('Data and time must have equal length')

    nTrials = len(timeLocks)
    nSamples = len(data)

    # Round first, then cast: casting to int before rounding would truncate
    # (e.g. 28.999999999999996 would become 28 instead of 29)
    nLead, nLag = np.round(np.array([maxLead*sr, maxLag*sr], dtype = float)).astype(np.int64)

    _, nearestInds = find_nearest(time, timeLocks)
    beginInds = nearestInds - nLead # Start of each bin (may fall before the recording)
    endInds = nearestInds + nLag # End of each bin, exclusive (may fall after the recording)

    fData = np.zeros((nLead + nLag, data.shape[1], nTrials), dtype = complex if isComplex else float)

    # Copy each window into its trial slot, zero-padding whatever is missing
    for trialInd in range(nTrials):
        begin = int(beginInds[trialInd])
        end = int(endInds[trialInd])

        # Clip to the recording. A negative start must never reach the slice,
        # because negative indices wrap around to the end of the array.
        srcStart = max(begin, 0)
        srcStop = min(end, nSamples)
        if srcStop <= srcStart:
            continue

        # Offset inside the trial where the first available sample belongs
        offset = srcStart - begin
        fData[offset : offset + (srcStop - srcStart), :, trialInd] = data[srcStart:srcStop, :]

    return fData

In [None]:
def process_pong_session(sub_dir, nTrials = 160, event_label = 'startTrig0', binSize = 1, maxLead = 0.2, maxLag = 3, frame_rate = 120, output_all = True):
    """Load one subject's Pong session and time-lock its kinematics to a task event.

    Files are looked up in ``<sub_dir>/Pong``: per-trial kinematic files matching
    ``test_*kin_*.csv`` plus the session-level table, whose name is the part of a
    kinematic file name before ``'_kin_'`` with ``'.csv'`` appended.

    Parameters
    ----------
    sub_dir : str
        Subject directory.
    nTrials : int, default 160
        Number of rows read from the session-level table.
    event_label : str, default 'startTrig0'
        Column of the session table holding the event times to lock to.
    binSize, maxLead, maxLag : passed through to ``formatData``.
    frame_rate : float, default 120
        Passed to ``formatData`` as ``sr``.
    output_all : bool, default True
        If True, the time-locked array contains the columns
        ``p1x``, ``t``, ``by``, ``bx`` (in that order); otherwise only ``p1x``
        with singleton axes squeezed out.

    Returns
    -------
    session_data : pandas.DataFrame
        The session-level table (first ``nTrials`` rows).
    movement_data : list of pandas.DataFrame
        One frame per kinematic file, ordered by the integer after ``'_kin_'``.
    binned_data_trials : ndarray
        Output of ``formatData`` for the concatenated kinematic data.

    Raises
    ------
    FileNotFoundError
        If no kinematic file is found for the subject.
    """
    # Obtain subject directories/files
    behav_dir = os.path.join(sub_dir, 'Pong')

    sub_trials = glob.glob(os.path.join(behav_dir, 'test_*kin_*.csv'))
    if not sub_trials:
        raise FileNotFoundError("No 'test_*kin_*.csv' files found in {}".format(behav_dir))

    sub_behav = sub_trials[0].split('_kin_')[0]+'.csv'
    # Order the files by the trial number encoded after '_kin_'
    sub_trials = sorted(sub_trials, key=lambda fname: int(fname.split('_kin_')[-1].split('.')[0]))

    session_data = pd.read_csv(sub_behav, nrows = nTrials)
    time_locks = session_data[event_label].to_numpy()

    # Read every trial file once, then concatenate each column in a single pass.
    # This also works when there is only one trial file.
    movement_data = [pd.read_csv(trial_file) for trial_file in sub_trials]

    def _stack_column(column):
        # Concatenate one column across all trial files into an (n_samples, 1) array
        return np.concatenate([trialdf[column].to_numpy() for trialdf in movement_data]).reshape(-1, 1)

    mvm = _stack_column('p1x')
    tst = _stack_column('t')
    by = _stack_column('by')
    bx = _stack_column('bx')

    # Time-lock (bin) the continuous (concatenated) data to a specific task event
    if output_all:
        binned_data_trials = formatData(np.concatenate((mvm, tst, by, bx), axis = 1), tst.squeeze(), time_locks, binSize, maxLead, maxLag, sr = frame_rate)
    else:
        binned_data_trials = formatData(mvm, tst.squeeze(), time_locks, binSize, maxLead, maxLag, sr = frame_rate).squeeze()

    return session_data, movement_data, binned_data_trials

# Save processed data

Process (concatenation, time-locking, etc.) all movement trials for each subject and add them to the aggregate array.

In [None]:
# Time-locking configuration.
# Alternative (lock to trial start): maxLead = 0.2, maxLag = 3.0, event_label = 'startTrig0'
maxLead, maxLag = 0.5, 0.0
event_label = 'feedbackTime'

# Run the per-subject pipeline once; each entry is (session table, per-trial frames, time-locked array)
per_subject_outputs = [
    process_pong_session(sub_dir, maxLead = maxLead, maxLag = maxLag, event_label = event_label)
    for sub_dir in subject_folders
]

session_results_agg = [outputs[0] for outputs in per_subject_outputs]
movement_trials_agg = [outputs[1] for outputs in per_subject_outputs]

# Stack the time-locked arrays along a new leading subject axis, then reorder
# (subject, time, source, trial) -> (time, trial, source, subject)
agg_movements = np.transpose(
    np.array([outputs[2] for outputs in per_subject_outputs]),
    (1,3,2,0),
)

Similar aggregation for trial results/properties

In [None]:
relevant_beh_columns = ['BDP_new', 'BAP_new', 'BDP', 'BAP', 'offset', 'ballX', 'ms',
 'ballSpeedX', 'ballSpeedY','text.started', 'startTrig0',
 'startTrig1', 'threshTime', 'feedbackTime', 'result', 'cond',
 'participant','age', 'gender', 'condOrder','Number']

session_dims = ('variable', 'trial', 'subject')
session_coords = {'variable': relevant_beh_columns, 'subject': subjects}

per_subject_tables = []

for sInd, session_results in enumerate(session_results_agg):
    df = session_results[relevant_beh_columns].copy()
    subj = df.loc[0,'participant']
    # Encode the string-valued entries as numbers. The participant label is mapped to the
    # integer that follows its last 'p'. Note that 'p', 'p-a' and 'male' all map to 1,
    # so the encoding can only be reversed per column.
    mapd = {'n': -1, 'p': 1, 'a-p': 0.1, 'p-a': 1.0, 'male' : 1, 'female' : 2, subj: int(subj.split('p')[-1])}
    per_subject_tables.append(df.replace(mapd).to_numpy())

# (subject, trial, variable) -> (variable, trial, subject)
results_array = np.transpose(np.array(per_subject_tables), (2,1,0))
results_array = xr.DataArray(results_array, dims = session_dims, coords = session_coords)

# Create the output directory if needed so a fresh checkout can run this cell
os.makedirs(preprocess_dir, exist_ok = True)
results_array.to_netcdf(preprocess_dir + 'agg_pong_results.nc')

In [None]:
# Axis names match the layout produced by the aggregation step: (time, trial, source, subject)
mvm_data_dims = ('time', 'trial', 'source', 'subject')
mvm_data_coords = {'source': ['movement', 'timestamp', 'ball_y', 'ball_x'], 'subject': subjects}
# Keep the time-locking settings with the data so the file is self-describing
mvm_data_attrs = {'max_lead' : maxLead, 'max_lag' : maxLag, 'center_event': event_label}

movement_array = xr.DataArray(agg_movements, dims = mvm_data_dims, coords = mvm_data_coords, attrs = mvm_data_attrs)

# Create the output directory if needed so a fresh checkout can run this cell
os.makedirs(preprocess_dir, exist_ok = True)
movement_array.to_netcdf(preprocess_dir + 'agg_pong_movement_' + event_label + '_lock.nc')

In [None]:
# Quick visual check of the time-locked movement traces of one subject (one line per trial)
example_subject = 'p07'

fig, ax = plt.subplots()
ax.plot(np.abs(movement_array.sel(source = 'movement', subject = example_subject)))
ax.set_xlabel('Sample within time-locked window')
ax.set_ylabel('|movement|')
ax.set_title('Subject ' + example_subject + ', locked to ' + event_label);

# Load processed data

In [None]:
# event_label = 'startTrig0'
# event_label = 'threshTime'
event_label = 'feedbackTime'

# Reverse lookup for the numeric codes stored in the results file.
# This was previously written as {-1: 'n', 1: 'p', 0.1: 'a-p', 1.0: 'p-a', 1: 'male', 2: 'female'},
# but 1 and 1.0 are the same dict key, so 'p' and 'p-a' were silently overwritten by 'male'.
# The dict below is exactly what that literal evaluated to; it is only reliable for gender codes.
# NOTE(review): decoding other variables needs a separate dict per variable.
mapped_dict = {-1: 'n', 1: 'male', 0.1: 'a-p', 2: 'female'}

# xr.load_dataarray already reads the data into memory and closes the file,
# so no additional .load() call is needed
results_array = xr.load_dataarray(preprocess_dir + 'agg_pong_results.nc')
movement_array_raw = xr.load_dataarray(preprocess_dir + 'agg_pong_movement_' + event_label + '_lock.nc')
movement_array_raw

# Analyses

Filter the movement data, and get the condition arrays

In [None]:
# One gender code per subject (read from the first trial), used as x-tick labels in the plots
gen_dict = {1: 'M', 2: 'F'}
subject_gens = list(results_array.sel(variable = 'gender', trial = 0).to_numpy())

sub_labels = [gen_dict[g] for g in subject_gens]

f_order = 3
low_cut = 10

# The time-locked windows were built assuming 120 samples per second (frame_rate default of
# process_pong_session), so the filter must be designed for that rate. With fs = 1000 the
# effective cutoff on 120 Hz data would be about 1.2 Hz rather than low_cut.
# NOTE(review): confirm 120 Hz is the true recording rate.
sampling_rate = 120

lowpass = signal.butter(f_order, low_cut, fs = sampling_rate, btype = 'lp', output = 'sos')

# Zero-phase low-pass along the time axis; re-wrap the result so dims/coords/attrs are preserved
movement_array = signal.sosfiltfilt(lowpass, movement_array_raw, axis = 0)
movement_array = xr.DataArray(movement_array, dims = movement_array_raw.dims, coords = movement_array_raw.coords, attrs = movement_array_raw.attrs)

In [None]:
# Default subject: the first entry of the subject list
sInd = 0
subj = subjects[sInd]

# Per-trial condition codes and trial outcomes for all subjects
conditions, intercepts = (
    results_array.sel(variable = name) for name in ('cond', 'result')
)

In [None]:
# Boolean masks over trials (and subjects): condition code 1 / 0 ...
pcond, acond = (conditions == code for code in (1, 0))

# ... and outcome code 1 / -1
posfb, negfb = (intercepts == code for code in (1, -1))

## Speed

In [None]:
f,a = plt.subplots(1,2, figsize = (10,5))

tsz = 13
speed_window = np.arange(0,40) # samples (from window start) used for the speed estimate
use_stable_mask = False # set True to keep only trials whose normalized movement never exceeds 2

# Per-subject metric for the Presence (pp) and Absence (aa) conditions
pp = np.zeros(len(subjects))
aa = np.zeros(len(subjects))

for sInd, subj in enumerate(subjects):

    # Only the first pair of bars carries legend labels, so the legend has one entry per condition
    labels = ['Presence', 'Absence'] if sInd == 0 else ['', '']

    sub_bap = results_array.sel(subject = subj, variable = 'BAP_new')

    # Normalize trials based on BAP. Better normalization than MinMax, since it takes actual arrival point into consideration
    sub_movement = np.abs(movement_array.sel(subject = subj, source = 'movement')/sub_bap)
    stable_trials = sub_movement.max('time') <= 2 # Optional mask for filtering out trials with very high arrival ratio

    sub_movement = sub_movement.sel(time = speed_window) # Choose a time window

    # Signed sample-to-sample change of the normalized position along the time axis
    sub_speed = np.gradient(sub_movement, axis = 0)
    sub_speed = xr.DataArray(sub_speed, coords = sub_movement.coords, dims = sub_movement.dims)

    # Trial condition masks
    p_tr = pcond.sel(subject = subj)
    a_tr = acond.sel(subject = subj)
    if use_stable_mask:
        p_tr = p_tr & stable_trials
        a_tr = a_tr & stable_trials

    # Peak speed of every trial (max over time), averaged over the trials of each condition.
    # This matches the y-axis label and the metric used in the combined speed/performance cell.
    p_mov = float(sub_speed.sel(trial = p_tr).max('time').mean('trial'))
    a_mov = float(sub_speed.sel(trial = a_tr).max('time').mean('trial'))

    # Assign to bins for histogram
    pp[sInd] = p_mov
    aa[sInd] = a_mov

    a[0].bar(sInd, p_mov, color='b', alpha = 0.7, label = labels[0])
    a[0].bar(sInd, a_mov, color='r', alpha = 0.7, label = labels[1])


a[0].set_ylabel('Avg of trial peak velocity', fontsize = tsz, labelpad = 10)
a[0].set_xlabel('Subjects', fontsize = tsz, labelpad = 10)
a[0].set_xticks(np.arange(len(subjects)))
a[0].set_xticklabels(sub_labels)

for ax in a.ravel():
    ax.tick_params(length = 0)
    ax.set_frame_on(False)

a[0].set_title('All balls', fontsize = tsz, y = 1.1)

f.legend(frameon=False, ncol = 1, fontsize = tsz-2, bbox_to_anchor = (1,1))
f.tight_layout()

# Simple stats: distribution of the per-subject metric in each condition (right panel)
nbins = 8
print(pp.mean(),aa.mean())
a[1].hist(pp, color='royalblue', bins = nbins, alpha = 0.7)
a[1].hist(aa, color='crimson', bins = nbins, alpha = 0.7)
a[1].set_xlabel('Avg of trial peak velocity', fontsize = tsz, labelpad = 10)
a[1].set_ylabel('Number of subjects', fontsize = tsz, labelpad = 10)

# NOTE(review): pp and aa come from the same subjects (paired samples); consider whether a
# paired test such as stats.wilcoxon is more appropriate than Mann-Whitney U.
print('Mann-Whit: {}'.format(stats.mannwhitneyu(pp, aa, alternative = 'greater')[1]))

## Success

In [None]:
f,a = plt.subplots()

tsz = 13

for sInd, subj in enumerate(subjects):

    # Only the first pair of bars carries legend labels, so the legend has one entry per condition
    labels = ['Presence', 'Absence'] if sInd == 0 else ['', '']

    # Successful trials (outcome code 1) of this subject, split by condition
    pfb = posfb.sel(subject = subj)
    p_succ = pcond.sel(subject = subj) & pfb
    a_succ = acond.sel(subject = subj) & pfb

    # Bar height = number of successful trials in the condition
    a.bar(sInd, p_succ.sum('trial'), color='b', alpha = 0.7, label = labels[0])
    a.bar(sInd, a_succ.sum('trial'), color='r', alpha = 0.7, label = labels[1])


a.set_ylabel('Performance', fontsize = tsz, labelpad = 10)
a.set_xlabel('Subjects', fontsize = tsz, labelpad = 10)
a.set_xticks(np.arange(len(subjects)))
a.set_xticklabels(sub_labels)
a.tick_params(length = 0)
a.set_frame_on(False)
f.legend(frameon=False, ncol = 1, fontsize = tsz-2, bbox_to_anchor = (1,1))
a.set_title('All conditions', fontsize = tsz, y = 1.1)
f.tight_layout()

## Combined speed/perf

In [None]:
f,a = plt.subplots(1,2, figsize = (10,5))

tsz = 13
speed_window = np.arange(0,30) # samples (from window start) used for the speed estimate

# Per-subject metric for the Presence (pp) and Absence (aa) conditions
pp = np.zeros(len(subjects))
aa = np.zeros(len(subjects))

for sInd, subj in enumerate(subjects):

    # Only the first pair of bars carries legend labels, so the legend has one entry per condition
    labels = ['Presence', 'Absence'] if sInd == 0 else ['', '']

    # Look up this subject's own BAP. It must be fetched inside the loop; otherwise the value
    # left over from the last subject of a previously executed cell would be used for everyone.
    sub_bap = results_array.sel(subject = subj, variable = 'BAP_new')

    sub_movement = np.abs(movement_array.sel(subject = subj, source = 'movement')/sub_bap)
    sub_movement = sub_movement.sel(time = speed_window)

    sub_speed = np.gradient(sub_movement, axis = 0)
    sub_speed = xr.DataArray(sub_speed, coords = sub_movement.coords, dims = sub_movement.dims)
    sub_speed_abs = np.abs(sub_speed)
    # Keep trials whose mean absolute speed is more than half of their peak absolute speed
    stable_trials = sub_speed_abs.mean('time') > sub_speed_abs.max('time')/2

    pfb = posfb.sel(subject = subj)
    p_tr = pcond.sel(subject = subj) & stable_trials
    a_tr = acond.sel(subject = subj) & stable_trials

    # Count successful trials from the conditional mask (each condition uses its own mask)
    p_sum = (p_tr & pfb).sum('trial')
    a_sum = (a_tr & pfb).sum('trial')

    # Mean of per-trial peak speed divided by the number of successful trials.
    # NOTE(review): a condition with zero successful trials gives a non-finite value here.
    p_met = sub_speed.sel(trial = p_tr).max('time').mean('trial')/p_sum
    a_met = sub_speed.sel(trial = a_tr).max('time').mean('trial')/a_sum

    a[0].bar(sInd, p_met, color='b', alpha = 0.7, label = labels[0])
    a[0].bar(sInd, a_met, color='r', alpha = 0.7, label = labels[1])

    pp[sInd] = p_met
    aa[sInd] = a_met

a[0].set_ylabel('Speed to Accuracy Ratio', fontsize = tsz, labelpad = 10)
a[0].set_xlabel('Subjects', fontsize = tsz, labelpad = 10)
a[0].set_xticks(np.arange(len(subjects)))
a[0].set_xticklabels(sub_labels)
a[0].tick_params(length = 0)
a[0].set_frame_on(False)
f.legend(frameon=False, ncol = 1, fontsize = tsz-2, bbox_to_anchor = (1,1))
a[0].set_title('All conditions', fontsize = tsz, y = 1.1)
f.tight_layout()

# Distribution of the per-subject metric in each condition (right panel)
nbins = 8
print(pp.mean(),aa.mean())
a[1].hist(pp, color='royalblue', bins = nbins, alpha = 0.7)
a[1].hist(aa, color='crimson', bins = nbins, alpha = 0.7)
a[1].set_frame_on(False)
print('Mann-Whit: {}'.format(stats.mannwhitneyu(pp, aa, alternative = 'greater')[1]))