In [1]:
import pandas as pd
import numpy as np
import scipy.stats

# NOTE(review): `trialOrders` is not referenced anywhere else in the visible notebook —
# possibly a leftover; confirm before removing.
trialOrders = {}

import os

# Root directory that is walked recursively (os.walk) to collect the .tsv event files.
path = './events'


def compute_skourascore(subject_performace, idealized_performance):
    """Return the Pearson correlation between observed and idealized performance.

    Parameters
    ----------
    subject_performace : array-like
        Observed values for one trial (the caller passes needle positions).
    idealized_performance : array-like
        Reference values of the same length.

    Returns
    -------
    float
        The correlation coefficient r; the accompanying p-value is discarded.
    """
    correlation, _p_value = scipy.stats.pearsonr(subject_performace, idealized_performance)
    return correlation


def find_empty_times(data):
    """Return the row labels that delimit the trials of an events table.

    The result is ``[start, *intermissions, end]`` where

    * ``start`` is the label just before the first row whose instruction is not
      " Rest",
    * ``intermissions`` are the labels of every " Push Button" row, and
    * ``end`` is the label just after the last row whose instruction is not " Rest".

    Callers slice ``data[(times[k] + 1):times[k + 1]]`` to get the rows strictly
    between two consecutive boundaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Events table with an 'instruction' column. Values are compared including
        their leading space (" Rest", " Push Button").

    Returns
    -------
    list of int
        Boundary labels in ascending row order.

    Raises
    ------
    IndexError
        If every row is " Rest" (or the table is empty), so no trial can be located.
    """
    instruction = data['instruction']
    intermissions = data[instruction == " Push Button"].index.tolist()
    # Evaluate the non-Rest filter once and reuse it for both ends of the run.
    active_labels = data[instruction != " Rest"].index.tolist()
    if not active_labels:
        raise IndexError(
            "events table has no rows other than ' Rest'; cannot locate any trial"
        )
    first_scan_index = active_labels[0] - 1
    first_rest_at_end = active_labels[-1] + 1
    return [first_scan_index] + intermissions + [first_rest_at_end]

def determine_trialorder(data, times, num_trials=12):
    """Describe each trial by its on-screen label pairing and its instruction.

    For every trial the rows strictly between ``times[k]`` and ``times[k + 1]`` are
    taken, restricted to those with ``feedback == "On"``, and the first such row is
    read.

    Parameters
    ----------
    data : pandas.DataFrame
        Events table with 'feedback', 'left_text', 'right_text' and 'instruction'
        columns.
    times : sequence of int
        Trial boundaries as produced by ``find_empty_times``; needs at least
        ``num_trials + 1`` entries.
    num_trials : int, optional
        Number of trials to describe. Defaults to 12, the value that used to be
        hard-coded.

    Returns
    -------
    list of str
        Flat list of length ``2 * num_trials``: for trial ``k``, element ``2k`` is
        ``"<left>-<right>"`` and element ``2k + 1`` is the instruction. The first
        character of each field is dropped (presumably a leading space, as in
        " Rest" — confirm against the data).

    Raises
    ------
    IndexError
        If ``times`` is too short or a trial has no row with feedback "On".
    """
    trialOrder = []
    for trialnum in range(num_trials):
        trial_rows = data.iloc[(times[trialnum] + 1):times[trialnum + 1]]
        # Build the mask from the slice itself: indexing the slice with a mask computed
        # on the whole frame makes pandas warn that the key will be reindexed.
        this_trial = trial_rows[trial_rows['feedback'] == "On"]
        left_label = this_trial['left_text'].tolist()[0][1:]
        right_label = this_trial['right_text'].tolist()[0][1:]
        instruction = this_trial['instruction'].tolist()[0][1:]
        trialOrder += [left_label + "-" + right_label, instruction]
    return trialOrder

# Collect every file under `path` whose name ends in '.tsv'. A suffix test (rather than
# a substring test) keeps out names such as 'x.tsv.bak'; sorting makes the processing
# order reproducible, since os.walk yields entries in an unspecified order.
files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(path)
    for name in names
    if name.endswith('.tsv')
)

# Column-oriented accumulator for the per-subject summary: one list per column, each
# created separately so the columns never share a list object.
outputdict = {
    column: []
    for column in (
        'ID',
        'skourascore_down', 'skourascore_down_90',
        'skourascore_up', 'skourascore_up_90',
        'skourascore_both', 'skourascore_both_90',
    )
}

# Column-oriented accumulator for the per-trial scores: 'ID' plus one column per trial
# number, keyed by the strings '1' .. '12'.
scoreseries = {'ID': []}
for i in range(1, 13):
    scoreseries.setdefault(str(i), [])

In [2]:
# Number of trials scored per subject; must match the '1'..'12' columns of `scoreseries`.
N_TRIALS = 12

# A trial with this many needle_position samples feeds the "_90" columns. By the
# duration formula used while debugging this cell, (samples - 1) * 2 seconds, 46 samples
# corresponds to 90 s.
FULL_TRIAL_SAMPLES = 46


def _idealized_needle_path(pairing, instruction, length):
    """Return the ideal needle trajectory for one trial, or None if it is unrecognised.

    The trajectory starts at 90 and moves one unit per sample. Whether it rises or
    falls depends on the instruction ("Focus" / "Wander") and on the left-right label
    pairing ('Focused-Wandering' / 'Wandering-Focused').
    """
    rising = {
        ("Focus", 'Focused-Wandering'): True,
        ("Focus", 'Wandering-Focused'): False,
        ("Wander", 'Focused-Wandering'): False,
        ("Wander", 'Wandering-Focused'): True,
    }.get((instruction, pairing))
    if rising is None:
        return None
    step = 1 if rising else -1
    return np.linspace(90, 90 + step * (length - 1), num=length)


def _mean_or_nan(scores):
    """Mean of `scores`; NaN, without NumPy's empty-input warning, when there are none."""
    return np.mean(scores) if len(scores) else np.nan


# Start from empty accumulators so that re-running this cell does not append duplicates.
for column in outputdict.values():
    column.clear()
for column in scoreseries.values():
    column.clear()

for file_path in files:
    # parsing filename to find NKI subject ID
    subpos = file_path.find('sub-A')
    if subpos == -1:
        # Without the marker the slice below would yield a meaningless ID.
        print("skipping", file_path, "- no 'sub-A' subject ID in the path")
        continue
    subjID = file_path[(subpos + 4):(subpos + 13)]
    print(subjID)
    # reading events.tsv file as "data"
    data = pd.read_csv(file_path, sep="\t")
    times = find_empty_times(data)
    trialOrder = determine_trialorder(data, times)
    down_skourascores = []
    down_skourascores_90 = []
    up_skourascores = []
    up_skourascores_90 = []
    both_skourascores = []
    both_skourascores_90 = []
    for trialnum in range(N_TRIALS):
        # this_trial is the data just from the trial of trialnum
        this_trial = data[(times[trialnum] + 1):times[trialnum + 1]]
        needle = this_trial.needle_position.values
        length = len(needle)
        pairing = trialOrder[trialnum * 2]
        instruction = trialOrder[(trialnum * 2) + 1]
        idealized = _idealized_needle_path(pairing, instruction, length)
        if idealized is None:
            # Record NaN rather than reusing the previous trial's score, so the
            # per-trial series stays aligned and the problem is visible downstream.
            print("something is horribly wrong:", subjID, "trial", trialnum + 1,
                  "instruction", repr(instruction), "pairing", repr(pairing))
            both_skourascores.append(np.nan)
            continue
        skourascore = compute_skourascore(needle, idealized)
        is_full_length = length == FULL_TRIAL_SAMPLES
        # "Focus" trials feed the *_down columns, "Wander" trials the *_up columns.
        if instruction == "Focus":
            directional, directional_90 = down_skourascores, down_skourascores_90
        else:
            directional, directional_90 = up_skourascores, up_skourascores_90
        directional.append(skourascore)
        both_skourascores.append(skourascore)
        if is_full_length:
            directional_90.append(skourascore)
            both_skourascores_90.append(skourascore)

    outputdict['ID'].append(subjID)
    outputdict['skourascore_down'].append(_mean_or_nan(down_skourascores))
    outputdict['skourascore_down_90'].append(_mean_or_nan(down_skourascores_90))
    outputdict['skourascore_up'].append(_mean_or_nan(up_skourascores))
    outputdict['skourascore_up_90'].append(_mean_or_nan(up_skourascores_90))
    outputdict['skourascore_both'].append(_mean_or_nan(both_skourascores))
    outputdict['skourascore_both_90'].append(_mean_or_nan(both_skourascores_90))

    scoreseries['ID'].append(subjID)
    # A distinct loop variable: the original reused `i` from the outer loop here.
    for trial_number, trial_score in enumerate(both_skourascores, start=1):
        scoreseries[str(trial_number)].append(trial_score)

# A one-line summary instead of dumping the whole per-trial dictionary.
print(len(scoreseries['ID']), "subjects scored")

A00028185
A00032875
A00033747
A00034854
A00035072
A00035827
A00035840
A00037112
A00037511
A00037848
A00038642




A00038998
A00040524
A00040623
A00040628
A00040640
A00040944
A00043299
A00043677
A00043704
A00043721
A00043722
A00043998
A00045590
A00050940
A00051539
A00051548
A00051676
A00051927
A00052125
A00052340
A00053455
A00053473
A00053475
A00053850
A00053851
A00053902
A00054019
A00054441
A00054504
A00054857
A00054914
A00055121
A00055373
A00055446
A00055447
A00055542
A00055738
A00055763
A00055806
A00056097
A00056452
A00056556
A00056627
A00056949
A00057005
A00057035
A00057182
A00057235
A00057372
A00057444
A00057786
A00057808
A00057965
A00058214
A00058218
A00058503
A00058552
A00058667
A00058952
A00058999
A00059344
A00059346
A00059428
A00059756
A00059845
A00059911
A00060006
A00060093
A00060169
A00060259
A00060279
A00060372
A00060407
A00060430
A00060471
A00060480
A00060516
A00060582
A00060632
A00060662
A00060773
A00060848
A00060925
A00061204
A00061276
A00061387
A00061709
A00061711
A00061806
A00062210
A00062248
A00062266
A00062282
A00062288
A00062351
A00062917
A00062942
A00063008
A00063103
A00063326


In [3]:
# Turn the per-subject accumulator into a table ordered by subject ID, show it, and
# write it out as a tab-separated file without the index column.
df = pd.DataFrame(outputdict).sort_values(by=['ID'])
print(df)
df.to_csv("./newout.tsv", sep="\t", index=False)

            ID  skourascore_down  skourascore_down_90  skourascore_up  \
0    A00028185          0.016352            -0.053907       -0.298887   
1    A00032875         -0.528836             0.093946        0.349334   
2    A00033747          0.444372             0.911257        0.047072   
3    A00034854          0.395348             0.681288       -0.148989   
4    A00035072          0.624165             0.615793        0.703747   
..         ...               ...                  ...             ...   
133  A00066827         -0.098758            -0.310026        0.233062   
134  A00066926          0.406654             0.025461        0.049357   
135  A00072203          0.005606            -0.104370        0.158520   
136  A00073600          0.087554             0.018960        0.593002   
137  A00073677         -0.141997            -0.649310       -0.252080   

     skourascore_up_90  skourascore_both  skourascore_both_90  
0            -0.327317         -0.141267            -0.1906

In [9]:
# Selects matplotlib's interactive "notebook" backend for the figures below.
# NOTE(review): this backend targets the classic Notebook front end; other front ends
# may need `%matplotlib widget` (ipympl) instead — confirm the target environment.
%matplotlib notebook

In [61]:
import matplotlib
import matplotlib.pyplot as plt
# `display` and `HTML` are imported from IPython.display; importing them from
# IPython.core.display is a deprecated path.
from IPython.display import display, HTML

# Trial numbers 1..12 for the x axis, matching the '1'..'12' columns of `scoreseries`.
x = list(range(1, 13))

# One row per subject, ordered by ID; column 0 is the ID and the rest are trial scores.
series = pd.DataFrame(scoreseries)
series = series.sort_values(by=['ID'])
series = series.to_numpy()

fig, ax = plt.subplots()
for subject_row in series:
    # Faint black lines so that overlapping subjects build up visible density.
    ax.plot(x, subject_row[1:], color='k', alpha=0.1)

# set ticks and tick labels
ax.set_xlim((1, 12))
ax.set_xticks(x)
ax.set_xticklabels(x)
ax.set_ylim(-1, 1)

ax.set_xlabel('Trial Number')
ax.set_ylabel('Score')
ax.set_title('Learning Curves')

plt.show()

# Inject CSS that sets the height of scrollable output areas to 55em.
display(HTML("<style>div.output_scroll { height: 55em; }</style>"))

<IPython.core.display.Javascript object>