In [1]:
# General dependencies
import os, shutil
from os.path import join
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from copy import deepcopy
from glob import glob
import math
from math import floor
import scipy

  import pandas.util.testing as tm


In [2]:
# LCBD dependencies
# add relative path to our toolbox
import sys
sys.path.append('../../../..') # path to preprocessing from here

from LCBDtools.scripts import argParser
from LCBDtools.src import Plots
from LCBDtools.src import Statistics
from LCBDtools.src import TimeSeries
from LCBDtools.Stimuli.Flanker import TaskReader

In [4]:
# Some configuration variables
dataDir = "/data/perlman/moochie/study_data/P-CAT/task_data"
PSUdataDir = "/data/perlman/moochie/study_data/P-CAT/PSU_data/task_data"
participant_num_len = 4 # default length of participant numbers
task = "Flanker"

In [5]:
# from data tracker July 12th 2022, list of subjects with v3 Flanker completed
in_subs = [
    "1115",
    "1116",
#     "1110", didn't do flanker, less 80% practice
    "1114",
#     "1119", # refused to flank
    "1121",
    "1122",
    "1126",
    "1125",
    "1127",
#     "1128", sibling participated
    "1129",
    "1134",
    "1130",
#     "1131", <20% on practice
#     "1137", lower extreme on KBIT
#     "1124", # took off cap midway through, they did the first couple blocks
    "1133",
#     "1138", <20% on practice 
    "1144",
    "1143",
#     "1141", <20% on practice
    "1145",
    "1149",
    "1154",
    "1155",
#     "1142", <20% on practice
    "1148",
    "1156",
    # june 24 updates
    "1159",
    # "1160", failed practice
    ### Sep 21st updates
    "1162",
#     "1164", no flank
#     "1165", no flank
    "1167",
#     "1168", no flank
    "1170",
    "1139",
    "1146",
]

# PSU stuff
PSU_insubs = [
    "1202",
#     "1203", # issue... no flanker data collected
    "1204",
#     "1205", # failed practice
    "1207",
    "1209",
    "1211",
    "1213",
#     "1214", # failed practice
#     "1216", # failed practice
    "1219",
    "1220",
#     "1221", # failed practice
#     "1222", # failed practice
    "1223",
#     "1234", # failed practice
#     "1229", # failed practice 
#     "1231", # failed practice
    "1238",
    "1226",
    ### Sep 21st updates
    "1227",
    "1228",
    "1235",
    "1236",
    "1239",
    "1242",
    # others
    "1243",
    "1244",
    "1246",
    "1247",
    "1251",
    "1253",
    "1254"
]

for sub in PSU_insubs:
    in_subs.append(sub)

In [6]:
# upd. sep 21, 2022
all_subs = [
    "1102",
    "1109",
    "1103",
    "1104",
    "1112",
    "1115",
    "1116",
    "1110",
    "1117",
    "1118",
    "1114",
    "1119",
    "1121",
    "1122",
    "1126",
    "1113",
    "1125",
    "1127",
    "1128",
    "1129",
    "1134",
    "1130",
    "1131",
    "1137",
    "1124",
    "1133",
    "1138",
    "1144",
    "1143",
    "1151",
    "1145",
    "1141",
    "1147",
    "1149",
    "1154",
    "1155",
    "1152",
    "1139",
    "1142",
    "1148",
    "1156",
    "1159",
    "1160",
    "1164",
    "1165",
    "1162",
    "1166",
    "1146",
    "1167",
    "1168",
    "1170"]

# PSU stuff
PSU_subs = [
    "1200",
    "1201",
    "1202",
    "1203",
    "1204",
    "1205",
    "1206",
    "1207",
    "1208",
    "1209",
    "1210",
    "1211",
    "1212",
    "1213",
    "1214",
    "1216",
    "1218",
    "1219",
    "1220",
    "1221",
    "1222",
    "1223",
    "1225",
    "1226",
    "1227",
    "1228",
    "1229",
    "1231",
    "1232",
    "1234",
    "1235",
    "1236",
    "1237",
    "1238",
    "1239",
    "1240",
    "1241",
    "1242",
    "1243",
    "1244",
    "1245",
    "1246",
    "1247",
    "1248",
    "1249",
    "1250",
    "1251",
    "1252",
    "1253",
    "1254"]

for sub in PSU_subs:
    all_subs.append(sub)

In [7]:
# upd. sep 21, 2022
ages = [
    6.42026009582478,
    5.1006160164271,
    6.38193018480493,
    4.68993839835729,
    7.97809719370294,
    6.55989048596851,
    7.43326488706366,
    4.8952772073922,
    6.78439425051335,
    5.09787816563997,
    4.58316221765914,
    4.51471594798084,
    7.87953456536619,
    5.37987679671458,
    7.97535934291581,
    4.03285420944559,
    7.71252566735113,
    7.81656399726215,
    0.0,
    7.45516769336071,
    5.86173853524983,
    6.6009582477755,
    4.3750855578371,
    4.07939767282683,
    6.13826146475017,
    4.35318275154004,
    5.13894592744695,
    5.7056810403833,
    7.42778918548939,
    4.51745379876797,
    6.40930869267625,
    4.03011635865845,
    6.01779603011636,
    7.67693360711841,
    6.94318959616701,
    4.35592060232717,
    0.0,
    7.76180698151951,
    4.03832991101985,
    5.93018480492813,
    5.36344969199179,
    4.254620123,
    5.12251882272416,
    4.70636550308008,
    5.48117727583847,
    5.9192334017796,
    0.0,
    7.58110882956879,
    5.08418891170431,
    4.17248459958932,
    6.58179329226557]

# PSU stuff
PSU_ages = [
    5.88364134154689,
    0.0,
    5.71663244353183,
    5.84804928131417,
    5.91375770020534,
    6.37645448323066,
    0.0,
    6.08624229979466,
    0.0,
    5.30047912388775,
    0.0,
    4.18617385352498,
    0.0,
    4.85694729637235,
    4.42710472279261,
    4.49828884325804,
    0.0,
    6.92676249144422,
    6.34086242299795,
    4.99657768651609,
    4.50376454483231,
    5.68377823408624,
    7.75085557837098,
    7.84941820670773,
    4.24366872005476,
    5.1006160164271,
    4.20807665982204,
    4.99383983572895,
    0.0,
    4.34496919917865,
    7.34839151266256,
    7.7700205338809,
    7.0444900752909,
    6.48596851471595,
    6.20670773442847,
    0.0,
    0.0,
    4.48733744010951,
    5.85626283367557,
    7.35386721423682,
    5.63997262149213,
    5.71937029431896,
    5.94661190965092,
    5.25119780971937,
    5.57426420260096,
    0.0,
    6.080766598, #1251, problematic little bugger
    0.0,
    5.9356605065024,
    6.51334702258727]

for age in PSU_ages:
    ages.append(age)

In [8]:
print(len(PSU_ages))

50


In [9]:
if len(ages) != len(all_subs):
    print("AGES ERROR")

In [10]:
# use glob to make a list of all the task files for flanker for WUSTL
fnames = [fname for fname in glob(join(dataDir, '*', '*_'+task, '*.csv')) if \
    os.path.basename(fname)[:participant_num_len] in in_subs]

In [11]:
# same but PSU files
PSUfnames = [fname for fname in glob(join(PSUdataDir, '*', '*_'+task, '*.csv')) if \
    os.path.basename(fname)[:participant_num_len] in in_subs]

In [12]:
# make a dictionary where all of the flanker data will get loaded into
d_dataset = {}

# loop over all of the WUSTL files
for fname in fnames:
    try:
        flanker_series = TaskReader(fname).flankerSeries # load using the LCBD taskreader 
        subject = os.path.basename(fname)[:participant_num_len]

        for flank in flanker_series:
            flank.eval() 
            flank.meta['age'] = ages[all_subs.index(subject)] # add their time to the metadata

        d_dataset[subject] = flanker_series
        
    except:
        print("Error encountered @ subject:", os.path.basename(fname)[:participant_num_len])  

In [13]:
d_dataset.keys()

dict_keys(['1143', '1134', '1149', '1115', '1162', '1127', '1133', '1144', '1139', '1116', '1159', '1154', '1129', '1130', '1145', '1156', '1121', '1114', '1126', '1170', '1148', '1146', '1122', '1155', '1167', '1125'])

In [14]:
d_dataset['1143'][64].meta

{'trial_n': 65,
 'block': 5,
 'corr_answer': 'right',
 'stim_file': 'stimuli/trial_block5_D_CR.png',
 'directional': True,
 'congruent': True,
 'stim_start_time': 459.1406643,
 'sitm_stop_time': 460.1506643,
 'response': 'right',
 'response_time': 0.5548605999999999,
 'fixation_start_time': 457.67200230000003,
 'fixation_stop_time': 459.1851416,
 'correct': True,
 'age': 7.42778918548939}

In [15]:
# add the PSU files as well
for fname in PSUfnames:
    try:
        flanker_series = TaskReader(fname).flankerSeries
        subject = os.path.basename(fname)[:participant_num_len]

        for flank in flanker_series:
            flank.eval()
            flank.meta['age'] = ages[all_subs.index(subject)]

        d_dataset[subject] = flanker_series
        
    except:
        print("Error encountered @ subject:", os.path.basename(fname)[:participant_num_len]) 

In [16]:
print([os.path.basename(fname)[:participant_num_len] for fname in PSUfnames])

['1243', '1244', '1239', '1220', '1253', '1202', '1247', '1211', '1223', '1254', '1213', '1219', '1238', '1235', '1207', '1251', '1226', '1209', '1246', '1236', '1204']


In [17]:
print(len(PSUfnames))

21


In [18]:
len(d_dataset)

47

In [19]:
# os.mkdir("/home/usr/schneiderc/carlo_flankeroutput")

In [20]:
# for sub in d_dataset.keys():
#     df = pd.DataFrame()
#     flanks = d_dataset[sub]
#     for flank in flanks:
#         df = df.append(flank.meta, ignore_index=True)
#     df.to_csv(
#         "/home/usr/schneiderc/carlo_flankeroutput/{}_flanks.csv".format(str(sub))
#     )

In [21]:
df = pd.DataFrame(columns=['letter', 'number'])

In [22]:
def write_avgs(outfile=None):
    
    df = pd.DataFrame(
        # names of all of the columns we're about to gather data for 
        columns=[
            "subject",
            "age",
            "N RTs",
            "N trials",
            "mean RT (session)",
            "accuracy (session)",
            "IES",
            "mean RT (congruent)",
            "accuracy (congruent)",
            "IES (congruent)",
            "mean RT (incongruent)",
            "accuracy (incongruent)",
            "IES (incongruent)",
            "mean RT (directional)",
            "accuracy (directional)",
            "IES (directional)",
            "mean RT (indirectional)",
            "accuracy (indirectional)",
            "IES (indirectional)"])
    
#     df.set_index('subject')
    
    for subject in d_dataset.keys():
        flanker = d_dataset[subject]
        
        # filters out for only flanks that were responded to
        real_flanks = [flank for flank in flanker if not math.isnan(flank.meta['response_time'])]
        
        df.loc[subject, :] = [
            
            # sub and demographic
            int(subject), # subject
            float(ages[all_subs.index(subject)]), # age
            
            # session data
            int(len([flank for flank in real_flanks])), # N RTs
            int(len(flanker)), # N trials
            float(np.mean([flank.meta['response_time'] for flank in real_flanks])), # session mean RT
            float(np.mean([flank.meta['correct'] for flank in real_flanks])), # session accuracy
            float( # IES = mean session RT / proportion of correct responses
                np.mean([flank.meta['response_time'] for flank in real_flanks]) /\
                (len([flank for flank in real_flanks if flank.meta['correct']]) /\
                    len(real_flanks))),
            
            # congruent (directional)
            float(np.mean([flank.meta['response_time'] for flank in real_flanks if \
                flank.meta['congruent']])), # mean RT congruent
            float(np.mean([flank.meta['correct'] for flank in real_flanks if \
                flank.meta['congruent']])), # accuracy congruent
            float( # IES = congruent RT / proportion of correct responses
                # congruent RT divided by:
                np.mean([flank.meta['response_time'] for flank in real_flanks if\
                    (flank.meta['congruent']) and (flank.meta['directional'])]) /\
                # proportion of correct responses in congruent
                (len([flank for flank in real_flanks if\
                        (flank.meta['congruent']) and (flank.meta['directional']) and (flank.meta['correct'])]) /\
                    len([flank for flank in real_flanks if (flank.meta['congruent']) and (flank.meta['directional'])]))), 
            
            # incongruent (directional)
            float(np.mean([flank.meta['response_time'] for flank in real_flanks if \
                (flank.meta['congruent'] is False) and (flank.meta['directional'] is True)])), # mean RT incongruent
            float(np.mean([flank.meta['correct'] for flank in real_flanks if \
                (flank.meta['congruent'] is False) and (flank.meta['directional'] is True)])), # accuracy incongruent
            float( # IES = incongruent RT / proportion of correct responses
                # incongruent RT divided by:
                np.mean([flank.meta['response_time'] for flank in real_flanks if\
                    (not flank.meta['congruent']) and (flank.meta['directional'])]) /\
                # proportion of correct responses in incongruent
                (len([flank for flank in real_flanks if\
                        (not flank.meta['congruent']) and (flank.meta['directional']) and (flank.meta['correct'])]) /\
                    len([flank for flank in real_flanks if (not flank.meta['congruent']) and (flank.meta['directional'])]))), 
                 
            # directional (congruent or incongruent)
            float(np.mean([flank.meta['response_time'] for flank in real_flanks if \
                flank.meta['directional'] is True])), # mean RT directional
            float(np.mean([flank.meta['correct'] for flank in real_flanks if \
                flank.meta['directional'] is True])), # accuracy directional
            float( # IES = directional RT / proportion of correct responses
                # directional RT divided by:
                np.mean([flank.meta['response_time'] for flank in real_flanks if\
                    flank.meta['directional']]) /\
                # proportion of correct responses in directional
                (len([flank for flank in real_flanks if\
                        (flank.meta['directional']) and (flank.meta['correct'])]) /\
                    len([flank for flank in real_flanks if flank.meta['directional']]))), 
            
            # indirectional (neither)
            float(np.mean([flank.meta['response_time'] for flank in real_flanks if \
                flank.meta['directional'] is False])), # mean RT indirectional
            float(np.mean([flank.meta['correct'] for flank in real_flanks if \
                flank.meta['directional'] is False])), # accuracy indirectional
            float( # IES = indirectional RT / proportion of correct responses
                # indirectional RT divided by:
                np.mean([flank.meta['response_time'] for flank in real_flanks if\
                    not flank.meta['directional']]) /\
                # proportion of correct responses in indirectional
                (len([flank for flank in real_flanks if\
                        (not flank.meta['directional']) and (flank.meta['correct'])]) /\
                    len([flank for flank in real_flanks if not flank.meta['directional']]))), 
        ]
    
    if outfile is not None:
        df.to_csv(outfile) # save as csv
        
    return df

In [23]:
# use the function we just made to save the file to this place
df = write_avgs(outfile="/data/perlman/moochie/analysis/P-CAT/behavioral_metrics_combined_new.csv")

In [24]:
len(df['subject'])

47

In [25]:
df

Unnamed: 0,subject,age,N RTs,N trials,mean RT (session),accuracy (session),IES,mean RT (congruent),accuracy (congruent),IES (congruent),mean RT (incongruent),accuracy (incongruent),IES (incongruent),mean RT (directional),accuracy (directional),IES (directional),mean RT (indirectional),accuracy (indirectional),IES (indirectional)
1143,1143,7.42779,148,150,0.627807,0.945946,0.663681,0.622913,0.959184,0.64942,0.64706,0.918367,0.704577,0.634987,0.938776,0.676399,0.613733,0.96,0.639306
1134,1134,5.86174,145,150,0.621688,0.910345,0.682915,0.593227,0.938776,0.631916,0.67358,0.875,0.769806,0.63299,0.907216,0.697727,0.598849,0.916667,0.65329
1149,1149,7.67693,146,150,0.715747,0.952055,0.751792,0.667699,1.0,0.667699,0.754001,0.979592,0.769709,0.711294,0.989691,0.718704,0.724561,0.877551,0.825663
1115,1115,6.55989,145,150,0.709677,0.924138,0.767934,0.69644,0.92,0.757,0.699616,0.913043,0.766246,0.697962,0.916667,0.761413,0.73263,0.938776,0.78041
1162,1162,5.91923,129,150,0.800578,0.891473,0.89804,0.798056,0.930233,0.857911,0.792168,0.837209,0.946201,0.795112,0.883721,0.899732,0.81151,0.906977,0.894742
1127,1127,7.81656,150,150,0.51455,0.946667,0.543539,0.503355,0.96,0.524328,0.516196,0.9,0.573551,0.509775,0.93,0.548145,0.524101,0.98,0.534797
1133,1133,4.35318,111,150,0.989501,0.864865,1.14411,0.949465,0.885714,1.07198,0.993797,0.756757,1.31323,0.972247,0.819444,1.18647,1.02136,0.948718,1.07656
1144,1144,5.70568,147,150,0.637057,0.952381,0.66891,0.607754,1.0,0.607754,0.672742,0.86,0.782258,0.640576,0.929293,0.689316,0.629799,1.0,0.629799
1139,1139,7.76181,147,150,0.802062,0.979592,0.818771,0.789468,1.0,0.789468,0.841854,0.959184,0.877678,0.815661,0.979592,0.832654,0.774863,0.979592,0.791006
1116,1116,7.43326,149,150,0.629571,0.979866,0.642507,0.63073,0.98,0.643602,0.671527,0.98,0.685232,0.651128,0.98,0.664417,0.585575,0.979592,0.597775


In [41]:
# fig, ax = plt.subplots()

# scatter = ax.scatter(
#     df['age'].astype(float),
#     df['mean RT (directional)'].astype(float) - df['mean RT (indirectional)'].astype(float)
# )

# slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df['age'].astype(float), df['mean RT (directional)'].astype(float) - df['mean RT (indirectional)'].astype(float))

# xseq = np.linspace(0, max(df['age'])+0.2, num=100)

# bestfit = ax.plot(xseq, intercept + slope * xseq, color='k', lw=2.5, label='r^2={:.2f}'.format(r_value**2))

# plt.xlabel('Age (years)')
# plt.ylabel("Mean RT (Directional vs. Indirectional)")
# plt.xlim((min(df['age'])-0.2, max(df['age'])+0.2))
# plt.legend()

In [42]:
# plt.rcParams['savefig.facecolor']='white'

# cols = [col for col in df.columns if \
#     ("mean" in col) or ("accuracy" in col) or ("IES" in col)]

# for coi in cols:

#     fig, ax = plt.subplots()

#     c=['red' if str(sub) not in PSU_insubs else 'blue' for sub in df['subject']]
    
#     scatter = ax.scatter(
#         df['age'].astype(float),
#         df[coi].astype(float),
#         c=c)

#     slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df['age'].astype(float), df[coi].astype(float))

#     xseq = np.linspace(0, max(df['age'])+0.2, num=100)

#     bestfit = ax.plot(xseq, intercept + slope * xseq, color='k', lw=2.5, label='r^2={:.2f}'.format(r_value**2))

#     plt.xlabel('Age (years)')
#     plt.ylabel(coi)
#     plt.xlim((min(df['age'])-0.2, max(df['age'])+0.2))
#     plt.title('Flanker Age vs. ' + coi + ' (WU Red, PSU Blue)')

#     plt.legend(loc="upper right")

#     plt.savefig("/data/perlman/moochie/analysis/P-CAT/RTplot_{}_2.png".format(coi))

In [28]:
len(d_dataset['1159'])

150

In [29]:
# these subjects have missing data maybe? they have version 3 but aren't here. 
# maybe they don't actually have real flanker?
print([sub for sub in in_subs if sub not in list(d_dataset.keys())])

[]


In [30]:
# these subjects have actual flanker data loaded in:
print(list(d_dataset.keys()))

['1143', '1134', '1149', '1115', '1127', '1133', '1144', '1116', '1124', '1159', '1154', '1129', '1130', '1145', '1156', '1121', '1114', '1126', '1148', '1122', '1155', '1125']


In [31]:
d_dataset['1143'][0].meta

{'trial_n': 1,
 'block': 1,
 'corr_answer': 'right',
 'stim_file': 'stimuli/trial_block1_D_IR.png',
 'directional': True,
 'congruent': False,
 'stim_start_time': 148.8814254,
 'stim_stop_time': 149.8868478,
 'response': 'right',
 'response_time': 1.1343523,
 'fixation_start_time': 147.4024356,
 'fixation_stop_time': 148.915589,
 'correct': True,
 'age': 7.42778918548939}

In [32]:
# subject 1124 only answered 80 of them
len([flank for flank in d_dataset['1124'] if flank.meta['correct'] in [True, False]])

80