# Imports

In [11]:
import pandas as pd; import os; from analysis_helpers import *; import warnings
warnings.filterwarnings('ignore')

# Data Compile
This cell takes a long time to run. 

It will print each participant number to give a sense of the progress (finishing at participant 112).

In [12]:
def pres_gaze_from_df(behavioral_df, eye_df):
    '''
    Collect the gaze samples recorded while presentation images were on screen.

    input: behavioral_df - participant's behavioral df; the columns 'Trial Type',
                           'Stimulus Onset', 'Stimulus End', 'Trial' and 'Run' are read
           eye_df        - participant's eye track df; its 'timestamp' column is
                           compared against the stimulus onset/end values
                           (eye_df itself is never modified)
    output: single df of gaze data for this participant, when pres images on screen.
            It holds every eye_df row whose timestamp lies in the closed interval
            [Stimulus Onset, Stimulus End] of a 'Presentation' row, plus the columns
            'Trial', 'Run' and 'Behavior_Image_Start' copied from that row.
            If there are no presentation rows, an empty df with those columns is returned.
    '''
    # the extra columns every returned row carries
    label_columns = ('Trial', 'Run', 'Behavior_Image_Start')

    presentation_rows = behavioral_df[behavioral_df['Trial Type'] == 'Presentation']

    # one gaze chunk per presentation trial
    pres_gaze = []

    for _, trial in presentation_rows.iterrows():

        # times when the visual stimuli appear & disappear
        start, end = trial['Stimulus Onset'], trial['Stimulus End']

        # gaze data from the interval the stim was on screen; copy explicitly so the
        # label columns below are written to an independent frame, not to a slice of eye_df
        on_screen = (eye_df['timestamp'] >= start) & (eye_df['timestamp'] <= end)
        chunk = eye_df.loc[on_screen].copy()

        # label the samples with their trial, run and stimulus start time
        chunk['Trial'] = trial['Trial']
        chunk['Run'] = trial['Run']
        chunk['Behavior_Image_Start'] = start

        pres_gaze.append(chunk)

    # pd.concat raises on an empty list, so build the empty result by hand
    if not pres_gaze:
        empty = eye_df.iloc[0:0].copy()
        for column in label_columns:
            empty[column] = float('nan')
        return empty

    # concat data from all runs and trials
    return pd.concat(pres_gaze)

In [42]:
# Compile data from all exps
#
# For every subject folder of both experiments this cell
#   1. loads the behavioral data and the log files (helpers from analysis_helpers),
#   2. writes stimulus ON/OFF times and log-derived attention RTs into the behavioral df,
#   3. shifts the stimulus times of each run by the offset read from that run's
#      'urrent time' log line (to make them eyetribe compatible),
#   4. extracts the gaze samples recorded while presentation images were on screen,
#   5. saves subject_b_data.csv and subject_pres_gaze_data.csv into the subject folder.

all_data = []
all_gaze = []

data_dirs = ['../data/sustained_attention_experiment/','../data/variable_attention_experiment/']
filenames = [x+'aggregate.csv' for x in data_dirs]

# running counter used as UniqueID
# NOTE(review): os.listdir order is arbitrary, so the UniqueID a subject receives can
# differ between machines - confirm nothing downstream hard-codes UniqueID values
sub_count = 0

# for each experiment (sustained and variable)
for data,file in zip(data_dirs, filenames):

    # empty subject list
    sub_list = []

    # for each subject in this directory
    for sub_dir in os.listdir(data):

        # exactly the path string the helpers have always received (`data` already ends in '/')
        sub_path = data + '/' + sub_dir

        # only subject folders hold data; skip loose files (.DS_Store, README.md, aggregate.csv, ...)
        if not os.path.isdir(sub_path):
            continue

        # add attention labels to memory stim
        subject = add_level(sum_pd(sub_path))

        # organize ON and OFF stim times
        subject_log = list_logs(sub_path + '/')
        subject_log['Subject'] = pd.to_numeric(subject_log['Subject'])
        subject_log = subject_log.sort_values(by=['Subject','Run','TIME'])
        subject = subject.sort_values(by=['Subject','Run'])

        # row selectors reused below ('Trial Type' is never modified in this cell)
        is_pres = subject['Trial Type']=='Presentation'
        is_mem  = subject['Trial Type']=='Memory'

        # extract desired stim ON and OFF times from log files
        # (column 0 of the log df holds the text of each log line)
        log_text = subject_log[0]
        composite_onsets  = subject_log[log_text.str.contains('COMPOSITES ON')]
        composite_offsets = subject_log[log_text.str.contains('COMPOSITES OFF')]
        memory_onsets  = list(subject_log[ log_text.str.contains('MEMORY ON')  & log_text.str.contains('FLIP') ]['TIME'])
        memory_offsets = list(subject_log[ log_text.str.contains('MEMORY OFF') & log_text.str.contains('FLIP') ]['TIME'])

        # add ON and OFF stim times from log files to df
        subject.loc[is_pres, 'Stimulus Onset'] = list(composite_onsets['TIME'])
        subject.loc[is_pres, 'Stimulus End'  ] = list(composite_offsets['TIME'])
        subject.loc[is_mem,  'Stimulus Onset'] = memory_onsets
        subject.loc[is_mem,  'Stimulus End'  ] = memory_offsets

        ##############################################################################

        # Pull attention RT's from log file, one run at a time
        probe_times = []
        press_times = []

        for r in subject_log['Run'].unique():

            run_log = subject_log[subject_log['Run']==r]

            # find every probe display
            probe_time_indices = run_log[run_log[0].str.contains('ATTN')].index

            # starting at the event after each probe display, go until you find
            # the first keypress 1 or keypress 3
            key_press_indices = []
            for probe_idx in probe_time_indices:
                x = probe_idx + 1
                while 'Keypress: 1' not in str(run_log.loc[x][0]) and 'Keypress: 3' not in str(run_log.loc[x][0]):
                    x += 1
                key_press_indices.append(x)

            # collect the times of the probe displays and of the button presses
            probe_times.extend(list(run_log.loc[probe_time_indices]['TIME']))
            press_times.extend(list(run_log.loc[key_press_indices ]['TIME']))

        attn_rt = {'probe_start': probe_times, 'key press': press_times}
        attn_df = pd.DataFrame(attn_rt)
        log_file_rt = attn_df['key press'].astype('float64')-attn_df['probe_start'].astype('float64')
        subject.loc[is_pres,'Attention Response Time (s)'] = list(log_file_rt)
        subject.loc[is_pres,'Attention Reaction Time (s)'] = list(log_file_rt)

        ##############################################################################

        # Convert all times to be eyetribe compatible
        for r in subject['Run'].unique():

            run_log = subject_log[subject_log['Run']==r]

            # the single log line of this run that contains 'urrent time'
            clock_rows = run_log.loc[run_log[0].str.contains('urrent time')]
            if len(clock_rows) != 1:
                raise ValueError('expected exactly one "urrent time" log line for run %s of %s, found %d'
                                 % (r, sub_dir, len(clock_rows)))
            clock_row = clock_rows.iloc[0]

            # offset between the last space-separated token of that line and its log TIME
            log_time  = float(clock_row['TIME'])
            curr_time = float(clock_row[0].split(' ')[-1])
            diff = curr_time - log_time

            # convert times for each run
            in_run = subject['Run']==r
            subject.loc[in_run, 'Stimulus Onset'] = subject.loc[in_run, 'Stimulus Onset'] + diff
            subject.loc[in_run, 'Stimulus End']   = subject.loc[in_run, 'Stimulus End'] + diff

        # NOTE(review): 'Attention Reaction Time (s)' was already written above, so after
        # this rename the df carries two columns with that name - confirm which one the
        # downstream analysis is meant to read before removing either
        subject = subject.rename(columns={'Attention Response Time (s)': 'Attention Reaction Time (s)'})

        # add trial numbers to behavioral data
        # (hard-coded for 8 runs of 40 memory and 10 presentation trials each)
        subject['Trial'] = np.nan
        subject.loc[is_mem, 'Trial']  = list(range(0,40))*8
        subject.loc[is_pres, 'Trial'] = list(range(0,10))*8

        # Gaze data
        gaze = eye_initial(sub_path + '/eye_data/')

        # NOTE(review): data[2:10] evaluates to '/data/su' and '/data/va' for the two paths
        # above; kept unchanged because downstream code may match on these labels - confirm
        gaze['Subject']  = sub_dir.split('_')[0]
        gaze['UniqueID'] = sub_count
        gaze['Experiment'] = data[2:10]

        pres_gaze = pres_gaze_from_df(subject, gaze)

        # Give every subj unique ID, label group & experiment
        subject['UniqueID'] = sub_count
        subject['Experiment'] = data[2:10]

        sub_count += 1
        sub_list.append(subject)
        subject.to_csv(sub_path + '/subject_b_data.csv')
        pres_gaze.to_csv(sub_path + '/subject_pres_gaze_data.csv')

#     exp_raw = pd.concat(sub_list)
#     exp_raw.to_csv(file)

In [40]:
# compile gaze df's from each subject
# Reads the two csv files stored in every subject folder and collects them in
# all_subs (behavioral data) and all_gazes (presentation gaze data).
paths    = ['../data/sustained_attention_experiment/', '../data/variable_attention_experiment/']
all_subs = []; all_gazes = []

for exp in paths:

    subjects = os.listdir(exp)

    for s in subjects:

        # only subject folders contain the per-subject csv files; this skips
        # .DS_Store, aggregate.csv, README.md and any other loose file
        if not os.path.isdir(exp+s):
            continue

        # progress indicator
        print(s)

        # NOTE(review): no index_col is given, so if these files were saved with their
        # index it presumably comes back as an 'Unnamed: 0' column - left unchanged
        subject  = pd.read_csv(exp+s+'/subject_b_data.csv')
        gaze     = pd.read_csv(exp+s+'/subject_pres_gaze_data.csv')

        all_gazes.append(gaze); all_subs.append(subject)

34_2019_Oct_13
40_2019_Oct_17
7_2019_Oct_13
13_2019_Oct_14
39_2019_Oct_17
16_2019_Oct_08
2_2019_Oct_06
8_2019_Oct_13
32_2019_Oct_15
15_2019_Oct_14
5_2019_Oct_17
1_2019_Oct_13
10_2019_Oct_14
19_2019_Oct_15
4_2019_Oct_13
28_2019_Oct_09
7_2019_Oct_13_b
14_2019_Oct_07
3_2019_Oct_13
18_2019_Oct_08
38_2019_Oct_17
35_2019_Oct_13
11_2019_Oct_14_b
20_2019_Oct_08
36_2019_Oct_13
5_2019_Oct_13
11_2019_Oct_14
0_2019_Oct_13
25_2019_Oct_08
9_2019_Oct_14
56_2020_Feb_21
17_2019_Nov_18
12_2019_Nov_17
20_2019_Nov_19
30_2020_Jan_13
25_2020_Jan_24
11_2019_Nov_17
18_2019_Nov_19
9_2019_Nov_16
29_2020_Jan_13
14_2019_Nov_17
21_2019_Nov_19
27_2020_Jan_15
6_2019_Nov_15
16_2019_Nov_18
26_2020_Jan_16
28_2020_Jan_13
8_2019_Nov_16
15_2019_Nov_18
5_2019_Nov_15
19_2019_Nov_19
10_2019_Nov_16
0_2020_Feb_07


In [41]:
# stack the per-subject gaze frames and save them
# (all_gazes is rebound from a list of frames to the combined frame, so this
#  cell cannot simply be run a second time without re-running the cell above)
all_gazes = pd.concat(all_gazes)
all_gazes.to_csv('pres_gaze_b.csv')

# stack the per-subject behavioral frames and save them
all_behavs = pd.concat(all_subs)
all_behavs.to_csv('behav_b.csv')