# Imports

In [1]:
import pandas as pd; import os; from analysis_helpers import *; import warnings
warnings.filterwarnings('ignore')

# Data Compile
Organize behavioral data from each participant into one dataframe for the whole experiment.

In [13]:
# Compile data from all exps
# add attention labels to memory stim
# correct timings for group1 - safest estimate
# check and add gaze data
# output dataframe with attn labels, timing, and gaze data
#
# For each of the four group directories this cell builds one behavioral
# dataframe per subject, applies the group-specific corrections below, writes
# a per-subject gaze csv (df_gaze_data.csv) into the subject's directory, and
# writes one aggregate csv per group next to the group directory.

# NOTE: all_data / all_gaze are only referenced by the commented-out lines at
# the bottom of this cell; they stay empty.
all_data = []
all_gaze = []

a,b = '../sustained_attention_experiment/data/','../variable_attention_experiment/data/'
data_dirs = [a+'group1', a+'group2', b+'group1', b+'group2']
# aggregate output paths, e.g. '<experiment>/data/group1_aggregate.csv'
filenames = [x+'_aggregate.csv' for x in data_dirs]

# running counter across all four directories; becomes each subject's UniqueID
sub_count = 0

# for each group (1 & 2) in each experiment (sustained and variable)
for data,file in zip(data_dirs, filenames):

    # empty subject list
    sub_list = []

    # for each subject in this directory
    for sub_dir in os.listdir(data):
        # printed for every directory entry, including the ones skipped below
        print(sub_dir)
        if sub_dir != '.DS_Store' and sub_dir != 'README.md':

            # add attention labels to memory stim 
            subject = add_level(sum_pd(data + '/' + sub_dir))

            # If Group 1 (Sustained or Variable)
            # (the last character of the directory path is the group number)
            if data[-1] == '1':

                # TIMING CORRECTIONS (for gaze)

                # github Issue #83, Check B: times in behavioral csv's (Group1) imprecise by fractions of a second
                # correct for those timing issues (for gaze analysis), using safest estimates 

                # SUSTAINED: cued composite starts .0178 seconds earlier; VAR : .0167 earlier 
                # SUSTAINED: composites disappear 0.0322 secs later; VARIABLE: .0359 secs 
                # The same offsets are applied to both experiments: the larger of the
                # two values listed above in each direction (.0178 earlier, .0359 later).
                subject.loc[subject['Trial Type']=='Presentation','Stimulus Onset'] = subject[subject['Trial Type']=='Presentation']['Stimulus Onset'] - .0178
                subject.loc[subject['Trial Type']=='Presentation','Stimulus End']   = subject[subject['Trial Type']=='Presentation']['Stimulus End']   + .0359

                # SUST: mem images display 0.0179 earlier; VAR: .0142
                # SUST: mem images disappear .259 secs later; VAR: .137
                # Again the larger value of each pair is used for both experiments.
                subject.loc[subject['Trial Type']=='Memory','Stimulus Onset'] = subject[subject['Trial Type']=='Memory']['Stimulus Onset'] - .0179
                subject.loc[subject['Trial Type']=='Memory','Stimulus End']   = subject[subject['Trial Type']=='Memory']['Stimulus End']   +.259

                # NOTE: no correction for button press timing differences, as they averaged .001 seconds or less

                if data == '../variable_attention_experiment/data/group1':

                    # EXP 2 cue corrections ---------------
                    # github Issue #83, Check E: some Group1, Variable Experiment valid cues marked as invalid cues

                    # correct cue labels Variable Experiment, Group 1
                    # Within each run, rows with Cue Validity == 0 whose Cued Side differs
                    # from the run's first Presentation trial are relabeled to Cue Validity 1.
                    for run in subject['Run'].unique():
                        first_cue = list(subject[(subject['Run']==run) & (subject['Trial Type']=='Presentation')]['Cued Side'])
                        if subject[(subject['Run']==run) & (subject['Cue Validity']==0) & (subject['Cued Side'] != first_cue[0])].shape[0]>0:
                            subject.loc[(subject['Run']==run) & (subject['Cue Validity']==0) & (subject['Cued Side'] != first_cue[0]), 'Cue Validity'] = 1

            # if Group 2 (Sustained or Variable)
            if data[-1] == '2':

                # add ON and OFF stim times for group 2
                # load the subject's log events and order them by subject, run and time
                subject_log = list_logs(data + '/' + sub_dir + '/')
                subject_log['Subject'] = pd.to_numeric(subject_log['Subject'])
                subject_log = subject_log.sort_values(by=['Subject','Run','TIME'])
                subject = subject.sort_values(by=['Subject','Run'])

                # now, extract desired stim on and off times from log files
                # (column 0 of the log dataframe holds the event message text)
                composite_onsets  = subject_log[subject_log[0].str.contains('COMPOSITES ON')]
                composite_offsets = subject_log[subject_log[0].str.contains('COMPOSITES OFF')]
                memory_onsets  = list(subject_log[ (subject_log[0].str.contains('MEMORY ON')) & (subject_log[0].str.contains('FLIP')) ]['TIME'])
                memory_offsets = list(subject_log[ (subject_log[0].str.contains('MEMORY OFF')) & (subject_log[0].str.contains('FLIP')) ]['TIME'])
                # attention_on = composite_offsets 

                # add ON and OFF stim times for group 2
                # These are positional (list) assignments: the number of matching log
                # events must equal the number of Presentation / Memory rows, and the
                # log order is assumed to match the row order of `subject` — TODO confirm.
                subject.loc[subject['Trial Type']=='Presentation', 'Stimulus Onset'] = list(composite_onsets['TIME'])
                subject.loc[subject['Trial Type']=='Presentation', 'Stimulus End'  ] = list(composite_offsets['TIME'])
                # NOTE: the Presentation RT values written on the next line are assigned
                # again further down from the log-file RTs.
                subject.loc[subject['Trial Type']=='Presentation','Attention Response Time (s)'] = subject[subject['Trial Type']=='Presentation']['Attention Response Time (s)'] - subject[subject['Trial Type']=='Presentation']['Stimulus End']
                subject.loc[subject['Trial Type']=='Memory', 'Stimulus Onset'] = memory_onsets
                subject.loc[subject['Trial Type']=='Memory', 'Stimulus End'  ] = memory_offsets


                # Pull attention RT's from log file

                # find every probe display, and the next event after each probe display
                # (label + 1 is used as "next event"; assumes the log index labels are
                # consecutive integers in event order — TODO confirm against list_logs)
                probe_time_indices = subject_log[(subject_log[0].str.contains('ATTN'))].index
                key_press_indices  = [x+1 for x in list(probe_time_indices)]

                # if next event isn't keypress 1 or keypress 3, go until you find the first keypress 1 or 3
                # NOTE(review): there is no upper bound on this scan; a probe with no
                # later 'Keypress: 1'/'Keypress: 3' row would run past the last label
                # and raise a KeyError from subject_log.loc[x].
                for idx,x in enumerate(key_press_indices):
                    while 'Keypress: 1' not in str(subject_log.loc[x][0]) and 'Keypress: 3' not in str(subject_log.loc[x][0]):
                        x+=1
                    key_press_indices[idx]=x
                    # then stop and collect the time of the button press

                # RT = time of the first qualifying keypress minus time of the probe event
                attn_rt = {}
                attn_rt['probe_start'] = list(subject_log.loc[probe_time_indices]['TIME'])
                attn_rt['key press' ] = list(subject_log.loc[key_press_indices ]['TIME'])
                attn_df = pd.DataFrame(attn_rt)
                log_file_rt = attn_df['key press'].astype('float64')-attn_df['probe_start'].astype('float64')
                # NOTE(review): log_file_rt is a Series with a fresh 0..n-1 index, and
                # pandas aligns a Series on index labels when assigning through .loc.
                # Unless the Presentation rows of `subject` carry exactly those labels,
                # values may land on the wrong rows or become NaN — confirm, or assign
                # positional values instead.
                subject.loc[subject['Trial Type']=='Presentation','Attention Response Time (s)'] = log_file_rt

                # Convert all times to be eyetribe compatible
                for r in subject['Run'].unique():
                    # TIME stamp of the run's 'urrent time' log row; float() requires
                    # exactly one such row per run
                    time = float(subject_log[subject_log['Run']==r].loc[subject_log[subject_log['Run']==r][0].str.contains('urrent time')]['TIME'])
                    # the last space-separated token of that row's message is parsed as
                    # the reference clock time
                    curr_string = subject_log[subject_log['Run']==r].loc[subject_log[subject_log['Run']==r][0].str.contains('urrent time')][0].str.split(' ')
                    curr_time = float(list(curr_string)[0][-1])
                    # offset between the reference clock and the log clock for this run
                    diff = curr_time - time

                    # convert times for each run
                    subject.loc[subject['Run']==r, 'Stimulus Onset'] = subject.loc[subject['Run']==r, 'Stimulus Onset'] + diff
                    subject.loc[subject['Run']==r, 'Stimulus End']   = subject.loc[subject['Run']==r, 'Stimulus End'] + diff

                # NOTE(review): this rename runs only in the group-2 branch; group-1
                # frames keep whatever RT column name sum_pd/add_level produced —
                # confirm the column names agree before the groups are concatenated.
                subject = subject.rename(columns={'Attention Response Time (s)': 'Attention Reaction Time (s)'})

            # add trial numbers to behavioral data
            # `np` is not imported by name in this notebook; presumably it is provided
            # by `from analysis_helpers import *` — TODO confirm.
            subject['Trial'] = np.nan
            # Requires exactly 320 Memory rows and 80 Presentation rows per subject
            # (trial numbers 0-39 and 0-9, each sequence repeated 8 times).
            subject.loc[subject['Trial Type']=='Memory','Trial']       = list(range(0,40))*8
            subject.loc[subject['Trial Type']=='Presentation','Trial'] = list(range(0,10))*8

            # Gaze data 
            gaze = eye_initial(data + '/' + sub_dir + '/eye_data/')

            # Subject label is the part of the directory name before the first underscore
            gaze['Subject']  = sub_dir.split('_')[0]
            gaze['UniqueID'] = sub_count
            gaze['Group'] = int(data[-1])
            # data[2:10] is an 8-character slice of the path: '/sustain' or '/variabl'
            gaze['Experiment'] = data[2:10]

            # KZ : need to update pres_gaze so that it pulls times from new df and not from data files
            pres_gaze = pres_gaze_from_df(subject, gaze) # pres_gaze_image(data + '/' + sub_dir, gaze)
            print('pres_gaze')

            # Give every subj unique ID, label group & experiment
            subject['UniqueID'] = sub_count
            subject['Group'] = int(data[-1])
            subject['Experiment'] = data[2:10]

            print(sub_count)
            print()
            sub_count += 1
            sub_list.append(subject)
            # per-subject gaze output, written into the subject's own directory
            pres_gaze.to_csv(data + '/' + sub_dir + '/df_gaze_data.csv')
            print('gaze_out')

    # stack this group's subjects and write the group's aggregate csv
    exp_raw = pd.concat(sub_list)
#     all_gaze.append(pres_gaze)
#     all_data.append(exp_raw)
    exp_raw.to_csv(file)


# Data Save

Save a single compiled dataframe containing the data from every participant in the study (n=120).

In [3]:
# Stack the four per-group aggregate csv's (groups 1 and 2 of the sustained
# and variable experiments) into one behavioral dataframe and save it.
exps = ['sustained_attention_experiment/data/', 'variable_attention_experiment/data/']
files = ['group1_aggregate.csv', 'group2_aggregate.csv']

# experiment-major order: sustained g1, sustained g2, variable g1, variable g2
full_four = [
    pd.read_csv('../' + exp_dir + aggregate_name)
    for exp_dir in exps
    for aggregate_name in files
]

full_behavioral = pd.concat(full_four)
full_behavioral.to_csv('../parsed_data/full_behavioral.csv')

# parsed_data/full_behavioral.csv now holds ALL behavioral data from ALL participants

FileNotFoundError: [Errno 2] File b'../sustained_attention_experiment/data/group1_aggregate.csv' does not exist: b'../sustained_attention_experiment/data/group1_aggregate.csv'

In [11]:
# Compile the per-subject gaze csv's (df_gaze_data.csv, one per subject
# directory) from both groups of both experiments into a single file.
paths  = ['../sustained_attention_experiment/data/', '../variable_attention_experiment/data/']
groups = ['group1', 'group2']
gaze   = []

for exp in paths:
    for group in groups:
        subjects = os.listdir(exp+group)
        for s in subjects:
            # Test for the exact file that is read below. This also skips stray
            # non-directory entries (.DS_Store, README.md) and subjects that have
            # no compiled gaze output, without calling os.listdir on a plain file.
            gaze_file = os.path.join(exp+group, s, 'df_gaze_data.csv')
            if os.path.isfile(gaze_file):
                gaze.append(pd.read_csv(gaze_file))

# Fail with an explicit message rather than pandas' generic
# "No objects to concatenate" when no gaze files were found.
if not gaze:
    raise ValueError('No df_gaze_data.csv files found under: ' + ', '.join(paths))

gaze_df = pd.concat(gaze)
gaze_df.to_csv('full_gaze.csv')