# Imports

In [1]:
import pandas as pd; import os; from analysis_helpers import *; import warnings
# Silence every Python warning from here on.
# NOTE(review): this also hides warnings issued by pandas in the cells below.
warnings.filterwarnings('ignore')

# Data Compile
Organize behavioral data from each participant into one dataframe for the whole experiment.

In [9]:
# Set-up for compiling the data from all experiments. The compile loop will:
#   - add attention labels to the memory stimuli
#   - correct the Group 1 timings (safest estimate)
#   - check and add the gaze data
#   - output a dataframe with attention labels, timing, and gaze data

# compiled dataframes, one per data directory
all_data = []

# data roots of the sustained and the variable attention experiment
a = '../sustained_attention_experiment/data/'
b = '../variable_attention_experiment/data/'

# group1 and group2 of each experiment, sustained experiment first
data_dirs = [root + group for root in (a, b) for group in ('group1', 'group2')]

# output csv for each directory: <directory>_partial.csv
filenames = [directory + '_partial.csv' for directory in data_dirs]

# running count of compiled subjects across all directories
sub_count = 0

# For each group (1 & 2) in each experiment (sustained and variable):
#   1. load every subject's behavioral data and add the attention labels,
#   2. correct the stimulus timing (Group 1) or replace it with the timing
#      found in the log files (Group 2),
#   3. load the gaze data and write <subject dir>/pres_gaze.csv,
#   4. tag the subject with UniqueID, Group and Experiment.
# The subjects of one directory are concatenated, appended to all_data and
# written to that directory's *_partial.csv file.
for data, file in zip(data_dirs, filenames):

    # compiled dataframes of the subjects in this directory
    sub_list = []

    # for each subject in this directory
    for sub_dir in os.listdir(data):
        print(sub_dir)

        # skip the non-subject entries stored next to the subject folders
        if sub_dir in ('.DS_Store', 'README.md'):
            continue

        # add attention labels to memory stim
        subject = add_level(sum_pd(data + '/' + sub_dir))

        # If Group 1 (Sustained or Variable)
        if data[-1] == '1':

            # TIMING CORRECTIONS (for gaze)

            # github Issue #83, Check B: times in behavioral csv's (Group1) imprecise by fractions of a second
            # correct for those timing issues (for gaze analysis), using safest estimates:
            # the same offsets are applied to both experiments, and each offset is the
            # larger of the sustained / variable values listed below.
            is_pres = subject['Trial Type'] == 'Presentation'
            is_mem = subject['Trial Type'] == 'Memory'

            # SUSTAINED: cued composite starts .0178 seconds earlier; VAR : .0167 earlier
            # SUSTAINED: composites disappear 0.0322 secs later; VARIABLE: .0359 secs
            subject.loc[is_pres, 'Stimulus Onset'] -= .0178
            subject.loc[is_pres, 'Stimulus End'] += .0359

            # SUST: mem images display 0.0179 earlier; VAR: .0142
            # SUST: mem images disappear .259 secs later; VAR: .137
            subject.loc[is_mem, 'Stimulus Onset'] -= .0179
            subject.loc[is_mem, 'Stimulus End'] += .259

            # NOTE: no correction for button press timing differences, as they averaged .001 seconds or less

            if data == '../variable_attention_experiment/data/group1':

                # EXP 2 cue corrections ---------------
                # github Issue #83, Check E: some Group1, Variable Experiment valid cues marked as invalid cues

                # correct cue labels Variable Experiment, Group 1:
                # within a run, a trial marked invalid (Cue Validity == 0) whose Cued Side
                # differs from the Cued Side of the run's first presentation trial is
                # relabeled as valid (Cue Validity = 1)
                for run in subject['Run'].unique():
                    in_run = subject['Run'] == run
                    first_cue = list(subject[in_run & (subject['Trial Type'] == 'Presentation')]['Cued Side'])
                    mislabeled = in_run & (subject['Cue Validity'] == 0) & (subject['Cued Side'] != first_cue[0])
                    if mislabeled.any():
                        subject.loc[mislabeled, 'Cue Validity'] = 1

        # if Group 2 (Sustained or Variable)
        elif data[-1] == '2':

            # load the log files; order the log by subject, run and time, and the
            # behavioral data by subject and run, so log rows can be mapped onto
            # trials by position
            subject_log = list_logs(data + '/' + sub_dir + '/')
            subject_log['Subject'] = pd.to_numeric(subject_log['Subject'])
            subject_log = subject_log.sort_values(by=['Subject', 'Run', 'TIME'])
            subject = subject.sort_values(by=['Subject', 'Run'])

            # now, extract desired stim on and off times from log files
            # (column 0 of the log holds the text of each logged event)
            is_flip = subject_log[0].str.contains('FLIP')
            composite_onsets = subject_log[subject_log[0].str.contains('COMPOSITES ON')]
            composite_offsets = subject_log[subject_log[0].str.contains('COMPOSITES OFF')]
            memory_onsets = list(subject_log[subject_log[0].str.contains('MEMORY ON') & is_flip]['TIME'])
            memory_offsets = list(subject_log[subject_log[0].str.contains('MEMORY OFF') & is_flip]['TIME'])

            # add ON and OFF stim times for group 2 (assigned by position)
            is_pres = subject['Trial Type'] == 'Presentation'
            is_mem = subject['Trial Type'] == 'Memory'
            subject.loc[is_pres, 'Stimulus Onset'] = list(composite_onsets['TIME'])
            subject.loc[is_pres, 'Stimulus End'] = list(composite_offsets['TIME'])
            subject.loc[is_mem, 'Stimulus Onset'] = memory_onsets
            subject.loc[is_mem, 'Stimulus End'] = memory_offsets

            # Pull attention RT's from log file

            # find every probe display, and the next event after each probe display
            probe_time_indices = subject_log[subject_log[0].str.contains('ATTN')].index
            key_press_indices = [x + 1 for x in list(probe_time_indices)]

            # if next event isn't keypress 1 or keypress 3, go until you find the first keypress 1 or 3
            # NOTE(review): this steps through index LABELS (label + 1), so it assumes
            # list_logs returns consecutive integer labels in logging order and that a
            # keypress 1/3 follows every probe - otherwise .loc raises KeyError
            for idx, x in enumerate(key_press_indices):
                while 'Keypress: 1' not in str(subject_log.loc[x][0]) and 'Keypress: 3' not in str(subject_log.loc[x][0]):
                    x += 1
                # then stop and collect the time of the button press
                key_press_indices[idx] = x

            attn_rt = {}
            attn_rt['probe_start'] = list(subject_log.loc[probe_time_indices]['TIME'])
            attn_rt['key press'] = list(subject_log.loc[key_press_indices]['TIME'])
            attn_df = pd.DataFrame(attn_rt)
            log_file_rt = attn_df['key press'].astype('float64') - attn_df['probe_start'].astype('float64')

            # .values: assign by position, like the onset / offset lists above.
            # Assigning the Series itself would align its fresh 0..n-1 index with the
            # index labels of subject instead of with the order of the trials.
            subject.loc[is_pres, 'Attention Response Time (s)'] = log_file_rt.values

            # Convert all times to be eyetribe compatible
            for r in subject['Run'].unique():
                # log line(s) of this run containing 'urrent time': the last
                # space-separated token of the message is read as the reference clock
                # value, and TIME is the log clock at that same line
                run_log = subject_log[subject_log['Run'] == r]
                clock_rows = run_log[run_log[0].str.contains('urrent time')]
                time = float(clock_rows['TIME'].iloc[0])
                curr_string = clock_rows[0].str.split(' ')
                curr_time = float(list(curr_string)[0][-1])
                diff = curr_time - time

                # convert times for each run
                in_run = subject['Run'] == r
                subject.loc[in_run, 'Stimulus Onset'] += diff
                subject.loc[in_run, 'Stimulus End'] += diff

            subject = subject.rename(columns={'Attention Response Time (s)': 'Attention Reaction Time (s)'})

        # Gaze data (internal)
        gaze = eye_initial(data + '/' + sub_dir + '/eye_data/')
        gaze['Subject'] = sub_dir.split('_')[0]
        gaze['UniqueID'] = sub_count
        gaze['Group'] = int(data[-1])
        # characters 2-9 of the directory path: '/sustain' or '/variabl'
        gaze['Experiment'] = data[2:10]

        # KZ : need to update pres_gaze so that it pulls times from new df and not from data files
        pres_gaze = pres_gaze_image(data + '/' + sub_dir, gaze)
        print('pres_gaze')

        # Give every subj unique ID, label group & experiment
        subject['UniqueID'] = sub_count
        subject['Group'] = int(data[-1])
        subject['Experiment'] = data[2:10]

        print(sub_count)
        print()
        sub_count += 1
        sub_list.append(subject)
        pres_gaze.to_csv(data + '/' + sub_dir + '/pres_gaze.csv')
        print('gaze_out')

    # one dataframe per directory: keep it for the final concat and save it
    exp_raw = pd.concat(sub_list)
    all_data.append(exp_raw)
    exp_raw.to_csv(file)
    


10_2018_Oct_03
pres_gaze
0

gaze_out
13_2018_Oct_10
pres_gaze
1

gaze_out
02_2018_Sep_26


KeyboardInterrupt: 

# Data Save

Save a single compiled dataframe containing the data from every participant in the study (n=120).

In [3]:
# Combine the per-directory dataframes collected in all_data into one dataframe
# and save it.
# all_data stays empty when the Data Compile cell is interrupted before it
# finishes its first data directory; pd.concat then only reports
# "No objects to concatenate", so say explicitly what has to be done.
if not all_data:
    raise ValueError('No objects to concatenate: all_data is empty - '
                     'run the Data Compile cell to completion first')

full = pd.concat(all_data)
full.to_csv('../parsed_data/FULL_DATA.csv')

ValueError: No objects to concatenate

# Checks

Check the behavioral and gaze data. (Include manual checks.)

In [8]:
# Display pres_gaze as left in the namespace by the Data Compile cell, i.e. the
# presentation gaze dataframe of the subject that cell processed most recently.
pres_gaze

Unnamed: 0,avg,fix,lefteye,raw,righteye,state,time,timestamp,xRaw_righteye,yRaw_righteye,xRaw_lefteye,yRaw_lefteye,av_x_coord,av_y_coord,Subject,UniqueID,Group,Experiment,Trial,Run
43696,"{'x': 1041.042, 'y': 501.128}",True,"{'avg': {'x': 989.74, 'y': 494.5082}, 'pcenter...","{'x': 1029.6586, 'y': 493.4627}","{'avg': {'x': 1092.8706, 'y': 545.1093}, 'pcen...",7,994163077,1.538592e+09,1070.2732,528.2996,989.0439,458.6258,1029.65855,493.46270,10,0,1,/sustain,0,3
43697,"{'x': 1039.5768, 'y': 503.2538}",True,"{'avg': {'x': 989.9557, 'y': 494.9846}, 'pcent...","{'x': 1040.4014, 'y': 537.1355}","{'avg': {'x': 1092.2872, 'y': 546.3817}, 'pcen...",7,994163112,1.538592e+09,1087.7778,565.8687,993.0250,508.4023,1040.40140,537.13550,10,0,1,/sustain,0,3
43698,"{'x': 1039.4185, 'y': 505.6833}",True,"{'avg': {'x': 989.1204, 'y': 495.0801}, 'pcent...","{'x': 1033.1367, 'y': 523.2332}","{'avg': {'x': 1092.2155, 'y': 546.5137}, 'pcen...",7,994163156,1.538592e+09,1084.4083,548.3066,981.8650,498.1596,1033.13665,523.23310,10,0,1,/sustain,0,3
43699,"{'x': 1040.0889, 'y': 508.0482}",True,"{'avg': {'x': 988.995, 'y': 494.6188}, 'pcente...","{'x': 1036.7217, 'y': 526.7321}","{'avg': {'x': 1092.1448, 'y': 547.4536}, 'pcen...",7,994163191,1.538592e+09,1087.1793,566.3860,986.2640,487.0781,1036.72165,526.73205,10,0,1,/sustain,0,3
43700,"{'x': 1040.6561, 'y': 510.36}",True,"{'avg': {'x': 988.7053, 'y': 496.5291}, 'pcent...","{'x': 1041.6633, 'y': 540.6857}","{'avg': {'x': 1092.4565, 'y': 547.8153}, 'pcen...",7,994163224,1.538592e+09,1093.9738,555.7615,989.3529,525.6099,1041.66335,540.68570,10,0,1,/sustain,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21502,"{'x': 1042.4066, 'y': 620.0021}",True,"{'avg': {'x': 1027.8153, 'y': 628.4736}, 'pcen...","{'x': 1065.7292, 'y': 634.1613}","{'avg': {'x': 1072.8375, 'y': 621.5877}, 'pcen...",7,996160959,1.538594e+09,1081.9884,635.9231,1049.4701,632.3995,1065.72925,634.16130,10,0,1,/sustain,9,7
21503,"{'x': 1044.027, 'y': 620.3264}",True,"{'avg': {'x': 1031.9513, 'y': 629.6617}, 'pcen...","{'x': 1086.7183, 'y': 634.4919}","{'avg': {'x': 1075.8658, 'y': 622.5496}, 'pcen...",7,996160992,1.538594e+09,1103.3911,629.9956,1070.0453,638.9883,1086.71820,634.49195,10,0,1,/sustain,9,7
21504,"{'x': 1045.6412, 'y': 620.6877}",True,"{'avg': {'x': 1034.8477, 'y': 629.6591}, 'pcen...","{'x': 1074.8602, 'y': 624.9573}","{'avg': {'x': 1078.2296, 'y': 622.9379}, 'pcen...",7,996161025,1.538594e+09,1095.2908,623.5707,1054.4297,626.3438,1074.86025,624.95725,10,0,1,/sustain,9,7
21505,"{'x': 1047.1091, 'y': 621.0793}",True,"{'avg': {'x': 1036.964, 'y': 629.5635}, 'pcent...","{'x': 1070.9081, 'y': 620.9082}","{'avg': {'x': 1080.5629, 'y': 622.8793}, 'pcen...",7,996161060,1.538594e+09,1096.2729,616.5373,1045.5432,625.2791,1070.90805,620.90820,10,0,1,/sustain,9,7
