In [1]:
import pandas as pd; import seaborn as sb; import statsmodels.formula.api as smf; import pingouin;
from itertools import combinations; import os; from analysis_helpers import *; 
import scipy; from scipy import stats; import os; import math; import warnings
warnings.filterwarnings('ignore')

# Check data files

#### Check the number of participant directories in each experiment and group. 
(Naming convention: ##_YYYY_Mmm_DD or #_YYYY_Mmm_DD, depending on subject #). 


PRINT: # of participants per experiment and group.




#### Check that each participant has one file of each (below). 

- pres0.csv through pres7.csv
- mem0.csv through mem7.csv
- eye_data/##_0* through eye_data/##_7*

PRINT: subjects who have more or less than 1 of each file type.


In [2]:
%%bash

## declare array variables for exps and groups
declare -a exps=("sustained_attention_experiment" "variable_attention_experiment")
declare -a groups=("group1" "group2")

## for each exp
for e in "${exps[@]}"; do

    # for each group
    for g in "${groups[@]}"; do
        echo
        echo
        echo "$e"
        echo "$g"

        # print the number of participant directories
        # (the variables are quoted so paths with spaces are not word-split;
        #  the glob stays outside the quotes so it still expands)
        find "../$e/data/$g/"*?_????_???_??* -maxdepth 0 -type d | wc -l
        echo
        echo

        # each participant in this group and experiment
        for d in "../$e/data/$g/"*  ; do

            # for each run (0-7)
            for n in {0..7}; do

                # count number of pres, mem, and gaze files this participant has;
                # when a file is absent, find reports it on stderr and the count is 0
                pres=$(find "$d/pres$n.csv" | wc -l)
                mem=$(find "$d/mem$n.csv" | wc -l)
                eye=$(find "$d/eye_data/"*_"$n"* | wc -l)

                # if participant has more or less than 1 file for this run, print their info:
                # directory, run number, then the pres / mem / gaze counts
                if [[ $pres -ne 1 ]] || [[ $mem -ne 1 ]] || [[ $eye -ne 1 ]] ; then
                    echo "$d"
                    echo "$n"
                    echo
                    echo "$pres"
                    echo "$mem"
                    echo "$eye"
                    echo
                fi

            done
        done
    done
done



sustained_attention_experiment
group1
      30


../sustained_attention_experiment/data/group1/31_2018_Oct_21
0

       1
       1
       0



sustained_attention_experiment
group2
      30




variable_attention_experiment
group1
      30


../variable_attention_experiment/data/group1/0_2019_Feb_21
0

       1
       1
       0

../variable_attention_experiment/data/group1/12_2019_Mar_27
0

       1
       1
       0

../variable_attention_experiment/data/group1/1_2019_Feb_21
0

       1
       1
       0



variable_attention_experiment
group2
      23




find: ../sustained_attention_experiment/data/group1/31_2018_Oct_21/eye_data/*_0*: No such file or directory
find: ../variable_attention_experiment/data/group1/0_2019_Feb_21/eye_data/*_0*: No such file or directory
find: ../variable_attention_experiment/data/group1/12_2019_Mar_27/eye_data/*_0*: No such file or directory
find: ../variable_attention_experiment/data/group1/1_2019_Feb_21/eye_data/*_0*: No such file or directory


# Graveyard

In [6]:
def gaze_file_check(exps, grps, full=False, n_runs=8, min_bytes=2000000, max_bytes=10000000):
    '''
    checks the number and size of gaze files for each subject in each experiment and group passed
    does not return anything, prints results

    Gaze files are looked up in ../<exp>/data/<grp>/<participant>/eye_data.

    exps      -- iterable of experiment directory names
    grps      -- iterable of group directory names
    full      -- if truthy, print per-participant details; otherwise print only
                 the two summary counts for each experiment/group
    n_runs    -- number of gaze files a participant is expected to have
    min_bytes -- gaze files smaller than this are flagged
    max_bytes -- gaze files larger than this are flagged

    Note: the "missing gaze files" summary counts participants that have fewer
    than n_runs gaze files, not individual files.
    '''

    # for each experiment
    for exp in exps:

        # for each group
        for grp in grps:

            print(); print('##################################'); print()

            # set combo equal to the path to the data for this experiment and group
            combo = '../'+exp+'/data/'+grp+'/'
            print(str(combo)); print()

            # only directories count as participants, so stray files (.DS_Store,
            # READMEs, ...) neither inflate the count nor crash the eye_data lookup;
            # sorted so the report does not depend on os.listdir ordering
            participants = sorted(
                p for p in os.listdir(combo)
                if not p.startswith('.') and os.path.isdir(os.path.join(combo, p))
            )
            print(str(len(participants)) + ' participants'); print()

            missing_gaze = 0; funny_size = 0

            for x in participants:

                # number of problems found for this participant
                loop = 0

                # a participant without an eye_data folder simply has zero gaze files
                eye_dir = os.path.join(combo, x, 'eye_data')
                if os.path.isdir(eye_dir):
                    gaze_files = sorted(a for a in os.listdir(eye_dir) if a != '.DS_Store')
                else:
                    gaze_files = []

                if len(gaze_files) < n_runs:
                    if full:
                        print(x); print(); print( '*** '+ str(len(gaze_files)) + ' gaze files ***'); print();
                    loop += 1
                    missing_gaze += 1

                # positions of the gaze files whose size falls outside [min_bytes, max_bytes]
                file_sizes = [os.path.getsize(os.path.join(eye_dir, k)) for k in gaze_files]
                indices = [i for i, j in enumerate(file_sizes) if j < min_bytes or j > max_bytes]

                if len(indices) > 0:
                    if full:
                        print(x); print()

                    for ind in indices:
                        if full:
                            print(gaze_files[ind]); print(file_sizes[ind]); print();
                        loop += 1
                    funny_size += len(indices)

                # separator after every participant that had at least one problem
                if loop > 0 and full:
                    print('----------------- ')

            if not full:
                print(str(missing_gaze)+' missing gaze files')
                print(str(funny_size)+' large or small gaze files')


In [7]:
# Summary-only report (full is left at its default) for both experiments and both groups.
gaze_file_check(
    exps=['sustained_attention_experiment', 'variable_attention_experiment'],
    grps=['group1', 'group2'],
)


##################################

../sustained_attention_experiment/data/group1/

30 participants

1 missing gaze files
7 large or small gaze files

##################################

../sustained_attention_experiment/data/group2/

30 participants

0 missing gaze files
32 large or small gaze files

##################################

../variable_attention_experiment/data/group1/

30 participants

3 missing gaze files
14 large or small gaze files

##################################

../variable_attention_experiment/data/group2/

23 participants

0 missing gaze files
46 large or small gaze files


# Check parsed gaze data

How many gazepoints are recorded during each presentation trial (3s) on average?
Are there trials where none are captured? Are there duplicate gaze data points?

In [4]:
gaze       = pd.read_csv('../parsed_data/full_gaze.csv')
behavioral = pd.read_csv('../parsed_data/full_behavioral.csv')

# Row masks that do not change while the loop runs (only 'Last Cued' is written below).
memory_trial_mask    = behavioral['Trial Type'] == 'Memory'
last_pres_trial_mask = (behavioral['Trial Type'] == 'Presentation') & (behavioral['Trial'] == 9)

# For every subject/run pair, copy the 'Cued Category' of presentation trial 9
# (presumably the last presentation trial of the run -- confirm) onto that
# pair's memory trials as 'Last Cued'.
for s in behavioral['UniqueID'].unique():
    subject_mask = behavioral['UniqueID'] == s

    for r in behavioral['Run'].unique():
        subject_run_mask = subject_mask & (behavioral['Run'] == r)

        # .item() requires exactly one matching row and raises ValueError otherwise
        last_cued_category = behavioral.loc[subject_run_mask & last_pres_trial_mask, 'Cued Category'].item()

        behavioral.loc[subject_run_mask & memory_trial_mask, 'Last Cued'] = last_cued_category

In [5]:
# Show what is in the parsed data directory (the cell displays the returned list).
os.listdir('../parsed_data')

['.DS_Store',
 'full_behavioral.csv',
 'README.md',
 'behav_restricted.csv',
 'full_gaze.csv']

# Check oddball gaze files

Do gaze files that are unusually large or small catch more or fewer gazepoints during viewing?