In [1]:
import pandas as pd 
import os
import numpy as np

In [2]:
# Experiment settings that the checks in this notebook compare the recorded data against.
params = dict(
    runs=1,                     # bounds the per-run loop in stimulus_check; scales the expected valid-cue count
    presentations_per_run=10,   # used for both the expected valid-cue count and the expected shared-image count
    invalid_cue_percentage=10,  # percentage subtracted from 100 in validity_check
    mem_to_pres=4,              # divisor of the expected shared-image count in stimulus_check
    mem_pres_split=2,           # not read by any of the checks visible in this section
)

In [3]:
def sum_pd(subdir):
    '''
    Combine a subject's presentation and memory files into one dataframe.

    input: subject directory (string)
    output: full experiment info (dataframe) -- the rows of every file whose
            name contains 'pres' or 'mem', read as CSV and concatenated with
            a fresh 0..n-1 index
    raises: ValueError if the directory holds no matching file
    '''

    # os.listdir returns names in arbitrary order; sorting makes the row order
    # of the combined dataframe reproducible from one run to the next
    files = sorted(x for x in os.listdir(subdir) if 'pres' in x or 'mem' in x)

    if not files:
        # pd.concat would raise a ValueError here as well, but without saying
        # which directory was at fault
        raise ValueError("No 'pres' or 'mem' files found in " + str(subdir))

    # os.path.join handles a trailing separator on subdir and is portable
    df_list = [ pd.read_csv(os.path.join(subdir, x)) for x in files ]
    df = pd.concat(df_list, ignore_index=True)

    return(df)

def images(df_col):
    '''
    Keep only the string entries of a column.

    input: df column (any iterable of cell values)
    output: list of image names (strings), in their original order;
            non-string cells (for example NaN) are left out
    '''
    # isinstance, unlike an exact type comparison, also accepts subclasses of
    # str while still discarding every non-string cell
    return([ x for x in df_col if isinstance(x, str) ])

def check_reps(lst):
    '''
    Count the entries of a list that repeat a value already in the list.

    input: list of imagenames (strings)
    output: number of repeats (int); 0 when every entry is distinct
    '''
    total = len(lst)
    distinct = len(set(lst))
    return total - distinct
    
def list_compare(lst1, lst2):
    '''
    input: two lists
    output: set of the items that appear in both lists
    '''
    shared = set(lst1).intersection(set(lst2))
    return shared

def check_shared(df, col1, col2,x=None):
    '''
    Find the images that appear in both of two columns.

    inputs: dataframe, two column names (strings), run#=None
            when a run# is given, only rows whose 'Run' value equals it are
            compared; when it is None the whole dataframe is compared
    outputs: set of the image names shared between the columns
    '''

    # Compare against None instead of testing for the exact int type, so that
    # a run number of another integer type (e.g. a numpy integer) still
    # restricts the rows rather than silently falling back to the whole frame
    if x is not None:
        rows = df.loc[df['Run']==x]
    else:
        rows = df

    return(list_compare(images(rows[col1]), images(rows[col2])))

def validity_check(df, params):
    '''
    Check that the number of valid cues matches the configured percentage.

    inputs: dataframe (needs a 'Cue Validity' column), parameters (reads
            'presentations_per_run', 'runs' and 'invalid_cue_percentage')
    outputs: message about validity percentage (empty list or list containing string)
    '''
    # Missing entries are dropped before summing: adding even one NaN would
    # turn the total into NaN, which never equals the expected count and would
    # make this check fail regardless of the real data.
    # NOTE(review): presumably NaN appears for rows that come from files
    # without a 'Cue Validity' column once the files are concatenated, and the
    # column holds 1 for a valid cue and 0 for an invalid one -- confirm.
    num_valid = sum(df['Cue Validity'].dropna())

    expected_valid = params['presentations_per_run']*params['runs']*(100-params['invalid_cue_percentage'])/100

    if num_valid != expected_valid:
        msg = ['Incorrect number of invalid attention circles.  ']
    else:
        msg = []

    return(msg)
    
def stimulus_check(subdir, params):
    '''
    Run every stimulus sanity check on one subject's data.

    input: subject directory (string), parameters (dict; reads 'runs',
           'presentations_per_run' and 'mem_to_pres', and is passed on to
           validity_check)
    output: list of problem messages (strings), or the string
            "All stimulus proportions correct! :)" when nothing is wrong
    side effect: prints the column name and the shared images for every
                 column/run whose shared-image count is wrong
    '''

    msg = []
    select_cols = ['Cued Face', 'Cued Place',
                   'Uncued Face', 'Uncued Place',
                   'Memory Image']

    df = sum_pd(subdir)

    # Number of images each presentation column is expected to share with the
    # memory column within one run; identical for every column and run
    expected_shared = params['presentations_per_run']*2/params['mem_to_pres']

    for x in select_cols:
        # Count repeats among the image names only: cells that hold no image
        # name (NaN) are not images and must not be reported as repetitions
        if check_reps(images(df[x])) > 0:
            msg.append('Internal repetition in '+x+'.  ')

        # The memory column is the reference the others are compared against
        if x == 'Memory Image':
            continue

        # NOTE(review): runs are numbered 0..runs-1 here -- confirm that the
        # 'Run' column of the data files is 0-indexed as well
        for run in range(params['runs']):
            shared = check_shared(df, x, 'Memory Image', run)
            if len(shared) != expected_shared:
                msg.append('Wrong number of prev seen images from one or more categories.  ')
                print(x, shared)
    msg.extend(validity_check(df, params))

    if len(msg)==0:
        msg = "All stimulus proportions correct! :)"

    return(msg)
    

In [25]:
# Run all stimulus checks on one subject's data directory using the settings in `params`.
# NOTE(review): this is an absolute, machine-specific path -- the cell only runs where
# this directory exists; consider building it from a configurable data directory.
stimulus_check('/Users/Student/Desktop/attention-memory-task/data/KirstenTestRun1_2018_Jun_25', params)

['Incorrect number of invalid attention circles.  ']