### Import packages

In [1]:
# %pip install scipy

import pandas as pd
import os
import numpy as np
import seaborn as sb
import scipy.stats as stats
import scipy
import ast
import warnings
import pylab
import matplotlib.pyplot as plt
import math
warnings.filterwarnings('ignore')

### Load & Organize Data

In [8]:
path       = 'new MRI Behavioral Data/'
subjects   = [ path+x for x in os.listdir(path) if 'subject' in x and 'subject_17' not in x and 'REPEAT' not in x]
csv_ignore = [ x for x in os.listdir('.') if 'disregard' in x ] 
csv_ignore = [ list(pd.read_csv(x).loc[1])[1][:-3]+'csv' for x in csv_ignore ]

^^ ignore extra files and exclude subject 17 due to noisy data (head coils were not receiving during data collection)

### Define functions

In [9]:
def load_dataframe(sub_path, ignore=[]):
    
    files = [ sub_path+'/'+x for x in os.listdir(sub_path) if '.csv' in x and 'REPEAT' not in x]
    files = [ x for x in files if x not in ignore]
    df    = pd.concat( [ pd.read_csv(x) for x in files ] )
    
    return(df)

### Check that participants are responding to the task appropriately

In [10]:
total_no_response_trials = []
total_trials = []
all_data = []

for s in subjects:
    
    df = load_dataframe(s)
    df = df[~df['type'].isna()]
    all_data.append(df)
    
    total_trials.append(df.shape[0])
    
    dat = df[~df['key_resp_9.keys'].isin(["['1']","['2']",
                                                  "['1', '1']","['2', '2']",
                                                  "['1', '2']","['2', '1']"
                                                 ])]
    
    # note: we did not have any cases where three responses were entered
    
    no_response = dat.shape[0]
    
    total_no_response_trials.append(no_response)


In [11]:
np.sum(total_no_response_trials)

100

In [12]:
22*6*20

2640

In [13]:
print('Participants are doing the task as intedned; they responded appropriately in approximately '+str(np.round(1-(100/2640),2)*100)+'% of trials')
                                                                          
                                                                          

Participants are doing the task as intedned; they responded appropriately in approximately 96.0% of trials


### Get breakdown of two-mismatch responses, by run

In [14]:
new_dataframe = []
mismatches = 0

for s in subjects:
    
    df = load_dataframe(s)
    df = df[~df['type'].isna()]
        
    dat = df[~df['key_resp_9.keys'].isin(["['1']","['2']",
                                                  "['1', '1']","['2', '2']",
                                                  "['1', '2']","['2', '1']"
                                                 ])]
    mismatch = df[df['key_resp_9.keys'].isin([ "['1', '2']" , "['2', '1']" ])]
    
    if mismatch.shape[0]>0:
        print(s)
        print(mismatch.shape[0])
        mismatches += 1
    
    new_df = dat.groupby(['run'],as_index=False).count()[['run','moviestim']]
    new_df['subject'] = s
    
    new_dataframe.append(new_df)
    
run_df = pd.concat(new_dataframe)

new MRI Behavioral Data/subject_14
1
new MRI Behavioral Data/subject_16
1


In [15]:
print('In almost all cases, participants enter one response; there are only '+str(mismatches)+' trials where participants entered different responses during the same response period ('+str(np.round(mismatches/np.sum(total_trials),4))+'% of trials)')



In almost all cases, participants enter one response; there are only 2 trials where participants entered different responses during the same response period (0.0008% of trials)


### Check responses entered by run

A main contrast we conduct in the study is comparing 1-response trials with 2-response trials. If there are any runs where a participant entered only one response, we cannot do the contrast on that run.

In [16]:
full_data = pd.concat(all_data)

In [17]:
# check the responses

for p in full_data['participant'].unique():
    
    for r in full_data[full_data['participant']==p]['run'].unique():
        
        responses = full_data[(full_data['participant']==p)&(full_data['run']==r)]['key_resp_9.keys'].unique()
        
        if '1' not in str(responses) or '2' not in str(responses):
            
            print()
            print(responses); print()
            print(p); print(); print(r)
            print()


["['1']" nan]

subject_21

3.0



There is just one run where a subject only responded '1' (they responded '1' in five trials and had no response during the response period in the other 15 trials). We will exclude this run.

### Save out a dataframe with the run we will exclude

In [18]:
run_df[(run_df['subject']=='new MRI Behavioral Data/subject_21')
      &(run_df['run']==3.0)] 

Unnamed: 0,run,moviestim,subject
2,3.0,15,new MRI Behavioral Data/subject_21


In [19]:
exclude_df = run_df[(run_df['subject']=='new MRI Behavioral Data/subject_21')
                   &(run_df['run']==3.0)] 

exclude_df.to_csv('exclude_runs_behavioral.csv')