In [81]:
import requests
import os.path
# It is common practice to shorten 'pandas' to 'pd' for 
# less typing when calling functions from the pandas library.
import pandas as pd

# Read in the data. It is spread across several files (one participants table
# plus one events file per subject) which are combined into one dataframe.

df_participants = pd.read_csv("https://openneuro.org/crn/datasets/ds002785/snapshots/2.0.0/files/participants.tsv",sep='\t')

# Per-subject frames are collected here and concatenated once after the loop.
# Calling pd.concat inside the loop would re-copy every previously loaded row
# on each iteration.
subject_frames = []

for i in range(1, len(df_participants)+1):
    # Subject folders and file names use a zero-padded, four-digit subject number.
    subject_id = str(i).zfill(4)
    df_subject = pd.read_csv("https://openneuro.org/crn/datasets/ds002785/snapshots/2.0.0/files/sub-"+ subject_id +":func:sub-"+ subject_id +"_task-emomatching_acq-seq_events.tsv",sep='\t')
    # Some participants never did the emotion matching task so there is no csv at the link above.
    # In this case the length of the dataframe df_subject is 0.
    if (len(df_subject) != 0):
        # Adds a column holding the subject number in every row. This is what lets us
        # tell which rows belong to which subject once all subjects' data are put
        # together in one dataframe.
        df_subject['subject'] = i
        subject_frames.append(df_subject)

# pd.concat raises on an empty list, so fall back to an empty dataframe when no
# subject had any data.
df_subjects = pd.concat(subject_frames) if subject_frames else pd.DataFrame()
# reset_index() (without drop=True) keeps each row's position within its own
# subject file as an 'index' column and gives the combined frame a fresh 0..n-1 index.
df_subjects = df_subjects.reset_index()




In [82]:
# For each row, check that the data in each column takes one of the anticipated values.
# At the first unanticipated value, the column name and the row index at which it
# occurs are printed and the loop stops.

# The three ethnicity columns share the same set of levels.
ethnicity_levels = {'caucasian', 'black', 'asian'}

# Maps each column to what it may contain besides NaN: `float` means "any float",
# a set lists the allowed string levels. Columns are checked in this order, so the
# first entry that fails for a row is the one that gets reported.
expected_values = {
    'onset': float,
    'duration': float,
    'trial_type': {'control', 'emotion'},
    'response_time': float,
    'response_hand': {'right', 'left'},
    'response_accuracy': {'correct', 'miss', 'incorrect'},
    'ori_match': {'vertical', 'horizontal'},
    'sex': {'male', 'female'},
    'ethn_target': ethnicity_levels,
    'ethn_match': ethnicity_levels,
    'emo_match': {'fear', 'anger'},
    'ethn_distractor': ethnicity_levels,
}


def _is_expected(value, expected):
    """Return True if `value` is NaN or satisfies the rule `expected`.

    `expected` is either the type `float` (any float is accepted) or a set of
    allowed levels (the value must be a member of the set).
    """
    if pd.isna(value):
        return True
    if expected is float:
        return isinstance(value, float)
    return value in expected


for index, row in df_subjects.iterrows():
    # First column (in table order) whose value in this row is not anticipated, if any.
    bad_column = next(
        (column for column, expected in expected_values.items()
         if not _is_expected(row[column], expected)),
        None,
    )
    if bad_column is not None:
        print(bad_column)
        # Report the index of the offending row itself.
        print(index)
        break
    

ethn_distractor
34


Per the paper describing the dataset (insert screenshot), there should not be an ethn_distractor variable, and it is unclear where this variable came from. Moreover, the documentation of the dataset on OpenNeuro (insert screenshots) says that the only levels of this variable are caucasian, black, asian, or n/a. We can also see that the documentation is shoddy for the emo_match variable: it says the levels are caucasian, black, asian, or n/a, whereas the paper says they are fear, anger, or n/a. We also see that the only column with unanticipated data is ethn_distractor. We therefore removed the ethn_distractor column, as it should not have been there in the first place and will not contribute to our analysis.


In [83]:
# Remove the ethn_distractor column. errors='ignore' keeps this cell re-runnable:
# on a second run the column is already gone, and drop would otherwise raise a KeyError.
df_subjects = df_subjects.drop(columns=['ethn_distractor'], errors='ignore')

# TODO: Discuss how to deal with missing data (MCAR, MAR, etc.).
# TODO: Check whether missing data is always paired correctly. The data might not truly
# be missing; it could simply be that there is no sex when no face is present. Perhaps
# recode this to none instead of NaN.
df_subjects

Unnamed: 0,index,onset,duration,trial_type,response_time,response_hand,response_accuracy,ori_match,sex,ethn_target,ethn_match,emo_match,subject
0,0,10.0115,1.9212,control,1.9212,right,correct,vertical,,,,,1
1,1,15.0197,1.3478,control,1.3478,left,correct,vertical,,,,,1
2,2,20.0279,1.2605,control,1.2605,right,correct,horizontal,,,,,1
3,3,25.0362,0.9700,control,0.9700,left,correct,horizontal,,,,,1
4,4,30.0444,0.8287,control,0.8287,right,correct,horizontal,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10027,43,225.3638,4.9000,emotion,,,miss,,female,black,asian,fear,215
10028,44,230.3720,2.6348,emotion,2.6348,right,correct,,male,caucasian,caucasian,anger,215
10029,45,235.3802,4.9000,emotion,,,miss,,male,black,black,fear,215
10030,46,240.3884,4.9000,emotion,,,miss,,female,caucasian,caucasian,anger,215
