# Looping and File I/O: A Worked Example
Here we have a worked example of a real task involving looping and reading/writing files.

In [1]:
# Standard imports (i.e., Python builtins) go at the top
from os import listdir
import os.path as op
from glob import glob

# Now third-party imports
import pandas as pd

# Finally, any local imports would go here

In [2]:
# Here's an example file for us to build our workflow around:
# one events table for a single subject and run (a .tsv, i.e. tab-separated, file)
example_file = '/home/data/nbc/Sutherland_HIVCB/dset/sub-193/func/sub-193_task-errorawareness_run-01_events.tsv'

In [3]:
# Does the file exist? op.isfile is True only when the path points to an existing regular file
op.isfile(example_file)

True

In [4]:
# What are the contents?
# NOTE: read_csv splits on commas by default (sep=','), but this file is
# tab-separated, so each row ends up crammed into a single column
# (see the '\t' characters in the column name below).
df = pd.read_csv(example_file)
df.head()

Unnamed: 0,onset\tduration\tresponse_time\ttrial_type\ttrial_type_2\tstimulus_word\tstimulus_color\ttrial_accuracy
0,12.015\t0.75\t0.428\tcongruent\tgoCorrect\twhi...
1,13.515\t0.75\t0.43\tcongruent\tgoCorrect\tred\...
2,15.015\t0.75\t0.479\tcongruent\tgoCorrect\tpur...
3,16.515\t0.75\t0.498\tcongruent\tgoCorrect\tyel...
4,18.031\t0.75\t0.486\tcongruent\tgoCorrect\tgre...


In [5]:
# Look at the read_csv signature to find the argument that controls the delimiter
help(pd.read_csv)

Help on function read_csv in module pandas.io.parsers:

read_csv(filepath_or_buffer:Union[str, pathlib.Path, IO[~AnyStr]], sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, dialect=None, error_bad_lines=True, warn_bad_lines=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)
    Read a comma-separated values (csv) file into Data

In [6]:
# So what should we change?
# Hint: the delimiter. Passing sep='\t' makes read_csv split on tabs (that is
# what the loops further down do). This cell still uses the default comma
# separator, so its output is the same single-column table as before.
df = pd.read_csv(example_file)
df.head()

Unnamed: 0,onset\tduration\tresponse_time\ttrial_type\ttrial_type_2\tstimulus_word\tstimulus_color\ttrial_accuracy
0,12.015\t0.75\t0.428\tcongruent\tgoCorrect\twhi...
1,13.515\t0.75\t0.43\tcongruent\tgoCorrect\tred\...
2,15.015\t0.75\t0.479\tcongruent\tgoCorrect\tpur...
3,16.515\t0.75\t0.498\tcongruent\tgoCorrect\tyel...
4,18.031\t0.75\t0.486\tcongruent\tgoCorrect\tgre...


In [7]:
# Look at the full path: the 'sub-*' folder pattern and the
# '*_task-errorawareness_*_events.tsv' file pattern used below follow this layout
print(example_file)

/home/data/nbc/Sutherland_HIVCB/dset/sub-193/func/sub-193_task-errorawareness_run-01_events.tsv


In [8]:
in_folder = '/home/data/nbc/Sutherland_HIVCB/dset'

# Collect everything in the dataset folder whose name starts with 'sub-'.
# BIDS should not contain plain files with that prefix, but, just to be safe,
# anything that is not a directory is dropped before sorting.
subject_folders = sorted(
    candidate
    for candidate in glob(op.join(in_folder, 'sub-*'))
    if op.isdir(candidate)
)

In [9]:
# Let's look through the subject-specific folders
for subject_folder in subject_folders:
    func_folder = op.join(subject_folder, 'func')
    # And grab *all* errorawareness task events files
    events_files = sorted(glob(op.join(func_folder, '*_task-errorawareness_*_events.tsv')))
    for ev_file in events_files:
        df = pd.read_csv(ev_file, sep='\t')

In [30]:
# Preview the first rows of df, which holds whichever events file was read most recently
df.head()

Unnamed: 0,onset,duration,response_time,trial_type,trial_type_2,stimulus_word,stimulus_color,trial_accuracy
0,12.01,0.75,0.377,congruent,goCorrect,yellow,yellow,1
1,13.51,0.75,0.433,congruent,goCorrect,white,white,1
2,15.01,0.75,0.408,congruent,goCorrect,yellow,yellow,1
3,16.51,0.75,0.41,congruent,goCorrect,red,red,1
4,18.01,0.75,0.347,congruent,goCorrect,purple,purple,1


In [11]:
# Which file is that? ev_file still holds the last path visited by the most recent loop
ev_file

'/home/data/nbc/Sutherland_HIVCB/dset/sub-410/func/sub-410_task-errorawareness_run-06_events.tsv'

In [12]:
# Show the full list of subject folders that were found
subject_folders

['/home/data/nbc/Sutherland_HIVCB/dset/sub-193',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-194',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-198',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-203',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-205',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-206',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-211',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-212',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-216',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-217',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-218',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-225',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-229',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-237',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-238',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-240',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-241',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-242',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-244',
 '/home/data/nbc/Sutherland_HIVCB/dset/sub-247',
 '/home/data/nbc/Sut

In [19]:
# So what do we want from the files?
# 1. All incorrect go trials
# 2. All incorrect nogo trials
# The trial labels are in the 'trial_type_2' column, so list the distinct
# values that occur in the file currently loaded in df.
print(list(df['trial_type_2'].unique()))

['goCorrect', 'nogoIncorrectAware', 'goAware', 'nogoCorrect', 'nogoIncorrectUnaware', 'goUnaware']


In [20]:
# One file is not guaranteed to contain *every* possible label,
# so pool the labels found across all subjects and runs.
ttypes = set()
for subject_folder in subject_folders:
    func_folder = op.join(subject_folder, 'func')
    # Every errorawareness task events file for this subject
    events_files = sorted(glob(op.join(func_folder, '*_task-errorawareness_*_events.tsv')))
    for ev_file in events_files:
        df = pd.read_csv(ev_file, sep='\t')
        # A set drops duplicates as we go
        ttypes.update(df['trial_type_2'].unique())
# Finish with a sorted list so the printout is stable and easy to scan
ttypes = sorted(ttypes)
print(ttypes)

['goAware', 'goCorrect', 'goIncorrect', 'goUnaware', 'nogoCorrect', 'nogoIncorrectAware', 'nogoIncorrectUnaware']


In [25]:
# Incorrect go trials are selected here by the label goIncorrect only
# (goUnaware is not part of the filter -- NOTE(review): confirm that is intended).
# df is whichever events file was read last, so these onsets come from one run.
go_incorrect_df = df.loc[df['trial_type_2'].isin(['goIncorrect'])]
go_incorrect_onsets = go_incorrect_df['onset'].values
go_incorrect_onsets

array([160.523])

In [26]:
# And let's check incorrect nogo trials while we're at it.
# Both the "aware" and "unaware" labels are treated as incorrect nogo trials.
nogo_incorrect_df = df.loc[df['trial_type_2'].isin(['nogoIncorrectAware', 'nogoIncorrectUnaware'])]
nogo_incorrect_onsets = nogo_incorrect_df['onset'].values
# End the cell with the variable itself so the notebook displays the onsets
nogo_incorrect_onsets

array([159.023])

In [2]:
in_folder = '/home/data/nbc/Sutherland_HIVCB/dset'

subject_folders = sorted(glob(op.join(in_folder, 'sub-*')))

# I'm quite sure that there are no files starting with 'sub-',
# since that would not fit with BIDS, but, just to be safe,
# we can reduce the list to folders only.
subject_folders = [sf for sf in subject_folders if op.isdir(sf)]


# Now let's put these things together

# We need an output directory to save things to
out_dir = '/home/data/nbc/Sutherland_HIVCB/derivatives/afni-processing/preprocessed-data/'

# Output file name -> the trial_type_2 labels whose onsets belong in that file.
# One file is written per event type; adding an event type is a one-line change.
event_types = {
    'go_incorrect.1D': ['goIncorrect'],
    'nogo_incorrect.1D': ['nogoIncorrectAware', 'nogoIncorrectUnaware'],
    'nogo_aware.1D': ['nogoIncorrectAware'],
    'nogo_unaware.1D': ['nogoIncorrectUnaware'],
    'nogo_correct.1D': ['nogoCorrect'],
}


def format_onsets(events_df, labels):
    """Build one line of an onset file for a single run.

    Parameters
    ----------
    events_df : pandas.DataFrame
        Events table with at least 'trial_type_2' and 'onset' columns.
    labels : list of str
        The 'trial_type_2' values to select.

    Returns
    -------
    str
        The selected onsets joined by tabs, or '*' when the run has no
        matching trials (presumably the AFNI timing-file placeholder for an
        empty run -- TODO confirm against the downstream AFNI call).
    """
    onsets = events_df.loc[events_df['trial_type_2'].isin(labels), 'onset'].values
    if onsets.size == 0:
        return '*'
    return '\t'.join(str(onset) for onset in onsets)


for subject_folder in subject_folders:
    subject_id = op.basename(subject_folder)
    print('Processing {}'.format(subject_id))
    func_folder = op.join(subject_folder, 'func')
    # And grab *all* errorawareness task events files
    events_files = sorted(glob(op.join(func_folder, '*_task-errorawareness_*_events.tsv')))

    out_sub_dir = op.join(out_dir, subject_id, 'func')
    # Subjects without an output folder are skipped on purpose. Checking for
    # that explicitly (instead of a bare ``except:``) means genuine problems
    # such as permission errors or typos are no longer reported as
    # "missing subject", and we avoid reading files we cannot write for.
    if not op.isdir(out_sub_dir):
        print("missing subject")
        continue

    # One list of lines per output file; each run contributes one line
    run_lines = {file_name: [] for file_name in event_types}

    for ev_file in events_files:
        df = pd.read_csv(ev_file, sep='\t')
        for file_name, labels in event_types.items():
            run_lines[file_name].append(format_onsets(df, labels))

    # Different file for each event type, different line for each run
    for file_name, lines in run_lines.items():
        with open(op.join(out_sub_dir, file_name), 'w') as fo:
            fo.write('\n'.join(lines))
            

Processing sub-193
Processing sub-194
Processing sub-198
Processing sub-203
Processing sub-205
missing subject
Processing sub-206
Processing sub-211
Processing sub-212
Processing sub-216
Processing sub-217
Processing sub-218
Processing sub-225
Processing sub-229
Processing sub-237
Processing sub-238
Processing sub-240
Processing sub-241
missing subject
Processing sub-242
Processing sub-244
Processing sub-247
Processing sub-248
Processing sub-250
Processing sub-253
Processing sub-254
Processing sub-257
Processing sub-258
Processing sub-259
Processing sub-263
Processing sub-266
Processing sub-268
Processing sub-273
Processing sub-279
Processing sub-280
Processing sub-282
Processing sub-283
Processing sub-285
Processing sub-287
Processing sub-289
Processing sub-290
Processing sub-291
Processing sub-293
Processing sub-294
Processing sub-295
Processing sub-296
Processing sub-297
Processing sub-299
Processing sub-300
Processing sub-301
Processing sub-302
Processing sub-306
Processing sub-307