# In [5]:
import os
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

# Set up the directory structure first, then collect the behavioral
# (events) files for the localizer task and the study task separately.
# Each task is modeled on its own, so their files are kept in separate
# lists.  Both lists are sorted so that list position follows run order.
proj_dir = '/home/mrive301/psb6351_data'
behav_dir = os.path.join(proj_dir, 'dset', 'sub-021', 'func')
loc_behav_files, study_behav_files = (
    sorted(glob(f'{behav_dir}/*{task}*.tsv')) for task in ('loc', 'study')
)

# Localizer task: collect block-onset times for face and scene blocks.
#
# Each dictionary maps a run key ('run1', 'run2', ...) to the list of block
# onsets found in that run's tab-delimited behavioral file.
loc_scene_onset_times = {}
loc_face_onset_times = {}

# curr_behav_file is the full path to one localizer run.  idx is the
# 0-based position of that file in the sorted list, so run keys use idx + 1.
for idx, curr_behav_file in enumerate(loc_behav_files):
    run_key = f'run{idx+1}'
    loc_scene_onset_times[run_key] = []
    loc_face_onset_times[run_key] = []

    # Read the tab-delimited log file for this run
    curr_behav_data = pd.read_csv(curr_behav_file, sep='\t')

    # The localizer is a block design, so the hemodynamic response is
    # convolved over a whole face/scene block rather than over each image.
    # The temporary lists accumulate every stimulus onset inside the current
    # block; only the first one (the block onset) is kept.
    tmp_face_onset = []
    tmp_scene_onset = []
    for curr_trial_type, curr_onset in zip(curr_behav_data['trial_type'],
                                           curr_behav_data['onset']):
        if curr_trial_type == 'face':
            tmp_face_onset.append(curr_onset)
        # The scene label was misspelled as 'scence' in the original event
        # files; accept the correct spelling too so that scene trials are
        # not silently dropped if the label is ever fixed.
        elif curr_trial_type in ('scene', 'scence'):
            tmp_scene_onset.append(curr_onset)
        # A math trial seen while a list holds exactly 20 onsets means a
        # full block was just exited: store the block's first onset and
        # start a fresh list.  Later math trials find the list empty and
        # therefore do nothing.
        elif curr_trial_type == 'math' and len(tmp_face_onset) == 20:
            loc_face_onset_times[run_key].append(tmp_face_onset[0])
            tmp_face_onset = []
        elif curr_trial_type == 'math' and len(tmp_scene_onset) == 20:
            loc_scene_onset_times[run_key].append(tmp_scene_onset[0])
            tmp_scene_onset = []
            
# Sink directory where the timing (EV) files are saved.  It is created if
# it does not exist yet.
evs_sink_dir = os.path.join(proj_dir, 'derivatives', 'first_lvl', 'sub-021', 'evs')
os.makedirs(evs_sink_dir, exist_ok=True)

# File names for the localizer (loc) scene and face EVs.
loc_scene_evs_file = 'loc_scene_evs.1D'
loc_face_evs_file = 'loc_face_evs.1D'

# Each run is written on its own line, with that run's onsets joined by
# ", " (i.e. the list without its square brackets).  Runs are written in the
# order their keys were inserted (run1, run2, ...), for however many runs
# were found, instead of assuming exactly two.
for curr_evs_file, curr_onset_times in (
        (loc_scene_evs_file, loc_scene_onset_times),
        (loc_face_evs_file, loc_face_onset_times),
):
    with open(os.path.join(evs_sink_dir, curr_evs_file), 'wt') as fp:
        for run_onsets in curr_onset_times.values():
            fp.write(", ".join(map(str, run_onsets)) + '\n')

# Study task: sort trial onsets into the event lists of interest.
#
# Each dictionary maps a run key ('run1', 'run2', ...) to a list of onsets:
#   task_fixb4_c_cond_onset_times  - fix (face/scene) trials immediately
#                                    followed by a correct COND trial
#   task_fixb4_ic_cond_onset_times - fix trials immediately followed by an
#                                    incorrect COND trial
#   task_fixb4_bl_onset_times      - fix trials immediately followed by a
#                                    baseline trial
#   task_remain_events_onset_times - remaining events (see branches below)
task_fixb4_c_cond_onset_times = {}
task_fixb4_ic_cond_onset_times = {}
task_remain_events_onset_times = {}
task_fixb4_bl_onset_times = {}

# Iterate over the study behavior files, one per run.
for idx, curr_behav_file in enumerate(study_behav_files):
    run_key = f'run{idx+1}'
    task_fixb4_c_cond_onset_times[run_key] = []
    task_fixb4_ic_cond_onset_times[run_key] = []
    task_remain_events_onset_times[run_key] = []
    task_fixb4_bl_onset_times[run_key] = []

    # Read in the current study run behavioral file
    curr_behav_data = pd.read_csv(curr_behav_file, sep='\t')

    # tmp_fix_onset holds the onset of the immediately preceding trial when
    # that trial was a fix (face/scene) trial, and None otherwise.  It is
    # reset for every run so nothing carries over between runs, and it
    # replaces the [i-1] label lookup, which raises KeyError on the first
    # row of a run.
    tmp_fix_onset = None
    for curr_trial_type, curr_onset, curr_acc in zip(curr_behav_data['trial_type'],
                                                     curr_behav_data['onset'],
                                                     curr_behav_data['acc']):
        prev_fix_onset, tmp_fix_onset = tmp_fix_onset, None

        if 'face' in curr_trial_type or 'scene' in curr_trial_type:
            # Current trial is a fix trial: remember its onset for the next
            # iteration.
            tmp_fix_onset = curr_onset
            # A fix trial that directly follows another fix trial goes to
            # the remaining events.  A fix trial that is neither preceded by
            # a fix trial nor followed by a COND/baseline trial is not
            # assigned to any list.
            if prev_fix_onset is not None:
                task_remain_events_onset_times[run_key].append(curr_onset)
        elif curr_trial_type == 'COND' and curr_acc == 1.0:
            # Correct conditional trial: credit the preceding fix trial if
            # there was one, otherwise file the COND onset under remaining
            # events.
            if prev_fix_onset is not None:
                task_fixb4_c_cond_onset_times[run_key].append(prev_fix_onset)
            else:
                task_remain_events_onset_times[run_key].append(curr_onset)
        elif curr_trial_type == 'COND' and curr_acc == 0.0:
            # Incorrect conditional trial: same logic as above, separate list.
            if prev_fix_onset is not None:
                task_fixb4_ic_cond_onset_times[run_key].append(prev_fix_onset)
            else:
                task_remain_events_onset_times[run_key].append(curr_onset)
        elif curr_trial_type == 'baseline':
            # Perceptual baseline trial: the preceding fix trial (face or
            # scene) is stored for the planned MVPA analysis.  A single
            # if/else is used so that the baseline onset goes to the
            # remaining events exactly once, and only when no fix trial
            # preceded it (mirroring the COND branches).
            # TODO: face- and scene-preceded baselines still share one list;
            # separating them needs its own dictionaries and output files.
            if prev_fix_onset is not None:
                task_fixb4_bl_onset_times[run_key].append(prev_fix_onset)
            else:
                task_remain_events_onset_times[run_key].append(curr_onset)
        # NOTE(review): COND trials whose acc is neither 1.0 nor 0.0 (e.g. a
        # missing value) match none of the branches above and are not
        # written to any list -- confirm whether they belong in the
        # remaining events.
# The timing files are set up for analysis in AFNI, where a run may have no
# events but its row in the file may not be empty.  Any run whose onset list
# is empty therefore gets a single filler value (-1).  Every run present in
# each dictionary is checked, rather than a fixed list of four run keys.
for curr_onset_times in (
        task_fixb4_c_cond_onset_times,
        task_fixb4_ic_cond_onset_times,
        task_remain_events_onset_times,
        task_fixb4_bl_onset_times,
):
    for run_onsets in curr_onset_times.values():
        if not run_onsets:
            run_onsets.append(-1)

# Sink directory for the study-task timing (EV) files.  It is created if it
# does not exist yet.
evs_sink_dir = os.path.join(proj_dir, 'derivatives', 'first_lvl', 'sub-021', 'evs')
os.makedirs(evs_sink_dir, exist_ok=True)

# File names for the separate EV files.
task_fixb4_c_cond_evs_file = 'fix_b4_c_cond_evs.1D'
task_fixb4_ic_cond_evs_file = 'fix_b4_ic_cond_evs.1D'
task_fixb4_bl_evs_file = 'fix_b4_bl_evs.1D'
task_remain_evs_file = 'events_remain_evs.1D'

# Each run is written on its own line, with that run's onsets joined by
# ", " (i.e. the list without its square brackets).  Runs are written in the
# order their keys were inserted (run1, run2, ...), for however many runs
# exist, instead of assuming exactly four.
for curr_evs_file, curr_onset_times in (
        (task_fixb4_c_cond_evs_file, task_fixb4_c_cond_onset_times),
        (task_fixb4_ic_cond_evs_file, task_fixb4_ic_cond_onset_times),
        (task_fixb4_bl_evs_file, task_fixb4_bl_onset_times),
        (task_remain_evs_file, task_remain_events_onset_times),
):
    with open(os.path.join(evs_sink_dir, curr_evs_file), 'wt') as fp:
        for run_onsets in curr_onset_times.values():
            fp.write(", ".join(map(str, run_onsets)) + '\n')