# Scan_CreateRegressors.ipynb

Author: Marissa Clark

Input (in scanfiles): 

* sub-{sub}_task-emp_run-{run}_events.tsv

    * file that contains all ratings by run
    

    



In [1]:
import pandas as pd
import os
from os.path import join as opj
from glob import glob
import re

In [2]:
project_filepath = "/Users/mdclark/Desktop/EmpAcc/data/behavioraldata/scanfiles_derivatives/rundata/"
correlation_filepath = "/Users/mdclark/Desktop/EmpAcc/data/behavioraldata/scanfiles_derivatives/correlationdata/"

# Get all subject ids from the per-run events files in <project_filepath>/all.
# Only names of the form sub-<id>_task-emp_run-<n>_events.tsv contribute an id,
# so a stray file that merely contains "sub-" cannot end up in the set as a
# bogus subject id (re.sub would have left such a name unchanged).
events_file_pattern = re.compile(r"^(sub-[^_]+)_task-emp_run-\d+_events\.tsv$")
project_file_contents = os.listdir(opj(project_filepath, 'all'))
subject_ids = {
    match.group(1)
    for match in map(events_file_pattern.match, project_file_contents)
    if match
}
print(subject_ids)

{'sub-191', 'sub-251', 'sub-139', 'sub-278', 'sub-266', 'sub-214', 'sub-257', 'sub-162', 'sub-197', 'sub-223', 'sub-231', 'sub-277', 'sub-186', 'sub-196', 'sub-200', 'sub-280', 'sub-262', 'sub-146', 'sub-270', 'sub-163', 'sub-216', 'sub-282', 'sub-167', 'sub-145', 'sub-253', 'sub-188', 'sub-279', 'sub-267', 'sub-222', 'sub-229', 'sub-177', 'sub-274', 'sub-276', 'sub-263', 'sub-256', 'sub-173', 'sub-237', 'sub-168', 'sub-236', 'sub-260', 'sub-157', 'sub-165', 'sub-147', 'sub-179', 'sub-288', 'sub-219', 'sub-154', 'sub-138', 'sub-195', 'sub-208', 'sub-215', 'sub-245', 'sub-184', 'sub-239', 'sub-156', 'sub-160', 'sub-161', 'sub-289', 'sub-242', 'sub-220'}


In [3]:
# Model: self_other (one row per rating)
model_name = 'self_other'


def model_function(tsv_dataframe):
    """Reduce an events table to the three columns used by the self_other model.

    Parameters
    ----------
    tsv_dataframe : pandas.DataFrame
        Events table containing at least the ``onset``, ``duration`` and
        ``self_other`` columns.

    Returns
    -------
    pandas.DataFrame
        The ``onset``, ``duration`` and ``self_other`` columns, in that order,
        with rows unchanged.
    """
    wanted_columns = ['onset', 'duration', 'self_other']
    return tsv_dataframe.loc[:, wanted_columns]

In [56]:
# Model = self_other (video-level events; EV selects which condition is written)
model_name = 'self_other'
EV = 'other'


def model_function(tsv_dataframe, condition_column=None, event_value=None):
    """Build an (onset, duration, modulator) table of video-level events.

    Ratings are grouped by ``video_number``. Each video becomes one event that
    starts 0.5 s before its earliest rating onset and ends 0.5 s after its
    latest rating onset. Only videos whose condition equals ``event_value``
    are kept.

    Parameters
    ----------
    tsv_dataframe : pandas.DataFrame
        Events table containing ``video_number``, ``onset`` and the condition
        column.
    condition_column : str, optional
        Column holding each video's condition. Defaults to the module-level
        ``model_name``.
    event_value : optional
        Condition value to keep. Defaults to the module-level ``EV``.

    Returns
    -------
    pandas.DataFrame
        Columns ``onset``, ``duration`` and ``modulator`` (always 1), sorted by
        onset. Empty (with the same columns) when no video matches or the
        input has no rows.
    """
    if condition_column is None:
        condition_column = model_name
    if event_value is None:
        event_value = EV

    # One pass over the table instead of re-filtering it for every video.
    rating_onsets = tsv_dataframe.groupby('video_number')['onset']
    videos = pd.DataFrame({
        'onset': rating_onsets.min(),
        'offset': rating_onsets.max(),
    })

    # A video's condition is taken from its first row in the table.
    first_rows = tsv_dataframe.drop_duplicates(subset='video_number').set_index('video_number')
    videos['condition'] = first_rows[condition_column]

    # Get duration
    videos['onset'] = videos['onset'] - 0.5  # Video starts 0.5 seconds before participant makes a rating
    videos['duration'] = (videos['offset'] - videos['onset']) + 0.5  # Video ends 0.5 seconds after participant makes a rating (check with E about this)

    # Trim to the requested condition; sort so the row order is deterministic
    # rather than depending on set iteration order.
    selected = videos[videos['condition'] == event_value].sort_values('onset')
    events = selected[['onset', 'duration']].reset_index(drop=True)
    events['modulator'] = 1

    return events

# Write one headerless, tab-separated EV file per run for every subject, using
# the model_function, model_name and EV defined above.
output_dir = opj(project_filepath, model_name)
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories

for subject in subject_ids:
    print(subject)

    runs = glob(opj(project_filepath, 'all', subject + "*.tsv"))

    for run_tsv in runs:
        # File name without directory or the .tsv extension
        run_title = os.path.splitext(os.path.basename(run_tsv))[0]

        # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv is the replacement
        task_events = pd.read_csv(run_tsv, sep='\t', index_col=None)

        model_events = model_function(task_events)

        model_events.to_csv(opj(output_dir, run_title + "_" + EV + '.txt'), sep='\t', index=False, header=False)



sub-160
sub-220
sub-165
sub-289
sub-197
sub-239
sub-179
sub-288
sub-162
sub-173
sub-282
sub-262
sub-257
sub-253
sub-270
sub-146
sub-138
sub-200
sub-214
sub-279
sub-216
sub-242
sub-256
sub-274
sub-177
sub-267
sub-260
sub-145
sub-161
sub-196
sub-139
sub-168
sub-188
sub-186
sub-245
sub-184
sub-278
sub-215
sub-219
sub-236
sub-251
sub-154
sub-163
sub-277
sub-222
sub-276
sub-229
sub-231
sub-191
sub-167
sub-157
sub-263
sub-147
sub-223
sub-208
sub-266
sub-280
sub-195
sub-156
sub-237


In [14]:
# Model = video_timing
model_name = 'video_timing'


def model_function(tsv_dataframe):
    """Summarise an events table into one onset/duration row per video.

    Ratings are grouped by ``video_number``. Each video's onset is 0.5 s before
    its earliest rating onset, and its duration runs until 0.5 s after its
    latest rating onset.

    Parameters
    ----------
    tsv_dataframe : pandas.DataFrame
        Events table containing ``video_number`` and ``onset`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``video_number``, ``onset`` and ``duration``, sorted by onset.
        Empty (with the same columns) when the input has no rows.
    """
    # One pass over the table instead of re-filtering it for every video.
    rating_onsets = tsv_dataframe.groupby('video_number')['onset']
    videos = pd.DataFrame({
        'onset': rating_onsets.min(),
        'offset': rating_onsets.max(),
    })

    # Get duration
    videos['onset'] = videos['onset'] - 0.5  # Video starts 0.5 seconds before participant makes a rating
    videos['duration'] = (videos['offset'] - videos['onset']) + 0.5  # Video ends 0.5 seconds after participant makes a rating (check with E about this)

    # Sort so the row order is deterministic rather than depending on set
    # iteration order, then turn the video_number index back into a column.
    videos = videos.sort_values('onset').reset_index()

    return videos[['video_number', 'onset', 'duration']]

# Write one tab-separated file (with header) per run for every subject, using
# the model_function and model_name defined above.
output_dir = opj(project_filepath, model_name)
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories

for subject in subject_ids:
    print(subject)

    runs = glob(opj(project_filepath, 'all', subject + "*.tsv"))

    for run_tsv in runs:
        # File name without directory or the .tsv extension
        run_title = os.path.splitext(os.path.basename(run_tsv))[0]

        # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv is the replacement
        task_events = pd.read_csv(run_tsv, sep='\t', index_col=None)

        model_events = model_function(task_events)

        # NOTE(review): the file name still embeds EV, which is whatever value an
        # earlier cell left behind; this model does not filter on it. Kept as-is
        # so existing file names do not change - confirm whether it is intended.
        model_events.to_csv(opj(output_dir, run_title + "_" + EV + '.txt'), sep='\t', index=False, header=True)



sub-191
sub-251
sub-139
sub-278
sub-266
sub-214
sub-257
sub-162
sub-197
sub-223
sub-231
sub-277
sub-186
sub-196
sub-200
sub-280
sub-262
sub-146
sub-270
sub-163
sub-216
sub-282
sub-167
sub-145
sub-253
sub-188
sub-279
sub-267
sub-222
sub-229
sub-177
sub-274
sub-276
sub-263
sub-256
sub-173
sub-237
sub-168
sub-236
sub-260
sub-157
sub-165
sub-147
sub-179
sub-288
sub-219
sub-154
sub-138
sub-195
sub-208
sub-215
sub-245
sub-184
sub-239
sub-156
sub-160
sub-161
sub-289
sub-242
sub-220


# Model: video_valence (one row per rating)
model_name = 'video_valence'


def model_function(tsv_dataframe):
    """Reduce an events table to the three columns used by the video_valence model.

    Parameters
    ----------
    tsv_dataframe : pandas.DataFrame
        Events table containing at least the ``onset``, ``duration`` and
        ``neg_pos`` columns.

    Returns
    -------
    pandas.DataFrame
        The ``onset``, ``duration`` and ``neg_pos`` columns, in that order,
        with rows unchanged.
    """
    wanted_columns = ['onset', 'duration', 'neg_pos']
    return tsv_dataframe.loc[:, wanted_columns]



# Model = video_number
model_name = 'video_number'


def model_function(tsv_dataframe):
    """Reduce an events table to the three columns used by the video_number model.

    Parameters
    ----------
    tsv_dataframe : pandas.DataFrame
        Events table containing at least the ``onset``, ``duration`` and
        ``video_number`` columns.

    Returns
    -------
    pandas.DataFrame
        The ``onset``, ``duration`` and ``video_number`` columns, in that
        order, with rows unchanged.
    """
    # NOTE(review): an earlier version also built a per-video onset/duration
    # table here but never returned it. That dead computation was removed
    # (it also made empty input raise); the returned per-rating table is
    # unchanged. Confirm whether per-video rows were the intended output.
    task_events = tsv_dataframe[['onset', 'duration', 'video_number']]

    return task_events


In [11]:
# # Model = Rater correlation w/ all other raters

# # For this model, individual ratings don't matter, so I'm going to change the onset/duration to reflect each video as a whole

# model_name = 'rater_correlation'

# def model_function(tsv_dataframe): 
#     df = []

#     #Get which videos and find first and last value of video to get onset and offset
#     videos = set(tsv_dataframe['video_number'])
#     for video in videos: 
#         df.append([video,tsv_dataframe[tsv_dataframe['video_number'] == video]['onset'].min(), tsv_dataframe[tsv_dataframe['video_number'] == video]['onset'].max()])
#     df = pd.DataFrame(df)
#     df.columns = ['video_number', 'onset', 'offset']

#     #Get duration
#     df['onset'] = df['onset'] - 0.5 #Video starts 0.5 seconds before participant makes a rating
#     df['duration'] = df['offset'] - df['onset'] + 0.5 #Video ends 0.5 seconds after participant makes a rating (check with E about this)

#     #Pull correlations and append to dataframe, sort
#     subject = tsv_dataframe['sub'][0]
#     correlations = pd.DataFrame.from_csv(correlation_filepath + str(subject) + '_z_correlations.csv', index_col=None)
#     tsv_dataframe = df.merge(correlations,how='left', left_on='video_number', right_on='video')
#     tsv_dataframe = tsv_dataframe[['onset', 'duration', 'z_corr']]
#     tsv_dataframe = tsv_dataframe.sort_values('onset', 0)

#     return tsv_dataframe

In [7]:
# Write one headerless, tab-separated file per run for every subject, using
# whichever model_function and model_name were defined most recently.
output_dir = opj(project_filepath, model_name)
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories

for subject in subject_ids:
    print(subject)

    runs = glob(opj(project_filepath, 'all', subject + "*.tsv"))

    for run_tsv in runs:
        # File name without directory or the .tsv extension
        run_title = os.path.splitext(os.path.basename(run_tsv))[0]

        # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv is the replacement
        task_events = pd.read_csv(run_tsv, sep='\t', index_col=None)

        model_events = model_function(task_events)

        model_events.to_csv(opj(output_dir, run_title + '.txt'), sep='\t', index=False, header=False)


sub-160
sub-220
sub-165
sub-289
sub-197
sub-239
sub-179
sub-288
sub-162
sub-173
sub-282
sub-262
sub-257
sub-253
sub-270
sub-146
sub-138
sub-200
sub-214
sub-279
sub-216
sub-242
sub-256
sub-274
sub-177
sub-267
sub-260
sub-145
sub-161
sub-196
sub-139
sub-168
sub-188
sub-186
sub-245
sub-184
sub-278
sub-215
sub-219
sub-236
sub-251
sub-154
sub-163
sub-277
sub-222
sub-276
sub-229
sub-231
sub-191
sub-167
sub-157
sub-263
sub-147
sub-223
sub-208
sub-266
sub-280
sub-195
sub-156
sub-237


In [11]:
# # Model = template
# model_name = 

# def model_function(tsv_dataframe):

#     return task_events



In [None]:
# Debug check: show the path left in `run_tsv` by the most recent run loop.
print(run_tsv)