In [1]:
import pandas as pd
import numpy as np
import os
import re
from shutil import copyfile
import json
import sys
import scipy
import scipy.io as sio
from os.path import join as opj

%matplotlib inline

# File Handling
## Load Data

In [2]:
project_file = "/Users/mdclark/Desktop/EmpAcc/"

#path variables
project_filepath = os.path.join(project_file, 'data' , 'behavioraldata', 'scanfiles')
save_filepath = os.path.join(project_file, 'data' , 'behavioraldata', 'scanfiles_derivatives' , 'rundata')

project_file_contents = os.listdir(project_filepath)

project_file_contents = [item for item in project_file_contents if ".mat" in item]

#Get unique subject IDs (first three numbers of filename)
unique_sub_ids = [item for item in project_file_contents if 'video' in item]
unique_sub_ids = set([str.split(file, "_")[0] for file in unique_sub_ids])
unique_sub_ids = list(unique_sub_ids)
unique_sub_ids.sort(key=float)

In [3]:
print(unique_sub_ids)

['1', '138', '139', '145', '146', '147', '154', '156', '157', '160', '161', '162', '163', '165', '167', '168', '173', '177', '179', '184', '186', '188', '191', '195', '196', '197', '200', '208', '214', '215', '216', '219', '220', '222', '223', '229', '231', '236', '237', '239', '242', '245', '251', '253', '256', '257', '260', '262', '263', '266', '267', '270', '274', '276', '277', '278', '279', '280', '282', '288', '289', '999']


## intfMRI{SUB}_##.mat --> Run Reference File 


Turns .mat output files from scan that includes all runs and makes a csv file that also contains calculations relative to scan (rts) start 

For half the scans, there were two .mat files in case the computer crashed during the scan, so this combined those separate .mat files as well


In [8]:
#Load and Concatenate all ref files

def create_ref_file(sub_id):

    ref_file_contents = [item for item in project_file_contents if "intfMRI" + str(sub_id) in item]
    
    ref_file_data = []
    for file in ref_file_contents: 
        mat_contents = sio.loadmat(os.path.join(project_filepath, file))
        ref_file_data.append(pd.DataFrame(mat_contents['data']))
    
    #Join all files and remove empty data and irrelevant columns
    ref_file_data = pd.concat(ref_file_data)
    ref_file_data = ref_file_data[ref_file_data[5] != 0]
    ref_file_data = ref_file_data[ref_file_data[6] != 0]

    ref_file_data.columns = ["sub_id", "trial", "video_number", "self_other", "neg_pos", "scan_start", "video_start"]
    
    #Remove duplicate trials (if something crashed and restarted)
    ref_file_data = ref_file_data.drop_duplicates("trial")
    
    #Recode neg pos and self_other
    ref_file_data['self_other'][ref_file_data['self_other'] == 1] = "self"
    ref_file_data['self_other'][ref_file_data['self_other'] == 2] = "other"
    ref_file_data['neg_pos'][ref_file_data['neg_pos'] == 1] = "neg"
    ref_file_data['neg_pos'][ref_file_data['neg_pos'] == 2] = "pos"
    
    ref_file_data['video_start_rts'] = ref_file_data['video_start'] - ref_file_data['scan_start']
    
    #Create Runs (figures itself out by indecies, even if there was a problem with runs)
    ref_file_data = ref_file_data.sort_index() #index order problem
    ref_file_data.loc[0:3,   'run'] = 1
    ref_file_data.loc[4:7,   'run'] = 2
    ref_file_data.loc[8:11,  'run'] = 3
    ref_file_data.loc[12:15, 'run'] = 4

    #Write to csv
    ref_file_data.to_csv(opj(project_filepath , str(sub_id) + "_run_ref.csv"))
    
    return ref_file_data

In [9]:
for sub in unique_sub_ids: 
    create_ref_file(sub)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Create BIDS compliant run .tsv files

Each subject's video data is stored in the format {sub}_##_video{video}.mat

Joins this with the ref file information that has the valence, and trial type (self_other) as well as scan and video start times


In [48]:
#Load participant data and create .tsv run files

def get_sub_video_data(sub_id):
   
    #Get filenames that start with the SubjectID and then that contain "video" in them
    sub_video_filenames = [item for item in project_file_contents if str(sub_id)  in item]
    sub_video_filenames = [item for item in sub_video_filenames if 'video'  in item]

    sub_video_data = []

    #Grab and concatenate all subject videos, one video at a time
    for file in sub_video_filenames: 
        mat_contents = sio.loadmat(os.path.join(project_filepath, file))
        video_data = pd.DataFrame(mat_contents['allRatings'])
        video_data.columns = ['second', 'rating']
        video_data['sub_id'] = str.split(file, "_")[0]
        video_data['video_number'] = int(float(str.split(file, "_")[2].replace('.mat', '').replace('video', '')))
        sub_video_data.append(video_data)

    sub_video_data = pd.concat(sub_video_data)
    sub_video_data = sub_video_data[sub_video_data['second'] != 0] # get rid of blank ratings when no video was showing

    #Get Ref data and merge by video number
    ref_data = create_ref_file(sub_id)
    sub_video_data = pd.merge(sub_video_data, ref_data, on='video_number', how='left')
    sub_video_data['rating_time_rts'] = sub_video_data['video_start_rts'] + sub_video_data['second'] 
    sub_video_data = sub_video_data.sort_values(['video_start_rts','run'])

    #change into bids format
    sub_video_data['onset'] = sub_video_data['rating_time_rts']
    sub_video_data['duration'] = sub_video_data['rating_time_rts'].shift(-1) - sub_video_data['rating_time_rts']
    sub_video_data.loc[sub_video_data['duration'] > 1, 'duration'] = 0.5 #change gaps between scans into .5 duration
    sub_video_data.loc[sub_video_data['duration'] < 0, 'duration'] = 0.5 #change gaps between scans into .5 duration


    sub_video_data['sub']  = sub_video_data['sub_id_x'] 
    sub_video_data = sub_video_data[['onset', 'duration','rating', 'sub', 'video_number', 'trial', 'self_other', 'neg_pos', 'run']]


    for run in range(1,5):
        #sub_video_data[sub_video_data['run'] == run].to_csv(project_file  + 'sub-' + str(sub_id) + '/func/sub-' + str(sub_id) + '_task-emp_run-0' + str(run) + '_events.tsv', sep='\t', index=False)
        sub_video_data[sub_video_data['run'] == run].to_csv(save_filepath + '/all/sub-' + str(sub_id) + '_task-emp_run-0' + str(run) + '_events.tsv', sep='\t', index=False)
    return sub_video_data




In [49]:
for sub in unique_sub_ids: 
    get_sub_video_data(sub)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
