In [54]:
import pandas as pd
import numpy as np
import os
import pickle as pkl
from pprint import pprint
import re

### Load pickle, extract events to pd.DataFrame

In [83]:
# Session output file; the participant number is the second '_'-separated token of its name
fn = 'DEBUG_1_2017-10-07_12.05.54_outputDict.pickle'
pp_num = fn.split('_')[1]

# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source
with open(fn, 'rb') as pickle_file:
    outDict = pkl.load(pickle_file)
events = outDict['eventArray']

# Also load the prepared design; its directory name uses the participant number zero-padded to 3 digits
design_path = '../designs/pp_%s/all_blocks/trials.csv' % str(pp_num).zfill(3)
prepped_design = pd.read_csv(design_path)

In [84]:
# Timing table: one row per row of the prepared design; timing columns are filled from the event log
dat = pd.DataFrame(columns=['trial_ID', 'trial_start_time', 'stimulus_onset_time'])
dat['trial_ID'] = prepped_design['trial_ID']
dat['block_trial_ID'] = prepped_design['block_trial_ID']

# Extract using regex.
# - The decimal point is escaped so it matches only a literal '.', not any character.
# - trial_ID requires at least one digit, so int() below never receives an empty string.
re1 = re.compile(r'trial (?P<trial_ID>[0-9]+) started at (?P<start_time>[0-9]+\.[0-9]+)')
re2 = re.compile(r'trial (?P<trial_ID>[0-9]+) phase (?P<phase_num>[0-9]+) started at (?P<start_time>[0-9]+\.[0-9]+)')

for trial in events:
    for trial_event in trial:

        # Lists are events: key presses, saccades, etc. Non-lists are trial timings
        if isinstance(trial_event, list):
            continue

        # Check for trial start (match once and reuse the result)
        trial_start = re1.match(trial_event)
        if trial_start is not None:
            d = trial_start.groupdict()
            dat.loc[dat['trial_ID'] == int(d['trial_ID']), 'trial_start_time'] = float(d['start_time'])
            continue

        # Check for phase start; each phase number gets its own 'phase_<n>_start' column
        phase_start = re2.match(trial_event)
        if phase_start is not None:
            d = phase_start.groupdict()
            dat.loc[dat['trial_ID'] == int(d['trial_ID']), 'phase_' + d['phase_num'] + '_start'] = float(d['start_time'])

# 'trial_start_time' was created by the empty-frame constructor and is therefore an object
# column; convert the filled timing columns to numeric so later arithmetic yields float columns.
for timing_col in dat.columns:
    if timing_col == 'trial_start_time' or timing_col.startswith('phase_'):
        dat[timing_col] = pd.to_numeric(dat[timing_col])

#### Restructure

In [85]:
# Keep only rows for which a trial start was logged [drops instructions etc]
dat = dat.dropna(axis=0, subset=['trial_start_time']).reset_index(drop=True)

# Onset aliases: the stimulus onset time is phase_4_start, the cue onset time is phase_2_start
dat['stimulus_onset_time'] = dat['phase_4_start'].copy()
dat['cue_onset_time'] = dat['phase_2_start']

# The recorded 'trial start time' is the start of phase_0: the trial code starts,
# but waits for the scanner pulse
dat['phase_0_start'] = dat['trial_start_time'].copy()

# The actual trial start time is the start of phase 1: the pre-cue fixation cross is shown
dat['trial_start_time'] = dat['phase_1_start']

# Re-reference every timing column to the phase_1 start of the first trial
t0 = dat['phase_1_start'].min()
timing_cols = [col for col in dat.columns if 'phase' in col or 'time' in col]
for col in timing_cols:
    dat[col] = dat[col] - t0

# Actual duration of each phase: start time of the 'next' phase minus the own start time.
# Iterating from phase 6 down to phase 0 keeps the order in which the columns are appended.
for phase in range(6, -1, -1):
    dat['phase_%d' % phase] = dat['phase_%d_start' % (phase + 1)] - dat['phase_%d_start' % phase]


duration_cols = ['phase_' + str(x) for x in range(0, 7)]
dat[duration_cols + ['trial_start_time', 'stimulus_onset_time']]

Unnamed: 0,phase_0,phase_1,phase_2,phase_3,phase_4,phase_5,phase_6,trial_start_time,stimulus_onset_time
0,0.718097,1.502530,1.001858,0.000059,0.601132,0.000060,0.000026,0.000000,2.504447
1,2.90371,0.016283,1.001752,0.000069,0.601117,0.000081,0.000026,6.011258,7.029362
2,2.98829,2.253873,1.001790,0.000066,0.601077,0.000059,0.000018,12.005993,15.261722
3,2.13088,0.005200,1.001953,0.000060,0.601120,0.000053,0.000021,17.995099,19.002312
4,2.98758,2.237399,1.001930,0.000059,0.601085,0.000066,0.000017,23.994981,27.234369
5,2.17003,0.016226,1.001893,0.000057,0.601037,0.000058,0.000022,30.006926,31.025102
6,2.98183,0.005726,1.001958,0.000058,0.601049,0.000058,0.000022,35.995304,37.003046
7,2.9872,0.017126,1.001887,0.000058,0.601060,0.000060,0.000022,41.995323,43.014394
8,1.96903,1.487282,1.001892,0.000056,0.000125,0.600924,0.000054,50.994977,53.484207
9,2.90842,2.249911,1.001929,0.000057,0.000126,0.600999,0.000048,56.995069,60.246966


In [94]:
# Block-relative copies of the timing columns; they start out equal to the run-relative
# values and are shifted per block below.
dat['trial_start_time_block'] = dat['trial_start_time']
dat['stimulus_onset_time_block'] = dat['stimulus_onset_time']
dat['cue_onset_time_block'] = dat['cue_onset_time']
block_cols = ['trial_start_time_block', 'cue_onset_time_block', 'stimulus_onset_time_block']

# np.where returns a tuple holding one index array per dimension; take the (only) array so
# that new_block_rows contains the row positions at which a block starts.
new_block_rows = np.where(dat['block_trial_ID'] == 0)[0]
# Each block ends where the next one starts; the last block runs to the end of the frame
block_end_rows = np.append(new_block_rows[1:], dat.shape[0])

# Loop over blocks
for idx_start, idx_end in zip(new_block_rows, block_end_rows):
    print('Correcting row idx: %d-%d' % (idx_start, idx_end))

    # idx_start is a row position, not a trial_ID, so read the block's first row directly.
    # (Positions equal index labels here because dat's index was reset to 0..n-1 earlier.)
    t0_this_block = dat.loc[idx_start, 'trial_start_time']

    # .loc slices include the end label, hence idx_end - 1
    dat.loc[idx_start:(idx_end-1), block_cols] = dat.loc[idx_start:(idx_end-1), block_cols] - t0_this_block

Selecting idx: 0-8
Selecting idx: 8-16
Selecting idx: 16-24
Selecting idx: 24-32
Selecting idx: 32-40
Selecting idx: 40-48
Selecting idx: 48-127
Selecting idx: 127-206
Selecting idx: 206-298
Selecting idx: 298-390


In [101]:
# Block-relative timing columns that are looked up in both dat and prepped_design
compare_cols = ['trial_start_time_block', 'cue_onset_time_block', 'stimulus_onset_time_block']

# Element-wise difference: extracted timings minus the prepared design's timings.
# NOTE(review): the subtraction aligns on the row index -- confirm both frames index the same trials.
dat[compare_cols].sub(prepped_design[compare_cols])

Unnamed: 0,trial_start_time_block,cue_onset_time_block,stimulus_onset_time_block
0,0.000000,0.002530,0.004447
1,0.011258,0.027541,0.029362
2,0.005993,0.009866,0.011722
3,-0.004901,0.000299,0.002312
4,-0.005019,-0.017620,-0.015631
5,0.006926,0.023152,0.025102
6,-0.004696,0.001030,0.003046
7,-0.004677,0.012449,0.014394
8,0.000000,-0.012718,-0.010770
9,0.000092,0.000003,0.001989
