In [1]:
# In this jupyter notebook, we will run some analysis for data in the manual inhibiton paradigm, collected online.
# We will first load the data from a json file and transform it into a pandas data frame. 
# Next, we will make sure to reformat that data frame such that every row is one trial (the json file will give us
# one row for every event)

In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
data = {}
exp_data =  open('../data/jatos_results_t4')
for ses, jf in enumerate(exp_data):
    jd = json.loads(jf)
    df = pd.DataFrame(jd)
    #print(df)
    df['session'] = ses

    try:
        data = pd.concat([data,df], axis = 0)
        print('fail')
    except:
        data = df
#data.reset_index(drop = True, inplace = True)
data

Unnamed: 0,success,test_part,scrWidth,scrHeight,trial_type,trial_index,time_elapsed,internal_node_id,view_history,rt,...,touchX,touchY,key_press,position,shift,inwards,flash,full_rt,response,session
0,True,welcome,1024.0,1366.0,fullscreen,0,1672,0.0-0.0,,,...,,,,,,,,,,0
1,,intro,,,instructions,1,3423,0.0-1.0,"[{""page_index"":0,""viewing_time"":636},{""page_in...",1746.0,...,,,,,,,,,,0
2,,preFixation,,,html-button-response-touchdown-off,2,5251,0.0-2.0-0.0,,822.0,...,712.0,520.0,,,,,,,,0
3,,fixation,,,html-keyboard-response,3,5383,0.0-2.0-1.0,,,...,,,,,,,,,,0
4,,newPoint,,,html-button-response-touchdown-off,4,5588,0.0-2.0-2.0,,,...,714.0,521.0,,l,0.0,,1.0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1511,,newPoint,,,html-button-response-touchdown-off,1511,617569,0.0-4.0-2.239,,,...,721.0,512.0,,r,1.0,0.0,0.0,,,0
1512,,flash,,,html-button-response-touchdown-off,1512,617644,0.0-4.0-3.239,,,...,721.0,512.0,,r,1.0,0.0,0.0,,,0
1513,,response,,,html-button-response-touchdown-off,1513,617843,0.0-4.0-4.239,,195.0,...,721.0,512.0,,r,1.0,0.0,0.0,400.0,True,0
1514,,feedback,,,html-button-response-touchdown-off,1514,618846,0.0-4.0-5.239,,,...,1084.0,508.0,,,,,,,,0


In [8]:
data.columns

Index(['success', 'test_part', 'scrWidth', 'scrHeight', 'trial_type',
       'trial_index', 'time_elapsed', 'internal_node_id', 'view_history', 'rt',
       'startts', 'rts', 'lift_ts', 'stimulus', 'button_pressed', 'touchX',
       'touchY', 'key_press', 'position', 'shift', 'inwards', 'flash',
       'full_rt', 'response', 'session'],
      dtype='object')

#what these columns mean: 
'success': 0/1 means if a response was given, 
'test_part': preFixation, fixation, newPoint, flash, response, feedback are the different event during one trial
'scrWidth, scrHeight' appear only in the beginning of the data file and have the information about screen size
'trial_typ': which java script code was used for the event
'trial_index': running number through all events
'time_elapsed': time stamp at the end of the event
'internal_node_id': idk
'view_history': viewing time information for the instruction slides
'rt': in button/keyboard press events, the time elapsed between event start and key/button press
'startts': time stamp beginning of the event
'rts': time stamps of the response
'lift_ts': time stamp when hand was lifted
'stimulus': html for stimulus presentation
'button_pressed': when there was more than one button: which one was pressed?
'touchX': x coordinates where touch is registered
'touchy': y coordinates where touch is registered
'key_press': there are no key presses in this experiment
'position, shift, inwards, flash': information about the trial
'full_rt': time from go signal (newPoint onset) till response onset
'response': true/false if a response was given in time
'session': id for the session

In [9]:
# next, we want to group all values together that belong to one trial.
# we can do that by assigning one value between the first event "prefixation" and the last event "feedback"
nr = 0
for start,end in zip(np.where(data.test_part == 'preFixation')[0],np.where(data.test_part == 'feedback')[0]):
    data.loc[start:end, 'trial_nr'] = nr
    nr +=1


In [39]:
# make a timeline

time_line = {}
ts_cols = ['startts', 'rts', 'lift_ts','time_elapsed']
sync = np.nan

for trial in np.unique(data.trial_nr): 
    
    if not np.isnan(trial):
        # set a value for the rows of the summary df (sdf), matched with the trial number
        row = int(trial)
        # get the trial data only
        tdf = data[data.trial_nr == trial]
        
        # loop through all events
        for event in tdf.index:
            
            if tdf.test_part[event] == 'preFixation':
                sync = tdf.startts[event]
                
            start = tdf.startts[event]-sync
            response_on = tdf.rts[event]-sync
            response_off = tdf.lift_ts[event]-sync
            end = tdf.time_elapsed[event]
            
            times = [start, response_on, response_off, end]
            labels = ts_cols
            event_type = [tdf.test_part[event]] * len(times)
                          
            event_dat = pd.DataFrame(data=np.array([times,labels,event_type]).T,columns = ['ts', 'label', 'event'])
            
            try:
                time_line = pd.concat([time_line,event_dat], axis = 0)
            except:
                time_line = event_dat
                
time_line.reset_index(inplace = True, drop = True)

# data problems detected: no start time in keyboard press trials
# different timing for time_elapsed and other time steps

In [50]:
time_line.ts = time_line.ts.astype('float') 

time_line

Unnamed: 0,ts,label,event
0,0.0,startts,preFixation
1,822.0,rts,preFixation
2,684.0,lift_ts,preFixation
3,5251.0,time_elapsed,preFixation
4,,startts,fixation
...,...,...,...
6043,617843.0,time_elapsed,response
6044,931.0,startts,feedback
6045,,rts,feedback
6046,988.0,lift_ts,feedback


In [53]:
time_line.groupby('label').count()

Unnamed: 0_level_0,ts,event
label,Unnamed: 1_level_1,Unnamed: 2_level_1
lift_ts,1260,1512
rts,488,1512
startts,1260,1512
time_elapsed,1512,1512


In [55]:
# initialize the summary data frame
sdf = pd.DataFrame(columns = ['trial', 't_sync', 't_fixTouched', 't_pointJumped', 't_flashOn', 't_flashOff', 't_stimOff', 'rt_offline', 'rt_online', 'rt_lift_flash', , 'rt_lift_resp' 'position', 'shift', 'inwards', 'flash', 'touchX', 'touchY', 'subject', 'response'])

In [28]:
# get all trial numbers
for trial in np.unique(data.trial_nr):
    # check if the trial number is nan 
    if not np.isnan(trial):
        # set a value for the rows of the summary df (sdf), matched with the trial number
        row = int(trial)
        # get the trial data only
        tdf = data[data.trial_nr == trial]
        # get the time when the experiment started as synchronization time
        t_sync = tdf[tdf.test_part == 'preFixation'].time_elapsed.values[0]
        # retrieve information about the experiment
        # the trial number
        sdf.loc[row,'trial'] = trial 
        # sync time
        sdf.loc[row,'t_sync'] = t_sync
        # time the fixation point was touched (should be 0)
        sdf.loc[row,'t_fixTouched'] = tdf[tdf.test_part == 'preFixation'].time_elapsed.values[0] - t_sync
        # time the point jumped
        sdf.loc[row,'t_pointJumped'] = tdf[tdf.test_part == 'fixation'].time_elapsed.values[0] - t_sync
        # time the flash appeared on the screen
        sdf.loc[row,'t_flashOn'] = tdf[tdf.test_part == 'newPoint'].time_elapsed.values[0] - t_sync
        # time the flash disappeared again
        sdf.loc[row,'t_flashOff'] = tdf[tdf.test_part == 'flash'].time_elapsed.values[0] - t_sync
        # time all stimuli where switched off (trial is over)
        sdf.loc[row,'t_stimOff'] = tdf[tdf.test_part == 'response'].time_elapsed.values[0] - t_sync
        
        # retrieve the time between go signal (jump) and flash offset
        t_go = sdf.loc[trial,'t_flashOff'] - sdf.loc[row,'t_pointJumped']
        # we will need the "response" line more often below, so we assign it to a variable with a shorter name
        resp = tdf[tdf.test_part == 'response']
        
        # offline computed response time (from go signal till touch)
        # we are missing response time that computes time from go signal till hand liftoff
        sdf.loc[row,'rt_offline'] = t_go + tdf[tdf.test_part == 'response'].rt.values[0]
        # online computed response time
        sdf.loc[row,'rt_online'] = resp.full_rt.values[0]
        # time point when the finger was lifted
        sdf.loc[row,'rt_lift_flash'] =  
        # 
        # position of the jumped stimulus (left or right)
        sdf.loc[row,'position'] = resp.position.values[0]
        # if there was a shift or not (0/1)
        sdf.loc[row,'shift'] = resp['shift'].values[0]
        # if the shift was inwards or not (0/1/nan)
        sdf.loc[row,'inwards'] = resp.inwards.values[0]
        # if there was a visible flash or not
        sdf.loc[row,'flash'] = resp.flash.values[0]
        # the x coordinates where the screen was touched
        sdf.loc[row,'touchX'] = tdf[tdf.test_part == 'feedback'].touchX.values[0]
        # the y coordinates where the screen was touched
        sdf.loc[row,'touchY'] = tdf[tdf.test_part == 'feedback'].touchY.values[0]
        # the subject number
        sdf.loc[row,'subject'] = resp.subject.values[0]
        # if a response was given or not (will be false when answer was too slow)
        sdf.loc[row,'response'] = resp.response.values[0]

    

In [29]:
sdf

Unnamed: 0,trial,t_sync,t_fixTouched,t_pointJumped,t_flashOn,t_flashOff,t_stimOff,rt_offline,rt_online,position,shift,inwards,flash,touchX,touchY,subject,response
0,0,11912,0,231,368,448,1294,1062.0,,l,0,,0,666,267,0,True
1,1,15564,0,126,260,347,1235,1108.0,,r,0,,1,674,265,0,True
2,2,23917,0,234,386,469,1160,925.0,,l,1,1.0,0,644,389,0,True
3,3,27370,0,120,236,320,1082,961.0,,r,1,1.0,0,655,377,0,True
4,4,30356,0,205,365,443,1028,820.0,,l,1,0.0,0,661,389,0,True
5,5,33308,0,112,244,326,1070,956.0,,r,1,0.0,0,653,384,0,True
6,6,36244,0,206,426,504,1102,896.0,,l,1,0.0,1,676,380,0,True
7,7,39347,0,253,432,505,1091,838.0,,r,0,,0,654,371,0,True
8,8,42435,0,246,382,466,1184,937.0,,l,1,1.0,1,663,381,0,True
9,9,45570,0,120,306,380,1125,1004.0,,r,1,0.0,1,649,381,0,True
