<!--
IRdisplay::display_html(file='code_hiding.html')
if the line above generates an error, it could be due to this:
https://github.com/IRkernel/IRdisplay/issues/41
In the meantime, the code below is enough; it works on nbviewer but not on the notebook directly
-->
<script>
  code_show=true;
  function code_toggle() {
    if (code_show){
      $('div.input').hide();
    } else {
      $('div.input').show();
    }
    code_show = !code_show
  } 
  $( document ).ready(code_toggle);
</script>
<font size=4>
<a href="javascript:code_toggle()">Toggle ON/OFF</a>
code cells.
</font>

**Author**: Adrian Ernesto Radillo  
**Date**: 30 Apr 2019  
The purpose of this notebook is to manually fix the `taskID` field in the csv files that hold the FIRA data from pilots 15 through 19. See [issue 1](https://github.com/TheGoldLab/Task_SingleCP_DotsReversal/issues/1)

In [1]:
# load packages 

# Note:
# if one of the packages below is not installed, type, once, in another cell
# install.packages("<package_name>", lib="<path_to_installation_folder>")
# note that if you don't put the lib arg above, it will default to first item in .libPaths()
# ref:https://www.rdocumentation.org/packages/utils/versions/3.5.2/topics/install.packages

# I can't load the conflicted package here :(
# library(conflicted)
# https://github.com/r-lib/conflicted/issues/26

library(data.table)     # see https://cran.r-project.org/web/packages/data.table/vignettes/datatable-intro.html for reference
library(ggplot2)        # for plots
library(repr)           # for resizing figures
library(OneR)           # to use the function 'bin'
library(gridExtra)      # to use grid.arrange()

source("../R/R_functions.r") # custom functions

In [2]:
# CONSTANTS
# pilots handled by this notebook, plus a dash-joined label built from them
PILOT_NUMBERS <- as.list(c(15, 16, 17, 18, 19))
PILOT_NUMBER <- paste(unlist(PILOT_NUMBERS), collapse = "-")

# folder/file naming pieces handed to the csv loader
DATA_FOLDER <- "../data/"
FIRA_TAG <- "FIRA"
SUFFIX <- "_prefix"

In [3]:
# load csv files into data.tables
# The result, TRIALS, is used below as a single data.table holding the trials
# of every pilot listed in PILOT_NUMBERS.
# NOTE(review): loadMultiplePilotCSV is not defined in this notebook; it
# presumably comes from the sourced ../R/R_functions.r -- confirm there how the
# folder, tag and suffix arguments are combined into file names.
TRIALS <- loadMultiplePilotCSV(PILOT_NUMBERS, DATA_FOLDER, FIRA_TAG, SUFFIX)

In [4]:
# distinct taskID values present in the loaded trials, and how many there are
NODES <- unique(TRIALS[["taskID"]])
NUM_NODES <- length(NODES)

In [5]:
# number of subjects, counted as one per entry of PILOT_NUMBERS
NUM_SUBJECTS <- length(PILOT_NUMBERS)

In [6]:
# display the distinct taskID values found in TRIALS before any relabeling
NODES

In [7]:
# display the number of subjects (length of PILOT_NUMBERS)
NUM_SUBJECTS

In [8]:
# To manually relabel taskID, two landmarks are computed per pilot on the rows
# with taskID == 2 (rows are numbered within that subset, not within TRIALS):
#     last_row:  the first row at which the running count of valid trials
#                (trials with a non-missing choice) reaches NUM_VALID_TRIALS
#     break_row: the first row whose trialIndex was already seen and whose most
#                recent earlier occurrence was a valid trial
# Both landmarks are recomputed from scratch for every pilot and are NA when
# the corresponding condition is never met, so a value left over from a
# previously processed pilot can never be printed by mistake.
NUM_VALID_TRIALS <- 200
for (subject in PILOT_NUMBERS) {
    st <- TRIALS[taskID==2 & pilotID==subject]
    st[, is_valid := !is.na(choice)]
    st[, valid_count := cumsum(is_valid)]
    st[, row_idx := .I]

    # several rows can share the target count (invalid trials do not increase
    # it); [1] keeps the first one and yields NA when there is no match
    last_row <- st[valid_count == NUM_VALID_TRIALS, row_idx][1]

    # validity of the previous row carrying the same trialIndex; NA on the
    # first occurrence of a trialIndex, which which() then ignores
    st[, prev_visit_valid := shift(is_valid), by = trialIndex]
    break_row <- st[which(prev_visit_valid), row_idx][1]

    cat('pilot ', subject, ' last_row=', last_row, 'break_row=', break_row,'\n')
}

pilot  15  last_row= 200 break_row= 201 
pilot  16  last_row= 200 break_row= 201 
pilot  17  last_row= 201 break_row= 202 
pilot  18  last_row= 201 break_row= 202 
pilot  19  last_row= 265 break_row= 266 


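Since the two checks above agree for every pilot (`break_row` is always `last_row + 1`), we reuse the structure of the previous loop to update the `taskID` column: within each pilot, every row of the `taskID == 2` block that comes after `last_row` is relabeled as `taskID` 3.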

In [9]:
# Row, counted within each pilot's taskID == 2 block, of the last trial that
# keeps taskID 2. The values are the last_row figures printed by the check
# above, listed in the same order as PILOT_NUMBERS.
last_rows <- c(200, 200, 201, 201, 265)
# a length mismatch would otherwise index past the end of last_rows and
# silently relabel nothing for the affected pilots
stopifnot(length(last_rows) == length(PILOT_NUMBERS))
for (k in seq_along(PILOT_NUMBERS)) {
    subject <- PILOT_NUMBERS[[k]]
    last_row <- last_rows[k]
    # positions in TRIALS of this pilot's taskID == 2 rows, in TRIALS row order
    block_rows <- TRIALS[, which(taskID == 2 & pilotID == subject)]
    # everything in the block after its first last_row rows becomes task 3;
    # 3L keeps taskID an integer column
    extra_rows <- block_rows[seq_along(block_rows) > last_row]
    TRIALS[extra_rows, taskID := 3L]
}

In [10]:
# inspect the structure (dimensions and column types) of TRIALS after the
# taskID relabeling
str(TRIALS)

Classes ‘data.table’ and 'data.frame':	2360 obs. of  25 variables:
 $ taskID         : int  1 1 1 1 1 1 1 1 1 1 ...
 $ trialIndex     : int  12 20 31 41 48 27 33 5 15 3 ...
 $ trialStart     : num  6037 6043 6049 6053 6058 ...
 $ trialEnd       : num  6043 6049 6053 6058 6067 ...
 $ RT             : num  NaN 1.08 1.28 1.3 2.09 ...
 $ choice         : num  NaN 0 0 1 0 1 0 0 1 0 ...
 $ correct        : num  NaN 1 0 1 1 1 0 0 1 0 ...
 $ initDirection  : int  180 180 0 0 180 0 0 0 0 0 ...
 $ endDirection   : int  180 180 0 0 180 0 0 0 0 0 ...
 $ presenceCP     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ coherence      : num  23 23 21 40 39 39 38 49 57 57 ...
 $ viewingDuration: num  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 ...
 $ probCP         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ timeCP         : num  0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 ...
 $ randSeedBase   : int  8196 6504 1910 5103 7650 4513 6099 3907 2406 1395 ...
 $ fixationOn     : num  0.1512 0.0956 0.0865 0.0844 0.0965 ...
 $ fixationStart 

In [11]:
# list the distinct taskID values now present in TRIALS, to check the relabeling
unique(TRIALS[,taskID])

In [13]:
# save the relabeled trials to a new csv file; na="NA" makes missing values
# appear as the string NA in the output
# NOTE(review): the output path is hard-coded and is not derived from
# DATA_FOLDER or PILOT_NUMBERS -- update it by hand if those constants change.
fwrite(TRIALS, file = "../data/Pilot15-19/fixed_FIRA_TRIALS.csv", na="NA")