modeling.rmd

---
title: "The Influence of Fatigue on Usage of Model-Based vs Model-Free Reinforcement Learning Strategies"
subtitle: 
author: 'Colton Loftus, Maya Rozenshteyn'
date: "`r Sys.Date()`"
output:
  pdf_document:
    dev: png
    fig_caption: yes
    toc: yes
    toc_depth: 3
  html_document:
    df_print: paged
    toc: yes
geometry: margin=1in
fontsize: 10pt
editor_options:
  chunk_output_type: console
---


```{r setup, include=FALSE}
# Global knitr chunk defaults: centered 8x6 PNG figures, source and output
# collapsed into one block with no comment prefix or prompt, code echoed, and
# chunk caching with automatic dependency detection. Logical options are
# spelled TRUE/FALSE because T and F are ordinary, reassignable variables.
knitr::opts_chunk$set(
  fig.align = "center", fig.height = 6, fig.width = 8,
  collapse = TRUE, comment = "", prompt = FALSE, echo = TRUE,
  cache = TRUE, autodep = TRUE,
  tidy = FALSE, tidy.opts = list(width.cutoff = 63),
  dev = "png"
)
# Wrap printed console output at 63 characters.
options(width = 63)

```

```{r}
# All data paths below are relative to the current working directory (the
# document's own directory when knitting), so no setwd() call is needed.

library(purrr)
library(readr)
library(jsonlite)
library("data.table")
library(dplyr)
library(rlist)
library(magrittr)
library(lme4)
library(plotrix)
library(ggplot2)
```

```{r}
# Preprocess experiment data to extract relevant information: reward,
# transition, first state choice, tiredness, experimental condition (morning
# vs evening), and task run (first or second time conducting the experiment).
# Note 1: reward is coded as -1 (no reward) and 1 (reward), transition is coded
# as ~-0.7 (rare transition) and ~0.3 (common transition), tiredness is coded on
# a scale from -3 (least tired) to 3 (most tired), experimental condition is
# coded as 1 (morning) and -1 (evening) and task run as -1 (first run) and 1
# (second run).
# Note 2: "stay" is true when the participant selected the same first-stage
# choice in successive trials.
# Note 3: predictors are coded around zero; transition is mean-centered later,
# once all sessions have been combined.

# Lookup from a sleepiness-survey response to its numeric tiredness score.
survey_response_to_val <- jsonlite::fromJSON("data/sleepiness.json")$meta

# Turn one session's raw task log into one row per usable two-step trial.
#
# Arguments:
#   csv: data frame read from a session CSV; must contain the columns
#     trial_type, state_1_choice, transition and outcome.
#   survey_string: survey response for this session, used as a key into
#     survey_response_to_val.
#   is_morning: TRUE for a morning session, FALSE for an evening session.
#   morning_first: TRUE if this participant's first session was the morning one.
#   is_more_tired_condition: stored unchanged as an attribute of the result.
#   person_id: participant identifier, stored in the `subject` column.
#
# Returns: a data frame with columns state_1_choice, transition, outcome,
#   next_state1_choice, stay, tiredness, is_morning, task_run and subject.
#   The session-level values are also attached as attributes (is_morning,
#   task_run, tiredness, subject, is_more_tired_condition); the `task_run`
#   attribute is TRUE for the first run.
preprocess <- function(csv, survey_string, is_morning, morning_first, is_more_tired_condition, person_id) {
    # Session-level scalars are resolved before entering mutate(): once an
    # `is_morning` column exists it masks the argument of the same name, and
    # `&&` / `||` must never be handed a whole column.
    tiredness_score <- survey_response_to_val[survey_string][[1]]
    # A session is the first run exactly when its time of day matches the time
    # of day the participant was scheduled to do first.
    is_first_run <- is_morning == morning_first
    is_morning_code <- if (is_morning) 1 else -1
    task_run_code <- if (is_first_run) -1 else 1

    # NOTE(review): state_1_choice and outcome are converted to numeric but
    # transition is not, although it is used arithmetically after the sessions
    # are combined -- confirm the type read_csv assigns to that column.
    relevant_data <- csv %>%
        dplyr::filter(trial_type == "two-step-trial") %>%
        select(state_1_choice, transition, outcome) %>%
        # Drop trials where any of the three fields was recorded as "null".
        dplyr::filter(!grepl("null", state_1_choice)) %>%
        dplyr::filter(!grepl("null", transition)) %>%
        dplyr::filter(!grepl("null", outcome)) %>%
        mutate(
            state_1_choice = as.numeric(state_1_choice),
            outcome = as.numeric(outcome),
            # Pair every trial with the first-stage choice of the following one.
            next_state1_choice = lead(state_1_choice, 1),
            stay = state_1_choice == next_state1_choice,
            tiredness = tiredness_score,
            is_morning = is_morning_code,
            task_run = task_run_code,
            subject = person_id
        ) %>%
        # The last remaining trial has no successor, so "stay" is undefined.
        dplyr::filter(!is.na(next_state1_choice)) %>%
        # Recode "no reward" from 0 to -1.
        mutate(outcome = ifelse(outcome == 0, -1, outcome)) %>%
        as.data.frame()

    attr(relevant_data, "is_morning") <- is_morning
    attr(relevant_data, "task_run") <- is_first_run
    attr(relevant_data, "tiredness") <- tiredness_score
    attr(relevant_data, "subject") <- person_id
    attr(relevant_data, "is_more_tired_condition") <- is_more_tired_condition

    relevant_data
}
```

```{r, warning=FALSE, message=FALSE}
# Read in and preprocess experimental data.
# Layout used by the code below: every non-JSON entry in data/ is treated as a
# participant directory holding meta.json plus the session CSV files.
all_data <- list()
for (person in list.files("data/")) {
    if (!grepl("json", person)) {
        survey_response_map <- jsonlite::fromJSON(paste0("data/", person, "/meta.json"))$meta
        morning_first <- survey_response_map$first == "morning"
        # Map both survey responses to numeric tiredness scores (the same
        # lookup preprocess() applies) so the "more tired" comparison is made
        # on the scores rather than on the raw responses.
        form1_tiredness <- survey_response_to_val[survey_response_map["form1"][[1]]][[1]]
        form2_tiredness <- survey_response_to_val[survey_response_map["form2"][[1]]][[1]]
        for (csv in list.files(paste0("data/", person, "/"))) {
            if (!grepl("json", csv)) {
                # A session file counts as "morning" when its name contains "day".
                csv_is_morning <- grepl("day", csv)
                # form1 belongs to the first run, form2 to the second.
                is_first_run <- csv_is_morning == morning_first
                if (is_first_run) {
                    survey_response <- survey_response_map["form1"]
                    # Ties go to the first run, so at most one of a
                    # participant's two sessions is flagged as more tired.
                    is_more_tired_condition <- form1_tiredness >= form2_tiredness
                } else {
                    survey_response <- survey_response_map["form2"]
                    is_more_tired_condition <- form2_tiredness > form1_tiredness
                }
                read_in_csv <- read_csv(paste0("data/", person, "/", csv))
                processed_csv <- preprocess(read_in_csv, survey_response[[1]], csv_is_morning, morning_first, is_more_tired_condition, person)
                all_data[[length(all_data) + 1]] <- processed_csv
            }
        }
    }
}

# Stack all sessions and mean-center transition across the pooled trials.
all_data_concat <- bind_rows(all_data) %>% mutate(transition = transition - mean(transition))
```

```{r}
# Baseline mixed-effects logistic regression of stay on outcome, transition
# and their interaction, with a random intercept for each subject.
base_reg <- glmer(
    stay ~ outcome*transition + (1 | subject),
    data = all_data_concat,
    family = binomial
)
summary(base_reg)
```

```{r}
# Time-of-day model: the outcome x transition terms are additionally crossed
# with task run and with morning/evening condition; each subject gets a
# random intercept.
tod_reg <- glmer(
    stay ~ outcome*transition*(task_run + is_morning) + (1 | subject),
    data = all_data_concat,
    family = binomial
)
summary(tod_reg)
```

```{r}
# Tiredness model: the outcome x transition terms are additionally crossed
# with task run and with the tiredness score; each subject gets a random
# intercept.
tiredness_reg <- glmer(
    stay ~ outcome*transition*(task_run + tiredness) + (1 | subject),
    data = all_data_concat,
    family = binomial
)
summary(tiredness_reg)
```

```{r}
# Count the rows of `df` whose stay, outcome and transition columns all equal
# the requested values.
sum_decision_type <- function(df, stay, outcome, transition) {
    stay_matches <- df$stay == stay
    outcome_matches <- df$outcome == outcome
    transition_matches <- df$transition == transition
    sum(stay_matches & outcome_matches & transition_matches)
}

# Compute the stay probability for each reward x transition cell of one
# session. Returns a named numeric vector:
#   b1 = common transition, rewarded
#   b2 = rare transition, rewarded
#   b3 = common transition, unrewarded
#   b4 = rare transition, unrewarded
# Here transition is matched as 1 (common) / 0 (rare) and outcome as 1 / -1.
stay_probability_computer <- function(df) {
  # Share of "stay" trials among all trials in one cell: stays / (stays + leaves).
  stay_rate <- function(outcome, transition) {
    n_stay <- sum_decision_type(df, stay = TRUE, outcome = outcome, transition = transition)
    n_leave <- sum_decision_type(df, stay = FALSE, outcome = outcome, transition = transition)
    n_stay / (n_stay + n_leave)
  }

  c(
    b1 = stay_rate(outcome = 1, transition = 1),
    b2 = stay_rate(outcome = 1, transition = 0),
    b3 = stay_rate(outcome = -1, transition = 1),
    b4 = stay_rate(outcome = -1, transition = 0)
  )
}

# Average per-session stay probabilities across sessions.
#
# Arguments:
#   bp_vals: non-empty list in which every element has entries named "b1",
#     "b2", "b3" and "b4" (the output of stay_probability_computer).
#
# Returns: unnamed numeric vector of length 8 -- the means of b1..b4 followed
#   by the standard errors of b1..b4 as computed by std.error().
#
# Stops with an explicit message when `bp_vals` is empty, instead of failing
# with a "subscript out of bounds" error on the first lookup.
bp_val_averger <- function(bp_vals) {
  if (length(bp_vals) == 0) {
    stop("bp_val_averger(): no sessions to average", call. = FALSE)
  }

  # One numeric vector per cell, holding that cell's value from every session.
  cells <- c("b1", "b2", "b3", "b4")
  per_cell <- lapply(cells, function(cell) {
    vapply(bp_vals, function(session) session[[cell]], numeric(1))
  })

  averages <- vapply(per_cell, mean, numeric(1))
  std_errors <- vapply(per_cell, function(x) std.error(x), numeric(1))

  c(averages, std_errors)
}

# Draw a grouped bar chart of stay probabilities with error bars.
#
# Arguments:
#   bars_to_plot: numeric vector of length 8 -- bar heights for
#     rewarded/common, rewarded/rare, unrewarded/common, unrewarded/rare,
#     followed by the four matching standard errors.
#   title: plot title.
#
# Returns: a ggplot object. The visible y range starts at 0.5 and ends at the
#   tallest bar plus the largest standard error.
stay_bar_plot_creator <- function(bars_to_plot, title) {
  # The standard errors are stored as a column, so every aesthetic used by the
  # layers is resolved from the plot data itself.
  plot_data <- data.frame(
    reward_type = c(rep("rewarded", 2), rep("unrewarded", 2)),
    transition = rep(c("common", "rare"), 2),
    value = bars_to_plot[1:4],
    se = bars_to_plot[5:8]
  )
  y_top <- max(plot_data$value) + max(plot_data$se)

  ggplot(plot_data, aes(fill = transition, y = value, x = reward_type)) +
      geom_bar(position = "dodge", stat = "identity") +
      labs(y = "Stay Probability", x = "Reward Condition", title = title) +
      coord_cartesian(ylim = c(.5, y_top)) +
      theme(plot.title = element_text(hjust = 0.5)) +
      geom_errorbar(aes(ymin = value - se, ymax = value + se),
                  width = .2,
                  position = position_dodge(.9))
}
```

```{r}
# Stay probabilities for every session, averaged into a single plot.
all_data_processed <- map(all_data, stay_probability_computer)

all_data_processed %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : All Data")
```

```{r}
# Sessions whose "is_morning" attribute is TRUE.
morning_data <- map(
    list.filter(all_data, attr(., "is_morning") == TRUE),
    stay_probability_computer
)

morning_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Morning Condition")
```

```{r}
# Sessions whose "is_morning" attribute is FALSE.
evening_data <- map(
    list.filter(all_data, attr(., "is_morning") == FALSE),
    stay_probability_computer
)

evening_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Evening Condition")
```

```{r}
# Sessions whose "task_run" attribute is TRUE.
first_run_data <- map(
    list.filter(all_data, attr(., "task_run") == TRUE),
    stay_probability_computer
)

first_run_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : First Task Run Condition")
```

```{r}
# Sessions whose "task_run" attribute is FALSE.
second_run_data <- map(
    list.filter(all_data, attr(., "task_run") == FALSE),
    stay_probability_computer
)

second_run_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Second Task Run Condition")
```

# Tiredness Plotting Method 1: Tiredness Scores
```{r}
# Sessions with a tiredness score of zero or above.
tired_data <- map(
    list.filter(all_data, attr(., "tiredness") >= 0),
    stay_probability_computer
)

tired_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Tired Condition")
```

```{r}
# Sessions with a tiredness score below zero.
awake_data <- map(
    list.filter(all_data, attr(., "tiredness") < 0),
    stay_probability_computer
)

awake_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Awake Condition")
```

# Tiredness Plotting Method 2: Relative Tiredness between Trials
```{r}
# Sessions flagged as the participant's more-tired session.
relatively_tired_data <- map(
    list.filter(all_data, attr(., "is_more_tired_condition") == TRUE),
    stay_probability_computer
)

relatively_tired_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Tired Condition")
```

```{r}
# Sessions not flagged as the participant's more-tired session.
relatively_awake_data <- map(
    list.filter(all_data, attr(., "is_more_tired_condition") == FALSE),
    stay_probability_computer
)

relatively_awake_data %>%
    bp_val_averger() %>%
    stay_bar_plot_creator("Stay Probability : Awake Condition")
```