diff --git a/README.md b/README.md
index 9c740e4..5038643 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Cognitive Models with Julia
+# Cognitive Models with Julia
[![](https://img.shields.io/badge/status-looking_for_collaborators-orange)](https://github.com/DominiqueMakowski/CognitiveModels/issues)
[![](https://img.shields.io/badge/access-open-green)](https://dominiquemakowski.github.io/CognitiveModels/)
diff --git a/content/.jupyter_cache/executed/4a8dce518c228fb3cf99db4583eedbb4/base.ipynb b/content/.jupyter_cache/executed/4a8dce518c228fb3cf99db4583eedbb4/base.ipynb
new file mode 100644
index 0000000..2be6bf6
--- /dev/null
+++ b/content/.jupyter_cache/executed/4a8dce518c228fb3cf99db4583eedbb4/base.ipynb
@@ -0,0 +1,167 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "cef1b013",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import IJulia\n",
+ "\n",
+ "# The julia kernel has built in support for Revise.jl, so this is the \n",
+ "# recommended approach for long-running sessions:\n",
+ "# https://github.com/JuliaLang/IJulia.jl/blob/9b10fa9b879574bbf720f5285029e07758e50a5e/src/kernel.jl#L46-L51\n",
+ "\n",
+ "# Users should enable revise within .julia/config/startup_ijulia.jl:\n",
+ "# https://timholy.github.io/Revise.jl/stable/config/#Using-Revise-automatically-within-Jupyter/IJulia-1\n",
+ "\n",
+ "# clear console history\n",
+ "IJulia.clear_history()\n",
+ "\n",
+ "fig_width = 7\n",
+ "fig_height = 5\n",
+ "fig_format = :retina\n",
+ "fig_dpi = 96\n",
+ "\n",
+ "# no retina format type, use svg for high quality type/marks\n",
+ "if fig_format == :retina\n",
+ " fig_format = :svg\n",
+ "elseif fig_format == :pdf\n",
+ " fig_dpi = 96\n",
+ " # Enable PDF support for IJulia\n",
+ " IJulia.register_mime(MIME(\"application/pdf\"))\n",
+ "end\n",
+ "\n",
+ "# convert inches to pixels\n",
+ "fig_width = fig_width * fig_dpi\n",
+ "fig_height = fig_height * fig_dpi\n",
+ "\n",
+ "# Intialize Plots w/ default fig width/height\n",
+ "try\n",
+ " import Plots\n",
+ "\n",
+ " # Plots.jl doesn't support PDF output for versions < 1.28.1\n",
+ " # so use png (if the DPI remains the default of 300 then set to 96)\n",
+ " if (Plots._current_plots_version < v\"1.28.1\") & (fig_format == :pdf)\n",
+ " Plots.gr(size=(fig_width, fig_height), fmt = :png, dpi = fig_dpi)\n",
+ " else\n",
+ " Plots.gr(size=(fig_width, fig_height), fmt = fig_format, dpi = fig_dpi)\n",
+ " end\n",
+ "catch e\n",
+ " # @warn \"Plots init\" exception=(e, catch_backtrace())\n",
+ "end\n",
+ "\n",
+ "# Initialize CairoMakie with default fig width/height\n",
+ "try\n",
+ " import CairoMakie\n",
+ "\n",
+ " # CairoMakie's display() in PDF format opens an interactive window\n",
+ " # instead of saving to the ipynb file, so we don't do that.\n",
+ " # https://github.com/quarto-dev/quarto-cli/issues/7548\n",
+ " if fig_format == :pdf\n",
+ " CairoMakie.activate!(type = \"png\")\n",
+ " else\n",
+ " CairoMakie.activate!(type = string(fig_format))\n",
+ " end\n",
+ " CairoMakie.update_theme!(resolution=(fig_width, fig_height))\n",
+ "catch e\n",
+ " # @warn \"CairoMakie init\" exception=(e, catch_backtrace())\n",
+ "end\n",
+ " \n",
+ "# Set run_path if specified\n",
+ "try\n",
+ " run_path = raw\"C:\\Users\\domma\\Dropbox\\Software\\CognitiveModels\\content\"\n",
+ " if !isempty(run_path)\n",
+ " cd(run_path)\n",
+ " end\n",
+ "catch e\n",
+ " @warn \"Run path init:\" exception=(e, catch_backtrace())\n",
+ "end\n",
+ "\n",
+ "\n",
+ "# emulate old Pkg.installed beahvior, see\n",
+ "# https://discourse.julialang.org/t/how-to-use-pkg-dependencies-instead-of-pkg-installed/36416/9\n",
+ "import Pkg\n",
+ "function isinstalled(pkg::String)\n",
+ " any(x -> x.name == pkg && x.is_direct_dep, values(Pkg.dependencies()))\n",
+ "end\n",
+ "\n",
+ "# ojs_define\n",
+ "if isinstalled(\"JSON\") && isinstalled(\"DataFrames\")\n",
+ " import JSON, DataFrames\n",
+ " global function ojs_define(; kwargs...)\n",
+ " convert(x) = x\n",
+ " convert(x::DataFrames.AbstractDataFrame) = Tables.rows(x)\n",
+ " content = Dict(\"contents\" => [Dict(\"name\" => k, \"value\" => convert(v)) for (k, v) in kwargs])\n",
+ " tag = \"\"\n",
+ " IJulia.display(MIME(\"text/html\"), tag)\n",
+ " end\n",
+ "elseif isinstalled(\"JSON\")\n",
+ " import JSON\n",
+ " global function ojs_define(; kwargs...)\n",
+ " content = Dict(\"contents\" => [Dict(\"name\" => k, \"value\" => v) for (k, v) in kwargs])\n",
+ " tag = \"\"\n",
+ " IJulia.display(MIME(\"text/html\"), tag)\n",
+ " end\n",
+ "else\n",
+ " global function ojs_define(; kwargs...)\n",
+ " @warn \"JSON package not available. Please install the JSON.jl package to use ojs_define.\"\n",
+ " end\n",
+ "end\n",
+ "\n",
+ "\n",
+ "# don't return kernel dependencies (b/c Revise should take care of dependencies)\n",
+ "nothing\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "53bd139d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.01"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Distributions\n",
+ "\n",
+ "# Reparameterized Beta distribution\n",
+ "function BetaMod(μ, σ)\n",
+ " α = μ * (μ * (1 - μ) / σ^2 - 1)\n",
+ " β = α * (1 - μ) / μ\n",
+ " return Beta(α, β)\n",
+ "end\n",
+ "\n",
+ "\n",
+ "\n",
+ "mean(BetaMod(0.5, 0.0833))\n",
+ "var(BetaMod(0.2, 0.1))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.10.2",
+ "language": "julia",
+ "name": "julia-1.10"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.10.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/content/.jupyter_cache/global.db b/content/.jupyter_cache/global.db
index bd252b8..f6c8110 100644
Binary files a/content/.jupyter_cache/global.db and b/content/.jupyter_cache/global.db differ
diff --git a/content/.quarto/_freeze/3_scales/execute-results/html.json b/content/.quarto/_freeze/3_scales/execute-results/html.json
new file mode 100644
index 0000000..644be4a
--- /dev/null
+++ b/content/.quarto/_freeze/3_scales/execute-results/html.json
@@ -0,0 +1,12 @@
+{
+ "hash": "b7153c2b346aaba747f06dca75c74302",
+ "result": {
+ "engine": "jupyter",
+ "markdown": "# Choice and Scales\n\n![](https://img.shields.io/badge/status-not_started-red)\n\n1. Beta models \n\n::: {#53bd139d .cell execution_count=1}\n``` {.julia .cell-code}\nusing Distributions\n\n# Reparameterized Beta distribution\nfunction BetaMod(μ, σ)\n α = μ * (μ * (1 - μ) / σ^2 - 1)\n β = α * (1 - μ) / μ\n return Beta(α, β)\nend\n\n\n\nmean(BetaMod(0.5, 0.0833))\nvar(BetaMod(0.2, 0.1))\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```\n0.01\n```\n:::\n:::\n\n\n2. OrdBeta models for slider scales\n3. Logistic models for binary data\n\nUse the speed accuracy data that we use in the next chapter.\n\n",
+ "supporting": [
+ "3_scales_files"
+ ],
+ "filters": [],
+ "includes": {}
+ }
+}
\ No newline at end of file
diff --git a/content/.quarto/_freeze/4a_rt_descriptive/execute-results/html.json b/content/.quarto/_freeze/4a_rt_descriptive/execute-results/html.json
index 8e9d93b..09524bf 100644
--- a/content/.quarto/_freeze/4a_rt_descriptive/execute-results/html.json
+++ b/content/.quarto/_freeze/4a_rt_descriptive/execute-results/html.json
@@ -1,8 +1,8 @@
{
- "hash": "f94cf23c97bc7ea53d810dcb9e760ab3",
+ "hash": "485397b899d19b6923f1cc82888d5854",
"result": {
"engine": "jupyter",
- "markdown": "# Descriptive Models\n\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n::: {#def3e6bc .cell execution_count=2}\n``` {.julia .cell-code code-fold=\"false\"}\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
10×5 DataFrame
Row
Participant
Condition
RT
Error
Frequency
Int64
String15
Float64
Bool
String15
1
1
Speed
0.7
false
Low
2
1
Speed
0.392
true
Very Low
3
1
Speed
0.46
false
Very Low
4
1
Speed
0.455
false
Very Low
5
1
Speed
0.505
true
Low
6
1
Speed
0.773
false
High
7
1
Speed
0.39
false
High
8
1
Speed
0.587
true
Low
9
1
Speed
0.603
false
Low
10
1
Speed
0.435
false
High
\n```\n:::\n:::\n\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n::: {#3b99ba16 .cell execution_count=3}\n``` {.julia .cell-code}\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n:::\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n::: {#60c565e5 .cell execution_count=4}\n``` {.julia .cell-code}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n\n```\n:::\n:::\n\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n::: {#fdfd5559 .cell execution_count=5}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#1e1a3766 .cell execution_count=6}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n::: {#ba2b1593 .cell execution_count=7}\n``` {.julia .cell-code}\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n:::\n\n\n::: {#f02ce7d4 .cell fig-height='7' fig-width='10' execution_count=8}\n``` {.julia .cell-code}\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n\n```\n:::\n:::\n\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n::: {#62ef3b30 .cell execution_count=9}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#4b488c39 .cell execution_count=10}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n::: {#7b17f69f .cell execution_count=11}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#fa8a4426 .cell execution_count=12}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#ebf2b747 .cell execution_count=13}\n``` {.julia .cell-code}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n\n```\n:::\n:::\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed.\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n\n::: {#07b852a9 .cell execution_count=14}\n``` {.julia .cell-code}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Specification\n\n::: {#dbebf70c .cell execution_count=15}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#76460a1b .cell execution_count=16}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#6a414e1f .cell execution_count=17}\n``` {.julia .cell-code}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n\n```\n:::\n:::\n\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n::: {#a7089bbe .cell execution_count=18}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#bf20b174 .cell execution_count=19}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#d4d95c07 .cell execution_count=20}\n``` {.julia .cell-code}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n```{=html}\n\n```\n:::\n:::\n\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n::: {#4df349b0 .cell execution_count=21}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n:::\n\n\n::: {#b9814b26 .cell execution_count=22}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_Wald; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#cf9d1165 .cell execution_count=23}\n``` {.julia .cell-code}\npred = predict(model_Wald([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_Wald)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted Wald Model\")\nfor i in 1:length(chain_Wald)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Comparison\n\nAt this stage, given the multiple options avaiable to model RTs, you might be wondering which model is the best.\nOne can compare the models using the **Leave-One-Out Cross-Validation (LOO-CV)** method, which is a Bayesian method to estimate the out-of-sample predictive accuracy of a model.\n\n::: {#398a7d32 .cell execution_count=24}\n``` {.julia .cell-code}\nusing ParetoSmooth\n\nloo_Gaussian = psis_loo(fit_Gaussian, chain_Gaussian, source=\"mcmc\")\nloo_ScaledGaussian = psis_loo(fit_ScaledlGaussian, chain_ScaledGaussian, source=\"mcmc\")\nloo_LogNormal = psis_loo(fit_LogNormal, chain_LogNormal, source=\"mcmc\")\nloo_ExGaussian = psis_loo(fit_ExGaussian, chain_ExGaussian, source=\"mcmc\")\nloo_Wald = psis_loo(fit_Wald, chain_Wald, source=\"mcmc\")\n\nloo_compare((\n Gaussian = loo_Gaussian, \n ScaledGaussian = loo_ScaledGaussian, \n LogNormal = loo_LogNormal, \n ExGaussian = loo_ExGaussian, \n Wald = loo_Wald))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```\n\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\n┌────────────────┬──────────┬────────┬────────┐\n│ │ cv_elpd │ cv_avg │ weight │\n├────────────────┼──────────┼────────┼────────┤\n│ ExGaussian │ 0.00 │ 0.00 │ 1.00 │\n│ LogNormal │ -322.27 │ -0.03 │ 0.00 │\n│ Wald │ -379.85 │ -0.04 │ 0.00 │\n│ ScaledGaussian │ -2465.97 │ -0.26 │ 0.00 │\n│ Gaussian │ -2974.49 │ -0.31 │ 0.00 │\n└────────────────┴──────────┴────────┴────────┘\n```\n:::\n:::\n\n\nThe `loo_compare()` function orders models from best to worse based on their ELPD (Expected Log Pointwise Predictive Density) and provides the difference in ELPD between the best model and the other models.\nAs one can see, traditional linear models perform terribly.\n\n",
+ "markdown": "# Descriptive Models\n\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n::: {#def3e6bc .cell execution_count=2}\n``` {.julia .cell-code code-fold=\"false\"}\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
10×5 DataFrame
Row
Participant
Condition
RT
Error
Frequency
Int64
String15
Float64
Bool
String15
1
1
Speed
0.7
false
Low
2
1
Speed
0.392
true
Very Low
3
1
Speed
0.46
false
Very Low
4
1
Speed
0.455
false
Very Low
5
1
Speed
0.505
true
Low
6
1
Speed
0.773
false
High
7
1
Speed
0.39
false
High
8
1
Speed
0.587
true
Low
9
1
Speed
0.603
false
Low
10
1
Speed
0.435
false
High
\n```\n:::\n:::\n\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n::: {#3b99ba16 .cell execution_count=3}\n``` {.julia .cell-code}\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n:::\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n::: {#60c565e5 .cell execution_count=4}\n``` {.julia .cell-code}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n\n```\n:::\n:::\n\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n::: {#fdfd5559 .cell execution_count=5}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#1e1a3766 .cell execution_count=6}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n::: {#ba2b1593 .cell execution_count=7}\n``` {.julia .cell-code}\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n:::\n\n\n::: {#f02ce7d4 .cell fig-height='7' fig-width='10' execution_count=8}\n``` {.julia .cell-code}\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n\n```\n:::\n:::\n\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n::: {#62ef3b30 .cell execution_count=9}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#4b488c39 .cell execution_count=10}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n::: {#7b17f69f .cell execution_count=11}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#fa8a4426 .cell execution_count=12}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#ebf2b747 .cell execution_count=13}\n``` {.julia .cell-code}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n\n```\n:::\n:::\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed. In this model, the *mean* $\\mu$ and is defined on the log-scale, and effects must be interpreted as multiplicative rather than additive (the condition increases the mean RT by a factor of $\\exp(\\mu_{condition})$). \n\nNote that for LogNormal distributions (as it is the case for many of the models introduced in the rest of the capter), the distribution parameters ($\\mu$ and $\\sigma$) are not independent with respect to the mean and the standard deviation (SD).\nThe empirical SD increases when the *mean* $\\mu$ increases (which is seen as a feature rather than a bug, as it is consistent with typical reaction time data [@wagenmakers2005relation]).\n\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n\n::: {#07b852a9 .cell execution_count=14}\n``` {.julia .cell-code}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Specification\n\n::: {#dbebf70c .cell execution_count=15}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#76460a1b .cell execution_count=16}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#6a414e1f .cell execution_count=17}\n``` {.julia .cell-code}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n\n```\n:::\n:::\n\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n\n::: {.callout-note}\n\n### LogNormal distributions in nature\n\nThe reason why the Normal distribution is so ubiquituous in nature (and hence used as a good default) is due to the **Central Limit Theorem**, which states that the sum of a large number of independent random variables will be approximately normally distributed. Because many things in nature are the result of the *addition* of many random processes, the Normal distribution is very common in real life.\n\nHowever, it turns out that the multiplication of random variables result in a **LogNormal** distribution, and multiplicating (rather than additive) cascades of processes are also very common in nature, from lengths of latent periods of infectious diseases to distribution of mineral resources in the Earth's crust, and the elemental mechanisms at stakes in physics and cell biolody [@limpert2001log].\n\nThus, using LogNormal distributions for RTs can be justified with the assumption that response times are the result of multiplicative stochastic processes happening in the brain.\n\n:::\n\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n::: {#a7089bbe .cell execution_count=18}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#bf20b174 .cell execution_count=19}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#d4d95c07 .cell execution_count=20}\n``` {.julia .cell-code}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n```{=html}\n\n```\n:::\n:::\n\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n::: {#4df349b0 .cell execution_count=21}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n:::\n\n\n::: {#b9814b26 .cell execution_count=22}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_Wald; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#cf9d1165 .cell execution_count=23}\n``` {.julia .cell-code}\npred = predict(model_Wald([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_Wald)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted Wald Model\")\nfor i in 1:length(chain_Wald)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Comparison\n\nAt this stage, given the multiple options avaiable to model RTs, you might be wondering which model is the best.\nOne can compare the models using the **Leave-One-Out Cross-Validation (LOO-CV)** method, which is a Bayesian method to estimate the out-of-sample predictive accuracy of a model.\n\n::: {#398a7d32 .cell execution_count=24}\n``` {.julia .cell-code}\nusing ParetoSmooth\n\nloo_Gaussian = psis_loo(fit_Gaussian, chain_Gaussian, source=\"mcmc\")\nloo_ScaledGaussian = psis_loo(fit_ScaledlGaussian, chain_ScaledGaussian, source=\"mcmc\")\nloo_LogNormal = psis_loo(fit_LogNormal, chain_LogNormal, source=\"mcmc\")\nloo_ExGaussian = psis_loo(fit_ExGaussian, chain_ExGaussian, source=\"mcmc\")\nloo_Wald = psis_loo(fit_Wald, chain_Wald, source=\"mcmc\")\n\nloo_compare((\n Gaussian = loo_Gaussian, \n ScaledGaussian = loo_ScaledGaussian, \n LogNormal = loo_LogNormal, \n ExGaussian = loo_ExGaussian, \n Wald = loo_Wald))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```\n\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\n┌────────────────┬──────────┬────────┬────────┐\n│ │ cv_elpd │ cv_avg │ weight │\n├────────────────┼──────────┼────────┼────────┤\n│ ExGaussian │ 0.00 │ 0.00 │ 1.00 │\n│ LogNormal │ -322.27 │ -0.03 │ 0.00 │\n│ Wald │ -379.85 │ -0.04 │ 0.00 │\n│ ScaledGaussian │ -2465.97 │ -0.26 │ 0.00 │\n│ Gaussian │ -2974.49 │ -0.31 │ 0.00 │\n└────────────────┴──────────┴────────┴────────┘\n```\n:::\n:::\n\n\nThe `loo_compare()` function orders models from best to worse based on their ELPD (Expected Log Pointwise Predictive Density) and provides the difference in ELPD between the best model and the other models.\nAs one can see, traditional linear models perform terribly.\n\n",
"supporting": [
"4a_rt_descriptive_files\\figure-html"
],
diff --git a/content/.quarto/cites/index.json b/content/.quarto/cites/index.json
index 9400964..50ea669 100644
--- a/content/.quarto/cites/index.json
+++ b/content/.quarto/cites/index.json
@@ -1 +1 @@
-{"4_rt.qmd":["wagenmakers2008diffusion","heathcote2012linear","theriault2024check","lo2015transform","schramm2019reaction","balota2011moving","matzke2009psychological","hohle1965inferred","kieffaber2006switch","matzke2009psychological","schwarz2001ex","heathcote2004fitting","anders2016shifted"],"2_predictors.qmd":[],"4_1_Normal.qmd":["wagenmakers2008diffusion","theriault2024check","lo2015transform","schramm2019reaction"],"1_introduction.qmd":[],"3_scales.qmd":[],"index.qmd":[],"references.qmd":[],"4a_rt_descriptive.qmd":["wagenmakers2008diffusion","heathcote2012linear","theriault2024check","lo2015transform","schramm2019reaction","balota2011moving","matzke2009psychological","hohle1965inferred","kieffaber2006switch","matzke2009psychological","schwarz2001ex","heathcote2004fitting","anders2016shifted"],"5_individual.qmd":[],"4b_rt_generative.qmd":[]}
+{"4a_rt_descriptive.qmd":["wagenmakers2008diffusion","heathcote2012linear","theriault2024check","lo2015transform","schramm2019reaction","wagenmakers2005relation","limpert2001log","balota2011moving","matzke2009psychological","hohle1965inferred","kieffaber2006switch","matzke2009psychological","schwarz2001ex","heathcote2004fitting","anders2016shifted"],"5_individual.qmd":[],"2_predictors.qmd":[],"3_scales.qmd":[],"references.qmd":[],"1_introduction.qmd":[],"index.qmd":[],"4b_rt_generative.qmd":[],"4_rt.qmd":["wagenmakers2008diffusion","heathcote2012linear","theriault2024check","lo2015transform","schramm2019reaction","balota2011moving","matzke2009psychological","hohle1965inferred","kieffaber2006switch","matzke2009psychological","schwarz2001ex","heathcote2004fitting","anders2016shifted"],"4_1_Normal.qmd":["wagenmakers2008diffusion","theriault2024check","lo2015transform","schramm2019reaction"]}
diff --git a/content/.quarto/idx/3_scales.qmd.json b/content/.quarto/idx/3_scales.qmd.json
index 2e64f39..a37c20f 100644
--- a/content/.quarto/idx/3_scales.qmd.json
+++ b/content/.quarto/idx/3_scales.qmd.json
@@ -1 +1 @@
-{"title":"Choice and Scales","markdown":{"headingText":"Choice and Scales","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-not_started-red)\n\n1. Beta models \n2. OrdBeta models for slider scales\n3. Logistic models for binary data\n\nUse the speed accuracy data that we use in the next chapter.","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"3_scales.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
+{"title":"Choice and Scales","markdown":{"headingText":"Choice and Scales","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-not_started-red)\n\n1. Beta models \n\n```{julia}\nusing Distributions\n\n# Reparameterized Beta distribution\nfunction BetaMod(μ, σ)\n α = μ * (μ * (1 - μ) / σ^2 - 1)\n β = α * (1 - μ) / μ\n return Beta(α, β)\nend\n\n\n\nmean(BetaMod(0.5, 0.0833))\nvar(BetaMod(0.2, 0.1))\n```\n\n2. OrdBeta models for slider scales\n3. Logistic models for binary data\n\nUse the speed accuracy data that we use in the next chapter.","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"jupyter"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"3_scales.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
diff --git a/content/.quarto/idx/4a_rt_descriptive.qmd.json b/content/.quarto/idx/4a_rt_descriptive.qmd.json
index 2d6275f..3bf38d0 100644
--- a/content/.quarto/idx/4a_rt_descriptive.qmd.json
+++ b/content/.quarto/idx/4a_rt_descriptive.qmd.json
@@ -1 +1 @@
-{"title":"Descriptive Models","markdown":{"headingText":"Descriptive Models","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n```{julia}\n#| code-fold: false\n\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n```{julia}\n#| output: false\n\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n```{julia}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n```{julia}\n#| output: false\n\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n\n```{julia}\n#| fig-width: 10\n#| fig-height: 7\n\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed.\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n```{julia}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n\n### Interpretation\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n\n```{julia}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n\n### Interpretation\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_Wald; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_Wald([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_Wald)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted Wald Model\")\nfor i in 1:length(chain_Wald)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n### Model Comparison\n\nAt this stage, given the multiple options avaiable to model RTs, you might be wondering which model is the best.\nOne can compare the models using the **Leave-One-Out Cross-Validation (LOO-CV)** method, which is a Bayesian method to estimate the out-of-sample predictive accuracy of a model.\n\n```{julia}\nusing ParetoSmooth\n\nloo_Gaussian = psis_loo(fit_Gaussian, chain_Gaussian, source=\"mcmc\")\nloo_ScaledGaussian = psis_loo(fit_ScaledlGaussian, chain_ScaledGaussian, source=\"mcmc\")\nloo_LogNormal = psis_loo(fit_LogNormal, chain_LogNormal, source=\"mcmc\")\nloo_ExGaussian = psis_loo(fit_ExGaussian, chain_ExGaussian, source=\"mcmc\")\nloo_Wald = psis_loo(fit_Wald, chain_Wald, source=\"mcmc\")\n\nloo_compare((\n Gaussian = loo_Gaussian, \n ScaledGaussian = loo_ScaledGaussian, \n LogNormal = loo_LogNormal, \n ExGaussian = loo_ExGaussian, \n Wald = loo_Wald))\n```\n\nThe `loo_compare()` function orders models from best to worse based on their ELPD (Expected Log Pointwise Predictive Density) and provides the difference in ELPD between the best model and the other models.\nAs one can see, traditional linear models perform terribly.\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"jupyter"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"4a_rt_descriptive.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
+{"title":"Descriptive Models","markdown":{"headingText":"Descriptive Models","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n```{julia}\n#| code-fold: false\n\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n```{julia}\n#| output: false\n\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n```{julia}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n```{julia}\n#| output: false\n\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n\n```{julia}\n#| fig-width: 10\n#| fig-height: 7\n\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed. In this model, the *mean* $\\mu$ and is defined on the log-scale, and effects must be interpreted as multiplicative rather than additive (the condition increases the mean RT by a factor of $\\exp(\\mu_{condition})$). \n\nNote that for LogNormal distributions (as it is the case for many of the models introduced in the rest of the capter), the distribution parameters ($\\mu$ and $\\sigma$) are not independent with respect to the mean and the standard deviation (SD).\nThe empirical SD increases when the *mean* $\\mu$ increases (which is seen as a feature rather than a bug, as it is consistent with typical reaction time data [@wagenmakers2005relation]).\n\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n```{julia}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n\n### Interpretation\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n\n```{julia}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n\n::: {.callout-note}\n\n### LogNormal distributions in nature\n\nThe reason why the Normal distribution is so ubiquituous in nature (and hence used as a good default) is due to the **Central Limit Theorem**, which states that the sum of a large number of independent random variables will be approximately normally distributed. Because many things in nature are the result of the *addition* of many random processes, the Normal distribution is very common in real life.\n\nHowever, it turns out that the multiplication of random variables result in a **LogNormal** distribution, and multiplicating (rather than additive) cascades of processes are also very common in nature, from lengths of latent periods of infectious diseases to distribution of mineral resources in the Earth's crust, and the elemental mechanisms at stakes in physics and cell biolody [@limpert2001log].\n\nThus, using LogNormal distributions for RTs can be justified with the assumption that response times are the result of multiplicative stochastic processes happening in the brain.\n\n:::\n\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n\n### Interpretation\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n```{julia}\n#| code-fold: false\n#| output: false\n\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n\n```{julia}\n#| code-fold: false\n\nhpd(chain_Wald; alpha=0.05)\n```\n\n```{julia}\npred = predict(model_Wald([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_Wald)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted Wald Model\")\nfor i in 1:length(chain_Wald)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n### Model Comparison\n\nAt this stage, given the multiple options avaiable to model RTs, you might be wondering which model is the best.\nOne can compare the models using the **Leave-One-Out Cross-Validation (LOO-CV)** method, which is a Bayesian method to estimate the out-of-sample predictive accuracy of a model.\n\n```{julia}\nusing ParetoSmooth\n\nloo_Gaussian = psis_loo(fit_Gaussian, chain_Gaussian, source=\"mcmc\")\nloo_ScaledGaussian = psis_loo(fit_ScaledlGaussian, chain_ScaledGaussian, source=\"mcmc\")\nloo_LogNormal = psis_loo(fit_LogNormal, chain_LogNormal, source=\"mcmc\")\nloo_ExGaussian = psis_loo(fit_ExGaussian, chain_ExGaussian, source=\"mcmc\")\nloo_Wald = psis_loo(fit_Wald, chain_Wald, source=\"mcmc\")\n\nloo_compare((\n Gaussian = loo_Gaussian, \n ScaledGaussian = loo_ScaledGaussian, \n LogNormal = loo_LogNormal, \n ExGaussian = loo_ExGaussian, \n Wald = loo_Wald))\n```\n\nThe `loo_compare()` function orders models from best to worse based on their ELPD (Expected Log Pointwise Predictive Density) and provides the difference in ELPD between the best model and the other models.\nAs one can see, traditional linear models perform terribly.\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"jupyter"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"4a_rt_descriptive.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
diff --git a/content/.quarto/idx/4b_rt_generative.qmd.json b/content/.quarto/idx/4b_rt_generative.qmd.json
index 1c55e66..4e50dcd 100644
--- a/content/.quarto/idx/4b_rt_generative.qmd.json
+++ b/content/.quarto/idx/4b_rt_generative.qmd.json
@@ -1 +1 @@
-{"title":"Generative Models","markdown":{"headingText":"Generative Models","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-WIP-orange)\n\nIn this chapter, we will move away from statistical models that **describe** the data to models of **data-generation processes**.\n\n## Evidence Accumulation\n\nIn the previous chapter, we introduced the **Wald** distribution and its parameters, *nu* $\\nu$ (drift rate) and *alpha* $\\alpha$ (threshold).\nThis distribution appears to have been first derived in 1900 to model **the time a stock reaches a certain price** (a *threshold* price) for the first time, and used in 1915 by Schrödinger as the time to first passage of a threshold of a Brownian motion (i.e., a random walk).\n\nA random walk describes a path consisting of a succession of random steps. It has been used by Francis Galton in 1894 to illustrate the *Central Limit Theorem*, and is now known as the **Galton Board**. The Galton Board is a physical model of a random walk, where balls are dropped from the top and bounce left or right at each peg until they reach the bottom. The distribution of the final position of the balls is a normal distribution.\n\n![](media/rt_galtonboard.gif)\n\n::: {.callout-caution}\nTODO: Replace with my own video.\n:::\n\nIn the figure below, we can see a computer simulation illustrating the same concept, with \"time\" being displayed on the x-axis. All iterations start at *y*=0, and change by -1 or +1 at each time step, until it reaches a threshold of *t* = 0.7 seconds.\n\n![](media/rt_randomwalk.gif)\n\n\nRandom walks are used to model a wide range of phenomena, such as the movement of particules and molecules, the stock market, the behavior of animals and, crucially, **cognitive processes**.\nFor instance, it can be used to approximate **evidence accumulation**: the idea that a decision maker (be it a Human or any other system) accumulates evidence in a \"stochastic\" (i.e., random) fashion over time until a certain threshold is reached, at which point a decision is made.\n\n## Wald Distribution (Revisited)\n\nThis is how the Wald distribution is actually **generated**. It corresponds to the distribution of times that it takes for a stochastic process to reach a certain **threshold** $\\alpha$ (a certain amount of \"evidence\").\nThe twist is that the process underlying this model is a random walk with a **drift rate** $\\nu$, which corresponds to the average amount of evidence accumulated per unit of time. \nIn other words, the **drift rate** $\\nu$ is the \"slope\" of the evidence accumulation process, representing the **strength of the evidence** (or the **speed** by which the decision maker accumulates evidence).\nThe **threshold** $\\alpha$ is the amount of evidence required to reach a decision (\"decision\" typically meaning making a response).\n\n![](media/rt_wald2.gif)\n\nAs you can see, the Wald distribution belongs to a family of models thata do not merely attempt at describing the empirical distributions by tweaking and convolving distributions (like the ExGaussian or LogNormal models). Instead, their parameters are characterizing the **data generation process**. \n\n::: {.callout-important}\n\nWhile such \"generative\" models offer potential insights into the cognitive processes underlying the data, they inherently make **strong assumptions** about said underlying process (for instance, that the data of a task can be approximated by a stochastic evidence accumulation process). It is thus crucial to always keep in mind the limitations and assumptions of the models we use. Don't forget, **with great power comes great responsability.**\n:::\n\n\n## Drift Diffusion Model (DDM)\n\nUse DDM as a case study to introduce generative models\n\n- [**Drift Diffusion Model (DDM) in R: A Tutorial**](https://dominiquemakowski.github.io/easyRT/articles/ddm.html)\n\n## Other Models (LBA, LNR)\n\n## Including Random Effects\n\nTODO.\n\n## Additional Resources\n\n- [**Lindelov's overview of RT models**](https://lindeloev.github.io/shiny-rt/): An absolute must-read.\n- [**De Boeck & Jeon (2019)**](https://www.frontiersin.org/articles/10.3389/fpsyg.2019.00102/full): A paper providing an overview of RT models.\n- [https://github.com/vasishth/bayescogsci](https://github.com/vasishth/bayescogsci)\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"4b_rt_generative.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
+{"title":"Generative Models","markdown":{"headingText":"Generative Models","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-WIP-orange)\n\nIn this chapter, we will move away from statistical models that **describe** the data to models of **data-generation processes**.\n\n## Evidence Accumulation\n\nIn the previous chapter, we introduced the **Wald** distribution and its parameters, *nu* $\\nu$ (drift rate) and *alpha* $\\alpha$ (threshold).\nThis distribution appears to have been first derived in 1900 to model **the time a stock reaches a certain price** (a *threshold* price) for the first time, and used in 1915 by Schrödinger as the time to first passage of a threshold of a Brownian motion (i.e., a random walk).\n\nA random walk describes a path consisting of a succession of random steps. It has been used by Francis Galton in 1894 to illustrate the *Central Limit Theorem*, and is now known as the **Galton Board**. The Galton Board is a physical model of a random walk, where balls are dropped from the top and bounce left or right at each peg until they reach the bottom. The distribution of the final position of the balls is a normal distribution.\n\n![](media/rt_galtonboard.gif)\n\n::: {.callout-caution}\nTODO: Replace with my own video.\n:::\n\nIn the figure below, we can see a computer simulation illustrating the same concept, with \"time\" being displayed on the x-axis. All iterations start at *y*=0, and change by -1 or +1 at each time step, until it reaches a threshold of *t* = 0.7 seconds.\n\n![](media/rt_randomwalk.gif)\n\n\nRandom walks are used to model a wide range of phenomena, such as the movement of particules and molecules, the stock market, the behavior of animals and, crucially, **cognitive processes**.\nFor instance, it can be used to approximate **evidence accumulation**: the idea that a decision maker (be it a Human or any other system) accumulates evidence in a \"stochastic\" (i.e., random) fashion over time until a certain threshold is reached, at which point a decision is made.\n\n## Wald Distribution (Revisited)\n\nThis is how the Wald distribution is actually **generated**. It corresponds to the distribution of times that it takes for a stochastic process to reach a certain **threshold** $\\alpha$ (a certain amount of \"evidence\").\nThe twist is that the process underlying this model is a random walk with a **drift rate** $\\nu$, which corresponds to the average amount of evidence accumulated per unit of time. \nIn other words, the **drift rate** $\\nu$ is the \"slope\" of the evidence accumulation process, representing the **strength of the evidence** (or the **speed** by which the decision maker accumulates evidence).\nThe **threshold** $\\alpha$ is the amount of evidence required to reach a decision (\"decision\" typically meaning making a response).\n\n![](media/rt_wald2.gif)\n\n> In this figure, the red line at 0 represents the non-decision time *tau* $\\tau$. The dotted red line corresponds to the *threshold* $\\alpha$, and the *drift rate* $\\nu$ is the slope of the black line. The time at which each individual random accumulator crosses the threshold forms a Wald distribution.\n\nAs you can see, the Wald distribution belongs to a family of models thata do not merely attempt at describing the empirical distributions by tweaking and convolving distributions (like the ExGaussian or LogNormal models). Instead, their parameters are characterizing the **data generation process**. \n\n::: {.callout-important}\n\nWhile such \"generative\" models offer potential insights into the cognitive processes underlying the data, they inherently make **strong assumptions** about said underlying process (for instance, that the data of a task can be approximated by a stochastic evidence accumulation process). It is thus crucial to always keep in mind the limitations and assumptions of the models we use. Don't forget, **with great power comes great responsability.**\n:::\n\n\n## Drift Diffusion Model (DDM)\n\nUse DDM as a case study to introduce generative models\n\n- [**Drift Diffusion Model (DDM) in R: A Tutorial**](https://dominiquemakowski.github.io/easyRT/articles/ddm.html)\n\n## Other Models (LBA, LNR)\n\n## Including Random Effects\n\nTODO.\n\n## Additional Resources\n\n- [**Lindelov's overview of RT models**](https://lindeloev.github.io/shiny-rt/): An absolute must-read.\n- [**De Boeck & Jeon (2019)**](https://www.frontiersin.org/articles/10.3389/fpsyg.2019.00102/full): A paper providing an overview of RT models.\n- [https://github.com/vasishth/bayescogsci](https://github.com/vasishth/bayescogsci)\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"4b_rt_generative.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
diff --git a/content/.quarto/idx/5_individual.qmd.json b/content/.quarto/idx/5_individual.qmd.json
deleted file mode 100644
index 6c1b5e8..0000000
--- a/content/.quarto/idx/5_individual.qmd.json
+++ /dev/null
@@ -1 +0,0 @@
-{"title":"Individual Parameters","markdown":{"headingText":"Individual Parameters","containsRefs":false,"markdown":"\n![](https://img.shields.io/badge/status-not_started-red)\n\n1. From mixed models\n2. As prior-informed individual Bayesian models","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":true,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"execute":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":true,"code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","output-file":"5_individual.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.549","bibliography":["references.bib"],"theme":"pulse","number-depth":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]}
\ No newline at end of file
diff --git a/content/.quarto/xref/04307669 b/content/.quarto/xref/04307669
index 42c82d3..7f7a477 100644
--- a/content/.quarto/xref/04307669
+++ b/content/.quarto/xref/04307669
@@ -1 +1 @@
-{"headings":[],"options":{"chapters":true},"entries":[]}
\ No newline at end of file
+{"headings":[],"entries":[],"options":{"chapters":true}}
\ No newline at end of file
diff --git a/content/.quarto/xref/15f266d2 b/content/.quarto/xref/15f266d2
index 9bba4c5..58a3446 100644
--- a/content/.quarto/xref/15f266d2
+++ b/content/.quarto/xref/15f266d2
@@ -1 +1 @@
-{"headings":["very-quick-intro-to-julia-and-turing","generate-data-from-normal-distribution","recover-distribution-parameters-with-turing","linear-models","boostrapping","hierarchical-models","bayesian-estimation","bayesian-mixed-linear-regression"],"options":{"chapters":true},"entries":[]}
\ No newline at end of file
+{"options":{"chapters":true},"headings":["very-quick-intro-to-julia-and-turing","generate-data-from-normal-distribution","recover-distribution-parameters-with-turing","linear-models","boostrapping","hierarchical-models","bayesian-estimation","bayesian-mixed-linear-regression"],"entries":[]}
\ No newline at end of file
diff --git a/content/.quarto/xref/26e6880e b/content/.quarto/xref/26e6880e
index fde02c6..472aafc 100644
--- a/content/.quarto/xref/26e6880e
+++ b/content/.quarto/xref/26e6880e
@@ -1 +1 @@
-{"options":{"chapters":true},"entries":[],"headings":["the-data","gaussian-aka-linear-model","model-specification","posterior-predictive-check","scaled-gaussian-model","solution-1-directional-effect-of-condition","solution-2-avoid-exploring-negative-variance-values","the-problem-with-linear-models","shifted-lognormal-model","prior-on-minimum-rt","model-specification-1","interpretation","exgaussian-model","conditional-tau-tau-parameter","interpretation-1","shifted-wald-model","model-specification-2","model-comparison"]}
\ No newline at end of file
+{"headings":["the-data","gaussian-aka-linear-model","model-specification","posterior-predictive-check","scaled-gaussian-model","solution-1-directional-effect-of-condition","solution-2-avoid-exploring-negative-variance-values","the-problem-with-linear-models","shifted-lognormal-model","prior-on-minimum-rt","model-specification-1","interpretation","exgaussian-model","conditional-tau-tau-parameter","interpretation-1","shifted-wald-model","model-specification-2","model-comparison"],"options":{"chapters":true},"entries":[]}
\ No newline at end of file
diff --git a/content/.quarto/xref/6cbe9151 b/content/.quarto/xref/6cbe9151
index 2210363..eb2e579 100644
--- a/content/.quarto/xref/6cbe9151
+++ b/content/.quarto/xref/6cbe9151
@@ -1 +1 @@
-{"options":{"chapters":true},"entries":[],"headings":["preface","why-julia","why-bayesian","the-plan","looking-for-coauthors"]}
\ No newline at end of file
+{"entries":[],"headings":["preface","why-julia","why-bayesian","the-plan","looking-for-coauthors"],"options":{"chapters":true}}
\ No newline at end of file
diff --git a/content/.quarto/xref/a408ff3e b/content/.quarto/xref/a408ff3e
index 0e7e7d0..8532718 100644
--- a/content/.quarto/xref/a408ff3e
+++ b/content/.quarto/xref/a408ff3e
@@ -1 +1 @@
-{"entries":[],"options":{"chapters":true},"headings":["evidence-accumulation","wald-distribution-revisited","drift-diffusion-model-ddm","other-models-lba-lnr","including-random-effects","additional-resources"]}
\ No newline at end of file
+{"headings":["evidence-accumulation","wald-distribution-revisited","drift-diffusion-model-ddm","other-models-lba-lnr","including-random-effects","additional-resources"],"options":{"chapters":true},"entries":[]}
\ No newline at end of file
diff --git a/content/.quarto/xref/ce37606d b/content/.quarto/xref/ce37606d
index 5129a29..42c82d3 100644
--- a/content/.quarto/xref/ce37606d
+++ b/content/.quarto/xref/ce37606d
@@ -1 +1 @@
-{"entries":[],"headings":[],"options":{"chapters":true}}
\ No newline at end of file
+{"headings":[],"options":{"chapters":true},"entries":[]}
\ No newline at end of file
diff --git a/content/3_scales.qmd b/content/3_scales.qmd
index 80eeedd..5a9c985 100644
--- a/content/3_scales.qmd
+++ b/content/3_scales.qmd
@@ -3,6 +3,23 @@
![](https://img.shields.io/badge/status-not_started-red)
1. Beta models
+
+```{julia}
+using Distributions
+
+# Reparameterized Beta distribution
+function BetaMod(μ, σ)
+ α = μ * (μ * (1 - μ) / σ^2 - 1)
+ β = α * (1 - μ) / μ
+ return Beta(α, β)
+end
+
+
+
+mean(BetaMod(0.5, 0.0833))
+var(BetaMod(0.2, 0.1))
+```
+
2. OrdBeta models for slider scales
3. Logistic models for binary data
diff --git a/content/4a_rt_descriptive.qmd b/content/4a_rt_descriptive.qmd
index 4cd4c9f..2675786 100644
--- a/content/4a_rt_descriptive.qmd
+++ b/content/4a_rt_descriptive.qmd
@@ -272,7 +272,11 @@ Instead, rather than applying arbitrary data transformation, it would be better
## Shifted LogNormal Model
One of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.
-A LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed.
+A LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed. In this model, the *mean* $\mu$ and is defined on the log-scale, and effects must be interpreted as multiplicative rather than additive (the condition increases the mean RT by a factor of $\exp(\mu_{condition})$).
+
+Note that for LogNormal distributions (as it is the case for many of the models introduced in the rest of the capter), the distribution parameters ($\mu$ and $\sigma$) are not independent with respect to the mean and the standard deviation (SD).
+The empirical SD increases when the *mean* $\mu$ increases (which is seen as a feature rather than a bug, as it is consistent with typical reaction time data [@wagenmakers2005relation]).
+
A **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\tau$ that corresponds to the minimum "starting time" of the response process.
We need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).
@@ -347,6 +351,20 @@ fig
This model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).
+
+::: {.callout-note}
+
+### LogNormal distributions in nature
+
+The reason why the Normal distribution is so ubiquituous in nature (and hence used as a good default) is due to the **Central Limit Theorem**, which states that the sum of a large number of independent random variables will be approximately normally distributed. Because many things in nature are the result of the *addition* of many random processes, the Normal distribution is very common in real life.
+
+However, it turns out that the multiplication of random variables result in a **LogNormal** distribution, and multiplicating (rather than additive) cascades of processes are also very common in nature, from lengths of latent periods of infectious diseases to distribution of mineral resources in the Earth's crust, and the elemental mechanisms at stakes in physics and cell biolody [@limpert2001log].
+
+Thus, using LogNormal distributions for RTs can be justified with the assumption that response times are the result of multiplicative stochastic processes happening in the brain.
+
+:::
+
+
## ExGaussian Model
Another popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].
diff --git a/content/4b_rt_generative.qmd b/content/4b_rt_generative.qmd
index 31b17e0..0b8cb8f 100644
--- a/content/4b_rt_generative.qmd
+++ b/content/4b_rt_generative.qmd
@@ -34,6 +34,8 @@ The **threshold** $\alpha$ is the amount of evidence required to reach a decisio
![](media/rt_wald2.gif)
+> In this figure, the red line at 0 represents the non-decision time *tau* $\tau$. The dotted red line corresponds to the *threshold* $\alpha$, and the *drift rate* $\nu$ is the slope of the black line. The time at which each individual random accumulator crosses the threshold forms a Wald distribution.
+
As you can see, the Wald distribution belongs to a family of models thata do not merely attempt at describing the empirical distributions by tweaking and convolving distributions (like the ExGaussian or LogNormal models). Instead, their parameters are characterizing the **data generation process**.
::: {.callout-important}
diff --git a/content/_freeze/3_scales/execute-results/html.json b/content/_freeze/3_scales/execute-results/html.json
new file mode 100644
index 0000000..644be4a
--- /dev/null
+++ b/content/_freeze/3_scales/execute-results/html.json
@@ -0,0 +1,12 @@
+{
+ "hash": "b7153c2b346aaba747f06dca75c74302",
+ "result": {
+ "engine": "jupyter",
+ "markdown": "# Choice and Scales\n\n![](https://img.shields.io/badge/status-not_started-red)\n\n1. Beta models \n\n::: {#53bd139d .cell execution_count=1}\n``` {.julia .cell-code}\nusing Distributions\n\n# Reparameterized Beta distribution\nfunction BetaMod(μ, σ)\n α = μ * (μ * (1 - μ) / σ^2 - 1)\n β = α * (1 - μ) / μ\n return Beta(α, β)\nend\n\n\n\nmean(BetaMod(0.5, 0.0833))\nvar(BetaMod(0.2, 0.1))\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```\n0.01\n```\n:::\n:::\n\n\n2. OrdBeta models for slider scales\n3. Logistic models for binary data\n\nUse the speed accuracy data that we use in the next chapter.\n\n",
+ "supporting": [
+ "3_scales_files"
+ ],
+ "filters": [],
+ "includes": {}
+ }
+}
\ No newline at end of file
diff --git a/content/_freeze/4a_rt_descriptive/execute-results/html.json b/content/_freeze/4a_rt_descriptive/execute-results/html.json
index 8e9d93b..09524bf 100644
--- a/content/_freeze/4a_rt_descriptive/execute-results/html.json
+++ b/content/_freeze/4a_rt_descriptive/execute-results/html.json
@@ -1,8 +1,8 @@
{
- "hash": "f94cf23c97bc7ea53d810dcb9e760ab3",
+ "hash": "485397b899d19b6923f1cc82888d5854",
"result": {
"engine": "jupyter",
- "markdown": "# Descriptive Models\n\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n::: {#def3e6bc .cell execution_count=2}\n``` {.julia .cell-code code-fold=\"false\"}\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
10×5 DataFrame
Row
Participant
Condition
RT
Error
Frequency
Int64
String15
Float64
Bool
String15
1
1
Speed
0.7
false
Low
2
1
Speed
0.392
true
Very Low
3
1
Speed
0.46
false
Very Low
4
1
Speed
0.455
false
Very Low
5
1
Speed
0.505
true
Low
6
1
Speed
0.773
false
High
7
1
Speed
0.39
false
High
8
1
Speed
0.587
true
Low
9
1
Speed
0.603
false
Low
10
1
Speed
0.435
false
High
\n```\n:::\n:::\n\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n::: {#3b99ba16 .cell execution_count=3}\n``` {.julia .cell-code}\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n:::\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n::: {#60c565e5 .cell execution_count=4}\n``` {.julia .cell-code}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n\n```\n:::\n:::\n\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n::: {#fdfd5559 .cell execution_count=5}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#1e1a3766 .cell execution_count=6}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n::: {#ba2b1593 .cell execution_count=7}\n``` {.julia .cell-code}\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n:::\n\n\n::: {#f02ce7d4 .cell fig-height='7' fig-width='10' execution_count=8}\n``` {.julia .cell-code}\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n\n```\n:::\n:::\n\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n::: {#62ef3b30 .cell execution_count=9}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#4b488c39 .cell execution_count=10}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n::: {#7b17f69f .cell execution_count=11}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#fa8a4426 .cell execution_count=12}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#ebf2b747 .cell execution_count=13}\n``` {.julia .cell-code}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n\n```\n:::\n:::\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed.\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n\n::: {#07b852a9 .cell execution_count=14}\n``` {.julia .cell-code}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Specification\n\n::: {#dbebf70c .cell execution_count=15}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#76460a1b .cell execution_count=16}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#6a414e1f .cell execution_count=17}\n``` {.julia .cell-code}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n\n```\n:::\n:::\n\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n::: {#a7089bbe .cell execution_count=18}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#bf20b174 .cell execution_count=19}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#d4d95c07 .cell execution_count=20}\n``` {.julia .cell-code}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n```{=html}\n\n```\n:::\n:::\n\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n::: {#4df349b0 .cell execution_count=21}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n:::\n\n\n::: {#b9814b26 .cell execution_count=22}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_Wald; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#cf9d1165 .cell execution_count=23}\n``` {.julia .cell-code}\npred = predict(model_Wald([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_Wald)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted Wald Model\")\nfor i in 1:length(chain_Wald)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Comparison\n\nAt this stage, given the multiple options avaiable to model RTs, you might be wondering which model is the best.\nOne can compare the models using the **Leave-One-Out Cross-Validation (LOO-CV)** method, which is a Bayesian method to estimate the out-of-sample predictive accuracy of a model.\n\n::: {#398a7d32 .cell execution_count=24}\n``` {.julia .cell-code}\nusing ParetoSmooth\n\nloo_Gaussian = psis_loo(fit_Gaussian, chain_Gaussian, source=\"mcmc\")\nloo_ScaledGaussian = psis_loo(fit_ScaledlGaussian, chain_ScaledGaussian, source=\"mcmc\")\nloo_LogNormal = psis_loo(fit_LogNormal, chain_LogNormal, source=\"mcmc\")\nloo_ExGaussian = psis_loo(fit_ExGaussian, chain_ExGaussian, source=\"mcmc\")\nloo_Wald = psis_loo(fit_Wald, chain_Wald, source=\"mcmc\")\n\nloo_compare((\n Gaussian = loo_Gaussian, \n ScaledGaussian = loo_ScaledGaussian, \n LogNormal = loo_LogNormal, \n ExGaussian = loo_ExGaussian, \n Wald = loo_Wald))\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```\n\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\n┌────────────────┬──────────┬────────┬────────┐\n│ │ cv_elpd │ cv_avg │ weight │\n├────────────────┼──────────┼────────┼────────┤\n│ ExGaussian │ 0.00 │ 0.00 │ 1.00 │\n│ LogNormal │ -322.27 │ -0.03 │ 0.00 │\n│ Wald │ -379.85 │ -0.04 │ 0.00 │\n│ ScaledGaussian │ -2465.97 │ -0.26 │ 0.00 │\n│ Gaussian │ -2974.49 │ -0.31 │ 0.00 │\n└────────────────┴──────────┴────────┴────────┘\n```\n:::\n:::\n\n\nThe `loo_compare()` function orders models from best to worse based on their ELPD (Expected Log Pointwise Predictive Density) and provides the difference in ELPD between the best model and the other models.\nAs one can see, traditional linear models perform terribly.\n\n",
+ "markdown": "# Descriptive Models\n\n![](https://img.shields.io/badge/status-up_to_date-green)\n\n## The Data\n\nFor this chapter, we will be using the data from @wagenmakers2008diffusion - Experiment 1 [also reanalyzed by @heathcote2012linear], that contains responses and response times for several participants in two conditions (where instructions emphasized either **speed** or **accuracy**).\nUsing the same procedure as the authors, we excluded all trials with uninterpretable response time, i.e., responses that are too fast (<180 ms) or too slow [>2 sec instead of >3 sec, see @theriault2024check for a discussion on outlier removal].\n\n::: {#def3e6bc .cell execution_count=2}\n``` {.julia .cell-code code-fold=\"false\"}\nusing Downloads, CSV, DataFrames, Random\nusing Turing, Distributions, SequentialSamplingModels\nusing GLMakie\n\nRandom.seed!(123) # For reproducibility\n\ndf = CSV.read(Downloads.download(\"https://raw.githubusercontent.com/DominiqueMakowski/CognitiveModels/main/data/wagenmakers2008.csv\"), DataFrame)\n\n# Show 10 first rows\nfirst(df, 10)\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
10×5 DataFrame
Row
Participant
Condition
RT
Error
Frequency
Int64
String15
Float64
Bool
String15
1
1
Speed
0.7
false
Low
2
1
Speed
0.392
true
Very Low
3
1
Speed
0.46
false
Very Low
4
1
Speed
0.455
false
Very Low
5
1
Speed
0.505
true
Low
6
1
Speed
0.773
false
High
7
1
Speed
0.39
false
High
8
1
Speed
0.587
true
Low
9
1
Speed
0.603
false
Low
10
1
Speed
0.435
false
High
\n```\n:::\n:::\n\n\nIn the previous chapter, we modelled the error rate (the probability of making an error) using a logistic model, and observed that it was higher in the `\"Speed\"` condition. \nBut how about speed? We are going to first take interest in the RT of **Correct** answers only (as we can assume that errors are underpinned by a different *generative process*). \n\nAfter filtering out the errors, we create a new column, `Accuracy`, which is the \"binarization\" of the `Condition` column, and is equal to 1 when the condition is `\"Accuracy\"` and 0 when it is `\"Speed\"`.\n\n::: {#3b99ba16 .cell execution_count=3}\n``` {.julia .cell-code}\ndf = df[df.Error .== 0, :]\ndf.Accuracy = df.Condition .== \"Accuracy\"\n```\n:::\n\n\n::: {.callout-tip title=\"Code Tip\"}\nNote the usage of *vectorization* `.==` as we want to compare each element of the `Condition` vector to the target `\"Accuracy\"`.\n:::\n\n::: {#60c565e5 .cell execution_count=4}\n``` {.julia .cell-code}\nfunction plot_distribution(df, title=\"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n fig = Figure()\n ax = Axis(fig[1, 1], title=title,\n xlabel=\"RT (s)\",\n ylabel=\"Distribution\",\n yticksvisible=false,\n xticksvisible=false,\n yticklabelsvisible=false)\n Makie.density!(df[df.Condition .== \"Speed\", :RT], color=(\"#EF5350\", 0.7), label = \"Speed\")\n Makie.density!(df[df.Condition .== \"Accuracy\", :RT], color=(\"#66BB6A\", 0.7), label = \"Accuracy\")\n Makie.axislegend(\"Condition\"; position=:rt)\n Makie.ylims!(ax, (0, nothing))\n return fig\nend\n\nplot_distribution(df, \"Empirical Distribution of Data from Wagenmakers et al. (2018)\")\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n\n```\n:::\n:::\n\n\n## Gaussian (aka *Linear*) Model\n\n::: {.callout-note}\nNote that until the last section of this chapter, we will disregard the existence of multiple participants (which require the inclusion of random effects in the model).\nWe will treat the data as if it was a single participant at first to better understand the parameters, but will show how to add random effects at the end.\n:::\n\nA linear model is the most common type of model. \nIt aims at predicting the **mean** $\\mu$ of the outcome variable using a **Normal** (aka *Gaussian*) distribution for the residuals.\nIn other words, it models the outcome $y$ as a Normal distribution with a mean $\\mu$ that is itself the result of a linear function of the predictors $X$ and a variance $\\sigma$ that is constant across all values of the predictors.\nIt can be written as $y = Normal(\\mu, \\sigma)$, where $\\mu = intercept + slope * X$.\n\nIn order to fit a Linear Model for RTs, we need to set a prior on all these parameters, namely:\n- The variance $\\sigma$ (correspondong to the \"spread\" of RTs)\n- The mean $\\mu$ for the intercept (i.e., at the reference condition which is in our case `\"Speed\"`)\n- The effect of the condition (the slope).\n\n### Model Specification\n\n::: {#fdfd5559 .cell execution_count=5}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Gaussian(rt; condition=nothing)\n\n # Set priors on variance, intercept and effect of condition\n σ ~ truncated(Normal(0, 0.5); lower=0)\n\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\n\nfit_Gaussian = model_Gaussian(df.RT; condition=df.Accuracy)\nchain_Gaussian = sample(fit_Gaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#1e1a3766 .cell execution_count=6}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_Gaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nThe effect of Condition is significant, people are on average slower (higher RT) when condition is `\"Accuracy\"`.\nBut is our model good?\n\n### Posterior Predictive Check\n\n::: {#ba2b1593 .cell execution_count=7}\n``` {.julia .cell-code}\npred = predict(model_Gaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_Gaussian)\npred = Array(pred)\n```\n:::\n\n\n::: {#f02ce7d4 .cell fig-height='7' fig-width='10' execution_count=8}\n``` {.julia .cell-code}\nfig = plot_distribution(df, \"Predictions made by Gaussian (aka Linear) Model\")\nfor i in 1:length(chain_Gaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n\n```\n:::\n:::\n\n\n## Scaled Gaussian Model\n\nThe previous model, despite its poor fit to the data, suggests that the mean RT is higher for the `Accuracy` condition. But it seems like the distribution is also *wider* (response time is more variable). \nTypical linear model estimate only one value for sigma $\\sigma$ for the whole model, hence the requirement for **homoscedasticity**.\n\n::: {.callout-note}\n**Homoscedasticity**, or homogeneity of variances, is the assumption of similar variances accross different values of predictors. \nIt is important in linear models as only one value for sigma $\\sigma$ is estimated.\n:::\n\nIs it possible to set sigma $\\sigma$ as a parameter that would depend on the condition, in the same way as mu $\\mu$? In Julia, this is very simple.\n\nAll we need is to set sigma $\\sigma$ as the result of a linear function, such as $\\sigma = intercept + slope * condition$.\nThis means setting a prior on the intercept of sigma $\\sigma$ (in our case, the variance in the reference condition) and a prior on how much this variance changes for the other condition.\nThis change can, by definition, be positive or negative (i.e., the other condition can have either a biggger or a smaller variance), so the prior over the effect of condition should ideally allow for positive and negative values (e.g., `σ_condition ~ Normal(0, 0.1)`).\n\nBut this leads to an **important problem**.\n\n::: {.callout-important}\nThe combination of an intercept and a (possible negative) slope for sigma $\\sigma$ technically allows for negative variance values, which is impossible (distributions cannot have a negative variance).\nThis issue is one of the most important to address when setting up complex models for RTs.\n:::\n\nIndeed, even if we set a very narrow prior on the intercept of sigma $\\sigma$ to fix it at for instance **0.14**, and a narrow prior on the effect of condition, say $Normal(0, 0.001)$, an effect of condition of **-0.15** is still possible (albeit with very low probability). \nAnd such effect would lead to a sigma $\\sigma$ of **0.14 - 0.15 = -0.01**, which would lead to an error (and this will often happen as the sampling process does explore unlikely regions of the parameter space).\n\n\n### Solution 1: Directional Effect of Condition\n\nOne possible (but not recommended) solution is to simply make it impossible for the effect of condition to be negative by *Truncating* the prior to a lower bound of 0. \nThis can work in our case, because we know that the comparison condition is likely to have a higher variance than the reference condition (the intercept) - and if it wasn't the case, we could have changed the reference factor.\nHowever, this is not a good practice as we are enforcing a very strong a priori specific direction of the effect, which is not always justified.\n\n::: {#62ef3b30 .cell execution_count=9}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0) # Same prior as previously\n σ_condition ~ truncated(Normal(0, 0.1); lower=0) # Enforce positivity\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#4b488c39 .cell execution_count=10}\n``` {.julia .cell-code code-fold=\"false\"}\n# Summary (95% CI)\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\nWe can see that the effect of condition on sigma $\\sigma$ is significantly positive: the variance is higher in the `Accuracy` condition as compared to the `Speed` condition. \n\n### Solution 2: Avoid Exploring Negative Variance Values\n\nThe other trick is to force the sampling algorithm to avoid exploring negative variance values (when sigma $\\sigma$ < 0).\nThis can be done by adding a conditional statement when sigma $\\sigma$ is negative to avoid trying this value and erroring, and instead returning an infinitely low model probability (`-Inf`) to push away the exploration of this impossible region.\n\n::: {#7b17f69f .cell execution_count=11}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ScaledlGaussian(rt; condition=nothing)\n\n # Priors\n μ_intercept ~ truncated(Normal(0, 1); lower=0)\n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Normal(μ, σ)\n end\nend\n\nfit_ScaledlGaussian = model_ScaledlGaussian(df.RT; condition=df.Accuracy)\nchain_ScaledGaussian = sample(fit_ScaledlGaussian, NUTS(), 400)\n```\n:::\n\n\n::: {#fa8a4426 .cell execution_count=12}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ScaledGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#ebf2b747 .cell execution_count=13}\n``` {.julia .cell-code}\npred = predict(model_ScaledlGaussian([(missing) for i in 1:length(df.RT)], condition=df.Accuracy), chain_ScaledGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Scaled Gaussian Model\")\nfor i in 1:length(chain_ScaledGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n\n```\n:::\n:::\n\n\n\n\nAlthough relaxing the homoscedasticity assumption is a good step forward, allowing us to make **richer conclusions** and better capturing the data.\nDespite that, the Gaussian model stil seem to be a poor fit to the data.\n\n## The Problem with Linear Models\n\nReaction time (RTs) have been traditionally modeled using traditional linear models and their derived statistical tests such as *t*-test and ANOVAs. Importantly, linear models - by definition - will try to predict the *mean* of the outcome variable by estimating the \"best fitting\" *Normal* distribution. In the context of reaction times (RTs), this is not ideal, as RTs typically exhibit a non-normal distribution, skewed towards the left with a long tail towards the right. This means that the parameters of a Normal distribution (mean $\\mu$ and standard deviation $\\sigma$) are not good descriptors of the data.\n\n![](media/rt_normal.gif)\n\n> Linear models try to find the best fitting Normal distribution for the data. However, for reaction times, even the best fitting Normal distribution (in red) does not capture well the actual data (in grey).\n\nA popular mitigation method to account for the non-normality of RTs is to transform the data, using for instance the popular *log-transform*. \nHowever, this practice should be avoided as it leads to various issues, including loss of power and distorted results interpretation [@lo2015transform; @schramm2019reaction].\nInstead, rather than applying arbitrary data transformation, it would be better to swap the Normal distribution used by the model for a more appropriate one that can better capture the characteristics of a RT distribution.\n\n\n## Shifted LogNormal Model\n\nOne of the obvious candidate alternative to the log-transformation would be to use a model with a Log-transformed Normal distribution.\nA LogNormal distribution is a distribution of a random variable whose logarithm is normally distributed. In this model, the *mean* $\\mu$ and is defined on the log-scale, and effects must be interpreted as multiplicative rather than additive (the condition increases the mean RT by a factor of $\\exp(\\mu_{condition})$). \n\nNote that for LogNormal distributions (as it is the case for many of the models introduced in the rest of the capter), the distribution parameters ($\\mu$ and $\\sigma$) are not independent with respect to the mean and the standard deviation (SD).\nThe empirical SD increases when the *mean* $\\mu$ increases (which is seen as a feature rather than a bug, as it is consistent with typical reaction time data [@wagenmakers2005relation]).\n\nA **Shifted** LogNormal model introduces a shift (a delay) parameter *tau* $\\tau$ that corresponds to the minimum \"starting time\" of the response process.\n\nWe need to set a prior for this parameter, which is usually truncated between 0 (to exclude negative minimum times) and the minimum RT of the data (the logic being that the minimum delay for response must be lower than the faster response actually observed).\n\nWhile $Uniform(0, min(RT))$ is a common choice of prior, it is not ideal as it implies that all values between 0 and the minimum RT are equally likely, which is not the case.\nIndeed, psychology research has shown that such minimum response time for Humans is often betwen 100 and 250 ms. \nMoreover, in our case, we explicitly removed all RTs below 180 ms, suggesting that the minimum response time is more likely to approach 180 ms than 0 ms.\n\n### Prior on Minimum RT\n\nInstead of a $Uniform$ prior, we will use a $Gamma(1.1, 11)$ distribution (truncated at min. RT), as this particular parameterization reflects the low probability of very low minimum RTs (near 0) and a steadily increasing probability for increasing times. \n\n::: {#07b852a9 .cell execution_count=14}\n``` {.julia .cell-code}\nxaxis = range(0, 0.3, 1000)\nfig = lines(xaxis, pdf.(Gamma(1.1, 11), xaxis); color=:blue, label=\"Gamma(1.1, 11)\")\nvlines!([minimum(df.RT)]; color=\"red\", linestyle=:dash, label=\"Min. RT = 0.18 s\")\naxislegend()\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n\n```\n:::\n:::\n\n\n### Model Specification\n\n::: {#dbebf70c .cell execution_count=15}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_LogNormal(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n τ ~ truncated(Gamma(1.1, 11); upper=min_rt)\n\n μ_intercept ~ Normal(0, exp(1)) # On the log-scale: exp(μ) to get value in seconds\n μ_condition ~ Normal(0, exp(0.3))\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ShiftedLogNormal(μ, σ, τ)\n end\nend\n\nfit_LogNormal = model_LogNormal(df.RT; condition=df.Accuracy)\nchain_LogNormal = sample(fit_LogNormal, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#76460a1b .cell execution_count=16}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_LogNormal; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#6a414e1f .cell execution_count=17}\n``` {.julia .cell-code}\npred = predict(model_LogNormal([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_LogNormal)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_LogNormal)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n\n```\n:::\n:::\n\n\nThis model provides a much better fit to the data, and confirms that the `Accuracy` condition is associated with higher RTs and higher variability (i.e., a larger distribution width).\n\n\n::: {.callout-note}\n\n### LogNormal distributions in nature\n\nThe reason why the Normal distribution is so ubiquituous in nature (and hence used as a good default) is due to the **Central Limit Theorem**, which states that the sum of a large number of independent random variables will be approximately normally distributed. Because many things in nature are the result of the *addition* of many random processes, the Normal distribution is very common in real life.\n\nHowever, it turns out that the multiplication of random variables result in a **LogNormal** distribution, and multiplicating (rather than additive) cascades of processes are also very common in nature, from lengths of latent periods of infectious diseases to distribution of mineral resources in the Earth's crust, and the elemental mechanisms at stakes in physics and cell biolody [@limpert2001log].\n\nThus, using LogNormal distributions for RTs can be justified with the assumption that response times are the result of multiplicative stochastic processes happening in the brain.\n\n:::\n\n\n## ExGaussian Model\n\nAnother popular model to describe RTs uses the **ExGaussian** distribution, i.e., the *Exponentially-modified Gaussian* distribution [@balota2011moving; @matzke2009psychological].\n\nThis distribution is a convolution of normal and exponential distributions and has three parameters, namely *mu* $\\mu$ and *sigma* $\\sigma$ - the mean and standard deviation of the Gaussian distribution - and *tau* $\\tau$ - the exponential component of the distribution (note that although denoted by the same letter, it does not correspond directly to a shift of the distribution). \nIntuitively, these parameters reflect the centrality, the width and the tail dominance, respectively.\n\n![](media/rt_exgaussian.gif)\n\n\nBeyond the descriptive value of these types of models, some have tried to interpret their parameters in terms of **cognitive mechanisms**, arguing for instance that changes in the Gaussian components ($\\mu$ and $\\sigma$) reflect changes in attentional processes [e.g., \"the time required for organization and execution of the motor response\"; @hohle1965inferred], whereas changes in the exponential component ($\\tau$) reflect changes in intentional (i.e., decision-related) processes [@kieffaber2006switch]. \nHowever, @matzke2009psychological demonstrate that there is likely no direct correspondence between ex-Gaussian parameters and cognitive mechanisms, and underline their value primarily as **descriptive tools**, rather than models of cognition *per se*.\n\nDescriptively, the three parameters can be interpreted as:\n\n- **Mu** $\\mu$ : The location / centrality of the RTs. Would correspond to the mean in a symmetrical distribution.\n- **Sigma** $\\sigma$ : The variability and dispersion of the RTs. Akin to the standard deviation in normal distributions.\n- **Tau** $\\tau$ : Tail weight / skewness of the distribution.\n\n::: {.callout-important}\nNote that these parameters are not independent with respect to distribution characteristics, such as the empirical mean and SD. \nBelow is an example of different distributions with the same location (*mu* $\\mu$) and dispersion (*sigma* $\\sigma$) parameters.\nAlthough only the tail weight parameter (*tau* $\\tau$) is changed, the whole distribution appears to shift is centre of mass. \nHence, one should be careful note to interpret the values of *mu* $\\mu$ directly as the \"mean\" or the distribution peak and *sigma* $\\sigma$ as the SD or the \"width\".\n:::\n\n![](media/rt_exgaussian2.gif)\n\n### Conditional Tau $\\tau$ Parameter\n\nIn the same way as we modeled the effect of the condition on the variance component *sigma* $\\sigma$, we can do the same for any other parameters, including the exponential component *tau* $\\tau$.\nAll wee need is to set a prior on the intercept and the condition effect, and make sure that $\\tau > 0$. \n\n::: {#a7089bbe .cell execution_count=18}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_ExGaussian(rt; condition=nothing)\n\n # Priors \n μ_intercept ~ Normal(0, 1) \n μ_condition ~ Normal(0, 0.3)\n\n σ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n σ_condition ~ Normal(0, 0.1)\n\n τ_intercept ~ truncated(Normal(0, 0.5); lower=0)\n τ_condition ~ Normal(0, 0.1)\n\n for i in 1:length(rt)\n μ = μ_intercept + μ_condition * condition[i]\n σ = σ_intercept + σ_condition * condition[i]\n if σ < 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ <= 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ ExGaussian(μ, σ, τ)\n end\nend\n\nfit_ExGaussian = model_ExGaussian(df.RT; condition=df.Accuracy)\nchain_ExGaussian = sample(fit_ExGaussian, NUTS(), 400)\n```\n:::\n\n\n### Interpretation\n\n::: {#bf20b174 .cell execution_count=19}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_ExGaussian; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n\n::: {.ansi-escaped-output}\n```{=html}\n
\n```\n:::\n\n:::\n:::\n\n\n::: {#d4d95c07 .cell execution_count=20}\n``` {.julia .cell-code}\npred = predict(model_ExGaussian([(missing) for i in 1:length(df.RT)]; condition=df.Accuracy), chain_ExGaussian)\npred = Array(pred)\n\nfig = plot_distribution(df, \"Predictions made by Shifted LogNormal Model\")\nfor i in 1:length(chain_ExGaussian)\n lines!(Makie.KernelDensity.kde(pred[:, i]), color=ifelse(df.Accuracy[i] == 1, \"#388E3C\", \"#D32F2F\"), alpha=0.1)\nend\nfig\n```\n\n::: {.cell-output .cell-output-stderr}\n```\n┌ Warning: Found `resolution` in the theme when creating a `Scene`. The `resolution` keyword for `Scene`s and `Figure`s has been deprecated. Use `Figure(; size = ...` or `Scene(; size = ...)` instead, which better reflects that this is a unitless size and not a pixel resolution. The key could also come from `set_theme!` calls or related theming functions.\n└ @ Makie C:\\Users\\domma\\.julia\\packages\\Makie\\VRavR\\src\\scenes.jl:220\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n```{=html}\n\n```\n:::\n:::\n\n\nThe ExGaussian model also provides an excellent fit to the data. \nMoreover, by modeling more parameters (including *tau* $\\tau$), we can draw more nuanced conclusions.\nIn this case, the `Accuracy` condition is associated with higher RTs, higher variability, and a heavier tail (i.e., more extreme values).\n\n## Shifted Wald Model\n\nThe **Wald** distribution, also known as the **Inverse Gaussian** distribution, corresponds to the distribution of the first passage time of a Wiener process with a drift rate $\\mu$ and a diffusion rate $\\sigma$.\nWhile we will unpack this definition below and emphasize its important consequences, one can first note that it has been described as a potential model for RTs when convoluted with an *exponential* distribution (in the same way that the ExGaussian distribution is a convolution of a Gaussian and an exponential distribution).\nHowever, this **Ex-Wald** model [@schwarz2001ex] was shown to be less appropriate than one of its variant, the **Shifted Wald** distribution [@heathcote2004fitting; @anders2016shifted].\n\nNote that the Wald distribution, similarly to the models that we will be covering next (the \"generative\" models), is different from the previous distributions in that it is not characterized by a \"location\" and \"scale\" parameters (*mu* $\\mu$ and *sigma* $\\sigma$).\nInstead, the parameters of the Shifted Wald distribution are:\n\n- **Nu** $\\nu$ : A **drift** parameter, corresponding to the strength of the evidence accumulation process.\n- **Alpha** $\\alpha$ : A **threshold** parameter, corresponding to the amount of evidence required to make a decision.\n- **Tau** $\\tau$ : A **delay** parameter, corresponding to the non-response time (i.e., the minimum time required to process the stimulus and respond). A shift parameter similar to the one in the Shifted LogNormal model.\n\n![](media/rt_wald.gif)\n\nAs we can see, these parameters do not have a direct correspondence with the mean and standard deviation of the distribution.\nTheir interpretation is more complex but, as we will see below, offers a window to a new level of interpretation.\n\n::: {.callout-note}\nExplanations regarding these new parameters will be provided in the next chapter.\n:::\n\n### Model Specification\n\n::: {#4df349b0 .cell execution_count=21}\n``` {.julia .cell-code code-fold=\"false\"}\n@model function model_Wald(rt; min_rt=minimum(df.RT), condition=nothing)\n\n # Priors \n ν_intercept ~ truncated(Normal(1, 3); lower=0)\n ν_condition ~ Normal(0, 1)\n\n α_intercept ~ truncated(Normal(0, 1); lower=0)\n α_condition ~ Normal(0, 0.5)\n\n τ_intercept ~ truncated(Gamma(1.1, 11); upper=min_rt)\n τ_condition ~ Normal(0, 0.01)\n\n for i in 1:length(rt)\n ν = ν_intercept + ν_condition * condition[i]\n if ν <= 0 # Avoid negative drift\n Turing.@addlogprob! -Inf\n return nothing\n end\n α = α_intercept + α_condition * condition[i]\n if α <= 0 # Avoid negative variance values\n Turing.@addlogprob! -Inf\n return nothing\n end\n τ = τ_intercept + τ_condition * condition[i]\n if τ < 0 # Avoid negative tau values\n Turing.@addlogprob! -Inf\n return nothing\n end\n rt[i] ~ Wald(ν, α, τ)\n end\nend\n\nfit_Wald = model_Wald(df.RT; condition=df.Accuracy)\nchain_Wald = sample(fit_Wald, NUTS(), 600)\n```\n:::\n\n\n::: {#b9814b26 .cell execution_count=22}\n``` {.julia .cell-code code-fold=\"false\"}\nhpd(chain_Wald; alpha=0.05)\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n\n::: {.ansi-escaped-output}\n```{=html}\n