In [1]:
import numpy as np
import pandas as pd
import statsrat as sr
from statsrat import rw
from statsrat import expr

# IMPORTANT NOTE: the "data" in this example are synthetic, i.e. generated by simulation rather than
# collected from actual human participants.  This is to avoid any privacy concerns.

In [2]:
# DEFINE MODELS

# Derived attention model of Le Pelley, Mitchell, Beesley, George and Wills (2016),
# assembled from components of statsrat's `rw` module.
drva = rw.model(
    name='drva',
    pred=rw.pred.identity,
    fbase=rw.fbase.elem,
    fweight=rw.fweight.none,
    lrate=rw.lrate.from_aux_feature,  # learning rates are read from the auxiliary object (see `aux`)
    drate=rw.drate.zero,
    aux=rw.aux.drva,
)

# CompAct restricted to elemental features, i.e. Model 4 of Paskewitz and Jones (2020),
# assembled from components of statsrat's `rw` module.
CompAct = rw.model(
    name='CompAct',
    pred=rw.pred.identity,
    fbase=rw.fbase.elem,
    fweight=rw.fweight.from_aux_norm,  # feature weights come from the auxiliary object (see `aux`)
    lrate=rw.lrate.from_aux_norm,      # learning rates come from the same auxiliary object
    drate=rw.drate.zero,
    aux=rw.aux.gradcomp,
)

In [3]:
# DEFINE THE EXPERIMENT

# Loosely based on Le Pelley and McLaren 2003 (learned predictiveness).
#
# The schedule has three stages, each built from 8 trial types (pairs of cues).
# Cue pairs in `x_pn` and outcomes in `y` are assumed to be matched by position.
#
#   training: cues a and c always go with cat1, b and d always go with cat2, while
#             v, w, x and y each occur with both categories.  So a-d are predictive
#             of the outcome and v-y are not.
#   transfer: each old compound pairs one previously predictive cue with one previously
#             non-predictive cue (a.x, b.y, c.v, d.w) and is assigned to a new category
#             (cat3 or cat4).  Four compounds of novel cues (e.f, g.h, i.j, k.l) are
#             added.
#   test:     no outcomes are given and learning is switched off (lrn = False).  The
#             compounds recombine cues that shared a transfer category: a.c and b.d
#             (previously predictive) versus v.x and w.y (previously non-predictive).
#             e.h and f.g recombine novel cues from different transfer categories;
#             i.j and k.l are repeated unchanged.
design = expr.schedule(
    resp_type='choice',
    stages={
        'training': expr.stage(
            x_pn=[
                ['a', 'v'], ['b', 'v'], ['a', 'w'], ['b', 'w'],
                ['c', 'x'], ['d', 'x'], ['c', 'y'], ['d', 'y'],
            ],
            y=4*[['cat1'], ['cat2']],  # alternates cat1, cat2 across the 8 trial types
            y_psb=['cat1', 'cat2'],
            n_rep=14,
        ),
        'transfer': expr.stage(
            x_pn=[
                ['a', 'x'], ['b', 'y'], ['c', 'v'], ['d', 'w'],
                ['e', 'f'], ['g', 'h'], ['i', 'j'], ['k', 'l'],
            ],
            y=4*[['cat3'], ['cat4']],  # alternates cat3, cat4 across the 8 trial types
            y_psb=['cat3', 'cat4'],
            n_rep=4,
        ),
        'test': expr.stage(
            x_pn=[
                ['a', 'c'], ['b', 'd'], ['v', 'x'], ['w', 'y'],
                ['e', 'h'], ['f', 'g'], ['i', 'j'], ['k', 'l'],
            ],
            y_psb=['cat3', 'cat4'],
            lrn=False,
            n_rep=1,
        ),
    },
)

# OAT comparing test-stage responding to compounds of previously predictive cues
# (a.c, b.d: positive trials) with compounds of previously non-predictive cues
# (v.x, w.y: negative trials).  The listed responses are the transfer-stage categories
# of each compound's cues (cat3 for a.c and v.x, cat4 for b.d and w.y); presumably they
# are matched to the trials by position -- confirm against the statsrat documentation.
rel_irl = expr.oat(
    schedule_pos=['design'],
    behav_score_pos=expr.behav_score(
        stage='test',
        trial_pos=['a.c -> nothing', 'b.d -> nothing'],
        trial_neg=['v.x -> nothing', 'w.y -> nothing'],
        resp_pos=['cat3', 'cat4'],
        resp_neg=['cat3', 'cat4'],
    ),
)

# Bundle the schedule and its OAT into a single experiment object; the dictionary keys
# are the names under which they are registered ('design' is the name the OAT refers to).
lrn_pred = expr.experiment(
    schedules={'design': design},
    oats={'rel_irl': rel_irl},
)

In [4]:
# Show the docstring of the experiment's read_csv method, which is used in the
# next cell to import the data.
help(lrn_pred.read_csv)

Help on method read_csv in module statsrat.expr.experiment:

read_csv(path, x_col, resp_col, resp_map, ident_col=None, conf_col=None, schedule=None, other_info=None, header='infer', n_final=8) method of statsrat.expr.experiment.experiment instance
    Import empirical data from .csv files.
    
    Parameters
    ----------
    path: str
        Path to the .csv files.
    x_col: list
        Names of columns (strings) indicating cues (stimulus
        attributes, i.e. columns of 'x').
    resp_col: list
        Names of columns (strings) indicating responses.
    resp_map: dict
        Maps response names in the raw data to response names in the
        schedule definition.
    ident_col: str or None, optional
        If string, name of column indicating individual identifier
        (the 'ident' variable).  If None, then file names are used
        as 'ident'.  Defaults to None.
    conf_col: str or None, optional
        Name of the column indicating confidence responses (i.e.
     

In [5]:
# IMPORT THE (SYNTHETIC) DATA
#
# Reads the .csv files found under 'data/'.  Returns a trial-by-trial dataset (`ds`)
# and a per-participant summary (`summary`).
ds, summary = lrn_pred.read_csv(
    path='data/',
    x_col=['left_stim', 'right_stim'],  # columns holding the cues shown on each trial
    resp_col=['key_press'],             # column holding the response
    # raw key presses -> response names used in the schedule definition
    resp_map={'a': 'cat1', 's': 'cat2', 'd': 'cat3', 'f': 'cat4'},
    ident_col='subject_id',             # participant identifier column
    conf_col='confidence_rating',       # confidence response column
)

In [6]:
# View the trial-by-trial dataset (an xarray Dataset indexed by participant
# 'ident' and time step 't').
print(ds)

<xarray.Dataset>
Dimensions:     (ident: 10, t: 152, x_name: 16, y_name: 4)
Coordinates:
  * ident       (ident) <U4 'sim7' 'sim6' 'sim0' 'sim1' ... 'sim2' 'sim9' 'sim8'
  * t           (t) int64 0 1 2 3 4 5 6 7 8 ... 144 145 146 147 148 149 150 151
    t_name      (t) <U4 'main' 'main' 'main' 'main' ... 'main' 'main' 'main'
    ex          (ident, t) <U3 'b.v' 'd.x' 'c.y' 'b.w' ... 'k.l' 'v.x' 'b.d'
    trial       (t) int64 0 1 2 3 4 5 6 7 8 ... 144 145 146 147 148 149 150 151
    trial_name  (ident, t) <U14 'b.v -> cat2' 'd.x -> cat2' ... 'b.d -> nothing'
    stage       (t) int64 0 0 0 0 0 0 0 0 0 0 0 ... 8 8 16 16 16 16 16 16 16 16
    stage_name  (t) <U8 'training' 'training' 'training' ... 'test' 'test'
  * x_name      (x_name) <U1 'a' 'b' 'c' 'd' 'e' 'f' ... 'k' 'l' 'v' 'w' 'x' 'y'
  * y_name      (y_name) <U4 'cat1' 'cat2' 'cat3' 'cat4'
    time        (t) int64 0 1 2 3 4 5 6 7 8 ... 144 145 146 147 148 149 150 151
Data variables:
    x           (ident, t, x_name) float64 0.0

In [7]:
# View the summary dataframe: one row per participant ('ident'), with a
# "last8_pct_correct" column for each of the training and transfer stages and
# the 'rel_irl' OAT score.
print(summary)

       training_last8_pct_correct  transfer_last8_pct_correct  rel_irl
ident                                                                 
sim7                        100.0                       100.0      7.5
sim6                         50.0                        37.5      5.5
sim0                        100.0                       100.0      2.0
sim1                        100.0                        87.5      1.0
sim4                         75.0                       100.0      2.0
sim5                        100.0                       100.0     11.5
sim3                         37.5                        87.5      1.0
sim2                         87.5                       100.0      2.5
sim9                        100.0                       100.0      3.0
sim8                        100.0                       100.0      4.0


In [8]:
# Randomly sample a subset of the data to compare optimization algorithms etc.

In [9]:
# FIRST MODEL (**MODEL NAME**)

# Test different optimization algorithms (subset of data)


In [10]:
# Determine how long the optimization algorithm needs to run (subset of data)


In [11]:
# Fit the model to the data (full dataset)


In [12]:
# SECOND MODEL (**MODEL NAME**)

# Test different optimization algorithms (subset of data)


In [13]:
# Determine how long the optimization algorithm needs to run (subset of data)


In [14]:
# Fit the model to the data (full dataset)


In [15]:
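# Compare AIC (Akaike Information Criterion) values
# These are based on the log-likelihood but penalize the number of free parameters
# Under the standard definition (AIC = 2*n_parameters - 2*log-likelihood), lower is better;
# confirm the sign convention of the fitting output before comparing models.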
