** Neil Garrett, June 2018 **

# Start up commands/load relevant functions

In [1]:
# load required libraries
using Distributed

# # set everything up
parallel = true # Run on multiple CPUs. If youhttp://localhost:8888/notebooks/Dropbox/Daw_Lab/PreySelection/v103/models/model_subjective1beta2lr_delayreward/model_subjective1beta2lr_delayreward.jl.ipynb# are having trouble, set parallel = false: easier to debug

# this activates the multiprocessing threads
if (parallel)
	# only run this once
    addprocs(4)
end

# load required libraries
@everywhere using DataFrames
#using DataArrays
@everywhere using ForwardDiff
@everywhere using PyCall
@everywhere using Distributions
@everywhere using PyPlot
@everywhere using CSV
@everywhere using SpecialFunctions
@everywhere using SharedArrays
@everywhere using LinearAlgebra

@everywhere PyCall.@pyimport scipy.optimize as so

# this is the code for the actual fitting routines
@everywhere include("em.jl")
@everywhere include("common.jl")
@everywhere include("likfuns.jl")

# this is generates starting matricies for betas, sigmas etc to feed into model
@everywhere include("genVars.jl")

│ Use `(covvar < 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:299
│ Use `(diag(covmtx)[i] .< 0) ? NaN : diag` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:299
│ Use `(covvar < 0) ? NaN :` instead.
└ @ ~/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ ~/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ ~/GitHubRepo/

│     # /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl, line 66
│     isa(x, Array)
│ end, a)?` at /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl:66.
│ Use `any(x -> begin
│     # /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl, line 66
│     isa(x, Array)
│ end, a) ?` instead.
└ @ ~/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl:66
│     # /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl, line 66
│     isa(x, Array)
│ end, a) ? flatten(vcat(map(flatten, a)...)):` at /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl:66.
│ Use `any(x -> begin
│     # /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/common.jl, l

# Data read and process

### Read in trial by trial data

In [2]:
#read in csv file of the data
#trial by trial data: note will include force trials and missed responses
df = readtable("/Users/neil/GitHubRepo/Projects/PreySelection/v104/data/trialdata_104_processed.csv")

#display header
head(df)


│   caller = top-level scope at In[2]:1
└ @ Core In[2]:1


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰
1,1,0,0,../static/images/invador3.png,3,80,8,10.0,right,70,-1.0,1299.0,2.41977,2,0,1,1,poor behaviour
2,1,1,0,../static/images/invador1.png,1,80,2,40.0,left,70,1.0,1256.0,2.13807,0,0,1,1,poor behaviour
3,1,2,0,../static/images/invador1.png,1,80,2,40.0,right,74,1.0,923.0,-0.0434644,0,0,1,1,poor behaviour
4,1,3,0,../static/images/invador1.png,1,80,2,40.0,right,74,1.0,1005.0,0.49373,0,0,1,1,poor behaviour
5,1,4,0,../static/images/invador1.png,1,80,2,40.0,right,74,1.0,914.0,-0.102425,1,0,1,1,poor behaviour
6,1,5,0,../static/images/invador2.png,2,20,2,10.0,left,74,-1.0,969.0,0.257889,2,0,1,1,poor behaviour


### Append data with the column "sub" 


In [3]:
#this is just a replica of the existing column sub_no but think em looks for "sub" specifically
df[:sub] = df[:subj];


### Get rid of excluded subs

In [4]:
df = df[df[:exclude].==0,:];


### Convert approach avoid to 2s and 1s , missed as 0. Then convert to integers (necessary to use as an index)

In [5]:
#convert approach_avoid to 1s (avoid) and 2s (approach)
df[df[:approach_avoid].==1,:approach_avoid] = 2
df[df[:approach_avoid].==-1,:approach_avoid] = 1

index_NaN = find(isnan.(df[:approach_avoid]))
df[index_NaN, :approach_avoid] = 0

df[:approach_avoid] = convert(Vector{Integer}, df[:approach_avoid])

head(df)


│   caller = top-level scope at In[5]:4
└ @ Core In[5]:4


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason,sub
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Integer,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰
1,2,0,1,../static/images/invador1.png,4,20,8,2.5,right,74,2,1253.0,1.13279,0,0,2,0,do not exclude,2
2,2,1,1,../static/images/invador1.png,4,20,8,2.5,right,-1,0,,,0,1,2,0,do not exclude,2
3,2,2,1,../static/images/invador3.png,2,20,2,10.0,right,74,2,1185.0,0.859417,0,0,2,0,do not exclude,2
4,2,3,1,../static/images/invador4.png,1,80,2,40.0,left,70,2,1148.0,0.710668,0,0,2,0,do not exclude,2
5,2,4,1,../static/images/invador1.png,4,20,8,2.5,right,70,1,1156.0,0.74283,2,0,2,0,do not exclude,2
6,2,5,1,../static/images/invador1.png,4,20,8,2.5,left,70,0,,,0,1,2,0,do not exclude,2


### Read in summary stats

In [6]:
summary_stats = readtable("/Users/neil/GitHubRepo/Projects/PreySelection/v104/data/subdata_104.csv")
head(summary_stats)

│   caller = top-level scope at In[6]:1
└ @ Core In[6]:1


Unnamed: 0_level_0,MturkID,AssignID,sub_no,age,gender,n_trials,n_force_wrong,n_missed,bonus_payment,percent_accept_A1,percent_accept_A2,percent_accept_A3,percent_accept_A4,percent_accept_A2_A3,percent_accept_B1,percent_accept_B2,percent_accept_B3,percent_accept_B4,percent_accept_B2_B3,percent_accept_B1_min_A1,percent_accept_B2_min_A2,percent_accept_B3_min_A3,percent_accept_B4_min_A4,percent_accept_B2_B3_min_A2_A3,percent_accept_AB1,percent_accept_AB2,percent_accept_AB3,percent_accept_AB4,percent_accept_AB2_AB3,exclude,exclude_reason,order_condition,comment
Unnamed: 0_level_1,String⍰,String⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,String⍰,Int64⍰,String⍰
1,AA4KKLIU4C3NY,3YDGXNSEOZUCKHRBBCHD5KZ8AU0483,1,42,male,170,6,14,2.4,0.977273,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0227273,0.0,0.0,0.0,0.0,0.98,1.0,1.0,1.0,1.0,1,poor behaviour,1,It was fun and interesting. Thank you!
2,ASPRULQHB8B1F,3YW4XOSQKQLTQF15RXZG3EAUV1HU1V,2,32,Male,318,3,5,2.8,1.0,0.0,0.0833333,0.0,0.037037,1.0,0.0588235,0.8,0.0166667,0.40625,0.0,0.0588235,0.716667,0.0166667,0.369213,1.0,0.03125,0.481481,0.0126582,0.237288,0,do not exclude,2,
3,A1W8B7P1WVD7TX,3L6L49WXW0XTXZ89DEAAT0PH84Z451,3,20,p,308,4,11,2.6,1.0,0.533333,0.214286,0.142857,0.37931,1.0,1.0,0.0625,0.0133333,0.516129,0.0,0.466667,-0.151786,-0.129524,0.136819,1.0,0.766667,0.133333,0.0337079,0.45,0,do not exclude,1,
4,A1XA3QOXDTGCDL,3VSOLARPKB9RGQRHGQI8VB2DG0693W,4,27,f,276,0,0,2.7,1.0,1.0,0.0,0.0,0.5,1.0,1.0,0.933333,0.037037,0.961538,0.0,0.0,0.933333,0.037037,0.461538,1.0,1.0,0.466667,0.0294118,0.714286,0,do not exclude,2,this was a lot of fun!!!!
5,A10QZA83T80YA3,3UXUOQ9OKEXNMS29A8ZY4NPDRK37A7,5,29,f,266,1,2,2.7,1.0,1.0,0.5,0.181818,0.777778,0.866667,1.0,1.0,0.157895,1.0,-0.133333,0.0,0.5,-0.0239234,0.222222,0.969697,1.0,0.76,0.161765,0.896552,0,do not exclude,2,
6,A3RQN5RZD1N2KP,3M23Y66PO278IOAY8YGXJ9JNJLCS6O,6,45,f,291,0,1,2.8,1.0,0.0588235,0.1,0.1875,0.0810811,1.0,1.0,1.0,0.0181818,1.0,0.0,0.941176,0.9,-0.169318,0.918919,1.0,0.466667,0.485714,0.056338,0.476923,0,do not exclude,1,It was a lot of fun and I'd love to do more like this.


### Get rid of excluded subs

In [7]:
summary_stats = summary_stats[summary_stats[:exclude].==0,:];


### Get rid of Mturk ID etc (first 3 columns)

In [8]:
summary_stats = summary_stats[:,4:end];

│   caller = top-level scope at In[8]:1
└ @ Core In[8]:1


# MVT Model Learn Options

This model comprises: 

1. An intercept which reflects degree of bias to reject.

2. A beta (termperature parameter) which controls sensitivity to the difference between the options (0 = pick 50/50. Higher it is, the more sensative subs are tothe different options (more step functionesque). <br>

3. One learning rate for average reward rate

4. Seperate learning rate for learning the value of the options (no longer assume they know this)

Uses Q learned average to predict choice

Initalise Qaverage in model at the arithmetic average over all subs over both sessions

In [9]:
@everywhere function model_MVT_learn_options(params, data)
     
    #model parameters
    intercept = params[1]
    beta = params[2]
    lr_environment = 0.5 .+ 0.5.*erf(params[3]/sqrt(2))
    lr_options = 0.5 .+ 0.5.*erf(params[4]/sqrt(2))
    
    #initalise various variables
    delay_sum = zeros(typeof(beta),1)
    reward_sum = zeros(typeof(beta),1)
    Q_arithmetic = zeros(typeof(beta),1) # stores actual (arithmetic) average reward rate
    opp_cost_arithmetic = zeros(typeof(beta),1) # stores actual (arithmetic) opp cost
    Q_estimate = zeros(typeof(beta),1) .+ 7.77 # stores estimated global reward rate
    opp_cost_estimate = zeros(typeof(beta),1) # stores estimated opp cost
    
    Qd = zeros(typeof(beta),2) # decision variable; 1st element is the opp cost of accepting (or value of rejecting), 2nd element is just the reward of the option (value of accepting)

    # maintains estimate of the reward for each option (options indexed by rank: 1-4). 
    Q_options_reward = zeros(typeof(beta),4)
    Q_options_delay = zeros(typeof(beta),4)
    
    lik = 0 #likelihood

    #these store new trial by trial values (e.g. Q estimate on each trial etc.)
    reward_sum_store = [];
    delay_sum_store = [];
    Q_arithmetic_store = [];
    opp_cost_arithmetic_store  = []; 
    Q_estimate_store = [];
    opp_cost_estimate_store = [];

    #extract various variables from the dataframe
    reward = data[:reward_percent]
    delay = data[:delay_s]
    force = data[:force_trial]  
    missed = data[:missed] #missed responses 
    c = data[:approach_avoid] #choice
    option_rank = data[:stim_rank] #option rank 

    for i = 1:length(c)
        
            #option presented in current trial
            option_index = option_rank[i]
        
            # 2 seconds without reward on each trial regadless of accept/reject
            delay_sum .+= 2;
         
            # calculate current (arithmetic) reward per second from number of seconds elapsed and reward accured
            Q_arithmetic = reward_sum./delay_sum
            opp_cost_arithmetic = Q_arithmetic*delay[i]
           
            # decrease estimate of global reward rate for encounter time (2seconds)
            Q_estimate = (1-lr_environment) * Q_estimate .+ 0
            Q_estimate = (1-lr_environment) * Q_estimate .+ 0
        
            #calculate estimate of opportunity cost given estimate of reward rate and delay incurred by option 
            opp_cost_estimate = Q_estimate*Q_options_delay[option_index]
        
            #add trial by trial values 
            append!(reward_sum_store, reward_sum)
            append!(delay_sum_store, delay_sum)
            append!(Q_arithmetic_store, Q_arithmetic)
            append!(opp_cost_arithmetic_store, opp_cost_arithmetic)
            append!(Q_estimate_store, Q_estimate)
            append!(opp_cost_estimate_store, opp_cost_estimate)
        
            # if not a force trial predict choice based on current values
            if ((force[i]<1) & (missed[i]<1))
                        
                # decision variable - the estimate of opportunity cost ("reward" of rejecting) versus 
                # reward of the current option (if accepted)
                Qd = [intercept, 0] .+ [beta.*opp_cost_estimate[1], beta.*Q_options_reward[option_index]]

                # increment likelihood
                lik += Qd[c[i]] - log(sum(exp.(Qd)))
            
            end
            
            #incur 8second time out for missed response
            if (missed[i]==1)
                
                delay_sum .+= 8
            
                for j = 1:8
                
                     Q_estimate = (1-lr_environment) * Q_estimate .+ 0

                end
            
            end
        
            # regardless of whether a force trial or not, 
            # if accept the option, Q_estimate updates and there is a delay incurred
            if ((c[i] == 2) & (missed[i]==0))
                
                delay_sum .+= delay[i]
                reward_sum .+= reward[i]
            
                for j = 1:delay[i]
                
                    Q_estimate = (1-lr_environment) * Q_estimate .+ 0
                
                end
            
                    Q_estimate = (1-lr_environment) * Q_estimate .+ lr_environment*reward[i]
            
                Q_options_reward[option_index] = (1-lr_options)*Q_options_reward[option_index] .+ lr_options*reward[i]
                Q_options_delay[option_index] = (1-lr_options)*Q_options_delay[option_index] .+ lr_options*delay[i]
                
            end
    
    end
    
    # compile trial by trial values here
    trial_data = DataFrame(reward_sum = reward_sum_store,
            delay_sum = delay_sum_store,
            Q_arithmetic = Q_arithmetic_store,
            opp_cost_arithmetic = opp_cost_arithmetic_store,
            Q_estimate = Q_estimate_store,
            opp_cost_estimate = opp_cost_estimate_store)
    
    # here if running em you can only return the likelihood
    return -lik
    
    # but if you run in order to extract trials, subs etc then want to return this
    #return (-lik, trial_data)
    
end


# Parameter optimisiation

### Run model for one subject

aids debugging

In [10]:
# initialize parameter structures
(df, subs, X, betas, sigma) = genVars(df, 4);

# run model for sub 1
model_MVT_learn_options(betas,df[df[:sub].==subs[1],:])


155.26496844542802

### Run em to get best fit parameters for each subject

In [11]:
# initialized parameter structures (again)
# note that some of the variables (e.g. betas, sigma) are entered and returned by em function 
(df, subs, X, betas, sigma) = genVars(df, 4);

# run for full learner
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
@time (betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, model_MVT_learn_options; emtol=1e-3, parallel=true, full=true, quiet=false);



iter: 32
betas: [0.7, 0.11, -2.02, -0.67]
sigma: [0.75 -0.01 0.29 0.05; -0.01 0.0 -0.02 -0.02; 0.29 -0.02 1.92 0.64; 0.05 -0.02 0.64 0.57]
change: [9.0e-6, 0.00011, -3.0e-6, -8.3e-5, 0.000117, -0.000314, 3.0e-5, 0.000505, 0.000901, -9.7e-5, -0.000454, 5.0e-6, 7.0e-6, 6.7e-5]
max: 0.000901
369.447878 seconds (35.72 M allocations: 1.452 GiB, 0.81% gc time)


### Generate Model Statistics 
(IAIC, IBIC, LOOCV)

In [12]:
## model selection/comparison/scoring

# laplace approximation to the aggregate log marginal likelihood of the whole dataset
# marginalized over the individual params

aggll = lml(x,l,h)

# to compare this between models you need to correct for the group-level free parameters
# either aic or bic

aggll_ibic = ibic(x,l,h,betas,sigma,nrow(df))
aggll_iaic = iaic(x,l,h,betas,sigma)

# or you can compute unbiased per subject marginal likelihoods via subject-level cross validation
# you can do paired t tests on these between models
# these are also appropriate for SPM_BMS etc

# takes ages so comment in when want to run, otherwise just use IAIC above
liks = loocv(df, subs, x, X, betas, sigma, model_MVT_learn_options; emtol=1e-3, parallel=true, full=true)
#aggll_loo = sum(liks)

#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll: $aggll_iaic\nloo nll:  $aggll_loo")
#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll:")
#print(aggll_iaic)


Subject: 1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..30..31..32..33..34..35..36..37..38..

38-element Array{Float64,1}:
  33.76163980672144 
  65.50947306552057 
  44.14033011099355 
  72.98459133330056 
  49.67327887956529 
  72.70520680483916 
  64.28074757583224 
  56.3688090381397  
  58.42327837015828 
  54.29728818637489 
  62.649612508110806
  34.911037388327784
  68.12975964687058 
   ⋮                
  79.70519178046479 
 105.72311757047557 
  73.12086308584576 
  61.3308519824782  
  75.66648200677085 
  74.7361206103508  
  58.113782432992174
  68.44921826990748 
  75.9268211424846  
  51.67327134343156 
  66.37495914601315 
  73.17744145191332 

### Write loocv scores to csv file

(if you have run loocv above)

In [13]:
# put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));


#### save LOOCV to csv file


In [14]:
CSV.write("loocv_scores.csv", DataFrame(loocv_scores))


│   caller = top-level scope at In[14]:1
└ @ Core In[14]:1


"loocv_scores.csv"

#### add to summary stats to LOOCV as well

In [15]:
summary_stats = [summary_stats loocv_scores];

### Calculate and write p values, std error and covariance

In [16]:
# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df,subs,x,X,h,betas,sigma,model_MVT_learn_options);


  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
in #53 at none
  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/DL02A/src/kernel.jl:41
in #53 at none
│   caller = emerrors(::DataFrame, ::Array{Union{Missing, Int64},1}, ::SharedArray{Float64,2}, ::Array{Float64,3}, ::SharedArray{Float64,3}, ::Array{Float64,1}, ::Array{Float64,2}, ::Function) at em.jl:300
└ @ Main /Users/neil/GitHubRepo/Projects/PreySelection/v104/models/supplementary/model_MVT_learn_options/em.jl:300


In [17]:
model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),
covmtx_3 = vec(covmtx[:,3]),
covmtx_4 = vec(covmtx[:,4]));

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));

│   caller = top-level scope at In[17]:7
└ @ Core In[17]:7


In [18]:
print(standarderrors)


[0.148372, 0.0111989, 0.229364, 0.146045]

In [19]:
print(pvalues)


[2.77655e-6, 1.75993e-24, 1.53697e-18, 5.15616e-6]

In [20]:
print(covmtx)


[0.0220143 -0.000333127 0.00790021 0.00170137; -0.000333127 0.000125416 -0.000576716 -0.000820526; 0.00790021 -0.000576716 0.0526077 0.0168813; 0.00170137 -0.000820526 0.0168813 0.021329]

### Write per subject model parameters to csv files

In [21]:
# put parameters into variable d
d=x';

# now put parameters into dataframe
params = DataFrame(sub = subs,
intercept = vec(d[:,1]), 
beta = vec(d[:,2]),
learning_rate_environment_raw = vec(d[:,3]),
learning_rate_environment_transformed = vec(0.5 .+ 0.5*erf.(d[:,3] / sqrt(2))),
learning_rate_options_raw = vec(d[:,4]),
learning_rate_options_transformed = vec(0.5 .+ 0.5*erf.(d[:,4] / sqrt(2))));


#### save parameters to csv file


In [22]:
CSV.write("subject_params.csv", DataFrame(params))


│   caller = top-level scope at In[22]:1
└ @ Core In[22]:1


"subject_params.csv"