** Neil Garrett, June 2018 **
uses julia EM model fitting by Nathaniel Daw


# Start up commands/load relevant functions

In [1]:

parallel = true # Run on multiple CPUs. If you are having trouble, set parallel = false: easier to debug
full = false    # Maintain full covariance matrix (vs a diagional one) at the group level
emtol = 1e-3    # stopping condition (relative change) for EM

using Distributed
if (parallel)
	# only run this once
	addprocs()
end

# this loads the packages needed -- the @everywhere makes sure they 
# available on all CPUs 

@everywhere using DataFrames
@everywhere using SharedArrays
@everywhere using ForwardDiff
@everywhere using Optim
@everywhere using LinearAlgebra       # for tr, diagonal
@everywhere using StatsFuns           # logsumexp
@everywhere using SpecialFunctions    # for erf
@everywhere using Statistics          # for mean
@everywhere using Distributions
@everywhere using GLM
@everywhere using CSV #for reading/writing csv files

# change this to where you keep the Daw's latest em code
@everywhere directory = "/Users/neil/GitHubRepo/Projects/PreySelection/em"

#load in functions including em
@everywhere include("$directory/em.jl");
@everywhere include("$directory/common.jl");
@everywhere include("$directory/likfuns.jl")


┌ Info: Recompiling stale cache file /Users/neil/.julia/compiled/v0.7/Distributions/xILW0.ji for Distributions [31c24e10-a181-5473-b8eb-7969acd0382f]
└ @ Base loading.jl:1185


      From worker 2:	│   exception = Required dependency Arpack [7d9fca2a-8960-54d3-9f78-7d1dccf2cb97] failed to load from a cache file.
      From worker 2:	└ @ Base loading.jl:963
      From worker 5:	│   exception = Required dependency Arpack [7d9fca2a-8960-54d3-9f78-7d1dccf2cb97] failed to load from a cache file.
      From worker 5:	└ @ Base loading.jl:963
      From worker 3:	│   exception = Required dependency Arpack [7d9fca2a-8960-54d3-9f78-7d1dccf2cb97] failed to load from a cache file.
      From worker 3:	└ @ Base loading.jl:963
      From worker 4:	│   exception = Required dependency Arpack [7d9fca2a-8960-54d3-9f78-7d1dccf2cb97] failed to load from a cache file.
      From worker 4:	└ @ Base loading.jl:963


│   exception = ErrorException("Required dependency QuadGK [1fd47b50-473d-5c70-9696-f719f8f3bcdc] failed to load from a cache file.")
└ @ Base loading.jl:963
┌ Info: Recompiling stale cache file /Users/neil/.julia/compiled/v0.7/GLM/6OREG.ji for GLM [38e38edf-8417-5370-95a0-9cbb8c7f171a]
└ @ Base loading.jl:1185
│ This may mean Distributions [31c24e10-a181-5473-b8eb-7969acd0382f] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:941


      From worker 2:	│   exception = Required dependency CodecZlib [944b1d66-785c-5afd-91f1-9de20f533193] failed to load from a cache file.
      From worker 2:	└ @ Base loading.jl:963
      From worker 4:	│   exception = Required dependency CodecZlib [944b1d66-785c-5afd-91f1-9de20f533193] failed to load from a cache file.
      From worker 4:	└ @ Base loading.jl:963
      From worker 5:	│   exception = Required dependency CodecZlib [944b1d66-785c-5afd-91f1-9de20f533193] failed to load from a cache file.
      From worker 5:	└ @ Base loading.jl:963
      From worker 3:	│   exception = Required dependency CodecZlib [944b1d66-785c-5afd-91f1-9de20f533193] failed to load from a cache file.
      From worker 3:	└ @ Base loading.jl:963


# Data read and process

### Read in trial by trial data

In [2]:

#read in csv file of the data
#trial by trial data: note will include force trials and missed responses
df = CSV.read("/Users/neil/GitHubRepo/Projects/PreySelection/v105/data/trialdata_105_processed.csv");


### Get rid of excluded subs

In [3]:

df = df[df[:exclude].==0,:];


### Convert approach avoid to 2s and 1s , missed as 0. Then convert to integers (necessary to use as an index)

In [4]:

# convert approach_avoid to 1s (avoid) and 2s (approach)
df[df[:approach_avoid].==1,:approach_avoid] = 2
df[df[:approach_avoid].==-1,:approach_avoid] = 1

# put 0 for missed responses
index_NaN = findall(isnan.(df[:approach_avoid]))
df[index_NaN, :approach_avoid] = 0

df[:approach_avoid] = convert(Vector{Integer}, df[:approach_avoid])

first(df, 6)


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Integer,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰
1,1,0,0,../static/images/invador1.png,4,20,8,2.5,right,74,2,1195.0,1.09567,1,0,1,0,do not exclude
2,1,1,0,../static/images/invador3.png,1,80,2,40.0,right,74,2,718.0,-1.04464,0,0,1,0,do not exclude
3,1,2,0,../static/images/invador2.png,2,20,2,10.0,left,70,0,,,2,1,1,0,do not exclude
4,1,3,0,../static/images/invador4.png,3,80,8,10.0,left,70,2,933.0,-0.0799261,0,0,1,0,do not exclude
5,1,4,0,../static/images/invador3.png,1,80,2,40.0,left,70,2,750.0,-0.901051,1,0,1,0,do not exclude
6,1,5,0,../static/images/invador3.png,1,80,2,40.0,right,74,2,628.0,-1.44847,0,0,1,0,do not exclude


# Symmetric Model

This model comprises: 

1. An intercept which reflects degree of bias to reject.

2. A beta (termperature parameter) which controls sensitivity to the difference between the options (0 = pick 50/50. Higher it is, the more sensative subs are tothe different options (more step functionesque). <br>

3. One learning rate

Uses Q learned average to predict choice

Initalise Qaverage in model at the arithmetic average over all subs over both sessions

In [5]:

@everywhere function model_symmetric(params, data)
    
   #model parameters
    intercept = params[1]
    beta = params[2]
    lr_environment = 0.5 .+ 0.5.*erf(params[3]/sqrt(2))
    lr_options = 0.5 .+ 0.5.*erf(params[4]/sqrt(2))
   
    #initalise various variables
    Q_estimate = zeros(typeof(beta),1) .+ 7.02 # stores estimated global reward rate
    opp_cost_estimate = zeros(typeof(beta),1) # stores estimated opp cost
    
    Qd = zeros(typeof(beta),2) # decision variable; 1st element is the opp cost of accepting (or value of rejecting), 2nd element is just the reward of the option (value of accepting)

    # maintains estimate of the reward for each option (options indexed by rank: 1-4). 
    Q_options_reward = zeros(typeof(beta),4)
    Q_options_delay = zeros(typeof(beta),4)
    
    lik = 0 #likelihood

    #extract various variables from the dataframe
    reward = data[:reward_percent]
    delay = data[:delay_s]
    force = data[:force_trial]  
    missed = data[:missed] #missed responses 
    c = data[:approach_avoid] #choice
    option_rank = data[:stim_rank] #option rank 

    for i = 1:length(c)
        
            #option presented in current trial
            option_index = option_rank[i]
           
            # decrease estimate of global reward rate for encounter time (2seconds)
            Q_estimate = (1-lr_environment) * Q_estimate .+ 0
            Q_estimate = (1-lr_environment) * Q_estimate .+ 0
        
            #calculate estimate of opportunity cost given estimate of reward rate and delay incurred by option 
            opp_cost_estimate = Q_estimate*Q_options_delay[option_index]
        
            # if not a force trial predict choice based on current values
            if ((force[i]<1) & (missed[i]<1))
                        
                # decision variable - the estimate of opportunity cost ("reward" of rejecting) versus 
                # reward of the current option (if accepted)
                Qd = [intercept, 0] .+ [beta.*opp_cost_estimate[1], beta.*Q_options_reward[option_index]]

                # increment likelihood
                lik += Qd[c[i]] - log(sum(exp.(Qd)))
            
            end
            
            #incur 8second time out for missed response
            if (missed[i]==1)
                            
                for j = 1:8
                
                     Q_estimate = (1-lr_environment) * Q_estimate .+ 0

                end
            
            end
        
            # regardless of whether a force trial or not, 
            # if accept the option, Q_estimate updates and there is a delay incurred
            if ((c[i] == 2) & (missed[i]==0))
            
                for j = 1:delay[i]
                
                    Q_estimate = (1-lr_environment) * Q_estimate .+ 0
                
                end
            
                Q_estimate = (1-lr_environment) * Q_estimate .+ lr_environment*reward[i]
            
                Q_options_reward[option_index] = (1-lr_options)*Q_options_reward[option_index] .+ lr_options*reward[i]
                Q_options_delay[option_index] = (1-lr_options)*Q_options_delay[option_index] .+ lr_options*delay[i]
                
            end
    
    end
            
    # here if running em you can only return the likelihood
    return -lik
    
    
end


# Parameter optimisiation

### setup variables for em

In [6]:

#store list of actual subject numbers (in subj)
subs = unique(df[:subj])

#put in a new column called "sub" which is identical to subj - em looks for this
df[:sub] = df[:subj];

NS = length(subs)
X = ones(NS)
betas = [0. 0. 0. 0.]
sigma = [1., 1., 1., 1.];


### Run em to get best fit parameters for each subject

In [7]:
model_symmetric(betas,df[df[:sub].==subs[1],:])

140.0157304731092

In [8]:

# run em
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
(betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, model_symmetric; emtol=emtol, parallel=parallel, full=full);


CompositeException: On worker 3:
AssertionError("isfinite(phi_c) && isfinite(dphi_c)")
error at ./error.jl:42
macro expansion at /Users/neil/GitHubRepo/Projects/PreySelection/em/em.jl:92 [inlined]
#6 at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/macros.jl:291
#170 at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/macros.jl:43
#109 at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/process_messages.jl:265
run_work_thunk at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/process_messages.jl:56
run_work_thunk at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/process_messages.jl:65
#102 at ./task.jl:262
#remotecall_fetch#149(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.Worker, ::Distributed.RRID, ::Vararg{Any,N} where N) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:379
remotecall_fetch(::Function, ::Distributed.Worker, ::Distributed.RRID, ::Vararg{Any,N} where N) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:371
#remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64, ::Distributed.RRID, ::Vararg{Any,N} where N) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:392
remotecall_fetch at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:392 [inlined]
call_on_owner at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:465 [inlined]
wait at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/remotecall.jl:486 [inlined]
_wait(::Future) at ./task.jl:196
sync_end(::Array{Any,1}) at ./task.jl:216
macro expansion at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v0.7/Distributed/src/macros.jl:268 [inlined]
(::getfield(Distributed, Symbol("##169#171")){getfield(Main, Symbol("##6#13")){DataFrame,Array{Union{Missing, Int64},1},SharedArray{Float64,2},SharedArray{Float64,2},SharedArray{Float64,1},SharedArray{Float64,3},Diagonal{Float64,Array{Float64,1}},typeof(model_symmetric),Array{Float64,2}},UnitRange{Int64}})() at ./task.jl:247

### Generate Model Statistics 
(LOOCV)

In [None]:

#compute unbiased per subject marginal likelihoods via cross validation.
liks = loocv(df, subs, x, X, betas, sigma, model_symmetric; emtol=emtol, parallel=parallel, full=full)

print(sum(liks))


### Write loocv scores to csv file and save

(if you have run loocv above)

In [None]:

#put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));

CSV.write("loocv_scores.csv", DataFrame(loocv_scores))


### Calculate and write p values, std error and covariance

In [None]:

# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, model_symmetric);


In [None]:

model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),
covmtx_3 = vec(covmtx[:,3]),
covmtx_4 = vec(covmtx[:,4]));

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));


### Write per subject model parameters to csv files and save

In [None]:

# put parameters into variable d
d=x;

# now put parameters into dataframe
params = DataFrame(sub = subs,
intercept = vec(d[:,1]), 
beta = vec(d[:,2]),
learning_rate_environment_raw = vec(d[:,3]),
lr_options_raw = vec(d[:,4]));

CSV.write("subject_params.csv", DataFrame(params))
