** Neil Garrett, June 2018 **
uses julia EM model fitting by Nathaniel Daw


# Start up commands/load relevant functions

In [1]:

parallel = true # Run on multiple CPUs. If you are having trouble, set parallel = false: easier to debug
full = false    # Maintain full covariance matrix (vs a diagional one) at the group level
emtol = 1e-3    # stopping condition (relative change) for EM

using Distributed
if (parallel)
	# only run this once
	addprocs()
end

# this loads the packages needed -- the @everywhere makes sure they 
# available on all CPUs 

@everywhere using DataFrames
@everywhere using SharedArrays
@everywhere using ForwardDiff
@everywhere using Optim
@everywhere using LinearAlgebra       # for tr, diagonal
@everywhere using StatsFuns           # logsumexp
@everywhere using SpecialFunctions    # for erf
@everywhere using Statistics          # for mean
@everywhere using Distributions
@everywhere using GLM
@everywhere using CSV #for reading/writing csv files

# change this to where you keep the Daw's latest em code
@everywhere directory = "/Users/neil/GitHubRepo/Projects/PreySelection/em"

#load in functions including em
@everywhere include("$directory/em.jl");
@everywhere include("$directory/common.jl");
@everywhere include("$directory/likfuns.jl")


# Data read and process

### Read in trial by trial data

In [2]:

#read in csv file of the data
#trial by trial data: note will include force trials and missed responses
df = CSV.read("/Users/neil/GitHubRepo/Projects/PreySelection/v104/data/trialdata_104_processed.csv");


### Get rid of excluded subs

In [3]:

df = df[df[:exclude].==0,:];


### Convert approach avoid to 2s and 1s , missed as 0. Then convert to integers (necessary to use as an index)

In [4]:

# convert approach_avoid to 1s (avoid) and 2s (approach)
df[df[:approach_avoid].==1,:approach_avoid] = 2
df[df[:approach_avoid].==-1,:approach_avoid] = 1

# put 0 for missed responses
index_NaN = findall(isnan.(df[:approach_avoid]))
df[index_NaN, :approach_avoid] = 0

df[:approach_avoid] = convert(Vector{Integer}, df[:approach_avoid])

first(df, 6)


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Integer,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰
1,2,0,1,../static/images/invador1.png,4,20,8,2.5,right,74,2,1253.0,1.13279,0,0,2,0,do not exclude
2,2,1,1,../static/images/invador1.png,4,20,8,2.5,right,-1,0,,,0,1,2,0,do not exclude
3,2,2,1,../static/images/invador3.png,2,20,2,10.0,right,74,2,1185.0,0.859417,0,0,2,0,do not exclude
4,2,3,1,../static/images/invador4.png,1,80,2,40.0,left,70,2,1148.0,0.710668,0,0,2,0,do not exclude
5,2,4,1,../static/images/invador1.png,4,20,8,2.5,right,70,1,1156.0,0.74283,2,0,2,0,do not exclude
6,2,5,1,../static/images/invador1.png,4,20,8,2.5,left,70,0,,,0,1,2,0,do not exclude


# Symmetric Model

This model comprises: 

1. An intercept which reflects degree of bias to reject.

2. A beta (termperature parameter) which controls sensitivity to the difference between the options (0 = pick 50/50. Higher it is, the more sensative subs are tothe different options (more step functionesque). <br>

3. One learning rate

Uses Q learned average to predict choice

Initalise Qaverage in model at the arithmetic average over all subs over both sessions

In [8]:

@everywhere function model_symmetric(params, data)
     
    #model parameters
    intercept = params[1]
    beta = params[2]
    lr = 0.5 .+ 0.5.*erf(params[3]/sqrt(2))
    ps = params[4]
    
    #initalise various variables
    opp_cost_estimate = zeros(typeof(beta),1) # stores estimated opp cost

    #initalise to average rate over the experiment
    Q_estimate = zeros(typeof(beta),1) .+ 7.77 # stores estimated global reward rate
    
    Qd = zeros(typeof(beta),2) # decision variable; 1st element is the opp cost of accepting (or value of rejecting), 2nd element is just the reward of the option (value of accepting)

    lik = 0 #likelihood

    #extract various variables from the dataframe
    reward = data[:reward_percent]
    delay = data[:delay_s]
    force = data[:force_trial]  
    missed = data[:missed] 
    c = data[:approach_avoid]
    
    prevc = 0

    for i = 1:length(c)
                    
            # decrease estimate of global reward rate for encounter time (2seconds)
            Q_estimate = (1-lr) * Q_estimate .+ 0
            Q_estimate = (1-lr) * Q_estimate .+ 0
        
            #calculate estimate of opportunity cost given estimate of reward rate and delay incurred by option 
            opp_cost_estimate = Q_estimate*delay[i]
        
            # if not a force trial predict choice based on current values
            if ((force[i]<1) & (missed[i]<1))
                        
                # decision variable - the estimate of opportunity cost ("reward" of rejecting) versus 
                # reward of the current option (if accepted)
                Qd = [intercept, 0] .+ [beta.*opp_cost_estimate[1], beta.*reward[i]]

                #if previously logged choice wasn't a miss or initial trial
                if prevc>0
                    Qd[prevc] += ps # bonus Qd[prevc] by ps 
                end
            
                # increment likelihood
                lik += Qd[c[i]] - log(sum(exp.(Qd)))
            
            end
            
            #incur 8second time out for missed response
            if (missed[i]==1)
            
                for j = 1:8
                
                     Q_estimate = (1-lr) * Q_estimate .+ 0

                end
            
            end
        
            # regardless of whether a force trial or not, 
            # if accept the option, Q_estimate updates and there is a delay incurred
            if ((c[i] == 2) & (missed[i]==0))
                
                for j = 1:delay[i]
                
                    Q_estimate = (1-lr) * Q_estimate .+ 0
                
                end
            
                Q_estimate = (1-lr) * Q_estimate .+ lr*reward[i]
                
            end
        
            # store previous choice to apply perseverance bonus
            prevc = c[i]
    
    end
        
    # here if running em you can only return the likelihood
    return -lik
    
end


# Parameter optimisiation

### setup variables for em

In [9]:

#store list of actual subject numbers (in subj)
subs = unique(df[:subj])

#put in a new column called "sub" which is identical to subj - em looks for this
df[:sub] = df[:subj];

NS = length(subs)
X = ones(NS)
betas = [0. 0. 0. 0.]
sigma = [1., 1., 1., 1.];


### Run em to get best fit parameters for each subject

In [10]:

# run em
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
(betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, model_symmetric; emtol=emtol, parallel=parallel, full=full);



iter: 13
betas: [1.04 0.07 -1.42 0.36]
sigma: [1.01, 0.0, 2.07, 0.23]
free energy: -2742.133453
change: [2.0e-6, 5.4e-5, -2.0e-6, 4.0e-6, 9.5e-5, 0.000976, 1.8e-5, 7.6e-5]
max: 0.000976


### Generate Model Statistics 
(LOOCV)

In [11]:

#compute unbiased per subject marginal likelihoods via cross validation.
liks = loocv(df, subs, x, X, betas, sigma, model_symmetric; emtol=emtol, parallel=parallel, full=full)

print(sum(liks))


Subject: 1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..30..31..32..33..34..35..36..37..38..2565.15433911757

### Write loocv scores to csv file and save

(if you have run loocv above)

In [12]:

#put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));

CSV.write("loocv_scores.csv", DataFrame(loocv_scores))


"loocv_scores.csv"

### Calculate and write p values, std error and covariance

In [13]:

# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, model_symmetric);


In [14]:

model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),
covmtx_3 = vec(covmtx[:,3]),
covmtx_4 = vec(covmtx[:,4]));

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));


### Write per subject model parameters to csv files and save

In [15]:

# put parameters into variable d
d=x;

# now put parameters into dataframe
params = DataFrame(sub = subs,
intercept = vec(d[:,1]), 
beta = vec(d[:,2]),
learning_rate_raw = vec(d[:,3]),
perseverance = vec(d[:, 4]));

CSV.write("subject_params.csv", DataFrame(params))


"subject_params.csv"