** Neil Garrett, June 2018 **
uses julia EM model fitting by Nathaniel Daw


# Start up commands/load relevant functions

In [1]:

parallel = true   # run on multiple CPUs; if you are having trouble, set to false (easier to debug)
full = false      # keep a full covariance matrix (vs a diagonal one) at the group level
emtol = 1e-3      # stopping condition (relative change) for EM

using Distributed
if parallel
    # only run this once
    addprocs()
end

# Load the packages needed -- @everywhere makes sure they are
# available on all CPUs.
@everywhere using DataFrames, SharedArrays, ForwardDiff, Optim
@everywhere using LinearAlgebra       # for tr, diagonal
@everywhere using StatsFuns           # logsumexp
@everywhere using SpecialFunctions    # for erf
@everywhere using Statistics          # for mean
@everywhere using Distributions, GLM
@everywhere using CSV                 # for reading/writing csv files

# change this to where you keep Daw's latest em code
@everywhere directory = "/Users/neil/GitHubRepo/Projects/PreySelection/em"

# load in functions, including em, on every process
@everywhere include("$directory/em.jl");
@everywhere include("$directory/common.jl");
@everywhere include("$directory/likfuns.jl")


# Data read and process

### Read in trial by trial data

In [2]:

# Trial-by-trial data; note this includes force trials and missed responses.
trialdata_file = "/Users/neil/GitHubRepo/Projects/PreySelection/v103/data/trialdata_103_processed.csv"

# read the csv file into a data frame
df = CSV.read(trialdata_file);


### Get rid of excluded subs

In [3]:

# keep only the rows whose exclude flag is 0
not_excluded = df[:exclude] .== 0
df = df[not_excluded, :];


### Convert approach/avoid to 2s and 1s, with missed responses as 0. Then convert to integers (necessary to use as an index)

In [4]:

# Recode approach_avoid so it can be used directly as an index:
#   -1 (avoid) -> 1,   1 (approach) -> 2,   NaN (missed response) -> 0.
# The approach recode (1 -> 2) has to run before the avoid recode (-1 -> 1);
# in the opposite order the freshly written 1s would be recoded a second time.
df[df[:approach_avoid].==1,:approach_avoid] = 2
df[df[:approach_avoid].==-1,:approach_avoid] = 1

# put 0 for missed responses (the entries that are NaN at this point)
index_NaN = findall(isnan.(df[:approach_avoid]))
df[index_NaN, :approach_avoid] = 0

# Convert to a concrete integer element type. Vector{Integer} has an abstract
# element type, which forces every element to be boxed and makes each c[i]
# lookup in the likelihood loop type-unstable; Vector{Int} holds the same values.
df[:approach_avoid] = convert(Vector{Int}, df[:approach_avoid])

first(df, 6)


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Integer,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰
1,1,0,1,../static/images/invador2.png,4,20,8,2.5,left,74,1,1172.0,1.475,0,0,2,0,do not exclude
2,1,1,1,../static/images/invador3.png,1,80,2,40.0,right,74,2,1046.0,0.846311,1,0,2,0,do not exclude
3,1,2,1,../static/images/invador1.png,3,80,8,10.0,left,70,2,743.0,-0.665547,0,0,2,0,do not exclude
4,1,3,1,../static/images/invador2.png,4,20,8,2.5,right,74,0,,,1,1,2,0,do not exclude
5,1,4,1,../static/images/invador2.png,4,20,8,2.5,left,70,2,858.0,-0.0917396,0,0,2,0,do not exclude
6,1,5,1,../static/images/invador4.png,2,20,2,10.0,left,70,2,818.0,-0.291325,0,0,2,0,do not exclude


### Keep First Block Only (throw out second block)

In [5]:

# Rows to keep: block 1 when order_condition is 2, block 0 when order_condition is 1.
keep_order2 = (df[:order_condition] .== 2) .& (df[:block] .== 1)
keep_order1 = (df[:order_condition] .== 1) .& (df[:block] .== 0)
df = df[keep_order2 .| keep_order1, :]


Unnamed: 0_level_0,subj,trial_index_actual,block,stimulus,stim_rank,reward_percent,delay_s,profitability,stim_left_right,key_press,approach_avoid,rt,rt_z,force_trial,missed,order_condition,exclude,exclude_reason
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,String⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,String⍰,Int64⍰,Integer,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,String⍰
1,1,0,1,../static/images/invador2.png,4,20,8,2.5,left,74,1,1172.0,1.475,0,0,2,0,do not exclude
2,1,1,1,../static/images/invador3.png,1,80,2,40.0,right,74,2,1046.0,0.846311,1,0,2,0,do not exclude
3,1,2,1,../static/images/invador1.png,3,80,8,10.0,left,70,2,743.0,-0.665547,0,0,2,0,do not exclude
4,1,3,1,../static/images/invador2.png,4,20,8,2.5,right,74,0,,,1,1,2,0,do not exclude
5,1,4,1,../static/images/invador2.png,4,20,8,2.5,left,70,2,858.0,-0.0917396,0,0,2,0,do not exclude
6,1,5,1,../static/images/invador4.png,2,20,2,10.0,left,70,2,818.0,-0.291325,0,0,2,0,do not exclude
7,1,6,1,../static/images/invador2.png,4,20,8,2.5,left,70,2,1076.0,0.996,0,0,2,0,do not exclude
8,1,7,1,../static/images/invador2.png,4,20,8,2.5,right,74,2,987.0,0.551923,0,0,2,0,do not exclude
9,1,8,1,../static/images/invador1.png,3,80,8,10.0,left,74,1,766.0,-0.550786,2,0,2,0,do not exclude
10,1,9,1,../static/images/invador4.png,2,20,2,10.0,right,74,2,1059.0,0.911176,0,0,2,0,do not exclude


# Asymmetric Model

This model comprises: 

1. An intercept which reflects degree of bias to reject.

2. A beta (temperature parameter) which controls sensitivity to the difference between the options (0 = pick 50/50; the higher it is, the more sensitive subs are to the difference between the options, i.e. the choice function becomes more step-function-like). <br>

3. Two learning rates: one for the appetitive component (reward), one for the aversive component (delay)

Uses Q learned average to predict choice

Initialise Qaverage in the model at the arithmetic average over all subs over both sessions

In [6]:

# model_asymmetric(params, data; q_init, encounter_time, timeout)
#
# Negative log-likelihood of one data set's accept/reject choices under the
# asymmetric-learning-rate model.
#
# params (indexed 1..4):
#   1  intercept       added to the value of rejecting
#   2  beta            scales both decision values
#   3  raw lr_pos      mapped to a rate between 0 and 1 via 0.5 + 0.5*erf(x/sqrt(2));
#                      used when a reward is received
#   4  raw lr_neg      mapped the same way; used for the per-time-step decay
#
# data: indexable by column symbol, with columns :reward_percent, :delay_s,
#   :force_trial, :missed and :approach_avoid (1 = avoid, 2 = approach).
#
# Keywords (the defaults reproduce the values that used to be hard-coded):
#   q_init          starting value of the reward-rate estimate
#   encounter_time  number of decay steps applied at the start of every trial
#   timeout         number of decay steps applied after a missed response
#
# Returns -log-likelihood summed over trials that are neither forced nor missed.
@everywhere function model_asymmetric(params, data;
                                      q_init = 8.22,
                                      encounter_time = 2,
                                      timeout = 8)

    # model parameters
    intercept = params[1]
    beta = params[2]
    lr_pos = 0.5 + 0.5 * erf(params[3] / sqrt(2))
    lr_neg = 0.5 + 0.5 * erf(params[4] / sqrt(2))

    # fraction of the reward-rate estimate retained per elapsed time step
    retain = 1 - lr_neg

    # running estimate of the global reward rate; built from zero(beta) so it
    # carries the same number type as the parameters (e.g. under ForwardDiff)
    Q_estimate = zero(beta) + q_init

    # log-likelihood accumulator, typed like Q_estimate so it does not start
    # out as an Int and change type inside the loop
    lik = zero(Q_estimate)

    # extract various variables from the dataframe
    reward = data[:reward_percent]
    delay = data[:delay_s]
    force = data[:force_trial]
    missed = data[:missed]
    c = data[:approach_avoid]

    for i in eachindex(c)

        # decrease estimate of global reward rate for the encounter time
        for _ in 1:encounter_time
            Q_estimate = retain * Q_estimate
        end

        # if not a force trial (and not missed) predict choice based on current values
        if (force[i] < 1) & (missed[i] < 1)

            # value of rejecting: intercept plus the scaled opportunity cost
            # (reward rate times the delay the option would incur)
            v_reject = intercept + beta * (Q_estimate * delay[i])
            # value of accepting: the scaled reward of the current option
            v_accept = beta * reward[i]
            Qd = (v_reject, v_accept)

            # log(exp(v_reject) + exp(v_accept)), shifted by the larger value
            # so that exp cannot overflow when beta * reward is large
            vmax = max(v_reject, v_accept)
            log_norm = vmax + log(exp(v_reject - vmax) + exp(v_accept - vmax))

            # increment log-likelihood with the log-probability of the observed choice
            lik += Qd[c[i]] - log_norm

        end

        # incur the time out for a missed response
        if missed[i] == 1
            for _ in 1:timeout
                Q_estimate = retain * Q_estimate
            end
        end

        # regardless of whether a force trial or not, if the option is accepted
        # the delay is incurred and then the estimate moves toward the reward
        if (c[i] == 2) & (missed[i] == 0)
            for _ in 1:delay[i]
                Q_estimate = retain * Q_estimate
            end
            Q_estimate = (1 - lr_pos) * Q_estimate + lr_pos * reward[i]
        end

    end

    # here if running em you can only return the likelihood
    return -lik

end


# Parameter optimisation

### setup variables for em

In [7]:

# actual subject numbers, taken from the subj column
subs = unique(df[:subj])

# em looks for a column called "sub", so add one identical to subj
df[:sub] = df[:subj];

NS = length(subs)      # number of subjects
X = ones(NS)           # one entry per subject, all ones
betas = zeros(1, 4)    # 1x4 row of starting values, one per model parameter
sigma = ones(4);       # starting values, one per model parameter


### Run em to get best fit parameters for each subject

In [8]:

# Run em.
#   x     parameters for each subject (note: not the same as variable X)
#   l, h  per-subject likelihoods and hessians
(betas, sigma, x, l, h) = em(
    df, subs, X, betas, sigma, model_asymmetric;
    emtol = emtol, parallel = parallel, full = full
);



iter: 34
betas: [-1.79 0.07 -2.32 -2.62]
sigma: [3.83, 0.0, 0.04, 0.07]
free energy: -1620.528652
change: [-8.5e-5, 1.0e-6, -7.3e-5, -6.0e-5, 0.000291, 0.000154, 0.000605, 0.000916]
max: 0.000916


### Generate Model Statistics 
(LOOCV)

In [9]:

# Unbiased per-subject marginal likelihoods, computed via cross validation.
liks = loocv(
    df, subs, x, X, betas, sigma, model_asymmetric;
    emtol = emtol, parallel = parallel, full = full
)

# total over subjects
print(sum(liks))


Subject: 1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..30..31..32..33..34..35..36..37..38..39..40..1466.010183751361

### Write loocv scores to csv file and save

(if you have run loocv above)

In [10]:

# put loocv scores into a dataframe, one row per subject
loocv_scores = DataFrame(sub = subs,
                         liks = vec(liks));

# loocv_scores is already a DataFrame, so it is written directly rather than
# being wrapped in a second DataFrame(...) call first
CSV.write("loocv_scores.csv", loocv_scores)


"loocv_scores.csv"

### Calculate and write p values, std error and covariance

In [11]:

# Standard errors on the subject-level means, based on an asymptotic Gaussian
# approximation (these may be inflated, especially for small n).
(standarderrors, pvalues, covmtx) = emerrors(
    df, subs, x, X, h, betas, sigma, model_asymmetric
);


In [12]:

# Collect the group-level statistics returned by emerrors: standard errors,
# p values, and the four columns of the covariance matrix.
model_stats = DataFrame(stderror = vec(standarderrors),
                        pvalues = vec(pvalues),
                        covmtx_1 = vec(covmtx[:, 1]),
                        covmtx_2 = vec(covmtx[:, 2]),
                        covmtx_3 = vec(covmtx[:, 3]),
                        covmtx_4 = vec(covmtx[:, 4]));

# save model stats to csv file; model_stats is already a DataFrame, so it is
# written directly rather than being wrapped in a second DataFrame(...) call
CSV.write("model_stats.csv", model_stats);


### Write per subject model parameters to csv files and save

In [13]:

# put the per-subject parameters returned by em into variable d
d=x;

# One row per subject, one column per model parameter. The learning rates are
# saved on the raw scale, i.e. before model_asymmetric maps them through
# 0.5 + 0.5*erf(x/sqrt(2)).
params = DataFrame(sub = subs,
                   intercept = vec(d[:,1]),
                   beta = vec(d[:,2]),
                   learning_rate_raw_pos = vec(d[:,3]),
                   learning_rate_raw_neg = vec(d[:,4]));

# params is already a DataFrame, so it is written directly rather than being
# wrapped in a second DataFrame(...) call first
CSV.write("subject_params.csv", params)


"subject_params.csv"