# Start up commands/load relevant functions

In [1]:
# load required libraries
using Distributed

# # set everything up
parallel = true # Run on multiple CPUs. If youhttp://localhost:8888/notebooks/Dropbox/Daw_Lab/PreySelection/v103/models/model_subjective1beta2lr_delayreward/model_subjective1beta2lr_delayreward.jl.ipynb# are having trouble, set parallel = false: easier to debug

# this activates the multiprocessing threads
if (parallel)
	# only run this once
    addprocs(2)
end

# load required libraries
@everywhere using DataFrames
@everywhere using ForwardDiff
@everywhere using PyCall
@everywhere using Distributions
@everywhere using PyPlot
@everywhere using CSV
@everywhere using SpecialFunctions
@everywhere using SharedArrays
@everywhere using LinearAlgebra

@everywhere PyCall.@pyimport scipy.optimize as so

# this is the code for the actual fitting routines
@everywhere include("em.jl")
@everywhere include("common.jl")
@everywhere include("likfuns.jl")

# this is generates starting matricies for betas, sigmas etc to feed into model
@everywhere include("genVars.jl")


┌ Info: Precompiling CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1187


      From worker 3:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 3:	└ @ Base loading.jl:941
      From worker 2:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 2:	└ @ Base loading.jl:941


│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:941
│ Use `(covvar < 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:299
│ Use `(diag(covmtx)[i] .< 0) ? NaN : diag` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:299
│ Use `(covvar < 0) ? NaN :` instead.
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ 

└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/genVars.jl:17
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/genVars.jl:17
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model1/genVars.jl:17


# Data read and process

### Read in data

In [29]:
#read in data
df = readtable("/Users/Neil/GitHubRepo/Projects/ValueInference/study4_mri/data/gem_dat.csv")

#get rid of missed responses
df = df[df[:missed_trial].!=1,:]

#add "sub column" 
# this is just a replica of the existing column sub_no but I think em looks for "sub" specifically
df[:sub] = df[:participantID];

#change coding so that 1 = market 1 in dependent condition,
#2 and 3 refer to the two markets in the independent condition
df[:market_presented] = df[:market_presented] + 1
df[df[:blockType].==1,:market_presented] = 1

#code picking white as 2, picking black as 1
df[:state_chosen] = df[:pick_black]
df[df[:state_chosen].==0, :state_chosen] = 2

#convert this so can use in model
df[:state_chosen] = convert(Vector{Integer}, df[:state_chosen])

head(df)

│   caller = top-level scope at In[29]:1
└ @ Core In[29]:1
│   caller = top-level scope at In[29]:10
└ @ Core In[29]:10
│   caller = top-level scope at In[29]:20
└ @ Core In[29]:20


Unnamed: 0_level_0,participantID,block_n,trials,blockType,forcedTrial,gem_presented,market_presented,door_side,chooseLeft,outcomeState,outcome,ons_fixation,ons_door_display,ons_responsecue,ons_gem_fixation,ons_outcome_display,ons_condition_text,ons_trigger,missed_trial,rew_loss,rt,prob_market_presented,correct_choice,market_reversal,pick_black,black_presented_force,prob_independent_1,prob_independent_2,prob_dependent,sub,state_chosen
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64,Int64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Integer
1,1,1,1,1,1,2,1,1,1.0,2,0.0,12435.6,12440.7,12443.7,12445.3,12448.1,12432.6,,0,1,0.658662,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
2,1,1,2,1,0,2,1,2,1.0,1,-1.0,12450.1,12452.6,12456.1,12457.6,12461.8,,,0,-1,0.599319,0.8,0.0,0,1.0,,0.2,0.8,0.8,1,1
3,1,1,3,1,1,2,1,1,1.0,2,0.0,12463.8,12466.4,12468.2,12469.8,12474.8,,,0,1,0.781297,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
4,1,1,4,1,0,1,1,1,1.0,2,0.0,12476.8,12481.7,12484.1,12485.6,12488.7,,,0,-1,0.889409,0.8,1.0,0,2.0,,0.2,0.8,0.8,1,2
5,1,1,5,1,1,1,1,1,1.0,1,-1.0,12490.7,12494.0,12496.4,12497.9,12501.1,,,0,-1,0.389441,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
6,1,1,6,1,1,1,1,2,0.0,1,-1.0,12503.1,12509.0,12512.2,12513.8,12517.5,,,0,-1,0.302638,0.8,,0,2.0,0.0,0.2,0.8,0.8,1,2


In [35]:
#exlude subs 21 and 28..

df = df[df[:participantID].!=21,:]
df = df[df[:participantID].!=28,:]


Unnamed: 0_level_0,participantID,block_n,trials,blockType,forcedTrial,gem_presented,market_presented,door_side,chooseLeft,outcomeState,outcome,ons_fixation,ons_door_display,ons_responsecue,ons_gem_fixation,ons_outcome_display,ons_condition_text,ons_trigger,missed_trial,rew_loss,rt,prob_market_presented,correct_choice,market_reversal,pick_black,black_presented_force,prob_independent_1,prob_independent_2,prob_dependent,sub,state_chosen
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64,Int64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Integer
1,1,1,1,1,1,2,1,1,1.0,2,0.0,12435.6,12440.7,12443.7,12445.3,12448.1,12432.6,,0,1,0.658662,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
2,1,1,2,1,0,2,1,2,1.0,1,-1.0,12450.1,12452.6,12456.1,12457.6,12461.8,,,0,-1,0.599319,0.8,0.0,0,1.0,,0.2,0.8,0.8,1,1
3,1,1,3,1,1,2,1,1,1.0,2,0.0,12463.8,12466.4,12468.2,12469.8,12474.8,,,0,1,0.781297,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
4,1,1,4,1,0,1,1,1,1.0,2,0.0,12476.8,12481.7,12484.1,12485.6,12488.7,,,0,-1,0.889409,0.8,1.0,0,2.0,,0.2,0.8,0.8,1,2
5,1,1,5,1,1,1,1,1,1.0,1,-1.0,12490.7,12494.0,12496.4,12497.9,12501.1,,,0,-1,0.389441,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
6,1,1,6,1,1,1,1,2,0.0,1,-1.0,12503.1,12509.0,12512.2,12513.8,12517.5,,,0,-1,0.302638,0.8,,0,2.0,0.0,0.2,0.8,0.8,1,2
7,1,1,7,1,1,1,1,1,1.0,2,0.0,12519.5,12524.3,12526.5,12528.1,12532.1,,,0,-1,0.82701,0.8,,0,2.0,0.0,0.2,0.8,0.8,1,2
8,1,1,8,1,0,2,1,2,0.0,1,1.0,12534.2,12537.9,12540.7,12542.2,12546.0,,,0,1,1.01809,0.8,1.0,0,1.0,,0.2,0.8,0.8,1,1
9,1,1,9,1,1,1,1,1,1.0,2,0.0,12548.1,12553.0,12556.3,12557.9,12563.8,,,0,-1,0.428916,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
10,1,1,10,1,1,1,1,2,0.0,1,1.0,12565.8,12568.3,12571.7,12573.2,12578.3,,,0,1,0.75484,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1


# RL Model

In [37]:
@everywhere function rl_model(params, data)
    
    #model parameteres
	beta_mb = params[1] #weight for MB
    lr = 0.5 .+ 0.5 * erf(params[2] / sqrt(2)) #learning rate
                
    c1 = data[:state_chosen] # choice: 1 = black door, 2 = white door
    r = data[:outcome] # outcome: coded as +1 = gain, -1 = loss, 0 = neutral 
    s = data[:outcomeState] # stage 2 state: coded as 1 = gain/loss state reached, 2 = neutral state reached
    t = data[:trials] # trial number
    sub = data[:sub] # subject number
    condition = data[:blockType] # condition: 1 = dependent, 2=independent
    gem = data[:gem_presented] #gem presented
    market = data[:market_presented] #market presented
    reward_loss_trial = data[:rew_loss]
    force_t = data[:forcedTrial]
    
    SR_m = zeros(typeof(beta_mb), 4, 1) .+ 0.5 #initalise to 0.5. stores estimates of transition probabilities for black/white door going to reward/loss state 

	Qmb = zeros(typeof(beta_mb), 2) #decision variable
    
    # initialize likelihood
    lik = 0 

	for i = 1:length(c1)
                                        
        Qmb_curr_gem = [SR_m[gem[i]].*reward_loss_trial[i], (1-SR_m[gem[i]]).*reward_loss_trial[i]]
        
        Qmb = Qmb_curr_gem
        
        #Q-values that determine the decision
        Qd = beta_mb.*Qmb 

        # given Q values, posterior probability that choice was the observed choice is given by the softmax
        # add that likelihood to the running likelihood
        #only implement for force trials
        if (force_t[i] == 0)
            lik += Qd[c1[i]] - log(sum(exp.(Qd)))
        else
        end
    
        # updates go in here - these are updates of probability estimates (not contingent on outcome)
        if (s[i]==1 & c1[i]==1)
            SR_m[gem[i]] = (1-lr)*SR_m[gem[i]] .+ lr*1          
        elseif (s[i]==2 & c1[i]==2)
            SR_m[gem[i]] = (1-lr)*SR_m[gem[i]] .+ lr*1
        else
            SR_m[gem[i]] = (1-lr)*SR_m[gem[i]] .+ lr*0
        end
        

	end
        
    # here if running em you can only return the likelihood
    return -lik
    
end

# Parameter optimisation

### Run model for one subject
(aids debugging)

In [43]:
# initialize parameter structures
(df, subs, X, betas, sigma) = genVars(df, 2);

# run model for sub 1
rl_model(betas, df[df[:sub].==subs[1],:])

53.372332903115804

### Run em to get best fit parameters for each subject

In [39]:
# initialized parameter structures (again)
# note that some of the variables (e.g. betas, sigma) are entered and returned by em function 
(df, subs, X, betas, sigma) = genVars(df, 2);

# run for full learner
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
@time (betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, rl_model; emtol=1e-3, parallel=true, full=true, quiet=false);



iter: 101
betas: [1.14, 0.02]
sigma: [0.24 0.06; 0.06 0.23]
change: [4.5e-5, 0.003446, 2.0e-6, 0.000392, 0.000449]
max: 0.003446
 78.094962 seconds (64.85 M allocations: 1.298 GiB, 0.31% gc time)


### Generate Model Statistics 

IBIC, IAIC and LOOcv

In [42]:
## model selection/comparison/scoring

# laplace approximation to the aggregate log marginal likelihood of the whole dataset
# marginalized over the individual params

aggll = lml(x, l, h)

# to compare this between models you need to correct for the group-level free parameters
# either aic or bic

aggll_ibic = ibic(x, l, h, betas, sigma, nrow(df))
aggll_iaic = iaic(x, l, h, betas, sigma)

# or you can compute unbiased per subject marginal likelihoods via subject-level cross validation
# you can do paired t tests on these between models
# these are also appropriate for SPM_BMS etc

# takes ages so comment in when want to run, otherwise just use IAIC above
#liks = loocv(df, subs, x, X, betas, sigma, rl_model; emtol=1e-3, parallel=true, full=true)
#aggll_loo = sum(liks)

#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll: $aggll_iaic\nloo nll:  $aggll_loo")
#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll:")
print(aggll_iaic)

1407.856832704395

### Write loocv scores to csv file

(if you have run this part above)

In [11]:
# put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));

#write to csv
CSV.write("loocv_scores.csv", DataFrame(loocv_scores))

"loocv_scores.csv"

### Calculate and write p values, std error and covariance

In [12]:
# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, rl_model);

  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
in #53 at none
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
in #53 at none
│   caller = emerrors(::DataFrame, ::Array{Union{Missing, Int64},1}, ::SharedArray{Float64,2}, ::Array{Float64,3}, ::SharedArray{Float64,3}, ::Array{Float64,1}, ::Array{Float64,2}, ::Function) at em.jl:300
└ @ Main /Users/neil/GitHubRepo/Projects/ValueInference/study1/models/model1/em.jl:300


In [13]:
model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));

In [14]:
print(standarderrors)


[0.124872, 0.0283781, 0.101596]

In [15]:
print(pvalues)


[7.15137e-28, 2.82796e-10, 0.0263853]

In [16]:
print(covmtx)


[0.0155931 -0.000557111 -0.00464208; -0.000557111 0.000805318 0.000494813; -0.00464208 0.000494813 0.0103217]

### Write per subject model parameters to csv file


#### Save a copy of just the parameters

In [41]:
# put parameters into variable d
d=x';

# now put parameters into dataframe
params = DataFrame(sub = subs,
beta_mb = vec(d[:, 1]), 
eta_unconverted = vec(d[:, 2]),
eta_converted = vec(0.5 .+ 0.5*erf.(d[:, 2] / sqrt(2))))

# save parameters to csv file
CSV.write("subject_params.csv", DataFrame(params))

"subject_params.csv"

# END