# Start up commands/load relevant functions

In [1]:
# load required libraries
using Distributed

# # set everything up
parallel = true # Run on multiple CPUs. If youhttp://localhost:8888/notebooks/Dropbox/Daw_Lab/PreySelection/v103/models/model_subjective1beta2lr_delayreward/model_subjective1beta2lr_delayreward.jl.ipynb# are having trouble, set parallel = false: easier to debug

# this activates the multiprocessing threads
if (parallel)
	# only run this once
    addprocs(2)
end

# load required libraries
@everywhere using DataFrames
@everywhere using ForwardDiff
@everywhere using PyCall
@everywhere using Distributions
@everywhere using PyPlot
@everywhere using CSV
@everywhere using SpecialFunctions
@everywhere using SharedArrays
@everywhere using LinearAlgebra

@everywhere PyCall.@pyimport scipy.optimize as so

# this is the code for the actual fitting routines
@everywhere include("em.jl")
@everywhere include("common.jl")
@everywhere include("likfuns.jl")

# this is generates starting matricies for betas, sigmas etc to feed into model
@everywhere include("genVars.jl")


┌ Info: Precompiling CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1187


      From worker 2:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 2:	└ @ Base loading.jl:941
      From worker 3:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 3:	└ @ Base loading.jl:941


│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:941
│ Use `(covvar < 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:299
│ Use `(diag(covmtx)[i] .< 0) ? NaN : diag` instead.
└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:299
│ Use `(covvar < 0) ? NaN :` instead.
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ 

└ @ nothing /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/genVars.jl:17
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/genVars.jl:17
└ @ ~/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/genVars.jl:17


# Data read and process

### Read in data

In [2]:
#read in data
df = readtable("/Users/Neil/GitHubRepo/Projects/ValueInference/study4_mri/data/gem_dat.csv")

#get rid of missed responses
df = df[df[:missed_trial].!=1,:]

#add "sub column" 
# this is just a replica of the existing column sub_no but I think em looks for "sub" specifically
df[:sub] = df[:participantID];

#change coding so that 1 = market 1 in dependent condition,
#2 and 3 refer to the two markets in the independent condition
df[:market_presented] = df[:market_presented] + 1
df[df[:blockType].==1,:market_presented] = 1

#code picking white as 2, picking black as 1
df[:state_chosen] = df[:pick_black]
df[df[:state_chosen].==0, :state_chosen] = 2

#convert this so can use in model
df[:state_chosen] = convert(Vector{Integer}, df[:state_chosen])

head(df)

│   caller = top-level scope at In[2]:1
└ @ Core In[2]:1
│   caller = top-level scope at In[2]:10
└ @ Core In[2]:10
│   caller = top-level scope at In[2]:22
└ @ Core In[2]:22


Unnamed: 0_level_0,participantID,block_n,trials,blockType,forcedTrial,gem_presented,market_presented,door_side,chooseLeft,outcomeState,outcome,ons_fixation,ons_door_display,ons_responsecue,ons_gem_fixation,ons_outcome_display,ons_condition_text,ons_trigger,missed_trial,rew_loss,rt,prob_market_presented,correct_choice,market_reversal,pick_black,black_presented_force,prob_independent_1,prob_independent_2,prob_dependent,sub,state_chosen
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64,Int64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Integer
1,1,1,1,1,1,2,1,1,1.0,2,0.0,12435.6,12440.7,12443.7,12445.3,12448.1,12432.6,,0,1,0.658662,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
2,1,1,2,1,0,2,1,2,1.0,1,-1.0,12450.1,12452.6,12456.1,12457.6,12461.8,,,0,-1,0.599319,0.8,0.0,0,1.0,,0.2,0.8,0.8,1,1
3,1,1,3,1,1,2,1,1,1.0,2,0.0,12463.8,12466.4,12468.2,12469.8,12474.8,,,0,1,0.781297,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
4,1,1,4,1,0,1,1,1,1.0,2,0.0,12476.8,12481.7,12484.1,12485.6,12488.7,,,0,-1,0.889409,0.8,1.0,0,2.0,,0.2,0.8,0.8,1,2
5,1,1,5,1,1,1,1,1,1.0,1,-1.0,12490.7,12494.0,12496.4,12497.9,12501.1,,,0,-1,0.389441,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,1
6,1,1,6,1,1,1,1,2,0.0,1,-1.0,12503.1,12509.0,12512.2,12513.8,12517.5,,,0,-1,0.302638,0.8,,0,2.0,0.0,0.2,0.8,0.8,1,2


In [3]:
#exlude subs 21 and 28..

df = df[df[:participantID].!=21,:];
df = df[df[:participantID].!=28,:];


In [4]:
#use recoded condition variable in the model
df[:condition_recode] = df[:blockType]
df[df[:condition_recode].==2,:condition_recode] = -1

#now 

-1

# RL Model

In [5]:
@everywhere function rl_model(params, data)
    
    #model parameteres
	beta_mb = params[1] #weight for MB
    w_intercept = params[2] #weight for MB
    w_slope = params[3]
    lr =  0.5 .+ 0.5.*erf(params[4]/sqrt(2))
    
    c1 = data[:state_chosen] # choice: 1 = black door, 2 = white door
    r = data[:outcome] # outcome: coded as +1 = gain, -1 = loss, 0 = neutral 
    s = data[:outcomeState] # stage 2 state: coded as 1 = gain/loss state reached, 2 = neutral state reached
    t = data[:trials] # trial number
    sub = data[:sub] # subject number
    condition = data[:condition_recode] # condition: 1 = dependent, -1=independent
    gem = data[:gem_presented] #gem presented
    market = data[:market_presented] #market presented
    reward_loss_trial = data[:rew_loss]
    force_t = data[:forcedTrial]
    block_n = data[:block_n]
    
    SR_m = zeros(typeof(beta_mb), 2) .+ 0.5 #initalise to 0.5. stores estimates of transition probabilities for black/white door going to reward/loss state 
    SR_gem = zeros(typeof(beta_mb), 4) .+ 0.5 #initalise to 0.5. stores estimates of transition probabilities for black/white door going to reward/loss state 
   
	Qmb = zeros(typeof(beta_mb), 2) #decision variable
    Qmb_gem = zeros(typeof(beta_mb), 2) #decision variable
    
    #encode in the frame of getting to the rl state
    prob_rl_chosen_m = [];
    prob_rl_unchosen_m = [];
    
    ev_rl_chosen_m = [];
    ev_rl_unchosen_m = [];
    
    prob_rl_chosen_gem = [];
    prob_rl_unchosen_gem = [];

    ev_rl_chosen_gem = [];
    ev_rl_unchosen_gem = [];
    
    PE_m_compile_signed = [];
    PE_m_compile_abs = [];
    PE_gem_compile_signed = [];
    PE_gem_compile_abs = [];
    
    # initialize likelihood
    lik = 0 
    
	for i = 1:length(c1)
        
        w = w_intercept + w_slope*(condition[i])

        if gem[i]<3
            index = 1            
        else
            index = 2
        end
                
        Qmb = [SR_m[index].*reward_loss_trial[i], (1-SR_m[index]).*reward_loss_trial[i]]
        Qmb_gem = [SR_gem[gem[i]].*reward_loss_trial[i], (1-SR_gem[gem[i]]).*reward_loss_trial[i]]
       
        if (c1[i]==1)
            append!(prob_rl_chosen_m, SR_m[index]); append!(prob_rl_unchosen_m, 1-SR_m[index]);
            append!(prob_rl_chosen_gem, SR_gem[gem[i]]); append!(prob_rl_unchosen_gem, 1-SR_gem[gem[i]]);
            append!(ev_rl_chosen_m, Qmb[1]); append!(ev_rl_unchosen_m, Qmb[2]);    
            append!(ev_rl_chosen_gem, Qmb_gem[1]); append!(ev_rl_unchosen_gem, Qmb_gem[2]);    
            
        elseif (c1[i]==2)
            append!(prob_rl_chosen_m, 1-SR_m[index]); append!(prob_rl_unchosen_m, SR_m[index]);
            append!(prob_rl_chosen_gem, 1-SR_gem[gem[i]]); append!(prob_rl_unchosen_gem, SR_gem[gem[i]]);    
            append!(ev_rl_chosen_m, Qmb[2]); append!(ev_rl_unchosen_m, Qmb[1]);  
            append!(ev_rl_chosen_gem, Qmb_gem[2]); append!(ev_rl_unchosen_gem, Qmb_gem[1]);    
        end
        
        # given Q values, posterior probability that choice was the observed choice is given by the softmax
        # add that likelihood to the running likelihood
        #only implement for force trials
        if (force_t[i] == 0)
            
            #Q-values that determine the decision
            #Qd = beta_mb.*Qmb + beta_mb_gem.*Qmb_gem
            Q_combined = (1-w).*Qmb + w.*Qmb_gem
            Qd = beta_mb.*Q_combined
            
            lik += Qd[c1[i]] .- log(sum(exp.(Qd)))
            
        else
        end

        # updates go in here - these are updates of probability estimates (not contingent on outcome)
        if (s[i]==1 & c1[i]==1)
            PE_m = 1 - SR_m[index]
            SR_m[index] = (1-lr)*SR_m[index] .+ lr*1
            PE_gem = 1 - SR_gem[gem[i]]
            SR_gem[gem[i]] = (1-lr)*SR_gem[gem[i]] .+ lr*1
        elseif (s[i]==2 & c1[i]==2)
            PE_m = 1 - SR_m[index]
            SR_m[index] = (1-lr)*SR_m[index] .+ lr*1
            PE_gem = 1 - SR_gem[gem[i]]           
            SR_gem[gem[i]] = (1-lr)*SR_gem[gem[i]] .+ lr*1
        else
            PE_m = 0 - SR_m[index]
            SR_m[index] = (1-lr)*SR_m[index] .+ lr*0
            PE_gem = 0 - SR_gem[gem[i]]            
            SR_gem[gem[i]] = (1-lr)*SR_gem[gem[i]] .+ lr*0
        end
        
        append!(PE_m_compile_signed, PE_m); 
        append!(PE_m_compile_abs, abs(PE_m));    
        append!(PE_gem_compile_signed, PE_gem);    
        append!(PE_gem_compile_abs, abs(PE_gem));
        
	end
    
    #compile trial by trial values here
    trial_data = DataFrame(trial = t,
    sub = sub,
    block_n = block_n,
    choice = c1,
    outcomeState = s,
    outcome = r,
    gem = gem,
    condition = condition,
    market = market,
    force_t = force_t,
    prob_rl_chosen_m = prob_rl_chosen_m,
    prob_rl_unchosen_m = prob_rl_unchosen_m,
    ev_rl_chosen_m = ev_rl_chosen_m,
    ev_rl_unchosen_m = ev_rl_unchosen_m,
    prob_rl_chosen_gem = prob_rl_chosen_gem,
    prob_rl_unchosen_gem = prob_rl_unchosen_gem,
    ev_rl_chosen_gem =ev_rl_chosen_gem,
    ev_rl_unchosen_gem = ev_rl_unchosen_gem,
    PE_m_compile_signed = PE_m_compile_signed,
    PE_m_compile_abs = PE_m_compile_abs,
    PE_gem_compile_signed = PE_gem_compile_signed,
    PE_gem_compile_abs = PE_gem_compile_abs)

    # here if running em you can only return the likelihood
    return -lik
    #return (-lik, trial_data)

    
end

# Parameter optimisation

### Run model for one subject
(aids debugging)

In [9]:
# initialize parameter structures
(df, subs, X, betas, sigma) = genVars(df, 4);

# run model for sub 1
rl_model(betas, df[df[:sub].==subs[1], :])

53.372332903115804

### Run em to get best fit parameters for each subject

In [12]:
# initialized parameter structures (again)
# note that some of the variables (e.g. betas, sigma) are entered and returned by em function 
(df, subs, X, betas, sigma) = genVars(df, 4);

# run for full learner
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
@time (betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, rl_model; emtol=1e-3, parallel=true, full=true, quiet=false);



iter: 73
betas: [1.5, 0.38, -0.27, 0.41]
sigma: [0.9 0.08 -0.31 -0.68; 0.08 0.04 -0.02 -0.02; -0.31 -0.02 0.11 0.26; -0.68 -0.02 0.26 0.92]
change: [3.8e-5, 7.7e-5, -8.1e-5, 0.000862, 0.000245, 2.8e-5, -2.2e-5, -0.000754, 0.000624, -0.000232, -0.001158, 0.000916, 0.00015, 0.000699]
max: 0.000916
160.646370 seconds (70.46 M allocations: 2.072 GiB, 0.69% gc time)


### Generate Model Statistics 

IBIC, IAIC and LOOcv

In [27]:
## model selection/comparison/scoring

# laplace approximation to the aggregate log marginal likelihood of the whole dataset
# marginalized over the individual params

aggll = lml(x, l, h)

# to compare this between models you need to correct for the group-level free parameters
# either aic or bic

aggll_ibic = ibic(x, l, h, betas, sigma, nrow(df))
aggll_iaic = iaic(x, l, h, betas, sigma)

# or you can compute unbiased per subject marginal likelihoods via subject-level cross validation
# you can do paired t tests on these between models
# these are also appropriate for SPM_BMS etc

# takes ages so comment in when want to run, otherwise just use IAIC above
liks = loocv(df, subs, x, X, betas, sigma, rl_model; emtol=1e-3, parallel=true, full=true)
#aggll_loo = sum(liks)

#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll: $aggll_iaic\nloo nll:  $aggll_loo")
#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll:")
print(aggll_iaic)

Subject: 1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..1350.093646152738

### Write loocv scores to csv file

(if you have run this part above)

In [28]:
# put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));

#write to csv
CSV.write("loocv_scores.csv", DataFrame(loocv_scores))

"loocv_scores.csv"

### Calculate and write p values, std error and covariance

In [29]:
# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, rl_model);

  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
in #53 at none
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
  likely near /Users/neil/.julia/packages/IJulia/GIANC/src/kernel.jl:41
in #53 at none
│   caller = emerrors(::DataFrame, ::Array{Union{Missing, Int64},1}, ::SharedArray{Float64,2}, ::Array{Float64,3}, ::SharedArray{Float64,3}, ::Array{Float64,1}, ::Array{Float64,2}, ::Function) at em.jl:300
└ @ Main /Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/model5/em.jl:300


In [30]:
model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),
covmtx_3 = vec(covmtx[:,3]),
covmtx_4 = vec(covmtx[:,4]))

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));

In [31]:
print(standarderrors)


[0.204707, 0.0785324, 0.0477878, 0.328098]

In [33]:
print(pvalues)


[1.97961e-13, 1.54164e-6, 0.00426516, 0.217232]

In [32]:
print(covmtx)


[0.0419051 0.0026705 -0.00473983 -0.0280005; 0.0026705 0.00616734 0.000115523 0.00269598; -0.00473983 0.000115523 0.00228368 0.00554894; -0.0280005 0.00269598 0.00554894 0.107648]

### Write per subject model parameters to csv file


#### Save a copy of just the parameters

In [13]:
# put parameters into variable d
d=x';

# now put parameters into dataframe
params = DataFrame(sub = subs,
beta_mb = vec(d[:, 1]),
w_intercept = vec(d[:, 2]),
w_slope = vec(d[:, 3]),   
eta_unconverted = vec(d[:, 4]),
eta_converted = vec(0.5 .+ 0.5*erf.(d[:, 4] / sqrt(2))))

# save parameters to csv file
CSV.write("subject_params.csv", DataFrame(params))

"subject_params.csv"

In [None]:
params = readtable("subject_params.csv")

In [51]:
# initialize parameter structures once again
(df, subs, X, betas, sigma) = genVars(df, 4);

# initalise this - will store all trial to trial parameters
trial_data_compile = [];

# run model for each subject using best fit parameters
for x = 1:length(subs)

    # pull out optimal betas for subject - these are used in the model
    # note: you want the unconverted learning score to be fed in
    betas_sub = convert(Array, params[x, [:beta_mb, :eta_unconverted, :w_intercept, :w_slope]])
    data_sub = df[df[:sub].==subs[x], :]
    
    # run model using these parameters - note must have commented in the model to return all of these variables (and not only -lik)
    (minus_li, trial_data) = rl_model(betas_sub, data_sub)
    
    if x.==1
        
        trial_data_compile = trial_data
        
    else
        
        append!(trial_data_compile, trial_data)
        
    end
 
end
# check these are all the same sizes
print(size(df))
print(size(trial_data_compile))

# print header of data compile
head(trial_data_compile)

CSV.write("trial_by_trial_vals.csv", DataFrame(trial_data_compile))

│ Use `global x` instead.
└ @ nothing none:0
└ @ nothing In[51]:6
│   caller = top-level scope at In[51]:12
└ @ Core ./In[51]:12


(9094, 32)(9094, 22)

│   caller = top-level scope at In[51]:32
└ @ Core In[51]:32


"trial_by_trial_vals.csv"

# END