# Start up commands/load relevant functions

In [1]:
parallel = true # Run on multiple CPUs. If you are having trouble, set parallel = false: easier to debug
full = false    # Maintain full covariance matrix (vs a diagional one) at the group level
emtol = 1e-3    # stopping condition (relative change) for EM

using Distributed
if (parallel)
	# only run this once
	addprocs()
end

# this loads the packages needed -- the @everywhere makes sure they 
# available on all CPUs 

@everywhere using DataFrames
@everywhere using SharedArrays
@everywhere using ForwardDiff
@everywhere using Optim
@everywhere using LinearAlgebra       # for tr, diagonal
@everywhere using StatsFuns           # logsumexp
@everywhere using SpecialFunctions    # for erf
@everywhere using Statistics          # for mean
@everywhere using Distributions
@everywhere using GLM
@everywhere using CSV #for reading/writing csv files

# change this to where you keep the Daw's latest em code
@everywhere directory = "/Users/neil/GitHubRepo/Projects/ValueInference/study4_mri/models/em"

#load in functions including em
@everywhere include("$directory/em.jl");
@everywhere include("$directory/common.jl");
@everywhere include("$directory/likfuns.jl");


# Data read and process

### Read in data

In [2]:
#read in data
df = readtable("/Users/Neil/GitHubRepo/Projects/ValueInference/study4_mri/data/gem_dat.csv")

#get rid of missed responses
df = df[df[:missed_trial].!=1,:]

#add "sub column" 
# this is just a replica of the existing column sub_no but I think em looks for "sub" specifically
df[:sub] = df[:participantID];

#change coding so that 1 = market 1 in dependent condition,
#2 and 3 refer to the two markets in the independent condition
df[:market_presented] = df[:market_presented] + 1
df[df[:blockType].==1,:market_presented] = 1

#code picking white as 2, picking black as 1
df[:state_chosen] = df[:pick_black]
df[df[:state_chosen].==0, :state_chosen] = 2

#convert this so can use in model
df[:state_chosen] = convert(Vector{Integer}, df[:state_chosen])

head(df)

│   caller = top-level scope at In[2]:1
└ @ Core In[2]:1
│   caller = top-level scope at In[2]:10
└ @ Core In[2]:10
│   caller = top-level scope at In[2]:22
└ @ Core In[2]:22


Unnamed: 0_level_0,participantID,block_n,trials,blockType,forcedTrial,gem_presented,market_presented,door_side,chooseLeft,outcomeState,outcome,ons_fixation,ons_door_display,ons_responsecue,ons_gem_fixation,ons_outcome_display,ons_condition_text,ons_trigger,missed_trial,rew_loss,rt,prob_market_presented,correct_choice,market_reversal,pick_black,black_presented_force,prob_independent_1,prob_independent_2,prob_dependent,blackFirst,gem_colour,sub,state_chosen
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64,Int64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Integer
1,1,1,1,1,1,2,1,1,1.0,2,0.0,12435.6,12440.7,12443.7,12445.3,12448.1,12432.6,,0,1,0.658662,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,2,1,1
2,1,1,2,1,0,2,1,2,1.0,1,-1.0,12450.1,12452.6,12456.1,12457.6,12461.8,,,0,-1,0.599319,0.8,0.0,0,1.0,,0.2,0.8,0.8,-1,2,1,1
3,1,1,3,1,1,2,1,1,1.0,2,0.0,12463.8,12466.4,12468.2,12469.8,12474.8,,,0,1,0.781297,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,2,1,1
4,1,1,4,1,0,1,1,1,1.0,2,0.0,12476.8,12481.7,12484.1,12485.6,12488.7,,,0,-1,0.889409,0.8,1.0,0,2.0,,0.2,0.8,0.8,-1,4,1,2
5,1,1,5,1,1,1,1,1,1.0,1,-1.0,12490.7,12494.0,12496.4,12497.9,12501.1,,,0,-1,0.389441,0.8,,0,1.0,1.0,0.2,0.8,0.8,1,4,1,1
6,1,1,6,1,1,1,1,2,0.0,1,-1.0,12503.1,12509.0,12512.2,12513.8,12517.5,,,0,-1,0.302638,0.8,,0,2.0,0.0,0.2,0.8,0.8,-1,4,1,2


In [3]:
#exlude subs 21 and 28..

df = df[df[:participantID].!=21,:];
df = df[df[:participantID].!=28,:];


In [4]:
#use recoded condition variable in the model
df[:condition_recode] = df[:blockType]
df[df[:condition_recode].==2,:condition_recode] = -1

#now 

-1

# RL Model

In [74]:
@everywhere function rl_model(params, data)
    
    #model parameteres
	beta_mb = params[1] 
    w_slope = params[2]
    lr =  0.5 .+ 0.5.*erf(params[3]/sqrt(2))
   
    c1 = data[:state_chosen] # choice: 1 = black door, 2 = white door
    r = data[:outcome] # outcome: coded as +1 = gain, -1 = loss, 0 = neutral 
    s = data[:outcomeState] # stage 2 state: coded as 1 = gain/loss state reached, 2 = neutral state reached
    t = data[:trials] # trial number
    sub = data[:sub] # subject number
    condition = data[:condition_recode] # condition: 1 = dependent, -1=independent
    gem = data[:gem_presented] #gem presented
    market = data[:market_presented] #market presented
    reward_loss_trial = data[:rew_loss]
    force_t = data[:forcedTrial]
    block_n = data[:block_n]
    blackFirst = data[:blackFirst]
    
    SR_m = zeros(typeof(beta_mb), 2) .+ 0.5 #initalise to 0.5. stores estimates of transition probabilities for black/white door going to reward/loss state 
    SR_gem = zeros(typeof(beta_mb), 4) .+ 0.5 #initalise to 0.5. stores estimates of transition probabilities for black/white door going to reward/loss state 
   
	Qmb = zeros(typeof(beta_mb), 2) #decision variable
    
    #encode in the frame of getting to the rl state
    prob_rl_chosen_m = [];
    prob_rl_unchosen_m = [];
    
    prob_rl_chosen_gem = [];
    prob_rl_unchosen_gem = [];
    
    prob_combined_rl_chosen_compile = [];

    prob_rl_doorpresented_m = [];
    prob_rl_doorpresented_gem = [];
    prob_combined_rl_doorpresented = [];
   
    SPE_raw_compile_signed = [];
    SPE_raw_compile_abs = [];
    SR_m_change_compile_signed = [];
    SR_m_change_compile_abs = [];
    SR_gem_change_compile_signed = [];
    SR_gem_change_compile_abs = [];
    
    # initialize likelihood
    lik = 0 
    
	for i = 1:length(c1)
 
        w_raw = w_slope*(condition[i])
        w = 0.5 .+ 0.5.*erf(w_raw/sqrt(2))
        
        if gem[i]<3
            index = 1            
        else
            index = 2
        end
        
        Vtot = w*SR_m[index] .+ (1-w)*SR_gem[gem[i]]
        
        Qmb = [Vtot.*reward_loss_trial[i], (1-Vtot).*reward_loss_trial[i]]
        
         if (c1[i]==1)
            append!(prob_rl_chosen_m, SR_m[index]); append!(prob_rl_unchosen_m, 1-SR_m[index]);
            append!(prob_rl_chosen_gem, SR_gem[gem[i]]); append!(prob_rl_unchosen_gem, 1-SR_gem[gem[i]]);
            append!(prob_combined_rl_chosen_compile, Vtot); 
        elseif (c1[i]==2)
            append!(prob_rl_chosen_m, 1-SR_m[index]); append!(prob_rl_unchosen_m, SR_m[index]);
            append!(prob_rl_chosen_gem, 1-SR_gem[gem[i]]); append!(prob_rl_unchosen_gem, SR_gem[gem[i]]);
            append!(prob_combined_rl_chosen_compile, 1-Vtot); 
        end
        
        if (blackFirst[i]==1)
            append!(prob_rl_doorpresented_m, SR_m[index]); 
            append!(prob_rl_doorpresented_gem, SR_gem[gem[i]]); 
            append!(prob_combined_rl_doorpresented, Vtot); 
        elseif (blackFirst[i]==-1)
            append!(prob_rl_doorpresented_m, 1-SR_m[index]); 
            append!(prob_rl_doorpresented_gem, 1-SR_gem[gem[i]]); 
            append!(prob_combined_rl_doorpresented, 1-Vtot);         
        end
            
        if (force_t[i] == 0)
            
            #Q-values that determine the decision
            Qd = beta_mb.*Qmb
            lik += Qd[c1[i]] .- log(sum(exp.(Qd)))
            
        else
        end
        
        SR_m_prev = SR_m[index]
        SR_gem_prev = SR_gem[gem[i]]
        
        # updates go in here - these are updates of probability estimates (not contingent on outcome)
        if (s[i]==1 & c1[i]==1)
            
            SPE_raw = 1-Vtot
            SR_m[index] = SR_m[index] .+ w*lr*(1-Vtot)
            SR_gem[gem[i]] = SR_gem[gem[i]] .+ (1-w)*lr*(1-Vtot)
            
        elseif (s[i]==2 & c1[i]==2)
            
            SPE_raw = 1-Vtot
            SR_m[index] = SR_m[index] .+ w*lr*(1-Vtot)
            SR_gem[gem[i]] = SR_gem[gem[i]] .+ (1-w)*lr*(1-Vtot)
            
        else

            SPE_raw = 0-Vtot
            SR_m[index] = SR_m[index] .+ w*lr*(0-Vtot)
            SR_gem[gem[i]] = SR_gem[gem[i]] .+ (1-w)*lr*(0-Vtot)
            
        end
        
        append!(SPE_raw_compile_signed, SPE_raw)
        append!(SPE_raw_compile_abs, abs(SPE_raw))
        append!(SR_m_change_compile_signed, SR_m[index] - SR_m_prev)  
        append!(SR_m_change_compile_abs, abs(SR_m[index] - SR_m_prev))  
        append!(SR_gem_change_compile_signed, SR_gem[gem[i]] - SR_gem_prev)   
        append!(SR_gem_change_compile_abs, abs(SR_gem[gem[i]] - SR_gem_prev))
        
	end
    
    #compile trial by trial values here
    trial_data = DataFrame(trial = t,
    sub = sub,
    block_n = block_n,
    choice = c1,
    outcomeState = s,
    outcome = r,
    gem = gem,
    condition = condition,
    market = market,
    force_t = force_t,
    prob_rl_chosen_m = prob_rl_chosen_m,
    prob_rl_unchosen_m = prob_rl_unchosen_m,
    prob_rl_chosen_gem = prob_rl_chosen_gem,
    prob_rl_unchosen_gem = prob_rl_unchosen_gem,
    prob_combined_rl_chosen = prob_combined_rl_chosen_compile,
    prob_rl_doorpresented_m = prob_rl_doorpresented_m,
    prob_rl_doorpresented_gem = prob_rl_doorpresented_gem,
    prob_combined_rl_doorpresented = prob_combined_rl_doorpresented,
    SPE_signed = SPE_raw_compile_signed,
    SPE_abs = SPE_raw_compile_abs,
    SR_m_change_signed = SR_m_change_compile_signed,
    SR_m_change_abs = SR_m_change_compile_abs,
    SR_gem_change_signed = SR_gem_change_compile_signed,
    SR_gem_change_abs = SR_gem_change_compile_abs)
        
    # here if running em you can only return the likelihood
    return -lik
    
    return (-lik, trial_data)

end

# Parameter optimisation

### setup variables for em


In [77]:

#store list of actual subject numbers (in subj)
subs = unique(df[:participantID])

#put in a new column called "sub" which is identical to subj - em looks for this
df[:sub] = df[:participantID];

NS = length(subs)
X = ones(NS)
betas = [0. 0. 0.]
sigma = [1., 1., 1.];


### Run em to get best fit parameters for each subject


In [78]:
# run em
# x contains the parameters for each subject (note not the same as variable X)
# l and h are per-subject likelihood and hessians
(betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, rl_model; emtol=emtol, parallel=parallel, full=full);



iter: 30
betas: [1.54 1.19 0.1]
sigma: [0.39, 0.45, 0.67]
free energy: -1372.920522
change: [2.2e-5, 8.0e-5, 0.000876, 0.000226, 0.000202, 0.00013]
max: 0.000876


In [83]:
aggll_iaic = iaic(x, l, h, betas, sigma)

1343.8043547972886

### Generate Model Statistics 
(LOOCV)

In [80]:

#compute unbiased per subject marginal likelihoods via cross validation.
liks = loocv(df, subs, x, X, betas, sigma, rl_model; emtol=emtol, parallel=parallel, full=full)

print(sum(liks))


Subject: 1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..1341.310170443063

### Write loocv scores to csv file and save

(if you have run loocv above)

In [82]:

#put loocv scores into dataframe
loocv_scores = DataFrame(sub = subs,
liks = vec(liks));

CSV.write("loocv_scores.csv", DataFrame(loocv_scores))


"loocv_scores.csv"

### Calculate and write p values, std error and covariance

In [None]:

# standard errors on the subject-level means, based on an asymptotic Gaussian approx 
# (these may be inflated esp for small n)
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, rl_model);


In [None]:

model_stats = DataFrame(stderror = vec(standarderrors),
pvalues = vec(pvalues),
covmtx_1 = vec(covmtx[:,1]),
covmtx_2 = vec(covmtx[:,2]),
covmtx_3 = vec(covmtx[:,3]));

# save model stats to csv file
CSV.write("model_stats.csv", DataFrame(model_stats));


### Write per subject model parameters to csv files and save

In [81]:

# put parameters into variable d
d=x;

# now put parameters into dataframe
params = DataFrame(sub = subs,
slope = vec(d[:,1]), 
w_raw = vec(d[:,2]),
lr_raw = vec(d[:, 3]));

CSV.write("subject_params.csv", DataFrame(params))


"subject_params.csv"

In [None]:
# initialize parameter structures once again
#(df, subs, X, betas, sigma) = genVars(df, 3);

# initalise this - will store all trial to trial parameters
trial_data_compile = [];

# run model for each subject using best fit parameters
for x = 1:length(subs)

    # pull out optimal betas for subject - these are used in the model
    # note: you want the unconverted learning score to be fed in
    betas_sub = convert(Array, params[x, [:slope, :w_raw, :lr_raw]])
    data_sub = df[df[:sub].==subs[x], :]
    
    # run model using these parameters - note must have commented in the model to return all of these variables (and not only -lik)
    (minus_li, trial_data) = rl_model(betas_sub, data_sub)
    
    if x.==1
        
        trial_data_compile = trial_data
        
    else
        
        append!(trial_data_compile, trial_data)
        
    end
 
end
# check these are all the same sizes
print(size(df))
print(size(trial_data_compile))
    
# print header of data compile
head(trial_data_compile)

CSV.write("trial_by_trial_vals.csv", DataFrame(trial_data_compile))