# Start up commands/load relevant functions

In [1]:
# load required libraries
using Distributed

# Set everything up.
# `parallel` toggles multi-process fitting. If you are having trouble, set
# parallel = false: a single process is easier to debug.
parallel = true

# Start the worker processes. The nprocs() check makes this cell safe to re-run:
# workers are only added while the session still consists of the master process
# alone, so repeated execution no longer keeps spawning two extra workers.
if parallel && nprocs() == 1
    addprocs(2)
end

# load required libraries
@everywhere using DataFrames
@everywhere using ForwardDiff
@everywhere using PyCall
@everywhere using Distributions
@everywhere using PyPlot
@everywhere using CSV
@everywhere using SpecialFunctions
@everywhere using SharedArrays
@everywhere using LinearAlgebra

@everywhere PyCall.@pyimport scipy.optimize as so

# this is the code for the actual fitting routines
@everywhere include("em.jl")
@everywhere include("common.jl")
@everywhere include("likfuns.jl")

# this generates the starting matrices for betas, sigmas etc. to feed into the model
@everywhere include("genVars.jl")


┌ Info: Precompiling CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1187


      From worker 2:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 2:	└ @ Base loading.jl:941
      From worker 3:	│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
      From worker 3:	└ @ Base loading.jl:941


│ This may mean Tables [bd369af6-aec1-5ad0-b16a-f7cc5008161c] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:941
│ Use `(covvar < 0) ? NaN :` instead.
└ @ nothing /Users/neil/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ nothing /Users/neil/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ nothing /Users/neil/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:299
│ Use `(diag(covmtx)[i] .< 0) ? NaN : diag` instead.
└ @ nothing /Users/neil/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:299
│ Use `(covvar < 0) ? NaN :` instead.
└ @ ~/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:288
│ Use `(covvar < 0) ? NaN : sqrt` instead.
└ @ ~/Dropbox/Summerfield_Lab/ValueInference/models/model101/em.jl:288
│ Use `(diag(covmtx)[i] .< 0) ? NaN :` instead.
└ @ ~/Dropbox/Summerfield_Lab/Valu

└ @ nothing /Users/neil/Dropbox/Summerfield_Lab/ValueInference/models/model101/genVars.jl:17
└ @ ~/Dropbox/Summerfield_Lab/ValueInference/models/model101/genVars.jl:17
└ @ ~/Dropbox/Summerfield_Lab/ValueInference/models/model101/genVars.jl:17


# Data read and process

### Read in data

In [2]:
# Read the behavioural data from the csv file (absolute path on the author's machine;
# the commented-out line is the same file on another machine).
#df = readtable("/Users/Leonie/Dropbox/ValueInference/data/complete_dat.csv")
df = readtable("/Users/Neil/Dropbox/Summerfield_Lab/ValueInference/data/complete_dat.csv")

#Leonie\Dropbox\ValueInference

# Get rid of missed responses: rows whose outcome is coded 9 are dropped.
df = df[df[:outcome].!=9,:]

#outcome state 1 = reward/loss state
#outcome state 2 = neutral state

# Add a "sub" column.
# This is just a copy of the existing participantID column; the author believes em
# looks for a column called "sub" specifically.
df[:sub] = df[:participantID];

# Recode market_presented: for rows with condition == 2 the market code is shifted up by one,
# so that 1 = market 1 in the dependent condition and 2 and 3 refer to the two markets in the
# independent condition.
df[df[:condition].==2,:market_presented] = df[df[:condition].==2,:market_presented] .+ 1

# rescale rewards? think about this

# Display the first rows of the processed data frame.
head(df)

│   caller = top-level scope at In[2]:1
└ @ Core In[2]:1
│   caller = top-level scope at In[2]:19
└ @ Core In[2]:19


Unnamed: 0_level_0,participantID,condition,blocks,blockType,trials,probabilitiesMarketOne_1,probabilitiesMarketOne_2,probabilitiesMarketTwo_1,probabilitiesMarketTwo_2,rewards_1,rewards_2,gem_presented,market_presented,chooseLeft,stateChosen,outcomeState,outcome,RT,black_rl_p,black_sofa_p,chosen_rl_p,chosen_sofa_p,pick_black,rt_log_trans,white_rl_p,white_sofa_p,missed_response,structureAwareness,evidence_consistency,consistent_choice,same_gem,doorRepeat,sideRepeat,sub
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰
1,1,1,1,1,2,0.2,0.8,0.2,0.8,1,0,1,1,1,2,2,0,1.235,0.2,0.8,0.8,0.2,0.0,-0.211073,0.8,0.2,0,0,0,0,1,0,0,1
2,1,1,1,1,3,0.2,0.8,0.2,0.8,1,0,1,1,0,1,1,1,1.75767,0.2,0.8,0.2,0.8,1.0,-0.56399,0.8,0.2,0,0,1,1,1,0,0,1
3,1,1,1,1,4,0.2,0.8,0.2,0.8,1,0,1,1,1,1,2,0,1.23885,0.2,0.8,0.2,0.8,1.0,-0.214187,0.8,0.2,0,0,1,1,1,1,0,1
4,1,1,1,1,5,0.2,0.8,0.2,0.8,-1,0,2,1,0,1,2,0,3.50974,0.2,0.8,0.2,0.8,1.0,-1.25554,0.8,0.2,0,0,0,0,0,1,0,1
5,1,1,1,1,6,0.2,0.8,0.2,0.8,1,0,1,1,1,2,1,1,1.87775,0.2,0.8,0.8,0.2,0.0,-0.630076,0.8,0.2,0,0,0,0,0,0,0,1
6,1,1,1,1,7,0.2,0.8,0.2,0.8,1,0,2,1,1,2,1,1,1.35541,0.2,0.8,0.8,0.2,0.0,-0.304105,0.8,0.2,0,0,0,0,0,1,1,1


# RL Model (Basic)


Takes gem type into account: updates the probabilities for the black/white door based on the state reached following each choice, and then uses these to compute the value of each door on the current trial. Separate traces are kept for each gem.

There is just one probability estimate per gem, which captures the probability of reaching the gain/loss state via the black door and the safe state via the white door (i.e. it takes into account the inherent dependence between the doors).

Model comprises:

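two slopes (`beta_mb`, `beta_mf`) governing sensitivity to the model-based and model-free values (the model-free term is currently commented out of the decision variable)
separate learning rates for good and bad outcomes (`lr_pos`, `lr_neg`), plus a learning rate for the model-free values (`lr_MF`)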

In [54]:
# rl_model(params, data)
#
# Negative log-likelihood of one subject's door choices under a learner that tracks,
# separately for each gem, the probability that the black door leads to the gain/loss
# state (equivalently, that the white door leads to the neutral state).
#
# Arguments
#   params : parameter vector on the unconstrained scale
#              params[1]  beta_mb  softmax weight on the model-based values
#              params[2]  beta_mf  weight on the model-free values; its term in the decision
#                                  variable is currently disabled, so it has no effect
#              params[3]  lr_pos   learning rate after good outcomes
#              params[4]  lr_neg   learning rate after all other outcomes
#              params[5]  lr_MF    learning rate of the model-free values
#            Learning rates are squashed into [0, 1] with the standard-normal CDF.
#            Any further entries are ignored (params[6] used to be read into an unused
#            local, which made 5-parameter fits fail with a BoundsError).
#   data   : one subject's trials; the columns :stateChosen, :outcome, :outcomeState,
#            :gem_presented and :rewards_1 are read. gem_presented must lie in 1:4
#            because four probability traces are kept.
#
# Returns the negative log-likelihood summed over all trials.
@everywhere function rl_model(params, data)

    # Squash an unconstrained parameter into [0, 1] (standard-normal CDF).
    unit_interval(z) = 0.5 + 0.5 * erf(z / sqrt(2))

    # model parameters
    beta_mb = params[1]                 # weight for MB values
    beta_mf = params[2]                 # weight for MF values (term disabled below)
    lr_pos = unit_interval(params[3])   # learning rate, good outcomes
    lr_neg = unit_interval(params[4])   # learning rate, other outcomes
    lr_MF = unit_interval(params[5])    # learning rate, model-free values

    c1 = data[:stateChosen]             # choice: 1 = black door, 2 = white door
    r = data[:outcome]                  # outcome: +1 = gain, -1 = loss, 0 = neutral
    s = data[:outcomeState]             # state reached: 1 = gain/loss, 2 = neutral
    gem = data[:gem_presented]          # gem presented
    reward_loss_trial = data[:rewards_1]  # +1 on reward trials, -1 on loss trials

    # Element type follows the parameters so that ForwardDiff dual numbers flow through.
    T = typeof(beta_mb)

    # Per-gem estimate of P(black door -> gain/loss state), initialised to 0.5.
    SR_m = zeros(T, 4) .+ 0.5

    # Model-free value of each door (updated, but not yet part of the decision variable).
    Q1 = zeros(T, 2)

    # Weight given to the partner gem's estimate. Fixed at 0 while the learned
    # between-gem covariance is disabled, i.e. only the current gem is used.
    covar_rescaled = 0

    # running log-likelihood
    lik = zero(T)

    for i in eachindex(c1)

        g = gem[i]
        rew = reward_loss_trial[i]

        # The other gem of the same pair: 1 <-> 2 and 3 <-> 4.
        g_other = g < 3 ? 3 - g : 7 - g

        # Predicted value of [black door, white door] from each gem's estimate.
        Qmb_curr_gem = [SR_m[g] * rew, (1 - SR_m[g]) * rew]
        Qmb_other_gem = [SR_m[g_other] * rew, (1 - SR_m[g_other]) * rew]

        # Qmb was previously referenced without ever being assigned; it is the
        # covariance-weighted mixture of the two gems' predictions.
        Qmb = (1 - covar_rescaled) .* Qmb_curr_gem .+ covar_rescaled .* Qmb_other_gem

        # Q-values that determine the decision
        Qd = beta_mb .* Qmb #.+ beta_mf.*Q1

        # Softmax log-probability of the observed choice, added to the running total.
        lik += Qd[c1[i]] - log(sum(exp.(Qd)))

        # Good outcome = reward obtained on a reward trial, or loss avoided on a loss trial.
        # `&&` is required here: `a == -1 & b == 0` parses as `a == (-1 & b) == 0`
        # because `&` binds tighter than `==`, so the loss-avoided case never matched.
        good_outcome = (rew == 1 && r[i] == 1) || (rew == -1 && r[i] == 0)
        lr = good_outcome ? lr_pos : lr_neg

        # Transition update (not contingent on the outcome value): move the estimate
        # towards 1 when black led to the gain/loss state or white led to the neutral
        # state, and towards 0 otherwise.
        consistent = (s[i] == 1 && c1[i] == 1) || (s[i] == 2 && c1[i] == 2)
        SR_m[g] = (1 - lr) * SR_m[g] + lr * (consistent ? 1 : 0)

        # MF update (TD1)
        Q1[c1[i]] = (1 - lr_MF) * Q1[c1[i]] + lr_MF * r[i]

    end

    # When running em only the likelihood is needed, so the trial table is no longer
    # built on every call.
    return -lik

    # To extract trial-by-trial values once the parameters are known, return this instead:
    #return (-lik, DataFrame(trial = data[:trials], sub = data[:sub], choice = c1, state = s, reward = r))

end

# Parameter optimisation

### Run model for one subject
(aids debugging)

In [55]:
# Initialize the parameter structures. genVars comes from genVars.jl; its second argument
# is presumably the number of model parameters — TODO confirm.
(df, subs, X, betas, sigma) = genVars(df, 6);

# Sanity check: evaluate the negative log-likelihood of the first subject's data
# at the starting betas.
rl_model(betas, df[df[:sub].==subs[1],:])

331.3243523076546

### Run em to get best fit parameters for each subject

In [39]:
# Display rows 2-3 of the data frame as a quick check of its contents.
df[2:3,:]

Unnamed: 0_level_0,participantID,condition,blocks,blockType,trials,probabilitiesMarketOne_1,probabilitiesMarketOne_2,probabilitiesMarketTwo_1,probabilitiesMarketTwo_2,rewards_1,rewards_2,gem_presented,market_presented,chooseLeft,stateChosen,outcomeState,outcome,RT,black_rl_p,black_sofa_p,chosen_rl_p,chosen_sofa_p,pick_black,rt_log_trans,white_rl_p,white_sofa_p,missed_response,structureAwareness,evidence_consistency,consistent_choice,same_gem,doorRepeat,sideRepeat,sub
Unnamed: 0_level_1,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Float64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰
1,1,1,1,1,3,0.2,0.8,0.2,0.8,1,0,1,1,0,1,1,1,1.75767,0.2,0.8,0.2,0.8,1.0,-0.56399,0.8,0.2,0,0,1,1,1,0,0,1
2,1,1,1,1,4,0.2,0.8,0.2,0.8,1,0,1,1,1,1,2,0,1.23885,0.2,0.8,0.2,0.8,1.0,-0.214187,0.8,0.2,0,0,1,1,1,1,0,1


In [None]:
# Initialize the parameter structures (again).
# Note that some of the variables (e.g. betas, sigma) are both passed to and returned by em,
# so they are reset here before a fresh fit.
# NOTE(review): the single-subject check calls genVars(df, 6) while this cell uses 5 —
# confirm the intended number of parameters.
(df, subs, X, betas, sigma) = genVars(df, 5);

# Run for the full learner.
# x contains the parameters for each subject (note: not the same as variable X)
# l and h are per-subject likelihoods and hessians
# `parallel` is the switch set in the start-up cell, so turning it off there for
# debugging now also turns off the parallel path here (it was hard-coded to true).
@time (betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, rl_model; emtol=1e-3, parallel=parallel, full=true, quiet=false);


### Generate Model Statistics 

IBIC, IAIC and LOOcv

In [None]:
## model selection/comparison/scoring

# Laplace approximation to the aggregate log marginal likelihood of the whole dataset,
# marginalized over the individual (per-subject) parameters.
# Uses x, l and h as returned by em.

aggll = lml(x, l, h)

# To compare this between models you need to correct for the group-level free parameters,
# using either aic or bic.

aggll_ibic = ibic(x, l, h, betas, sigma, nrow(df))
aggll_iaic = iaic(x, l, h, betas, sigma)

# Alternatively, compute unbiased per-subject marginal likelihoods via subject-level
# cross validation.
# You can do paired t tests on these between models;
# they are also appropriate for SPM_BMS etc.

# This takes a very long time, so uncomment it only when you want to run it;
# otherwise just use the IAIC above.
#liks = loocv(df, subs, x, X, betas, sigma, rl_model; emtol=1e-3, parallel=true, full=true)
#aggll_loo = sum(liks)

#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll: $aggll_iaic\nloo nll:  $aggll_loo")
#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll:")
# Only the IAIC score is printed.
print(aggll_iaic)

### Write loocv scores to csv file

(if you have run this part above)

In [None]:
# Table of leave-one-out scores, one row per subject.
# `liks` only exists if the loocv call in the model-statistics cell has been run.
loocv_scores = DataFrame(sub = subs, liks = vec(liks));

# Store the table next to the notebook.
CSV.write("loocv_scores.csv", loocv_scores)

### Calculate and write p values, std error and covariance

In [None]:
# Standard errors on the subject-level means, based on an asymptotic Gaussian approximation
# (these may be inflated, especially for small n).
# emerrors (from the fitting code) also returns p-values and a covariance matrix.
(standarderrors, pvalues, covmtx) = emerrors(df, subs, x, X, h, betas, sigma, rl_model);

In [None]:
# One covmtx_<j> column per column of the covariance matrix. Previously only columns
# 1-3 were written, which silently dropped the rest for models with more parameters.
covmtx_columns = [Symbol("covmtx_", j) => vec(covmtx[:, j]) for j in axes(covmtx, 2)]

# Standard errors, p-values and the full covariance matrix in one table.
model_stats = DataFrame(; stderror = vec(standarderrors),
                        pvalues = vec(pvalues),
                        covmtx_columns...);

# save model stats to csv file
CSV.write("model_stats.csv", model_stats);

In [None]:
# Show the standard errors returned by emerrors.
print(standarderrors)


In [None]:
# Show the p-values returned by emerrors.
print(pvalues)


In [None]:
# Show the covariance matrix returned by emerrors.
print(covmtx)


### Write per subject model parameters to csv file


#### Save a copy of just the parameters

In [None]:
# Transpose the fitted parameters so that column k of d holds parameter k
# (one entry per subject, matching `subs`).
d = x';

# Per-subject parameter table. Each learning-rate-type parameter is stored twice:
# on the fitted (unconverted) scale and converted to [0, 1] with the standard-normal CDF.
# NOTE(review): column 5 is labelled "p" here but rl_model uses params[5] as lr_MF.
params = DataFrame(
    sub = subs,
    beta_mb = d[:, 1],
    beta_mf = d[:, 2],
    eta_unconverted_pos = d[:, 3],
    eta_converted_pos = 0.5 .+ 0.5 .* erf.(d[:, 3] ./ sqrt(2)),
    eta_unconverted_neg = d[:, 4],
    eta_converted_neg = 0.5 .+ 0.5 .* erf.(d[:, 4] ./ sqrt(2)),
    p_unconverted = d[:, 5],
    p_converted = 0.5 .+ 0.5 .* erf.(d[:, 5] ./ sqrt(2)),
)

# Write the table to disk.
CSV.write("subject_params.csv", params)

#### Save a copy with summary stats as well


In [None]:
# Drop the first column (sub) from the parameter table.
params = params[:,2:end]
# Put the parameter columns to the right of the summary statistics.
# NOTE(review): summary_stats is not defined anywhere earlier in this notebook, so it
# must be created before this cell is run — confirm where it comes from.
summary_stats = [summary_stats params]
CSV.write("summary_stats.csv", DataFrame(summary_stats))


# Generate trial by trial values

### Get best fit parameters from model

In [None]:
# If the best-fit parameters have already been saved, read them in here
# (rather than re-running the model fit to find them).
params_full = readtable("subject_params.csv")

### Run model for each sub using best fit parameters

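Note: the model must first be redefined so that it returns the trial data: uncomment the alternative `return` at the end of `rl_model` (the one that also returns the trial data) and re-run that cell.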


In [None]:
# initialize parameter structures once again
(df, subs, X, betas, sigma) = genVars(df, 5);

# Columns of subject_params.csv that hold the unconverted (fitted-scale) parameters,
# in the order rl_model indexes them (params[1], params[2], ...). The previous
# selection named columns that do not exist in that file.
# If rl_model reads more parameters than the five saved in subject_params.csv,
# extend this list (and the saved file) accordingly.
param_cols = [:beta_mb, :beta_mf, :eta_unconverted_pos, :eta_unconverted_neg, :p_unconverted]

# Run the model once per subject at that subject's best-fit parameters and keep the
# trial-by-trial table from each run.
# NOTE: rl_model must have been switched to return (-lik, trial_data) rather than only -lik.
# Row k of params_full is assumed to belong to subs[k] — TODO confirm against the file.
trial_parts = map(eachindex(subs)) do k

    # note: the unconverted learning rates are what the model expects
    betas_sub = [params_full[k, col] for col in param_cols]
    data_sub = df[df[:sub] .== subs[k], :]

    (minus_li, trial_data) = rl_model(betas_sub, data_sub)
    return trial_data

end

# Stack the per-subject tables; this stores all trial-by-trial values.
trial_data_compile = reduce(vcat, trial_parts)

# check these are all the same sizes
print(size(df))
print(size(trial_data_compile))

# print header of data compile
head(trial_data_compile)

### Save data to csv in model folder


In [None]:
# Write the compiled trial-by-trial table to a csv file in the current (model) folder.
CSV.write("trial_data_compile.csv", DataFrame(trial_data_compile))

# END