# Model subjective 1 beta 1 lr

#### This model has one beta 
#### Also has an intercept

#### one learning rate (no distinction between appetitive/aversive updates)

#### Uses Q learned average to predict choice

#### NJG, 14th November 2017

### Start up commands/load relevant functions

In [None]:
# set everything up
parallel = true # Run on multiple CPUs. If you are having trouble, set parallel = false: easier to debug

# Start the worker processes used by the @everywhere / parallel fitting code.
# The nprocs() check makes this cell safe to re-run: workers are only added while
# the session still consists of the single master process, so repeated execution
# no longer piles up 4 extra workers each time.
if parallel && nprocs() == 1
    addprocs(4)
end

# load required libraries

@everywhere using DataFrames
@everywhere using ForwardDiff
@everywhere using PyCall
@everywhere using Distributions
@everywhere using PyPlot

@everywhere PyCall.@pyimport scipy.optimize as so

# this is the code for the actual fitting routines
@everywhere include("em.jl")
@everywhere include("common.jl")
@everywhere include("likfuns.jl")

# this generates the starting matrices for betas, sigmas etc. to feed into the model
@everywhere include("genVars.jl")

### Read in task data

In [None]:
# Read the task data from the CSV file into the DataFrame `df`.
df = readtable("/Users/neil/Dropbox/Daw_Lab/PreySelection/v103/data/subject_data_excluded_deleted.csv")

# NOTE: at this point df still includes forced-choice trials and missed responses

# display the first rows as a sanity check
head(df)

### Append data with the column "sub" 


In [None]:
# Duplicate the existing column sub_no under the name "sub".
# NOTE(review): the em routine is believed to look for a column called "sub" specifically — confirm in em.jl
df[:sub] = df[:sub_no]
head(df)

### Take out missed responses
#### note: you will need to account for these at a later point, as missed responses impose a time delay

In [None]:
# keep only the rows whose `missed` flag equals 0, i.e. drop the missed responses
df = df[df[:missed].==0,:]
head(df)

### The model: "subjective"

In [None]:
# model_subjective(params, data)
#
# Negative log-likelihood of one subject's accept/reject choices under the
# "subjective" model: a softmax over
#   (1) intercept + beta * learned opportunity cost of the option's delay (reject), and
#   (2) beta * reward of the encountered option (accept).
#
# Arguments
#   params : indexable collection with
#            params[1] = intercept (added to the value of rejecting),
#            params[2] = beta (scales both values),
#            params[3] = raw learning-rate parameter, mapped into (0, 1) below
#   data   : one subject's trials, indexable by the column symbols :reward, :delay,
#            :force_trial, :trial_index, :sub_no, :block and :approach_avoid
#
# Returns -lik, the negative summed log-likelihood over the non-forced trials.
# To also obtain the trial-by-trial values, swap the two `return` lines at the bottom.
@everywhere function model_subjective(params, data)

    intercept = params[1]
    beta = params[2]
    # standard normal CDF of the raw parameter: keeps the learning rate inside (0, 1)
    lr = 0.5 + 0.5*erf(params[3]/sqrt(2))

    # accumulators start as zeros of the parameter type, so the same code runs on
    # plain floats and on the number types used for automatic differentiation
    T = typeof(beta)
    delay_sum = zero(T)    # seconds elapsed so far
    reward_sum = zero(T)   # reward accrued so far
    Q_estimate = zero(T)   # learned (recency-weighted) reward per second
    lik = zero(T)          # running log-likelihood

    reward = data[:reward]
    delay = data[:delay]
    force = data[:force_trial]
    t = data[:trial_index] # trial
    sub = data[:sub_no] # subject number
    block = data[:block] # block

    # Recode the raw choice so that it can index the two-element decision vector:
    #   1 = avoid/reject  (raw -1) -> goes with the opportunity cost
    #   2 = approach/accept (raw +1) -> goes with the reward of the encountered option
    # Int() also enforces that the codes are whole numbers, as indexing requires.
    c = [ci == -1 ? 1 : Int(ci) + 1 for ci in data[:approach_avoid]]

    # per-trial records of the values in force when the choice was made
    reward_sum_store = []
    delay_sum_store = []
    av_reward_store = []
    opp_cost_store = []
    opp_cost_estimate_store = []
    Q_estimate_store = []

    for i in eachindex(c)

        # 2 seconds without reward on each trial regardless of accept/reject
        delay_sum += 2

        # arithmetic reward per second from the seconds elapsed and reward accrued
        Q_av_per_time = reward_sum / delay_sum
        opp_cost = Q_av_per_time * delay[i]

        # the learned estimate decays once per unrewarded second (two seconds here)
        Q_estimate = (1 - lr)^2 * Q_estimate

        opp_cost_estimate = Q_estimate * delay[i]

        push!(reward_sum_store, reward_sum)
        push!(delay_sum_store, delay_sum)
        push!(av_reward_store, Q_av_per_time)
        push!(opp_cost_store, opp_cost)
        push!(opp_cost_estimate_store, opp_cost_estimate)
        push!(Q_estimate_store, Q_estimate)

        # only free-choice trials contribute to the likelihood
        if force[i] < 1

            # decision variable: estimated opportunity cost ("reward" of rejecting)
            # versus the reward of the current option (if accepted)
            Qd = [intercept + beta*opp_cost_estimate, beta*reward[i]]

            # log softmax; subtracting the maximum first keeps exp() from overflowing
            Qmax = maximum(Qd)
            lik += Qd[c[i]] - (Qmax + log(sum(exp.(Qd .- Qmax))))

        end

        # regardless of whether it is a force trial or not, accepting the option
        # incurs its delay and then delivers its reward
        if c[i] == 2

            delay_sum += delay[i]
            reward_sum += reward[i]

            # One decay step per second of the option's delay. The earlier form,
            # `for i = 1:length(delay[i])`, executed a single step whatever the delay
            # (length of a scalar is 1) and reused the trial index `i`, so on Julia
            # <= 0.6 the reward update below read reward[1] instead of reward[i].
            Q_estimate = (1 - lr)^delay[i] * Q_estimate

            # final step of the delay: the reward is received
            Q_estimate = (1 - lr) * Q_estimate + lr*reward[i]

        end

    end

    # trial-by-trial record; the keyword constructor names the columns directly
    trial_data = DataFrame(sub = sub,
                           block = block,
                           trial = t,
                           force = force,
                           reward = reward,
                           delay = delay,
                           choice = c,
                           reward_sum = reward_sum_store,
                           delay_sum = delay_sum_store,
                           avreward_arithmetic = av_reward_store,
                           opp_cost_arithmetic = opp_cost_store,
                           avreward_estimate = Q_estimate_store,
                           opp_cost_estimate = opp_cost_estimate_store)

    # here if running em you can only return the likelihood
    return -lik

    # but if you run in order to extract trials, subs etc then want to return this
    #return (-lik, trial_data)

end

### Run model for one subject

##### aids debugging

In [None]:
# initialize parameter structures
# NOTE(review): the 3 is presumably the number of model parameters — confirm in genVars.jl
(df, subs, X, betas, sigma) = genVars(df, 3);

# evaluate the model once, on the rows of the first subject only, using the starting betas
model_subjective(betas,df[df[:sub].==subs[1],:])

### Run em to get best fit parameters for each subject

In [None]:
# initialize parameter structures (again)
# note that some of the variables (e.g. betas, sigma) are both passed into and returned by em
(df, subs, X, betas, sigma) = genVars(df, 3);

# Fit the model with em.
# x contains the parameters for each subject (note: not the same as the variable X)
# l and h are the per-subject likelihoods and hessians
# The `parallel` switch from the set-up cell is passed through, so that setting
# parallel = false there really does give a single-process (easier to debug) run.
@time (betas, sigma, x, l, h) = em(df, subs, X, betas, sigma, model_subjective; emtol=1e-3, parallel=parallel, full=true, quiet=false);


### Generate Model Statistics 
#### (IAIC, LOOCV, etc.)

In [None]:
## model selection/comparison/scoring

# Laplace approximation to the aggregate log marginal likelihood of the whole dataset,
# marginalized over the individual params; computed from the per-subject parameters (x),
# likelihoods (l) and hessians (h) returned by em

aggll = lml(x,l,h)

# to compare this between models you need to correct for the group-level free parameters,
# using either the bic-style (ibic) or the aic-style (iaic) correction

aggll_ibic = ibic(x,l,h,betas,sigma,nrow(df))
aggll_iaic = iaic(x,l,h,betas,sigma)

# or you can compute unbiased per subject marginal likelihoods via subject-level cross validation
# you can do paired t tests on these between models
# these are also appropriate for SPM_BMS etc

# this takes a long time, so uncomment only when you want to run it; otherwise just use the IAIC above

#liks = loocv(df, subs, x, X, betas, sigma, model_subjective; emtol=1e-3, parallel=true, full=true)
#aggll_loo = sum(liks)

#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll: $aggll_iaic\nloo nll:  $aggll_loo")
#println("\n\nraw nll:  $aggll\nibic nll: $aggll_ibic\niaic nll:")

### Write loocv scores to csv file

#### (if you have run this part above)

In [None]:
# Save the per-subject leave-one-out scores to csv.
# `liks` only exists once the (commented-out) loocv call has been run, so check for it
# first and report clearly instead of stopping with an UndefVarError.
if isdefined(Main, :liks)
    # one row per subject: subject id and its cross-validated likelihood
    loocv_scores = DataFrame(sub = subs, liks = vec(liks))
    writetable("loocv_scores.csv", loocv_scores)
else
    println("loocv scores not written: `liks` is not defined (run loocv first)")
end

### Write per subject model parameters to csv file


In [None]:
# transpose the em output so that rows line up with `subs` and columns with the parameters
d = x';

# one row per subject; the learning rate is stored both raw (as fitted) and
# after the same 0.5 + 0.5*erf(./sqrt(2)) transform that the model applies
params = DataFrame(sub = subs,
                   intercept = vec(d[:, 1]),
                   beta = vec(d[:, 2]),
                   learning_rate_raw = vec(d[:, 3]),
                   learning_rate_transformed = vec(0.5 .+ 0.5 .* erf.(d[:, 3] ./ sqrt(2))));

# write the parameter table to csv
writetable("subject_params.csv", params)

#or: CSV.write("subject_params_full_learner.csv",params_full)

## Now run  model with these parameters for each subject to get trial by trial Q values
##### Note: must rerun model with it set to return trial data (uncomment this)



In [None]:
# if the best fit parameters have already been saved, they can be read back in here
# (rather than re-running the fit to find them)
params = readtable("subject_params.csv")
head(params)

### run model for each sub using best fit parameters

In [None]:
# initialize parameter structures once again
(df, subs, X, betas, sigma) = genVars(df, 3);

# will hold the trial-by-trial output of every subject, stacked row-wise
trial_data_compile = []

# Run the model once per subject using that subject's best fit parameters.
# The loop index is deliberately not called `x`, so the parameter matrix `x`
# returned by em is not overwritten by this cell.
# NOTE(review): row isub of `params` is assumed to belong to subs[isub] — confirm the ordering.
for isub = 1:length(subs)

    # feed in the raw (untransformed) learning-rate parameter:
    # model_subjective applies the transform itself
    betas_sub = Array(params[isub, [:intercept, :beta, :learning_rate_raw]])
    data_sub = df[df[:sub].==subs[isub], :]

    result = model_subjective(betas_sub, data_sub)

    # stop with an explicit message when the model still returns the likelihood only
    isa(result, Tuple) || error("model_subjective returned only the likelihood; " *
                                "switch its return statement to `return (-lik, trial_data)` and re-run its cell")

    trial_data = result[2]

    if isub == 1

        trial_data_compile = trial_data

    else

        append!(trial_data_compile, trial_data)

    end

end

# check these are all the same sizes
print(size(df))
print(size(trial_data_compile))

# print header of data compile
head(trial_data_compile)

### Calculate probabilities of choosing

In [None]:
# Choice probabilities for every trial, stacked subject by subject in the order of `subs`.
ProbAccept_ALL = Float64[]
ProbReject_ALL = Float64[]
ProbAccept_minus_ProbReject_ALL = Float64[]

# The loop index is not called `x`, so the em parameter matrix `x` is left untouched.
# NOTE(review): row isub of `params` is assumed to belong to subs[isub] — confirm the ordering.
for isub = 1:length(subs)

    # only the intercept and beta enter the choice rule; the learning rate has
    # already shaped the opp_cost_estimate column
    intercept = params[isub, :intercept]
    beta = params[isub, :beta]

    subset_data = trial_data_compile[trial_data_compile[:sub].==subs[isub], :]

    accept_value = subset_data[:reward]
    reject_value = subset_data[:opp_cost_estimate]

    for t = 1:size(subset_data, 1)

        # Softmax over {accept: beta*reward, reject: intercept + beta*opp_cost}, written
        # in its logistic form. This equals
        #   exp(beta*a) / (exp(beta*a) + exp(intercept + beta*r))
        # but cannot turn into Inf/Inf = NaN when the exponents are large.
        p_accept = 1 / (1 + exp(intercept + beta*(reject_value[t] - accept_value[t])))
        p_reject = 1 - p_accept

        push!(ProbAccept_ALL, p_accept)
        push!(ProbReject_ALL, p_reject)
        push!(ProbAccept_minus_ProbReject_ALL, p_accept - p_reject)

    end

end

# put the probabilities into a data frame; the keyword constructor names the columns directly
Q_probs = DataFrame(ProbAccept = ProbAccept_ALL,
                    ProbReject = ProbReject_ALL,
                    ProbAccept_minus_ProbReject = ProbAccept_minus_ProbReject_ALL)

# now merge the two dataframes together (note this overwrites the previous full compile)
# NOTE(review): hcat pairs rows by position, so this relies on trial_data_compile being
# grouped by subject in the order of `subs` — confirm
trial_data_compile = hcat(trial_data_compile, Q_probs); #could also do just: [full_Q_compile Q_probs]

### Save data to csv in model folder
##### NOTE: after this note you must save as an xlsx file to run in matlab 

In [None]:
# write the compiled trial-by-trial data frame to a csv file in the working directory
writetable("trial_by_trial_values.csv", DataFrame(trial_data_compile))


### Inspect parameters


In [None]:
# print the smallest and largest fitted value of each parameter across subjects
# (the learning rate is reported on its transformed scale)
for (label, col) in [("intercept", :intercept),
                     ("beta", :beta),
                     ("lr", :learning_rate_transformed)]
    println(label, " min: ", minimum(params[col]))
    println(label, " max: ", maximum(params[col]))
end


In [None]:
# Bar chart of the across-subject mean of each parameter.
# The bar positions get their own name instead of `x`, so that the per-subject
# parameter matrix `x` returned by em is not overwritten when this cell runs.
bar_positions = [1, 2, 3]

my_xticks = ["intercept","beta", "lr"]

# same order as the tick labels; the learning rate is on its transformed scale
param_means = [mean(params[:intercept]), mean(params[:beta]), mean(params[:learning_rate_transformed])]

PyPlot.plt[:xticks](bar_positions, my_xticks)
PyPlot.plt[:bar](bar_positions, param_means, color="#0f87bf", align="center", alpha=0.4)
title("average parameter values")


###### NOTE: intercept in the model is put on the value of rejecting: hence a negative value suggests a bias away from rejecting (the value of rejecting is devalued)

In [None]:
# distribution of the fitted intercepts across subjects, in 10 bins
PyPlot.plt[:hist](params[:intercept],10)
title("Histogram of intercept parameters")

In [None]:
# distribution of the fitted beta values across subjects, in 10 bins
PyPlot.plt[:hist](params[:beta],10)
title("Histogram of beta value")

In [None]:
# distribution of the transformed learning rates across subjects, in 10 bins
PyPlot.plt[:hist](params[:learning_rate_transformed],10)
title("Histogram of learning parameters")

In [None]:
# scatter each subject's beta against their transformed learning rate
PyPlot.plt[:scatter](params[:beta],params[:learning_rate_transformed])
title("learning parameters: beta vs lr")
xlabel("beta")
ylabel("learning_rate")

# print the correlation between the two parameters across subjects
println("correlation: ", cor(params[:beta],params[:learning_rate_transformed]))

In [None]:
# Grid of per-subject scatter plots: arithmetic opportunity cost against trial number.
# The grid stays 7x7 and only grows when there are more than 49 subjects, so that
# subplot() is never asked for a position beyond the grid.
n_side = max(7, ceil(Int, sqrt(length(subs))))

# Loop and plot variables are named so that the globals `x` (em parameters) and
# `X` (returned by genVars) are not overwritten when this cell runs in a notebook.
for isub = 1:length(subs)

    current_sub = subs[isub];

    subset_data_all = trial_data_compile[trial_data_compile[:sub].==current_sub, :]

    trial_no = subset_data_all[:trial]
    plot_values = subset_data_all[:opp_cost_arithmetic]

    subplot(n_side, n_side, isub)

    PyPlot.plt[:scatter](trial_no, plot_values, s=0.5)

end

suptitle("Arithmetic opportunity cost over time for each sub")


#### opp cost fluctuates trial by trial a lot depending on the options (their delay) as well as the average reward rate 
#### hence why it looks like two lines 

In [None]:
# Grid of per-subject scatter plots: estimated opportunity cost against trial number.
# The grid stays 7x7 and only grows when there are more than 49 subjects, so that
# subplot() is never asked for a position beyond the grid.
n_side = max(7, ceil(Int, sqrt(length(subs))))

# Loop and plot variables are named so that the globals `x` (em parameters) and
# `X` (returned by genVars) are not overwritten when this cell runs in a notebook.
for isub = 1:length(subs)

    current_sub = subs[isub];

    subset_data_all = trial_data_compile[trial_data_compile[:sub].==current_sub, :]

    trial_no = subset_data_all[:trial]
    plot_values = subset_data_all[:opp_cost_estimate]

    subplot(n_side, n_side, isub)

    PyPlot.plt[:scatter](trial_no, plot_values, s=0.5)

end

suptitle("Estimated opportunity cost over time for each sub")


In [None]:
# Grid of per-subject scatter plots: arithmetic average reward rate against trial number.
# The grid stays 7x7 and only grows when there are more than 49 subjects, so that
# subplot() is never asked for a position beyond the grid.
n_side = max(7, ceil(Int, sqrt(length(subs))))

# Loop and plot variables are named so that the globals `x` (em parameters) and
# `X` (returned by genVars) are not overwritten when this cell runs in a notebook.
for isub = 1:length(subs)

    current_sub = subs[isub];

    subset_data_all = trial_data_compile[trial_data_compile[:sub].==current_sub, :]

    trial_no = subset_data_all[:trial]
    plot_values = subset_data_all[:avreward_arithmetic]

    subplot(n_side, n_side, isub)

    PyPlot.plt[:scatter](trial_no, plot_values, s=0.5)

end

suptitle("Arithmetic average reward rate over time for each sub")


In [None]:
# Grid of per-subject scatter plots: estimated average reward rate against trial number.
# The grid stays 7x7 and only grows when there are more than 49 subjects, so that
# subplot() is never asked for a position beyond the grid.
n_side = max(7, ceil(Int, sqrt(length(subs))))

# Loop and plot variables are named so that the globals `x` (em parameters) and
# `X` (returned by genVars) are not overwritten when this cell runs in a notebook.
for isub = 1:length(subs)

    current_sub = subs[isub];

    subset_data_all = trial_data_compile[trial_data_compile[:sub].==current_sub, :]

    trial_no = subset_data_all[:trial]
    plot_values = subset_data_all[:avreward_estimate]

    subplot(n_side, n_side, isub)

    PyPlot.plt[:scatter](trial_no, plot_values, s=0.5)

end

suptitle("Estimated average reward rate over time for each sub")
