In [15]:
import numpy as np
import math
import scipy as sp
from scipy import optimize
from numpy import genfromtxt
import pandas as pd

Run Functions!

In [37]:
##------------------------------------------------------------------##
##                             Executor                             ##
##  This code loops through the files and applies the full set of   ##
##  functions to each. It takes the output, compiles it into        ##
##  tables, and exports the full tables to csv files.               ##
##                                                                  ##
##  Exports: Model Information Table: a table of b, k, etc. for     ##
##               ID/day. This does not contain SV information.      ##
##           Full Subjective Value Table: a table of calculated     ##
##               SVs, model parameters, etc. etc. This contains     ##
##               all of the information provided by these           ##
##               functions.                                         ##
##                                                                  ##
##  Please see the functions for more details on what each one      ##
##  does!                                                           ##
##------------------------------------------------------------------##

# Titles for "Model Information Table"
bigarray = np.array(["ID","Day","File Name", "k", "beta", "LL", "AIC", "BIC", "r2", "Percent correct - model predicting choices","Percent correct answer on catch trials"])  #, "g_LL", "g_AIC", "g_BIC","g_r2","g_Percent correct"])

# Titles for "Full Subjective Value Table"
largeSVarray = np.array(["Trial Number","Stimulus Time", "Response Time","SS amount","LL amount","SS delay","LL delay","Response","SS SV","LL SV","k","beta","ID","Day","Date","Time","LL","AIC","BIC","r2","Percent correct - model predicting choices","Percent correct answer on catch trials"])

# Make the title array a single horizontal row so data rows can be stacked underneath it.
largeSVarray.shape = (1,largeSVarray.size)

# Collect the pieces of each table in plain lists and stack them ONCE after the loop.
# Stacking inside the loop re-copies the whole table on every file.
model_info_rows = [bigarray]
sv_table_chunks = [largeSVarray]

# NOTE: "files" and run_functions() are defined in cells further down the notebook.
# Those cells must be run before this one (this cell fails on a fresh kernel otherwise).
for filename in files:
    k,beta,LL,LL0,AIC,BIC,r2,correct,modelchart,p_d0s,p_i1000s,p_overall = run_functions(filename)  #given_k,,g_LL,g_LL0,g_AIC,g_BIC,g_r2,g_correct  <- use this if comparing to a given k. See outdated function at the bottom of the file.

    # IDs (my randomized ones) and days (1, 2, sometimes 3) can be found in the file names at these points.
    # NOTE(review): these are fixed character positions, so they silently break if the folder or file naming changes.
    ID_for_results = filename[34:38]
    day_for_results = filename[39:40]

    # This is used for Model Information Table - one line per file.
    resultarray = np.array([ID_for_results,day_for_results,filename,k,beta,LL,AIC,BIC,r2,correct,p_overall])  #,g_LL,g_AIC,g_BIC,g_r2,g_correct   <- use this if comparing to a given k. See outdated function at the bottom of the file. #p_d0s,p_i1000s, <- use these if you want to get more specific with people's catch trial failures.
    model_info_rows.append(resultarray)

    # This is used for the Full Subjective Value Table. The indexing removes the title line on the modelchart.
    sv_table_chunks.append(modelchart[1:,:])

# Title row first, then one row per file / one block of rows per file.
bigarray = np.vstack(model_info_rows)
largeSVarray = np.concatenate(sv_table_chunks, axis=0)

# I use the pandas to_csv function, which requires converting the numpy arrays into DataFrames.
# The paths are raw strings: "\M" and "\F" are not valid escape sequences, and newer Python versions warn about them.
# The resulting path text is exactly the same as before (a Windows-style "tests\..." path).
pd.DataFrame(bigarray).to_csv(r"tests\Model Information Table %100.csv", header=False, index=False)
pd.DataFrame(largeSVarray).to_csv(r"tests\Full Subjective Value Table %100.csv", header=False, index=False)

In [33]:
##------------------------------------------------------------------##
##                          Run Functions                           ##
##  This code just runs all of the main functions on one individual ##
##  file. The code above will loop through the files and call this  ##
##  on each one, constructing the output tables from the arrays     ##
##  that this function returns.                                     ##
##                                                                  ##
##  Inputs: filename: name of file to be read                       ##
##                                                                  ##
##  Outputs: k: the optimized k (kappa), the discount parameter.    ##
##           beta: the optimized b, the stochasticity-of-choice     ##
##                factor.                                           ##
##           LL: log likelihood (high is good)                      ##
##           LL0: log likelihood of a model that just coin flips    ##
##                every choice guess (probability of LL = 0.5). The ##
##                analysis is mostly judging LL vs LL0.             ##
##           AIC: Akaike Information Criterion (high is bad)        ##
##           BIC: Bayesian Information Criterion (high is bad)      ##
##           r2: r squared                                          ##
##           correct: the percentage of choices that the model      ##
##                accurately predicts based on k                    ##
##           modelchart: the output of write_SVs(). This is a huge  ##
##                table of information - check out the write_SVs()  ##
##                documentation for more information.               ##
##           percent_of_dzeros: the percent of correct answers in   ##
##              catch trials in which LL delay is zero.             ##
##           percent_of_imm1000s: the percent of correct answers in ##
##              catch trials in which SS value is $1,000.           ##
##           overall_percent: the overall percent of correct        ##
##              answers in all catch trials.                        ##
##                                                                  ##
##  Note: modelchart comes with a header. We'll remove that when    ##
##        it's compiled into a big table.                           ##
##                                                                  ##
##  Note: Currently, the only catch trial accuracy measure used is  ##
##        the overall_percent. I wrote it to be able to provide     ##
##        more specific output, but I don't think that level of     ##
##        detail is necessary in the output tables. In the main     ##
##        code above, I accept but never use the first two values.  ##
##        If you want to analyze them, you can use them there.      ##
##------------------------------------------------------------------##

def run_functions(filename):
    """Run the whole per-file pipeline and hand back everything the executor needs.

    Steps, in order: import the trial data, score the catch trials, fit
    beta and k, compute the fit statistics, and build the SV table.

    Parameters
    ----------
    filename : name of the file to be read (passed to import_data() and write_SVs()).

    Returns
    -------
    tuple
        (k, beta, LL, LL0, AIC, BIC, r2, correct, modelchart,
         percent_of_dzeros, percent_of_imm1000s, overall_percent)
    """
    # Import data: regular trials first, then the catch ("p_") trials.
    (immediate_vals, immediate_times, delay_vals, delay_times, choices,
     p_immediate_vals, p_immediate_times, p_delay_vals, p_delay_times,
     p_choices) = import_data(filename)

    # Check attention: only three of the catch-trial arrays are needed for scoring.
    catch_scores = checkAttention(p_immediate_vals, p_delay_times, p_choices)
    percent_of_dzeros, percent_of_imm1000s, overall_percent = catch_scores

    # Optimize k and beta on the regular trials.
    fit = optimizer(immediate_vals, immediate_times, delay_vals, delay_times, choices)
    negLL, beta, k, SS_V, SS_D, LL_V, LL_D, risk = fit

    # Run primary analysis on the fitted parameters.
    fit_stats = analysis(negLL, choices, SS_V, SS_D, LL_V, LL_D, risk, k, beta)
    LL, LL0, AIC, BIC, r2, correct = fit_stats

    # Get SVs (the big per-trial table, header row included).
    modelchart = write_SVs(filename, k, beta, LL, AIC, BIC, r2, correct, risk, overall_percent)

    # Optional comparison against a given k (disabled):
    #negLL = LLfromGiven(given_k, given_b, choices, SS_V, SS_D, LL_V, LL_D, risk)
    #g_LL,g_LL0,g_AIC,g_BIC,g_r2,g_correct = analysis(negLL,choices,SS_V,SS_D,LL_V,LL_D,risk,given_k,beta)

    return (k, beta, LL, LL0, AIC, BIC, r2, correct, modelchart,
            percent_of_dzeros, percent_of_imm1000s, overall_percent)   #given_k,,g_LL,g_LL0,g_AIC,g_BIC,g_r2,g_correct


Import data:

In [31]:
##------------------------------------------------------------------##
##                           Import Data                            ##
##  This code imports data from the cleaned "FixedFixed" files.     ##
##  This function is called one-by-one on these files.              ##
##                                                                  ##
##  Inputs: filename: name of file to be read                       ##
##                                                                  ##
##  Outputs: Lists (length=number of trials):                       ##
##               immediate_vals: the array of SS values             ##
##               immediate_times: the array of SS delays            ##
##               delay_vals: the array of LL values                 ##
##               delay_times: the array of LL delays                ##
##               choices: the array of choices (1=delay, 0=imm.)    ##
##               p_immediate_vals: above but only catch trials      ##
##               p_immediate_times: above but only catch trials     ##
##               p_delay_vals: above but only catch trials          ##
##               p_delay_times: above but only catch trials         ##
##               p_choices: above but only catch trials             ##
##------------------------------------------------------------------##

def import_data(filename, testfactor=1):
    """Read one cleaned data file and split it into regular and catch trials.

    Columns are taken by position: 1 = SS value, 2 = LL value, 3 = SS delay,
    4 = LL delay, 5 = choice. The first row is dropped (presumably the header
    row, which genfromtxt cannot parse as numbers - confirm against the files).

    Parameters
    ----------
    filename : name of the file to be read (comma-delimited).
    testfactor : optional scale factor, default 1 (no scaling). The values and
        delays of the regular trials are divided by it; choices and catch
        trials are never scaled.

    Returns
    -------
    tuple of ten numpy arrays
        immediate_vals, immediate_times, delay_vals, delay_times, choices
        (regular trials only), followed by p_immediate_vals, p_immediate_times,
        p_delay_vals, p_delay_times, p_choices (catch trials only).
    """
    # This is a numpy function, so it creates a numpy array. Note: if ever importing data that
    # aren't already cleaned, you'll have to do more here. The commas in "$1,000" interfere with
    # a direct delimiter-based strategy like this one.
    data = genfromtxt(filename, delimiter=',')

    trials = data[1:]
    immediate_vals = trials[:, 1]
    delay_vals = trials[:, 2]
    immediate_times = trials[:, 3]
    delay_times = trials[:, 4]
    choices = trials[:, 5]

    # Regular (non-catch) trials: SS value below 1000 AND a non-zero LL delay.
    # NOTE(review): a row with SS value above 1000, or with NaN in these columns,
    # matches neither selector and is dropped from both sets.
    boolselector = np.logical_and(immediate_vals < 1000, delay_times > 0)

    # Catch trials: SS value of exactly 1000 OR an LL delay of exactly 0.
    catchtrials = np.logical_or(immediate_vals == 1000, delay_times == 0)

    p_immediate_vals = immediate_vals[catchtrials]
    p_immediate_times = immediate_times[catchtrials]
    p_delay_vals = delay_vals[catchtrials]
    p_delay_times = delay_times[catchtrials]
    p_choices = choices[catchtrials]

    # You might test scaling down all of the values by some factor (testfactor). The k gets
    # scaled up by the same amount. This might improve model performance when beta gets large.
    immediate_vals = immediate_vals[boolselector] / testfactor
    immediate_times = immediate_times[boolselector] / testfactor
    delay_vals = delay_vals[boolselector] / testfactor
    delay_times = delay_times[boolselector] / testfactor
    choices = choices[boolselector]

    return immediate_vals, immediate_times, delay_vals, delay_times, choices, p_immediate_vals, p_immediate_times, p_delay_vals, p_delay_times, p_choices

Run the optimizer:

In [35]:
##------------------------------------------------------------------##
##                            Optimizer                             ##
##  This code runs the optimization function that calls most of the ##
##  other functions. It relies on sp.optimize.minimize, which seems ##
##  to work very well in this case. You can specify the method that ##
##  you want (e.g., "Nelder-Mead"), but the default has             ##
##  outperformed these options as far as I've seen. We can pass in  ##
##  bounds on the parameters using this method, which is not true   ##
##  of every optimization protocol (and seems necessary). To        ##
##  actually run the optimizer, you must have a function that       ##
##  takes exactly two inputs - the parameter (or array of           ##
##  parameters) and an array of the other necessary inputs into     ##
##  that function. In this case, we pass in optimize_me(), an       ##
##  array of beta and k, and an array of the other things           ##
##  necessary - the choices, values, delays, etc. The magic machine ##
##  does its thing and we end up with 'results', which is a         ##
##  complicated array but which has everything we need. results.x   ##
##  contains our optimized parameters in the same shape as our      ##
##  input array. results.fun gives the thing that was optimized -   ##
##  in this case, the negLL.                                        ##
##                                                                  ##
##                                                                  ##
##  Inputs: Lists (length=number of trials):                        ##
##              immediate_vals: the array of SS values              ##
##              immediate_times: the array of SS delays             ##
##              delay_vals: the array of LL values                  ##
##              delay_times: the array of LL delays                 ##
##              choices: the array of choices (1=delay, 0=imm.)     ##
##                                                                  ##
##  Outputs: negLL: the negLL of the final optimized set of         ##
##               parameters. This will be used a lot in the         ##
##               analysis section below.                            ##
##           beta: the optimized b, the stochasticity-of-choice     ##
##               factor.                                            ##
##           k: the optimized k (kappa), the discount parameter.    ##
##           SS_V: the array of values of the immediate options.    ##
##           SS_D: the array of delays of the immediate options.    ##
##           LL_V: the array of values of the delay options.        ##
##           LL_D: the array of delays of the delay options.        ##
##           risk: not currently in use; more information below.    ##
##------------------------------------------------------------------##

def optimizer(immediate_vals, immediate_times, delay_vals, delay_times, choices):
    """Fit beta and k by minimizing optimize_me() with scipy's bounded minimizer.

    Parameters
    ----------
    immediate_vals, immediate_times, delay_vals, delay_times : numpy arrays
        SS values, SS delays, LL values and LL delays (converted to lists here).
    choices : the choices, passed through to the objective unchanged.

    Returns
    -------
    tuple
        (negLL, beta, k, SS_V, SS_D, LL_V, LL_D, risk), where negLL is the
        minimized objective value and the four lists are the converted inputs.
    """
    # Starting point in parameter space, ordered [beta, k] like the bounds and results.x.
    # These are not priors - just where the search begins.
    start_point = [0.005, 0.01]

    # Bounds on the parameters: first tuple is beta, second is kappa.
    bkbounds = ((0, 8), (0.00000001, 6.4))

    # Risk parameter is not currently in use; it is always 1.
    risk = 1

    SS_V, SS_D, LL_V, LL_D = (
        column.tolist()
        for column in (immediate_vals, immediate_times, delay_vals, delay_times)
    )

    # Everything local_negLL() needs besides the parameters, bundled into ONE extra
    # argument so the objective is called as optimize_me(parameters, inputs).
    inputs = [choices, SS_V, SS_D, LL_V, LL_D, risk]

    # If seeking to improve performance, the method, maxiter, or tolerances are the knobs to try.
    results = sp.optimize.minimize(
        optimize_me,
        start_point,
        args=(inputs,),
        bounds=bkbounds,
        tol=None,
        callback=None,
        options={'maxiter':10000, 'disp': False},
    )

    fitted = results.x
    return results.fun, fitted[0], fitted[1], SS_V, SS_D, LL_V, LL_D, risk

Analytics:

Assess performance on catch trials

In [36]:
##------------------------------------------------------------------##
##                          Check Attention                         ##
##  This code determines the percentage of catch trials that a      ##
##  participant got right. These can be instances in which the      ##
##  delay for LL was 0 or the value of SS was $1,000. In both       ##
##  cases, this is $1,000 immediately, which is strictly the        ##
##  optimal choice given basic assumptions about the person's       ##
##  priorities and environment.                                     ##
##                                                                  ##
##  Inputs: p_immediate_vals: the SS values in choices              ##
##              identified as catch trials (in import_data()). I    ##
##              used "p_" because I originally referred to these as ##
##              "prank trials" before I knew the real name. Change  ##
##              to "c_" or "catch_" or whatever if you want it to   ##
##              be more accurately descriptive, but make sure to    ##
##              change it everywhere.                               ## 
##              optimization                                        ##
##          p_delay_times: the LL delay times in choices identified ##
##              as catch trials.                                    ##
##          p_choices: the selected options in choices identified   ##
##              as catch trials.                                    ##
##                                                                  ##
##  Outputs: percent_of_dzeros: the percent of correct answers in   ##
##              catch trials in which LL delay is zero.             ##
##           percent_of_imm1000s: the percent of correct answers in ##
##              catch trials in which SS value is $1,000.           ##
##           overall_percent: the overall percent of correct        ##
##              answers in all catch trials.                        ##
##                                                                  ##
##  Note: Currently, the only output used is the overall_percent.   ##
##        I wrote it to be able to provide more specific output,    ##
##        but I don't think that level of detail is necessary in    ##
##        the output tables, so when this function is called, I     ##
##        accept but never use the first two values. If you want to ##
##        analyze them, you can use them there.                     ##
##------------------------------------------------------------------##

def checkAttention(p_immediate_vals, p_delay_times, p_choices):
    """Score a participant's answers on the catch trials.

    Parameters
    ----------
    p_immediate_vals : SS values of the catch trials.
    p_delay_times : LL delays of the catch trials.
    p_choices : choices on the catch trials (1 = LL chosen, 0 = SS chosen).

    Returns
    -------
    percent_of_dzeros : fraction correct among trials with LL delay == 0
        (correct = LL chosen). NaN when there are no such trials.
    percent_of_imm1000s : fraction correct among trials with SS value == 1000
        (correct = SS chosen). NaN when there are no such trials.
    overall_percent : (correct zero-delay answers + correct SS-1000 answers)
        divided by the total number of catch trials. NaN only when there are
        no catch trials at all.

    NOTE(review): a trial with BOTH LL delay == 0 and SS value == 1000 is counted
    in both subsets (as in the original weighted average) - confirm that such
    trials cannot occur in the data.
    """
    p_choices = np.asarray(p_choices)
    dzero_choices = p_choices[np.asarray(p_delay_times) == 0]
    imm1000_choices = p_choices[np.asarray(p_immediate_vals) == 1000]

    n_dzeros = np.size(dzero_choices)
    n_imm1000s = np.size(imm1000_choices)
    n_catch = np.size(p_choices)

    # p_choices is 1 when LL is chosen, so on zero-delay trials the sum counts correct answers.
    correct_dzeros = np.sum(dzero_choices)
    # On SS=1000 trials the LL choices are the WRONG ones, so correct = total - sum.
    # Example: choices [0,0,1] -> 3 - 1 = 2 correct out of 3.
    correct_imm1000s = n_imm1000s - np.sum(imm1000_choices)

    # Guard each ratio: an empty subset used to produce 0/0 = NaN (with a warning).
    percent_of_dzeros = correct_dzeros / n_dzeros if n_dzeros else np.nan
    percent_of_imm1000s = correct_imm1000s / n_imm1000s if n_imm1000s else np.nan

    # Same quantity as the weighted average of the two percentages, but computed from
    # the counts so that one empty subset no longer turns the overall score into NaN.
    overall_percent = (correct_dzeros + correct_imm1000s) / n_catch if n_catch else np.nan

    return percent_of_dzeros, percent_of_imm1000s, overall_percent

Main analysis

In [22]:
##------------------------------------------------------------------##
##                             Analysis                             ##
##  This code runs the log-likelihood-based analysis of model fit.  ##
##  It also computes the percentage of choices that the model       ##
##  accurately predicts.                                            ##
##                                                                  ##
##  Inputs: negLL: negative log likelihood of the best option after ## 
##              optimization                                        ##
##          choices: the set of choices from the participants       ##
##          Lists (length=number of trials):                        ##
##              SS_V: values of the smaller-sooner (immediate) ops. ##
##              SS_D: delays of the smaller-sooner (immediate) ops. ##
##              LL_V: values of the larger-later (delayed) options  ##
##              LL_D: delays of the larger-later (delayed) options  ##
##          risk: not currently in use (always 1 in this code). If  ##
##              you want to use a risk parameter (alpha), you       ##
##              can input one here.                                 ##
##          given_k: the optimized k value. If used elsewhere, this ##
##              function could also accept and test any k value     ##
##          beta: this is actually unnecessary. I originally wrote  ##
##              this to use the choice_prob function to determine   ##
##              the probability of each choice based on k, but it's ##
##              turned into a binary anyway, so the beta is not     ##
##              really taken into account. However, as they say, it ##
##              ain't broke, so I haven't fixed it.                 ##
##                                                                  ##
##  Outputs: LL: log likelihood (high is good)                      ##
##           LL0: log likelihood of a model that just coin flips    ##
##                every choice guess (probability of LL = 0.5). The ##
##                analysis is mostly judging LL vs LL0.             ##
##           AIC: Akaike Information Criterion (high is bad)        ##
##           BIC: Bayesian Information Criterion (high is bad)      ##
##           r2: r squared                                          ##
##           correct: the percentage of choices that the model      ##
##                accurately predicts based on k                    ##
##------------------------------------------------------------------##

def analysis(negLL,choices,SS_V,SS_D,LL_V,LL_D,risk,given_k,beta,n_params=2):
    """Compute the log-likelihood-based fit statistics and the prediction accuracy.

    Parameters
    ----------
    negLL : negative log likelihood of the fitted model.
    choices : array of the participant's choices (1 = LL chosen).
    SS_V, SS_D, LL_V, LL_D : lists of SS values, SS delays, LL values, LL delays.
    risk : risk parameter, passed through to choice_prob().
    given_k : the k used to predict choices for the accuracy measure.
    beta : the beta used alongside given_k when calling choice_prob().
    n_params : number of fitted parameters used in AIC and BIC. Defaults to 2
        (beta and k); pass len(results.x) if using a different model.

    Returns
    -------
    tuple
        (LL, LL0, AIC, BIC, r2, correct)
    """
    n_trials = len(SS_V)

    # Unrestricted log-likelihood
    LL = -negLL

    # Restricted log-likelihood: a coin-flip model assigns probability 0.5 to every
    # choice, so each choice contributes log(0.5) regardless of what was chosen.
    LL0 = np.size(choices) * math.log(0.5)

    # Akaike Information Criterion: -2*LL + 2*(number of parameters)
    AIC = -2*LL + 2*n_params

    # Bayesian information criterion: -2*LL + (number of parameters)*log(number of trials)
    BIC = -2*LL + n_params*math.log(n_trials)

    # R squared (1 - LL/LL0)
    r2 = 1 - LL/LL0

    # Percent accuracy: choice_prob() gives the probability of choosing LL on each trial.
    # LL is 1 in choices, so a probability >= 0.5 counts as the model predicting LL.
    parray = np.array(choice_prob(SS_V,SS_D,LL_V,LL_D,[beta,given_k],risk))
    predicted_LL = parray >= 0.5
    correct = np.sum(predicted_LL == np.asarray(choices)) / n_trials

    return(LL,LL0,AIC,BIC,r2,correct)

# Hessian unavailable in this optimization function, but would use results.hess_inv here
#Tester line if you want: print("LL",LL,"AIC",AIC,"BIC",BIC,"R2",r2,"correct",correct)


Functions:

In [23]:
##------------------------------------------------------------------##
##                        Discount function                         ##
##  This code performs the hyperbolic discounting function.         ##
##                                                                  ##
##  Inputs: v: value                                                ##
##          d: delay                                                ##
##          kappa: k, the discount parameter                        ##
##          risk: not currently in use (always 1 in this code). If  ##
##                you want to use a risk parameter (alpha), you     ##
##                can input one here.                               ##
##                                                                  ##
##  Outputs: SV: subjective value                                   ##
##------------------------------------------------------------------##



def discount(v,d,kappa,risk):
    """Return the hyperbolically discounted subjective value of an option.

    v is the value, d the delay, kappa the discount parameter, and risk the
    exponent applied to the value (always 1 in this code).
    """
    utility = v ** risk
    delay_penalty = 1 + kappa * d
    return utility / delay_penalty

In [24]:
##------------------------------------------------------------------##
##                        Choice Probability                        ##
##  This code uses the stochasticity factor beta to determine,      ##
##  based on the subjective values of the immediate and delayed     ##
##  options, the chance of someone choosing the delay option.       ##
##  Note: whether the chance is above or below 0.5 (50%) is         ##
##  entirely based on which SV is higher. This could hypothetically ##
##  be changed with some bias factor, but isn't currently. In other ##
##  words, if SSsv is greater than LLsv, the chance that the person ##
##  chooses the SS option will be >0.5 regardless, but beta         ##
##  determines if it's 0.51 or 0.99.                                ##
##                                                                  ##
##  Inputs: Lists (length=number of trials):                        ##
##              SS_V: values of the smaller-sooner (immediate) ops. ##
##              SS_D: delays of the smaller-sooner (immediate) ops. ##
##              LL_V: values of the larger-later (delayed) options  ##
##              LL_D: delays of the larger-later (delayed) options  ##
##          beta_and_k_array: two item array consisting of:         ##
##              [0]: beta (b), the stochasticity-of-choice factor   ##
##              [1]: kappa (k), the discount parameter              ##
##          risk: not currently in use (always 1 in this code). If  ##
##                you want to use a risk parameter (alpha), you     ##
##                can input one here.                               ##
##                                                                  ##
##  Outputs: ps: this is a list of probabilities of choosing the    ##
##               delay option. Note that when the probability of    ##
##               choosing the delay option approaches 1, it also    ##
##               approaches the indicator of that option in the     ##
##               choice data (1).                                   ##
##               This is a list of size = num of trials. It gets    ##
##               turned into a numpy array in local_negLL()         ##
##                                                                  ##
##  Note: (SS_V,SS_D,LL_V,LL_D) were once called (v1,d1,v2,d2).     ##
##        I've changed them here for clarity, but if you see those  ##
##        terms elsewhere, now you know.                            ##
##                                                                  ##
##  Note: This code used to use "beta" to mean the array of the     ##
##        stochasticity factor and kappa. We're calling the         ##
##        stochasticity factor "beta" now, however, making this     ##
##        terminology confusing. For clarity, I've changed the name ##
##        of the array to "beta_and_k_array".                       ##
##------------------------------------------------------------------##

def choice_prob(SS_V,SS_D,LL_V,LL_D,beta_and_k_array,risk):
    """Return, for every trial, the probability of choosing the delayed (LL) option.

    Parameters
    ----------
    SS_V, SS_D, LL_V, LL_D : per-trial SS values, SS delays, LL values, LL delays.
    beta_and_k_array : two items - [0] is beta, [1] is kappa.
    risk : risk parameter, passed through to discount().

    Returns
    -------
    list of float
        One probability per trial (length = len(SS_V)).
    """
    beta = beta_and_k_array[0]
    kappa = beta_and_k_array[1]
    ps = []

    for n in range(len(SS_V)):

        SS_SV = discount(SS_V[n],SS_D[n],kappa,risk)
        LL_SV = discount(LL_V[n],LL_D[n],kappa,risk)

        try:
            # Logistic rule: if the SS subjective value is higher, e^x is large and p is
            # close to 0 (unlikely to choose delay); if the LL subjective value is higher,
            # e^x is small and p is close to 1. Equal values give e^0 = 1, so p = 0.5.
            p = 1 / (1 + math.exp(beta*(SS_SV-LL_SV)))
        except OverflowError:
            # math.exp raises OverflowError when the exponent is too large. In that case
            # 1/(1+[something huge]) is essentially zero. It is stored as the FLOAT 0.0:
            # an int 0 here could make the whole list integer-typed, and the log(0)
            # trap in local_negLL() cannot write 0.0001 into an integer array.
            p = 0.0
        ps.append(p)

    return ps

## If you're getting weird results or think there are problems with the data, inserting the below indicator might provide clarity.
#print("beta:",beta[0],"k:",beta[1],"SV_1:",SV_1,"SV_2",SV_2,"imm val",SS_V[n],"imm delay",SS_D[n], "del val",LL_V[n],"del del",LL_D[n])

In [25]:
##------------------------------------------------------------------##
##                  Local Negative Log Likelihood                   ##
##  This code uses choice probabilities computed by choice_prob()   ##
##  to calculate the negative log likelihood of the data given      ##
##  those choice probabilities. Essentially, we're seeing if        ##
##  the provided k and b are a good match for the data we have.     ##
##  If it isn't clear, the optimization algorithm will be looping   ##
##  through this over and over, finding the negative log likelihood ##
##  at each point in the parameter space and "surfing" downwards to ##
##  find the ideal point (the minimum)                              ##
##                                                                  ##
##  Inputs: beta_and_k_array: two item array consisting of:         ##
##              [0]: beta (b), the stochasticity-of-choice factor   ##
##              [1]: kappa (k), the discount parameter              ##
##          Lists (length=number of trials):                        ##
##              choices_list: choices (1 or 0, 1=delay option)      ##
##              SS_V: values of the smaller-sooner (immediate) ops. ##
##              SS_D: delays of the smaller-sooner (immediate) ops. ##
##              LL_V: values of the larger-later (delayed) options  ##
##              LL_D: delays of the larger-later (delayed) options  ##
##          risk: not currently in use (always 1 in this code). If  ##
##                you want to use a risk parameter (alpha), you     ##
##                can input one here.                               ##
##                                                                  ##
##  Outputs: sumerr: this is the negative log likelihood - the sum  ##
##           of the error at each choice (or trial). The error is   ##
##           determined as follows: if the choice was 1, the        ##
##           calculated probability would ideally be 1. Log(1) is   ##
##           0, indicating no error. The same idea applies for the  ##
##           choices of 0 - error=0 when p=0. As p deviates from    ##
##           the actual choice, error increases. This is all done   ##
##           at once using numpy arrays.                            ##
##                                                                  ##
##  Note: if anyone is reading this who is new to arrays in Python, ##
##  doing (array==x) returns an array of boolean values indicating  ##
##  the points at which the original array equals x.                ##
##  Array2[array1==x] feeds that boolean array into array2, getting ##
##  the values of array2 where array1 equals x. This basic form is  ##
##  used all throughout this program, so I thought I'd mention it.  ##
##------------------------------------------------------------------##

def local_negLL(beta_and_k_array,choices_list,SS_V,SS_D,LL_V,LL_D,risk):
    """Return the negative log likelihood of the choices under the given beta and k.

    Parameters
    ----------
    beta_and_k_array : two items - [0] is beta, [1] is kappa.
    choices_list : the choices (1 = delay option, 0 = immediate option).
    SS_V, SS_D, LL_V, LL_D : per-trial SS values, SS delays, LL values, LL delays.
    risk : risk parameter, passed through to choice_prob().

    Returns
    -------
    sumerr : the summed negative log likelihood over all trials.
    """
    # Force a float array. If every probability came back as the integer 0, numpy would
    # build an integer array, the 0.0001 written below would be truncated back to 0, and
    # log(0) would make the result inf/NaN.
    ps = np.array(choice_prob(SS_V,SS_D,LL_V,LL_D,beta_and_k_array,risk), dtype=float)
    choices = np.array(choices_list)

    # Trap log(0). This will prevent the code from trying to calculate the log of 0 in the next section.
    # NOTE(review): only EXACT 0 and 1 are replaced; a tiny probability such as 1e-10 is kept
    # as-is and is penalized more heavily than an exact 0. Consider clipping instead.
    ps[ps==0] = 0.0001
    ps[ps==1] = 0.9999

    # Log-likelihood: log(p) where LL was chosen, log(1-p) where SS was chosen.
    err = (choices==1)*np.log(ps) + (choices==0)*np.log(1-ps)

    # Sum of -log-likelihood
    sumerr = -np.sum(err)

    return sumerr

In [26]:
##------------------------------------------------------------------##
##                           Optimize Me                            ##
##  This code gives the optimization algorithm a specific function  ##
##  to optimize. The scipy -sp.optimize.minimize- requests a        ##
##  function followed by two inputs: an array of the parameters to  ##
##  optimize (which is why we have them in a beta_and_k_array in    ##
##  the first place) and the other inputs to that function.         ##
##  Our local_negLL() is formulated slightly differently than that, ##
##  so we're using this code to construct it properly.              ##
##                                                                  ##
##  Inputs: beta_and_k_array_to_optimize: two item array:           ##
##              [0]: beta (b), the stochasticity-of-choice factor   ##
##              [1]: kappa (k), the discount parameter              ##
##          inputs: six item array, the first five of which are     ##
##          Lists (length=number of trials):                        ##
##              [0]: choices_list: choices (1 or 0, 1=delay option) ##
##              [1]: SS_V: values of the SS (immediate) ops.        ##
##              [2]: SS_D: delays of the SS (immediate) ops.        ##
##              [3]: LL_V: values of the LL (delayed) options       ##
##              [4]: LL_D: delays of the LL (delayed) options       ##
##              [5]: risk: not currently in use (always 1 in this   ##
##                      code). If you want to use a risk parameter  ##
##                      (alpha), you can input one here.            ##
##                                                                  ##
##  Outputs: the output of the local_negLL function applied to the  ##
##           given b, k, choices, values, and delays.               ##
##------------------------------------------------------------------##

def optimize_me(beta_and_k_array_to_optimize, inputs):
    """Adapter that hands the packed `inputs` to local_negLL().

    beta_and_k_array_to_optimize is forwarded untouched as the first
    argument; `inputs` is split into its six items (choices, SS values,
    SS delays, LL values, LL delays, risk) which are forwarded in that
    order. Returns whatever local_negLL() returns.
    """
    # The unpacking below requires `inputs` to hold exactly six items.
    (choice_seq, ss_values, ss_delays,
     ll_values, ll_delays, risk_param) = inputs

    neg_log_likelihood = local_negLL(
        beta_and_k_array_to_optimize,
        choice_seq,
        ss_values,
        ss_delays,
        ll_values,
        ll_delays,
        risk_param,
    )
    return neg_log_likelihood

In [32]:
##------------------------------------------------------------------##
##                            Write SVs                             ##
##  This code is just array management - nothing clever. It writes  ##
##  the calculated SVs into arrays for export into individual       ##
##  files and into an overall SV table. I think the individual      ##
##  files are less useful. The large one is critical though - it's  ##
##  used by the Bayesian method as a source for the values and IDs. ##
##                                                                  ##
##  Inputs: filename: name of the file (includes "FixedFixed")      ##
##          k: kappa, the discount parameter                        ##
##          beta: the stochasticity-of-choice factor                ##
##          LL: log likelihood                                      ##
##          AIC: Akaike Information Criterion (from analysis())     ##
##          BIC: Bayesian Information Criterion (from analysis())   ##
##          r2: r squared                                           ##
##          correct: the percentage of choices that the model       ##
##                accurately predicted                              ##
##          risk: not currently in use (always 1 in this code). If  ##
##                you want to use a risk parameter (alpha), you     ##
##                can input one here.                               ##
##          catchtrialpercentcorrect: percentage of the catch       ##
##                trials accurately predicted by the model          ##
##                                                                  ##
##  Outputs: array of the first eight SV-file columns joined with   ##
##           the SV, model-fit, and identifier columns (titled).    ##
##------------------------------------------------------------------##

def _sv_file_path(filename):
    """Return the path of the "SVFixed" file that matches a "FixedFixed" file."""
    # Example input:  All Files Folder\FixedFixedAdjAmt_1083_1_01-16-2020_16h-31m.csv
    # Example output: All Files Folder\SVFixedAdjAmt_1083_1_01-16-2020_16h-31m.csv
    base_start = max(filename.rfind("\\"), filename.rfind("/")) + 1
    folder, base = filename[:base_start], filename[base_start:]
    if base.startswith("FixedFixed"):
        # Swap the first "Fixed" for "SV", wherever the folder prefix ends.
        return folder + "SV" + base[len("Fixed"):]
    # Unexpected name: keep the historical fixed-offset behaviour.
    return filename[:17]+"SV"+filename[22:]


def _session_fields(filename):
    """Return (ID, day, date, time) parsed from a session file name."""
    base_start = max(filename.rfind("\\"), filename.rfind("/")) + 1
    base = filename[base_start:]
    if base.lower().endswith(".csv"):
        base = base[:-4]
    # e.g. ["FixedFixedAdjAmt", "1083", "1", "01-16-2020", "16h-31m"]
    fields = base.split("_")
    if len(fields) >= 5:
        # Splitting on "_" keeps date/time intact when the day token is
        # longer than one character (e.g. "AdjAmt_6707_1.2_06-22-2021_...");
        # fixed character offsets would shift them.
        return fields[-4], fields[-3], fields[-2], fields[-1]
    # Unexpected name: keep the historical fixed-offset behaviour.
    return filename[34:38], filename[39:40], filename[41:51], filename[52:59]


def write_SVs(filename,k,beta,LL,AIC,BIC,r2,correct,risk,catchtrialpercentcorrect):
    """Build the subjective value (SV) table for one session file.

    Reads the first eight columns of the session's "SVFixed" file (row 0 is
    treated as the title row), computes an SS SV and an LL SV for every
    remaining row with discount(value, delay, k, risk), and appends those
    plus the session-wide values as extra columns.

    Inputs: filename: path of the "FixedFixed" file for the session, e.g.
                ...FixedFixedAdjAmt_1083_1_01-16-2020_16h-31m.csv
            k, beta, LL, AIC, BIC, r2, correct, catchtrialpercentcorrect:
                one value each per session; every one is repeated down
                its own column.
            risk: passed straight through to discount().

    Outputs: 2-D array: the eight columns read from the SV file followed by
             "SS SV", "LL SV", "k", "beta", "ID", "Day", "Date", "Time",
             "LL", "AIC", "BIC", "r2", "Percent correct" and
             "Percent correct on catch trials", title row first.
    """
    newpath = _sv_file_path(filename)
    a=pd.read_csv(newpath,quotechar='"',skipinitialspace=True, header=None,index_col=None, usecols=[0,1,2,3,4,5,6,7]).to_numpy()

    # Row 0 of `a` holds the titles, so there is one data row fewer than rows.
    n_trials = a.shape[0]-1

    def constant_column(value):
        # One value repeated for every trial, as an (n_trials, 1) column.
        return np.full((n_trials,1),value)

    # Column layout of the file that was read in:
    # Trial number | Stimulus onset | Response time | SS amount | LL amount | SS delay | LL delay | Response
    #       0              1                2             3           4          5          6          7
    # Each amount column is paired with the delay column two places to its right.
    SV_imm_array = np.empty((n_trials,1))
    SV_del_array = np.empty((n_trials,1))
    for value_col, sv_column in ((3, SV_imm_array), (4, SV_del_array)):
        for row in range(1,a.shape[0]):           # iterate through the data rows (row 0 is the title row)
            v = float(a[row,value_col])
            d = float(a[row,value_col+2])
            sv_column[row-1] = discount(v,d,k,risk)

    # We only find one k, beta, etc. per session, so each of these columns is
    # the same number all the way down. That is convenient later in the large
    # SV table, where multiple sessions are combined.
    k_array = constant_column(k)
    beta_array = constant_column(beta)
    LL_array = constant_column(LL)
    AIC_array = constant_column(AIC)
    BIC_array = constant_column(BIC)
    r2_array = constant_column(r2)
    correct_array = constant_column(correct)
    catchtrialpercentcorrect_array = constant_column(catchtrialpercentcorrect)

    # Same as above but for the identifiers taken from the file name.
    session_id, session_day, session_date, session_time = _session_fields(filename)
    ID_array = constant_column(session_id)
    firstorsecond_array = constant_column(session_day)
    date_array = constant_column(session_date)
    time_array = constant_column(session_time)

    # Assemble the output:
    #   1. hstack the new columns side by side,
    #   2. vstack their titles on top,
    #   3. hstack the result to the right of the array read from the SV file.
    titles = np.array(["SS SV","LL SV","k","beta","ID","Day","Date","Time","LL","AIC","BIC","r2","Percent correct","Percent correct on catch trials"])
    numbers = np.hstack((SV_imm_array,SV_del_array,k_array,beta_array,ID_array,firstorsecond_array,date_array,time_array,LL_array,AIC_array,BIC_array,r2_array,correct_array,catchtrialpercentcorrect_array))
    sidebar = np.vstack((titles,numbers))
    output = np.hstack((a,sidebar))

    # This will write individual SV files. Comment out to prevent this.
    #pd.DataFrame(output).to_csv(newpath, header=False, index=False)
    return output


# I had previously had trouble with this code, so I wrote some testing lines. I'll leave them here in case they're useful in the future (though the original issues were solved)
    #print(a.shape, SV_imm_array.shape, SV_del_array.shape)
    #print("initial array")
    #print(a[:4,:])
    #print("sv i")
    #print(SV_imm_array[:4,:])
    #print("sv d")
    #print(SV_del_array[:4,:])   
    # 
    #print("output")
    #print(output[:4,:])
    #print(newpath)
    #print(a[4,:],SV_imm_array[4,:],SV_del_array[4,:])     

Construct File Name List

In [29]:
##------------------------------------------------------------------##
##                     Construct File Name List                     ##
##  This code is very simple. It begins with the full list of       ##
##  original file names (those original files are no longer used    ##
##  here). I had run these files through some code that cleaned     ##
##  them up: for example, "get $1,000" is changed to "1000". The    ##
##  "FixedFixed" files are the correct ones.                        ##
##------------------------------------------------------------------##

# Full list of the original session file names. These are raw string literals
# so the Windows-style "\" separator stays a literal backslash: in a normal
# literal "\A" is an unrecognized escape sequence, which Python 3.12+ reports
# as a SyntaxWarning.
datanames = [
    r"All Files Folder\AdjAmt_1083_1_01-16-2020_16h-31m.csv",
    r"All Files Folder\AdjAmt_1083_1_03-03-2022_16h-12m.csv",
    r"All Files Folder\AdjAmt_1539_1_04-04-2019_17h-21m.csv",
    r"All Files Folder\AdjAmt_1539_2_04-11-2019_16h-51m.csv",
    r"All Files Folder\AdjAmt_2008_1_11-14-2019_17h-30m.csv",
    r"All Files Folder\AdjAmt_2008_2_11-19-2019_16h-16m.csv",
    r"All Files Folder\AdjAmt_2020_1_09-24-2019_16h-25m.csv",
    r"All Files Folder\AdjAmt_2020_2_10-10-2019_16h-47m.csv",
    r"All Files Folder\AdjAmt_2042_1_09-30-2021_17h-13m.csv",
    r"All Files Folder\AdjAmt_2042_2_04-07-2022_15h-40m.csv",
    r"All Files Folder\AdjAmt_2399_1_07-16-2019_16h-53m.csv",
    r"All Files Folder\AdjAmt_2475_1_12-05-2019_16h-44m.csv",
    r"All Files Folder\AdjAmt_2475_2_01-09-2020_16h-36m.csv",
    r"All Files Folder\AdjAmt_2503_1_01-07-2020_16h-48m.csv",
    r"All Files Folder\AdjAmt_2503_2_02-06-2020_16h-03m.csv",
    r"All Files Folder\AdjAmt_2506_1_10-01-2019_16h-33m.csv",
    r"All Files Folder\AdjAmt_2506_2_12-10-2019_16h-32m.csv",
    r"All Files Folder\AdjAmt_2874_1_04-05-2018_16h-54m.csv",
    r"All Files Folder\AdjAmt_2874_2_06-21-2018_15h-26m.csv",
    r"All Files Folder\AdjAmt_3278_1_05-10-2018_16h-29m.csv",
    r"All Files Folder\AdjAmt_3278_2_06-07-2018_16h-59m.csv",
    r"All Files Folder\AdjAmt_3458_1_01-31-2019_16h-21m.csv",
    r"All Files Folder\AdjAmt_3546_1_05-05-2022_16h-34m.csv",
    r"All Files Folder\AdjAmt_3546_2_06-02-2022_14h-16m.csv",
    r"All Files Folder\AdjAmt_3638_1_10-12-2017_16h-41m.csv",
    r"All Files Folder\AdjAmt_3638_2_11-28-2017_17h-14m.csv",
    r"All Files Folder\AdjAmt_3687_1_03-18-2021_16h-35m.csv",
    r"All Files Folder\AdjAmt_3687_2_03-25-2021_16h-50m.csv",
    r"All Files Folder\AdjAmt_3710_1_05-10-2022_16h-30m.csv",
    r"All Files Folder\AdjAmt_3812_1_08-08-2019_16h-55m.csv",
    r"All Files Folder\AdjAmt_3812_2_11-12-2019_13h-36m.csv",
    r"All Files Folder\AdjAmt_3924_1_04-14-2022_16h-24m.csv",
    r"All Files Folder\AdjAmt_3924_2_04-21-2022_15h-10m.csv",
    r"All Files Folder\AdjAmt_3937_1_12-03-2020_17h-33m.csv",
    r"All Files Folder\AdjAmt_4292_1_05-14-2019_17h-32m.csv",
    r"All Files Folder\AdjAmt_4292_2_05-21-2019_17h-27m.csv",
    r"All Files Folder\AdjAmt_4513_1_09-27-2018_16h-19m.csv",
    r"All Files Folder\AdjAmt_4513_2_10-11-2018_16h-08m.csv",
    r"All Files Folder\AdjAmt_4513_3_05-16-2019_12h-55m.csv",
    r"All Files Folder\AdjAmt_4556_1_03-14-2019_16h-22m.csv",
    r"All Files Folder\AdjAmt_4556_2_01-14-2021_15h-35m.csv",
    r"All Files Folder\AdjAmt_4776_1_09-26-2019_17h-08m.csv",
    r"All Files Folder\AdjAmt_4776_2_10-17-2019_16h-28m.csv",
    r"All Files Folder\AdjAmt_4805_1_12-12-2017_17h-42m.csv",
    r"All Files Folder\AdjAmt_4805_2_01-04-2018_16h-27m.csv",
    r"All Files Folder\AdjAmt_4884_1_08-27-2019_16h-41m.csv",
    r"All Files Folder\AdjAmt_4884_2_08-29-2019_16h-20m.csv",
    r"All Files Folder\AdjAmt_4884_3_07-26-2021_16h-18m.csv",
    r"All Files Folder\AdjAmt_5334_1_01-07-2021_16h-51m.csv",
    r"All Files Folder\AdjAmt_5625_1_08-15-2019_16h-30m.csv",
    r"All Files Folder\AdjAmt_5625_2_09-12-2019_16h-20m.csv",
    r"All Files Folder\AdjAmt_5667_1_07-11-2019_16h-46m.csv",
    r"All Files Folder\AdjAmt_5667_2_08-13-2019_16h-58m.csv",
    r"All Files Folder\AdjAmt_5764_1_10-26-2017_16h-57m.csv",
    r"All Files Folder\AdjAmt_5764_2_11-20-2017_15h-19m.csv",
    r"All Files Folder\AdjAmt_5897_1_11-08-2018_16h-56m.csv",
    r"All Files Folder\AdjAmt_5897_2_11-29-2018_16h-24m.csv",
    r"All Files Folder\AdjAmt_5900_1_01-18-2018_16h-33m.csv",
    r"All Files Folder\AdjAmt_5900_2_02-01-2018_16h-39m.csv",
    r"All Files Folder\AdjAmt_5940_1_04-29-2021_16h-42m.csv",
    r"All Files Folder\AdjAmt_5940_2_06-24-2021_15h-25m.csv",
    r"All Files Folder\AdjAmt_6059_1_12-20-2018_16h-31m.csv",
    r"All Files Folder\AdjAmt_6059_2_01-03-2019_16h-16m.csv",
    r"All Files Folder\AdjAmt_6269_1_03-08-2018_16h-54m.csv",
    r"All Files Folder\AdjAmt_6269_2_03-29-2018_16h-43m.csv",
    r"All Files Folder\AdjAmt_6281_1_07-12-2018_16h-49m.csv",
    r"All Files Folder\AdjAmt_6281_2_07-26-2018_16h-30m.csv",
    r"All Files Folder\AdjAmt_6382_1_04-18-2022_17h-06m.csv",
    r"All Files Folder\AdjAmt_6463_1_02-15-2018_16h-46m.csv",
    r"All Files Folder\AdjAmt_6463_2_03-15-2018_16h-16m.csv",
    r"All Files Folder\AdjAmt_6617_1_05-12-2022_16h-17m.csv",
    r"All Files Folder\AdjAmt_6624_1_08-09-2018_16h-36m.csv",
    r"All Files Folder\AdjAmt_6624_2_09-13-2018_16h-02m.csv",
    r"All Files Folder\AdjAmt_6630_2_06-16-2022_16h-29m.csv",
    r"All Files Folder\AdjAmt_6707_1.2_06-22-2021_15h-17m.csv",
    r"All Files Folder\AdjAmt_6707_2.2_07-12-2021_15h-47m.csv",
    r"All Files Folder\AdjAmt_6817_1_10-25-2021_16h-25m.csv",
    r"All Files Folder\AdjAmt_6817_2_11-16-2021_16h-47m.csv",
    r"All Files Folder\AdjAmt_7054_1_12-02-2021_17h-19m.csv",
    r"All Files Folder\AdjAmt_7054_2_12-09-2021_16h-23m.csv",
    r"All Files Folder\AdjAmt_7238_1_02-05-2019_17h-02m.csv",
    r"All Files Folder\AdjAmt_7238_2_02-07-2019_16h-26m.csv",
    r"All Files Folder\AdjAmt_7309_1_09-10-2019_16h-30m.csv",
    r"All Files Folder\AdjAmt_7309_1_10-03-2019_16h-02m.csv",
    r"All Files Folder\AdjAmt_7387_1_06-13-2019_17h-16m.csv",
    r"All Files Folder\AdjAmt_7387_2_09-05-2019_16h-06m.csv",
    r"All Files Folder\AdjAmt_7467_1_01-13-2022_16h-48m.csv",
    r"All Files Folder\AdjAmt_7626_1_06-09-2022_15h-17m.csv",
    r"All Files Folder\AdjAmt_7637_1_07-29-2020_17h-09m.csv",
    r"All Files Folder\AdjAmt_7637_2_10-22-2020_15h-46m.csv",
    r"All Files Folder\AdjAmt_7897_1_09-03-2020_17h-33m.csv",
    r"All Files Folder\AdjAmt_7897_2_10-08-2020_16h-28m.csv",
    r"All Files Folder\AdjAmt_8267_2_05-09-2019_16h-07m.csv",
    r"All Files Folder\AdjAmt_8276_1_07-25-2019_17h-30m.csv",
    r"All Files Folder\AdjAmt_8276_2_08-06-2019_16h-56m.csv",
    r"All Files Folder\AdjAmt_8370_1_04-17-2018_16h-35m.csv",
    r"All Files Folder\AdjAmt_8370_2_04-26-2018_16h-35m.csv",
    r"All Files Folder\AdjAmt_8416_1_04-12-2018_16h-17m.csv",
    r"All Files Folder\AdjAmt_8416_2_05-03-2018_15h-12m.csv",
    r"All Files Folder\AdjAmt_8474_1_08-30-2018_16h-12m.csv",
    r"All Files Folder\AdjAmt_8474_2_10-04-2018_16h-10m.csv",
    r"All Files Folder\AdjAmt_8475_1_01-30-2020_17h-01m.csv",
    r"All Files Folder\AdjAmt_8475_3_03-15-2021_15h-48m.csv",
    r"All Files Folder\AdjAmt_8532_1_12-14-2017_16h-45m.csv",
    r"All Files Folder\AdjAmt_8532_2_12-21-2017_15h-20m.csv",
    r"All Files Folder\AdjAmt_8568_1_01-21-2021_16h-52m.csv",
    r"All Files Folder\AdjAmt_8568_2_01-28-2021_16h-23m.csv",
    r"All Files Folder\AdjAmt_8582_2_04-26-2022_15h-00m.csv",
    r"All Files Folder\AdjAmt_8890_1_12-03-2019_16h-11m.csv",
    r"All Files Folder\AdjAmt_8890_2_01-14-2020_16h-24m.csv",
    r"All Files Folder\AdjAmt_9337_1_04-16-2019_15h-26m.csv",
    r"All Files Folder\AdjAmt_9337_2_04-23-2019_15h-42m.csv",
    r"All Files Folder\AdjAmt_9639_1_03-07-2019_16h-16m.csv",
    r"All Files Folder\AdjAmt_9639_2_04-25-2019_16h-16m.csv",
    r"All Files Folder\AdjAmt_9716_1_05-23-2019_16h-24m.csv",
    r"All Files Folder\AdjAmt_9716_2_07-09-2019_16h-35m.csv",
]

# Build the paths of the cleaned-up "FixedFixed" copies by inserting that
# prefix in front of the bare file name. 17 is the length of the folder
# prefix "All Files Folder\" (separator included).
files = []
for nam in datanames:
    newpath = nam[:17]+"FixedFixed"+nam[17:]
    files.append(newpath)

Bonus Outdated Function - LL From Given

In [None]:
# This function gets the negLL from a given k and b. It's no longer in use, as I'm no longer doing those comparisons. However, because the run_functions() code allows for the option to run a comparison like this, I figured I'd leave the code here. 

def LLfromGiven(given_k, given_b, choices, SS_V, SS_D, LL_V, LL_D, risk):
    """Evaluate local_negLL() at a supplied k and b instead of fitted ones.

    The two parameters are packed as [given_b, given_k] (b first, k second)
    and passed, together with the remaining arguments in the order given,
    to local_negLL(). Its result is returned unchanged.
    """
    return local_negLL(
        [given_b, given_k],
        choices,
        SS_V,
        SS_D,
        LL_V,
        LL_D,
        risk,
    )