In [2]:
import numpy as np
import csv
import os
import os.path
import matplotlib.pyplot as plt
import random
import pandas as pd
import seaborn as sns
import umap
from sklearn.decomposition import PCA
import math
import scipy

## first, run the functions needed to generate walks

In [3]:
def autocorr1(x,lags):
    '''Autocorrelation of a series at lags 0 .. lags-1, computed with numpy.corrcoef.

    x    : sliceable sequence of reward probabilities.
    lags : number of lags to evaluate (the notebook usually passes 50, but
           any count works).

    Returns a numpy array with one correlation coefficient per lag, so its
    length equals `lags`. Entry 0 is fixed at 1.0; entry k is the Pearson
    correlation between x[k:] and x[:-k].
    '''
    coefficients = []
    for shift in range(lags):
        if shift == 0:
            # a series is perfectly correlated with itself
            coefficients.append(1.)
            continue
        leading = x[shift:]
        trailing = x[:-shift]
        coefficients.append(np.corrcoef(leading, trailing)[0, 1])
    return np.array(coefficients)

In [4]:
def halflife(walk):
    '''Count the lags at which the arm-averaged autocorrelation is at least 0.5.

    walk : sequence whose first three entries are the reward-probability
           series of the three arms.

    Each arm's autocorrelation is computed over 50 lags with autocorr1, the
    three curves are averaged lag by lag, and the number of lags whose
    averaged coefficient is >= 0.5 is returned. The notebook uses this count
    as the average half-life of the walk.
    '''
    perArm = [autocorr1(arm, 50).tolist() for arm in (walk[0], walk[1], walk[2])]

    # lag-by-lag mean of the three autocorrelation curves
    meanCorr = [(a + b + c) / 3 for a, b, c in zip(perArm[0], perArm[1], perArm[2])]

    return sum(1 for value in meanCorr if value >= 0.5)

In [5]:
def _simulate_arm(startProb, trialNum, hazard, step, bounds):
    '''Generate one arm's reward-probability series as a bounded step walk.

    On each trial after the first, a change happens with probability `hazard`.
    A change moves the probability by `step`: down if it is at/above
    bounds[1], up if it is at/below bounds[0], otherwise up or down at random.
    Returns a list of `trialNum` probabilities starting with `startProb`.
    '''
    prob = startProb
    armList = [prob]
    for _ in range(trialNum - 1):
        if np.random.random() <= hazard:  ### change occurs this trial
            if prob >= bounds[1]:
                move = -step
            elif prob <= bounds[0]:
                move = step
            else:
                action = np.random.choice(['up', 'down'], 1)
                move = step if action[0] == 'up' else -step
            # Snap to the decimal grid. Repeated +/- 0.2 otherwise accumulates
            # binary rounding error (e.g. 0.7 + 0.2 -> 0.8999999999999999),
            # which leaks into the bound checks and the range criterion.
            prob = round(prob + move, 10)
        armList.append(prob)
    return armList


def walk():
    '''Generate one accepted 3-armed restless-bandit walk by rejection sampling.

    Each arm starts from a shuffled value of [0.9, 0.7, 0.3] and is
    regenerated until its range (max - min) is at least 0.6 and its mean lies
    in [0.48, 0.52]. The whole set of arms is regenerated until
    halflife(walkList) lies in [8, 15].

    Returns (walkList, halfLife, richness):
      walkList : list of three lists of 300 probabilities, rounded to 2 decimals
      halfLife : the value halflife() returned for the accepted walk
      richness : sum of all probabilities in walkList divided by the number
                 of trials (i.e. the summed reward probability per trial)
    '''
    ######################## SET PARAMETERS ########################
    armNum = 3  ## 3-armed bandit
    initialProbList = [0.9, 0.7, 0.3]  ## initial values, shuffled across arms below
    # NOTE(review): an older note here quoted "hazard rate = 0.33, mean = 9.8
    # and sd < 2"; the code uses 0.2 -- confirm which value is intended.
    hazard = 0.2  ## hazard rate
    step = 0.2  ## step size
    trialNum = 300  ## number of trials
    # Probabilities >= bounds[1] are forced down and <= bounds[0] forced up.
    # With step 0.2 on the 0.1/0.3/.../0.9 grid an arm at 0.7 is below the
    # upper bound, so the walk can still reach 0.9.
    bounds = [0.1, 0.8]
    tol = 1e-9  ## slack so grid-exact criteria are not lost to float rounding

    while True:
        startProbs = list(initialProbList)
        random.shuffle(startProbs)  ## randomize which arm gets which start value
        walkList = []  ### three lists of reward prob, one for each arm

        for i in range(armNum):
            ### regenerate this arm until it meets the individual criteria
            while True:
                armList = _simulate_arm(startProbs[i], trialNum, hazard, step, bounds)

                ### the range (maximum - minimum) must be at least 0.6
                maxDiff = max(armList) - min(armList)
                ### the average must be around 50% (48%-52%)
                average = np.average(armList)

                if maxDiff >= 0.6 - tol and 0.48 - tol <= average <= 0.52 + tol:
                    break

            walkList.append(list(np.around(np.array(armList), 2)))

        halfLife = halflife(walkList)

        ### accept only walks whose half-life count lies in [8, 15]
        # NOTE(review): the original comment asked for "mean = 10, sd = 2";
        # the window actually enforced is 8..15 -- confirm.
        if 8 <= halfLife <= 15:
            break

    richness = np.sum(walkList) / trialNum

    return (walkList, halfLife, richness)

In [10]:
### walk() returns a (walkList, halfLife, richness) tuple; unpack it so that
### walkList holds only the per-arm reward probabilities
walkList, halfLife, richness = walk()

### define a noisy win-stay lose-shift agent

In [11]:
def WSLS_agent (walkList, bias):
    """Simulate a noisy win-stay lose-shift agent on a 3-armed walk.

    walkList : per-arm reward probabilities, indexed walkList[arm][trial].
    bias     : noise level; on every trial the win-stay / lose-shift rule
               is followed with probability 1 - bias/2.

    Returns (choiceList, outcomeList): the arm chosen and the 0/1 reward
    obtained on each of the 300 trials.
    """
    trialNum = 300
    lastTrial = trialNum - 1

    # the opening choice is random
    currentChoice = np.random.choice(3)
    choiceList = [currentChoice]
    outcomeList = []

    for trial in range(trialNum):
        # reward is drawn from the chosen arm's probability on this trial
        rewardProb = walkList[currentChoice][trial]
        reward = 1 if np.random.random() <= rewardProb else 0
        outcomeList.append(reward)

        # after the final trial there is no further choice to make
        if trial == lastTrial:
            break

        # The rule says: repeat after a reward, switch after a non-reward.
        # It is obeyed when the draw falls at or below 1 - bias/2.
        pFollow = 1-bias/2
        followsRule = np.random.random() <= pFollow
        if followsRule != (reward == 1):
            # switch: pick one of the two other arms at random
            alternatives = [0,1,2]
            del alternatives[currentChoice]
            currentChoice = np.random.choice(alternatives)
        choiceList.append(currentChoice)

    return choiceList, outcomeList
    

   

### define an RL agent

In [12]:
def RL_agent (walkList, alpha, beta):
    """Simulate a Q-learning agent with softmax choice on a 3-armed walk.

    walkList : per-arm reward probabilities, indexed walkList[arm][trial].
    alpha    : learning rate of the update Q += alpha * (reward - Q).
    beta     : multiplier applied to the Q values inside the softmax.

    Returns (choiceList, outcomeList), both of length 300, where
    choiceList[i] is the arm played on trial i and outcomeList[i] is the
    0/1 reward that arm produced on trial i.
    """
    ### initialize some parameters
    choiceList = []
    outcomeList = []
    trialNum = 300
    Qvalue = [0,0,0]

    ### determine the first choice, which is random
    currentChoice = np.random.choice(3)

    for i in range (trialNum):
        # Record the arm actually played on this trial, so choices and
        # outcomes stay index-aligned and have the same length.
        choiceList.append(currentChoice)

        ### determine whether the current choice is rewarded, based on the walk
        rewardProb = walkList[currentChoice][i]
        reward = 1 if np.random.random() <= rewardProb else 0
        outcomeList.append(reward)

        ### update Q value of the corresponding choice
        Qvalue[currentChoice] += alpha*(reward-Qvalue[currentChoice])

        # no next choice is needed after the last trial
        if i == trialNum-1:
            break

        ## softmax over beta*Q; subtracting the maximum leaves the
        ## probabilities unchanged but keeps exp() from overflowing to inf
        ## (which would turn the probabilities into NaN)
        scaledQ = beta * np.asarray(Qvalue, dtype=float)
        weights = np.exp(scaledQ - np.max(scaledQ))
        total = weights.sum()
        prob0 = weights[0] / total
        prob1 = weights[1] / total

        ## sample the next choice from the cumulative probabilities
        rand1 = np.random.random()
        if rand1 <= prob0:
            currentChoice = 0
        elif rand1 <= (prob0+prob1):
            currentChoice = 1
        else:
            currentChoice = 2

    return choiceList, outcomeList


### define a foraging agent

In [13]:
def foraging_agent (walkList, alpha, beta, rho):
    """Simulate a foraging (exploit-vs-explore) agent on a 3-armed walk.

    walkList : per-arm reward probabilities, indexed walkList[arm][trial].
    alpha    : learning rate of the update V_oit += alpha * (reward - V_oit).
    beta     : slope of the logistic exploit probability.
    rho      : threshold that V_oit is compared against in that logistic.

    After every trial the agent keeps the same arm with probability
    1 / (1 + exp(-(V_oit - rho) * beta)); otherwise it moves to one of the
    two other arms at random and V_oit is reset to 0.5.

    Returns (choiceList, outcomeList), both of length 300, where
    choiceList[i] is the arm played on trial i and outcomeList[i] is the
    0/1 reward that arm produced on trial i.
    """
    ### initialize some parameters
    choiceList = []
    outcomeList = []
    trialNum = 300
    V_oit = 0.5

    ### determine the first choice, which is random
    currentChoice = np.random.choice(3)

    for i in range (trialNum):
        # Record the arm played on THIS trial (including the very first one)
        # so that choiceList[i] lines up with outcomeList[i].
        choiceList.append(currentChoice)

        ### determine whether the current choice is rewarded, based on the walk
        rewardProb = walkList[currentChoice][i]
        reward = 1 if np.random.random() <= rewardProb else 0
        outcomeList.append(reward)

        ### update value of exploiting
        V_oit += alpha*(reward-V_oit)

        # no next choice is needed after the last trial
        if i == trialNum-1:
            break

        ## determine whether to exploit or to explore
        prob_oit = 1/(1+(np.exp(-1*(V_oit-rho)*beta)))

        rand1 = np.random.random()
        exploit = rand1 <= prob_oit
        if not exploit:
            ### explore another option and reset V_oit back to 0.5
            otherChoice = [0,1,2]
            otherChoice.pop(currentChoice)
            currentChoice = np.random.choice(otherChoice)
            V_oit = 0.5

    return choiceList, outcomeList


In [14]:
def plot (walkList, choice, outcome):
    """Plot the arms' reward probabilities with choice and outcome markers.

    walkList : per-arm reward-probability series, one line per arm.
    choice   : arm index chosen on each trial; 0/1/2 are drawn as
               orange/green/blue squares at y = 1.2.
    outcome  : 0/1 reward on each trial; drawn as grey (0) or black (1)
               squares at y = 1.4.

    If choice and outcome differ in length, only the trials present in both
    are marked. The figure is shown with plt.show(); nothing is returned.
    """
    fig = plt.figure (figsize= (30,5))
    ax = fig.add_subplot(111)

    color = ['orange','green','blue','pink','grey']
    for h, armWalk in enumerate(walkList):
        # cycle through the palette instead of failing on a 6th arm
        ax.plot(armWalk, color = color[h % len(color)])

    ax.set_ylabel ('reward probability')

    # Mark only trials that have both a choice and an outcome; indexing
    # outcome by the length of choice raised IndexError when they differed.
    nTrials = min(len(choice), len(outcome))
    trials = np.arange(nTrials)
    choiceArr = np.asarray(choice[:nTrials])
    outcomeArr = np.asarray(outcome[:nTrials])

    # one scatter call per marker colour instead of one call per trial
    for arm, armColor in enumerate(['orange', 'green', 'blue']):
        picked = trials[choiceArr == arm]
        ax.scatter(picked, np.full(picked.shape, 1.2), color = armColor, marker = 's')

    for value, valueColor in ((0, 'grey'), (1, 'black')):
        hit = trials[outcomeArr == value]
        ax.scatter(hit, np.full(hit.shape, 1.4), color = valueColor, marker = 's')

    ax.axis([1,300,0,1.5])
    plt.show()

### test if the foraging agent works

In [None]:
# parameters handed to foraging_agent below
alpha, beta, rho = 0.8, 1, 0.1

# draw one walk, let a single foraging agent play it, and plot the result
walkList, halfLife, richness = walk()
choice, outcome = foraging_agent(walkList, alpha, beta, rho)
plot(walkList, choice, outcome)

# mean reward probability over all arms and trials ...
print(np.mean(walkList))
# ... and the fraction of trials on which the agent was rewarded
print(outcome.count(1) / len(outcome))

## define a main function that runs the simulation: noisy WSLS agents with different levels of bias

In [None]:
# number of walks to generate; every bias level is run once on each walk
replication = 500
biasLevel = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# biasLevel = [0]   (single-level alternative, disabled)

# result columns: one entry per simulated agent, index-aligned across lists
recodeList = []   # not filled in this cell
# relativeRecodeList = []
groupList = []
halfLifeList = []
richnessList = []
pRewardList = []

for i in range(replication):
    walkList, halfLife, richness = walk()
    # run one noisy WSLS agent per bias level on the same walk
    for bias in biasLevel:
        choice, outcome = WSLS_agent(walkList, bias)
        groupList += [bias]
        halfLifeList += [halfLife]
        richnessList += [richness]
        # fraction of rewarded trials for this agent
        pRewardList += [outcome.count(1) / len(outcome)]


## define a main function that runs the simulation: RL agents with different levels of learning rate and noise

In [None]:
# number of walks to generate; the full alpha x beta grid is run on each walk
replication = 200
alphaLevel = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
betaLevel = [0.1, 0.2, 0.3, 0.5, 0.6, 0.8, 1, 1.2, 1.5, 2]
# betaLevel = [0.01, 0.05, 0.1, 0.25, 0.5, 0.8, 1, 1.5, 2.5, 5]   (alternative grid, disabled)
# biasLevel = [0]
alpha = 0.9   # assigned here but not read by the loop below
# beta = 1.5

# result columns: one entry per simulated agent, index-aligned across lists
recodeList = []   # not filled in this cell
# relativeRecodeList = []
alphaList = []
betaList = []
halfLifeList = []
richnessList = []
pRewardList = []

for i in range(replication):
    walkList, halfLife, richness = walk()
    for h, alphaValue in enumerate(alphaLevel):
        # one RL agent per (learning rate, beta) combination on the same walk
        for j, betaValue in enumerate(betaLevel):
            choice, outcome = RL_agent(walkList, alphaValue, betaValue)
            alphaList += [alphaValue]
            betaList += [betaValue]
            halfLifeList += [halfLife]
            richnessList += [richness]
            # fraction of rewarded trials for this agent
            pRewardList += [outcome.count(1) / len(outcome)]
            

## define a main function that runs RL foraging agents with different learning rate, noise, and threshold

In [None]:
# number of walks to generate; the full rho x alpha x beta grid is run on each walk
replication = 200

alphaLevel = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 1]
betaLevel = [0.1, 0.3, 0.4, 0.6, 0.8, 0.9, 1, 1.2, 2, 2.5, 3, 5,8]
rhoLevel = [0.05, 0.1, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# betaLevel = [0.01, 0.05, 0.1, 0.25, 0.5, 0.8, 1, 1.5, 2.5, 5]   (alternative grid, disabled)
# alpha = 0.9
# beta = 1.5

# result columns: one entry per simulated agent, index-aligned across lists
recodeList = []   # not filled in this cell
# relativeRecodeList = []
alphaList = []
betaList = []
rhoList = []
halfLifeList = []
richnessList = []
pRewardList = []

for i in range(replication):
    walkList, halfLife, richness = walk()
    for k, rhoValue in enumerate(rhoLevel):
        for h, alphaValue in enumerate(alphaLevel):
            # one foraging agent per (threshold, learning rate, beta) combination
            for j, betaValue in enumerate(betaLevel):
                choice, outcome = foraging_agent(walkList, alphaValue, betaValue, rhoValue)
                alphaList += [alphaValue]
                betaList += [betaValue]
                rhoList += [rhoValue]
                halfLifeList += [halfLife]
                richnessList += [richness]
                # fraction of rewarded trials for this agent
                pRewardList += [outcome.count(1) / len(outcome)]