In [2]:
# imports 
import numpy as np
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import random
import pickle as pkl

# import both RL and Foraging agents
from Agent import QAgent   
from Agent import ForagingAgent

#generate 2-armed bandit environment 
from Walks import KArmedBandit as ab

### Finding best parameters (that maximize reward) for both models in bandit environment

#### Setting agents and environment parameters

In [3]:
# ---- environment (random-walk) settings ----

n = 300  # trials in each simulation
k = 2    # arms of the bandit

# The settings are stored as attributes on `ab` (the imported KArmedBandit),
# in the same order as before: n, k, step_size, hazard_rate, lb, hb.
ab.n, ab.k = n, k
ab.step_size = 0.1        # probability step size
ab.hazard_rate = 0.1      # hazard rate
ab.lb, ab.hb = 0.1, 0.9   # lower / higher bound of the reward probability

# ---- search ranges for the agents' parameters ----

alpha_bnd = (0, 1)       # learning rate
beta_bnd = (0, 100)      # inverse temperature
rho_bnd = (-1.5, 1.5)    # threshold (only in the foraging model)

# Small constant; it is not referenced by the cells visible in this notebook.
epsilon = 1e-10

In [4]:
def sim(agent, theta, walks):
    """Run one agent on a set of reward-probability walks and score it.

    Parameters
    ----------
    agent : str
        Model label: "QL" builds a ``QAgent``, "FOR" builds a ``ForagingAgent``.
    theta : sequence of float
        Model parameters, ``(alpha, beta)`` for "QL" and
        ``(alpha, beta, rho)`` for "FOR".
    walks : numpy.ndarray
        2-D array of reward probabilities. The scoring code treats axis 0 as
        the arms and axis 1 as the trials.

    Returns
    -------
    p_best : float
        Fraction of trials on which the agent's choice equals the arm with the
        highest reward probability. Trials where the first two arms have equal
        probability always count as correct.
        NOTE(review): the tie check only compares arms 0 and 1, so this
        measure is only meaningful for 2-armed walks.
    p_rwdNorm : float
        Mean reward obtained by the agent divided by the mean of ``walks``,
        minus 1.

    Raises
    ------
    ValueError
        If ``agent`` is neither "QL" nor "FOR".
    """
    # Resolve the label before touching anything else, so that a typo fails
    # with a clear message instead of an AttributeError on the string below.
    if agent == "QL":
        alpha, beta = theta
        model = QAgent(walks.shape[0], walks.shape[1], walks,
                       alpha=alpha, beta=beta)
    elif agent == "FOR":
        alpha, beta, rho = theta
        model = ForagingAgent(walks.shape[0], walks.shape[1], walks,
                              alpha=alpha, beta=beta, rho=rho)
    else:
        raise ValueError(
            'unknown agent {!r}; expected "QL" or "FOR"'.format(agent)
        )

    model.walk()
    agent_rewards = model.get_reward_history()
    agent_choices = model.get_choice_history()

    # p(objective best choice): on ties between the two arms, whatever the
    # agent picked is accepted as the best choice.
    best_choices = np.argmax(walks, axis=0)
    equal_prob = (walks[0, :] == walks[1, :])
    best_choices[equal_prob] = agent_choices[equal_prob]
    p_best = np.mean(agent_choices == best_choices)

    # Reward relative to the average reward probability of the walks.
    p_rwdNorm = (agent_rewards.mean() / np.mean(walks)) - 1

    return p_best, p_rwdNorm


def linear_function(x, slope, intercept):
    """Evaluate the straight line ``slope * x + intercept`` at ``x``."""
    scaled = slope * x
    return scaled + intercept

### Optimization step

Here we find the best parameters for each model that maximize the overall reward. 
To plot the performance of the agents, run the "plotAgentPerf" notebook with the generated data. 


In [5]:
agents = ["QL", "FOR"]

replication = 258 # we match the number of participants in experiment 1

# Free parameters of each model (in the order `sim` unpacks them) together
# with the matching optimisation bounds.
agent_params = {
    "QL": (("alpha", "beta"), (alpha_bnd, beta_bnd)),
    "FOR": (("alpha", "beta", "rho"), (alpha_bnd, beta_bnd, rho_bnd)),
}


def optimize_agent(agent, theta_init, bnds, param_names, walks):
    """Fit one model to one set of walks and return a one-row results table.

    Parameters
    ----------
    agent : str
        Model label passed on to `sim` ("QL" or "FOR").
    theta_init : list of float
        Starting point of the optimisation, one value per entry of
        `param_names`.
    bnds : tuple of (low, high) pairs
        Box constraints given to the optimiser, one pair per parameter.
    param_names : tuple of str
        Column names under which the fitted parameters are stored.
    walks : numpy.ndarray
        Reward-probability walks the agent is simulated on.

    Returns
    -------
    pandas.DataFrame
        A single row with the optimiser diagnostics ("succ", "nfev", "nit"),
        the fitted parameters, the final objective value ("fun") and the
        performance of a fresh simulation at the optimum ("rwd", "p_best").
    """
    # Minimising the negated normalised reward maximises the reward.
    theta_optim = minimize(lambda theta: -(sim(agent, theta, walks)[1]),
                           theta_init,
                           method="TNC", bounds=bnds, tol=0.001)

    # Performance comes from a new simulation at the optimum, so if the agent
    # is stochastic "rwd" need not be exactly -"fun".
    p_best, rwd = sim(agent, list(theta_optim.x), walks)

    row = {"succ": [theta_optim.success],
           "nfev": [theta_optim.nfev],
           "nit": [theta_optim.nit]}
    for name, value in zip(param_names, theta_optim.x):
        row[name] = [value]
    row["fun"] = [theta_optim.fun]
    row["rwd"] = [rwd]
    row["p_best"] = [p_best]

    return pd.DataFrame(row)


sim_perf = {agent: [] for agent in agents}

for h in tqdm(range(replication)):

    # One random starting point per replication (drawn in the order alpha,
    # beta, rho). It is kept in its own dict so that every model starts from
    # this draw: the values fitted for one model are never reused as the
    # starting point of the next one.
    theta_start = {
        "alpha": random.uniform(*alpha_bnd),
        "beta": random.uniform(*beta_bnd),
        "rho": random.uniform(*rho_bnd),
    }

    # Both models are fitted on the same walks within a replication.
    walks, _ = ab.generate_walk(ab, plot=False, plt_title=None)

    for agent in agents:
        param_names, bnds = agent_params[agent]
        theta_init = [theta_start[name] for name in param_names]
        sim_perf[agent].append(
            optimize_agent(agent, theta_init, bnds, param_names, walks)
        )

# One table per agent. The one-row frames are concatenated without
# ignore_index, so every row keeps index 0 as in the original output format.
for agent in agents:
    sim_perf[agent] = pd.concat(sim_perf[agent])

100%|██████████| 258/258 [02:23<00:00,  1.80it/s]


In [6]:
# Write the dictionary of per-agent result tables to disk in binary mode.
with open('bestperfDic.pickle', 'wb') as out_file:
    pkl.dump(sim_perf, out_file)