In [1]:
import sys
# Add the parent directory to the module search path, presumably so the
# project-local imports in later cells resolve. The path is relative, so it
# is interpreted against the kernel's current working directory.
sys.path.append("../")

import warnings
# Install a blanket "ignore" filter: every warning is silenced from here on.
warnings.filterwarnings("ignore")

In [2]:
from Classes.cognitive_model_agents import QAttendance

In [3]:
# Horizontal rule (60 dashes) used by the printed scenario banners.
DASH_LINE = 60 * '-'

# Parameter dictionaries handed to the agent constructor; the same two
# dictionaries are reused by the later sweep / experiment cells.
fixed_parameters = dict(threshold=0.5, num_agents=2)
free_parameters = dict(
	learning_rate=0.1,
	discount_factor=0.7,
	go_discount_factor=0.5,
	go_drive=0.01,
	inverse_temperature=32,
)

# Agent under test, built with n=0.
agent = QAttendance(fixed_parameters=fixed_parameters, free_parameters=free_parameters, n=0)
# Turn on the agent's debug flag (the recorded outputs show step-by-step
# traces of probabilities and the learning rule in the following cells).
agent.debug = True

In [4]:
# Banner: blank line, rule, scenario title, rule.
print('', DASH_LINE, 'Test bar is full', DASH_LINE, sep='\n')

# Start from action 1 with the second state component set to 1, and record
# that starting point on the agent before the first round.
action = 1
state = [action, 1]
print('Initial state:', state)
agent.decisions.append(action)
agent.prev_state_ = tuple(state)

# Ten rounds. Each round: query preferences, let the agent decide, build the
# next state (second component always 1), compute the payoff, then update.
for round_idx in range(10):
	print(f'---------- Round {round_idx} ----------')
	prefs = agent.determine_action_preferences(state)
	print(f'Action preferences in state {state}: {prefs}')
	action = agent.make_decision()
	print('Chosen action:', action)
	next_state = [action, 1]
	print('State arrived:', next_state)
	payoff = agent.payoff(action, next_state)
	print(f'Payoff action {action}: {payoff}')
	agent.update(payoff, next_state)
	# The state just reached becomes the current state for the next round.
	state = next_state


------------------------------------------------------------
Test bar is full
------------------------------------------------------------
Initial state: [1, 1]
---------- Round 0 ----------
Action preferences in state [1, 1]: [0.   0.01]
Action probabilities:
no go:0.4206757478512505 ---- go:0.5793242521487494
Chosen action: 0
State arrived: [0, 1]
Payoff action 0: 0
Discounted actions: [0.0, 0.25]
Discounted average go frequency: 0.25
Reward: 0
Estimated long term reward: 0.006999999999999999
Reward with average go frequency: 0.257
Learning rule:
Q[(1, 1),0] <- 0.0 + 0.1 * (0.257 - 0.0)
Q[(1, 1),0] = 0.0257
---------- Round 1 ----------
Action preferences in state [0, 1]: [0.   0.01]
Action probabilities:
no go:0.4206757478512505 ---- go:0.5793242521487494
Chosen action: 0
State arrived: [0, 1]
Payoff action 0: 0
Discounted actions: [0.0, 0.0, 0.125]
Discounted average go frequency: 0.125
Reward: 0
Estimated long term reward: 0.006999999999999999
Reward with average go frequency: 0.

In [5]:
# Banner: blank line, rule, scenario title, rule.
print('', DASH_LINE, 'Test bar has capacity', DASH_LINE, sep='\n')

# Fresh agent for this scenario, built exactly like the first one, with the
# debug flag switched on.
agent = QAttendance(fixed_parameters=fixed_parameters, free_parameters=free_parameters, n=0)
agent.debug = True

# Start from action 0 with the second state component set to 0, and record
# that starting point on the agent before the first round.
action = 0
state = [action, 0]
print('Initial state:', state)
agent.decisions.append(action)
agent.prev_state_ = tuple(state)

# Ten rounds. Each round: query preferences, let the agent decide, build the
# next state (second component always 0), compute the payoff, then update.
for round_idx in range(10):
	print(f'---------- Round {round_idx} ----------')
	prefs = agent.determine_action_preferences(state)
	print(f'Action preferences in state {state}: {prefs}')
	action = agent.make_decision()
	print('Chosen action:', action)
	next_state = [action, 0]
	print('State arrived:', next_state)
	payoff = agent.payoff(action, next_state)
	print(f'Payoff action {action}: {payoff}')
	agent.update(payoff, next_state)
	# The state just reached becomes the current state for the next round.
	state = next_state


------------------------------------------------------------
Test bar has capacity
------------------------------------------------------------
Initial state: [0, 0]
---------- Round 0 ----------
Action preferences in state [0, 0]: [0.   0.01]
Action probabilities:
no go:0.4206757478512505 ---- go:0.5793242521487494
Chosen action: 1
State arrived: [1, 0]
Payoff action 1: 1
Discounted actions: [0.5, 0.0]
Discounted average go frequency: 0.5
Reward: 1
Estimated long term reward: 1.007
Reward with average go frequency: 1.507
Learning rule:
Q[(0, 0),1] <- 0.01 + 0.1 * (1.507 - 0.01)
Q[(0, 0),1] = 0.1597
---------- Round 1 ----------
Action preferences in state [1, 0]: [0.   0.01]
Action probabilities:
no go:0.4206757478512505 ---- go:0.5793242521487494
Chosen action: 1
State arrived: [1, 0]
Payoff action 1: 1
Discounted actions: [0.5, 0.25, 0.0]
Discounted average go frequency: 0.75
Reward: 1
Estimated long term reward: 1.007
Reward with average go frequency: 1.757
Learning rule:
Q[[1, 0]

In [6]:
from pathlib import Path

# Output locations, relative to the kernel's working directory. Each folder
# is created up front (with any missing parents); exist_ok=True makes the
# cell safe to re-run.
image_folder = Path('../../images/QAttendance')
image_folder.mkdir(parents=True, exist_ok=True)
data_folder = Path('../../data/QAttendance')
# Fix: this line previously called image_folder.mkdir(...) a second time,
# so the data folder itself was never created.
data_folder.mkdir(parents=True, exist_ok=True)

# Settings passed as `simulation_parameters` to the sweep, plotting and
# experiment calls in the following cells.
simulation_parameters = {
	'num_episodes':100,
	'num_rounds':100,
	'verbose':False
}

In [10]:
import numpy as np
from Utils.interaction import Performer

# One-parameter sweep over go_discount_factor: n_points evenly spaced values
# from 0 to 1 (both endpoints included), all other parameters as defined above.
n_points = 5
sweep_values = list(np.linspace(0, 1, n_points))

# The call's return value is kept in LaTeX_string; the recorded output shows
# one PDF per measure being saved under image_folder.
LaTeX_string = Performer.sweep(
    agent_class=QAttendance,
    fixed_parameters=fixed_parameters,
    free_parameters=free_parameters,
    simulation_parameters=simulation_parameters,
    sweep_parameter='go_discount_factor',
    values=sweep_values,
    image_folder=image_folder,
    measures=['efficiency', 'inequality', 'entropy', 'conditional_entropy'],
)

Running models for each go_discount_factor:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Plotting efficiency...
Plot saved to ../../images/QAttendance/efficiency_go_discount_factor.pdf
Plotting inequality...
Plot saved to ../../images/QAttendance/inequality_go_discount_factor.pdf
Plotting entropy...
Plot saved to ../../images/QAttendance/entropy_go_discount_factor.pdf
Plotting conditional_entropy...
Plot saved to ../../images/QAttendance/conditional_entropy_go_discount_factor.pdf


# Call Performer.simple_plots for QAttendance with the baseline parameter
# dictionaries, the four measures and the image folder. The return value is
# bound to LaTeX_string, replacing the value stored by the sweep call above.
LaTeX_string = Performer.simple_plots(
	agent_class=QAttendance,
	fixed_parameters=fixed_parameters,
	free_parameters=free_parameters,
	simulation_parameters=simulation_parameters,
	measures=['efficiency', 'inequality', 'entropy', 'conditional_entropy'],
	image_folder=image_folder
)

In [None]:
from Utils.interaction import Experiment

# Build an Experiment for QAttendance from the parameter dictionaries defined
# in earlier cells and the four measures of interest. The object is kept in
# `exp` because the sweep and plotting cells below use it.
exp = Experiment(
    agent_class=QAttendance,
    fixed_parameters=fixed_parameters,
    free_parameters=free_parameters,
    simulation_parameters=simulation_parameters,
    measures=['efficiency', 'inequality', 'entropy', 'conditional_entropy']
)

In [None]:
import numpy as np

# Two-parameter sweep: discount_factor and go_discount_factor each receive
# n_points log-spaced values from 1e-2 to 1e0. The `file` argument is a path
# under image_folder with no extension.
n_points = 10
exp.run_sweep2(
    parameter1='discount_factor',
    values1=list(np.logspace(-2, 0, n_points)),
    parameter2='go_discount_factor',
    values2=list(np.logspace(-2, 0, n_points)),
    file=image_folder / 'sweep_discount_vs_go_discount',
)

In [12]:
from Utils.plot_utils import PlotsAndMeasures

# Hand the data held by `exp` to the plotting helper.
# NOTE(review): `exp.data` is presumably populated by the exp.run_sweep2 cell,
# so that cell must have been executed before this one — confirm.
p = PlotsAndMeasures(exp.data)

In [None]:
# Plot the scores of the two-parameter sweep, passing a PNG path under
# image_folder as the `file` argument.
sweep_figure_file = image_folder / 'sweep_discount_vs_go_discount.png'
p.plot_scores_sweep2(
    parameter1='discount_factor',
    parameter2='go_discount_factor',
    file=sweep_figure_file,
)