In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from use_case.baseline import * 
from tests.eval import *

# Game definition.
# The payoff matrix is drawn from a dedicated, seeded generator so that the
# game being studied is identical after every Restart & Run All. Only the
# payoff draw is seeded here; any randomness inside the environment, the
# model or the trainer is not controlled by this cell.
# NOTE(review): the saved outputs further down report an equilibrium at
# (9, 9) and a 10-bin action histogram, which a 5x5 game cannot produce —
# they appear to come from a run with a different matrix size; re-run to
# refresh them.
PAYOFF_SEED = 0
N_ACTIONS = 5
N_AGENTS = 1000

payoff_rng = np.random.default_rng(PAYOFF_SEED)

# Square N_ACTIONS x N_ACTIONS matrix with entries uniform in [-10, 10).
payoff_i = payoff_rng.uniform(-10, 10, (N_ACTIONS, N_ACTIONS))
# The second payoff matrix is the transpose of the first; .copy() gives it
# its own memory instead of a view onto payoff_i.
payoff_j = np.transpose(payoff_i).copy()

# Initialize environment
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games = 5)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *


In [4]:
# Network configuration: agent/action counts come from the constants defined
# with the environment, the observation size is read from the environment.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=1,
    d_beliefs=1,
)

# Use the GPU when torch reports CUDA support, otherwise stay on the CPU.
if torch.cuda.is_available():
    parameters.device = "cuda"
else:
    parameters.device = "cpu"

model = Model(parameters)

In [5]:
# Print every entry returned by find_pure_equilibria together with the mean
# of the first two values of its second component.
# NOTE(review): each entry is presumably (action profile, payoff pair) —
# confirm against find_pure_equilibria.
equilibria = find_pure_equilibria(payoff_i, payoff_j)

for profile, payoffs in equilibria:
    mean_payoff = (payoffs[0] + payoffs[1]) / 2
    print(profile, mean_payoff)

(9, 9) 8.850245180402425


In [6]:
# Evaluate the model before train_model is called further down, giving a
# pre-training reference for the per-epoch numbers printed during training.
# NOTE(review): the 10 is presumably the number of evaluation runs (the
# logged "Total returns" is 10x "Average Return") — confirm against
# evaluate_policy.
evaluate_policy(model, env, 10)

Average Return: 11.26022180426131
Total returns: 112.60221804261309
Action Dist, (array([ 11, 897,   0,   0,   0,   1,   6,   0,  10,  75]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))


In [7]:
# Training configuration.
# The captured log shows one "Epoch N" header per outer iteration and an
# "Actor Training" progress bar that counts to 1000 inside each one; each
# epoch took roughly 3-7 minutes, so the full 100 outer loops is a
# multi-hour run (the saved run was interrupted during epoch 11).
training_parameters = TrainingParameters(
    actor_training_loops=1_000,
    outer_loops=100,
    learning_rate=0.01,
    experience_buffer_size=3,
)

# Launch training of the model against the environment.
train_model(model, env, training_parameters)
        

Epoch 0


Actor Training: 100%|██████████| 1000/1000 [05:28<00:00,  3.04it/s]


Average Return: 10.905179340048246
Total returns: 109.05179340048245
Action Dist, (array([  1, 844,   0,   0,   0,   1,   3,   0, 114,  37]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 1


Actor Training: 100%|██████████| 1000/1000 [04:34<00:00,  3.64it/s]


Average Return: 11.070708748432608
Total returns: 110.70708748432608
Action Dist, (array([  0, 724,   0,   3,   0,   1,   6,   0, 238,  28]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 2


Actor Training: 100%|██████████| 1000/1000 [06:54<00:00,  2.41it/s]


Average Return: 10.979536453078179
Total returns: 109.7953645307818
Action Dist, (array([  2, 816,   0,   0,   0,   0,   3,   0, 134,  45]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 3


Actor Training: 100%|██████████| 1000/1000 [06:58<00:00,  2.39it/s]


Average Return: 10.900439864002745
Total returns: 109.00439864002746
Action Dist, (array([  2, 799,   0,   0,   0,   4,   1,   0, 152,  42]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 4


Actor Training: 100%|██████████| 1000/1000 [06:36<00:00,  2.52it/s]


Average Return: 10.877452095704763
Total returns: 108.77452095704763
Action Dist, (array([  3, 823,   0,   0,   0,   2,   0,   0, 144,  28]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 5


Actor Training: 100%|██████████| 1000/1000 [03:02<00:00,  5.48it/s]


Average Return: 10.928409439257845
Total returns: 109.28409439257844
Action Dist, (array([  3, 818,   0,   0,   0,   0,   3,   0, 140,  36]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 6


Actor Training: 100%|██████████| 1000/1000 [02:56<00:00,  5.66it/s]


Average Return: 10.918721440562784
Total returns: 109.18721440562784
Action Dist, (array([  2, 754,   0,   0,   0,   4,   2,   0, 212,  26]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 7


Actor Training: 100%|██████████| 1000/1000 [02:55<00:00,  5.70it/s]


Average Return: 10.995926415266071
Total returns: 109.95926415266071
Action Dist, (array([  2, 810,   0,   0,   0,   3,   8,   0, 132,  45]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 8


Actor Training: 100%|██████████| 1000/1000 [02:56<00:00,  5.65it/s]


Average Return: 10.944269027179613
Total returns: 109.44269027179614
Action Dist, (array([  2, 783,   0,   0,   0,   5,   4,   0, 173,  33]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 9


Actor Training: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s]


Average Return: 10.970526716489292
Total returns: 109.70526716489293
Action Dist, (array([  0, 808,   0,   0,   0,   4,   4,   0, 138,  46]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 10


Actor Training: 100%|██████████| 1000/1000 [04:32<00:00,  3.67it/s]


Average Return: 11.082766094902583
Total returns: 110.82766094902583
Action Dist, (array([  2, 836,   0,   1,   0,   2,   0,   0, 114,  45]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
Epoch 11


Actor Training:  32%|███▎      | 325/1000 [00:58<02:00,  5.60it/s]


KeyboardInterrupt: 