In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from use_case.baseline import * 
from tests.eval import *

# Seed the global NumPy RNG so the random payoff matrix is identical on every
# Restart & Run All. NOTE: outputs recorded before this seed was introduced
# came from an unseeded draw and will not match a fresh run.
SEED = 0
np.random.seed(SEED)

# 20x20 payoff matrix with entries drawn uniformly from [-10, 10).
payoff_i = np.random.uniform(-10, 10, (20, 20))
# The second payoff matrix is the transpose of the first, stored as its own copy.
payoff_j = np.transpose(payoff_i).copy()

# Initialize environment
N_ACTIONS = payoff_i.shape[0]  # number of rows of the payoff matrix
N_AGENTS = 1000
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games = 1)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *


In [4]:
# Network configuration: sizes come from the constants and environment
# created in the setup cell.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=1,
    d_beliefs=1,
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    parameters.device = "cuda"
else:
    parameters.device = "cpu"

model = Model(parameters)

In [5]:
equilibriua = find_pure_equilibria(payoff_i, payoff_j)

# Each result unpacks into two parts. Judging by the recorded output, the first
# looks like an action pair and the second a pair of payoffs -- TODO confirm
# against find_pure_equilibria. Print the first part next to the mean of the
# two entries of the second.
for profile, payoffs in equilibriua:
    mean_payoff = (payoffs[0] + payoffs[1]) / 2
    print(profile, mean_payoff)

(10, 10) 9.15679166251643


In [6]:
# Evaluate the model before train_model is called on it (in this notebook's
# cell order), giving a reference point for the training logs.
# The third argument (10) is presumably the number of evaluation rounds --
# TODO confirm against evaluate_policy.
evaluate_policy(model, env, 10)

Average Return: -0.11054450682910329
Total returns: -1.1054450682910328
Action Distribution
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[  5  52  53   5  31  11  21  16  61 157  36  32  96  37  51  21 122  39
 145   9]


In [7]:
# Training configuration. The recorded log shows each epoch running a 5-step
# "Hypernet Loop" followed by a 10-step "Actor Training" phase, matching the
# two *_training_loops values below.
training_parameters = TrainingParameters(
    actor_training_loops=10,
    hypernet_training_loops=5,
    outer_loops=200,
    learning_rate=0.01,
    experience_buffer_size=3,
)

# Train `model` against `env` with the settings above.
train_model(model, env, training_parameters)
        

Epoch 0


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Average Return: 2.6615315702145232
Total returns: 26.61531570214523
Action Distribution
[ 0  1  5  9 10 11 13 14 17 18 19]
[280  51  10  11   5   1  18  67 128 297 132]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.89it/s]


Average Return: 2.5575703596661197
Total returns: 25.575703596661196
Action Distribution
[ 0  1  5  9 10 11 13 14 17 18 19]
[254  49  18  31  13   2  67  43 147 290  86]
Epoch 1


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 2.563810725917013
Total returns: 25.63810725917013
Action Distribution
[ 0  1 10 11 13 14 17 18 19]
[545 223   7  54  44  26  10  28  63]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.72it/s]


Average Return: 2.053204628376658
Total returns: 20.53204628376658
Action Distribution
[ 0  1 10 11 13 14 17 18 19]
[439 245   1  67  45  57   4  52  90]
Epoch 2


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 3.484548375755228
Total returns: 34.84548375755228
Action Distribution
[ 0  1  5 10 11 13 14 18 19]
[692 200   1  23  30  28   9  12   5]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.64it/s]


Average Return: 3.4354747213984083
Total returns: 34.35474721398408
Action Distribution
[ 0  1 10 11 13 14 17 18 19]
[687 206  26  31  28   7   1   3  11]
Epoch 3


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 3.435140110291821
Total returns: 34.35140110291821
Action Distribution
[ 0  1 10 11 13 14 18 19]
[701 199  33  35  16   9   2   5]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.49it/s]


Average Return: 3.4095791773358757
Total returns: 34.095791773358755
Action Distribution
[ 0  1 10 11 13 14 18 19]
[664 211  45  38  21   6   5  10]
Epoch 4


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 4.436648562665297
Total returns: 44.366485626652974
Action Distribution
[ 0  1 10 11 13 14 17 18 19]
[766 162  32  19   6   8   1   3   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.43it/s]


Average Return: 4.267098699632681
Total returns: 42.67098699632681
Action Distribution
[ 0  1 10 11 13 14 18 19]
[752 166  33  24  11   2   4   8]
Epoch 5


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]


Average Return: 5.356187620534223
Total returns: 53.56187620534224
Action Distribution
[ 0  1 10 11 13 14 17 18 19]
[855 113  18   4   2   3   1   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.14it/s]


Average Return: 5.507589034304556
Total returns: 55.07589034304556
Action Distribution
[ 0  1 10 11 13 14 17 18]
[847 125  14   8   2   2   1   1]
Epoch 6


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.22it/s]


Average Return: 6.215945999805209
Total returns: 62.15945999805209
Action Distribution
[ 0  1 10 11 13 14]
[901  78   9   4   6   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.32it/s]


Average Return: 6.542083574732532
Total returns: 65.42083574732531
Action Distribution
[ 0  1 10 11 13 14 19]
[921  68   7   1   1   1   1]
Epoch 7


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.24it/s]


Average Return: 6.915022206589957
Total returns: 69.15022206589957
Action Distribution
[ 0  1 10 11 13]
[939  53   5   1   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.46it/s]


Average Return: 6.950308252763858
Total returns: 69.50308252763858
Action Distribution
[ 0  1 10 11 13]
[944  50   3   1   2]
Epoch 8


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 7.084075945059974
Total returns: 70.84075945059973
Action Distribution
[ 0  1 10 11]
[937  57   4   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.69it/s]


Average Return: 7.009382644791908
Total returns: 70.09382644791908
Action Distribution
[ 0  1 10 11 13]
[939  52   6   1   2]
Epoch 9


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]


Average Return: 7.0769834987670945
Total returns: 70.76983498767095
Action Distribution
[ 0  1 10]
[937  61   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.12it/s]


Average Return: 7.184625439620868
Total returns: 71.84625439620868
Action Distribution
[ 0  1 10 13 14 19]
[940  54   3   1   1   1]
Epoch 10


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.24it/s]


Average Return: 7.193118762371377
Total returns: 71.93118762371377
Action Distribution
[ 0  1 10]
[942  56   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.76it/s]


Average Return: 7.157113631261938
Total returns: 71.57113631261937
Action Distribution
[ 0  1 10]
[942  55   3]
Epoch 11


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.20it/s]


Average Return: 7.160990196711071
Total returns: 71.60990196711072
Action Distribution
[ 0  1 10 11]
[952  45   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.33it/s]


Average Return: 7.092393534522458
Total returns: 70.92393534522458
Action Distribution
[ 0  1 10]
[927  71   2]
Epoch 12


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.21it/s]


Average Return: 7.0747688510873274
Total returns: 70.74768851087327
Action Distribution
[ 0  1 10 11]
[940  54   3   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.18it/s]


Average Return: 7.144794101902939
Total returns: 71.44794101902939
Action Distribution
[ 0  1 10 11 13 18 19]
[941  50   5   1   1   1   1]
Epoch 13


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 7.236452403855715
Total returns: 72.36452403855715
Action Distribution
[ 0  1 10 11 13]
[946  49   2   1   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]


Average Return: 7.1460630137854455
Total returns: 71.46063013785445
Action Distribution
[ 0  1 10 11]
[941  55   2   2]
Epoch 14


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.21it/s]


Average Return: 7.084304720295743
Total returns: 70.84304720295744
Action Distribution
[ 0  1 10 11 14]
[944  51   2   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


Average Return: 7.0940012799884
Total returns: 70.940012799884
Action Distribution
[ 0  1 10 11 13]
[943  50   4   2   1]
Epoch 15


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.22it/s]


Average Return: 7.136811506590438
Total returns: 71.36811506590438
Action Distribution
[ 0  1 10 11 13]
[944  49   3   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.63it/s]


Average Return: 7.149126124004117
Total returns: 71.49126124004117
Action Distribution
[ 0  1 10 11 13 14]
[952  43   1   2   1   1]
Epoch 16


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.21it/s]


Average Return: 7.19282404108082
Total returns: 71.9282404108082
Action Distribution
[ 0  1 10]
[945  51   4]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.55it/s]


Average Return: 7.0867121415427885
Total returns: 70.86712141542789
Action Distribution
[ 0  1 10 11 19]
[940  53   5   1   1]
Epoch 17


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 7.0630492701240275
Total returns: 70.63049270124027
Action Distribution
[ 0  1 10 13]
[937  59   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.80it/s]


Average Return: 7.132210074520858
Total returns: 71.32210074520857
Action Distribution
[ 0  1 10 13 14]
[954  41   2   1   2]
Epoch 18


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 7.106459636434455
Total returns: 71.06459636434455
Action Distribution
[ 0  1 10 11 19]
[932  60   5   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.91it/s]


Average Return: 7.072367240879312
Total returns: 70.72367240879312
Action Distribution
[ 0  1 10 19]
[931  61   7   1]
Epoch 19


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.32it/s]


Average Return: 6.992459804870019
Total returns: 69.92459804870019
Action Distribution
[ 0  1 10 11 13 18]
[934  55   5   2   2   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.81it/s]


Average Return: 6.964953080226628
Total returns: 69.64953080226628
Action Distribution
[ 0  1 10 11]
[937  59   3   1]
Epoch 20


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 6.825842310911087
Total returns: 68.25842310911086
Action Distribution
[ 0  1 10 14]
[941  50   8   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.85it/s]


Average Return: 6.949020328241241
Total returns: 69.49020328241241
Action Distribution
[ 0  1 10 11]
[924  72   3   1]
Epoch 21


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 6.956470733546958
Total returns: 69.56470733546958
Action Distribution
[ 0  1 10 11]
[929  62   7   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.56it/s]


Average Return: 6.888847249694864
Total returns: 68.88847249694864
Action Distribution
[ 0  1 10 11 13]
[922  72   1   4   1]
Epoch 22


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 6.880539627153924
Total returns: 68.80539627153924
Action Distribution
[ 0  1 10 13 14]
[931  62   4   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.83it/s]


Average Return: 6.870326844032907
Total returns: 68.70326844032907
Action Distribution
[ 0  1 10 11 13]
[943  52   1   2   2]
Epoch 23


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 6.607563411001782
Total returns: 66.07563411001782
Action Distribution
[ 0  1 10 11 17]
[916  79   3   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.59it/s]


Average Return: 6.865134425168099
Total returns: 68.651344251681
Action Distribution
[ 0  1 10 11 14 17]
[933  61   2   2   1   1]
Epoch 24


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.42it/s]


Average Return: 6.830725458390357
Total returns: 68.30725458390357
Action Distribution
[ 0  1 10 11]
[933  60   3   4]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.60it/s]


Average Return: 7.072960510425152
Total returns: 70.72960510425152
Action Distribution
[ 0  1 10 11]
[940  55   2   3]
Epoch 25


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


Average Return: 7.088035736946518
Total returns: 70.88035736946517
Action Distribution
[ 0  1 10 11 13 19]
[954  41   1   2   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.52it/s]


Average Return: 7.103275324142326
Total returns: 71.03275324142326
Action Distribution
[ 0  1 10 11 13]
[935  54   8   1   2]
Epoch 26


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 7.108399122416553
Total returns: 71.08399122416553
Action Distribution
[ 0  1 10 11]
[944  52   2   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.66it/s]


Average Return: 7.157939953829894
Total returns: 71.57939953829894
Action Distribution
[ 0  1 10 11 13 19]
[942  49   5   2   1   1]
Epoch 27


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]


Average Return: 7.185837496689677
Total returns: 71.85837496689678
Action Distribution
[ 0  1 10 11 13]
[935  57   6   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


Average Return: 7.109026117218299
Total returns: 71.09026117218299
Action Distribution
[ 0  1 10 13]
[939  55   4   2]
Epoch 28


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.24it/s]


Average Return: 7.031473289607431
Total returns: 70.3147328960743
Action Distribution
[ 0  1 10 13]
[943  55   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.72it/s]


Average Return: 6.877049636910614
Total returns: 68.77049636910614
Action Distribution
[ 0  1 10 11]
[926  66   6   2]
Epoch 29


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 7.034720390607123
Total returns: 70.34720390607123
Action Distribution
[ 0  1 10 11 13]
[939  51   6   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.58it/s]


Average Return: 6.994386902305362
Total returns: 69.94386902305362
Action Distribution
[ 0  1 10 11 17 18]
[938  56   2   1   2   1]
Epoch 30


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 6.931066421297987
Total returns: 69.31066421297987
Action Distribution
[ 0  1 10 13]
[936  56   7   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


Average Return: 6.979203976320679
Total returns: 69.79203976320679
Action Distribution
[ 0  1 10]
[942  54   4]
Epoch 31


Hypernet Loop: 100%|██████████| 5/5 [00:04<00:00,  1.11it/s]


Average Return: 7.0000696552566355
Total returns: 70.00069655256635
Action Distribution
[ 0  1 10 11]
[935  54   5   6]


Actor Training: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Average Return: 7.061420397247005
Total returns: 70.61420397247005
Action Distribution
[ 0  1 10 11 18]
[944  53   1   1   1]
Epoch 32


Hypernet Loop: 100%|██████████| 5/5 [00:04<00:00,  1.05it/s]


Average Return: 7.169256666912636
Total returns: 71.69256666912636
Action Distribution
[ 0  1 10 11 13 14]
[940  52   4   2   1   1]


Actor Training: 100%|██████████| 10/10 [00:04<00:00,  2.28it/s]


Average Return: 7.139993865716238
Total returns: 71.39993865716238
Action Distribution
[ 0  1 10 11 13]
[946  49   3   1   1]
Epoch 33


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Average Return: 7.148272811434056
Total returns: 71.48272811434056
Action Distribution
[ 0  1 10 11 13]
[938  53   6   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  5.99it/s]


Average Return: 7.137591307003521
Total returns: 71.37591307003521
Action Distribution
[ 0  1 10 11 13]
[944  51   3   1   1]
Epoch 34


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]


Average Return: 7.183173886479703
Total returns: 71.83173886479703
Action Distribution
[ 0  1 10 11]
[949  47   2   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.69it/s]


Average Return: 7.137622184208915
Total returns: 71.37622184208915
Action Distribution
[ 0  1 10 11 13 19]
[951  40   3   4   1   1]
Epoch 35


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 7.351588553539642
Total returns: 73.51588553539642
Action Distribution
[ 0  1 10 11]
[941  57   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.93it/s]


Average Return: 7.228716644857572
Total returns: 72.28716644857572
Action Distribution
[ 0  1 10 11 17]
[932  62   3   2   1]
Epoch 36


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 7.165957875497064
Total returns: 71.65957875497064
Action Distribution
[ 0  1 10 11]
[952  46   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.83it/s]


Average Return: 7.237080011489736
Total returns: 72.37080011489736
Action Distribution
[ 0  1 10 11]
[943  51   2   4]
Epoch 37


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 7.357985349349147
Total returns: 73.57985349349147
Action Distribution
[ 0  1 10 11 18]
[964  31   2   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.80it/s]


Average Return: 7.329210193171714
Total returns: 73.29210193171714
Action Distribution
[ 0  1 10 11 18]
[938  54   6   1   1]
Epoch 38


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 7.274941874342669
Total returns: 72.74941874342669
Action Distribution
[ 0  1 11]
[961  37   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.79it/s]


Average Return: 7.275497092030706
Total returns: 72.75497092030706
Action Distribution
[ 0  1 10 11]
[954  42   3   1]
Epoch 39


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 7.43776156272539
Total returns: 74.3776156272539
Action Distribution
[ 0  1 10 11]
[959  37   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.73it/s]


Average Return: 7.393332558943103
Total returns: 73.93332558943104
Action Distribution
[ 0  1 10 13 14]
[955  38   4   2   1]
Epoch 40


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 7.3221415174836295
Total returns: 73.2214151748363
Action Distribution
[ 0  1 10 13]
[951  46   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.72it/s]


Average Return: 7.402943657866371
Total returns: 74.0294365786637
Action Distribution
[ 0  1 10 14]
[952  44   3   1]
Epoch 41


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 7.4120135716414195
Total returns: 74.1201357164142
Action Distribution
[ 0  1 10]
[967  31   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.18it/s]


Average Return: 7.333746389312582
Total returns: 73.33746389312581
Action Distribution
[ 0  1 10]
[941  58   1]
Epoch 42


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 7.459827433831498
Total returns: 74.59827433831498
Action Distribution
[ 0  1 10 11 14]
[957  40   1   1   1]


Actor Training:  20%|██        | 2/10 [00:00<00:01,  4.39it/s]


KeyboardInterrupt: 

In [None]:
# Show the payoff_i entry at row 0, column 0.
print(payoff_i[0, 0])