In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from use_case.baseline import * 
from tests.eval import *

payoff_i = np.random.uniform(-10, 10, (20, 20))
payoff_j = np.transpose(payoff_i).copy()

# Initialize environment
N_ACTIONS = payoff_i.shape[0]
N_AGENTS = 1000
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games = 1)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *


In [4]:
# Configure the network here
parameters = ParameterSettings(
    n_agents = N_AGENTS,
    d_action = N_ACTIONS, 
    d_obs = env.obs_size, 
    d_traits = 1,
    d_beliefs = 1
)
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model(parameters)

In [5]:
equilibriua = find_pure_equilibria(payoff_i, payoff_j)

for eq in equilibriua:
    x, y = eq 
    a = (y[0] + y[1]) / 2

    print(x, a)

In [6]:
evaluate_policy(model, env, 10)

Average Return: -0.1680346371399749
Total returns: -1.680346371399749
Action Distribution
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 74  11  83   9  16  60  17  52  32  24  10  16  16 132 125  20  10  59
 127 107]


In [7]:
# Setup the training loop
training_parameters = TrainingParameters(
    actor_training_loops = 10,
    hypernet_training_loops= 5, 
    outer_loops = 200,
    actor_learning_rate= 0.01,
    experience_buffer_size = 3,
)

train_model(model, env, training_parameters)
        

Epoch 0


Hypernet Loop: 100%|██████████| 5/5 [00:03<00:00,  1.26it/s]


Average Return: 2.5579683071271595
Total returns: 25.579683071271596
Action Distribution
[ 0  1  6  7  8  9 10 14]
[272   4 159  36  22 251   1 255]


Actor Training: 100%|██████████| 10/10 [00:02<00:00,  3.34it/s]


Average Return: 2.0487213702583262
Total returns: 20.48721370258326
Action Distribution
[ 0  1  6  7  8  9 14]
[256  39 188  53  28 169 267]
Epoch 1


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


Average Return: 3.1310780061826478
Total returns: 31.310780061826478
Action Distribution
[ 0  1  6  8  9 10 14]
[628 176  53  55  60  15  13]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


Average Return: 2.622233310477539
Total returns: 26.222333104775387
Action Distribution
[ 0  1  6  8  9 10 14]
[471 200  83  80 108  29  29]
Epoch 2


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 3.1879935137787543
Total returns: 31.879935137787545
Action Distribution
[ 0  1  6  8  9 10 14]
[713 153  64  36  21   6   7]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.71it/s]


Average Return: 3.8536733472877955
Total returns: 38.536733472877955
Action Distribution
[ 0  1  6  8  9 10 14]
[796 113  47  25  13   2   4]
Epoch 3


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 3.357931600502316
Total returns: 33.57931600502316
Action Distribution
[ 0  1  6  8  9 10 14]
[690 179  58  42  17  10   4]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]


Average Return: 2.866935427387657
Total returns: 28.66935427387657
Action Distribution
[ 0  1  6  8  9 10 14]
[660 146  69  54  48  18   5]
Epoch 4


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 3.3717211181261186
Total returns: 33.717211181261185
Action Distribution
[ 0  1  6  8  9 10 14]
[722 127  74  46  22   4   5]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.52it/s]


Average Return: 3.7891819827803928
Total returns: 37.89181982780393
Action Distribution
[ 0  1  6  8  9 10]
[780 131  47  28  10   4]
Epoch 5


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.053578181299288
Total returns: 40.53578181299289
Action Distribution
[ 0  1  6  8  9 10 14]
[819  90  61  19   9   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.56it/s]


Average Return: 4.043703542682532
Total returns: 40.43703542682532
Action Distribution
[ 0  1  6  8  9 10 14]
[832  99  35  18   9   6   1]
Epoch 6


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.235068970070037
Total returns: 42.350689700700364
Action Distribution
[0 1 6 8 9]
[831 100  43  20   6]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.40it/s]


Average Return: 4.29394744818447
Total returns: 42.9394744818447
Action Distribution
[ 0  1  6  8  9 10 14]
[843  98  37  15   5   1   1]
Epoch 7


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.452740538020249
Total returns: 44.52740538020249
Action Distribution
[ 0  1  6  8  9 10]
[863  86  34  11   5   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.90it/s]


Average Return: 4.4400840737155445
Total returns: 44.40084073715544
Action Distribution
[0 1 6 8 9]
[869  87  30   9   5]
Epoch 8


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


Average Return: 4.546500403847329
Total returns: 45.46500403847329
Action Distribution
[ 0  1  6  8  9 10 14]
[886  72  22  12   6   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.67it/s]


Average Return: 4.556610686797354
Total returns: 45.56610686797354
Action Distribution
[ 0  1  6  8  9 10]
[863  81  40   4   8   4]
Epoch 9


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 4.566319899025693
Total returns: 45.66319899025693
Action Distribution
[ 0  1  6  8  9 10]
[881  83  27   4   4   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.52it/s]


Average Return: 4.660363622939672
Total returns: 46.60363622939672
Action Distribution
[ 0  1  6  8  9 10 14]
[882  75  18  15   6   3   1]
Epoch 10


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.593735971780843
Total returns: 45.937359717808434
Action Distribution
[ 0  1  6  8  9 10 14]
[887  77  25   5   2   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.70it/s]


Average Return: 4.6216780096630945
Total returns: 46.21678009663094
Action Distribution
[ 0  1  6  8  9 10 14]
[882  75  31   5   4   2   1]
Epoch 11


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.652372846877284
Total returns: 46.52372846877284
Action Distribution
[ 0  1  6  8  9 10]
[894  78  15  10   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.63it/s]


Average Return: 4.622816013190504
Total returns: 46.22816013190504
Action Distribution
[ 0  1  6  8  9 14]
[887  74  22  11   4   2]
Epoch 12


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.32it/s]


Average Return: 4.675704879401537
Total returns: 46.75704879401537
Action Distribution
[0 1 6 8 9]
[896  57  40   5   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.61it/s]


Average Return: 4.707374187689706
Total returns: 47.07374187689706
Action Distribution
[0 1 6 8 9]
[902  64  21  10   3]
Epoch 13


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


Average Return: 4.7054523072047445
Total returns: 47.054523072047445
Action Distribution
[ 0  1  6  8  9 10]
[888  71  23  13   3   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.54it/s]


Average Return: 4.703436741631807
Total returns: 47.03436741631807
Action Distribution
[ 0  1  6  8  9 10 14]
[903  66  17   6   6   1   1]
Epoch 14


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.786583537360985
Total returns: 47.865835373609855
Action Distribution
[ 0  1  6  8  9 10]
[923  52  16   6   1   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.60it/s]


Average Return: 4.750752486918799
Total returns: 47.50752486918799
Action Distribution
[0 1 6 8 9]
[896  72  17  11   4]
Epoch 15


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.32it/s]


Average Return: 4.729129222310762
Total returns: 47.29129222310762
Action Distribution
[ 0  1  6  8  9 10 14]
[894  67  25   9   3   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.57it/s]


Average Return: 4.78100570577593
Total returns: 47.810057057759295
Action Distribution
[ 0  1  6  8  9 10]
[905  68  19   6   1   1]
Epoch 16


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 4.8225780244032155
Total returns: 48.22578024403216
Action Distribution
[ 0  1  6  8  9 14]
[913  55  24   3   4   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.62it/s]


Average Return: 4.806622677639225
Total returns: 48.06622677639225
Action Distribution
[0 1 6 8 9]
[907  63  19  10   1]
Epoch 17


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 4.758822630734892
Total returns: 47.58822630734892
Action Distribution
[ 0  1  6  8  9 14]
[910  64  20   3   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.73it/s]


Average Return: 4.7071587841134015
Total returns: 47.07158784113401
Action Distribution
[0 1 6 8 9]
[908  63  17  10   2]
Epoch 18


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 4.710242972909639
Total returns: 47.10242972909639
Action Distribution
[0 1 6 8 9]
[902  69  22   4   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.66it/s]


Average Return: 4.7805359904500735
Total returns: 47.80535990450073
Action Distribution
[0 1 6 8 9]
[904  63  27   4   2]
Epoch 19


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 4.765080138919448
Total returns: 47.65080138919448
Action Distribution
[0 1 6 8 9]
[919  61  15   4   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.83it/s]


Average Return: 4.741279494470696
Total returns: 47.41279494470696
Action Distribution
[0 1 6 8 9]
[914  61  20   4   1]
Epoch 20


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 4.757663494266854
Total returns: 47.57663494266854
Action Distribution
[0 1 6 8 9]
[904  67  19   9   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.68it/s]


Average Return: 4.687075794366068
Total returns: 46.87075794366068
Action Distribution
[ 0  1  6  8  9 10]
[885  74  28   7   4   2]
Epoch 21


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 4.796293199668533
Total returns: 47.96293199668533
Action Distribution
[ 0  1  6  8  9 14]
[898  72  15   8   6   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.63it/s]


Average Return: 4.759546374379431
Total returns: 47.595463743794305
Action Distribution
[0 1 6 8 9]
[914  61  16   6   3]
Epoch 22


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 4.790681574131679
Total returns: 47.90681574131679
Action Distribution
[ 0  1  6  8  9 10]
[907  61  25   4   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.91it/s]


Average Return: 4.802205463625473
Total returns: 48.02205463625473
Action Distribution
[ 0  1  6  8  9 10]
[905  71  15   5   3   1]
Epoch 23


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.791918786107469
Total returns: 47.91918786107469
Action Distribution
[ 0  1  6  8  9 14]
[906  69  17   5   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.17it/s]


Average Return: 4.782765444286813
Total returns: 47.82765444286813
Action Distribution
[ 0  1  6  8  9 10]
[914  54  20   8   2   2]
Epoch 24


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.44it/s]


Average Return: 4.837656334657352
Total returns: 48.37656334657352
Action Distribution
[ 0  1  6  8  9 10]
[901  69  19   7   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.09it/s]


Average Return: 4.799365833787632
Total returns: 47.99365833787632
Action Distribution
[0 1 6 8 9]
[902  63  23   5   7]
Epoch 25


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.33it/s]


Average Return: 4.759203434236674
Total returns: 47.59203434236674
Action Distribution
[ 0  1  6  8 10]
[918  56  19   6   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.90it/s]


Average Return: 4.793866528848337
Total returns: 47.93866528848337
Action Distribution
[0 1 6 8 9]
[901  70  22   5   2]
Epoch 26


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 4.868624376233438
Total returns: 48.68624376233438
Action Distribution
[ 0  1  6  8  9 10]
[919  59  15   5   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.76it/s]


Average Return: 4.822046080529246
Total returns: 48.22046080529246
Action Distribution
[0 1 6 8 9]
[918  56  16   8   2]
Epoch 27


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.24it/s]


Average Return: 4.954714436223449
Total returns: 49.54714436223449
Action Distribution
[ 0  1  6  8  9 14]
[930  46  19   3   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.01it/s]


Average Return: 4.956602821064104
Total returns: 49.566028210641036
Action Distribution
[ 0  1  6  8  9 10]
[913  61  20   4   1   1]
Epoch 28


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 4.957429106862453
Total returns: 49.57429106862453
Action Distribution
[0 1 6 8 9]
[934  47  10   6   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.67it/s]


Average Return: 5.042634423557574
Total returns: 50.42634423557574
Action Distribution
[0 1 6 8]
[926  52  14   8]
Epoch 29


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 5.025562550971658
Total returns: 50.25562550971658
Action Distribution
[0 1 6 8 9]
[936  39  15   5   5]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.66it/s]


Average Return: 5.095789701285319
Total returns: 50.95789701285319
Action Distribution
[0 1 6 8]
[939  43  16   2]
Epoch 30


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 5.021688556750131
Total returns: 50.21688556750131
Action Distribution
[ 0  1  6  8  9 10]
[935  42  12   7   1   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.83it/s]


Average Return: 5.082212926720172
Total returns: 50.82212926720172
Action Distribution
[ 0  1  6  8  9 10 14]
[952  34   6   5   1   1   1]
Epoch 31


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 5.146315831970819
Total returns: 51.46315831970819
Action Distribution
[0 1 6 8 9]
[957  28  10   4   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.76it/s]


Average Return: 5.103010788580063
Total returns: 51.03010788580063
Action Distribution
[ 0  1  6  8  9 14]
[945  27  18   8   1   1]
Epoch 32


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 5.0910180808505405
Total returns: 50.91018080850541
Action Distribution
[0 1 6 8 9]
[953  27  16   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.75it/s]


Average Return: 5.115431056664582
Total returns: 51.154310566645826
Action Distribution
[ 0  1  6  8  9 10]
[950  31  13   3   1   2]
Epoch 33


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 5.09684499429628
Total returns: 50.9684499429628
Action Distribution
[ 0  1  6  8  9 10 14]
[942  37  14   3   2   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.61it/s]


Average Return: 5.070040287297293
Total returns: 50.70040287297293
Action Distribution
[ 0  1  6  8 14]
[938  40  13   8   1]
Epoch 34


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


Average Return: 5.069602842741242
Total returns: 50.696028427412415
Action Distribution
[0 1 6 8 9]
[937  39  18   3   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.57it/s]


Average Return: 5.090675074804085
Total returns: 50.90675074804085
Action Distribution
[ 0  1  6  8  9 10]
[939  38  11   9   2   1]
Epoch 35


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.24it/s]


Average Return: 5.067149601352322
Total returns: 50.671496013523225
Action Distribution
[0 1 6 8 9]
[948  36  12   2   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.84it/s]


Average Return: 5.095717269138467
Total returns: 50.95717269138466
Action Distribution
[ 0  1  6  8 10]
[943  40  12   3   2]
Epoch 36


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 5.068723064786014
Total returns: 50.68723064786014
Action Distribution
[ 0  1  6  8  9 10]
[943  42  11   2   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.52it/s]


Average Return: 5.083785568725587
Total returns: 50.837855687255875
Action Distribution
[0 1 6 8 9]
[940  41  13   4   2]
Epoch 37


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.25it/s]


Average Return: 5.1154043862353795
Total returns: 51.154043862353795
Action Distribution
[0 1 6 8 9]
[929  49  11   8   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.65it/s]


Average Return: 5.052783580883418
Total returns: 50.527835808834176
Action Distribution
[0 1 6 8 9]
[938  44   9   7   2]
Epoch 38


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]


Average Return: 5.069744476170554
Total returns: 50.69744476170553
Action Distribution
[0 1 6 8 9]
[946  42   8   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.77it/s]


Average Return: 5.1093968339669456
Total returns: 51.09396833966946
Action Distribution
[0 1 6 8 9]
[952  34  10   3   1]
Epoch 39


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 5.075028356284622
Total returns: 50.75028356284622
Action Distribution
[0 1 6 8 9]
[935  42  16   4   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.78it/s]


Average Return: 5.101027666599434
Total returns: 51.01027666599434
Action Distribution
[ 0  1  6  8  9 10]
[942  43  11   2   1   1]
Epoch 40


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 5.033837046071098
Total returns: 50.33837046071098
Action Distribution
[0 1 6 8]
[938  43  16   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.59it/s]


Average Return: 5.0265299823434315
Total returns: 50.26529982343432
Action Distribution
[ 0  1  6  8  9 10 14]
[917  49  20  10   2   1   1]
Epoch 41


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]


Average Return: 4.994085285832841
Total returns: 49.94085285832841
Action Distribution
[0 1 6 8 9]
[928  45  23   2   2]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.82it/s]


Average Return: 5.038719326485752
Total returns: 50.387193264857515
Action Distribution
[0 1 6 8 9]
[939  47   8   3   3]
Epoch 42


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s]


Average Return: 5.025629869893168
Total returns: 50.25629869893167
Action Distribution
[0 1 6 8]
[939  44  14   3]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.48it/s]


Average Return: 5.002297695807323
Total returns: 50.02297695807323
Action Distribution
[ 0  1  6  8 10]
[926  54  13   6   1]
Epoch 43


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]


Average Return: 5.049564796357133
Total returns: 50.49564796357133
Action Distribution
[ 0  1  6  8  9 10]
[931  41  19   6   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.91it/s]


Average Return: 5.059228533319405
Total returns: 50.59228533319405
Action Distribution
[ 0  1  6  8  9 10]
[932  44  17   5   1   1]
Epoch 44


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.32it/s]


Average Return: 5.048215779782479
Total returns: 50.4821577978248
Action Distribution
[ 0  1  6  8  9 10 14]
[934  45  13   5   1   1   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.71it/s]


Average Return: 5.061054898409331
Total returns: 50.61054898409331
Action Distribution
[ 0  1  6  8 10]
[944  34  14   7   1]
Epoch 45


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 5.048791222874991
Total returns: 50.48791222874991
Action Distribution
[0 1 6 8 9]
[934  47  12   6   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.13it/s]


Average Return: 5.088703088181983
Total returns: 50.88703088181983
Action Distribution
[0 1 6 8 9]
[940  43  12   2   3]
Epoch 46


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 5.110199099170482
Total returns: 51.101990991704824
Action Distribution
[ 0  1  6  8 14]
[946  36  14   3   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.05it/s]


Average Return: 5.073693180970754
Total returns: 50.73693180970754
Action Distribution
[0 1 6 8 9]
[944  34  14   6   2]
Epoch 47


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.31it/s]


Average Return: 5.072552693714911
Total returns: 50.72552693714911
Action Distribution
[0 1 6 8 9]
[950  37  10   2   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  7.06it/s]


Average Return: 5.118004515983237
Total returns: 51.18004515983237
Action Distribution
[ 0  1  6  8  9 14]
[938  42  13   4   1   2]
Epoch 48


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.32it/s]


Average Return: 5.107890679409045
Total returns: 51.07890679409045
Action Distribution
[0 1 6 8 9]
[939  41  13   6   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.94it/s]


Average Return: 5.09540322624776
Total returns: 50.954032262477604
Action Distribution
[ 0  1  6  8 10]
[929  51  14   5   1]
Epoch 49


Hypernet Loop: 100%|██████████| 5/5 [00:02<00:00,  2.33it/s]


Average Return: 5.142014508533021
Total returns: 51.420145085330205
Action Distribution
[0 1 6 8 9]
[938  48   9   4   1]


Actor Training: 100%|██████████| 10/10 [00:01<00:00,  6.70it/s]


Average Return: 5.087788956849017
Total returns: 50.87788956849017
Action Distribution
[0 1 6]
[964  28   8]
Epoch 50


KeyboardInterrupt: 

In [None]:
for i in range(0, payoff_i.shape[0]):
    print(payoff_i[i][i])