In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from use_case.baseline import * 
from tests.eval import *

# Seed NumPy's global RNG so the two payoff matrices drawn below are reproducible.
np.random.seed(1337)

# Two N x N payoff matrices with entries drawn uniformly from [-10, 10).
# payoff_i is drawn first, then payoff_j (the draw order determines the values).
N = 20
payoff_i = np.random.uniform(low=-10, high=10, size=(N, N))
payoff_j = np.random.uniform(low=-10, high=10, size=(N, N))

# Initialize environment
N_AGENTS = 2000
N_ACTIONS = payoff_i.shape[0]  # first dimension of the payoff matrix, i.e. N
env = BaselineEnvironment(
    N_AGENTS,
    payoff_i,
    payoff_j,
    total_games=1,
)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *
from torch.utils.tensorboard import SummaryWriter


In [4]:
# Network configuration: sizes come from the constants defined above and from
# the environment's reported observation size.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=1,
    d_beliefs=1,
)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    parameters.device = "cuda"
else:
    parameters.device = "cpu"

model = Model(parameters)

In [5]:
# List the pure equilibria found for the two payoff matrices. Each result is
# unpacked into a pair (x, y); we print x next to the mean of the first two
# entries of y (presumably the action pair and the two players' payoffs --
# TODO confirm against find_pure_equilibria).
equilibriua = find_pure_equilibria(payoff_i, payoff_j)

for x, y in equilibriua:
    a = (y[0] + y[1]) / 2
    print(x, a)

(18, 10) 9.42934343544164


In [6]:
# Score the freshly constructed model before any training has run, as a
# reference point. The third argument (10) is presumably the number of
# evaluation rounds -- TODO confirm against tests.eval.evaluate_policy.
evaluate_policy(model, env, 10)

np.float64(-0.004720429133506836)

In [None]:
# Training configuration. Judging by the logged progress bars, every outer
# loop ("Epoch k") runs the hypernet loop followed by the actor training loop.
training_parameters = TrainingParameters(
    # Loop counts
    outer_loops=200,
    hypernet_training_loops=10,
    actor_training_loops=30,
    # Learning rates
    actor_learning_rate=2.5e-4,
    critic_learning_rate=1e-3,
    hypernet_learning_rate=2.5e-4,
    # Hypernet threshold
    hypernet_jsd_threshold=0.25,
    # Sampling: a quarter of the agents, and the experience buffer size
    sampled_agents=N_AGENTS // 4,
    experience_buffer_size=10,
)

train_model(model, env, training_parameters)
        

Epoch 0


Hypernet Loop: 100%|██████████| 10/10 [00:04<00:00,  2.47it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.34it/s]


Epoch 1


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.74it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.26it/s]


Epoch 2


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.69it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.15it/s]


Epoch 3


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.71it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.29it/s]


Epoch 4


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.75it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 5


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.91it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.38it/s]


Epoch 6


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.77it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.32it/s]


Epoch 7


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.92it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.35it/s]


Epoch 8


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.83it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.29it/s]


Epoch 9


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.82it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.39it/s]


Epoch 10


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.62it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.29it/s]


Epoch 11


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.07it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.30it/s]


Epoch 12


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
Actor Training: 100%|██████████| 30/30 [00:25<00:00,  1.19it/s]


Epoch 13


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.08it/s]
Actor Training: 100%|██████████| 30/30 [00:22<00:00,  1.33it/s]


Epoch 14


Hypernet Loop: 100%|██████████| 10/10 [00:08<00:00,  1.11it/s]
Actor Training: 100%|██████████| 30/30 [00:22<00:00,  1.31it/s]


Epoch 15


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]
Actor Training: 100%|██████████| 30/30 [00:24<00:00,  1.23it/s]


Epoch 16


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.29it/s]


Epoch 17


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.07it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.27it/s]


Epoch 18


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.09it/s]
Actor Training: 100%|██████████| 30/30 [00:24<00:00,  1.23it/s]


Epoch 19


Hypernet Loop: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.27it/s]


Epoch 20


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.26it/s]


Epoch 21


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.06it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.21it/s]


Epoch 22


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.85it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.48it/s]


Epoch 23


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.86it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 24


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.74it/s]
Actor Training: 100%|██████████| 30/30 [00:22<00:00,  1.33it/s]


Epoch 25


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.09it/s]
Actor Training: 100%|██████████| 30/30 [00:22<00:00,  1.35it/s]


Epoch 26


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.11it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.26it/s]


Epoch 27


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.34it/s]


Epoch 28


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.67it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.22it/s]


Epoch 29


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.67it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.31it/s]


Epoch 30


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.85it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 31


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.87it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 32


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.81it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.35it/s]


Epoch 33


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.88it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.44it/s]


Epoch 34


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.79it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.39it/s]


Epoch 35


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.73it/s]
Actor Training: 100%|██████████| 30/30 [00:09<00:00,  3.11it/s]


Epoch 36


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.27it/s]


Epoch 37


Hypernet Loop: 100%|██████████| 10/10 [00:09<00:00,  1.06it/s]
Actor Training: 100%|██████████| 30/30 [00:23<00:00,  1.27it/s]


Epoch 38


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.68it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.50it/s]


Epoch 39


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.89it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.47it/s]


Epoch 40


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.92it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.44it/s]


Epoch 41


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.84it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.36it/s]


Epoch 42


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.82it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.40it/s]


Epoch 43


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.83it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.36it/s]


Epoch 44


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.84it/s]
Actor Training: 100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 45


Hypernet Loop: 100%|██████████| 10/10 [00:03<00:00,  2.81it/s]
Actor Training:  23%|██▎       | 7/30 [00:02<00:06,  3.43it/s]

# Heterogeneous Baseline

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from use_case.baseline import * 
from tests.eval import *
from models.model import *
from models.trainer import *


In [3]:
# Problem size for the heterogeneous baseline.
N_AGENTS = 2000
N_TYPES = 10
N_ACTIONS = 10

# Seed NumPy's global RNG so the payoff tensor drawn below is reproducible.
np.random.seed(1337)

# Uniform draws from [-10, 10) in a 5-D tensor of shape
# (N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS). Presumably one pair of
# N_ACTIONS x N_ACTIONS payoff matrices per ordered pair of types --
# TODO confirm the axis meaning against BaselineHeterogeneous.
payoff_shape = (N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS)
type_payoffs = np.random.uniform(low=-10, high=10, size=payoff_shape)

env = BaselineHeterogeneous(N_AGENTS, N_TYPES, type_payoffs)

In [4]:
# Network configuration: sizes come from the constants defined above and from
# the environment's reported observation size; the trait dimension is set to
# the number of types.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=N_TYPES,
    d_beliefs=1,
)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    parameters.device = "cuda"
else:
    parameters.device = "cpu"

model = Model(parameters)

In [5]:
# Training configuration (consumed by train_model in the next cell).
training_parameters = TrainingParameters(
    # Loop counts
    outer_loops=200,
    hypernet_training_loops=10,
    actor_training_loops=30,
    # Learning rates
    actor_learning_rate=2.5e-4,
    critic_learning_rate=1e-3,
    hypernet_learning_rate=2.5e-4,
    # Hypernet threshold
    hypernet_jsd_threshold=0.25,
    # Sampling: a quarter of the agents, and the experience buffer size
    sampled_agents=N_AGENTS // 4,
    experience_buffer_size=10,
)


In [None]:

# Long-running cell: in the recorded run each epoch took roughly 3-4 minutes
# (about 40-65 s for the hypernet loop plus 2-3 min for actor training), and
# the saved output stops partway through epoch 29.
train_model(model, env, training_parameters)

Epoch 0


Hypernet Loop: 100%|██████████| 10/10 [00:42<00:00,  4.28s/it]
Actor Training: 100%|██████████| 30/30 [02:04<00:00,  4.16s/it]


Epoch 1


Hypernet Loop: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Actor Training: 100%|██████████| 30/30 [02:03<00:00,  4.10s/it]


Epoch 2


Hypernet Loop: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Actor Training: 100%|██████████| 30/30 [02:30<00:00,  5.03s/it]


Epoch 3


Hypernet Loop: 100%|██████████| 10/10 [00:45<00:00,  4.57s/it]
Actor Training: 100%|██████████| 30/30 [02:56<00:00,  5.90s/it]


Epoch 4


Hypernet Loop: 100%|██████████| 10/10 [01:04<00:00,  6.42s/it]
Actor Training: 100%|██████████| 30/30 [02:47<00:00,  5.57s/it]


Epoch 5


Hypernet Loop: 100%|██████████| 10/10 [01:05<00:00,  6.54s/it]
Actor Training: 100%|██████████| 30/30 [03:05<00:00,  6.18s/it]


Epoch 6


Hypernet Loop: 100%|██████████| 10/10 [01:01<00:00,  6.19s/it]
Actor Training: 100%|██████████| 30/30 [02:29<00:00,  4.97s/it]


Epoch 7


Hypernet Loop: 100%|██████████| 10/10 [01:01<00:00,  6.10s/it]
Actor Training: 100%|██████████| 30/30 [02:48<00:00,  5.61s/it]


Epoch 8


Hypernet Loop: 100%|██████████| 10/10 [00:55<00:00,  5.59s/it]
Actor Training: 100%|██████████| 30/30 [02:29<00:00,  4.99s/it]


Epoch 9


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.35s/it]
Actor Training: 100%|██████████| 30/30 [02:31<00:00,  5.06s/it]


Epoch 10


Hypernet Loop: 100%|██████████| 10/10 [00:51<00:00,  5.19s/it]
Actor Training: 100%|██████████| 30/30 [02:33<00:00,  5.13s/it]


Epoch 11


Hypernet Loop: 100%|██████████| 10/10 [00:57<00:00,  5.71s/it]
Actor Training: 100%|██████████| 30/30 [02:37<00:00,  5.25s/it]


Epoch 12


Hypernet Loop: 100%|██████████| 10/10 [00:54<00:00,  5.50s/it]
Actor Training: 100%|██████████| 30/30 [02:26<00:00,  4.88s/it]


Epoch 13


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.37s/it]
Actor Training: 100%|██████████| 30/30 [02:18<00:00,  4.62s/it]


Epoch 14


Hypernet Loop: 100%|██████████| 10/10 [00:54<00:00,  5.47s/it]
Actor Training: 100%|██████████| 30/30 [02:40<00:00,  5.34s/it]


Epoch 15


Hypernet Loop: 100%|██████████| 10/10 [00:56<00:00,  5.65s/it]
Actor Training: 100%|██████████| 30/30 [02:40<00:00,  5.35s/it]


Epoch 16


Hypernet Loop: 100%|██████████| 10/10 [00:57<00:00,  5.71s/it]
Actor Training: 100%|██████████| 30/30 [02:35<00:00,  5.17s/it]


Epoch 17


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.39s/it]
Actor Training: 100%|██████████| 30/30 [02:27<00:00,  4.93s/it]


Epoch 18


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.36s/it]
Actor Training: 100%|██████████| 30/30 [02:21<00:00,  4.70s/it]


Epoch 19


Hypernet Loop: 100%|██████████| 10/10 [00:43<00:00,  4.32s/it]
Actor Training: 100%|██████████| 30/30 [02:18<00:00,  4.60s/it]


Epoch 20


Hypernet Loop: 100%|██████████| 10/10 [00:49<00:00,  4.95s/it]
Actor Training: 100%|██████████| 30/30 [02:27<00:00,  4.93s/it]


Epoch 21


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.34s/it]
Actor Training: 100%|██████████| 30/30 [02:29<00:00,  4.99s/it]


Epoch 22


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.38s/it]
Actor Training: 100%|██████████| 30/30 [02:28<00:00,  4.93s/it]


Epoch 23


Hypernet Loop: 100%|██████████| 10/10 [00:52<00:00,  5.24s/it]
Actor Training: 100%|██████████| 30/30 [02:32<00:00,  5.08s/it]


Epoch 24


Hypernet Loop: 100%|██████████| 10/10 [00:52<00:00,  5.24s/it]
Actor Training: 100%|██████████| 30/30 [02:21<00:00,  4.71s/it]


Epoch 25


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.34s/it]
Actor Training: 100%|██████████| 30/30 [02:21<00:00,  4.71s/it]


Epoch 26


Hypernet Loop: 100%|██████████| 10/10 [00:53<00:00,  5.37s/it]
Actor Training: 100%|██████████| 30/30 [02:34<00:00,  5.14s/it]


Epoch 27


Hypernet Loop: 100%|██████████| 10/10 [00:54<00:00,  5.45s/it]
Actor Training: 100%|██████████| 30/30 [02:29<00:00,  4.98s/it]


Epoch 28


Hypernet Loop: 100%|██████████| 10/10 [00:56<00:00,  5.69s/it]
Actor Training: 100%|██████████| 30/30 [02:21<00:00,  4.73s/it]


Epoch 29


Hypernet Loop:  50%|█████     | 5/10 [00:20<00:20,  4.18s/it]