In [1]:
# Enable IPython's autoreload extension in mode 2 (reload all imported modules
# before executing code), so edits to the project sources are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from use_case.baseline import * 
from tests.eval import *

# Fix NumPy's global RNG so the payoff tables below are identical on every run.
np.random.seed(1337)

# Two N x N payoff tables with entries drawn uniformly from [-10, 10).
# The draw order (payoff_i first, then payoff_j) is part of what the seed pins down.
N = 20
payoff_i = np.random.uniform(low=-10, high=10, size=(N, N))
payoff_j = np.random.uniform(low=-10, high=10, size=(N, N))

# Initialize environment
# The action count is read off the first axis of the payoff table (equals N here).
N_ACTIONS = len(payoff_i)
N_AGENTS = 2000
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games=1)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *


In [4]:
# Configure the network here
# Sizes are taken from the environment set up above; d_traits and d_beliefs
# are both fixed to 1 for this baseline run.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=1,
    d_beliefs=1,
)
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

# Only NumPy's RNG is seeded earlier in the notebook. Seed PyTorch as well
# (same value) so that any torch-based random initialisation inside Model,
# and therefore the evaluation numbers that follow, are repeatable.
torch.manual_seed(1337)

model = Model(parameters)

In [5]:
# Query the equilibria of the two payoff tables and print one line per result.
# Each result unpacks into a pair (x, y); x is printed as-is and y is reduced
# to the mean of its first two entries.
# NOTE(review): x looks like the action profile and y the two players'
# payoffs -- confirm against find_pure_equilibria.
equilibriua = find_pure_equilibria(payoff_i, payoff_j)

for x, y in equilibriua:
    a = 0.5 * (y[0] + y[1])
    print(x, a)

(18, 10) 9.42934343544164


In [6]:
# Score the freshly constructed (not yet trained) model on the environment.
# The call is left as the cell's last expression so its return value is displayed.
# NOTE(review): the third argument (10) is presumably the number of evaluation
# rounds -- confirm against evaluate_policy's signature.
evaluate_policy(model, env, 10)


Breakdown of average return per agent traits cluster:
Cluster 0: -0.25 (count: 10000)
Cluster 1: 0.18 (count: 10000)

    Average Return: -0.037756932843208454
    Total returns: -0.3775693284320845
    


np.float64(-0.037756932843208454)

In [None]:
# Setup the training loop
# All three optimisers share the same learning rate in this configuration.
training_parameters = TrainingParameters(
    outer_loops=200,
    hypernet_training_loops=10,
    actor_training_loops=100,
    actor_learning_rate=2.5e-4,
    critic_learning_rate=2.5e-4,
    hypernet_learning_rate=2.5e-4,
)

# NOTE(review): the saved output of this cell shows the run aborting in the
# first hypernet loop of epoch 0 with "CUDA error: device-side assert
# triggered". Such asserts are often caused by an out-of-range index; rerun
# with CUDA_LAUNCH_BLOCKING=1 (or on CPU) to locate the failing operation.
train_model(model, env, training_parameters)
        

Epoch 0


Hypernet Loop:   0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


# Heterogeneous Baseline

In [None]:
# Enable IPython's autoreload extension in mode 2 (reload all imported modules
# before executing code) for this section of the notebook.
%load_ext autoreload
%autoreload 2

In [None]:
from use_case.baseline import * 
from tests.eval import *
from models.model import *
from models.trainer import *


In [None]:
# Fix NumPy's global RNG so the sampled payoffs are identical on every run.
np.random.seed(1337)

N_AGENTS = 2000
N_TYPES = 10
N_ACTIONS = 10

# One payoff tensor covering every ordered pair of types, with entries drawn
# uniformly from [-10, 10).
# NOTE(review): the axes are presumably (type, type, player, action, action)
# -- confirm against BaselineHeterogeneous.
type_payoffs = np.random.uniform(
    low=-10,
    high=10,
    size=(N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS),
)
env = BaselineHeterogeneous(N_AGENTS, N_TYPES, type_payoffs)

In [None]:
# Configure the network here
# Sizes are taken from the heterogeneous environment set up above; the trait
# dimension is set to the number of agent types.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=N_TYPES,
    d_beliefs=1,
)
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

# Only NumPy's RNG is seeded in this section. Seed PyTorch as well (same
# value) so that any torch-based random initialisation inside Model, and the
# training run that follows, are repeatable.
torch.manual_seed(1337)

model = Model(parameters)

In [None]:
# Setup the training loop
# hypernet_training_loops is 0 in this configuration, while the hypernet
# learning rate and JSD threshold are still supplied.
# NOTE(review): presumably this disables hypernet updates -- confirm in the trainer.
training_parameters = TrainingParameters(
    outer_loops=200,
    hypernet_training_loops=0,
    actor_training_loops=100,
    experience_buffer_size=3,
    actor_learning_rate=2.5e-4,
    critic_learning_rate=1e-3,
    hypernet_learning_rate=2.5e-4,
    hypernet_jsd_threshold=0.25,
    ppo_epochs=4,
)


In [None]:

# Launch training of the heterogeneous-baseline model with the settings
# defined in the previous cell.
train_model(model, env, training_parameters)