In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from use_case.baseline import * 
from tests.eval import *

# Fix the NumPy RNG so the payoff tables sampled below are the same on every run.
np.random.seed(1337)

# Two N x N payoff tables, each entry drawn uniformly from [-10, 10).
# payoff_i is sampled before payoff_j; keep that order so the draws stay stable.
N = 10
payoff_i = np.random.uniform(low=-10, high=10, size=(N, N))
payoff_j = np.random.uniform(low=-10, high=10, size=(N, N))

# Initialize environment
# The action count is read off the first axis of the payoff table.
N_AGENTS = 1000
N_ACTIONS = len(payoff_i)
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games=1)

# Actual Run

In [3]:
from models.model import *
from models.trainer import *
from torch.utils.tensorboard import SummaryWriter


In [4]:
# Configure the network here
# Seed PyTorch too: elsewhere this notebook seeds only NumPy, so any torch-side
# randomness (presumably the weight initialisation inside Model -- confirm)
# would otherwise differ between runs and make the evaluation below irreproducible.
torch.manual_seed(1337)

parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=1,
    d_beliefs=1,
    d_het_latent=4,
)
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model(parameters)

In [5]:
# List the pure equilibria found for the two payoff tables.
# Each result unpacks into a pair: the first element is printed as-is, and the
# first two entries of the second element are averaged.
# NOTE(review): presumably these are the equilibrium's action profile and the
# two players' payoffs -- confirm against find_pure_equilibria.
equilibria = find_pure_equilibria(payoff_i, payoff_j)

for profile, payoffs in equilibria:
    mean_payoff = (payoffs[0] + payoffs[1]) / 2

    print(profile, mean_payoff)

(264, 811) 9.966706937603602
(602, 548) 9.959116047155838


In [6]:
# Score the freshly constructed model on the environment; the value is shown as the cell output.
# NOTE(review): the third argument (10) is presumably the number of evaluation
# rounds/episodes -- confirm against tests.eval.evaluate_policy.
evaluate_policy(model, env, 10)

np.float64(-0.1758169378863897)

In [None]:
# Setup the training loop
training_parameters = TrainingParameters(
    outer_loops=5_000,
    # Actor, critic and hypernet each get their own learning rate (all equal here).
    actor_learning_rate=1e-3,
    critic_learning_rate=1e-3,
    hypernet_learning_rate=1e-3,
    hypernet_jsd_threshold=0.25,
    # A quarter of the agent population.
    sampled_agents=N_AGENTS // 4,
    experience_sampling_steps=5,
    experience_buffer_size=25,
    hypernet_jsd_weight=1000,
)

# Train `model` on `env` with the settings assembled above.
train_model(model, env, training_parameters)
        

  0%|          | 0/5000 [00:00<?, ?it/s]

tensor([[0.2059, 0.0400],
        [0.0374, 0.0694],
        [0.0874, 0.1402],
        [0.1485, 0.0684],
        [0.1186, 0.0856]], device='cuda:0', grad_fn=<StackBackward0>) tensor([[ 5.5152,  7.9771],
        [ 7.8888, -9.4161],
        [ 1.6451, -7.6902],
        [ 0.4421, -7.2821],
        [-5.0838,  6.0047]], device='cuda:0')


# Heterogeneous Baseline

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from use_case.baseline import * 
from tests.eval import *
from models.model import *
from models.trainer import *


In [None]:
# Seed every RNG this section relies on. Seeding NumPy alone fixes the payoff
# tensor but leaves torch-side randomness (presumably model initialisation and
# any sampling during training -- confirm) different on each run.
SEED = 1337
np.random.seed(SEED)
torch.manual_seed(SEED)

N_AGENTS = 2000
N_TYPES = 10
N_ACTIONS = 10

# Payoffs drawn uniformly from [-10, 10) with shape
# (N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS).
# NOTE(review): presumably one pair of N_ACTIONS x N_ACTIONS payoff matrices per
# ordered pair of types -- confirm against BaselineHeterogeneous.
type_payoffs = np.random.uniform(
    low=-10,
    high=10,
    size=(N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS),
)
env = BaselineHeterogeneous(N_AGENTS, N_TYPES, type_payoffs)

In [None]:
# Configure the network here
# The trait dimension is set to the number of agent types; beliefs stay one-dimensional.
parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,
    d_traits=N_TYPES,
    d_beliefs=1,
)

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    parameters.device = "cuda"
else:
    parameters.device = "cpu"

model = Model(parameters)

In [None]:
# Setup the training loop
training_parameters = TrainingParameters(
    outer_loops = 100,

    actor_learning_rate= 2.5e-4,
    critic_learning_rate = 1e-3,
    hypernet_learning_rate = 2.5e-4,

    hypernet_jsd_threshold = 0.25,

    sampled_agents = N_AGENTS // 4,
    experience_buffer_size = 10,

)


In [None]:

# Train `model` on `env` using `training_parameters`.
train_model(model, env, training_parameters)