In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from use_case.baseline import * 
from tests.eval import *

# Pin NumPy's global RNG so the payoff draws below repeat across runs.
np.random.seed(1337)

# Two square (N x N) payoff matrices with entries drawn uniformly from [-10, 10).
# payoff_i is drawn before payoff_j; the order matters for reproducibility.
N = 20
payoff_i = np.random.uniform(low=-10, high=10, size=(N, N))
payoff_j = np.random.uniform(low=-10, high=10, size=(N, N))

# Initialize environment
N_AGENTS = 2000
N_ACTIONS = payoff_i.shape[0]  # action count follows the payoff matrix's first axis
env = BaselineEnvironment(N_AGENTS, payoff_i, payoff_j, total_games=1)

# Actual Run

In [None]:
from models.model import *
from models.trainer import *


In [None]:
# Configure the network here.
#
# NumPy's RNG is seeded where the payoff matrices are drawn, but PyTorch keeps a
# separate generator. Seed it too (same value as np.random.seed) so torch-side
# randomness used from here on repeats across runs.
# NOTE(review): presumably Model draws its initial weights from torch's RNG - confirm.
torch.manual_seed(1337)

parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,  # observation width is taken from the environment
    d_traits=1,
    d_beliefs=1,
)
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model(parameters)

In [None]:
# List what find_pure_equilibria reports for the two payoff matrices.
# NOTE(review): each entry unpacks into a pair (x, y); presumably x identifies
# the equilibrium and y holds the two payoffs - confirm against the helper.
equilibriua = find_pure_equilibria(payoff_i, payoff_j)

for eq in equilibriua:
    x, y = eq
    # Mean of the first two entries of y.
    a = (y[0] + y[1]) / 2
    print(x, a)

In [None]:
# Evaluate the current model on the baseline environment. In notebook order this
# runs after Model(parameters) and before train_model, i.e. on an untrained model.
# NOTE(review): the positional 10 is presumably an episode/round count - confirm
# against evaluate_policy's signature and consider passing it by keyword.
evaluate_policy(model, env, 10)

In [None]:
# Setup the training loop.
# The actor, critic and hypernet learning rates all use the same value here.
SHARED_LEARNING_RATE = 2.5e-4

training_parameters = TrainingParameters(
    outer_loops=200,
    hypernet_training_loops=50,
    actor_training_loops=100,
    experience_buffer_size=3,
    actor_learning_rate=SHARED_LEARNING_RATE,
    critic_learning_rate=SHARED_LEARNING_RATE,
    hypernet_learning_rate=SHARED_LEARNING_RATE,
)

# Start training with the model and environment built in the earlier cells.
train_model(model, env, training_parameters)
        

# Heterogeneous Baseline

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
from use_case.baseline import * 
from tests.eval import *
from models.model import *
from models.trainer import *


In [21]:
# Pin NumPy's global RNG so the payoff tensor below repeats across runs.
np.random.seed(1337)

N_AGENTS = 2000
N_TYPES = 10
N_ACTIONS = 10

# One uniform draw from [-10, 10) per entry of a
# (N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS) array.
# NOTE(review): presumably the length-2 axis separates the two players' payoffs
# for each pair of types - confirm against BaselineHeterogeneous.
payoff_shape = (N_TYPES, N_TYPES, 2, N_ACTIONS, N_ACTIONS)
type_payoffs = np.random.uniform(low=-10, high=10, size=payoff_shape)
env = BaselineHeterogeneous(N_AGENTS, N_TYPES, type_payoffs)

In [22]:
# Configure the network here.
#
# The environment cell seeds only NumPy; PyTorch has its own generator. Seed it
# with the same value so torch-side randomness used from here on repeats across
# runs.
# NOTE(review): presumably Model draws its initial weights from torch's RNG - confirm.
torch.manual_seed(1337)

parameters = ParameterSettings(
    n_agents=N_AGENTS,
    d_action=N_ACTIONS,
    d_obs=env.obs_size,  # observation width is taken from the environment
    d_traits=N_TYPES,    # trait dimension is set to the number of agent types
    d_beliefs=1,
)
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
parameters.device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model(parameters)

In [None]:
# Setup the training loop.
# Settings are gathered in a dict first and then unpacked into the constructor.
training_settings = {
    # Loop counts and buffer size
    "outer_loops": 200,
    "hypernet_training_loops": 0,  # zero hypernet loops are requested for this run
    "actor_training_loops": 100,
    "experience_buffer_size": 3,
    # Learning rates (the critic uses a larger value than the other two)
    "actor_learning_rate": 2.5e-4,
    "critic_learning_rate": 1e-3,
    "hypernet_learning_rate": 2.5e-4,
    # Remaining settings
    "hypernet_jsd_threshold": 0.25,
    "ppo_epochs": 4,
}
training_parameters = TrainingParameters(**training_settings)


In [None]:

# Start training with the model, environment and training_parameters defined in
# the preceding cells of this section.
# NOTE(review): the saved progress bar shows about 3 minutes for one epoch's
# 100 actor-training iterations, so a full run is long - consider timing or
# checkpointing it.
train_model(model, env, training_parameters)

Epoch 0


Actor Training: 100%|██████████| 100/100 [02:53<00:00,  1.74s/it]



    Average Policy Loss: 0.057799567066133026
    Average Value Loss: 5.577195649623871
    Average Entropy Loss: 2.2549852058887483
    

Breakdown of average return per agent traits cluster:
Cluster 0: 0.09 (count: 3740)
Cluster 1: 0.25 (count: 2010)
Cluster 2: 0.47 (count: 2060)
Cluster 3: 0.02 (count: 2300)
Cluster 4: 0.04 (count: 1970)
Cluster 5: 0.27 (count: 2090)
Cluster 6: 0.08 (count: 1800)
Cluster 7: 0.12 (count: 2040)
Cluster 8: 0.02 (count: 1990)

    Average Return: 0.14532120521601022
    Total returns: 1.4532120521601022
    
Epoch 1


Actor Training:  62%|██████▏   | 62/100 [01:45<00:24,  1.54it/s]