# ABS Project Main Notebook

In [1]:
from benchmarl.algorithms import MappoConfig, QmixConfig, MasacConfig
from benchmarl.benchmark import Benchmark
from benchmarl.environments import VmasTask
from benchmarl.experiment import Experiment, ExperimentConfig
from benchmarl.models import MlpConfig
from pprint import pprint

# Test Experiments

## Experiment 1: MAPPO on the VMAS `REVERSE_TRANSPORT` task

In [12]:
# Build every configuration object for this experiment through get_from_yaml().
# No path is passed to any of the calls — presumably BenchMARL then falls back
# to its bundled default YAML files; TODO confirm against the BenchMARL docs.
exp_config = ExperimentConfig.get_from_yaml()  # experiment-level settings (devices, lr, frame budget, ...)
task = VmasTask.REVERSE_TRANSPORT.get_from_yaml()  # the VMAS REVERSE_TRANSPORT task
alg = MappoConfig.get_from_yaml()  # MAPPO algorithm configuration
model = MlpConfig.get_from_yaml()  # MLP config, later passed to Experiment as model_config
critic_model = MlpConfig.get_from_yaml()  # separately loaded MLP config, later passed as critic_model_config

In [13]:
# Pretty-print the experiment configuration as loaded, to inspect its values.
# NOTE: max_n_frames is reassigned to 600_000 in a later cell before the run,
# so the max_n_frames printed here is not the value the experiment uses.
pprint(exp_config)

ExperimentConfig(sampling_device='cpu',
                 train_device='cpu',
                 buffer_device='cpu',
                 share_policy_params=True,
                 prefer_continuous_actions=True,
                 collect_with_grad=False,
                 parallel_collection=False,
                 gamma=0.99,
                 lr=5e-05,
                 adam_eps=1e-06,
                 adam_extra_kwargs={},
                 clip_grad_norm=True,
                 clip_grad_val=5,
                 soft_target_update=True,
                 polyak_tau=0.005,
                 hard_target_update_frequency=5,
                 exploration_eps_init=0.8,
                 exploration_eps_end=0.01,
                 exploration_anneal_frames=None,
                 max_n_iters=None,
                 max_n_frames=3000000,
                 on_policy_collected_frames_per_batch=6000,
                 on_policy_n_envs_per_worker=10,
                 on_policy_n_minibatch_iters=45,
              

In [14]:
# Pretty-print the MlpConfig that is later passed to Experiment as
# critic_model_config, to inspect its layer sizes and activation.
pprint(critic_model)

MlpConfig(num_cells=[256, 256],
          layer_class=<class 'torch.nn.modules.linear.Linear'>,
          activation_class=<class 'torch.nn.modules.activation.Tanh'>,
          activation_kwargs=None,
          norm_class=None,
          norm_kwargs=None,
          num_feature_dims=1)


In [15]:
# Override two of the loaded settings for this run. Both assignments happen
# before the Experiment is constructed, which receives these objects below.
task.config["max_steps"] = 250  # presumably the per-episode step limit — TODO confirm against the VMAS task docs
# Replaces the loaded max_n_frames (3000000 in the config printout above).
# With on_policy_collected_frames_per_batch=6000 this gives
# 600_000 / 6000 = 100 batches, consistent with the 100-iteration progress
# bar in the recorded run output.
exp_config.max_n_frames = 600_000
# Assemble the experiment from the task, algorithm, policy model, critic model
# and experiment configuration prepared in the cells above.
experiment = Experiment(
    task=task,
    algorithm_config=alg,
    model_config=model,
    critic_model_config=critic_model,
    seed=0,  # fixed seed for this run
    config=exp_config,
)



In [16]:
# Run the experiment. Long-running: in the recorded output below, the progress
# bar reaches 100/100 iterations after roughly 12 minutes.
experiment.run()





mean return = 0.2609289288520813:   1%|          | 1/100 [00:12<20:28, 12.41s/it][A
mean return = 1.002407193183899:   2%|▏         | 2/100 [00:19<14:57,  9.16s/it] [A
mean return = 2.152454376220703:   3%|▎         | 3/100 [00:25<12:58,  8.03s/it][A
mean return = 3.700376272201538:   4%|▍         | 4/100 [00:32<12:05,  7.55s/it][A
mean return = 4.9462409019470215:   5%|▌         | 5/100 [00:39<11:35,  7.32s/it][A
mean return = 5.778998851776123:   6%|▌         | 6/100 [00:46<11:12,  7.16s/it] [A
mean return = 5.682106018066406:   7%|▋         | 7/100 [00:53<11:00,  7.10s/it][A
mean return = 7.571181297302246:   8%|▊         | 8/100 [01:00<10:38,  6.94s/it][A
mean return = 8.262001037597656:   9%|▉         | 9/100 [01:06<10:25,  6.87s/it][A
mean return = 9.038581848144531:  10%|█         | 10/100 [01:13<10:12,  6.81s/it][A
mean return = 10.868721961975098:  11%|█         | 11/100 [01:20<10:01,  6.76s/it][A
mean return = 10.810493469238281:  12%|█▏        | 12/100 [01:26<09:


mean return = 13.198963165283203:  20%|██        | 20/100 [02:25<11:03,  8.30s/it][A
mean return = 14.804220199584961:  21%|██        | 21/100 [02:32<10:20,  7.85s/it][A
mean return = 15.075281143188477:  22%|██▏       | 22/100 [02:39<09:43,  7.48s/it][A
mean return = 15.16360855102539:  23%|██▎       | 23/100 [02:46<09:17,  7.24s/it] [A
mean return = 15.13878345489502:  24%|██▍       | 24/100 [02:52<08:58,  7.09s/it][A
mean return = 16.58041000366211:  25%|██▌       | 25/100 [02:59<08:41,  6.95s/it][A
mean return = 13.136455535888672:  26%|██▌       | 26/100 [03:06<08:31,  6.91s/it][A
mean return = 17.44398307800293:  27%|██▋       | 27/100 [03:12<08:19,  6.84s/it] [A
mean return = 19.673036575317383:  28%|██▊       | 28/100 [03:19<08:13,  6.86s/it][A
mean return = 12.817437171936035:  29%|██▉       | 29/100 [03:26<08:01,  6.79s/it][A
mean return = 16.448007583618164:  30%|███       | 30/100 [03:33<07:57,  6.82s/it][A
mean return = 14.636313438415527:  31%|███       | 31/1


mean return = 18.461360931396484:  40%|████      | 40/100 [04:48<08:27,  8.46s/it][A
mean return = 18.729007720947266:  41%|████      | 41/100 [04:55<07:51,  8.00s/it][A
mean return = 16.55028533935547:  42%|████▏     | 42/100 [05:02<07:24,  7.66s/it] [A
mean return = 16.499916076660156:  43%|████▎     | 43/100 [05:09<07:00,  7.38s/it][A
mean return = 18.00625228881836:  44%|████▍     | 44/100 [05:16<06:45,  7.23s/it] [A
mean return = 19.167903900146484:  45%|████▌     | 45/100 [05:23<06:29,  7.08s/it][A
mean return = 18.206249237060547:  46%|████▌     | 46/100 [05:30<06:20,  7.04s/it][A
mean return = 20.85639190673828:  47%|████▋     | 47/100 [05:36<06:07,  6.93s/it] [A
mean return = 17.338966369628906:  48%|████▊     | 48/100 [05:43<05:56,  6.85s/it][A
mean return = 15.95553207397461:  49%|████▉     | 49/100 [05:50<05:47,  6.81s/it] [A
mean return = 19.0528564453125:  50%|█████     | 50/100 [05:56<05:40,  6.82s/it] [A
mean return = 18.36233139038086:  51%|█████     | 51/1


mean return = 22.654390335083008: 100%|██████████| 100/100 [12:22<00:00,  8.42s/it][A

0,1
collection/agents/reward/episode_reward_max,▁▃▂▄▄▅▅▅▅▅▆▆▆▇▆▇▆▇▆▇▇▇▇▇▇▇▇▆▇▇▇▇▇█▇▇██▇█
collection/agents/reward/episode_reward_mean,▁▃▃▃▄▅▄▅▅▅▅▆▅▆▇▇▆▆▆▆▇▇▇▆▇▇▇▇▆▇██▇▇▇██▇▇█
collection/agents/reward/episode_reward_min,████▁███████████████████████████████████
collection/reward/episode_reward_max,▁▂▃▄▄▄▅▆▆▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇▇▆▇▇▇▆▇▇▇▇▇▇██▇▇
collection/reward/episode_reward_mean,▁▁▂▃▄▄▄▄▅▅▅▄▅▅▆▆▆▇▅▆▇▇▆█▇▇▇▆▇▇▇▇▇▇█▆▇▇▆█
collection/reward/episode_reward_min,▁▂▃▃▃▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃█▃▃▃▃▃▃▃▃▃▃
collection/reward/reward_max,▁▅▃▃▄▅▅▅▆▆▆▆▆▆▆▆▇▆▇▆▆▇▆▆▇▇▆▇▆▇▆▇▇▆▆▇█▇▆▇
collection/reward/reward_mean,▁▁▂▃▃▄▅▄▅▆▆▆▆▆▆▇▆▇▇▇█▇█▇██▇▇▇▇▇███▇▇████
collection/reward/reward_min,▇▇█▇▇▇██▇▁██▇▇▄▇▆▆▇█▇▇▇▇█▇████▇█▆▆▅█▄██▇
counters/current_frames,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
collection/agents/reward/episode_reward_max,34.69875
collection/agents/reward/episode_reward_mean,22.65439
collection/agents/reward/episode_reward_min,0
collection/reward/episode_reward_max,34.69875
collection/reward/episode_reward_mean,22.65439
collection/reward/episode_reward_min,0
collection/reward/reward_max,0.17972
collection/reward/reward_mean,0.10102
collection/reward/reward_min,-0.03477
counters/current_frames,6000


mean return = 22.654390335083008: 100%|██████████| 100/100 [12:23<00:00,  7.44s/it]
