In [7]:
# Baseline: pit two RandomAgent instances against each other with n_envs=10000.
# NOTE(review): this cell carries execution count 7 but sits above the import
# cell, so `va` must already be imported for it to run on a fresh kernel.
# The trailing semicolon suppresses the notebook's display of the return value.
va.run_vectorized_vs(
    va.RandomAgent(),
    va.RandomAgent(),
    n_envs=10000,
);

100%|██████████| 1999/1999 [00:10<00:00, 191.77it/s]


RandomAgent -vs- RandomAgent
Mean scores: 593.01 - 592.38
Match score: 5029 - 139 - 4832 (50.3% - 1.4% - 48.3%)


In [1]:
import os
import time
import torch

import vectorized_agents as va
import vectorized_env as ve

# Prefer the GPU, but fall back to the CPU when CUDA is unavailable so that the
# notebook still runs end-to-end on a CPU-only machine.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Thread budget: 4 when everything runs on the CPU, 8 otherwise.
# Compare on DEVICE.type rather than on the whole torch.device so that an
# indexed device such as 'cuda:0' is still classified correctly.
NUM_CPU_THREADS = 4 if DEVICE.type == 'cpu' else 8

# Keep exporting the environment variable for anything that reads it later.
os.environ['OMP_NUM_THREADS'] = str(NUM_CPU_THREADS)
# torch has already been imported above, so the environment variable alone may
# be read too late to affect it; set torch's intra-op thread count explicitly.
torch.set_num_threads(NUM_CPU_THREADS)

In [2]:
# Keyword arguments forwarded to every va.run_vectorized_vs call.
# The GPU check compares DEVICE.type instead of the whole torch.device, so an
# indexed device such as 'cuda:0' still gets the larger batch of environments
# (torch.device('cuda:0') != torch.device('cuda')).
env_kwargs = dict(
    n_envs=500 if DEVICE.type == 'cuda' else 50,
    env_device=DEVICE,
    out_device=DEVICE
)
# Keyword arguments shared by every saved RL opponent constructed below.
rl_agent_opp_kwargs = dict(
    device=DEVICE,
    deterministic_policy=True
)

# Player under test: an ensemble built from two saved agents and a list of
# timesteps.
# NOTE(review): presumably agents[i] takes over at step timesteps[i] - confirm
# against va.MultiObsFixedTimeEnsemble.
timesteps = [0, 500]
agents = [
    va.SavedRLAgent('awac_agent_small_8_64_32_1_norm_v1-230', device=DEVICE, deterministic_policy=True),
    va.SavedRLAgent('a3c_agent_small_8_64_32_2_v2-30', device=DEVICE, deterministic_policy=False),
]
# Each sub-agent declares its own observation type; pass them along in order.
agents_obs_types = [a.obs_type for a in agents]
player = va.MultiObsFixedTimeEnsemble(
    timesteps,
    agents,
    agents_obs_types
)
# Opponents the player is evaluated against: one scripted agent followed by
# three saved RL agents running with deterministic policies.
opponents = (
    va.PullVegasSlotMachinesImproved(),
    va.SavedRLAgent('a3c_agent_small_8_32-790', **rl_agent_opp_kwargs),
    va.SavedRLAgent('awac_agent_small_8_64_32_1_norm_v1-230', **rl_agent_opp_kwargs),
    va.SavedRLAgent('a3c_agent_small_8_64_32_2_v2-30', **rl_agent_opp_kwargs),
)

In [3]:
# Play the ensemble `player` against each opponent in turn; every call to
# va.run_vectorized_vs receives the shared environment settings.
for opponent in opponents:
    va.run_vectorized_vs(player, opponent, **env_kwargs)

100%|██████████| 1999/1999 [00:28<00:00, 70.35it/s]


MultiObsFixedTimeEnsemble: SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic__SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30 -vs- PullVegasSlotMachinesImproved
Mean scores: 646.09 - 636.95
Match score: 312 - 8 - 180 (62.4% - 1.6% - 36.0%)


100%|██████████| 1999/1999 [00:36<00:00, 54.73it/s]


MultiObsFixedTimeEnsemble: SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic__SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30 -vs- SavedRLAgent: a3c_agent_small_8_32-790_deterministic
Mean scores: 643.43 - 638.50
Match score: 279 - 5 - 216 (55.8% - 1.0% - 43.2%)


100%|██████████| 1999/1999 [00:40<00:00, 49.30it/s]


MultiObsFixedTimeEnsemble: SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic__SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30 -vs- SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic
Mean scores: 638.92 - 639.38
Match score: 242 - 10 - 248 (48.4% - 2.0% - 49.6%)


100%|██████████| 1999/1999 [00:53<00:00, 37.27it/s]


MultiObsFixedTimeEnsemble: SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic__SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30 -vs- SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30_deterministic
Mean scores: 637.43 - 638.46
Match score: 235 - 5 - 260 (47.0% - 1.0% - 52.0%)


In [2]:
# Keyword arguments forwarded to every va.run_vectorized_vs call.
# The GPU check compares DEVICE.type instead of the whole torch.device, so an
# indexed device such as 'cuda:0' still gets the larger batch of environments
# (torch.device('cuda:0') != torch.device('cuda')).
env_kwargs = dict(
    n_envs=500 if DEVICE.type == 'cuda' else 50,
    env_device=DEVICE,
    out_device=DEVICE
)
# Keyword arguments shared by every saved RL opponent constructed below.
rl_agent_opp_kwargs = dict(
    device=DEVICE,
    deterministic_policy=True
)

# Candidate ensembles: each pair drawn from the three agent names, followed by
# the ensemble of all three.
all_ensemble_names = ['a3c_agent_small_8_32', 'awac_agent_small_8_64_32_1_norm', 'a3c_agent_small_8_64_32_2']
players = (
    # names 0 and 1
    va.SavedRLAgentMultiObsEnsemble(all_ensemble_names[:2], weight_logits=False, deterministic_policy=True),
    # names 0 and 2
    va.SavedRLAgentMultiObsEnsemble([all_ensemble_names[0], all_ensemble_names[2]], weight_logits=False, deterministic_policy=True),
    # names 1 and 2
    va.SavedRLAgentMultiObsEnsemble(all_ensemble_names[-2:], weight_logits=False, deterministic_policy=True),
    # all three names
    va.SavedRLAgentMultiObsEnsemble(all_ensemble_names, weight_logits=False, deterministic_policy=True),
)
# Opponents every ensemble is evaluated against: one scripted agent followed by
# three saved RL agents running with deterministic policies.
opponents = (
    va.PullVegasSlotMachinesImproved(),
    va.SavedRLAgent('a3c_agent_small_8_32-790', **rl_agent_opp_kwargs),
    va.SavedRLAgent('awac_agent_small_8_64_32_1_norm_v1-230', **rl_agent_opp_kwargs),
    va.SavedRLAgent('a3c_agent_small_8_64_32_2_v2-30', **rl_agent_opp_kwargs),
)

In [3]:
# Evaluate every candidate ensemble against the full opponent roster.
for player in players:
    print(f'Evaluating "{player.name}"')
    # Brief pause after the header print, before the matches start.
    # NOTE(review): presumably this keeps the header from interleaving with the
    # progress-bar output - confirm.
    time.sleep(0.5)
    for opponent in opponents:
        va.run_vectorized_vs(player, opponent, **env_kwargs)
    # Blank line separating one ensemble's results from the next.
    print()

Evaluating "SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__awac_agent_small_8_64_32_1_norm_weight_probs_deterministic"


100%|██████████| 1999/1999 [01:51<00:00, 17.93it/s]


SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__awac_agent_small_8_64_32_1_norm_weight_probs_deterministic -vs- PullVegasSlotMachinesImproved
Mean scores: 646.79 - 639.54
Match score: 296 - 8 - 196 (59.2% - 1.6% - 39.2%)


100%|██████████| 1999/1999 [02:07<00:00, 15.67it/s]


SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__awac_agent_small_8_64_32_1_norm_weight_probs_deterministic -vs- SavedRLAgent: a3c_agent_small_8_32-790_deterministic
Mean scores: 645.70 - 640.98
Match score: 271 - 12 - 217 (54.2% - 2.4% - 43.4%)


100%|██████████| 1999/1999 [02:11<00:00, 15.24it/s]


SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__awac_agent_small_8_64_32_1_norm_weight_probs_deterministic -vs- SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic
Mean scores: 640.63 - 640.83
Match score: 245 - 5 - 250 (49.0% - 1.0% - 50.0%)


100%|██████████| 1999/1999 [02:20<00:00, 14.21it/s]


SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__awac_agent_small_8_64_32_1_norm_weight_probs_deterministic -vs- SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30_deterministic
Mean scores: 642.46 - 644.91
Match score: 220 - 11 - 269 (44.0% - 2.2% - 53.8%)

Evaluating "SavedRLAgentMultiObsEnsemble: awac_agent_small_8_64_32_1_norm__a3c_agent_small_8_64_32_2_weight_probs_deterministic"


100%|██████████| 1999/1999 [03:10<00:00, 10.48it/s]


SavedRLAgentMultiObsEnsemble: awac_agent_small_8_64_32_1_norm__a3c_agent_small_8_64_32_2_weight_probs_deterministic -vs- PullVegasSlotMachinesImproved
Mean scores: 648.98 - 636.41
Match score: 328 - 2 - 170 (65.6% - 0.4% - 34.0%)


100%|██████████| 1999/1999 [03:01<00:00, 11.02it/s]


SavedRLAgentMultiObsEnsemble: awac_agent_small_8_64_32_1_norm__a3c_agent_small_8_64_32_2_weight_probs_deterministic -vs- SavedRLAgent: a3c_agent_small_8_32-790_deterministic
Mean scores: 642.33 - 638.51
Match score: 265 - 13 - 222 (53.0% - 2.6% - 44.4%)


100%|██████████| 1999/1999 [03:23<00:00,  9.82it/s]


SavedRLAgentMultiObsEnsemble: awac_agent_small_8_64_32_1_norm__a3c_agent_small_8_64_32_2_weight_probs_deterministic -vs- SavedRLAgent: awac_agent_small_8_64_32_1_norm_v1-230_deterministic
Mean scores: 641.54 - 642.78
Match score: 241 - 4 - 255 (48.2% - 0.8% - 51.0%)


100%|██████████| 1999/1999 [03:35<00:00,  9.26it/s]


SavedRLAgentMultiObsEnsemble: awac_agent_small_8_64_32_1_norm__a3c_agent_small_8_64_32_2_weight_probs_deterministic -vs- SavedRLAgent: a3c_agent_small_8_64_32_2_v2-30_deterministic
Mean scores: 640.65 - 640.68
Match score: 255 - 5 - 240 (51.0% - 1.0% - 48.0%)

Evaluating "SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__a3c_agent_small_8_64_32_2_weight_probs_deterministic"


100%|██████████| 1999/1999 [02:40<00:00, 12.43it/s]


SavedRLAgentMultiObsEnsemble: a3c_agent_small_8_32__a3c_agent_small_8_64_32_2_weight_probs_deterministic -vs- PullVegasSlotMachinesImproved
Mean scores: 648.39 - 636.39
Match score: 328 - 4 - 168 (65.6% - 0.8% - 33.6%)


 71%|███████   | 1423/1999 [01:58<00:47, 12.01it/s]


KeyboardInterrupt: 