# AgileRL Speaker-Listener with MATD3
https://docs.agilerl.com/en/latest/tutorials/pettingzoo/matd3.html

In [1]:
!pip install --upgrade pip



In [2]:
!pip install pettingzoo[mpe]
!pip install agilerl
!pip install imageio

zsh:1: no matches found: pettingzoo[mpe]


In [3]:
"""
This tutorial shows how to train an MATD3 agent on the simple speaker listener multi-particle environment.

Authors: Michael (https://github.com/mikepratt1), Nickua (https://github.com/nicku-a)
"""

import os

import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from tqdm import trange

from agilerl.components.multi_agent_replay_buffer import MultiAgentReplayBuffer
from agilerl.hpo.mutation import Mutations
from agilerl.hpo.tournament import TournamentSelection
from agilerl.utils.utils import initialPopulation

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("===== AgileRL Online Multi-Agent Demo =====")

    # Define the network configuration
    NET_CONFIG = {
        "arch": "mlp",  # Network architecture
        "h_size": [32, 32],  # Actor hidden size
    }

    # Define the initial hyperparameters
    INIT_HP = {
        "POPULATION_SIZE": 4,
        "ALGO": "MATD3",  # Algorithm
        # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
        "CHANNELS_LAST": False,
        "BATCH_SIZE": 32,  # Batch size
        "LR": 0.01,  # Learning rate
        "GAMMA": 0.95,  # Discount factor
        "MEMORY_SIZE": 100000,  # Max memory buffer size
        "LEARN_STEP": 5,  # Learning frequency
        "TAU": 0.01,  # For soft update of target parameters
        "POLICY_FREQ": 2,  # Policy frequnecy
    }

    # Define the simple speaker listener environment as a parallel environment
    env = simple_speaker_listener_v4.parallel_env(continuous_actions=True)
    env.reset()

    # Configure the multi-agent algo input arguments
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except Exception:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = True
        INIT_HP["MAX_ACTION"] = None
        INIT_HP["MIN_ACTION"] = None
    except Exception:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = False
        INIT_HP["MAX_ACTION"] = [env.action_space(agent).high for agent in env.agents]
        INIT_HP["MIN_ACTION"] = [env.action_space(agent).low for agent in env.agents]

    # Not applicable to MPE environments, used when images are used for observations (Atari environments)
    if INIT_HP["CHANNELS_LAST"]:
        state_dim = [
            (state_dim[2], state_dim[0], state_dim[1]) for state_dim in state_dim
        ]

    # Append number of agents and agent IDs to the initial hyperparameter dictionary
    INIT_HP["N_AGENTS"] = env.num_agents
    INIT_HP["AGENT_IDS"] = env.agents

    # Create a population ready for evolutionary hyper-parameter optimisation
    pop = initialPopulation(
        INIT_HP["ALGO"],
        state_dim,
        action_dim,
        one_hot,
        NET_CONFIG,
        INIT_HP,
        population_size=INIT_HP["POPULATION_SIZE"],
        device=device,
    )

    # Configure the multi-agent replay buffer
    field_names = ["state", "action", "reward", "next_state", "done"]
    memory = MultiAgentReplayBuffer(
        INIT_HP["MEMORY_SIZE"],
        field_names=field_names,
        agent_ids=INIT_HP["AGENT_IDS"],
        device=device,
    )

    # Instantiate a tournament selection object (used for HPO)
    tournament = TournamentSelection(
        tournament_size=2,  # Tournament selection size
        elitism=True,  # Elitism in tournament selection
        population_size=INIT_HP["POPULATION_SIZE"],  # Population size
        evo_step=1,
    )  # Evaluate using last N fitness scores

    # Instantiate a mutations object (used for HPO)
    mutations = Mutations(
        algo=INIT_HP["ALGO"],
        no_mutation=0.2,  # Probability of no mutation
        architecture=0.2,  # Probability of architecture mutation
        new_layer_prob=0.2,  # Probability of new layer mutation
        parameters=0.2,  # Probability of parameter mutation
        activation=0,  # Probability of activation function mutation
        rl_hp=0.2,  # Probability of RL hyperparameter mutation
        rl_hp_selection=[
            "lr",
            "learn_step",
            "batch_size",
        ],  # RL hyperparams selected for mutation
        mutation_sd=0.1,  # Mutation strength
        agent_ids=INIT_HP["AGENT_IDS"],
        arch=NET_CONFIG["arch"],
        rand_seed=1,
        device=device,
    )

    # Define training loop parameters
    max_episodes = 3000 # 6000 #500  # Total episodes (default: 6000)
    max_steps = 25  # Maximum steps to take in each episode
    epsilon = 1.0  # Starting epsilon value
    eps_end = 0.1  # Final epsilon value
    eps_decay = 0.995  # Epsilon decay
    evo_epochs = 20  # Evolution frequency
    evo_loop = 1  # Number of evaluation episodes
    elite = pop[0]  # Assign a placeholder "elite" agent

    # Training loop
    for idx_epi in trange(max_episodes):
        for agent in pop:  # Loop through population
            state, info = env.reset()  # Reset environment at start of episode
            agent_reward = {agent_id: 0 for agent_id in env.agents}
            if INIT_HP["CHANNELS_LAST"]:
                state = {
                    agent_id: np.moveaxis(np.expand_dims(s, 0), [-1], [-3])
                    for agent_id, s in state.items()
                }

            for _ in range(max_steps):
                agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
                env_defined_actions = (
                    info["env_defined_actions"]
                    if "env_defined_actions" in info.keys()
                    else None
                )

                # Get next action from agent
                cont_actions, discrete_action = agent.getAction(
                    state, epsilon, agent_mask, env_defined_actions
                )
                if agent.discrete_actions:
                    action = discrete_action
                else:
                    action = cont_actions

                next_state, reward, termination, truncation, info = env.step(
                    action
                )  # Act in environment

                # Image processing if necessary for the environment
                if INIT_HP["CHANNELS_LAST"]:
                    state = {agent_id: np.squeeze(s) for agent_id, s in state.items()}
                    next_state = {
                        agent_id: np.moveaxis(ns, [-1], [-3])
                        for agent_id, ns in next_state.items()
                    }

                # Save experiences to replay buffer
                memory.save2memory(state, cont_actions, reward, next_state, termination)

                # Collect the reward
                for agent_id, r in reward.items():
                    agent_reward[agent_id] += r

                # Learn according to learning frequency
                if (memory.counter % agent.learn_step == 0) and (
                    len(memory) >= agent.batch_size
                ):
                    experiences = memory.sample(
                        agent.batch_size
                    )  # Sample replay buffer
                    agent.learn(experiences)  # Learn according to agent's RL algorithm

                # Update the state
                if INIT_HP["CHANNELS_LAST"]:
                    next_state = {
                        agent_id: np.expand_dims(ns, 0)
                        for agent_id, ns in next_state.items()
                    }
                state = next_state

                # Stop episode if any agents have terminated
                if any(truncation.values()) or any(termination.values()):
                    break

            # Save the total episode reward
            score = sum(agent_reward.values())
            agent.scores.append(score)

        # Update epsilon for exploration
        epsilon = max(eps_end, epsilon * eps_decay)

        # Now evolve population if necessary
        if (idx_epi + 1) % evo_epochs == 0:
            # Evaluate population
            fitnesses = [
                agent.test(
                    env,
                    swap_channels=INIT_HP["CHANNELS_LAST"],
                    max_steps=max_steps,
                    loop=evo_loop,
                )
                for agent in pop
            ]

            print(f"Episode {idx_epi + 1}/{max_episodes}")
            print(f'Fitnesses: {["%.2f" % fitness for fitness in fitnesses]}')
            print(
                f'100 fitness avgs: {["%.2f" % np.mean(agent.fitness[-100:]) for agent in pop]}'
            )

            # Tournament selection and population mutation
            elite, pop = tournament.select(pop)
            pop = mutations.mutation(pop)

    # Save the trained algorithm
    path = "./models/MATD3"
    filename = "MATD3_trained_agent.pt"
    os.makedirs(path, exist_ok=True)
    save_path = os.path.join(path, filename)
    elite.saveCheckpoint(save_path)

===== AgileRL Online Multi-Agent Demo =====


  from .autonotebook import tqdm as notebook_tqdm
  1%|▋                                                                                                            | 19/3000 [00:07<26:31,  1.87it/s]

Episode 20/3000
Fitnesses: ['-144.21', '-34.22', '-259.04', '-6.62']
100 fitness avgs: ['-144.21', '-34.22', '-259.04', '-6.62']


  1%|█▍                                                                                                           | 39/3000 [00:18<21:44,  2.27it/s]

Episode 40/3000
Fitnesses: ['-37.79', '-97.30', '-148.20', '-176.38']
100 fitness avgs: ['-22.20', '-51.96', '-91.21', '-217.71']


  2%|██▏                                                                                                          | 59/3000 [00:30<42:25,  1.16it/s]

Episode 60/3000
Fitnesses: ['-48.80', '-46.52', '-44.16', '-264.62']
100 fitness avgs: ['-31.07', '-30.31', '-29.52', '-149.01']


  3%|██▊                                                                                                          | 79/3000 [00:43<39:27,  1.23it/s]

Episode 80/3000
Fitnesses: ['-104.67', '-137.13', '-54.35', '-38.18']
100 fitness avgs: ['-48.31', '-146.04', '-36.89', '-31.69']


  3%|███▌                                                                                                         | 99/3000 [01:01<42:35,  1.14it/s]

Episode 100/3000
Fitnesses: ['-152.71', '-329.15', '-456.04', '-26.56']
100 fitness avgs: ['-55.89', '-95.34', '-116.56', '-30.66']


  4%|████▎                                                                                                       | 119/3000 [01:18<32:24,  1.48it/s]

Episode 120/3000
Fitnesses: ['-48.00', '-388.00', '-48.29', '-75.41']
100 fitness avgs: ['-33.55', '-144.12', '-33.60', '-109.70']


  5%|█████                                                                                                       | 139/3000 [01:31<30:39,  1.56it/s]

Episode 140/3000
Fitnesses: ['-78.39', '-154.72', '-20.90', '-62.61']
100 fitness avgs: ['-39.96', '-50.86', '-31.79', '-37.74']


  5%|█████▋                                                                                                      | 159/3000 [01:42<25:42,  1.84it/s]

Episode 160/3000
Fitnesses: ['-23.91', '-61.19', '-10.95', '-63.59']
100 fitness avgs: ['-30.80', '-52.15', '-34.39', '-40.98']


  6%|██████▍                                                                                                     | 179/3000 [01:54<26:31,  1.77it/s]

Episode 180/3000
Fitnesses: ['-133.33', '-58.84', '-36.21', '-54.43']
100 fitness avgs: ['-45.39', '-37.11', '-40.45', '-36.62']


  7%|███████▏                                                                                                    | 199/3000 [02:05<24:16,  1.92it/s]

Episode 200/3000
Fitnesses: ['-136.61', '-26.37', '-50.67', '-57.73']
100 fitness avgs: ['-50.06', '-43.49', '-41.47', '-42.17']


  7%|███████▉                                                                                                    | 219/3000 [02:16<24:17,  1.91it/s]

Episode 220/3000
Fitnesses: ['-49.34', '-80.24', '-24.75', '-18.60']
100 fitness avgs: ['-44.02', '-46.83', '-40.59', '-41.22']


  8%|████████▌                                                                                                   | 239/3000 [02:26<24:18,  1.89it/s]

Episode 240/3000
Fitnesses: ['-17.29', '-104.90', '-65.49', '-44.46']
100 fitness avgs: ['-39.23', '-46.53', '-48.38', '-40.91']


  9%|█████████▎                                                                                                  | 259/3000 [02:37<23:04,  1.98it/s]

Episode 260/3000
Fitnesses: ['-88.76', '-15.51', '-21.83', '-70.95']
100 fitness avgs: ['-43.04', '-38.96', '-39.45', '-43.22']


  9%|██████████                                                                                                  | 279/3000 [02:47<22:16,  2.04it/s]

Episode 280/3000
Fitnesses: ['-65.84', '-63.97', '-20.26', '-12.59']
100 fitness avgs: ['-40.88', '-44.71', '-41.58', '-37.08']


 10%|██████████▊                                                                                                 | 299/3000 [02:56<20:22,  2.21it/s]

Episode 300/3000
Fitnesses: ['-94.40', '-46.90', '-26.45', '-36.56']
100 fitness avgs: ['-40.90', '-41.94', '-43.49', '-37.04']


 11%|███████████▍                                                                                                | 319/3000 [03:06<23:00,  1.94it/s]

Episode 320/3000
Fitnesses: ['-7.70', '-13.33', '-19.70', '-42.86']
100 fitness avgs: ['-41.25', '-41.60', '-40.55', '-43.45']


 11%|████████████▏                                                                                               | 339/3000 [03:16<22:08,  2.00it/s]

Episode 340/3000
Fitnesses: ['-35.43', '-12.14', '-82.39', '-12.47']
100 fitness avgs: ['-40.91', '-39.87', '-43.67', '-39.56']


 12%|████████████▉                                                                                               | 359/3000 [03:27<22:55,  1.92it/s]

Episode 360/3000
Fitnesses: ['-12.37', '-26.82', '-12.55', '-29.95']
100 fitness avgs: ['-38.34', '-40.13', '-38.35', '-39.32']


 13%|█████████████▋                                                                                              | 379/3000 [03:38<22:41,  1.92it/s]

Episode 380/3000
Fitnesses: ['-9.25', '-22.52', '-8.72', '-14.42']
100 fitness avgs: ['-36.81', '-37.51', '-36.78', '-37.09']


 13%|██████████████▎                                                                                             | 399/3000 [03:48<22:09,  1.96it/s]

Episode 400/3000
Fitnesses: ['-36.92', '-67.44', '-46.58', '-63.65']
100 fitness avgs: ['-36.79', '-38.32', '-37.27', '-38.15']


 14%|███████████████                                                                                             | 419/3000 [03:59<23:02,  1.87it/s]

Episode 420/3000
Fitnesses: ['-25.29', '-54.93', '-73.13', '-79.98']
100 fitness avgs: ['-36.24', '-38.11', '-38.52', '-38.85']


 15%|███████████████▊                                                                                            | 439/3000 [04:09<21:52,  1.95it/s]

Episode 440/3000
Fitnesses: ['-12.24', '-106.96', '-12.85', '-21.72']
100 fitness avgs: ['-35.15', '-41.24', '-36.97', '-37.37']


 15%|████████████████▌                                                                                           | 459/3000 [04:18<18:47,  2.25it/s]

Episode 460/3000
Fitnesses: ['-35.12', '-10.88', '-36.63', '-70.22']
100 fitness avgs: ['-35.15', '-39.92', '-35.22', '-36.68']


 16%|█████████████████▏                                                                                          | 479/3000 [04:28<19:59,  2.10it/s]

Episode 480/3000
Fitnesses: ['-25.71', '-66.91', '-21.02', '-57.09']
100 fitness avgs: ['-39.33', '-36.54', '-39.14', '-36.06']


 17%|█████████████████▉                                                                                          | 499/3000 [04:39<22:43,  1.83it/s]

Episode 500/3000
Fitnesses: ['-42.53', '-21.18', '-54.83', '-35.88']
100 fitness avgs: ['-39.27', '-38.60', '-39.95', '-39.19']


 17%|██████████████████▋                                                                                         | 519/3000 [04:51<24:18,  1.70it/s]

Episode 520/3000
Fitnesses: ['-37.73', '-41.17', '-16.67', '-16.61']
100 fitness avgs: ['-38.57', '-38.70', '-38.33', '-39.05']


 18%|███████████████████▍                                                                                        | 539/3000 [05:02<21:22,  1.92it/s]

Episode 540/3000
Fitnesses: ['-40.34', '-37.74', '-8.38', '-15.29']
100 fitness avgs: ['-39.10', '-38.30', '-37.22', '-37.47']


 19%|████████████████████                                                                                        | 559/3000 [05:12<21:49,  1.86it/s]

Episode 560/3000
Fitnesses: ['-49.69', '-5.18', '-22.58', '-20.14']
100 fitness avgs: ['-37.66', '-36.32', '-36.69', '-37.66']


 19%|████████████████████▊                                                                                       | 579/3000 [05:23<21:17,  1.89it/s]

Episode 580/3000
Fitnesses: ['-49.90', '-6.97', '-37.50', '-19.14']
100 fitness avgs: ['-36.79', '-36.60', '-37.65', '-35.73']


 20%|█████████████████████▌                                                                                      | 599/3000 [05:35<22:10,  1.80it/s]

Episode 600/3000
Fitnesses: ['-18.19', '-75.77', '-9.31', '-19.66']
100 fitness avgs: ['-35.99', '-38.09', '-35.87', '-37.05']


 21%|██████████████████████▎                                                                                     | 619/3000 [05:46<21:06,  1.88it/s]

Episode 620/3000
Fitnesses: ['-48.51', '-13.85', '-108.27', '-7.53']
100 fitness avgs: ['-36.28', '-36.30', '-38.21', '-35.07']


 21%|███████████████████████                                                                                     | 639/3000 [05:57<20:27,  1.92it/s]

Episode 640/3000
Fitnesses: ['-6.72', '-37.27', '-7.74', '-25.86']
100 fitness avgs: ['-34.19', '-35.14', '-35.41', '-34.78']


 22%|███████████████████████▋                                                                                    | 659/3000 [06:06<17:52,  2.18it/s]

Episode 660/3000
Fitnesses: ['-24.47', '-48.51', '-20.50', '-11.91']
100 fitness avgs: ['-33.89', '-35.81', '-33.77', '-34.09']


 23%|████████████████████████▍                                                                                   | 679/3000 [06:19<24:42,  1.57it/s]

Episode 680/3000
Fitnesses: ['-56.39', '-54.40', '-28.03', '-83.46']
100 fitness avgs: ['-34.75', '-34.38', '-33.72', '-35.54']


 23%|█████████████████████████▏                                                                                  | 699/3000 [06:34<29:34,  1.30it/s]

Episode 700/3000
Fitnesses: ['-59.98', '-28.67', '-57.11', '-23.77']
100 fitness avgs: ['-34.47', '-33.58', '-34.39', '-34.43']


 24%|█████████████████████████▉                                                                                  | 719/3000 [06:48<25:46,  1.47it/s]

Episode 720/3000
Fitnesses: ['-26.69', '-10.13', '-39.02', '-73.63']
100 fitness avgs: ['-34.22', '-33.76', '-34.52', '-35.52']


 25%|██████████████████████████▌                                                                                 | 739/3000 [07:01<23:10,  1.63it/s]

Episode 740/3000
Fitnesses: ['-56.71', '-28.46', '-48.12', '-19.98']
100 fitness avgs: ['-34.38', '-33.62', '-34.88', '-33.39']


 25%|███████████████████████████▎                                                                                | 759/3000 [07:13<23:37,  1.58it/s]

Episode 760/3000
Fitnesses: ['-61.95', '-35.84', '-42.64', '-46.47']
100 fitness avgs: ['-34.14', '-34.91', '-33.85', '-33.73']


 26%|████████████████████████████                                                                                | 779/3000 [07:24<19:00,  1.95it/s]

Episode 780/3000
Fitnesses: ['-14.59', '-103.03', '-412.00', '-31.55']
100 fitness avgs: ['-34.39', '-36.66', '-43.55', '-34.82']


 27%|████████████████████████████▊                                                                               | 799/3000 [07:36<21:36,  1.70it/s]

Episode 800/3000
Fitnesses: ['-3.95', '-21.87', '-4.79', '-14.92']
100 fitness avgs: ['-33.63', '-34.50', '-33.65', '-33.90']


 27%|█████████████████████████████▍                                                                              | 819/3000 [07:49<22:43,  1.60it/s]

Episode 820/3000
Fitnesses: ['-17.43', '-32.69', '-53.12', '-7.29']
100 fitness avgs: ['-33.23', '-33.60', '-34.10', '-33.25']


 28%|██████████████████████████████▏                                                                             | 839/3000 [08:02<22:51,  1.58it/s]

Episode 840/3000
Fitnesses: ['-37.15', '-47.96', '-35.80', '-24.27']
100 fitness avgs: ['-33.35', '-33.58', '-33.31', '-33.04']


 29%|██████████████████████████████▉                                                                             | 859/3000 [08:15<23:05,  1.54it/s]

Episode 860/3000
Fitnesses: ['-31.10', '-42.01', '-39.27', '-28.03']
100 fitness avgs: ['-32.99', '-33.25', '-33.18', '-33.19']


 29%|███████████████████████████████▋                                                                            | 879/3000 [08:29<23:17,  1.52it/s]

Episode 880/3000
Fitnesses: ['-20.76', '-20.79', '-51.61', '-46.62']
100 fitness avgs: ['-32.91', '-32.72', '-33.60', '-33.49']


 30%|████████████████████████████████▎                                                                           | 899/3000 [08:42<22:58,  1.52it/s]

Episode 900/3000
Fitnesses: ['-26.93', '-41.55', '-17.99', '-31.65']
100 fitness avgs: ['-32.78', '-32.91', '-32.58', '-32.88']


 31%|█████████████████████████████████                                                                           | 919/3000 [08:54<20:35,  1.68it/s]

Episode 920/3000
Fitnesses: ['-2.82', '-33.97', '-23.58', '-82.23']
100 fitness avgs: ['-31.93', '-32.61', '-32.58', '-33.66']


 31%|█████████████████████████████████▊                                                                          | 939/3000 [09:04<17:54,  1.92it/s]

Episode 940/3000
Fitnesses: ['-23.27', '-28.80', '-61.71', '-2.62']
100 fitness avgs: ['-31.75', '-31.86', '-33.23', '-31.31']


 32%|██████████████████████████████████▌                                                                         | 959/3000 [09:14<16:48,  2.02it/s]

Episode 960/3000
Fitnesses: ['-22.23', '-18.90', '-18.49', '-18.96']
100 fitness avgs: ['-31.12', '-31.05', '-32.92', '-31.48']


 33%|███████████████████████████████████▏                                                                        | 979/3000 [09:26<19:59,  1.68it/s]

Episode 980/3000
Fitnesses: ['-425.29', '-79.37', '-23.38', '-25.45']
100 fitness avgs: ['-40.93', '-33.87', '-32.72', '-30.93']


 33%|███████████████████████████████████▉                                                                        | 999/3000 [09:39<21:07,  1.58it/s]

Episode 1000/3000
Fitnesses: ['-5.45', '-8.47', '-6.39', '-33.00']
100 fitness avgs: ['-32.18', '-32.24', '-30.44', '-32.73']


 34%|████████████████████████████████████▎                                                                      | 1019/3000 [09:51<18:52,  1.75it/s]

Episode 1020/3000
Fitnesses: ['-95.43', '-34.62', '-42.31', '-39.63']
100 fitness avgs: ['-33.42', '-32.23', '-32.38', '-30.62']


 35%|█████████████████████████████████████                                                                      | 1039/3000 [10:04<19:57,  1.64it/s]

Episode 1040/3000
Fitnesses: ['-60.31', '-4.68', '-11.67', '-5.67']
100 fitness avgs: ['-32.77', '-30.12', '-31.98', '-31.86']


 35%|█████████████████████████████████████▊                                                                     | 1059/3000 [10:14<17:18,  1.87it/s]

Episode 1060/3000
Fitnesses: ['-3.70', '-66.18', '-35.14', '-13.66']
100 fitness avgs: ['-29.63', '-32.62', '-30.22', '-29.81']


 36%|██████████████████████████████████████▍                                                                    | 1079/3000 [10:25<15:44,  2.03it/s]

Episode 1080/3000
Fitnesses: ['-51.57', '-51.44', '-9.18', '-17.32']
100 fitness avgs: ['-30.03', '-30.03', '-29.43', '-29.98']


 37%|███████████████████████████████████████▏                                                                   | 1099/3000 [10:35<15:37,  2.03it/s]

Episode 1100/3000
Fitnesses: ['-69.08', '-50.03', '-88.06', '-71.22']
100 fitness avgs: ['-30.15', '-29.81', '-31.09', '-30.19']


 37%|███████████████████████████████████████▉                                                                   | 1119/3000 [10:45<15:07,  2.07it/s]

Episode 1120/3000
Fitnesses: ['-63.64', '-67.53', '-32.72', '-96.73']
100 fitness avgs: ['-30.41', '-30.86', '-29.86', '-31.00']


 38%|████████████████████████████████████████▌                                                                  | 1139/3000 [10:55<14:55,  2.08it/s]

Episode 1140/3000
Fitnesses: ['-29.68', '-62.41', '-11.99', '-26.11']
100 fitness avgs: ['-29.85', '-30.43', '-30.09', '-29.79']


 39%|█████████████████████████████████████████▎                                                                 | 1159/3000 [11:07<16:29,  1.86it/s]

Episode 1160/3000
Fitnesses: ['-36.11', '-26.88', '-8.29', '-4.36']
100 fitness avgs: ['-30.19', '-29.74', '-29.71', '-29.41']


 39%|██████████████████████████████████████████                                                                 | 1179/3000 [11:19<18:47,  1.61it/s]

Episode 1180/3000
Fitnesses: ['-67.49', '-33.02', '-49.64', '-22.04']
100 fitness avgs: ['-30.06', '-29.48', '-29.76', '-29.29']


 40%|██████████████████████████████████████████▊                                                                | 1199/3000 [11:32<19:22,  1.55it/s]

Episode 1200/3000
Fitnesses: ['-23.42', '-4.89', '-38.33', '-32.41']
100 fitness avgs: ['-29.19', '-29.07', '-29.62', '-29.52']


 41%|███████████████████████████████████████████▍                                                               | 1219/3000 [11:47<20:27,  1.45it/s]

Episode 1220/3000
Fitnesses: ['-60.28', '-12.89', '-58.33', '-20.07']
100 fitness avgs: ['-29.58', '-28.80', '-29.55', '-29.37']


 41%|████████████████████████████████████████████▏                                                              | 1239/3000 [12:01<20:26,  1.44it/s]

Episode 1240/3000
Fitnesses: ['-61.20', '-25.67', '-105.33', '-48.22']
100 fitness avgs: ['-29.32', '-29.48', '-30.60', '-29.11']


 42%|████████████████████████████████████████████▉                                                              | 1259/3000 [12:16<20:39,  1.40it/s]

Episode 1260/3000
Fitnesses: ['-17.60', '-15.76', '-34.66', '-21.45']
100 fitness avgs: ['-29.29', '-29.27', '-30.66', '-29.36']


 43%|█████████████████████████████████████████████▌                                                             | 1279/3000 [12:30<19:16,  1.49it/s]

Episode 1280/3000
Fitnesses: ['-93.11', '-334.56', '-22.36', '-9.23']
100 fitness avgs: ['-30.26', '-34.04', '-29.25', '-28.98']


 43%|██████████████████████████████████████████████▎                                                            | 1299/3000 [12:44<20:50,  1.36it/s]

Episode 1300/3000
Fitnesses: ['-45.98', '-15.64', '-11.16', '-14.13']
100 fitness avgs: ['-29.24', '-30.04', '-29.97', '-28.75']


 44%|███████████████████████████████████████████████                                                            | 1319/3000 [12:59<19:57,  1.40it/s]

Episode 1320/3000
Fitnesses: ['-56.96', '-98.04', '-4.88', '-9.77']
100 fitness avgs: ['-30.38', '-31.00', '-29.66', '-28.95']


 45%|███████████████████████████████████████████████▊                                                           | 1339/3000 [13:14<19:39,  1.41it/s]

Episode 1340/3000
Fitnesses: ['-7.39', '-12.17', '-84.94', '-95.38']
100 fitness avgs: ['-29.32', '-29.40', '-29.78', '-30.64']


 45%|████████████████████████████████████████████████▍                                                          | 1359/3000 [13:28<19:21,  1.41it/s]

Episode 1360/3000
Fitnesses: ['-19.49', '-26.00', '-167.04', '-19.35']
100 fitness avgs: ['-29.18', '-29.28', '-31.35', '-29.18']


 46%|█████████████████████████████████████████████████▏                                                         | 1379/3000 [13:43<19:20,  1.40it/s]

Episode 1380/3000
Fitnesses: ['-15.85', '-42.69', '-36.35', '-750.08']
100 fitness avgs: ['-28.98', '-29.38', '-29.28', '-39.63']


 47%|█████████████████████████████████████████████████▉                                                         | 1399/3000 [13:58<19:35,  1.36it/s]

Episode 1400/3000
Fitnesses: ['-44.61', '-17.93', '-8.76', '-26.20']
100 fitness avgs: ['-29.21', '-28.83', '-28.99', '-29.24']


 47%|██████████████████████████████████████████████████▌                                                        | 1419/3000 [14:11<17:22,  1.52it/s]

Episode 1420/3000
Fitnesses: ['-34.53', '-19.35', '-20.78', '-30.61']
100 fitness avgs: ['-29.07', '-29.10', '-28.71', '-28.85']


 48%|███████████████████████████████████████████████████▎                                                       | 1439/3000 [14:24<15:47,  1.65it/s]

Episode 1440/3000
Fitnesses: ['-5.66', '-22.37', '-22.48', '-7.72']
100 fitness avgs: ['-28.78', '-28.63', '-28.76', '-28.42']


 49%|████████████████████████████████████████████████████                                                       | 1459/3000 [14:36<14:28,  1.77it/s]

Episode 1460/3000
Fitnesses: ['-67.76', '-23.88', '-13.58', '-60.79']
100 fitness avgs: ['-29.31', '-28.36', '-28.57', '-29.07']


 49%|████████████████████████████████████████████████████▊                                                      | 1479/3000 [14:49<15:31,  1.63it/s]

Episode 1480/3000
Fitnesses: ['-5.48', '-55.27', '-9.49', '-5.88']
100 fitness avgs: ['-28.25', '-28.93', '-28.80', '-28.75']


 50%|█████████████████████████████████████████████████████▍                                                     | 1499/3000 [15:02<15:36,  1.60it/s]

Episode 1500/3000
Fitnesses: ['-42.00', '-20.01', '-47.36', '-3.36']
100 fitness avgs: ['-28.44', '-28.64', '-29.17', '-28.41']


 51%|██████████████████████████████████████████████████████▏                                                    | 1519/3000 [15:12<12:03,  2.05it/s]

Episode 1520/3000
Fitnesses: ['-13.65', '-35.36', '-28.09', '-21.52']
100 fitness avgs: ['-28.22', '-28.51', '-28.63', '-28.32']


 51%|██████████████████████████████████████████████████████▉                                                    | 1539/3000 [15:24<13:50,  1.76it/s]

Episode 1540/3000
Fitnesses: ['-42.45', '-23.90', '-39.55', '-28.49']
100 fitness avgs: ['-28.40', '-28.27', '-28.65', '-28.33']


 52%|███████████████████████████████████████████████████████▌                                                   | 1559/3000 [15:38<16:18,  1.47it/s]

Episode 1560/3000
Fitnesses: ['-57.86', '-32.65', '-30.74', '-26.92']
100 fitness avgs: ['-28.65', '-28.38', '-28.43', '-28.25']


 53%|████████████████████████████████████████████████████████▎                                                  | 1579/3000 [15:53<17:48,  1.33it/s]

Episode 1580/3000
Fitnesses: ['-17.13', '-47.52', '-120.74', '-10.13']
100 fitness avgs: ['-28.11', '-28.68', '-29.42', '-28.02']


 53%|█████████████████████████████████████████████████████████                                                  | 1599/3000 [16:07<16:35,  1.41it/s]

Episode 1600/3000
Fitnesses: ['-40.70', '-35.67', '-580.78', '-9.45']
100 fitness avgs: ['-28.18', '-28.76', '-35.02', '-27.79']


 54%|█████████████████████████████████████████████████████████▋                                                 | 1619/3000 [16:22<17:00,  1.35it/s]

Episode 1620/3000
Fitnesses: ['-36.27', '-51.10', '-26.66', '-59.16']
100 fitness avgs: ['-27.89', '-28.07', '-27.77', '-28.17']


 55%|██████████████████████████████████████████████████████████▍                                                | 1639/3000 [16:36<15:20,  1.48it/s]

Episode 1640/3000
Fitnesses: ['-171.04', '-57.61', '-25.36', '-45.33']
100 fitness avgs: ['-29.52', '-28.25', '-27.86', '-28.29']


 55%|███████████████████████████████████████████████████████████▏                                               | 1659/3000 [16:50<14:51,  1.50it/s]

Episode 1660/3000
Fitnesses: ['-40.71', '-18.61', '-26.94', '-17.09']
100 fitness avgs: ['-28.02', '-28.17', '-28.27', '-28.12']


 56%|███████████████████████████████████████████████████████████▉                                               | 1679/3000 [17:02<12:41,  1.73it/s]

Episode 1680/3000
Fitnesses: ['-38.22', '-49.24', '-11.59', '-35.35']
100 fitness avgs: ['-28.24', '-28.37', '-28.07', '-28.21']


 57%|████████████████████████████████████████████████████████████▌                                              | 1699/3000 [17:15<13:40,  1.59it/s]

Episode 1700/3000
Fitnesses: ['-70.06', '-25.92', '-28.92', '-6.62']
100 fitness avgs: ['-28.56', '-28.05', '-28.21', '-27.95']


 57%|█████████████████████████████████████████████████████████████▎                                             | 1719/3000 [17:27<12:28,  1.71it/s]

Episode 1720/3000
Fitnesses: ['-3.77', '-33.90', '-19.13', '-6.59']
100 fitness avgs: ['-27.67', '-28.63', '-27.85', '-27.70']


 58%|██████████████████████████████████████████████████████████████                                             | 1739/3000 [17:40<11:21,  1.85it/s]

Episode 1740/3000
Fitnesses: ['-86.33', '-38.13', '-53.77', '-75.83']
100 fitness avgs: ['-28.35', '-27.97', '-27.97', '-28.40']


 59%|██████████████████████████████████████████████████████████████▋                                            | 1759/3000 [17:52<11:15,  1.84it/s]

Episode 1760/3000
Fitnesses: ['-24.88', '-5.89', '-28.31', '-7.49']
100 fitness avgs: ['-27.93', '-27.72', '-27.97', '-27.73']


 59%|███████████████████████████████████████████████████████████████▍                                           | 1779/3000 [18:03<11:05,  1.83it/s]

Episode 1780/3000
Fitnesses: ['-1.60', '-3.76', '-57.32', '-11.66']
100 fitness avgs: ['-27.43', '-27.47', '-28.05', '-27.54']


 60%|████████████████████████████████████████████████████████████████▏                                          | 1799/3000 [18:16<11:10,  1.79it/s]

Episode 1800/3000
Fitnesses: ['-19.35', '-43.64', '-122.91', '-20.68']
100 fitness avgs: ['-27.34', '-28.23', '-28.49', '-27.35']


 61%|████████████████████████████████████████████████████████████████▉                                          | 1819/3000 [18:28<11:23,  1.73it/s]

Episode 1820/3000
Fitnesses: ['-6.53', '-66.04', '-15.98', '-136.51']
100 fitness avgs: ['-27.11', '-27.78', '-27.23', '-29.42']


 61%|█████████████████████████████████████████████████████████████████▌                                         | 1839/3000 [18:41<12:01,  1.61it/s]

Episode 1840/3000
Fitnesses: ['-20.22', '-24.31', '-3.55', '-15.65']
100 fitness avgs: ['-27.03', '-27.19', '-26.85', '-27.10']


 62%|██████████████████████████████████████████████████████████████████▎                                        | 1859/3000 [18:54<12:14,  1.55it/s]

Episode 1860/3000
Fitnesses: ['-29.59', '-41.90', '-73.37', '-51.82']
100 fitness avgs: ['-26.88', '-27.26', '-27.53', '-27.12']


 63%|███████████████████████████████████████████████████████████████████                                        | 1879/3000 [19:13<16:11,  1.15it/s]

Episode 1880/3000
Fitnesses: ['-64.74', '-18.18', '-36.59', '-65.25']
100 fitness avgs: ['-27.28', '-27.03', '-27.22', '-27.29']


 63%|███████████████████████████████████████████████████████████████████▋                                       | 1899/3000 [19:30<14:51,  1.24it/s]

Episode 1900/3000
Fitnesses: ['-64.38', '-5.38', '-48.31', '-12.10']
100 fitness avgs: ['-27.42', '-26.80', '-27.44', '-26.87']


 64%|████████████████████████████████████████████████████████████████████▍                                      | 1919/3000 [19:43<10:00,  1.80it/s]

Episode 1920/3000
Fitnesses: ['-78.78', '-27.43', '-8.84', '-51.91']
100 fitness avgs: ['-27.34', '-26.80', '-26.61', '-27.06']


 65%|█████████████████████████████████████████████████████████████████████▏                                     | 1939/3000 [19:55<09:47,  1.81it/s]

Episode 1940/3000
Fitnesses: ['-25.38', '-71.98', '-17.12', '-1.27']
100 fitness avgs: ['-26.60', '-27.08', '-26.70', '-26.35']


 65%|█████████████████████████████████████████████████████████████████████▊                                     | 1959/3000 [20:08<09:42,  1.79it/s]

Episode 1960/3000
Fitnesses: ['-14.37', '-82.50', '-13.53', '-65.12']
100 fitness avgs: ['-26.23', '-26.92', '-26.57', '-26.74']


 66%|██████████████████████████████████████████████████████████████████████▌                                    | 1979/3000 [20:19<09:31,  1.79it/s]

Episode 1980/3000
Fitnesses: ['-16.20', '-3.37', '-22.07', '-16.71']
100 fitness avgs: ['-26.47', '-26.34', '-26.19', '-26.82']


 67%|███████████████████████████████████████████████████████████████████████▎                                   | 1999/3000 [20:32<09:16,  1.80it/s]

Episode 2000/3000
Fitnesses: ['-83.87', '-13.73', '-43.78', '-48.43']
100 fitness avgs: ['-26.91', '-26.69', '-26.51', '-26.68']


 67%|████████████████████████████████████████████████████████████████████████                                   | 2019/3000 [20:45<09:49,  1.66it/s]

Episode 2020/3000
Fitnesses: ['-13.99', '-8.36', '-24.58', '-25.65']
100 fitness avgs: ['-26.76', '-26.71', '-26.86', '-26.88']


 68%|████████████████████████████████████████████████████████████████████████▋                                  | 2039/3000 [20:59<11:42,  1.37it/s]

Episode 2040/3000
Fitnesses: ['-14.71', '-53.54', '-32.35', '-18.44']
100 fitness avgs: ['-26.47', '-26.86', '-26.65', '-26.57']


 69%|█████████████████████████████████████████████████████████████████████████▍                                 | 2059/3000 [21:13<10:23,  1.51it/s]

Episode 2060/3000
Fitnesses: ['-17.95', '-3.21', '-11.36', '-12.05']
100 fitness avgs: ['-26.21', '-26.16', '-26.15', '-26.25']


 69%|██████████████████████████████████████████████████████████████████████████▏                                | 2079/3000 [21:24<07:49,  1.96it/s]

Episode 2080/3000
Fitnesses: ['-165.65', '-11.80', '-11.05', '-81.14']
100 fitness avgs: ['-27.43', '-25.89', '-25.89', '-26.59']


 70%|██████████████████████████████████████████████████████████████████████████▊                                | 2099/3000 [21:37<08:11,  1.83it/s]

Episode 2100/3000
Fitnesses: ['-39.44', '-86.27', '-50.37', '-11.87']
100 fitness avgs: ['-26.02', '-26.49', '-26.13', '-25.75']


 71%|███████████████████████████████████████████████████████████████████████████▌                               | 2119/3000 [21:49<08:30,  1.73it/s]

Episode 2120/3000
Fitnesses: ['-10.25', '-3.61', '-22.94', '-47.28']
100 fitness avgs: ['-25.37', '-25.30', '-25.87', '-26.01']


 71%|████████████████████████████████████████████████████████████████████████████▎                              | 2139/3000 [22:02<09:02,  1.59it/s]

Episode 2140/3000
Fitnesses: ['-43.46', '-6.96', '-28.16', '-38.01']
100 fitness avgs: ['-25.11', '-24.81', '-24.96', '-25.76']


 72%|█████████████████████████████████████████████████████████████████████████████                              | 2159/3000 [22:16<09:17,  1.51it/s]

Episode 2160/3000
Fitnesses: ['-68.75', '-21.28', '-20.03', '-8.31']
100 fitness avgs: ['-24.86', '-24.53', '-24.38', '-24.40']


 73%|█████████████████████████████████████████████████████████████████████████████▋                             | 2179/3000 [22:31<09:54,  1.38it/s]

Episode 2180/3000
Fitnesses: ['-47.30', '-2.22', '-51.20', '-10.68']
100 fitness avgs: ['-24.51', '-24.04', '-24.53', '-24.15']


 73%|██████████████████████████████████████████████████████████████████████████████▍                            | 2199/3000 [22:47<09:40,  1.38it/s]

Episode 2200/3000
Fitnesses: ['-73.10', '-42.17', '-14.44', '-4.43']
100 fitness avgs: ['-24.19', '-24.36', '-23.60', '-23.98']


 74%|███████████████████████████████████████████████████████████████████████████████▏                           | 2219/3000 [23:04<09:56,  1.31it/s]

Episode 2220/3000
Fitnesses: ['-29.53', '-57.16', '-25.65', '-38.96']
100 fitness avgs: ['-24.03', '-24.31', '-23.61', '-23.74']


 75%|███████████████████████████████████████████████████████████████████████████████▊                           | 2239/3000 [23:14<05:23,  2.35it/s]

Episode 2240/3000
Fitnesses: ['-9.63', '-22.64', '-8.08', '-30.37']
100 fitness avgs: ['-23.26', '-23.39', '-23.38', '-23.47']


 75%|████████████████████████████████████████████████████████████████████████████████▌                          | 2259/3000 [23:26<07:02,  1.76it/s]

Episode 2260/3000
Fitnesses: ['-37.52', '-25.12', '-58.21', '-8.67']
100 fitness avgs: ['-23.05', '-22.92', '-23.25', '-22.76']


 76%|█████████████████████████████████████████████████████████████████████████████████▎                         | 2279/3000 [23:39<06:47,  1.77it/s]

Episode 2280/3000
Fitnesses: ['-20.61', '-8.05', '-6.57', '-43.38']
100 fitness avgs: ['-22.32', '-22.20', '-22.47', '-22.55']


 77%|█████████████████████████████████████████████████████████████████████████████████▉                         | 2299/3000 [23:52<07:19,  1.60it/s]

Episode 2300/3000
Fitnesses: ['-2.65', '-4.26', '-66.91', '-8.85']
100 fitness avgs: ['-22.23', '-21.98', '-22.88', '-22.30']


 77%|██████████████████████████████████████████████████████████████████████████████████▋                        | 2319/3000 [24:03<06:01,  1.88it/s]

Episode 2320/3000
Fitnesses: ['-64.12', '-37.50', '-99.62', '-58.65']
100 fitness avgs: ['-22.74', '-22.54', '-23.74', '-22.69']


 78%|███████████████████████████████████████████████████████████████████████████████████▍                       | 2339/3000 [24:15<05:39,  1.95it/s]

Episode 2340/3000
Fitnesses: ['-6.35', '-28.64', '-9.74', '-31.62']
100 fitness avgs: ['-22.48', '-22.91', '-22.72', '-22.88']


 79%|████████████████████████████████████████████████████████████████████████████████████▏                      | 2359/3000 [24:26<05:38,  1.89it/s]

Episode 2360/3000
Fitnesses: ['-2.13', '-73.95', '-42.25', '-12.30']
100 fitness avgs: ['-22.38', '-23.52', '-23.18', '-22.48']


 79%|████████████████████████████████████████████████████████████████████████████████████▊                      | 2379/3000 [24:38<06:06,  1.69it/s]

Episode 2380/3000
Fitnesses: ['-16.13', '-190.39', '-26.20', '-446.22']
100 fitness avgs: ['-22.45', '-25.00', '-22.55', '-27.56']


 80%|█████████████████████████████████████████████████████████████████████████████████████▌                     | 2399/3000 [24:51<06:22,  1.57it/s]

Episode 2400/3000
Fitnesses: ['-12.36', '-187.58', '-26.24', '-16.11']
100 fitness avgs: ['-22.11', '-26.41', '-22.35', '-22.15']


 81%|██████████████████████████████████████████████████████████████████████████████████████▎                    | 2419/3000 [25:07<07:14,  1.34it/s]

Episode 2420/3000
Fitnesses: ['-38.91', '-10.77', '-27.69', '-38.15']
100 fitness avgs: ['-21.95', '-21.91', '-21.88', '-21.94']


 81%|██████████████████████████████████████████████████████████████████████████████████████▉                    | 2439/3000 [25:23<07:26,  1.26it/s]

Episode 2440/3000
Fitnesses: ['-68.75', '-27.76', '-4.09', '-75.14']
100 fitness avgs: ['-21.53', '-21.15', '-20.88', '-21.59']


 82%|███████████████████████████████████████████████████████████████████████████████████████▋                   | 2459/3000 [25:41<07:12,  1.25it/s]

Episode 2460/3000
Fitnesses: ['-9.51', '-5.63', '-109.36', '-20.56']
100 fitness avgs: ['-20.87', '-20.83', '-22.51', '-20.98']


 83%|████████████████████████████████████████████████████████████████████████████████████████▍                  | 2479/3000 [25:59<07:28,  1.16it/s]

Episode 2480/3000
Fitnesses: ['-35.28', '-13.09', '-22.22', '-5.37']
100 fitness avgs: ['-20.92', '-20.74', '-20.79', '-20.66']


 83%|█████████████████████████████████████████████████████████████████████████████████████████▏                 | 2499/3000 [26:17<06:38,  1.26it/s]

Episode 2500/3000
Fitnesses: ['-18.97', '-2.88', '-5.26', '-58.02']
100 fitness avgs: ['-20.49', '-20.41', '-20.43', '-20.88']


 84%|█████████████████████████████████████████████████████████████████████████████████████████▊                 | 2519/3000 [26:33<06:18,  1.27it/s]

Episode 2520/3000
Fitnesses: ['-58.09', '-32.41', '-17.16', '-23.19']
100 fitness avgs: ['-20.82', '-20.57', '-20.41', '-20.56']


 85%|██████████████████████████████████████████████████████████████████████████████████████████▌                | 2539/3000 [26:52<06:38,  1.16it/s]

Episode 2540/3000
Fitnesses: ['-63.44', '-46.84', '-6.33', '-59.85']
100 fitness avgs: ['-20.96', '-20.80', '-20.55', '-20.93']


 85%|███████████████████████████████████████████████████████████████████████████████████████████▎               | 2559/3000 [27:13<06:58,  1.05it/s]

Episode 2560/3000
Fitnesses: ['-23.96', '-42.55', '-13.33', '-25.66']
100 fitness avgs: ['-20.29', '-20.73', '-20.18', '-20.31']


 86%|███████████████████████████████████████████████████████████████████████████████████████████▉               | 2579/3000 [27:34<07:07,  1.01s/it]

Episode 2580/3000
Fitnesses: ['-4.29', '-14.22', '-80.33', '-32.95']
100 fitness avgs: ['-20.16', '-20.25', '-21.02', '-20.57']


 87%|████████████████████████████████████████████████████████████████████████████████████████████▋              | 2599/3000 [27:56<07:24,  1.11s/it]

Episode 2600/3000
Fitnesses: ['-16.64', '-18.57', '-19.25', '-18.13']
100 fitness avgs: ['-20.14', '-20.16', '-20.58', '-20.25']


 87%|█████████████████████████████████████████████████████████████████████████████████████████████▍             | 2619/3000 [28:22<07:51,  1.24s/it]

Episode 2620/3000
Fitnesses: ['-30.87', '-4.33', '-20.62', '-30.91']
100 fitness avgs: ['-20.37', '-20.22', '-20.39', '-20.37']


 88%|██████████████████████████████████████████████████████████████████████████████████████████████             | 2639/3000 [28:44<06:46,  1.13s/it]

Episode 2640/3000
Fitnesses: ['-8.81', '-12.18', '-100.01', '-12.47']
100 fitness avgs: ['-20.24', '-20.44', '-21.16', '-20.28']


 89%|██████████████████████████████████████████████████████████████████████████████████████████████▊            | 2659/3000 [29:08<06:26,  1.13s/it]

Episode 2660/3000
Fitnesses: ['-50.78', '-189.12', '-19.86', '-19.56']
100 fitness avgs: ['-20.51', '-21.93', '-20.23', '-20.39']


 89%|███████████████████████████████████████████████████████████████████████████████████████████████▌           | 2679/3000 [29:31<06:10,  1.15s/it]

Episode 2680/3000
Fitnesses: ['-92.58', '-17.41', '-49.72', '-45.73']
100 fitness avgs: ['-21.04', '-20.28', '-20.72', '-20.41']


 90%|████████████████████████████████████████████████████████████████████████████████████████████████▎          | 2699/3000 [29:51<05:08,  1.02s/it]

Episode 2700/3000
Fitnesses: ['-35.82', '-13.94', '-6.90', '-15.25']
100 fitness avgs: ['-20.07', '-19.85', '-19.91', '-19.99']


 91%|████████████████████████████████████████████████████████████████████████████████████████████████▉          | 2719/3000 [30:13<04:54,  1.05s/it]

Episode 2720/3000
Fitnesses: ['-118.17', '-1.10', '-30.79', '-55.72']
100 fitness avgs: ['-20.70', '-19.53', '-19.83', '-20.02']


 91%|█████████████████████████████████████████████████████████████████████████████████████████████████▋         | 2739/3000 [30:33<04:30,  1.04s/it]

Episode 2740/3000
Fitnesses: ['-33.25', '-5.65', '-37.35', '-47.07']
100 fitness avgs: ['-19.38', '-19.40', '-19.72', '-19.52']


 92%|██████████████████████████████████████████████████████████████████████████████████████████████████▍        | 2759/3000 [30:55<04:12,  1.05s/it]

Episode 2760/3000
Fitnesses: ['-74.67', '-76.32', '-16.42', '-13.78']
100 fitness avgs: ['-19.79', '-20.12', '-19.52', '-19.16']


 93%|███████████████████████████████████████████████████████████████████████████████████████████████████        | 2779/3000 [31:17<03:49,  1.04s/it]

Episode 2780/3000
Fitnesses: ['-52.22', '-33.43', '-16.48', '-22.94']
100 fitness avgs: ['-19.54', '-19.71', '-19.81', '-19.61']


 93%|███████████████████████████████████████████████████████████████████████████████████████████████████▊       | 2799/3000 [31:39<03:31,  1.05s/it]

Episode 2800/3000
Fitnesses: ['-4.56', '-13.31', '-86.59', '-21.45']
100 fitness avgs: ['-19.71', '-19.59', '-20.43', '-19.67']


 94%|████████████████████████████████████████████████████████████████████████████████████████████████████▌      | 2819/3000 [32:01<03:07,  1.04s/it]

Episode 2820/3000
Fitnesses: ['-21.76', '-12.87', '-13.45', '-8.98']
100 fitness avgs: ['-19.85', '-19.65', '-19.65', '-19.69']


 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 2839/3000 [32:20<02:21,  1.14it/s]

Episode 2840/3000
Fitnesses: ['-4.64', '-635.61', '-111.83', '-3.67']
100 fitness avgs: ['-19.38', '-25.65', '-20.41', '-19.33']


 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 2859/3000 [32:38<02:05,  1.13it/s]

Episode 2860/3000
Fitnesses: ['-174.14', '-39.26', '-22.83', '-13.00']
100 fitness avgs: ['-20.79', '-19.49', '-20.36', '-20.26']


 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 2879/3000 [32:58<01:58,  1.02it/s]

Episode 2880/3000
Fitnesses: ['-118.01', '-17.74', '-9.16', '-25.02']
100 fitness avgs: ['-21.23', '-19.46', '-20.14', '-20.40']


 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍   | 2899/3000 [33:15<01:28,  1.14it/s]

Episode 2900/3000
Fitnesses: ['-38.09', '-52.88', '-85.26', '-3.42']
100 fitness avgs: ['-20.34', '-21.58', '-20.13', '-19.31']


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████   | 2919/3000 [33:36<01:19,  1.02it/s]

Episode 2920/3000
Fitnesses: ['-17.05', '-32.85', '-44.12', '-28.53']
100 fitness avgs: ['-19.46', '-21.88', '-21.99', '-20.60']


 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊  | 2939/3000 [33:52<00:48,  1.25it/s]

Episode 2940/3000
Fitnesses: ['-107.34', '-20.40', '-41.57', '-42.36']
100 fitness avgs: ['-20.50', '-22.06', '-19.85', '-19.85']


 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 2959/3000 [34:09<00:33,  1.21it/s]

Episode 2960/3000
Fitnesses: ['-67.42', '-61.00', '-8.35', '-32.17']
100 fitness avgs: ['-22.54', '-20.27', '-21.95', '-19.99']


 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 2979/3000 [34:26<00:16,  1.28it/s]

Episode 2980/3000
Fitnesses: ['-39.48', '-1.02', '-59.01', '-18.56']
100 fitness avgs: ['-22.09', '-19.74', '-22.29', '-21.88']


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 2999/3000 [34:41<00:00,  1.32it/s]

Episode 3000/3000
Fitnesses: ['-39.95', '-38.59', '-27.65', '-44.30']
100 fitness avgs: ['-20.08', '-22.41', '-19.96', '-22.26']


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 3000/3000 [34:44<00:00,  1.44it/s]


In [4]:
import os

import imageio
import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from PIL import Image, ImageDraw

from agilerl.algorithms.matd3 import MATD3


# Define function to return image
def _label_with_episode_number(frame, episode_num):
    im = Image.fromarray(frame)

    drawer = ImageDraw.Draw(im)

    if np.mean(frame) < 128:
        text_color = (255, 255, 255)
    else:
        text_color = (0, 0, 0)
    drawer.text(
        (im.size[0] / 20, im.size[1] / 18), f"Episode: {episode_num+1}", fill=text_color
    )

    return im


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Configure the environment
    env = simple_speaker_listener_v4.parallel_env(
        continuous_actions=True, render_mode="rgb_array"
    )
    env.reset()
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except Exception:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        discrete_actions = True
        max_action = None
        min_action = None
    except Exception:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        discrete_actions = False
        max_action = [env.action_space(agent).high for agent in env.agents]
        min_action = [env.action_space(agent).low for agent in env.agents]

    # Append number of agents and agent IDs to the initial hyperparameter dictionary
    n_agents = env.num_agents
    agent_ids = env.agents

    # Instantiate an MADDPG object
    matd3 = MATD3(
        state_dim,
        action_dim,
        one_hot,
        n_agents,
        agent_ids,
        max_action,
        min_action,
        discrete_actions,
        device=device,
    )

    # Load the saved algorithm into the MADDPG object
    path = "./models/MATD3/MATD3_trained_agent.pt"
    matd3.loadCheckpoint(path)

    # Define test loop parameters
    episodes = 10  # Number of episodes to test agent on
    max_steps = 25  # Max number of steps to take in the environment in each episode

    rewards = []  # List to collect total episodic reward
    frames = []  # List to collect frames
    indi_agent_rewards = {
        agent_id: [] for agent_id in agent_ids
    }  # Dictionary to collect inidivdual agent rewards

    rewards = []  # List to collect total episodic reward
    frames = []  # List to collect frames
    indi_agent_rewards = {
        agent_id: [] for agent_id in agent_ids
    }  # Dictionary to collect inidivdual agent rewards

    # Test loop for inference
    for ep in range(episodes):
        state, info = env.reset()
        agent_reward = {agent_id: 0 for agent_id in agent_ids}
        score = 0
        for _ in range(max_steps):
            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
            env_defined_actions = (
                info["env_defined_actions"]
                if "env_defined_actions" in info.keys()
                else None
            )

            # Get next action from agent
            cont_actions, discrete_action = matd3.getAction(
                state,
                epsilon=0,
                agent_mask=agent_mask,
                env_defined_actions=env_defined_actions,
            )
            if matd3.discrete_actions:
                action = discrete_action
            else:
                action = cont_actions

            # Save the frame for this step and append to frames list
            frame = env.render()
            frames.append(_label_with_episode_number(frame, episode_num=ep))

            # Take action in environment
            state, reward, termination, truncation, info = env.step(action)

            # Save agent's reward for this step in this episode
            for agent_id, r in reward.items():
                agent_reward[agent_id] += r

            # Determine total score for the episode and then append to rewards list
            score = sum(agent_reward.values())

            # Stop episode if any agents have terminated
            if any(truncation.values()) or any(termination.values()):
                break

        rewards.append(score)

        # Record agent specific episodic reward
        for agent_id in agent_ids:
            indi_agent_rewards[agent_id].append(agent_reward[agent_id])

        print("-" * 15, f"Episode: {ep}", "-" * 15)
        print("Episodic Reward: ", rewards[-1])
        for agent_id, reward_list in indi_agent_rewards.items():
            print(f"{agent_id} reward: {reward_list[-1]}")
    env.close()

    # Save the gif to specified path
    gif_path = "./videos/"
    os.makedirs(gif_path, exist_ok=True)
    imageio.mimwrite(
        os.path.join("./videos/", "speaker_listener.gif"), frames, duration=10
    )

--------------- Episode: 0 ---------------
Episodic Reward:  -81.8284499669539
speaker_0 reward: -40.91422498347695
listener_0 reward: -40.91422498347695
--------------- Episode: 1 ---------------
Episodic Reward:  -186.10305871930248
speaker_0 reward: -93.05152935965124
listener_0 reward: -93.05152935965124
--------------- Episode: 2 ---------------
Episodic Reward:  -20.01420199276354
speaker_0 reward: -10.00710099638177
listener_0 reward: -10.00710099638177
--------------- Episode: 3 ---------------
Episodic Reward:  -18.592696013850134
speaker_0 reward: -9.296348006925067
listener_0 reward: -9.296348006925067
--------------- Episode: 4 ---------------
Episodic Reward:  -22.432836521675
speaker_0 reward: -11.2164182608375
listener_0 reward: -11.2164182608375
--------------- Episode: 5 ---------------
Episodic Reward:  -36.65143788417064
speaker_0 reward: -18.32571894208532
listener_0 reward: -18.32571894208532
--------------- Episode: 6 ---------------
Episodic Reward:  -71.42802934

In [5]:
!python3 -m pip3 install playsound
!python3 -m pip3 install PyObjC

/Users/work/work-worldmodel2023/venv-mpe/bin/python3: No module named pip3
/Users/work/work-worldmodel2023/venv-mpe/bin/python3: No module named pip3


In [6]:
import playsound

while True:
    playsound.playsound("fireworks.mp3")

KeyboardInterrupt: 