# AgileRL Speaker-Listener with MATD3
https://docs.agilerl.com/en/latest/tutorials/pettingzoo/matd3.html

In [2]:
!pip install --upgrade pip



In [3]:
!pip install pettingzoo[mpe]
!pip install agilerl
!pip install imageio

zsh:1: no matches found: pettingzoo[mpe]
Collecting numpy<2.0.0,>=1.24.2 (from agilerl)
  Using cached numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (115 kB)
Using cached numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl (14.0 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.5
    Uninstalling numpy-1.23.5:
      Successfully uninstalled numpy-1.23.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-macos 2.15.0 requires flatbuffers>=23.5.26, but you have flatbuffers 2.0 which is incompatible.
tensorflow-macos 2.15.0 requires keras<2.16,>=2.15.0, but you have keras 2.12.0 which is incompatible.
tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.26.2 which is incompatible.
tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.15.1 which

In [4]:
"""
This tutorial shows how to train an MATD3 agent on the simple speaker listener multi-particle environment.

Authors: Michael (https://github.com/mikepratt1), Nickua (https://github.com/nicku-a)
"""

import os

import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from tqdm import trange

from agilerl.components.multi_agent_replay_buffer import MultiAgentReplayBuffer
from agilerl.hpo.mutation import Mutations
from agilerl.hpo.tournament import TournamentSelection
from agilerl.utils.utils import initialPopulation

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("===== AgileRL Online Multi-Agent Demo =====")

    # Define the network configuration
    NET_CONFIG = {
        "arch": "mlp",  # Network architecture
        "h_size": [32, 32],  # Actor hidden size
    }

    # Define the initial hyperparameters
    INIT_HP = {
        "POPULATION_SIZE": 4,
        "ALGO": "MATD3",  # Algorithm
        # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
        "CHANNELS_LAST": False,
        "BATCH_SIZE": 32,  # Batch size
        "LR": 0.01,  # Learning rate
        "GAMMA": 0.95,  # Discount factor
        "MEMORY_SIZE": 100000,  # Max memory buffer size
        "LEARN_STEP": 5,  # Learning frequency
        "TAU": 0.01,  # For soft update of target parameters
        "POLICY_FREQ": 2,  # Policy frequnecy
    }

    # Define the simple speaker listener environment as a parallel environment
    env = simple_speaker_listener_v4.parallel_env(continuous_actions=True)
    env.reset()

    # Configure the multi-agent algo input arguments
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except Exception:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = True
        INIT_HP["MAX_ACTION"] = None
        INIT_HP["MIN_ACTION"] = None
    except Exception:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = False
        INIT_HP["MAX_ACTION"] = [env.action_space(agent).high for agent in env.agents]
        INIT_HP["MIN_ACTION"] = [env.action_space(agent).low for agent in env.agents]

    # Not applicable to MPE environments, used when images are used for observations (Atari environments)
    if INIT_HP["CHANNELS_LAST"]:
        state_dim = [
            (state_dim[2], state_dim[0], state_dim[1]) for state_dim in state_dim
        ]

    # Append number of agents and agent IDs to the initial hyperparameter dictionary
    INIT_HP["N_AGENTS"] = env.num_agents
    INIT_HP["AGENT_IDS"] = env.agents

    # Create a population ready for evolutionary hyper-parameter optimisation
    pop = initialPopulation(
        INIT_HP["ALGO"],
        state_dim,
        action_dim,
        one_hot,
        NET_CONFIG,
        INIT_HP,
        population_size=INIT_HP["POPULATION_SIZE"],
        device=device,
    )

    # Configure the multi-agent replay buffer
    field_names = ["state", "action", "reward", "next_state", "done"]
    memory = MultiAgentReplayBuffer(
        INIT_HP["MEMORY_SIZE"],
        field_names=field_names,
        agent_ids=INIT_HP["AGENT_IDS"],
        device=device,
    )

    # Instantiate a tournament selection object (used for HPO)
    tournament = TournamentSelection(
        tournament_size=2,  # Tournament selection size
        elitism=True,  # Elitism in tournament selection
        population_size=INIT_HP["POPULATION_SIZE"],  # Population size
        evo_step=1,
    )  # Evaluate using last N fitness scores

    # Instantiate a mutations object (used for HPO)
    mutations = Mutations(
        algo=INIT_HP["ALGO"],
        no_mutation=0.2,  # Probability of no mutation
        architecture=0.2,  # Probability of architecture mutation
        new_layer_prob=0.2,  # Probability of new layer mutation
        parameters=0.2,  # Probability of parameter mutation
        activation=0,  # Probability of activation function mutation
        rl_hp=0.2,  # Probability of RL hyperparameter mutation
        rl_hp_selection=[
            "lr",
            "learn_step",
            "batch_size",
        ],  # RL hyperparams selected for mutation
        mutation_sd=0.1,  # Mutation strength
        agent_ids=INIT_HP["AGENT_IDS"],
        arch=NET_CONFIG["arch"],
        rand_seed=1,
        device=device,
    )

    # Define training loop parameters
    max_episodes = 6000 #500  # Total episodes (default: 6000)
    max_steps = 25  # Maximum steps to take in each episode
    epsilon = 1.0  # Starting epsilon value
    eps_end = 0.1  # Final epsilon value
    eps_decay = 0.995  # Epsilon decay
    evo_epochs = 20  # Evolution frequency
    evo_loop = 1  # Number of evaluation episodes
    elite = pop[0]  # Assign a placeholder "elite" agent

    # Training loop
    for idx_epi in range(max_episodes):
        for agent in pop:  # Loop through population
            state, info = env.reset()  # Reset environment at start of episode
            agent_reward = {agent_id: 0 for agent_id in env.agents}
            if INIT_HP["CHANNELS_LAST"]:
                state = {
                    agent_id: np.moveaxis(np.expand_dims(s, 0), [-1], [-3])
                    for agent_id, s in state.items()
                }

            for _ in range(max_steps):
                agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
                env_defined_actions = (
                    info["env_defined_actions"]
                    if "env_defined_actions" in info.keys()
                    else None
                )

                # Get next action from agent
                cont_actions, discrete_action = agent.getAction(
                    state, epsilon, agent_mask, env_defined_actions
                )
                if agent.discrete_actions:
                    action = discrete_action
                else:
                    action = cont_actions

                next_state, reward, termination, truncation, info = env.step(
                    action
                )  # Act in environment

                # Image processing if necessary for the environment
                if INIT_HP["CHANNELS_LAST"]:
                    state = {agent_id: np.squeeze(s) for agent_id, s in state.items()}
                    next_state = {
                        agent_id: np.moveaxis(ns, [-1], [-3])
                        for agent_id, ns in next_state.items()
                    }

                # Save experiences to replay buffer
                memory.save2memory(state, cont_actions, reward, next_state, termination)

                # Collect the reward
                for agent_id, r in reward.items():
                    agent_reward[agent_id] += r

                # Learn according to learning frequency
                if (memory.counter % agent.learn_step == 0) and (
                    len(memory) >= agent.batch_size
                ):
                    experiences = memory.sample(
                        agent.batch_size
                    )  # Sample replay buffer
                    agent.learn(experiences)  # Learn according to agent's RL algorithm

                # Update the state
                if INIT_HP["CHANNELS_LAST"]:
                    next_state = {
                        agent_id: np.expand_dims(ns, 0)
                        for agent_id, ns in next_state.items()
                    }
                state = next_state

                # Stop episode if any agents have terminated
                if any(truncation.values()) or any(termination.values()):
                    break

            # Save the total episode reward
            score = sum(agent_reward.values())
            agent.scores.append(score)

        # Update epsilon for exploration
        epsilon = max(eps_end, epsilon * eps_decay)

        # Now evolve population if necessary
        if (idx_epi + 1) % evo_epochs == 0:
            # Evaluate population
            fitnesses = [
                agent.test(
                    env,
                    swap_channels=INIT_HP["CHANNELS_LAST"],
                    max_steps=max_steps,
                    loop=evo_loop,
                )
                for agent in pop
            ]

            print(f"Episode {idx_epi + 1}/{max_episodes}")
            print(f'Fitnesses: {["%.2f" % fitness for fitness in fitnesses]}')
            print(
                f'100 fitness avgs: {["%.2f" % np.mean(agent.fitness[-100:]) for agent in pop]}'
            )

            # Tournament selection and population mutation
            elite, pop = tournament.select(pop)
            pop = mutations.mutation(pop)

    # Save the trained algorithm
    path = "./models/MATD3"
    filename = "MATD3_trained_agent.pt"
    os.makedirs(path, exist_ok=True)
    save_path = os.path.join(path, filename)
    elite.saveCheckpoint(save_path)

===== AgileRL Online Multi-Agent Demo =====


  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  0%|          | 20/6000 [00:02<11:12,  8.89it/s]

Episode 20/6000
Fitnesses: ['-5.24', '-164.89', '-133.82', '-431.37']
100 fitness avgs: ['-5.24', '-164.89', '-133.82', '-431.37']


  1%|          | 40/6000 [00:04<13:19,  7.46it/s]

Episode 40/6000
Fitnesses: ['-32.72', '-141.18', '-111.87', '-28.30']
100 fitness avgs: ['-18.98', '-153.03', '-58.55', '-81.06']


  1%|          | 60/6000 [00:07<13:38,  7.26it/s]

Episode 60/6000
Fitnesses: ['-25.15', '-22.86', '-373.87', '-465.51']
100 fitness avgs: ['-62.42', '-46.66', '-137.28', '-257.19']


  1%|▏         | 80/6000 [00:10<16:14,  6.08it/s]

Episode 80/6000
Fitnesses: ['-140.07', '-233.36', '-124.90', '-0.79']
100 fitness avgs: ['-70.01', '-105.16', '-134.18', '-35.19']


  2%|▏         | 100/6000 [00:14<26:24,  3.72it/s]

Episode 100/6000
Fitnesses: ['-140.00', '-19.52', '-18.04', '-72.70']
100 fitness avgs: ['-56.15', '-32.06', '-31.76', '-42.69']


  2%|▏         | 120/6000 [00:18<19:46,  4.96it/s]

Episode 120/6000
Fitnesses: ['-38.89', '-30.13', '-28.14', '-37.08']
100 fitness avgs: ['-32.95', '-31.49', '-40.27', '-32.65']


  2%|▏         | 140/6000 [00:22<18:41,  5.22it/s]

Episode 140/6000
Fitnesses: ['-11.11', '-55.74', '-9.16', '-12.80']
100 fitness avgs: ['-36.10', '-42.48', '-35.82', '-36.34']


  3%|▎         | 160/6000 [00:25<18:01,  5.40it/s]

Episode 160/6000
Fitnesses: ['-40.27', '-16.38', '-27.10', '-35.32']
100 fitness avgs: ['-36.38', '-33.39', '-34.73', '-36.00']


  3%|▎         | 180/6000 [00:29<19:25,  4.99it/s]

Episode 180/6000
Fitnesses: ['-9.71', '-10.25', '-14.14', '-16.28']
100 fitness avgs: ['-30.76', '-30.82', '-31.25', '-31.49']


  3%|▎         | 200/6000 [00:32<18:03,  5.35it/s]

Episode 200/6000
Fitnesses: ['-28.93', '-56.30', '-56.11', '-61.49']
100 fitness avgs: ['-30.58', '-33.37', '-33.30', '-33.89']


  4%|▎         | 220/6000 [00:36<19:07,  5.04it/s]

Episode 220/6000
Fitnesses: ['-28.85', '-9.12', '-70.26', '-85.63']
100 fitness avgs: ['-30.42', '-31.10', '-36.66', '-38.05']


  4%|▍         | 240/6000 [00:39<18:37,  5.15it/s]

Episode 240/6000
Fitnesses: ['-31.14', '-3.01', '-33.26', '-4.59']
100 fitness avgs: ['-31.10', '-28.76', '-30.66', '-28.27']


  4%|▍         | 260/6000 [00:43<19:34,  4.89it/s]

Episode 260/6000
Fitnesses: ['-7.14', '-18.82', '-28.05', '-4.70']
100 fitness avgs: ['-27.09', '-27.99', '-28.70', '-26.46']


  5%|▍         | 280/6000 [00:46<18:40,  5.10it/s]

Episode 280/6000
Fitnesses: ['-11.55', '-38.66', '-73.97', '-9.70']
100 fitness avgs: ['-25.39', '-27.92', '-29.85', '-25.85']


  5%|▌         | 300/6000 [00:50<19:46,  4.80it/s]

Episode 300/6000
Fitnesses: ['-79.02', '-61.17', '-419.74', '-22.34']
100 fitness avgs: ['-29.40', '-27.78', '-52.11', '-25.19']


  5%|▌         | 320/6000 [00:54<18:01,  5.25it/s]

Episode 320/6000
Fitnesses: ['-1.49', '-7.98', '-3.99', '-41.10']
100 fitness avgs: ['-23.71', '-24.11', '-26.29', '-30.13']


  6%|▌         | 340/6000 [00:58<20:32,  4.59it/s]

Episode 340/6000
Fitnesses: ['-108.53', '-45.61', '-33.71', '-48.59']
100 fitness avgs: ['-28.70', '-24.99', '-24.29', '-25.17']


  6%|▌         | 360/6000 [01:02<22:20,  4.21it/s]

Episode 360/6000
Fitnesses: ['-39.14', '-27.15', '-40.26', '-43.67']
100 fitness avgs: ['-25.12', '-25.11', '-29.34', '-25.37']


  6%|▋         | 380/6000 [01:07<32:45,  2.86it/s]

Episode 380/6000
Fitnesses: ['-22.36', '-15.46', '-7.34', '-17.59']
100 fitness avgs: ['-24.97', '-28.61', '-24.18', '-24.72']


  7%|▋         | 400/6000 [01:12<25:54,  3.60it/s]

Episode 400/6000
Fitnesses: ['-4.28', '-21.30', '-9.40', '-82.95']
100 fitness avgs: ['-23.19', '-24.04', '-27.65', '-27.12']


  7%|▋         | 420/6000 [01:17<23:59,  3.88it/s]

Episode 420/6000
Fitnesses: ['-9.64', '-32.26', '-24.19', '-36.18']
100 fitness avgs: ['-22.54', '-27.37', '-23.24', '-28.05']


  7%|▋         | 440/6000 [01:23<31:50,  2.91it/s]

Episode 440/6000
Fitnesses: ['-24.22', '-70.19', '-37.34', '-47.98']
100 fitness avgs: ['-22.62', '-24.71', '-28.48', '-23.70']


  8%|▊         | 460/6000 [01:29<26:54,  3.43it/s]

Episode 460/6000
Fitnesses: ['-54.79', '-58.25', '-50.28', '-75.67']
100 fitness avgs: ['-24.02', '-29.77', '-29.42', '-30.53']


  8%|▊         | 480/6000 [01:35<30:14,  3.04it/s]

Episode 480/6000
Fitnesses: ['-88.74', '-8.51', '-16.28', '-4.94']
100 fitness avgs: ['-31.90', '-23.37', '-28.88', '-23.22']


  8%|▊         | 500/6000 [01:41<30:43,  2.98it/s]

Episode 500/6000
Fitnesses: ['-52.26', '-8.46', '-27.08', '-31.25']
100 fitness avgs: ['-24.38', '-28.06', '-23.52', '-23.69']


  9%|▊         | 520/6000 [01:48<29:21,  3.11it/s]

Episode 520/6000
Fitnesses: ['-26.30', '-25.62', '-82.78', '-13.65']
100 fitness avgs: ['-27.99', '-24.43', '-25.96', '-23.14']


  9%|▉         | 540/6000 [01:54<30:51,  2.95it/s]

Episode 540/6000
Fitnesses: ['-89.99', '-20.96', '-48.35', '-15.18']
100 fitness avgs: ['-25.62', '-23.06', '-24.07', '-27.52']


  9%|▉         | 560/6000 [02:00<27:26,  3.30it/s]

Episode 560/6000
Fitnesses: ['-4.72', '-8.41', '-58.69', '-33.95']
100 fitness avgs: ['-26.70', '-26.84', '-24.33', '-24.43']


 10%|▉         | 580/6000 [02:06<27:01,  3.34it/s]

Episode 580/6000
Fitnesses: ['-37.83', '-20.85', '-135.66', '-90.20']
100 fitness avgs: ['-27.09', '-24.30', '-30.59', '-26.70']


 10%|▉         | 599/6000 [02:12<28:40,  3.14it/s]

Episode 600/6000
Fitnesses: ['-21.76', '-20.90', '-15.26', '-63.16']
100 fitness avgs: ['-24.22', '-24.19', '-26.69', '-27.91']


 10%|█         | 620/6000 [02:17<25:41,  3.49it/s]

Episode 620/6000
Fitnesses: ['-67.87', '-60.97', '-30.67', '-38.82']
100 fitness avgs: ['-28.02', '-25.40', '-24.40', '-27.08']


 11%|█         | 640/6000 [02:22<24:06,  3.71it/s]

Episode 640/6000
Fitnesses: ['-38.22', '-7.64', '-3.73', '-19.59']
100 fitness avgs: ['-24.83', '-23.88', '-26.35', '-26.85']


 11%|█         | 660/6000 [02:28<28:21,  3.14it/s]

Episode 660/6000
Fitnesses: ['-43.94', '-23.71', '-6.51', '-8.28']
100 fitness avgs: ['-26.89', '-23.87', '-23.35', '-23.40']


 11%|█▏        | 680/6000 [02:35<31:32,  2.81it/s]

Episode 680/6000
Fitnesses: ['-18.04', '-36.73', '-2.03', '-41.00']
100 fitness avgs: ['-23.19', '-23.74', '-22.72', '-24.37']


 12%|█▏        | 700/6000 [02:41<27:18,  3.23it/s]

Episode 700/6000
Fitnesses: ['-48.86', '-93.75', '-124.62', '-44.40']
100 fitness avgs: ['-23.47', '-25.74', '-26.09', '-23.80']


 12%|█▏        | 720/6000 [02:46<25:01,  3.52it/s]

Episode 720/6000
Fitnesses: ['-13.10', '-14.94', '-12.23', '-28.97']
100 fitness avgs: ['-23.50', '-25.78', '-23.48', '-23.94']


 12%|█▏        | 740/6000 [02:51<24:33,  3.57it/s]

Episode 740/6000
Fitnesses: ['-12.08', '-38.57', '-20.38', '-30.67']
100 fitness avgs: ['-23.17', '-23.91', '-23.39', '-23.70']


 13%|█▎        | 760/6000 [02:56<21:30,  4.06it/s]

Episode 760/6000
Fitnesses: ['-62.97', '-79.13', '-7.42', '-88.66']
100 fitness avgs: ['-24.22', '-24.64', '-22.75', '-24.89']


 13%|█▎        | 780/6000 [03:02<28:46,  3.02it/s]

Episode 780/6000
Fitnesses: ['-20.47', '-74.25', '-47.03', '-40.51']
100 fitness avgs: ['-22.70', '-25.50', '-23.38', '-23.21']


 13%|█▎        | 800/6000 [03:10<36:15,  2.39it/s]

Episode 800/6000
Fitnesses: ['-67.09', '-45.10', '-24.65', '-23.19']
100 fitness avgs: ['-23.81', '-23.92', '-23.25', '-22.71']


 14%|█▎        | 819/6000 [03:16<30:28,  2.83it/s]

Episode 820/6000
Fitnesses: ['-11.31', '-14.62', '-18.64', '-21.12']
100 fitness avgs: ['-22.43', '-23.69', '-22.61', '-22.67']


 14%|█▍        | 840/6000 [03:25<37:24,  2.30it/s]

Episode 840/6000
Fitnesses: ['-100.37', '-13.57', '-14.18', '-47.32']
100 fitness avgs: ['-24.29', '-22.22', '-22.23', '-23.20']


 14%|█▍        | 860/6000 [03:32<32:08,  2.67it/s]

Episode 860/6000
Fitnesses: ['-36.00', '-59.89', '-12.57', '-9.48']
100 fitness avgs: ['-22.54', '-23.10', '-22.00', '-21.92']


 15%|█▍        | 880/6000 [03:40<36:15,  2.35it/s]

Episode 880/6000
Fitnesses: ['-29.69', '-10.75', '-37.64', '-2.28']
100 fitness avgs: ['-22.10', '-21.74', '-22.28', '-21.48']


 15%|█▍        | 899/6000 [03:46<29:19,  2.90it/s]

Episode 900/6000
Fitnesses: ['-30.80', '-35.22', '-28.10', '-7.82']
100 fitness avgs: ['-21.68', '-21.78', '-21.62', '-21.96']


 15%|█▌        | 920/6000 [03:54<30:13,  2.80it/s]

Episode 920/6000
Fitnesses: ['-46.52', '-53.22', '-37.70', '-61.65']
100 fitness avgs: ['-22.49', '-22.64', '-22.30', '-22.82']


 16%|█▌        | 940/6000 [04:01<31:18,  2.69it/s]

Episode 940/6000
Fitnesses: ['-34.83', '-37.93', '-14.06', '-20.95']
100 fitness avgs: ['-22.57', '-22.96', '-22.31', '-22.46']


 16%|█▌        | 960/6000 [04:07<30:09,  2.79it/s]

Episode 960/6000
Fitnesses: ['-12.78', '-8.89', '-43.52', '-10.02']
100 fitness avgs: ['-22.12', '-22.18', '-22.76', '-22.20']


 16%|█▋        | 980/6000 [04:14<30:20,  2.76it/s]

Episode 980/6000
Fitnesses: ['-59.79', '-44.16', '-35.15', '-38.34']
100 fitness avgs: ['-22.95', '-22.63', '-22.47', '-22.53']


 17%|█▋        | 1000/6000 [04:23<31:35,  2.64it/s]

Episode 1000/6000
Fitnesses: ['-21.85', '-19.59', '-59.25', '-92.21']
100 fitness avgs: ['-22.45', '-22.47', '-23.26', '-23.86']


 17%|█▋        | 1020/6000 [04:29<28:14,  2.94it/s]

Episode 1020/6000
Fitnesses: ['-41.13', '-26.41', '-19.12', '-36.35']
100 fitness avgs: ['-22.84', '-23.33', '-23.18', '-22.74']


 17%|█▋        | 1040/6000 [04:36<31:43,  2.61it/s]

Episode 1040/6000
Fitnesses: ['-19.08', '-13.69', '-32.91', '-40.17']
100 fitness avgs: ['-23.10', '-23.00', '-23.51', '-23.51']


 18%|█▊        | 1060/6000 [04:44<35:56,  2.29it/s]

Episode 1060/6000
Fitnesses: ['-35.76', '-6.99', '-26.95', '-3.57']
100 fitness avgs: ['-23.24', '-22.70', '-23.08', '-22.74']


 18%|█▊        | 1079/6000 [04:51<33:11,  2.47it/s]

Episode 1080/6000
Fitnesses: ['-50.44', '-20.97', '-18.93', '-25.81']
100 fitness avgs: ['-23.25', '-23.04', '-23.00', '-23.13']


 18%|█▊        | 1100/6000 [05:02<36:58,  2.21it/s]

Episode 1100/6000
Fitnesses: ['-40.76', '-72.85', '-21.18', '-23.36']
100 fitness avgs: ['-23.32', '-23.94', '-22.97', '-23.01']


 19%|█▊        | 1120/6000 [05:10<36:13,  2.25it/s]

Episode 1120/6000
Fitnesses: ['-31.20', '-31.55', '-12.25', '-20.81']
100 fitness avgs: ['-23.11', '-23.16', '-22.77', '-22.97']


 19%|█▉        | 1139/6000 [05:16<25:43,  3.15it/s]

Episode 1140/6000
Fitnesses: ['-38.14', '-65.97', '-32.19', '-35.20']
100 fitness avgs: ['-23.04', '-23.53', '-23.13', '-22.99']


 19%|█▉        | 1159/6000 [05:22<22:33,  3.58it/s]

Episode 1160/6000
Fitnesses: ['-3.94', '-2.93', '-28.48', '-38.47']
100 fitness avgs: ['-22.80', '-22.78', '-23.14', '-23.39']


 20%|█▉        | 1179/6000 [05:27<19:41,  4.08it/s]

Episode 1180/6000
Fitnesses: ['-44.45', '-14.37', '-8.87', '-13.94']
100 fitness avgs: ['-23.15', '-22.65', '-22.90', '-22.65']


 20%|██        | 1200/6000 [05:32<20:46,  3.85it/s]

Episode 1200/6000
Fitnesses: ['-23.07', '-18.75', '-13.33', '-8.19']
100 fitness avgs: ['-22.90', '-22.58', '-22.50', '-22.41']


 20%|██        | 1219/6000 [05:36<16:11,  4.92it/s]

Episode 1220/6000
Fitnesses: ['-43.10', '-53.22', '-26.00', '-50.94']
100 fitness avgs: ['-22.75', '-22.92', '-22.47', '-23.05']


 21%|██        | 1239/6000 [05:40<17:22,  4.57it/s]

Episode 1240/6000
Fitnesses: ['-14.43', '-43.19', '-26.55', '-65.91']
100 fitness avgs: ['-22.34', '-23.08', '-23.10', '-23.45']


 21%|██        | 1259/6000 [05:44<15:48,  5.00it/s]

Episode 1260/6000
Fitnesses: ['-70.16', '-41.88', '-15.37', '-27.62']
100 fitness avgs: ['-23.10', '-22.65', '-22.23', '-22.43']


 21%|██▏       | 1280/6000 [05:49<18:52,  4.17it/s]

Episode 1280/6000
Fitnesses: ['-63.03', '-19.39', '-60.40', '-20.82']
100 fitness avgs: ['-22.87', '-22.60', '-22.83', '-22.40']


 22%|██▏       | 1299/6000 [05:53<15:47,  4.96it/s]

Episode 1300/6000
Fitnesses: ['-142.61', '-9.88', '-43.75', '-53.38']
100 fitness avgs: ['-24.45', '-22.41', '-22.93', '-22.88']


 22%|██▏       | 1320/6000 [05:57<17:48,  4.38it/s]

Episode 1320/6000
Fitnesses: ['-21.56', '-32.92', '-93.31', '-34.18']
100 fitness avgs: ['-22.39', '-23.03', '-23.48', '-23.10']


 22%|██▏       | 1339/6000 [06:00<12:03,  6.44it/s]

Episode 1340/6000
Fitnesses: ['-29.42', '-8.42', '-33.48', '-17.53']
100 fitness avgs: ['-22.50', '-22.18', '-23.25', '-23.01']


 23%|██▎       | 1360/6000 [06:03<14:22,  5.38it/s]

Episode 1360/6000
Fitnesses: ['-40.57', '-11.36', '-21.94', '-52.95']
100 fitness avgs: ['-22.45', '-22.03', '-22.18', '-23.45']


 23%|██▎       | 1380/6000 [06:06<16:17,  4.73it/s]

Episode 1380/6000
Fitnesses: ['-18.72', '-13.85', '-29.32', '-40.81']
100 fitness avgs: ['-21.98', '-21.91', '-22.55', '-22.30']


 23%|██▎       | 1400/6000 [06:09<11:36,  6.60it/s]

Episode 1400/6000
Fitnesses: ['-30.71', '-15.06', '-33.60', '-21.34']
100 fitness avgs: ['-22.03', '-21.88', '-22.14', '-21.90']


 24%|██▎       | 1420/6000 [06:12<13:22,  5.71it/s]

Episode 1420/6000
Fitnesses: ['-125.96', '-19.61', '-5.01', '-45.60']
100 fitness avgs: ['-23.34', '-21.87', '-21.66', '-22.21']


 24%|██▍       | 1439/6000 [06:15<13:29,  5.63it/s]

Episode 1440/6000
Fitnesses: ['-10.69', '-11.65', '-25.71', '-15.39']
100 fitness avgs: ['-21.51', '-21.52', '-21.72', '-21.78']


 24%|██▍       | 1460/6000 [06:20<19:05,  3.96it/s]

Episode 1460/6000
Fitnesses: ['-31.87', '-0.76', '-13.36', '-88.87']
100 fitness avgs: ['-21.65', '-21.24', '-21.40', '-22.44']


 25%|██▍       | 1480/6000 [06:24<20:37,  3.65it/s]

Episode 1480/6000
Fitnesses: ['-42.52', '-2.57', '-40.31', '-53.49']
100 fitness avgs: ['-21.52', '-21.14', '-21.65', '-21.83']


 25%|██▌       | 1500/6000 [06:29<20:36,  3.64it/s]

Episode 1500/6000
Fitnesses: ['-55.74', '-29.63', '-14.06', '-6.38']
100 fitness avgs: ['-21.60', '-21.26', '-21.05', '-21.45']


 25%|██▌       | 1519/6000 [06:33<17:03,  4.38it/s]

Episode 1520/6000
Fitnesses: ['-22.42', '-10.88', '-5.14', '-40.74']
100 fitness avgs: ['-21.46', '-21.31', '-20.84', '-21.31']


 26%|██▌       | 1540/6000 [06:38<18:41,  3.98it/s]

Episode 1540/6000
Fitnesses: ['-5.70', '-40.23', '-96.19', '-17.57']
100 fitness avgs: ['-20.64', '-21.71', '-22.43', '-20.80']


 26%|██▌       | 1560/6000 [06:43<21:19,  3.47it/s]

Episode 1560/6000
Fitnesses: ['-14.62', '-18.46', '-36.64', '-32.26']
100 fitness avgs: ['-20.56', '-21.66', '-20.85', '-20.79']


 26%|██▋       | 1580/6000 [06:48<20:31,  3.59it/s]

Episode 1580/6000
Fitnesses: ['-27.37', '-45.82', '-17.04', '-29.15']
100 fitness avgs: ['-20.65', '-20.88', '-20.52', '-20.67']


 27%|██▋       | 1599/6000 [06:52<18:31,  3.96it/s]

Episode 1600/6000
Fitnesses: ['-32.11', '-25.05', '-4.68', '-11.79']
100 fitness avgs: ['-20.67', '-20.71', '-20.32', '-20.54']


 27%|██▋       | 1619/6000 [06:58<18:47,  3.88it/s]

Episode 1620/6000
Fitnesses: ['-80.62', '-35.86', '-33.05', '-67.07']
100 fitness avgs: ['-21.07', '-20.73', '-20.82', '-21.11']


 27%|██▋       | 1639/6000 [07:03<19:08,  3.80it/s]

Episode 1640/6000
Fitnesses: ['-37.39', '-85.65', '-20.06', '-57.46']
100 fitness avgs: ['-21.02', '-21.52', '-20.72', '-21.18']


 28%|██▊       | 1660/6000 [07:09<23:26,  3.09it/s]

Episode 1660/6000
Fitnesses: ['-35.59', '-5.97', '-49.70', '-34.84']
100 fitness avgs: ['-20.90', '-20.84', '-21.07', '-21.19']


 28%|██▊       | 1679/6000 [07:15<25:06,  2.87it/s]

Episode 1680/6000
Fitnesses: ['-28.45', '-79.77', '-11.96', '-49.99']
100 fitness avgs: ['-20.93', '-21.54', '-20.79', '-21.25']


 28%|██▊       | 1699/6000 [07:22<23:40,  3.03it/s]

Episode 1700/6000
Fitnesses: ['-7.79', '-361.91', '-14.32', '-27.70']
100 fitness avgs: ['-20.64', '-25.25', '-20.85', '-21.61']


 29%|██▊       | 1719/6000 [07:28<21:22,  3.34it/s]

Episode 1720/6000
Fitnesses: ['-31.32', '-80.86', '-11.65', '-34.74']
100 fitness avgs: ['-20.77', '-25.90', '-20.74', '-20.80']


 29%|██▉       | 1739/6000 [07:33<18:53,  3.76it/s]

Episode 1740/6000
Fitnesses: ['-105.27', '-80.22', '-31.24', '-5.33']
100 fitness avgs: ['-21.72', '-21.43', '-20.87', '-20.59']


 29%|██▉       | 1759/6000 [07:39<18:47,  3.76it/s]

Episode 1760/6000
Fitnesses: ['-30.97', '-17.68', '-4.30', '-34.29']
100 fitness avgs: ['-20.71', '-20.83', '-20.40', '-21.02']


 30%|██▉       | 1779/6000 [07:44<19:35,  3.59it/s]

Episode 1780/6000
Fitnesses: ['-7.89', '-42.00', '-5.37', '-48.03']
100 fitness avgs: ['-20.26', '-21.07', '-20.23', '-20.71']


 30%|██▉       | 1799/6000 [07:50<20:38,  3.39it/s]

Episode 1800/6000
Fitnesses: ['-8.88', '-77.78', '-16.91', '-12.33']
100 fitness avgs: ['-20.11', '-20.87', '-20.20', '-20.15']


 30%|███       | 1819/6000 [07:56<17:14,  4.04it/s]

Episode 1820/6000
Fitnesses: ['-18.31', '-19.18', '-79.10', '-12.65']
100 fitness avgs: ['-20.09', '-20.19', '-20.84', '-20.03']


 31%|███       | 1839/6000 [08:01<20:15,  3.42it/s]

Episode 1840/6000
Fitnesses: ['-33.86', '-49.49', '-49.71', '-22.25']
100 fitness avgs: ['-20.18', '-20.35', '-20.35', '-20.11']


 31%|███       | 1859/6000 [08:08<20:11,  3.42it/s]

Episode 1860/6000
Fitnesses: ['-27.00', '-82.13', '-54.76', '-2.05']
100 fitness avgs: ['-20.19', '-20.78', '-20.48', '-20.15']


 31%|███▏      | 1879/6000 [08:14<20:10,  3.40it/s]

Episode 1880/6000
Fitnesses: ['-43.03', '-7.83', '-80.47', '-18.97']
100 fitness avgs: ['-20.39', '-20.02', '-21.41', '-20.17']


 32%|███▏      | 1899/6000 [08:20<21:27,  3.19it/s]

Episode 1900/6000
Fitnesses: ['-20.40', '-8.74', '-11.13', '-18.61']
100 fitness avgs: ['-20.02', '-19.90', '-20.29', '-20.00']


 32%|███▏      | 1919/6000 [08:27<21:19,  3.19it/s]

Episode 1920/6000
Fitnesses: ['-17.60', '-24.18', '-3.55', '-36.76']
100 fitness avgs: ['-19.88', '-20.34', '-20.12', '-20.47']


 32%|███▏      | 1939/6000 [08:33<20:32,  3.29it/s]

Episode 1940/6000
Fitnesses: ['-57.86', '-64.06', '-7.96', '-14.62']
100 fitness avgs: ['-20.51', '-20.33', '-19.75', '-20.28']


 33%|███▎      | 1959/6000 [08:39<19:56,  3.38it/s]

Episode 1960/6000
Fitnesses: ['-29.94', '-97.86', '-50.80', '-14.27']
100 fitness avgs: ['-19.86', '-20.55', '-20.07', '-20.22']


 33%|███▎      | 1979/6000 [08:45<20:06,  3.33it/s]

Episode 1980/6000
Fitnesses: ['-12.04', '-22.08', '-64.03', '-21.57']
100 fitness avgs: ['-20.13', '-19.88', '-20.99', '-19.87']


 33%|███▎      | 1999/6000 [08:51<20:04,  3.32it/s]

Episode 2000/6000
Fitnesses: ['-42.03', '-3.58', '-10.87', '-43.92']
100 fitness avgs: ['-20.35', '-19.97', '-20.04', '-20.37']


 34%|███▎      | 2019/6000 [08:57<18:50,  3.52it/s]

Episode 2020/6000
Fitnesses: ['-79.26', '-21.16', '-25.20', '-34.42']
100 fitness avgs: ['-20.71', '-20.51', '-20.24', '-20.33']


 34%|███▍      | 2039/6000 [09:02<16:58,  3.89it/s]

Episode 2040/6000
Fitnesses: ['-8.21', '-20.64', '-15.32', '-35.56']
100 fitness avgs: ['-19.47', '-19.33', '-19.37', '-19.57']


 34%|███▍      | 2059/6000 [09:08<17:43,  3.71it/s]

Episode 2060/6000
Fitnesses: ['-31.27', '-43.09', '-46.20', '-7.78']
100 fitness avgs: ['-19.56', '-19.57', '-19.71', '-19.32']


 35%|███▍      | 2079/6000 [09:14<17:40,  3.70it/s]

Episode 2080/6000
Fitnesses: ['-6.60', '-2.45', '-16.61', '-14.62']
100 fitness avgs: ['-19.38', '-19.34', '-19.48', '-19.85']


 35%|███▍      | 2099/6000 [09:19<17:13,  3.77it/s]

Episode 2100/6000
Fitnesses: ['-58.54', '-14.08', '-31.84', '-7.58']
100 fitness avgs: ['-19.20', '-19.26', '-18.97', '-18.69']


 35%|███▌      | 2119/6000 [09:24<15:34,  4.15it/s]

Episode 2120/6000
Fitnesses: ['-14.54', '-37.73', '-31.34', '-24.53']
100 fitness avgs: ['-18.55', '-19.07', '-19.29', '-18.65']


 36%|███▌      | 2139/6000 [09:29<16:05,  4.00it/s]

Episode 2140/6000
Fitnesses: ['-65.10', '-25.47', '-52.93', '-16.60']
100 fitness avgs: ['-19.11', '-18.72', '-19.09', '-19.37']


 36%|███▌      | 2159/6000 [09:34<14:27,  4.43it/s]

Episode 2160/6000
Fitnesses: ['-12.19', '-5.25', '-74.32', '-2.72']
100 fitness avgs: ['-19.32', '-19.25', '-19.69', '-18.58']


 36%|███▋      | 2179/6000 [09:39<15:42,  4.05it/s]

Episode 2180/6000
Fitnesses: ['-21.20', '-13.47', '-86.49', '-62.09']
100 fitness avgs: ['-18.69', '-19.29', '-20.09', '-19.10']


 37%|███▋      | 2199/6000 [09:44<15:05,  4.20it/s]

Episode 2200/6000
Fitnesses: ['-97.82', '-31.69', '-148.50', '-6.84']
100 fitness avgs: ['-19.98', '-19.32', '-20.30', '-19.07']


 37%|███▋      | 2219/6000 [09:49<14:03,  4.48it/s]

Episode 2220/6000
Fitnesses: ['-16.46', '-47.43', '-43.04', '-64.05']
100 fitness avgs: ['-18.95', '-19.26', '-19.21', '-20.65']


 37%|███▋      | 2239/6000 [09:54<14:55,  4.20it/s]

Episode 2240/6000
Fitnesses: ['-12.60', '-45.86', '-18.12', '-23.11']
100 fitness avgs: ['-19.03', '-19.67', '-19.35', '-19.40']


 38%|███▊      | 2259/6000 [09:59<14:32,  4.29it/s]

Episode 2260/6000
Fitnesses: ['-15.06', '-34.74', '-49.77', '-5.18']
100 fitness avgs: ['-19.13', '-19.65', '-19.85', '-19.35']


 38%|███▊      | 2279/6000 [10:04<13:54,  4.46it/s]

Episode 2280/6000
Fitnesses: ['-56.94', '-8.30', '-2.67', '-28.86']
100 fitness avgs: ['-19.81', '-19.10', '-19.56', '-19.53']


 38%|███▊      | 2299/6000 [10:09<14:00,  4.40it/s]

Episode 2300/6000
Fitnesses: ['-25.95', '-29.23', '-23.27', '-24.82']
100 fitness avgs: ['-19.60', '-19.63', '-19.11', '-19.58']


 39%|███▊      | 2319/6000 [10:13<10:21,  5.92it/s]

Episode 2320/6000
Fitnesses: ['-37.81', '-16.14', '-25.57', '-46.75']
100 fitness avgs: ['-19.47', '-19.25', '-19.35', '-20.05']


 39%|███▉      | 2339/6000 [10:16<07:48,  7.82it/s]

Episode 2340/6000
Fitnesses: ['-39.67', '-253.43', '-25.56', '-66.43']
100 fitness avgs: ['-19.31', '-21.55', '-19.27', '-19.68']


 39%|███▉      | 2359/6000 [10:19<08:43,  6.96it/s]

Episode 2360/6000
Fitnesses: ['-9.50', '-19.13', '-4.71', '-71.51']
100 fitness avgs: ['-18.97', '-19.11', '-18.92', '-20.00']


 40%|███▉      | 2379/6000 [10:22<07:19,  8.24it/s]

Episode 2380/6000
Fitnesses: ['-12.59', '-16.36', '-60.47', '-49.61']
100 fitness avgs: ['-18.98', '-19.06', '-19.45', '-19.39']


 40%|███▉      | 2399/6000 [10:25<07:12,  8.32it/s]

Episode 2400/6000
Fitnesses: ['-28.52', '-2.07', '-28.05', '-41.09']
100 fitness avgs: ['-19.22', '-19.04', '-19.21', '-19.43']


 40%|████      | 2419/6000 [10:28<07:20,  8.12it/s]

Episode 2420/6000
Fitnesses: ['-76.00', '-34.29', '-31.61', '-11.60']
100 fitness avgs: ['-19.70', '-19.29', '-19.26', '-19.24']


 41%|████      | 2439/6000 [10:30<07:05,  8.37it/s]

Episode 2440/6000
Fitnesses: ['-37.68', '-42.57', '-3.40', '-67.01']
100 fitness avgs: ['-19.37', '-19.47', '-19.05', '-19.67']


 41%|████      | 2459/6000 [10:34<07:55,  7.45it/s]

Episode 2460/6000
Fitnesses: ['-58.29', '-53.28', '-3.02', '-46.99']
100 fitness avgs: ['-19.09', '-19.04', '-18.85', '-19.39']


 41%|████▏     | 2479/6000 [10:36<07:19,  8.01it/s]

Episode 2480/6000
Fitnesses: ['-6.13', '-10.18', '-5.63', '-32.70']
100 fitness avgs: ['-18.83', '-19.41', '-18.83', '-19.33']


 42%|████▏     | 2499/6000 [10:39<06:38,  8.78it/s]

Episode 2500/6000
Fitnesses: ['-47.94', '-242.67', '-84.79', '-8.75']
100 fitness avgs: ['-19.03', '-20.98', '-19.40', '-18.64']


 42%|████▏     | 2519/6000 [10:42<07:26,  7.80it/s]

Episode 2520/6000
Fitnesses: ['-31.36', '-31.49', '-34.41', '-13.12']
100 fitness avgs: ['-18.82', '-18.82', '-18.85', '-19.03']


 42%|████▏     | 2539/6000 [10:45<07:18,  7.88it/s]

Episode 2540/6000
Fitnesses: ['-6.64', '-45.86', '-6.79', '-24.20']
100 fitness avgs: ['-18.61', '-19.00', '-18.40', '-18.58']


 43%|████▎     | 2559/6000 [10:48<08:44,  6.56it/s]

Episode 2560/6000
Fitnesses: ['-35.98', '-107.47', '-20.55', '-32.40']
100 fitness avgs: ['-18.63', '-19.31', '-18.48', '-18.39']


 43%|████▎     | 2579/6000 [10:52<07:36,  7.49it/s]

Episode 2580/6000
Fitnesses: ['-27.75', '-18.06', '-20.87', '-57.27']
100 fitness avgs: ['-18.55', '-18.45', '-18.48', '-18.84']


 43%|████▎     | 2599/6000 [10:54<07:00,  8.09it/s]

Episode 2600/6000
Fitnesses: ['-16.98', '-51.92', '-24.01', '-24.96']
100 fitness avgs: ['-18.41', '-18.76', '-18.58', '-18.59']


 44%|████▎     | 2619/6000 [10:57<07:15,  7.77it/s]

Episode 2620/6000
Fitnesses: ['-29.21', '-32.90', '-29.99', '-9.49']
100 fitness avgs: ['-18.40', '-18.61', '-18.40', '-18.20']


 44%|████▍     | 2639/6000 [11:01<07:04,  7.93it/s]

Episode 2640/6000
Fitnesses: ['-17.96', '-52.72', '-25.31', '-16.62']
100 fitness avgs: ['-18.30', '-18.65', '-18.58', '-18.49']


 44%|████▍     | 2659/6000 [11:03<06:41,  8.32it/s]

Episode 2660/6000
Fitnesses: ['-37.04', '-7.99', '-93.45', '-8.03']
100 fitness avgs: ['-18.80', '-18.51', '-19.45', '-18.51']


 45%|████▍     | 2679/6000 [11:07<08:20,  6.63it/s]

Episode 2680/6000
Fitnesses: ['-40.76', '-14.67', '-109.39', '-146.32']
100 fitness avgs: ['-18.74', '-18.47', '-19.42', '-19.79']


 45%|████▍     | 2699/6000 [11:10<09:23,  5.86it/s]

Episode 2700/6000
Fitnesses: ['-8.58', '-110.84', '-17.57', '-76.10']
100 fitness avgs: ['-18.12', '-20.46', '-19.15', '-20.11']


 45%|████▌     | 2719/6000 [11:14<09:23,  5.82it/s]

Episode 2720/6000
Fitnesses: ['-65.61', '-64.89', '-81.25', '-55.94']
100 fitness avgs: ['-18.65', '-18.64', '-21.15', '-20.89']


 46%|████▌     | 2739/6000 [11:19<10:02,  5.41it/s]

Episode 2740/6000
Fitnesses: ['-10.20', '-7.21', '-38.32', '-48.31']
100 fitness avgs: ['-20.87', '-20.84', '-21.16', '-19.01']


 46%|████▌     | 2759/6000 [11:23<10:36,  5.09it/s]

Episode 2760/6000
Fitnesses: ['-9.79', '-28.89', '-8.50', '-11.89']
100 fitness avgs: ['-20.87', '-21.06', '-20.88', '-21.20']


 46%|████▋     | 2779/6000 [11:28<10:25,  5.15it/s]

Episode 2780/6000
Fitnesses: ['-25.03', '-13.99', '-58.71', '-44.46']
100 fitness avgs: ['-20.93', '-20.80', '-21.25', '-21.44']


 47%|████▋     | 2799/6000 [11:33<11:52,  4.49it/s]

Episode 2800/6000
Fitnesses: ['-9.69', '-3.62', '-12.14', '-21.47']
100 fitness avgs: ['-20.67', '-20.73', '-21.14', '-20.79']


 47%|████▋     | 2819/6000 [11:37<11:00,  4.81it/s]

Episode 2820/6000
Fitnesses: ['-6.83', '-11.71', '-97.23', '-20.51']
100 fitness avgs: ['-20.69', '-20.79', '-21.59', '-20.83']


 47%|████▋     | 2839/6000 [11:42<11:45,  4.48it/s]

Episode 2840/6000
Fitnesses: ['-23.58', '-23.99', '-32.51', '-43.40']
100 fitness avgs: ['-20.79', '-20.89', '-20.98', '-20.99']


 48%|████▊     | 2859/6000 [11:47<12:23,  4.22it/s]

Episode 2860/6000
Fitnesses: ['-21.06', '-6.52', '-33.67', '-72.00']
100 fitness avgs: ['-20.91', '-20.76', '-21.14', '-21.42']


 48%|████▊     | 2879/6000 [11:52<10:56,  4.75it/s]

Episode 2880/6000
Fitnesses: ['-21.85', '-33.60', '-21.66', '-6.98']
100 fitness avgs: ['-20.60', '-20.72', '-20.75', '-20.60']


 48%|████▊     | 2899/6000 [11:56<09:48,  5.27it/s]

Episode 2900/6000
Fitnesses: ['-58.35', '-6.49', '-42.40', '-2.05']
100 fitness avgs: ['-21.10', '-20.59', '-20.95', '-20.69']


 49%|████▊     | 2919/6000 [12:01<10:12,  5.03it/s]

Episode 2920/6000
Fitnesses: ['-8.05', '-9.78', '-27.01', '-1.35']
100 fitness avgs: ['-20.30', '-20.32', '-20.91', '-20.14']


 49%|████▉     | 2939/6000 [12:07<16:15,  3.14it/s]

Episode 2940/6000
Fitnesses: ['-15.30', '-19.90', '-34.16', '-6.85']
100 fitness avgs: ['-20.08', '-20.13', '-20.27', '-20.00']


 49%|████▉     | 2959/6000 [12:15<18:10,  2.79it/s]

Episode 2960/6000
Fitnesses: ['-22.47', '-55.48', '-7.98', '-30.17']
100 fitness avgs: ['-20.12', '-20.54', '-20.11', '-20.20']


 50%|████▉     | 2979/6000 [12:22<15:41,  3.21it/s]

Episode 2980/6000
Fitnesses: ['-19.55', '-63.38', '-5.10', '-11.98']
100 fitness avgs: ['-19.92', '-20.36', '-19.87', '-19.93']


 50%|████▉     | 2999/6000 [12:28<14:25,  3.47it/s]

Episode 3000/6000
Fitnesses: ['-10.52', '-59.68', '-14.26', '-42.45']
100 fitness avgs: ['-19.38', '-19.87', '-19.42', '-19.70']


 50%|█████     | 3019/6000 [12:35<17:10,  2.89it/s]

Episode 3020/6000
Fitnesses: ['-40.55', '-49.25', '-6.43', '-36.78']
100 fitness avgs: ['-19.59', '-20.17', '-19.25', '-19.56']


 51%|█████     | 3039/6000 [12:42<15:33,  3.17it/s]

Episode 3040/6000
Fitnesses: ['-17.14', '-103.08', '-43.05', '-87.99']
100 fitness avgs: ['-19.29', '-20.49', '-20.46', '-20.30']


 51%|█████     | 3059/6000 [12:47<12:20,  3.97it/s]

Episode 3060/6000
Fitnesses: ['-44.28', '-7.75', '-52.17', '-31.50']
100 fitness avgs: ['-19.46', '-20.27', '-20.55', '-20.51']


 51%|█████▏    | 3079/6000 [12:50<05:59,  8.12it/s]

Episode 3080/6000
Fitnesses: ['-39.60', '-28.17', '-12.20', '-16.89']
100 fitness avgs: ['-20.48', '-20.37', '-20.21', '-20.49']


 52%|█████▏    | 3099/6000 [12:53<06:02,  8.00it/s]

Episode 3100/6000
Fitnesses: ['-24.24', '-17.29', '-7.41', '-29.07']
100 fitness avgs: ['-20.21', '-20.30', '-20.05', '-20.54']


 52%|█████▏    | 3119/6000 [12:57<06:04,  7.90it/s]

Episode 3120/6000
Fitnesses: ['-25.79', '-13.31', '-35.51', '-14.33']
100 fitness avgs: ['-20.10', '-19.97', '-20.36', '-19.98']


 52%|█████▏    | 3139/6000 [13:01<06:56,  6.87it/s]

Episode 3140/6000
Fitnesses: ['-90.35', '-20.72', '-34.83', '-16.82']
100 fitness avgs: ['-20.55', '-19.87', '-20.39', '-19.83']


 53%|█████▎    | 3159/6000 [13:05<07:28,  6.34it/s]

Episode 3160/6000
Fitnesses: ['-18.00', '-32.95', '-24.15', '-69.42']
100 fitness avgs: ['-19.97', '-20.68', '-20.07', '-21.04']


 53%|█████▎    | 3179/6000 [13:09<08:33,  5.49it/s]

Episode 3180/6000
Fitnesses: ['-25.99', '-9.28', '-60.88', '-48.01']
100 fitness avgs: ['-20.08', '-19.92', '-20.43', '-21.01']


 53%|█████▎    | 3199/6000 [13:14<09:33,  4.88it/s]

Episode 3200/6000
Fitnesses: ['-34.30', '-4.96', '-15.22', '-18.87']
100 fitness avgs: ['-20.18', '-20.05', '-19.99', '-20.02']


 54%|█████▎    | 3219/6000 [13:19<10:08,  4.57it/s]

Episode 3220/6000
Fitnesses: ['-14.77', '-8.44', '-11.15', '-32.32']
100 fitness avgs: ['-19.94', '-19.88', '-19.90', '-20.09']


 54%|█████▍    | 3239/6000 [13:24<09:47,  4.70it/s]

Episode 3240/6000
Fitnesses: ['-41.34', '-33.60', '-70.08', '-23.99']
100 fitness avgs: ['-20.15', '-20.07', '-20.46', '-19.97']


 54%|█████▍    | 3259/6000 [13:29<09:22,  4.87it/s]

Episode 3260/6000
Fitnesses: ['-137.65', '-11.38', '-19.10', '-16.95']
100 fitness avgs: ['-20.93', '-19.76', '-19.74', '-19.82']


 55%|█████▍    | 3279/6000 [13:33<10:04,  4.50it/s]

Episode 3280/6000
Fitnesses: ['-2.23', '-39.41', '-84.18', '-17.43']
100 fitness avgs: ['-19.59', '-20.02', '-20.39', '-19.72']


 55%|█████▍    | 3299/6000 [13:38<09:19,  4.83it/s]

Episode 3300/6000
Fitnesses: ['-5.47', '-82.73', '-17.54', '-45.34']
100 fitness avgs: ['-19.55', '-20.32', '-19.80', '-20.37']


 55%|█████▌    | 3319/6000 [13:43<08:39,  5.17it/s]

Episode 3320/6000
Fitnesses: ['-39.77', '-44.28', '-30.04', '-2.98']
100 fitness avgs: ['-19.73', '-19.77', '-19.63', '-19.62']


 56%|█████▌    | 3339/6000 [13:46<06:43,  6.60it/s]

Episode 3340/6000
Fitnesses: ['-14.04', '-22.36', '-33.02', '-10.02']
100 fitness avgs: ['-19.67', '-19.87', '-19.86', '-19.65']


 56%|█████▌    | 3359/6000 [13:51<12:01,  3.66it/s]

Episode 3360/6000
Fitnesses: ['-25.43', '-54.16', '-330.91', '-13.26']
100 fitness avgs: ['-19.79', '-20.10', '-22.87', '-19.67']


 56%|█████▋    | 3379/6000 [13:59<11:52,  3.68it/s]

Episode 3380/6000
Fitnesses: ['-20.91', '-3.64', '-17.84', '-63.04']
100 fitness avgs: ['-19.74', '-19.69', '-19.71', '-20.16']


 57%|█████▋    | 3399/6000 [14:05<10:13,  4.24it/s]

Episode 3400/6000
Fitnesses: ['-26.66', '-22.68', '-17.82', '-29.55']
100 fitness avgs: ['-19.74', '-19.72', '-19.65', '-20.24']


 57%|█████▋    | 3419/6000 [14:10<10:02,  4.28it/s]

Episode 3420/6000
Fitnesses: ['-14.41', '-5.54', '-15.25', '-5.52']
100 fitness avgs: ['-19.75', '-19.66', '-19.75', '-19.66']


 57%|█████▋    | 3439/6000 [14:15<09:34,  4.46it/s]

Episode 3440/6000
Fitnesses: ['-51.10', '-25.68', '-10.04', '-179.08']
100 fitness avgs: ['-20.06', '-19.81', '-19.74', '-21.43']


 58%|█████▊    | 3459/6000 [14:21<11:03,  3.83it/s]

Episode 3460/6000
Fitnesses: ['-61.45', '-32.83', '-53.02', '-41.60']
100 fitness avgs: ['-20.22', '-19.93', '-20.14', '-20.09']


 58%|█████▊    | 3479/6000 [14:26<07:54,  5.32it/s]

Episode 3480/6000
Fitnesses: ['-11.35', '-31.99', '-34.95', '-1.71']
100 fitness avgs: ['-20.02', '-20.23', '-20.41', '-19.93']


 58%|█████▊    | 3499/6000 [14:29<06:46,  6.15it/s]

Episode 3500/6000
Fitnesses: ['-13.86', '-20.51', '-40.76', '-21.37']
100 fitness avgs: ['-19.92', '-20.09', '-20.19', '-20.00']


 59%|█████▊    | 3519/6000 [14:33<06:17,  6.57it/s]

Episode 3520/6000
Fitnesses: ['-29.07', '-37.76', '-8.74', '-9.66']
100 fitness avgs: ['-20.16', '-20.52', '-20.03', '-20.13']


 59%|█████▉    | 3539/6000 [14:36<05:29,  7.47it/s]

Episode 3540/6000
Fitnesses: ['-6.53', '-47.13', '-60.13', '-2.46']
100 fitness avgs: ['-20.04', '-20.45', '-20.58', '-20.10']


 59%|█████▉    | 3559/6000 [14:39<06:16,  6.49it/s]

Episode 3560/6000
Fitnesses: ['-13.50', '-2.54', '-22.70', '-27.33']
100 fitness avgs: ['-20.09', '-20.33', '-20.18', '-20.17']


 60%|█████▉    | 3579/6000 [14:43<07:18,  5.52it/s]

Episode 3580/6000
Fitnesses: ['-380.52', '-72.19', '-46.62', '-4.58']
100 fitness avgs: ['-23.86', '-20.54', '-20.52', '-19.86']


 60%|█████▉    | 3599/6000 [14:47<06:30,  6.16it/s]

Episode 3600/6000
Fitnesses: ['-22.86', '-16.38', '-24.05', '-13.62']
100 fitness avgs: ['-19.97', '-20.58', '-20.66', '-19.88']


 60%|██████    | 3619/6000 [14:51<07:17,  5.44it/s]

Episode 3620/6000
Fitnesses: ['-54.24', '-9.31', '-38.26', '-46.27']
100 fitness avgs: ['-20.06', '-19.61', '-20.61', '-20.69']


 61%|██████    | 3639/6000 [14:55<07:02,  5.59it/s]

Episode 3640/6000
Fitnesses: ['-51.57', '-14.47', '-11.41', '-27.06']
100 fitness avgs: ['-19.93', '-20.63', '-20.60', '-20.68']


 61%|██████    | 3659/6000 [14:59<07:02,  5.55it/s]

Episode 3660/6000
Fitnesses: ['-22.40', '-9.34', '-14.14', '-6.00']
100 fitness avgs: ['-20.47', '-20.37', '-20.39', '-20.38']


 61%|██████▏   | 3679/6000 [15:02<06:29,  5.95it/s]

Episode 3680/6000
Fitnesses: ['-6.85', '-32.25', '-66.32', '-2.91']
100 fitness avgs: ['-20.33', '-20.58', '-20.92', '-20.28']


 62%|██████▏   | 3699/6000 [15:06<05:38,  6.80it/s]

Episode 3700/6000
Fitnesses: ['-3.89', '-28.40', '-12.78', '-12.47']
100 fitness avgs: ['-20.24', '-20.48', '-20.33', '-20.32']


 62%|██████▏   | 3719/6000 [15:10<06:27,  5.88it/s]

Episode 3720/6000
Fitnesses: ['-50.14', '-18.41', '-33.96', '-28.25']
100 fitness avgs: ['-20.43', '-20.35', '-20.26', '-20.29']


 62%|██████▏   | 3739/6000 [15:13<04:52,  7.72it/s]

Episode 3740/6000
Fitnesses: ['-37.54', '-48.85', '-36.99', '-8.63']
100 fitness avgs: ['-20.68', '-20.73', '-20.67', '-20.39']


 63%|██████▎   | 3759/6000 [15:16<06:51,  5.44it/s]

Episode 3760/6000
Fitnesses: ['-19.61', '-14.86', '-60.29', '-47.88']
100 fitness avgs: ['-20.54', '-20.78', '-21.23', '-20.82']


 63%|██████▎   | 3779/6000 [15:20<05:11,  7.13it/s]

Episode 3780/6000
Fitnesses: ['-108.09', '-20.96', '-13.84', '-109.09']
100 fitness avgs: ['-21.81', '-20.70', '-20.63', '-21.58']


 63%|██████▎   | 3799/6000 [15:23<04:37,  7.92it/s]

Episode 3800/6000
Fitnesses: ['-24.32', '-13.44', '-46.43', '-28.05']
100 fitness avgs: ['-20.78', '-20.67', '-21.00', '-20.89']


 64%|██████▎   | 3819/6000 [15:26<04:27,  8.15it/s]

Episode 3820/6000
Fitnesses: ['-63.57', '-49.63', '-16.66', '-16.82']
100 fitness avgs: ['-21.18', '-21.04', '-20.93', '-20.71']


 64%|██████▍   | 3839/6000 [15:28<04:31,  7.96it/s]

Episode 3840/6000
Fitnesses: ['-22.00', '-105.85', '-20.31', '-7.62']
100 fitness avgs: ['-20.65', '-21.74', '-20.64', '-20.76']


 64%|██████▍   | 3859/6000 [15:31<04:08,  8.63it/s]

Episode 3860/6000
Fitnesses: ['-84.78', '-70.63', '-27.04', '-37.23']
100 fitness avgs: ['-21.59', '-21.45', '-21.01', '-21.11']


 65%|██████▍   | 3879/6000 [15:35<05:13,  6.76it/s]

Episode 3880/6000
Fitnesses: ['-6.99', '-43.24', '-29.48', '-27.45']
100 fitness avgs: ['-20.65', '-21.01', '-20.98', '-20.86']


 65%|██████▍   | 3899/6000 [15:38<04:50,  7.23it/s]

Episode 3900/6000
Fitnesses: ['-71.03', '-2.25', '-11.84', '-21.59']
100 fitness avgs: ['-21.25', '-20.56', '-20.66', '-21.12']


 65%|██████▌   | 3919/6000 [15:41<03:46,  9.19it/s]

Episode 3920/6000
Fitnesses: ['-13.43', '-126.40', '-19.41', '-25.58']
100 fitness avgs: ['-20.45', '-21.68', '-20.61', '-20.67']


 66%|██████▌   | 3939/6000 [15:44<05:08,  6.68it/s]

Episode 3940/6000
Fitnesses: ['-11.92', '-353.83', '-11.95', '-62.30']
100 fitness avgs: ['-20.43', '-23.85', '-20.65', '-20.93']


 66%|██████▌   | 3959/6000 [15:47<04:30,  7.53it/s]

Episode 3960/6000
Fitnesses: ['-24.45', '-51.70', '-46.05', '-2.28']
100 fitness avgs: ['-20.53', '-20.80', '-21.25', '-20.31']


 66%|██████▋   | 3979/6000 [15:50<03:46,  8.93it/s]

Episode 3980/6000
Fitnesses: ['-18.76', '-10.89', '-50.25', '-110.73']
100 fitness avgs: ['-20.38', '-20.30', '-21.18', '-21.29']


 67%|██████▋   | 3999/6000 [15:53<03:44,  8.92it/s]

Episode 4000/6000
Fitnesses: ['-70.47', '-45.00', '-140.50', '-16.13']
100 fitness avgs: ['-20.58', '-20.33', '-22.17', '-20.04']


 67%|██████▋   | 4019/6000 [15:56<04:21,  7.56it/s]

Episode 4020/6000
Fitnesses: ['-73.92', '-48.55', '-11.71', '-37.29']
100 fitness avgs: ['-20.56', '-20.85', '-19.94', '-20.49']


 67%|██████▋   | 4039/6000 [15:59<03:47,  8.61it/s]

Episode 4040/6000
Fitnesses: ['-16.01', '-25.69', '-15.10', '-29.53']
100 fitness avgs: ['-20.02', '-20.66', '-20.01', '-20.70']


 68%|██████▊   | 4059/6000 [16:02<03:16,  9.89it/s]

Episode 4060/6000
Fitnesses: ['-42.40', '-16.12', '-51.93', '-55.24']
100 fitness avgs: ['-19.97', '-19.72', '-20.07', '-20.10']


 68%|██████▊   | 4079/6000 [16:05<03:54,  8.18it/s]

Episode 4080/6000
Fitnesses: ['-6.86', '-25.81', '-63.38', '-21.12']
100 fitness avgs: ['-19.64', '-19.83', '-20.46', '-19.79']


 68%|██████▊   | 4099/6000 [16:08<05:25,  5.83it/s]

Episode 4100/6000
Fitnesses: ['-8.23', '-33.34', '-58.84', '-5.18']
100 fitness avgs: ['-19.58', '-19.84', '-20.23', '-19.55']


 69%|██████▊   | 4119/6000 [16:12<05:27,  5.74it/s]

Episode 4120/6000
Fitnesses: ['-55.77', '-22.80', '-40.49', '-14.03']
100 fitness avgs: ['-19.80', '-19.47', '-19.65', '-19.38']


 69%|██████▉   | 4139/6000 [16:16<04:32,  6.83it/s]

Episode 4140/6000
Fitnesses: ['-45.07', '-31.58', '-100.04', '-18.57']
100 fitness avgs: ['-19.67', '-19.62', '-20.21', '-19.66']


 69%|██████▉   | 4159/6000 [16:19<04:10,  7.36it/s]

Episode 4160/6000
Fitnesses: ['-24.24', '-4.27', '-56.58', '-11.88']
100 fitness avgs: ['-19.85', '-19.61', '-20.18', '-19.73']


 70%|██████▉   | 4179/6000 [16:22<05:16,  5.75it/s]

Episode 4180/6000
Fitnesses: ['-92.31', '-29.00', '-21.70', '-0.14']
100 fitness avgs: ['-20.40', '-19.76', '-19.94', '-19.72']


 70%|██████▉   | 4199/6000 [16:26<04:10,  7.18it/s]

Episode 4200/6000
Fitnesses: ['-49.68', '-30.44', '-7.26', '-46.12']
100 fitness avgs: ['-20.15', '-20.17', '-19.94', '-20.33']


 70%|███████   | 4219/6000 [16:29<04:04,  7.29it/s]

Episode 4220/6000
Fitnesses: ['-31.00', '-30.98', '-83.44', '-37.68']
100 fitness avgs: ['-20.09', '-20.47', '-20.61', '-20.39']


 71%|███████   | 4239/6000 [16:32<03:46,  7.76it/s]

Episode 4240/6000
Fitnesses: ['-40.17', '-286.27', '-52.34', '-17.40']
100 fitness avgs: ['-20.75', '-23.12', '-20.87', '-20.52']


 71%|███████   | 4259/6000 [16:35<04:54,  5.91it/s]

Episode 4260/6000
Fitnesses: ['-72.72', '-38.36', '-18.82', '-45.36']
100 fitness avgs: ['-21.10', '-20.98', '-20.56', '-21.18']


 71%|███████▏  | 4279/6000 [16:40<04:23,  6.52it/s]

Episode 4280/6000
Fitnesses: ['-12.33', '-48.30', '-46.10', '-24.21']
100 fitness avgs: ['-20.60', '-21.38', '-20.94', '-20.72']


 72%|███████▏  | 4299/6000 [16:45<06:07,  4.63it/s]

Episode 4300/6000
Fitnesses: ['-21.52', '-11.42', '-6.14', '-46.34']
100 fitness avgs: ['-20.58', '-20.82', '-20.77', '-20.83']


 72%|███████▏  | 4319/6000 [16:49<05:27,  5.13it/s]

Episode 4320/6000
Fitnesses: ['-21.11', '-15.77', '-28.46', '-25.14']
100 fitness avgs: ['-20.72', '-20.72', '-20.80', '-20.76']


 72%|███████▏  | 4339/6000 [16:53<05:32,  4.99it/s]

Episode 4340/6000
Fitnesses: ['-2.88', '-49.83', '-10.50', '-36.49']
100 fitness avgs: ['-20.50', '-20.96', '-20.57', '-20.87']


 73%|███████▎  | 4359/6000 [16:57<04:03,  6.73it/s]

Episode 4360/6000
Fitnesses: ['-9.49', '-2.75', '-20.64', '-73.98']
100 fitness avgs: ['-20.54', '-20.55', '-20.65', '-21.26']


 73%|███████▎  | 4379/6000 [17:01<04:11,  6.43it/s]

Episode 4380/6000
Fitnesses: ['-60.95', '-8.47', '-500.43', '-22.99']
100 fitness avgs: ['-21.04', '-20.51', '-25.43', '-21.37']


 73%|███████▎  | 4399/6000 [17:03<03:25,  7.78it/s]

Episode 4400/6000
Fitnesses: ['-37.67', '-45.49', '-21.48', '-76.50']
100 fitness avgs: ['-20.60', '-20.68', '-20.44', '-21.52']


 74%|███████▎  | 4419/6000 [17:06<03:15,  8.08it/s]

Episode 4420/6000
Fitnesses: ['-41.00', '-32.86', '-48.07', '-24.90']
100 fitness avgs: ['-20.73', '-20.82', '-21.05', '-20.57']


 74%|███████▍  | 4439/6000 [17:10<03:19,  7.82it/s]

Episode 4440/6000
Fitnesses: ['-28.35', '-40.59', '-43.42', '-1.05']
100 fitness avgs: ['-20.48', '-20.60', '-20.79', '-20.21']


 74%|███████▍  | 4459/6000 [17:13<03:26,  7.46it/s]

Episode 4460/6000
Fitnesses: ['-51.16', '-54.98', '-47.07', '-77.72']
100 fitness avgs: ['-20.69', '-21.00', '-20.65', '-20.95']


 75%|███████▍  | 4479/6000 [17:16<03:15,  7.79it/s]

Episode 4480/6000
Fitnesses: ['-55.73', '-29.72', '-29.81', '-45.84']
100 fitness avgs: ['-21.15', '-20.93', '-20.93', '-21.09']


 75%|███████▍  | 4499/6000 [17:19<03:16,  7.62it/s]

Episode 4500/6000
Fitnesses: ['-92.18', '-10.62', '-14.60', '-27.82']
100 fitness avgs: ['-21.37', '-20.78', '-20.76', '-20.89']


 75%|███████▌  | 4519/6000 [17:22<03:14,  7.61it/s]

Episode 4520/6000
Fitnesses: ['-103.74', '-44.05', '-46.27', '-79.02']
100 fitness avgs: ['-21.68', '-21.09', '-21.09', '-21.43']


 76%|███████▌  | 4539/6000 [17:26<03:24,  7.14it/s]

Episode 4540/6000
Fitnesses: ['-27.70', '-64.24', '-46.98', '-74.47']
100 fitness avgs: ['-21.30', '-22.01', '-21.49', '-21.76']


 76%|███████▌  | 4559/6000 [17:29<03:08,  7.66it/s]

Episode 4560/6000
Fitnesses: ['-42.81', '-5.62', '-11.42', '-39.21']
100 fitness avgs: ['-21.52', '-21.34', '-21.20', '-21.48']


 76%|███████▋  | 4579/6000 [17:32<03:07,  7.59it/s]

Episode 4580/6000
Fitnesses: ['-58.24', '-144.76', '-0.98', '-27.55']
100 fitness avgs: ['-21.74', '-22.75', '-21.17', '-21.30']


 77%|███████▋  | 4599/6000 [17:35<03:15,  7.17it/s]

Episode 4600/6000
Fitnesses: ['-95.13', '-28.05', '-16.97', '-26.63']
100 fitness avgs: ['-21.95', '-21.28', '-21.74', '-21.27']


 77%|███████▋  | 4619/6000 [17:39<03:22,  6.83it/s]

Episode 4620/6000
Fitnesses: ['-5.69', '-17.05', '-66.37', '-59.50']
100 fitness avgs: ['-21.50', '-21.82', '-22.11', '-21.57']


 77%|███████▋  | 4639/6000 [17:44<04:39,  4.87it/s]

Episode 4640/6000
Fitnesses: ['-12.41', '-18.91', '-24.05', '-7.44']
100 fitness avgs: ['-21.46', '-21.84', '-21.65', '-21.73']


 78%|███████▊  | 4659/6000 [17:48<04:33,  4.90it/s]

Episode 4660/6000
Fitnesses: ['-17.44', '-16.17', '-111.13', '-452.93']
100 fitness avgs: ['-21.82', '-21.54', '-22.68', '-25.91']


 78%|███████▊  | 4679/6000 [17:52<03:59,  5.51it/s]

Episode 4680/6000
Fitnesses: ['-126.44', '-3.55', '-51.19', '-854.82']
100 fitness avgs: ['-21.34', '-21.25', '-20.59', '-28.62']


 78%|███████▊  | 4699/6000 [17:55<03:21,  6.47it/s]

Episode 4700/6000
Fitnesses: ['-2.70', '-12.67', '-74.65', '-37.57']
100 fitness avgs: ['-20.17', '-19.60', '-20.89', '-20.52']


 79%|███████▊  | 4719/6000 [17:59<03:07,  6.83it/s]

Episode 4720/6000
Fitnesses: ['-40.06', '-31.62', '-23.46', '-82.66']
100 fitness avgs: ['-20.01', '-19.36', '-19.85', '-20.44']


 79%|███████▉  | 4739/6000 [18:03<03:56,  5.34it/s]

Episode 4740/6000
Fitnesses: ['-74.51', '-36.52', '-60.81', '-460.21']
100 fitness avgs: ['-20.49', '-20.11', '-19.87', '-23.86']


 79%|███████▉  | 4759/6000 [18:06<02:37,  7.86it/s]

Episode 4760/6000
Fitnesses: ['-4.08', '-10.88', '-3.31', '-36.02']
100 fitness avgs: ['-20.07', '-19.89', '-19.82', '-20.14']


 80%|███████▉  | 4779/6000 [18:09<02:37,  7.75it/s]

Episode 4780/6000
Fitnesses: ['-37.34', '-57.86', '-33.54', '-46.25']
100 fitness avgs: ['-19.94', '-20.22', '-20.15', '-20.10']


 80%|███████▉  | 4799/6000 [18:12<02:37,  7.61it/s]

Episode 4800/6000
Fitnesses: ['-102.63', '-17.79', '-19.64', '-62.61']
100 fitness avgs: ['-21.14', '-20.29', '-20.10', '-20.74']


 80%|████████  | 4819/6000 [18:16<03:09,  6.22it/s]

Episode 4820/6000
Fitnesses: ['-46.51', '-103.34', '-41.07', '-9.55']
100 fitness avgs: ['-20.69', '-21.06', '-20.44', '-20.77']


 81%|████████  | 4839/6000 [18:20<02:24,  8.05it/s]

Episode 4840/6000
Fitnesses: ['-62.40', '-53.71', '-410.02', '-8.52']
100 fitness avgs: ['-21.16', '-20.74', '-24.93', '-20.62']


 81%|████████  | 4859/6000 [18:22<01:43, 11.04it/s]

Episode 4860/6000
Fitnesses: ['-78.21', '-137.31', '-53.63', '-8.96']
100 fitness avgs: ['-21.33', '-22.46', '-21.21', '-20.77']


 81%|████████▏ | 4879/6000 [18:25<02:15,  8.28it/s]

Episode 4880/6000
Fitnesses: ['-91.06', '-94.97', '-75.93', '-43.57']
100 fitness avgs: ['-21.46', '-21.50', '-21.87', '-20.98']


 82%|████████▏ | 4900/6000 [18:28<02:36,  7.02it/s]

Episode 4900/6000
Fitnesses: ['-43.01', '-60.90', '-26.26', '-34.47']
100 fitness avgs: ['-21.35', '-22.00', '-22.07', '-21.26']


 82%|████████▏ | 4922/6000 [18:29<01:04, 16.79it/s]

Episode 4920/6000
Fitnesses: ['-45.76', '-106.55', '-39.57', '-58.89']
100 fitness avgs: ['-22.52', '-23.12', '-21.65', '-22.65']


 82%|████████▏ | 4942/6000 [18:30<01:07, 15.70it/s]

Episode 4940/6000
Fitnesses: ['-199.42', '-336.14', '-177.82', '-18.35']
100 fitness avgs: ['-23.57', '-24.94', '-23.36', '-22.76']


 83%|████████▎ | 4962/6000 [18:32<01:09, 14.93it/s]

Episode 4960/6000
Fitnesses: ['-4.19', '-0.77', '-37.91', '-72.89']
100 fitness avgs: ['-22.50', '-23.06', '-23.43', '-23.78']


 83%|████████▎ | 4982/6000 [18:33<01:07, 14.98it/s]

Episode 4980/6000
Fitnesses: ['-249.34', '-15.99', '-17.44', '-71.60']
100 fitness avgs: ['-25.50', '-23.17', '-22.63', '-24.10']


 83%|████████▎ | 5000/6000 [18:34<01:08, 14.57it/s]

Episode 5000/6000
Fitnesses: ['-147.84', '-98.54', '-27.91', '-14.42']
100 fitness avgs: ['-24.05', '-25.89', '-22.31', '-22.17']


 84%|████████▎ | 5019/6000 [18:35<00:49, 19.75it/s]

Episode 5020/6000
Fitnesses: ['-46.32', '-14.80', '-77.70', '-34.62']
100 fitness avgs: ['-22.14', '-21.83', '-22.46', '-22.16']


 84%|████████▍ | 5042/6000 [18:36<01:00, 15.82it/s]

Episode 5040/6000
Fitnesses: ['-58.64', '-13.36', '-24.53', '-21.06']
100 fitness avgs: ['-21.98', '-21.87', '-21.64', '-22.24']


 84%|████████▍ | 5061/6000 [18:37<00:54, 17.37it/s]

Episode 5060/6000
Fitnesses: ['-7.97', '-111.86', '-6.77', '-62.63']
100 fitness avgs: ['-21.87', '-23.03', '-22.23', '-22.79']


 85%|████████▍ | 5082/6000 [18:38<00:46, 19.85it/s]

Episode 5080/6000
Fitnesses: ['-94.15', '-94.91', '-22.00', '-21.57']
100 fitness avgs: ['-23.05', '-23.61', '-22.88', '-21.96']


 85%|████████▌ | 5100/6000 [18:39<00:42, 21.29it/s]

Episode 5100/6000
Fitnesses: ['-39.64', '-11.13', '-35.79', '-1.20']
100 fitness avgs: ['-22.28', '-22.00', '-23.33', '-23.55']


 85%|████████▌ | 5121/6000 [18:40<00:44, 19.89it/s]

Episode 5120/6000
Fitnesses: ['-33.05', '-7.41', '-14.26', '-18.89']
100 fitness avgs: ['-23.74', '-23.48', '-23.33', '-23.60']


 86%|████████▌ | 5142/6000 [18:41<00:41, 20.66it/s]

Episode 5140/6000
Fitnesses: ['-7.76', '-116.36', '-18.28', '-45.74']
100 fitness avgs: ['-23.39', '-24.33', '-23.61', '-23.77']


 86%|████████▌ | 5160/6000 [18:42<00:38, 21.73it/s]

Episode 5160/6000
Fitnesses: ['-6.61', '-20.41', '-109.49', '-36.74']
100 fitness avgs: ['-23.28', '-23.80', '-24.53', '-23.58']


 86%|████████▋ | 5183/6000 [18:43<00:35, 22.95it/s]

Episode 5180/6000
Fitnesses: ['-5.71', '-47.64', '-73.79', '-11.45']
100 fitness avgs: ['-23.08', '-24.01', '-23.76', '-23.43']


 87%|████████▋ | 5202/6000 [18:44<00:33, 23.86it/s]

Episode 5200/6000
Fitnesses: ['-49.33', '-31.41', '-24.18', '-17.14']
100 fitness avgs: ['-23.52', '-23.34', '-23.63', '-23.20']


 87%|████████▋ | 5223/6000 [18:44<00:31, 24.34it/s]

Episode 5220/6000
Fitnesses: ['-110.32', '-16.04', '-23.53', '-24.81']
100 fitness avgs: ['-24.22', '-23.70', '-23.49', '-23.79']


 87%|████████▋ | 5241/6000 [18:45<00:36, 20.98it/s]

Episode 5240/6000
Fitnesses: ['-32.31', '-50.63', '-74.27', '-35.82']
100 fitness avgs: ['-23.69', '-23.87', '-24.11', '-23.72']


 88%|████████▊ | 5262/6000 [18:46<00:36, 20.15it/s]

Episode 5260/6000
Fitnesses: ['-17.40', '-20.25', '-4.82', '-31.71']
100 fitness avgs: ['-23.75', '-24.20', '-23.62', '-23.93']


 88%|████████▊ | 5283/6000 [18:47<00:35, 20.24it/s]

Episode 5280/6000
Fitnesses: ['-32.77', '-16.71', '-21.19', '-20.23']
100 fitness avgs: ['-23.93', '-24.34', '-23.94', '-23.80']


 88%|████████▊ | 5298/6000 [18:48<00:29, 23.77it/s]

Episode 5300/6000
Fitnesses: ['-4.76', '-20.22', '-8.64', '-39.42']
100 fitness avgs: ['-24.34', '-23.95', '-23.84', '-24.68']


 89%|████████▊ | 5319/6000 [18:49<00:31, 21.81it/s]

Episode 5320/6000
Fitnesses: ['-40.80', '-2.12', '-55.54', '-22.56']
100 fitness avgs: ['-24.44', '-24.06', '-24.09', '-23.76']


 89%|████████▉ | 5340/6000 [18:50<00:50, 13.03it/s]

Episode 5340/6000
Fitnesses: ['-55.27', '-215.59', '-8.27', '-10.53']
100 fitness avgs: ['-24.51', '-26.11', '-23.74', '-24.06']


 89%|████████▉ | 5360/6000 [18:51<00:46, 13.71it/s]

Episode 5360/6000
Fitnesses: ['-24.02', '-69.68', '-27.90', '-276.39']
100 fitness avgs: ['-23.73', '-24.50', '-24.09', '-28.62']


 90%|████████▉ | 5381/6000 [18:52<00:33, 18.35it/s]

Episode 5380/6000
Fitnesses: ['-74.71', '-60.02', '-8.72', '-90.77']
100 fitness avgs: ['-24.44', '-24.29', '-24.14', '-24.60']


 90%|█████████ | 5402/6000 [18:53<00:29, 20.21it/s]

Episode 5400/6000
Fitnesses: ['-26.49', '-91.61', '-38.96', '-21.50']
100 fitness avgs: ['-24.22', '-24.87', '-24.65', '-24.33']


 90%|█████████ | 5420/6000 [18:54<00:30, 19.12it/s]

Episode 5420/6000
Fitnesses: ['-39.06', '-8.18', '-69.05', '-65.42']
100 fitness avgs: ['-24.58', '-24.27', '-24.77', '-25.16']


 91%|█████████ | 5438/6000 [18:55<00:25, 22.00it/s]

Episode 5440/6000
Fitnesses: ['-117.93', '-24.18', '-37.41', '-92.11']
100 fitness avgs: ['-25.35', '-24.72', '-25.43', '-25.09']


 91%|█████████ | 5461/6000 [18:56<00:28, 18.62it/s]

Episode 5460/6000
Fitnesses: ['-1.06', '-32.37', '-51.17', '-18.81']
100 fitness avgs: ['-24.40', '-25.43', '-24.90', '-25.29']


 91%|█████████▏| 5482/6000 [18:57<00:27, 18.98it/s]

Episode 5480/6000
Fitnesses: ['-20.17', '-17.77', '-140.79', '-48.51']
100 fitness avgs: ['-24.49', '-25.36', '-25.69', '-24.77']


 92%|█████████▏| 5503/6000 [18:58<00:23, 21.12it/s]

Episode 5500/6000
Fitnesses: ['-127.81', '-98.78', '-24.10', '-114.38']
100 fitness avgs: ['-26.43', '-25.27', '-24.81', '-25.43']


 92%|█████████▏| 5521/6000 [18:59<00:26, 18.24it/s]

Episode 5520/6000
Fitnesses: ['-42.57', '-52.80', '-99.71', '-0.62']
100 fitness avgs: ['-25.14', '-25.86', '-25.71', '-25.18']


 92%|█████████▏| 5542/6000 [19:00<00:25, 18.01it/s]

Episode 5540/6000
Fitnesses: ['-56.84', '-81.45', '-9.35', '-34.83']
100 fitness avgs: ['-25.72', '-25.97', '-25.21', '-26.18']


 93%|█████████▎| 5561/6000 [19:01<00:25, 17.42it/s]

Episode 5560/6000
Fitnesses: ['-9.09', '-34.96', '-8.98', '-49.46']
100 fitness avgs: ['-25.16', '-25.42', '-25.16', '-26.33']


 93%|█████████▎| 5579/6000 [19:02<00:22, 18.60it/s]

Episode 5580/6000
Fitnesses: ['-152.05', '-14.88', '-52.94', '-46.47']
100 fitness avgs: ['-25.96', '-24.59', '-24.97', '-24.90']


 93%|█████████▎| 5601/6000 [19:04<00:27, 14.65it/s]

Episode 5600/6000
Fitnesses: ['-46.65', '-19.61', '-31.28', '-5.29']
100 fitness avgs: ['-24.89', '-25.00', '-26.11', '-24.48']


 94%|█████████▎| 5621/6000 [19:06<00:29, 12.69it/s]

Episode 5620/6000
Fitnesses: ['-13.78', '-25.95', '-147.08', '-59.47']
100 fitness avgs: ['-24.15', '-24.80', '-26.01', '-24.61']


 94%|█████████▍| 5641/6000 [19:07<00:25, 14.20it/s]

Episode 5640/6000
Fitnesses: ['-35.26', '-12.36', '-6.76', '-129.01']
100 fitness avgs: ['-24.36', '-24.78', '-24.53', '-25.94']


 94%|█████████▍| 5661/6000 [19:08<00:24, 14.04it/s]

Episode 5660/6000
Fitnesses: ['-62.03', '-18.71', '-8.68', '-70.22']
100 fitness avgs: ['-25.06', '-24.62', '-24.52', '-25.39']


 95%|█████████▍| 5681/6000 [19:09<00:21, 14.61it/s]

Episode 5680/6000
Fitnesses: ['-21.52', '-28.47', '-50.16', '-24.04']
100 fitness avgs: ['-24.71', '-25.64', '-25.53', '-25.27']


 95%|█████████▍| 5699/6000 [19:10<00:17, 17.13it/s]

Episode 5700/6000
Fitnesses: ['-12.85', '-58.00', '-14.74', '-76.65']
100 fitness avgs: ['-24.55', '-25.01', '-24.57', '-26.12']


 95%|█████████▌| 5719/6000 [19:12<00:22, 12.61it/s]

Episode 5720/6000
Fitnesses: ['-3.05', '-281.44', '-20.58', '-28.34']
100 fitness avgs: ['-24.40', '-27.20', '-24.60', '-24.65']


 96%|█████████▌| 5741/6000 [19:14<00:23, 11.03it/s]

Episode 5740/6000
Fitnesses: ['-25.63', '-25.86', '-361.55', '-13.56']
100 fitness avgs: ['-24.57', '-24.57', '-30.73', '-24.45']


 96%|█████████▌| 5761/6000 [19:16<00:19, 12.39it/s]

Episode 5760/6000
Fitnesses: ['-35.09', '-36.74', '-29.52', '-104.36']
100 fitness avgs: ['-24.61', '-24.74', '-24.55', '-25.30']


 96%|█████████▋| 5781/6000 [19:17<00:16, 12.95it/s]

Episode 5780/6000
Fitnesses: ['-12.39', '-11.17', '-13.70', '-48.17']
100 fitness avgs: ['-24.54', '-24.52', '-24.55', '-24.95']


 97%|█████████▋| 5799/6000 [19:18<00:13, 15.15it/s]

Episode 5800/6000
Fitnesses: ['-4.99', '-59.76', '-66.84', '-61.55']
100 fitness avgs: ['-24.44', '-24.99', '-25.07', '-25.00']


 97%|█████████▋| 5821/6000 [19:20<00:17, 10.26it/s]

Episode 5820/6000
Fitnesses: ['-38.80', '-656.58', '-77.04', '-37.23']
100 fitness avgs: ['-24.19', '-30.92', '-25.12', '-24.18']


 97%|█████████▋| 5841/6000 [19:22<00:14, 10.83it/s]

Episode 5840/6000
Fitnesses: ['-12.91', '-27.84', '-4.92', '-11.01']
100 fitness avgs: ['-24.23', '-25.32', '-24.15', '-25.15']


 98%|█████████▊| 5859/6000 [19:23<00:13, 10.69it/s]

Episode 5860/6000
Fitnesses: ['-62.09', '-32.07', '-171.22', '-4.79']
100 fitness avgs: ['-24.50', '-24.20', '-25.59', '-24.01']


 98%|█████████▊| 5881/6000 [19:26<00:12,  9.86it/s]

Episode 5880/6000
Fitnesses: ['-63.05', '-50.02', '-71.27', '-35.68']
100 fitness avgs: ['-24.57', '-24.93', '-25.14', '-24.29']


 98%|█████████▊| 5900/6000 [19:28<00:11,  8.77it/s]

Episode 5900/6000
Fitnesses: ['-42.98', '-52.39', '-44.76', '-15.28']
100 fitness avgs: ['-24.70', '-25.07', '-24.72', '-25.06']


 99%|█████████▊| 5920/6000 [19:29<00:06, 11.52it/s]

Episode 5920/6000
Fitnesses: ['-8.76', '-22.47', '-7.65', '-47.62']
100 fitness avgs: ['-25.01', '-25.15', '-25.00', '-25.41']


 99%|█████████▉| 5940/6000 [19:31<00:04, 13.18it/s]

Episode 5940/6000
Fitnesses: ['-31.66', '-17.48', '-53.54', '-28.79']
100 fitness avgs: ['-25.20', '-25.07', '-25.42', '-25.17']


 99%|█████████▉| 5962/6000 [19:32<00:02, 14.76it/s]

Episode 5960/6000
Fitnesses: ['-5.45', '-3.71', '-79.05', '-55.71']
100 fitness avgs: ['-25.10', '-25.21', '-25.97', '-25.70']


100%|█████████▉| 5981/6000 [19:33<00:01, 15.06it/s]

Episode 5980/6000
Fitnesses: ['-8.64', '-177.14', '-9.29', '-20.81']
100 fitness avgs: ['-25.19', '-26.88', '-25.08', '-25.20']


100%|██████████| 6000/6000 [19:34<00:00,  5.11it/s]

Episode 6000/6000
Fitnesses: ['-38.34', '-15.30', '-16.12', '-76.51']
100 fitness avgs: ['-25.41', '-25.08', '-25.19', '-25.69']





In [7]:
import os

import imageio
import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from PIL import Image, ImageDraw

from agilerl.algorithms.matd3 import MATD3


# Define function to return image
def _label_with_episode_number(frame, episode_num):
    im = Image.fromarray(frame)

    drawer = ImageDraw.Draw(im)

    if np.mean(frame) < 128:
        text_color = (255, 255, 255)
    else:
        text_color = (0, 0, 0)
    drawer.text(
        (im.size[0] / 20, im.size[1] / 18), f"Episode: {episode_num+1}", fill=text_color
    )

    return im


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Configure the environment
    env = simple_speaker_listener_v4.parallel_env(
        continuous_actions=True, render_mode="rgb_array"
    )
    env.reset()
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except Exception:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        discrete_actions = True
        max_action = None
        min_action = None
    except Exception:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        discrete_actions = False
        max_action = [env.action_space(agent).high for agent in env.agents]
        min_action = [env.action_space(agent).low for agent in env.agents]

    # Append number of agents and agent IDs to the initial hyperparameter dictionary
    n_agents = env.num_agents
    agent_ids = env.agents

    # Instantiate an MADDPG object
    matd3 = MATD3(
        state_dim,
        action_dim,
        one_hot,
        n_agents,
        agent_ids,
        max_action,
        min_action,
        discrete_actions,
        device=device,
    )

    # Load the saved algorithm into the MADDPG object
    path = "./models/MATD3/MATD3_trained_agent.pt"
    matd3.loadCheckpoint(path)

    # Define test loop parameters
    episodes = 10  # Number of episodes to test agent on
    max_steps = 25  # Max number of steps to take in the environment in each episode

    rewards = []  # List to collect total episodic reward
    frames = []  # List to collect frames
    indi_agent_rewards = {
        agent_id: [] for agent_id in agent_ids
    }  # Dictionary to collect inidivdual agent rewards

    rewards = []  # List to collect total episodic reward
    frames = []  # List to collect frames
    indi_agent_rewards = {
        agent_id: [] for agent_id in agent_ids
    }  # Dictionary to collect inidivdual agent rewards

    # Test loop for inference
    for ep in range(episodes):
        state, info = env.reset()
        agent_reward = {agent_id: 0 for agent_id in agent_ids}
        score = 0
        for _ in range(max_steps):
            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
            env_defined_actions = (
                info["env_defined_actions"]
                if "env_defined_actions" in info.keys()
                else None
            )

            # Get next action from agent
            cont_actions, discrete_action = matd3.getAction(
                state,
                epsilon=0,
                agent_mask=agent_mask,
                env_defined_actions=env_defined_actions,
            )
            if matd3.discrete_actions:
                action = discrete_action
            else:
                action = cont_actions

            # Save the frame for this step and append to frames list
            frame = env.render()
            frames.append(_label_with_episode_number(frame, episode_num=ep))

            # Take action in environment
            state, reward, termination, truncation, info = env.step(action)

            # Save agent's reward for this step in this episode
            for agent_id, r in reward.items():
                agent_reward[agent_id] += r

            # Determine total score for the episode and then append to rewards list
            score = sum(agent_reward.values())

            # Stop episode if any agents have terminated
            if any(truncation.values()) or any(termination.values()):
                break

        rewards.append(score)

        # Record agent specific episodic reward
        for agent_id in agent_ids:
            indi_agent_rewards[agent_id].append(agent_reward[agent_id])

        print("-" * 15, f"Episode: {ep}", "-" * 15)
        print("Episodic Reward: ", rewards[-1])
        for agent_id, reward_list in indi_agent_rewards.items():
            print(f"{agent_id} reward: {reward_list[-1]}")
    env.close()

    # Save the gif to specified path
    gif_path = "./videos/"
    os.makedirs(gif_path, exist_ok=True)
    imageio.mimwrite(
        os.path.join("./videos/", "speaker_listener.gif"), frames, duration=10
    )

--------------- Episode: 0 ---------------
Episodic Reward:  -43.835148054556576
speaker_0 reward: -21.917574027278288
listener_0 reward: -21.917574027278288
--------------- Episode: 1 ---------------
Episodic Reward:  -23.326940956588846
speaker_0 reward: -11.663470478294423
listener_0 reward: -11.663470478294423
--------------- Episode: 2 ---------------
Episodic Reward:  -10.317120117451974
speaker_0 reward: -5.158560058725987
listener_0 reward: -5.158560058725987
--------------- Episode: 3 ---------------
Episodic Reward:  -23.893555654319197
speaker_0 reward: -11.946777827159599
listener_0 reward: -11.946777827159599
--------------- Episode: 4 ---------------
Episodic Reward:  -41.38620294840652
speaker_0 reward: -20.69310147420326
listener_0 reward: -20.69310147420326
--------------- Episode: 5 ---------------
Episodic Reward:  -39.988109501088644
speaker_0 reward: -19.994054750544322
listener_0 reward: -19.994054750544322
--------------- Episode: 6 ---------------
Episodic Rewar

In [6]:
# 音を鳴らす

import numpy as np
import IPython

rate = 48000
duration = 30.0

angle_list = np.arange(0, rate * duration)
sound = np.sin(2 * np.pi * 523 / rate * angle_list)


IPython.display.Audio(sound, rate=rate, autoplay=True)