# AgileRL Speaker-Listener with MATD3
Based on the AgileRL MATD3 tutorial: https://docs.agilerl.com/en/latest/tutorials/pettingzoo/matd3.html

In [1]:
!pip install --upgrade pip

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [2]:
!pip install pettingzoo[mpe]
!pip install agilerl
!pip install imageio

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [3]:
"""
This tutorial shows how to train an MATD3 agent on the simple speaker listener multi-particle environment.

Authors: Michael (https://github.com/mikepratt1), Nickua (https://github.com/nicku-a)
"""

import os

import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from tqdm import trange

from agilerl.components.multi_agent_replay_buffer import MultiAgentReplayBuffer
from agilerl.hpo.mutation import Mutations
from agilerl.hpo.tournament import TournamentSelection
from agilerl.utils.utils import initialPopulation

if __name__ == "__main__":
    # End-to-end training script: builds the speaker-listener environment, creates a
    # population of MATD3 agents, trains them with a shared replay buffer, periodically
    # evaluates/evolves the population, and finally checkpoints the elite agent.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("===== AgileRL Online Multi-Agent Demo =====")

    # Seed the global NumPy / PyTorch RNGs and the first environment reset so that
    # repeated runs start from the same state.
    # NOTE(review): how much of the library-internal randomness this covers depends on
    # AgileRL internals; GPU execution may still be non-deterministic.
    SEED = 42
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Define the network configuration
    NET_CONFIG = {
        "arch": "mlp",  # Network architecture
        "h_size": [32, 32],  # Actor hidden size
    }

    # Define the initial hyperparameters
    INIT_HP = {
        "POPULATION_SIZE": 4,
        "ALGO": "MATD3",  # Algorithm
        # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
        "CHANNELS_LAST": False,
        "BATCH_SIZE": 32,  # Batch size
        "LR": 0.01,  # Learning rate
        "GAMMA": 0.95,  # Discount factor
        "MEMORY_SIZE": 100000,  # Max memory buffer size
        "LEARN_STEP": 5,  # Learning frequency
        "TAU": 0.01,  # For soft update of target parameters
        "POLICY_FREQ": 2,  # Policy frequency
    }

    # Define the simple speaker listener environment as a parallel environment
    env = simple_speaker_listener_v4.parallel_env(continuous_actions=True)
    env.reset(seed=SEED)

    # Configure the multi-agent algo input arguments.
    # Spaces exposing `.n` are treated as discrete (one-hot observations / discrete
    # actions); spaces without it raise AttributeError and fall back to `.shape`.
    # Only AttributeError is caught so that unrelated failures are not silently
    # misread as "continuous space".
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except AttributeError:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = True
        INIT_HP["MAX_ACTION"] = None
        INIT_HP["MIN_ACTION"] = None
    except AttributeError:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        INIT_HP["DISCRETE_ACTIONS"] = False
        INIT_HP["MAX_ACTION"] = [env.action_space(agent).high for agent in env.agents]
        INIT_HP["MIN_ACTION"] = [env.action_space(agent).low for agent in env.agents]

    # Not applicable to MPE environments, used when images are used for observations (Atari environments)
    if INIT_HP["CHANNELS_LAST"]:
        # Reorder each per-agent shape from (H, W, C) to (C, H, W)
        state_dim = [(dim[2], dim[0], dim[1]) for dim in state_dim]

    # Append number of agents and agent IDs to the initial hyperparameter dictionary
    INIT_HP["N_AGENTS"] = env.num_agents
    INIT_HP["AGENT_IDS"] = env.agents

    # Create a population ready for evolutionary hyper-parameter optimisation
    pop = initialPopulation(
        INIT_HP["ALGO"],
        state_dim,
        action_dim,
        one_hot,
        NET_CONFIG,
        INIT_HP,
        population_size=INIT_HP["POPULATION_SIZE"],
        device=device,
    )

    # Configure the multi-agent replay buffer (shared by every member of the population)
    field_names = ["state", "action", "reward", "next_state", "done"]
    memory = MultiAgentReplayBuffer(
        INIT_HP["MEMORY_SIZE"],
        field_names=field_names,
        agent_ids=INIT_HP["AGENT_IDS"],
        device=device,
    )

    # Instantiate a tournament selection object (used for HPO)
    tournament = TournamentSelection(
        tournament_size=2,  # Tournament selection size
        elitism=True,  # Elitism in tournament selection
        population_size=INIT_HP["POPULATION_SIZE"],  # Population size
        evo_step=1,  # Evaluate using last N fitness scores
    )

    # Instantiate a mutations object (used for HPO)
    mutations = Mutations(
        algo=INIT_HP["ALGO"],
        no_mutation=0.2,  # Probability of no mutation
        architecture=0.2,  # Probability of architecture mutation
        new_layer_prob=0.2,  # Probability of new layer mutation
        parameters=0.2,  # Probability of parameter mutation
        activation=0,  # Probability of activation function mutation
        rl_hp=0.2,  # Probability of RL hyperparameter mutation
        rl_hp_selection=[
            "lr",
            "learn_step",
            "batch_size",
        ],  # RL hyperparams selected for mutation
        mutation_sd=0.1,  # Mutation strength
        agent_ids=INIT_HP["AGENT_IDS"],
        arch=NET_CONFIG["arch"],
        rand_seed=1,
        device=device,
    )

    # Define training loop parameters
    max_episodes = 6000  # Total episodes; lower this (e.g. 500) for a quick run
    max_steps = 25  # Maximum steps to take in each episode
    epsilon = 1.0  # Starting epsilon value
    eps_end = 0.1  # Final epsilon value
    eps_decay = 0.995  # Epsilon decay
    evo_epochs = 20  # Evolution frequency
    evo_loop = 1  # Number of evaluation episodes
    elite = pop[0]  # Assign a placeholder "elite" agent

    # Training loop: every outer iteration runs one episode for each population member
    for idx_epi in trange(max_episodes):
        for agent in pop:  # Loop through population
            state, info = env.reset()  # Reset environment at start of episode
            agent_reward = {agent_id: 0 for agent_id in env.agents}
            if INIT_HP["CHANNELS_LAST"]:
                # Add a leading batch axis and move channels from last to first
                state = {
                    agent_id: np.moveaxis(np.expand_dims(s, 0), [-1], [-3])
                    for agent_id, s in state.items()
                }

            for _ in range(max_steps):
                # Optional per-step hints supplied by the environment; None when absent
                agent_mask = info.get("agent_mask")
                env_defined_actions = info.get("env_defined_actions")

                # Get next action from agent
                cont_actions, discrete_action = agent.getAction(
                    state, epsilon, agent_mask, env_defined_actions
                )
                action = discrete_action if agent.discrete_actions else cont_actions

                # Act in environment
                next_state, reward, termination, truncation, info = env.step(action)

                # Image processing if necessary for the environment
                if INIT_HP["CHANNELS_LAST"]:
                    state = {agent_id: np.squeeze(s) for agent_id, s in state.items()}
                    next_state = {
                        agent_id: np.moveaxis(ns, [-1], [-3])
                        for agent_id, ns in next_state.items()
                    }

                # Save experiences to replay buffer.
                # The continuous action output is what gets stored, even when the
                # discrete action was the one sent to the environment.
                memory.save2memory(state, cont_actions, reward, next_state, termination)

                # Collect the reward
                for agent_id, r in reward.items():
                    agent_reward[agent_id] += r

                # Learn according to learning frequency, once the buffer holds
                # at least one full batch
                if (memory.counter % agent.learn_step == 0) and (
                    len(memory) >= agent.batch_size
                ):
                    experiences = memory.sample(agent.batch_size)  # Sample replay buffer
                    agent.learn(experiences)  # Learn according to agent's RL algorithm

                # Update the state
                if INIT_HP["CHANNELS_LAST"]:
                    # Restore the leading batch axis expected on the next getAction call
                    next_state = {
                        agent_id: np.expand_dims(ns, 0)
                        for agent_id, ns in next_state.items()
                    }
                state = next_state

                # Stop episode if any agents have terminated
                if any(truncation.values()) or any(termination.values()):
                    break

            # Save the total episode reward (summed over all agents)
            score = sum(agent_reward.values())
            agent.scores.append(score)

        # Update epsilon for exploration (decays once per outer iteration)
        epsilon = max(eps_end, epsilon * eps_decay)

        # Now evolve population if necessary
        if (idx_epi + 1) % evo_epochs == 0:
            # Evaluate population
            fitnesses = [
                agent.test(
                    env,
                    swap_channels=INIT_HP["CHANNELS_LAST"],
                    max_steps=max_steps,
                    loop=evo_loop,
                )
                for agent in pop
            ]

            print(f"Episode {idx_epi + 1}/{max_episodes}")
            print(f'Fitnesses: {["%.2f" % fitness for fitness in fitnesses]}')
            print(
                f'100 fitness avgs: {["%.2f" % np.mean(agent.fitness[-100:]) for agent in pop]}'
            )

            # Tournament selection and population mutation
            elite, pop = tournament.select(pop)
            pop = mutations.mutation(pop)

    # Save the trained algorithm (the most recently selected elite agent)
    path = "./models/MATD3"
    filename = "MATD3_trained_agent.pt"
    os.makedirs(path, exist_ok=True)
    save_path = os.path.join(path, filename)
    elite.saveCheckpoint(save_path)

===== AgileRL Online Multi-Agent Demo =====


  0%|          | 19/6000 [00:06<33:56,  2.94it/s] 

Episode 20/6000
Fitnesses: ['-173.26', '-80.82', '-520.59', '-134.69']
100 fitness avgs: ['-173.26', '-80.82', '-520.59', '-134.69']


  1%|          | 39/6000 [00:14<39:26,  2.52it/s]

Episode 40/6000
Fitnesses: ['-38.27', '-6.87', '-43.58', '-198.71']
100 fitness avgs: ['-59.55', '-70.78', '-89.13', '-139.77']


  1%|          | 59/6000 [00:22<40:42,  2.43it/s]

Episode 60/6000
Fitnesses: ['-85.71', '-52.75', '-8.69', '-56.54']
100 fitness avgs: ['-75.76', '-64.77', '-42.59', '-78.27']


  1%|▏         | 79/6000 [00:31<42:10,  2.34it/s]

Episode 80/6000
Fitnesses: ['-199.16', '-77.20', '-129.99', '-253.95']
100 fitness avgs: ['-81.74', '-67.88', '-81.07', '-95.43']


  2%|▏         | 99/6000 [00:38<36:47,  2.67it/s]

Episode 100/6000
Fitnesses: ['-613.53', '-227.42', '-127.40', '-238.52']
100 fitness avgs: ['-177.01', '-99.79', '-79.78', '-113.09']


  2%|▏         | 119/6000 [00:47<39:57,  2.45it/s]

Episode 120/6000
Fitnesses: ['-70.32', '-150.74', '-120.02', '-159.77']
100 fitness avgs: ['-78.20', '-91.61', '-114.25', '-109.78']


  2%|▏         | 139/6000 [00:57<51:50,  1.88it/s]

Episode 140/6000
Fitnesses: ['-28.74', '-225.51', '-102.03', '-20.06']
100 fitness avgs: ['-71.14', '-130.14', '-112.50', '-100.79']


  3%|▎         | 159/6000 [01:08<56:25,  1.73it/s]  

Episode 160/6000
Fitnesses: ['-0.70', '-104.82', '-50.83', '-25.56']
100 fitness avgs: ['-88.28', '-101.30', '-68.60', '-65.44']


  3%|▎         | 179/6000 [01:20<55:17,  1.75it/s]  

Episode 180/6000
Fitnesses: ['-35.37', '-111.15', '-11.50', '-46.49']
100 fitness avgs: ['-82.40', '-90.82', '-91.32', '-63.33']


  3%|▎         | 199/6000 [01:31<53:09,  1.82it/s]  

Episode 200/6000
Fitnesses: ['-36.09', '-61.73', '-13.74', '-49.52']
100 fitness avgs: ['-85.80', '-88.36', '-58.37', '-86.69']


  4%|▎         | 219/6000 [01:41<48:23,  1.99it/s]  

Episode 220/6000
Fitnesses: ['-13.81', '-84.60', '-11.40', '-2.93']
100 fitness avgs: ['-54.32', '-86.50', '-79.85', '-79.08']


  4%|▍         | 239/6000 [01:51<47:28,  2.02it/s]

Episode 240/6000
Fitnesses: ['-35.49', '-39.35', '-22.54', '-5.33']
100 fitness avgs: ['-75.44', '-75.77', '-74.36', '-50.24']


  4%|▍         | 259/6000 [02:03<1:00:21,  1.59it/s]

Episode 260/6000
Fitnesses: ['-91.74', '-58.49', '-68.11', '-29.29']
100 fitness avgs: ['-53.43', '-50.87', '-51.62', '-71.89']


  5%|▍         | 279/6000 [02:14<51:20,  1.86it/s]  

Episode 280/6000
Fitnesses: ['-75.30', '-38.35', '-18.52', '-25.02']
100 fitness avgs: ['-72.14', '-50.67', '-49.25', '-68.54']


  5%|▍         | 299/6000 [02:25<52:30,  1.81it/s]

Episode 300/6000
Fitnesses: ['-32.66', '-13.00', '-6.19', '-7.19']
100 fitness avgs: ['-48.14', '-46.83', '-64.39', '-67.81']


  5%|▌         | 319/6000 [02:37<55:32,  1.70it/s]  

Episode 320/6000
Fitnesses: ['-69.66', '-68.62', '-14.14', '-35.34']
100 fitness avgs: ['-64.72', '-48.20', '-46.02', '-65.78']


  6%|▌         | 339/6000 [02:55<1:26:20,  1.09it/s]

Episode 340/6000
Fitnesses: ['-18.96', '-30.74', '-15.64', '-31.09']
100 fitness avgs: ['-44.43', '-47.17', '-44.23', '-63.74']


  6%|▌         | 359/6000 [03:21<2:10:12,  1.38s/it]

Episode 360/6000
Fitnesses: ['-59.80', '-18.37', '-40.78', '-67.97']
100 fitness avgs: ['-45.10', '-42.98', '-44.23', '-45.55']


  6%|▋         | 379/6000 [03:55<2:50:11,  1.82s/it]

Episode 380/6000
Fitnesses: ['-46.20', '-77.83', '-13.93', '-9.33']
100 fitness avgs: ['-43.15', '-44.81', '-41.45', '-42.39']


  7%|▋         | 399/6000 [04:30<2:45:50,  1.78s/it]

Episode 400/6000
Fitnesses: ['-15.98', '-44.45', '-4.17', '-26.70']
100 fitness avgs: ['-41.07', '-41.60', '-39.59', '-40.71']


  7%|▋         | 419/6000 [05:05<2:38:54,  1.71s/it]

Episode 420/6000
Fitnesses: ['-19.79', '-21.44', '-20.06', '-33.98']
100 fitness avgs: ['-38.64', '-40.13', '-38.66', '-40.73']


  7%|▋         | 439/6000 [05:39<2:47:06,  1.80s/it]

Episode 440/6000
Fitnesses: ['-22.95', '-98.79', '-41.03', '-28.53']
100 fitness avgs: ['-37.93', '-41.39', '-38.75', '-38.18']


  8%|▊         | 459/6000 [06:12<2:34:52,  1.68s/it]

Episode 460/6000
Fitnesses: ['-60.51', '-15.50', '-6.89', '-32.50']
100 fitness avgs: ['-38.91', '-37.20', '-36.82', '-38.48']


  8%|▊         | 479/6000 [06:46<2:39:07,  1.73s/it]

Episode 480/6000
Fitnesses: ['-11.77', '-9.70', '-20.76', '-42.42']
100 fitness avgs: ['-35.78', '-35.69', '-36.15', '-37.42']


  8%|▊         | 499/6000 [07:19<2:36:29,  1.71s/it]

Episode 500/6000
Fitnesses: ['-19.16', '-14.79', '-23.41', '-31.62']
100 fitness avgs: ['-35.03', '-34.94', '-35.20', '-35.97']


  9%|▊         | 519/6000 [07:53<2:41:56,  1.77s/it]

Episode 520/6000
Fitnesses: ['-78.73', '-70.36', '-78.57', '-38.61']
100 fitness avgs: ['-36.62', '-36.55', '-36.71', '-35.08']


  9%|▉         | 539/6000 [08:26<2:35:40,  1.71s/it]

Episode 540/6000
Fitnesses: ['-279.87', '-28.00', '-27.42', '-35.83']
100 fitness avgs: ['-44.15', '-34.82', '-34.80', '-35.11']


  9%|▉         | 559/6000 [08:59<2:30:49,  1.66s/it]

Episode 560/6000
Fitnesses: ['-52.20', '-12.91', '-33.81', '-49.67']
100 fitness avgs: ['-35.42', '-43.03', '-34.76', '-35.35']


 10%|▉         | 579/6000 [09:34<2:44:26,  1.82s/it]

Episode 580/6000
Fitnesses: ['-18.99', '-827.45', '-19.03', '-19.15']
100 fitness avgs: ['-42.20', '-62.10', '-34.79', '-34.22']


 10%|▉         | 599/6000 [10:02<2:09:41,  1.44s/it]

Episode 600/6000
Fitnesses: ['-29.54', '-14.07', '-20.46', '-26.12']
100 fitness avgs: ['-41.78', '-41.27', '-41.48', '-34.50']


 10%|█         | 619/6000 [10:32<2:13:54,  1.49s/it]

Episode 620/6000
Fitnesses: ['-29.44', '-26.42', '-178.51', '-8.70']
100 fitness avgs: ['-40.88', '-40.99', '-39.14', '-40.22']


 11%|█         | 639/6000 [11:01<2:12:58,  1.49s/it]

Episode 640/6000
Fitnesses: ['-29.87', '-34.60', '-35.07', '-114.26']
100 fitness avgs: ['-39.89', '-40.79', '-40.05', '-41.49']


 11%|█         | 659/6000 [11:27<1:54:49,  1.29s/it]

Episode 660/6000
Fitnesses: ['-21.31', '-15.10', '-5.08', '-20.31']
100 fitness avgs: ['-39.33', '-39.14', '-38.99', '-39.46']


 11%|█▏        | 679/6000 [11:52<1:54:19,  1.29s/it]

Episode 680/6000
Fitnesses: ['-50.57', '-17.58', '-19.40', '-25.38']
100 fitness avgs: ['-39.33', '-38.51', '-38.42', '-39.04']


 12%|█▏        | 699/6000 [12:17<1:51:59,  1.27s/it]

Episode 700/6000
Fitnesses: ['-39.31', '-13.68', '-5.03', '-9.78']
100 fitness avgs: ['-38.53', '-37.80', '-37.55', '-37.69']


 12%|█▏        | 719/6000 [12:42<1:52:38,  1.28s/it]

Episode 720/6000
Fitnesses: ['-62.23', '-23.04', '-53.19', '-12.53']
100 fitness avgs: ['-38.24', '-37.15', '-38.12', '-36.85']


 12%|█▏        | 739/6000 [13:07<1:56:14,  1.33s/it]

Episode 740/6000
Fitnesses: ['-69.99', '-34.26', '-1.46', '-29.52']
100 fitness avgs: ['-37.75', '-36.78', '-35.90', '-36.66']


 13%|█▎        | 759/6000 [13:32<1:53:00,  1.29s/it]

Episode 760/6000
Fitnesses: ['-91.89', '-13.98', '-41.00', '-25.76']
100 fitness avgs: ['-37.37', '-36.06', '-36.77', '-35.63']


 13%|█▎        | 779/6000 [13:58<1:55:31,  1.33s/it]

Episode 780/6000
Fitnesses: ['-46.34', '-35.71', '-83.42', '-21.52']
100 fitness avgs: ['-36.32', '-36.05', '-37.27', '-36.38']


 13%|█▎        | 799/6000 [14:24<1:57:33,  1.36s/it]

Episode 800/6000
Fitnesses: ['-30.93', '-34.83', '-12.16', '-25.37']
100 fitness avgs: ['-36.24', '-36.34', '-35.77', '-36.10']


 14%|█▎        | 819/6000 [14:50<1:54:47,  1.33s/it]

Episode 820/6000
Fitnesses: ['-47.03', '-3.93', '-40.47', '-39.81']
100 fitness avgs: ['-36.05', '-35.00', '-36.21', '-36.19']


 14%|█▍        | 839/6000 [15:17<1:58:50,  1.38s/it]

Episode 840/6000
Fitnesses: ['-413.91', '-11.82', '-64.30', '-99.32']
100 fitness avgs: ['-44.02', '-34.45', '-35.70', '-37.70']


 14%|█▍        | 859/6000 [15:46<2:04:48,  1.46s/it]

Episode 860/6000
Fitnesses: ['-25.78', '-81.04', '-530.36', '-69.07']
100 fitness avgs: ['-34.24', '-35.53', '-55.33', '-38.43']


 15%|█▍        | 879/6000 [16:12<1:53:42,  1.33s/it]

Episode 880/6000
Fitnesses: ['-61.18', '-13.98', '-24.83', '-30.79']
100 fitness avgs: ['-34.86', '-33.78', '-35.29', '-34.17']


 15%|█▍        | 899/6000 [16:38<1:55:05,  1.35s/it]

Episode 900/6000
Fitnesses: ['-30.63', '-19.90', '-46.79', '-36.54']
100 fitness avgs: ['-33.71', '-33.48', '-34.45', '-35.31']


 15%|█▌        | 919/6000 [17:04<1:53:35,  1.34s/it]

Episode 920/6000
Fitnesses: ['-29.92', '-51.25', '-43.48', '-36.36']
100 fitness avgs: ['-33.40', '-34.09', '-33.93', '-33.54']


 16%|█▌        | 939/6000 [17:30<1:53:34,  1.35s/it]

Episode 940/6000
Fitnesses: ['-36.13', '-46.37', '-3.99', '-12.36']
100 fitness avgs: ['-33.46', '-33.81', '-32.91', '-32.95']


 16%|█▌        | 959/6000 [18:00<2:13:21,  1.59s/it]

Episode 960/6000
Fitnesses: ['-948.34', '-46.31', '-23.66', '-238.91']
100 fitness avgs: ['-51.98', '-33.19', '-32.76', '-37.74']


 16%|█▋        | 979/6000 [18:28<1:56:33,  1.39s/it]

Episode 980/6000
Fitnesses: ['-42.23', '-15.57', '-386.57', '-22.83']
100 fitness avgs: ['-32.95', '-32.41', '-44.86', '-32.98']


 17%|█▋        | 999/6000 [18:54<1:47:52,  1.29s/it]

Episode 1000/6000
Fitnesses: ['-63.02', '-40.72', '-41.01', '-17.48']
100 fitness avgs: ['-33.02', '-33.11', '-32.58', '-32.11']


 17%|█▋        | 1019/6000 [19:20<1:52:19,  1.35s/it]

Episode 1020/6000
Fitnesses: ['-25.96', '-23.54', '-38.18', '-27.43']
100 fitness avgs: ['-31.99', '-31.94', '-32.23', '-32.02']


 17%|█▋        | 1039/6000 [19:48<1:57:06,  1.42s/it]

Episode 1040/6000
Fitnesses: ['-702.18', '-45.95', '-49.01', '-37.77']
100 fitness avgs: ['-44.83', '-32.26', '-32.34', '-32.10']


 18%|█▊        | 1059/6000 [20:15<1:51:30,  1.35s/it]

Episode 1060/6000
Fitnesses: ['-50.38', '-30.58', '-12.49', '-33.70']
100 fitness avgs: ['-32.44', '-32.07', '-31.73', '-32.28']


 18%|█▊        | 1079/6000 [20:41<1:51:44,  1.36s/it]

Episode 1080/6000
Fitnesses: ['-72.47', '-5.22', '-29.44', '-78.66']
100 fitness avgs: ['-32.48', '-31.57', '-32.02', '-32.60']


 18%|█▊        | 1099/6000 [21:07<1:49:02,  1.33s/it]

Episode 1100/6000
Fitnesses: ['-27.58', '-28.73', '-42.75', '-43.74']
100 fitness avgs: ['-31.50', '-31.96', '-32.67', '-31.79']


 19%|█▊        | 1119/6000 [21:34<1:50:59,  1.36s/it]

Episode 1120/6000
Fitnesses: ['-34.11', '-17.57', '-44.65', '-35.70']
100 fitness avgs: ['-31.55', '-31.70', '-32.88', '-32.72']


 19%|█▉        | 1139/6000 [21:59<1:45:06,  1.30s/it]

Episode 1140/6000
Fitnesses: ['-43.46', '-48.09', '-4.40', '-11.16']
100 fitness avgs: ['-31.91', '-31.84', '-32.23', '-32.34']


 19%|█▉        | 1159/6000 [22:25<1:46:12,  1.32s/it]

Episode 1160/6000
Fitnesses: ['-51.31', '-21.74', '-28.25', '-51.05']
100 fitness avgs: ['-32.56', '-32.05', '-32.27', '-32.55']


 20%|█▉        | 1179/6000 [22:51<1:44:16,  1.30s/it]

Episode 1180/6000
Fitnesses: ['-19.16', '-18.25', '-31.12', '-51.21']
100 fitness avgs: ['-31.83', '-32.31', '-32.25', '-32.37']


 20%|█▉        | 1199/6000 [23:16<1:45:38,  1.32s/it]

Episode 1200/6000
Fitnesses: ['-88.12', '-25.21', '-19.79', '-52.04']
100 fitness avgs: ['-33.24', '-32.19', '-32.10', '-32.64']


 20%|██        | 1219/6000 [23:41<1:42:35,  1.29s/it]

Episode 1220/6000
Fitnesses: ['-59.44', '-72.56', '-5.89', '-85.34']
100 fitness avgs: ['-32.55', '-32.85', '-31.76', '-32.97']


 21%|██        | 1239/6000 [24:07<1:47:39,  1.36s/it]

Episode 1240/6000
Fitnesses: ['-69.62', '-14.26', '-79.19', '-20.93']
100 fitness avgs: ['-32.37', '-31.48', '-32.52', '-32.36']


 21%|██        | 1259/6000 [24:34<1:48:20,  1.37s/it]

Episode 1260/6000
Fitnesses: ['-21.11', '-13.40', '-64.23', '-36.28']
100 fitness avgs: ['-31.31', '-31.19', '-32.87', '-32.42']


 21%|██▏       | 1279/6000 [25:00<1:46:55,  1.36s/it]

Episode 1280/6000
Fitnesses: ['-59.20', '-24.69', '-15.56', '-29.15']
100 fitness avgs: ['-31.63', '-31.09', '-32.16', '-32.37']


 22%|██▏       | 1299/6000 [25:26<1:43:29,  1.32s/it]

Episode 1300/6000
Fitnesses: ['-33.97', '-4.41', '-20.27', '-51.60']
100 fitness avgs: ['-32.19', '-30.68', '-31.98', '-32.46']


 22%|██▏       | 1319/6000 [25:53<1:47:02,  1.37s/it]

Episode 1320/6000
Fitnesses: ['-17.02', '-25.15', '-75.10', '-48.57']
100 fitness avgs: ['-30.47', '-30.59', '-32.63', '-32.23']


 22%|██▏       | 1339/6000 [26:20<1:48:09,  1.39s/it]

Episode 1340/6000
Fitnesses: ['-27.59', '-16.84', '-73.80', '-21.26']
100 fitness avgs: ['-30.43', '-30.39', '-31.12', '-30.45']


 23%|██▎       | 1359/6000 [26:47<1:47:00,  1.38s/it]

Episode 1360/6000
Fitnesses: ['-61.90', '-31.38', '-18.15', '-7.87']
100 fitness avgs: ['-30.85', '-30.40', '-30.21', '-30.12']


 23%|██▎       | 1379/6000 [27:14<1:45:57,  1.38s/it]

Episode 1380/6000
Fitnesses: ['-4.42', '-3.75', '-7.17', '-5.92']
100 fitness avgs: ['-29.75', '-29.74', '-29.79', '-29.77']


 23%|██▎       | 1399/6000 [27:40<1:43:59,  1.36s/it]

Episode 1400/6000
Fitnesses: ['-26.89', '-50.70', '-6.20', '-5.28']
100 fitness avgs: ['-29.70', '-30.07', '-29.44', '-29.40']


 24%|██▎       | 1419/6000 [28:07<1:45:45,  1.39s/it]

Episode 1420/6000
Fitnesses: ['-126.90', '-58.66', '-32.69', '-28.26']
100 fitness avgs: ['-30.77', '-30.11', '-29.48', '-29.42']


 24%|██▍       | 1439/6000 [28:34<1:45:58,  1.39s/it]

Episode 1440/6000
Fitnesses: ['-1.71', '-71.05', '-34.45', '-33.80']
100 fitness avgs: ['-29.03', '-30.68', '-30.17', '-29.54']


 24%|██▍       | 1459/6000 [29:01<1:43:06,  1.36s/it]

Episode 1460/6000
Fitnesses: ['-28.98', '-24.16', '-48.18', '-100.66']
100 fitness avgs: ['-29.03', '-29.47', '-29.80', '-30.52']


 25%|██▍       | 1479/6000 [29:28<1:42:52,  1.37s/it]

Episode 1480/6000
Fitnesses: ['-49.90', '-57.59', '-13.96', '-49.72']
100 fitness avgs: ['-29.74', '-30.17', '-30.29', '-30.77']


 25%|██▍       | 1499/6000 [29:53<1:39:07,  1.32s/it]

Episode 1500/6000
Fitnesses: ['-24.21', '-12.05', '-27.63', '-19.59']
100 fitness avgs: ['-30.21', '-30.05', '-30.26', '-30.15']


 25%|██▌       | 1519/6000 [30:20<1:43:49,  1.39s/it]

Episode 1520/6000
Fitnesses: ['-38.96', '-18.67', '-3.25', '-7.16']
100 fitness avgs: ['-30.17', '-30.00', '-29.70', '-29.75']


 26%|██▌       | 1539/6000 [30:47<1:41:26,  1.36s/it]

Episode 1540/6000
Fitnesses: ['-17.97', '-29.00', '-92.05', '-44.40']
100 fitness avgs: ['-29.54', '-29.98', '-30.51', '-30.35']


 26%|██▌       | 1559/6000 [31:14<1:43:49,  1.40s/it]

Episode 1560/6000
Fitnesses: ['-7.33', '-71.44', '-22.62', '-82.12']
100 fitness avgs: ['-29.26', '-30.08', '-29.89', '-30.22']


 26%|██▋       | 1579/6000 [31:40<1:37:26,  1.32s/it]

Episode 1580/6000
Fitnesses: ['-15.05', '-22.82', '-10.80', '-65.93']
100 fitness avgs: ['-29.08', '-29.18', '-29.02', '-29.72']


 27%|██▋       | 1599/6000 [32:07<1:40:20,  1.37s/it]

Episode 1600/6000
Fitnesses: ['-19.39', '-9.47', '-12.63', '-19.06']
100 fitness avgs: ['-28.90', '-28.93', '-28.82', '-28.90']


 27%|██▋       | 1619/6000 [32:35<1:46:04,  1.45s/it]

Episode 1620/6000
Fitnesses: ['-71.13', '-27.94', '-36.26', '-64.63']
100 fitness avgs: ['-29.45', '-28.81', '-28.91', '-29.35']


 27%|██▋       | 1639/6000 [33:01<1:38:50,  1.36s/it]

Episode 1640/6000
Fitnesses: ['-14.22', '-9.94', '-16.03', '-36.85']
100 fitness avgs: ['-28.63', '-28.58', '-28.65', '-28.91']


 28%|██▊       | 1659/6000 [33:28<1:38:39,  1.36s/it]

Episode 1660/6000
Fitnesses: ['-61.21', '-72.54', '-14.84', '-50.33']
100 fitness avgs: ['-28.97', '-29.11', '-28.41', '-28.84']


 28%|██▊       | 1679/6000 [33:54<1:36:10,  1.34s/it]

Episode 1680/6000
Fitnesses: ['-32.55', '-38.75', '-83.56', '-19.63']
100 fitness avgs: ['-28.46', '-28.54', '-29.62', '-28.31']


 28%|██▊       | 1699/6000 [34:21<1:38:39,  1.38s/it]

Episode 1700/6000
Fitnesses: ['-28.07', '-8.28', '-77.26', '-33.36']
100 fitness avgs: ['-28.31', '-28.30', '-28.88', '-28.37']


 29%|██▊       | 1719/6000 [34:47<1:36:41,  1.36s/it]

Episode 1720/6000
Fitnesses: ['-34.84', '-9.22', '-49.77', '-9.30']
100 fitness avgs: ['-28.37', '-28.08', '-28.55', '-28.66']


 29%|██▉       | 1739/6000 [35:14<1:36:41,  1.36s/it]

Episode 1740/6000
Fitnesses: ['-74.04', '-16.54', '-37.68', '-13.01']
100 fitness avgs: ['-28.60', '-28.52', '-28.19', '-27.90']


 29%|██▉       | 1759/6000 [35:41<1:40:50,  1.43s/it]

Episode 1760/6000
Fitnesses: ['-13.03', '-37.40', '-127.16', '-31.01']
100 fitness avgs: ['-27.73', '-28.62', '-29.31', '-28.55']


 30%|██▉       | 1779/6000 [36:07<1:36:12,  1.37s/it]

Episode 1780/6000
Fitnesses: ['-32.57', '-40.79', '-52.74', '-52.93']
100 fitness avgs: ['-27.79', '-27.88', '-28.89', '-28.02']


 30%|██▉       | 1799/6000 [36:35<1:39:49,  1.43s/it]

Episode 1800/6000
Fitnesses: ['-79.22', '-64.86', '-38.87', '-11.95']
100 fitness avgs: ['-28.36', '-28.20', '-28.14', '-27.61']


 30%|███       | 1819/6000 [37:02<1:35:41,  1.37s/it]

Episode 1820/6000
Fitnesses: ['-25.68', '-39.31', '-35.42', '-23.78']
100 fitness avgs: ['-27.59', '-28.26', '-27.70', '-28.31']


 31%|███       | 1839/6000 [37:28<1:33:00,  1.34s/it]

Episode 1840/6000
Fitnesses: ['-56.50', '-17.88', '-38.31', '-66.42']
100 fitness avgs: ['-28.62', '-28.20', '-27.81', '-28.68']


 31%|███       | 1859/6000 [37:54<1:31:50,  1.33s/it]

Episode 1860/6000
Fitnesses: ['-30.77', '-26.00', '-12.47', '-47.21']
100 fitness avgs: ['-28.22', '-28.17', '-28.03', '-28.82']


 31%|███▏      | 1879/6000 [38:20<1:29:46,  1.31s/it]

Episode 1880/6000
Fitnesses: ['-27.08', '-13.34', '-16.45', '-33.14']
100 fitness avgs: ['-28.02', '-28.01', '-27.90', '-28.08']


 32%|███▏      | 1899/6000 [38:46<1:29:36,  1.31s/it]

Episode 1900/6000
Fitnesses: ['-31.45', '-31.54', '-11.01', '-20.93']
100 fitness avgs: ['-28.05', '-27.94', '-27.73', '-27.83']


 32%|███▏      | 1919/6000 [39:12<1:35:39,  1.41s/it]

Episode 1920/6000
Fitnesses: ['-82.53', '-8.96', '-86.23', '-94.58']
100 fitness avgs: ['-28.30', '-27.63', '-28.34', '-28.42']


 32%|███▏      | 1939/6000 [39:38<1:30:50,  1.34s/it]

Episode 1940/6000
Fitnesses: ['-26.02', '-3.01', '-36.84', '-46.22']
100 fitness avgs: ['-27.62', '-27.38', '-27.73', '-28.48']


 33%|███▎      | 1959/6000 [40:05<1:30:19,  1.34s/it]

Episode 1960/6000
Fitnesses: ['-83.78', '-25.87', '-12.92', '-38.45']
100 fitness avgs: ['-27.96', '-27.60', '-27.47', '-27.73']


 33%|███▎      | 1979/6000 [40:31<1:32:12,  1.38s/it]

Episode 1980/6000
Fitnesses: ['-81.98', '-7.44', '-57.87', '-7.15']
100 fitness avgs: ['-28.02', '-27.75', '-27.77', '-27.26']


 33%|███▎      | 1999/6000 [40:58<1:28:42,  1.33s/it]

Episode 2000/6000
Fitnesses: ['-82.36', '-33.34', '-4.62', '-28.15']
100 fitness avgs: ['-27.81', '-27.83', '-27.04', '-27.75']


 34%|███▎      | 2019/6000 [41:25<1:31:43,  1.38s/it]

Episode 2020/6000
Fitnesses: ['-47.19', '-11.39', '-8.96', '-63.31']
100 fitness avgs: ['-26.16', '-25.80', '-25.78', '-26.32']


 34%|███▍      | 2039/6000 [41:51<1:29:10,  1.35s/it]

Episode 2040/6000
Fitnesses: ['-6.29', '-51.42', '-3.82', '-5.27']
100 fitness avgs: ['-25.77', '-26.22', '-25.77', '-25.76']


 34%|███▍      | 2059/6000 [42:18<1:30:21,  1.38s/it]

Episode 2060/6000
Fitnesses: ['-9.45', '-9.08', '-52.60', '-27.28']
100 fitness avgs: ['-25.34', '-25.33', '-26.22', '-25.52']


 35%|███▍      | 2079/6000 [42:45<1:30:16,  1.38s/it]

Episode 2080/6000
Fitnesses: ['-48.91', '-16.53', '-67.90', '-11.35']
100 fitness avgs: ['-25.04', '-24.73', '-25.25', '-24.67']


 35%|███▍      | 2099/6000 [43:11<1:26:57,  1.34s/it]

Episode 2100/6000
Fitnesses: ['-4.35', '-58.64', '-45.75', '-29.10']
100 fitness avgs: ['-23.44', '-23.98', '-23.92', '-24.06']


 35%|███▌      | 2119/6000 [43:37<1:26:30,  1.34s/it]

Episode 2120/6000
Fitnesses: ['-22.28', '-23.90', '-29.04', '-10.74']
100 fitness avgs: ['-22.96', '-23.60', '-23.65', '-23.38']


 36%|███▌      | 2139/6000 [44:05<1:29:20,  1.39s/it]

Episode 2140/6000
Fitnesses: ['-21.85', '-16.20', '-13.05', '-11.93']
100 fitness avgs: ['-23.32', '-23.26', '-23.23', '-22.79']


 36%|███▌      | 2159/6000 [44:31<1:28:04,  1.38s/it]

Episode 2160/6000
Fitnesses: ['-46.57', '-20.91', '-15.14', '-22.71']
100 fitness avgs: ['-23.00', '-22.74', '-22.68', '-23.20']


 36%|███▋      | 2179/6000 [44:58<1:27:41,  1.38s/it]

Episode 2180/6000
Fitnesses: ['-26.21', '-34.76', '-63.18', '-7.65']
100 fitness avgs: ['-22.48', '-22.57', '-22.91', '-22.81']


 37%|███▋      | 2199/6000 [45:26<1:25:43,  1.35s/it]

Episode 2200/6000
Fitnesses: ['-48.12', '-12.16', '-17.28', '-32.44']
100 fitness avgs: ['-23.15', '-22.47', '-22.85', '-22.75']


 37%|███▋      | 2219/6000 [45:52<1:27:00,  1.38s/it]

Episode 2220/6000
Fitnesses: ['-12.05', '-41.88', '-25.09', '-33.61']
100 fitness avgs: ['-22.45', '-23.13', '-22.58', '-22.66']


 37%|███▋      | 2239/6000 [46:18<1:23:34,  1.33s/it]

Episode 2240/6000
Fitnesses: ['-27.30', '-29.01', '-15.52', '-34.69']
100 fitness avgs: ['-22.67', '-22.68', '-22.55', '-22.74']


 38%|███▊      | 2259/6000 [46:45<1:24:33,  1.36s/it]

Episode 2260/6000
Fitnesses: ['-9.29', '-11.89', '-80.74', '-29.88']
100 fitness avgs: ['-21.96', '-21.99', '-22.79', '-22.17']


 38%|███▊      | 2279/6000 [47:11<1:26:54,  1.40s/it]

Episode 2280/6000
Fitnesses: ['-9.90', '-69.00', '-64.48', '-11.22']
100 fitness avgs: ['-21.88', '-23.30', '-22.45', '-21.91']


 38%|███▊      | 2299/6000 [47:38<1:27:47,  1.42s/it]

Episode 2300/6000
Fitnesses: ['-66.26', '-0.36', '-20.48', '-62.76']
100 fitness avgs: ['-22.21', '-21.55', '-21.75', '-22.22']


 39%|███▊      | 2319/6000 [48:05<1:22:21,  1.34s/it]

Episode 2320/6000
Fitnesses: ['-23.70', '-49.14', '-13.31', '-34.74']
100 fitness avgs: ['-21.65', '-21.90', '-21.75', '-22.42']


 39%|███▉      | 2339/6000 [48:33<1:28:30,  1.45s/it]

Episode 2340/6000
Fitnesses: ['-59.35', '-28.55', '-54.31', '-396.98']
100 fitness avgs: ['-22.15', '-21.84', '-22.00', '-25.43']


 39%|███▉      | 2359/6000 [48:59<1:20:14,  1.32s/it]

Episode 2360/6000
Fitnesses: ['-30.00', '-14.43', '-17.35', '-3.04']
100 fitness avgs: ['-21.96', '-21.96', '-22.14', '-22.00']


 40%|███▉      | 2379/6000 [49:25<1:20:54,  1.34s/it]

Episode 2380/6000
Fitnesses: ['-21.34', '-9.35', '-40.73', '-36.84']
100 fitness avgs: ['-22.07', '-21.95', '-22.41', '-22.37']


 40%|███▉      | 2399/6000 [49:51<1:19:44,  1.33s/it]

Episode 2400/6000
Fitnesses: ['-40.31', '-3.50', '-35.61', '-44.46']
100 fitness avgs: ['-22.31', '-21.94', '-22.72', '-22.35']


 40%|████      | 2419/6000 [50:17<1:20:04,  1.34s/it]

Episode 2420/6000
Fitnesses: ['-12.31', '-14.39', '-17.63', '-10.83']
100 fitness avgs: ['-21.87', '-22.26', '-22.70', '-22.26']


 41%|████      | 2439/6000 [50:43<1:20:22,  1.35s/it]

Episode 2440/6000
Fitnesses: ['-10.05', '-19.35', '-65.80', '-74.74']
100 fitness avgs: ['-22.08', '-22.17', '-22.63', '-22.73']


 41%|████      | 2459/6000 [51:10<1:21:35,  1.38s/it]

Episode 2460/6000
Fitnesses: ['-15.50', '-29.25', '-94.06', '-65.33']
100 fitness avgs: ['-22.16', '-22.85', '-23.04', '-22.66']


 41%|████▏     | 2479/6000 [51:36<1:20:25,  1.37s/it]

Episode 2480/6000
Fitnesses: ['-24.29', '-53.36', '-6.12', '-79.08']
100 fitness avgs: ['-22.29', '-23.08', '-22.11', '-23.53']


 42%|████▏     | 2499/6000 [52:02<1:16:35,  1.31s/it]

Episode 2500/6000
Fitnesses: ['-19.66', '-16.71', '-26.75', '-16.35']
100 fitness avgs: ['-22.16', '-23.10', '-23.20', '-22.12']


 42%|████▏     | 2519/6000 [52:28<1:17:22,  1.33s/it]

Episode 2520/6000
Fitnesses: ['-49.94', '-69.87', '-16.51', '-24.11']
100 fitness avgs: ['-22.24', '-23.41', '-22.88', '-22.95']


 42%|████▏     | 2539/6000 [52:53<1:17:02,  1.34s/it]

Episode 2540/6000
Fitnesses: ['-39.65', '-15.78', '-18.35', '-52.78']
100 fitness avgs: ['-20.47', '-20.31', '-20.26', '-20.61']


 43%|████▎     | 2559/6000 [53:19<1:15:31,  1.32s/it]

Episode 2560/6000
Fitnesses: ['-10.99', '-50.22', '-42.99', '-43.49']
100 fitness avgs: ['-20.29', '-20.85', '-20.91', '-20.57']


 43%|████▎     | 2579/6000 [53:45<1:16:37,  1.34s/it]

Episode 2580/6000
Fitnesses: ['-77.32', '-7.99', '-35.65', '-8.25']
100 fitness avgs: ['-20.88', '-20.18', '-20.46', '-20.80']


 43%|████▎     | 2599/6000 [54:10<1:12:50,  1.29s/it]

Episode 2600/6000
Fitnesses: ['-24.03', '-7.12', '-11.80', '-9.50']
100 fitness avgs: ['-20.28', '-20.39', '-20.16', '-20.83']


 44%|████▎     | 2619/6000 [54:36<1:13:22,  1.30s/it]

Episode 2620/6000
Fitnesses: ['-18.08', '-34.81', '-12.73', '-44.80']
100 fitness avgs: ['-20.48', '-20.65', '-20.43', '-20.75']


 44%|████▍     | 2639/6000 [55:02<1:18:46,  1.41s/it]

Episode 2640/6000
Fitnesses: ['-42.10', '-13.00', '-11.42', '-24.57']
100 fitness avgs: ['-20.55', '-20.26', '-20.25', '-20.60']


 44%|████▍     | 2659/6000 [55:29<1:16:45,  1.38s/it]

Episode 2660/6000
Fitnesses: ['-55.65', '-49.23', '-31.81', '-28.98']
100 fitness avgs: ['-20.65', '-20.59', '-20.41', '-20.38']


 45%|████▍     | 2679/6000 [55:56<1:15:44,  1.37s/it]

Episode 2680/6000
Fitnesses: ['-50.71', '-28.14', '-8.12', '-7.83']
100 fitness avgs: ['-20.72', '-20.49', '-20.29', '-20.49']


 45%|████▍     | 2699/6000 [56:23<1:15:12,  1.37s/it]

Episode 2700/6000
Fitnesses: ['-65.33', '-79.50', '-44.65', '-57.59']
100 fitness avgs: ['-21.09', '-21.03', '-20.89', '-21.02']


 45%|████▌     | 2719/6000 [56:50<1:16:51,  1.41s/it]

Episode 2720/6000
Fitnesses: ['-627.33', '-5.54', '-12.87', '-19.66']
100 fitness avgs: ['-27.03', '-21.02', '-20.89', '-20.96']


 46%|████▌     | 2739/6000 [57:17<1:15:35,  1.39s/it]

Episode 2740/6000
Fitnesses: ['-3.21', '-49.53', '-17.39', '-72.25']
100 fitness avgs: ['-20.76', '-21.22', '-20.90', '-21.32']


 46%|████▌     | 2759/6000 [57:43<1:10:29,  1.31s/it]

Episode 2760/6000
Fitnesses: ['-17.28', '-29.25', '-82.90', '-32.79']
100 fitness avgs: ['-20.52', '-21.11', '-21.32', '-21.14']


 46%|████▋     | 2779/6000 [58:10<1:14:26,  1.39s/it]

Episode 2780/6000
Fitnesses: ['-17.27', '-51.93', '-16.93', '-27.79']
100 fitness avgs: ['-20.48', '-21.41', '-20.48', '-21.17']


 47%|████▋     | 2799/6000 [58:37<1:13:33,  1.38s/it]

Episode 2800/6000
Fitnesses: ['-32.23', '-23.00', '-39.29', '-16.21']
100 fitness avgs: ['-20.68', '-21.28', '-20.75', '-20.52']


 47%|████▋     | 2819/6000 [59:03<1:14:14,  1.40s/it]

Episode 2820/6000
Fitnesses: ['-21.94', '-2.42', '-44.33', '-2.81']
100 fitness avgs: ['-20.70', '-20.51', '-21.68', '-20.51']


 47%|████▋     | 2839/6000 [59:30<1:09:28,  1.32s/it]

Episode 2840/6000
Fitnesses: ['-8.23', '-21.42', '-52.97', '-73.62']
100 fitness avgs: ['-20.47', '-20.60', '-20.92', '-21.13']


 48%|████▊     | 2859/6000 [59:55<1:08:03,  1.30s/it]

Episode 2860/6000
Fitnesses: ['-10.21', '-60.97', '-6.67', '-10.74']
100 fitness avgs: ['-20.31', '-20.95', '-20.41', '-20.32']


 48%|████▊     | 2879/6000 [1:00:21<1:07:15,  1.29s/it]

Episode 2880/6000
Fitnesses: ['-64.04', '-9.06', '-18.42', '-4.98']
100 fitness avgs: ['-20.91', '-20.26', '-20.36', '-20.22']


 48%|████▊     | 2899/6000 [1:00:47<1:09:32,  1.35s/it]

Episode 2900/6000
Fitnesses: ['-24.19', '-43.99', '-54.39', '-4.80']
100 fitness avgs: ['-20.27', '-20.46', '-20.70', '-20.07']


 49%|████▊     | 2919/6000 [1:01:13<1:07:00,  1.30s/it]

Episode 2920/6000
Fitnesses: ['-28.28', '-27.53', '-29.67', '-30.36']
100 fitness avgs: ['-20.06', '-20.05', '-20.26', '-20.27']


 49%|████▉     | 2939/6000 [1:01:38<1:06:19,  1.30s/it]

Episode 2940/6000
Fitnesses: ['-31.95', '-12.70', '-6.13', '-39.35']
100 fitness avgs: ['-20.24', '-20.05', '-19.99', '-20.33']


 49%|████▉     | 2959/6000 [1:02:04<1:07:56,  1.34s/it]

Episode 2960/6000
Fitnesses: ['-23.39', '-80.58', '-2.21', '-44.97']
100 fitness avgs: ['-19.98', '-20.56', '-20.03', '-20.27']


 50%|████▉     | 2979/6000 [1:02:30<1:06:53,  1.33s/it]

Episode 2980/6000
Fitnesses: ['-9.53', '-20.16', '-24.22', '-40.82']
100 fitness avgs: ['-19.97', '-20.31', '-20.64', '-20.81']


 50%|████▉     | 2999/6000 [1:02:55<1:05:39,  1.31s/it]

Episode 3000/6000
Fitnesses: ['-25.19', '-25.92', '-61.66', '-12.40']
100 fitness avgs: ['-20.05', '-20.73', '-21.08', '-20.26']


 50%|█████     | 3019/6000 [1:03:22<1:08:24,  1.38s/it]

Episode 3020/6000
Fitnesses: ['-36.02', '-85.64', '-52.23', '-4.81']
100 fitness avgs: ['-20.36', '-20.86', '-20.99', '-20.52']


 51%|█████     | 3039/6000 [1:03:48<1:07:11,  1.36s/it]

Episode 3040/6000
Fitnesses: ['-64.46', '-14.86', '-16.36', '-31.03']
100 fitness avgs: ['-20.78', '-20.76', '-20.30', '-20.45']


 51%|█████     | 3059/6000 [1:04:17<1:11:42,  1.46s/it]

Episode 3060/6000
Fitnesses: ['-26.05', '-23.54', '-57.68', '-47.84']
100 fitness avgs: ['-20.90', '-20.41', '-20.90', '-20.80']


 51%|█████▏    | 3079/6000 [1:04:44<1:08:16,  1.40s/it]

Episode 3080/6000
Fitnesses: ['-35.44', '-22.05', '-15.57', '-61.03']
100 fitness avgs: ['-20.04', '-20.39', '-19.84', '-20.78']


 52%|█████▏    | 3099/6000 [1:05:10<1:03:19,  1.31s/it]

Episode 3100/6000
Fitnesses: ['-16.11', '-9.99', '-15.75', '-65.98']
100 fitness avgs: ['-19.58', '-19.71', '-19.77', '-20.27']


 52%|█████▏    | 3119/6000 [1:05:36<1:04:47,  1.35s/it]

Episode 3120/6000
Fitnesses: ['-29.37', '-48.25', '-32.82', '-47.77']
100 fitness avgs: ['-19.65', '-19.84', '-19.55', '-19.89']


 52%|█████▏    | 3139/6000 [1:06:02<1:04:30,  1.35s/it]

Episode 3140/6000
Fitnesses: ['-74.04', '-48.66', '-14.53', '-46.07']
100 fitness avgs: ['-20.35', '-20.09', '-19.75', '-20.31']


 53%|█████▎    | 3159/6000 [1:06:30<1:09:22,  1.47s/it]

Episode 3160/6000
Fitnesses: ['-266.32', '-26.03', '-10.34', '-31.14']
100 fitness avgs: ['-21.90', '-19.50', '-19.90', '-20.11']


 53%|█████▎    | 3179/6000 [1:06:57<1:03:06,  1.34s/it]

Episode 3180/6000
Fitnesses: ['-17.31', '-2.48', '-48.53', '-47.71']
100 fitness avgs: ['-19.89', '-19.74', '-20.20', '-20.20']


 53%|█████▎    | 3199/6000 [1:07:22<1:00:43,  1.30s/it]

Episode 3200/6000
Fitnesses: ['-79.19', '-38.34', '-25.53', '-71.81']
100 fitness avgs: ['-20.28', '-19.88', '-19.75', '-20.36']


 54%|█████▎    | 3219/6000 [1:07:49<1:02:24,  1.35s/it]

Episode 3220/6000
Fitnesses: ['-1.94', '-60.97', '-55.26', '-19.32']
100 fitness avgs: ['-19.71', '-20.43', '-20.37', '-19.88']


 54%|█████▍    | 3239/6000 [1:08:17<1:06:20,  1.44s/it]

Episode 3240/6000
Fitnesses: ['-28.10', '-235.64', '-27.52', '-4.08']
100 fitness avgs: ['-19.85', '-21.92', '-20.01', '-19.78']


 54%|█████▍    | 3259/6000 [1:08:46<1:07:08,  1.47s/it]

Episode 3260/6000
Fitnesses: ['-17.63', '-6.59', '-368.50', '-19.17']
100 fitness avgs: ['-19.82', '-19.95', '-25.47', '-19.84']


 55%|█████▍    | 3279/6000 [1:09:13<1:00:09,  1.33s/it]

Episode 3280/6000
Fitnesses: ['-17.05', '-18.02', '-59.46', '-10.93']
100 fitness avgs: ['-19.87', '-19.88', '-20.29', '-19.70']


 55%|█████▍    | 3299/6000 [1:09:39<1:02:36,  1.39s/it]

Episode 3300/6000
Fitnesses: ['-59.27', '-86.31', '-49.12', '-48.03']
100 fitness avgs: ['-20.25', '-20.70', '-20.74', '-20.14']


 55%|█████▌    | 3319/6000 [1:10:06<59:10,  1.32s/it]  

Episode 3320/6000
Fitnesses: ['-17.69', '-37.34', '-27.87', '-16.75']
100 fitness avgs: ['-20.06', '-20.86', '-20.16', '-20.66']


 56%|█████▌    | 3339/6000 [1:10:31<57:43,  1.30s/it]  

Episode 3340/6000
Fitnesses: ['-22.68', '-71.53', '-41.55', '-30.37']
100 fitness avgs: ['-20.67', '-21.16', '-20.26', '-20.75']


 56%|█████▌    | 3359/6000 [1:10:56<55:46,  1.27s/it]  

Episode 3360/6000
Fitnesses: ['-31.78', '-10.40', '-41.86', '-31.32']
100 fitness avgs: ['-20.91', '-21.18', '-21.01', '-20.91']


 56%|█████▋    | 3379/6000 [1:11:21<56:06,  1.28s/it]

Episode 3380/6000
Fitnesses: ['-17.32', '-32.17', '-49.58', '-47.71']
100 fitness avgs: ['-21.30', '-21.45', '-21.35', '-21.32']


 57%|█████▋    | 3399/6000 [1:11:46<58:09,  1.34s/it]

Episode 3400/6000
Fitnesses: ['-24.99', '-118.10', '-6.70', '-94.16']
100 fitness avgs: ['-21.49', '-22.42', '-21.30', '-22.20']


 57%|█████▋    | 3419/6000 [1:12:12<58:43,  1.37s/it]  

Episode 3420/6000
Fitnesses: ['-44.57', '-37.35', '-41.69', '-24.87']
100 fitness avgs: ['-21.42', '-22.25', '-21.39', '-21.23']


 57%|█████▋    | 3439/6000 [1:12:39<58:15,  1.36s/it]  

Episode 3440/6000
Fitnesses: ['-6.80', '-10.09', '-100.27', '-68.07']
100 fitness avgs: ['-20.96', '-21.16', '-22.06', '-21.57']


 58%|█████▊    | 3459/6000 [1:13:04<55:07,  1.30s/it]  

Episode 3460/6000
Fitnesses: ['-3.27', '-56.10', '-21.58', '-25.66']
100 fitness avgs: ['-19.98', '-21.12', '-20.37', '-20.82']


 58%|█████▊    | 3479/6000 [1:13:30<54:29,  1.30s/it]

Episode 3480/6000
Fitnesses: ['-14.03', '-4.65', '-26.52', '-13.35']
100 fitness avgs: ['-19.98', '-20.73', '-20.11', '-19.98']


 58%|█████▊    | 3499/6000 [1:13:57<58:30,  1.40s/it]  

Episode 3500/6000
Fitnesses: ['-387.13', '-6.83', '-8.81', '-23.38']
100 fitness avgs: ['-24.48', '-20.06', '-20.69', '-20.84']


 59%|█████▊    | 3519/6000 [1:14:24<58:06,  1.41s/it]  

Episode 3520/6000
Fitnesses: ['-3.10', '-57.33', '-406.82', '-12.20']
100 fitness avgs: ['-20.05', '-20.60', '-28.51', '-20.78']


 59%|█████▉    | 3539/6000 [1:14:52<57:44,  1.41s/it]  

Episode 3540/6000
Fitnesses: ['-19.25', '-193.99', '-12.41', '-71.00']
100 fitness avgs: ['-20.07', '-21.81', '-20.00', '-21.31']


 59%|█████▉    | 3559/6000 [1:15:17<52:58,  1.30s/it]

Episode 3560/6000
Fitnesses: ['-13.58', '-18.38', '-2.00', '-59.45']
100 fitness avgs: ['-20.06', '-20.11', '-21.26', '-20.52']


 60%|█████▉    | 3579/6000 [1:15:42<52:04,  1.29s/it]

Episode 3580/6000
Fitnesses: ['-33.45', '-31.55', '-16.92', '-64.91']
100 fitness avgs: ['-21.49', '-20.27', '-20.12', '-20.60']


 60%|█████▉    | 3599/6000 [1:16:10<58:15,  1.46s/it]  

Episode 3600/6000
Fitnesses: ['-331.92', '-12.07', '-20.80', '-44.00']
100 fitness avgs: ['-23.31', '-20.26', '-20.20', '-21.80']


 60%|██████    | 3619/6000 [1:16:36<52:54,  1.33s/it]

Episode 3620/6000
Fitnesses: ['-27.16', '-48.59', '-16.67', '-0.49']
100 fitness avgs: ['-20.25', '-22.01', '-20.09', '-21.53']


 61%|██████    | 3639/6000 [1:17:02<51:45,  1.32s/it]

Episode 3640/6000
Fitnesses: ['-26.01', '-23.12', '-42.63', '-28.74']
100 fitness avgs: ['-21.69', '-21.66', '-21.85', '-21.71']


 61%|██████    | 3659/6000 [1:17:27<51:19,  1.32s/it]

Episode 3660/6000
Fitnesses: ['-43.73', '-22.23', '-33.02', '-58.42']
100 fitness avgs: ['-21.95', '-21.73', '-21.87', '-22.15']


 61%|██████▏   | 3679/6000 [1:17:52<49:38,  1.28s/it]

Episode 3680/6000
Fitnesses: ['-65.92', '-50.73', '-11.29', '-48.57']
100 fitness avgs: ['-22.00', '-22.07', '-21.59', '-21.83']


 62%|██████▏   | 3699/6000 [1:18:17<48:47,  1.27s/it]

Episode 3700/6000
Fitnesses: ['-46.00', '-9.80', '-29.63', '-13.72']
100 fitness avgs: ['-21.97', '-21.61', '-21.81', '-21.88']


 62%|██████▏   | 3719/6000 [1:18:42<48:41,  1.28s/it]

Episode 3720/6000
Fitnesses: ['-14.91', '-21.57', '-28.54', '-23.82']
100 fitness avgs: ['-21.67', '-22.01', '-22.08', '-21.95']


 62%|██████▏   | 3739/6000 [1:19:08<49:41,  1.32s/it]

Episode 3740/6000
Fitnesses: ['-29.21', '-3.30', '-50.81', '-5.28']
100 fitness avgs: ['-21.83', '-21.57', '-22.39', '-21.93']


 63%|██████▎   | 3759/6000 [1:19:36<54:06,  1.45s/it]

Episode 3760/6000
Fitnesses: ['-33.64', '-106.02', '-20.19', '-11.37']
100 fitness avgs: ['-21.77', '-22.76', '-22.00', '-21.55']


 63%|██████▎   | 3779/6000 [1:20:00<47:09,  1.27s/it]

Episode 3780/6000
Fitnesses: ['-18.01', '-13.53', '-25.92', '-122.30']
100 fitness avgs: ['-21.41', '-21.81', '-21.93', '-22.45']


 63%|██████▎   | 3799/6000 [1:20:27<50:19,  1.37s/it]

Episode 3800/6000
Fitnesses: ['-12.13', '-37.08', '-294.96', '-20.07']
100 fitness avgs: ['-21.14', '-21.39', '-23.56', '-20.81']


 64%|██████▎   | 3819/6000 [1:20:52<45:54,  1.26s/it]

Episode 3820/6000
Fitnesses: ['-25.37', '-54.78', '-61.05', '-21.07']
100 fitness avgs: ['-21.16', '-21.12', '-21.76', '-20.79']


 64%|██████▍   | 3839/6000 [1:21:17<45:20,  1.26s/it]

Episode 3840/6000
Fitnesses: ['-18.10', '-13.04', '-71.38', '-28.27']
100 fitness avgs: ['-20.79', '-21.08', '-21.32', '-20.89']


 64%|██████▍   | 3859/6000 [1:21:42<46:36,  1.31s/it]

Episode 3860/6000
Fitnesses: ['-2.42', '-19.34', '-25.92', '-5.74']
100 fitness avgs: ['-20.98', '-21.14', '-20.92', '-21.01']


 65%|██████▍   | 3879/6000 [1:22:08<46:19,  1.31s/it]

Episode 3880/6000
Fitnesses: ['-43.28', '-5.88', '-5.87', '-28.55']
100 fitness avgs: ['-21.24', '-20.87', '-20.87', '-21.27']


 65%|██████▍   | 3899/6000 [1:22:33<46:15,  1.32s/it]

Episode 3900/6000
Fitnesses: ['-60.77', '-74.69', '-24.88', '-26.50']
100 fitness avgs: ['-21.27', '-21.80', '-20.91', '-20.93']


 65%|██████▌   | 3919/6000 [1:22:59<46:09,  1.33s/it]

Episode 3920/6000
Fitnesses: ['-30.18', '-65.36', '-67.79', '-32.50']
100 fitness avgs: ['-21.12', '-21.47', '-21.51', '-21.16']


 66%|██████▌   | 3939/6000 [1:23:24<45:14,  1.32s/it]

Episode 3940/6000
Fitnesses: ['-121.54', '-84.94', '-83.61', '-62.80']
100 fitness avgs: ['-22.08', '-21.71', '-22.05', '-21.53']


 66%|██████▌   | 3959/6000 [1:23:52<49:13,  1.45s/it]

Episode 3960/6000
Fitnesses: ['-51.96', '-17.30', '-272.64', '-36.69']
100 fitness avgs: ['-21.92', '-21.57', '-24.13', '-22.29']


 66%|██████▋   | 3979/6000 [1:24:18<44:09,  1.31s/it]

Episode 3980/6000
Fitnesses: ['-21.57', '-42.88', '-19.67', '-10.40']
100 fitness avgs: ['-21.72', '-21.93', '-21.70', '-21.61']


 67%|██████▋   | 3999/6000 [1:24:43<42:55,  1.29s/it]

Episode 4000/6000
Fitnesses: ['-11.00', '-39.30', '-12.96', '-61.45']
100 fitness avgs: ['-21.67', '-21.95', '-21.69', '-22.17']


 67%|██████▋   | 4019/6000 [1:25:08<42:50,  1.30s/it]

Episode 4020/6000
Fitnesses: ['-31.61', '-25.71', '-11.56', '-29.66']
100 fitness avgs: ['-21.90', '-22.12', '-21.71', '-21.88']


 67%|██████▋   | 4039/6000 [1:25:33<41:29,  1.27s/it]

Episode 4040/6000
Fitnesses: ['-30.13', '-3.02', '-78.31', '-19.25']
100 fitness avgs: ['-21.96', '-21.69', '-22.44', '-22.26']


 68%|██████▊   | 4059/6000 [1:25:58<42:00,  1.30s/it]

Episode 4060/6000
Fitnesses: ['-23.20', '-27.45', '-21.04', '-48.22']
100 fitness avgs: ['-21.83', '-22.15', '-22.38', '-22.35']


 68%|██████▊   | 4079/6000 [1:26:23<42:47,  1.34s/it]

Episode 4080/6000
Fitnesses: ['-35.28', '-4.72', '-31.80', '-40.15']
100 fitness avgs: ['-22.62', '-21.77', '-22.35', '-22.12']


 68%|██████▊   | 4099/6000 [1:26:48<40:18,  1.27s/it]

Episode 4100/6000
Fitnesses: ['-94.35', '-21.42', '-22.41', '-11.16']
100 fitness avgs: ['-22.67', '-22.79', '-22.53', '-22.42']


 69%|██████▊   | 4119/6000 [1:27:13<39:44,  1.27s/it]

Episode 4120/6000
Fitnesses: ['-7.71', '-18.13', '-43.76', '-33.74']
100 fitness avgs: ['-22.27', '-22.75', '-22.63', '-22.53']


 69%|██████▉   | 4139/6000 [1:27:39<42:36,  1.37s/it]

Episode 4140/6000
Fitnesses: ['-250.48', '-24.41', '-34.93', '-4.35']
100 fitness avgs: ['-24.66', '-22.87', '-22.50', '-22.20']


 69%|██████▉   | 4159/6000 [1:28:06<42:02,  1.37s/it]

Episode 4160/6000
Fitnesses: ['-2.79', '-250.13', '-41.76', '-41.19']
100 fitness avgs: ['-22.07', '-27.01', '-23.14', '-23.13']


 70%|██████▉   | 4179/6000 [1:28:31<39:15,  1.29s/it]

Episode 4180/6000
Fitnesses: ['-67.59', '-14.44', '-14.56', '-24.31']
100 fitness avgs: ['-22.49', '-21.96', '-23.02', '-23.11']


 70%|██████▉   | 4199/6000 [1:28:57<39:47,  1.33s/it]

Episode 4200/6000
Fitnesses: ['-83.99', '-37.44', '-18.84', '-74.64']
100 fitness avgs: ['-22.67', '-22.21', '-23.18', '-22.58']


 70%|███████   | 4219/6000 [1:29:22<38:48,  1.31s/it]

Episode 4220/6000
Fitnesses: ['-7.80', '-43.33', '-6.11', '-38.46']
100 fitness avgs: ['-23.14', '-22.52', '-23.12', '-23.44']


 71%|███████   | 4239/6000 [1:29:47<38:12,  1.30s/it]

Episode 4240/6000
Fitnesses: ['-20.25', '-8.49', '-53.69', '-7.06']
100 fitness avgs: ['-23.17', '-23.37', '-23.83', '-23.36']


 71%|███████   | 4259/6000 [1:30:13<38:10,  1.32s/it]

Episode 4260/6000
Fitnesses: ['-30.29', '-40.73', '-19.06', '-73.84']
100 fitness avgs: ['-23.57', '-23.67', '-23.46', '-24.01']


 71%|███████▏  | 4279/6000 [1:30:38<37:33,  1.31s/it]

Episode 4280/6000
Fitnesses: ['-11.24', '-20.60', '-4.80', '-19.59']
100 fitness avgs: ['-23.47', '-23.78', '-23.52', '-23.67']


 72%|███████▏  | 4299/6000 [1:31:04<36:57,  1.30s/it]

Episode 4300/6000
Fitnesses: ['-89.16', '-65.19', '-15.44', '-44.76']
100 fitness avgs: ['-24.21', '-23.97', '-23.42', '-23.76']


 72%|███████▏  | 4319/6000 [1:31:29<36:31,  1.30s/it]

Episode 4320/6000
Fitnesses: ['-26.52', '-61.88', '-33.93', '-33.48']
100 fitness avgs: ['-23.55', '-24.69', '-23.63', '-23.62']


 72%|███████▏  | 4339/6000 [1:31:54<35:27,  1.28s/it]

Episode 4340/6000
Fitnesses: ['-35.90', '-32.98', '-41.87', '-23.66']
100 fitness avgs: ['-23.32', '-23.36', '-23.45', '-23.20']


 73%|███████▎  | 4359/6000 [1:32:20<37:01,  1.35s/it]

Episode 4360/6000
Fitnesses: ['-44.35', '-9.74', '-10.16', '-74.38']
100 fitness avgs: ['-23.61', '-23.26', '-23.27', '-24.03']


 73%|███████▎  | 4379/6000 [1:32:47<37:08,  1.37s/it]

Episode 4380/6000
Fitnesses: ['-120.80', '-29.42', '-13.50', '-9.72']
100 fitness avgs: ['-24.38', '-24.23', '-23.65', '-24.04']


 73%|███████▎  | 4399/6000 [1:33:13<34:40,  1.30s/it]

Episode 4400/6000
Fitnesses: ['-20.40', '-21.69', '-23.11', '-29.96']
100 fitness avgs: ['-23.79', '-23.81', '-24.02', '-23.51']


 74%|███████▎  | 4419/6000 [1:33:38<33:41,  1.28s/it]

Episode 4420/6000
Fitnesses: ['-49.06', '-40.98', '-34.71', '-39.75']
100 fitness avgs: ['-24.18', '-24.10', '-24.03', '-24.08']


 74%|███████▍  | 4439/6000 [1:34:04<34:40,  1.33s/it]

Episode 4440/6000
Fitnesses: ['-29.11', '-18.73', '-38.64', '-22.37']
100 fitness avgs: ['-24.22', '-24.12', '-24.32', '-24.16']


 74%|███████▍  | 4459/6000 [1:34:30<34:31,  1.34s/it]

Episode 4460/6000
Fitnesses: ['-43.09', '-6.53', '-7.71', '-10.72']
100 fitness avgs: ['-23.90', '-23.73', '-23.58', '-23.57']


 75%|███████▍  | 4479/6000 [1:34:56<34:28,  1.36s/it]

Episode 4480/6000
Fitnesses: ['-52.74', '-26.93', '-141.73', '-44.45']
100 fitness avgs: ['-23.73', '-23.32', '-24.61', '-23.64']


 75%|███████▍  | 4499/6000 [1:35:22<33:56,  1.36s/it]

Episode 4500/6000
Fitnesses: ['-14.41', '-41.32', '-22.18', '-10.06']
100 fitness avgs: ['-23.29', '-23.56', '-23.37', '-23.25']


 75%|███████▌  | 4519/6000 [1:35:49<32:41,  1.32s/it]

Episode 4520/6000
Fitnesses: ['-23.38', '-133.62', '-57.96', '-120.83']
100 fitness avgs: ['-23.24', '-24.34', '-23.59', '-24.26']


 76%|███████▌  | 4539/6000 [1:36:14<31:49,  1.31s/it]

Episode 4540/6000
Fitnesses: ['-42.52', '-7.48', '-27.42', '-18.08']
100 fitness avgs: ['-23.51', '-23.16', '-24.46', '-23.61']


 76%|███████▌  | 4559/6000 [1:36:40<31:38,  1.32s/it]

Episode 4560/6000
Fitnesses: ['-21.75', '-7.14', '-17.66', '-11.94']
100 fitness avgs: ['-23.27', '-23.12', '-23.68', '-23.17']


 76%|███████▋  | 4579/6000 [1:37:06<31:12,  1.32s/it]

Episode 4580/6000
Fitnesses: ['-33.34', '-28.25', '-43.60', '-15.87']
100 fitness avgs: ['-23.10', '-23.05', '-23.20', '-22.92']


 77%|███████▋  | 4599/6000 [1:37:32<31:43,  1.36s/it]

Episode 4600/6000
Fitnesses: ['-10.51', '-29.52', '-5.96', '-48.30']
100 fitness avgs: ['-22.96', '-23.15', '-22.91', '-23.51']


 77%|███████▋  | 4619/6000 [1:37:58<31:14,  1.36s/it]

Episode 4620/6000
Fitnesses: ['-5.31', '-14.05', '-12.53', '-6.56']
100 fitness avgs: ['-22.84', '-22.97', '-22.96', '-23.09']


 77%|███████▋  | 4639/6000 [1:38:25<30:53,  1.36s/it]

Episode 4640/6000
Fitnesses: ['-16.80', '-36.05', '-9.40', '-18.39']
100 fitness avgs: ['-22.89', '-23.33', '-22.82', '-22.91']


 78%|███████▊  | 4659/6000 [1:38:51<30:26,  1.36s/it]

Episode 4660/6000
Fitnesses: ['-18.99', '-9.78', '-6.18', '-54.93']
100 fitness avgs: ['-22.51', '-22.51', '-22.90', '-22.95']


 78%|███████▊  | 4679/6000 [1:39:19<31:42,  1.44s/it]

Episode 4680/6000
Fitnesses: ['-16.38', '-16.93', '-224.38', '-25.77']
100 fitness avgs: ['-22.99', '-22.61', '-25.07', '-23.08']


 78%|███████▊  | 4699/6000 [1:39:47<29:58,  1.38s/it]

Episode 4700/6000
Fitnesses: ['-19.53', '-19.05', '-21.66', '-13.67']
100 fitness avgs: ['-22.53', '-22.52', '-22.17', '-22.56']


 79%|███████▊  | 4719/6000 [1:40:13<27:56,  1.31s/it]

Episode 4720/6000
Fitnesses: ['-20.83', '-14.79', '-26.33', '-65.50']
100 fitness avgs: ['-22.72', '-22.66', '-22.73', '-23.12']


 79%|███████▉  | 4739/6000 [1:40:39<27:52,  1.33s/it]

Episode 4740/6000
Fitnesses: ['-3.89', '-21.09', '-11.71', '-37.28']
100 fitness avgs: ['-22.66', '-22.91', '-22.74', '-23.07']


 79%|███████▉  | 4759/6000 [1:41:05<28:49,  1.39s/it]

Episode 4760/6000
Fitnesses: ['-11.94', '-20.20', '-62.61', '-29.80']
100 fitness avgs: ['-22.61', '-22.77', '-23.36', '-23.04']


 80%|███████▉  | 4779/6000 [1:41:34<29:38,  1.46s/it]

Episode 4780/6000
Fitnesses: ['-52.21', '-11.30', '-74.80', '-384.52']
100 fitness avgs: ['-22.96', '-22.71', '-23.19', '-27.04']


 80%|███████▉  | 4799/6000 [1:42:01<27:32,  1.38s/it]

Episode 4800/6000
Fitnesses: ['-34.69', '-68.69', '-25.86', '-8.26']
100 fitness avgs: ['-22.90', '-23.24', '-23.06', '-23.11']


 80%|████████  | 4819/6000 [1:42:28<27:16,  1.39s/it]

Episode 4820/6000
Fitnesses: ['-36.71', '-7.26', '-9.86', '-7.61']
100 fitness avgs: ['-23.45', '-23.10', '-23.13', '-22.95']


 81%|████████  | 4839/6000 [1:42:55<26:45,  1.38s/it]

Episode 4840/6000
Fitnesses: ['-47.64', '-31.04', '-18.27', '-24.24']
100 fitness avgs: ['-23.50', '-23.33', '-23.05', '-23.26']


 81%|████████  | 4859/6000 [1:43:21<25:13,  1.33s/it]

Episode 4860/6000
Fitnesses: ['-60.57', '-49.04', '-45.38', '-18.94']
100 fitness avgs: ['-23.55', '-23.44', '-23.68', '-23.14']


 81%|████████▏ | 4879/6000 [1:43:48<25:28,  1.36s/it]

Episode 4880/6000
Fitnesses: ['-15.77', '-59.39', '-48.57', '-9.53']
100 fitness avgs: ['-23.24', '-24.23', '-23.87', '-23.73']


 82%|████████▏ | 4899/6000 [1:44:16<26:38,  1.45s/it]

Episode 4900/6000
Fitnesses: ['-24.59', '-99.27', '-7.03', '-39.21']
100 fitness avgs: ['-23.93', '-24.67', '-23.75', '-24.07']


 82%|████████▏ | 4919/6000 [1:44:43<23:42,  1.32s/it]

Episode 4920/6000
Fitnesses: ['-1.80', '-28.31', '-44.95', '-9.52']
100 fitness avgs: ['-23.49', '-23.76', '-24.10', '-23.57']


 82%|████████▏ | 4939/6000 [1:45:09<23:39,  1.34s/it]

Episode 4940/6000
Fitnesses: ['-8.90', '-8.41', '-24.84', '-21.50']
100 fitness avgs: ['-23.52', '-23.52', '-23.95', '-23.65']


 83%|████████▎ | 4959/6000 [1:45:34<22:23,  1.29s/it]

Episode 4960/6000
Fitnesses: ['-18.67', '-23.22', '-28.33', '-37.56']
100 fitness avgs: ['-22.90', '-22.94', '-23.00', '-23.09']


 83%|████████▎ | 4979/6000 [1:46:00<22:55,  1.35s/it]

Episode 4980/6000
Fitnesses: ['-9.10', '-44.23', '-72.72', '-66.69']
100 fitness avgs: ['-22.75', '-23.20', '-23.48', '-23.32']


 83%|████████▎ | 4999/6000 [1:46:27<22:08,  1.33s/it]

Episode 5000/6000
Fitnesses: ['-93.39', '-117.85', '-14.97', '-8.47']
100 fitness avgs: ['-23.42', '-23.67', '-23.21', '-22.57']


 84%|████████▎ | 5019/6000 [1:46:53<21:40,  1.33s/it]

Episode 5020/6000
Fitnesses: ['-60.76', '-31.86', '-20.49', '-62.84']
100 fitness avgs: ['-23.13', '-23.48', '-22.73', '-24.00']


 84%|████████▍ | 5039/6000 [1:47:18<20:53,  1.30s/it]

Episode 5040/6000
Fitnesses: ['-15.06', '-59.64', '-12.18', '-32.59']
100 fitness avgs: ['-22.72', '-23.16', '-23.44', '-23.65']


 84%|████████▍ | 5059/6000 [1:47:44<20:32,  1.31s/it]

Episode 5060/6000
Fitnesses: ['-1.22', '-32.03', '-5.90', '-37.50']
100 fitness avgs: ['-23.22', '-23.53', '-23.27', '-23.79']


 85%|████████▍ | 5079/6000 [1:48:10<20:12,  1.32s/it]

Episode 5080/6000
Fitnesses: ['-5.14', '-11.70', '-21.67', '-62.37']
100 fitness avgs: ['-22.92', '-22.98', '-23.13', '-23.49']


 85%|████████▍ | 5099/6000 [1:48:35<20:04,  1.34s/it]

Episode 5100/6000
Fitnesses: ['-45.61', '-45.08', '-21.21', '-50.19']
100 fitness avgs: ['-23.21', '-23.21', '-23.04', '-23.33']


 85%|████████▌ | 5119/6000 [1:49:01<19:57,  1.36s/it]

Episode 5120/6000
Fitnesses: ['-17.50', '-46.85', '-54.54', '-1.04']
100 fitness avgs: ['-22.73', '-23.03', '-23.10', '-22.57']


 86%|████████▌ | 5139/6000 [1:49:28<18:56,  1.32s/it]

Episode 5140/6000
Fitnesses: ['-10.03', '-27.15', '-53.35', '-30.99']
100 fitness avgs: ['-22.21', '-22.54', '-23.18', '-22.42']


 86%|████████▌ | 5159/6000 [1:49:54<18:44,  1.34s/it]

Episode 5160/6000
Fitnesses: ['-40.62', '-18.17', '-11.44', '-23.48']
100 fitness avgs: ['-22.51', '-22.50', '-22.56', '-22.55']


 86%|████████▋ | 5179/6000 [1:50:20<18:33,  1.36s/it]

Episode 5180/6000
Fitnesses: ['-45.88', '-42.06', '-58.81', '-46.46']
100 fitness avgs: ['-22.99', '-22.89', '-23.06', '-22.99']


 87%|████████▋ | 5199/6000 [1:50:46<17:58,  1.35s/it]

Episode 5200/6000
Fitnesses: ['-19.90', '-39.56', '-20.24', '-21.72']
100 fitness avgs: ['-22.84', '-23.13', '-22.94', '-22.95']


 87%|████████▋ | 5219/6000 [1:51:12<17:30,  1.35s/it]

Episode 5220/6000
Fitnesses: ['-74.66', '-38.86', '-20.45', '-26.00']
100 fitness avgs: ['-23.39', '-23.13', '-22.85', '-23.00']


 87%|████████▋ | 5239/6000 [1:51:42<19:17,  1.52s/it]

Episode 5240/6000
Fitnesses: ['-32.96', '-110.23', '-343.09', '-14.22']
100 fitness avgs: ['-22.90', '-23.83', '-26.00', '-22.87']


 88%|████████▊ | 5259/6000 [1:52:07<15:53,  1.29s/it]

Episode 5260/6000
Fitnesses: ['-37.32', '-21.46', '-26.60', '-54.70']
100 fitness avgs: ['-23.18', '-23.02', '-23.07', '-23.35']


 88%|████████▊ | 5279/6000 [1:52:32<15:23,  1.28s/it]

Episode 5280/6000
Fitnesses: ['-75.37', '-60.71', '-34.88', '-24.31']
100 fitness avgs: ['-23.18', '-23.03', '-22.93', '-22.72']


 88%|████████▊ | 5299/6000 [1:52:57<14:54,  1.28s/it]

Episode 5300/6000
Fitnesses: ['-44.40', '-20.25', '-31.58', '-26.43']
100 fitness avgs: ['-22.67', '-22.89', '-23.00', '-22.95']


 89%|████████▊ | 5319/6000 [1:53:23<15:27,  1.36s/it]

Episode 5320/6000
Fitnesses: ['-30.58', '-33.29', '-4.01', '-123.60']
100 fitness avgs: ['-23.03', '-23.12', '-22.88', '-23.96']


 89%|████████▉ | 5339/6000 [1:53:49<14:28,  1.31s/it]

Episode 5340/6000
Fitnesses: ['-23.74', '-4.29', '-52.94', '-12.14']
100 fitness avgs: ['-22.40', '-22.44', '-22.69', '-22.43']


 89%|████████▉ | 5359/6000 [1:54:16<14:43,  1.38s/it]

Episode 5360/6000
Fitnesses: ['-12.05', '-21.91', '-40.95', '-7.52']
100 fitness avgs: ['-22.46', '-22.56', '-22.75', '-22.37']


 90%|████████▉ | 5379/6000 [1:54:42<14:00,  1.35s/it]

Episode 5380/6000
Fitnesses: ['-113.40', '-36.61', '-11.43', '-16.40']
100 fitness avgs: ['-23.33', '-22.56', '-22.31', '-22.45']


 90%|████████▉ | 5399/6000 [1:55:08<13:31,  1.35s/it]

Episode 5400/6000
Fitnesses: ['-24.99', '-19.96', '-40.24', '-46.80']
100 fitness avgs: ['-22.49', '-22.44', '-22.65', '-22.96']


 90%|█████████ | 5419/6000 [1:55:35<12:47,  1.32s/it]

Episode 5420/6000
Fitnesses: ['-24.53', '-12.97', '-47.90', '-7.37']
100 fitness avgs: ['-22.44', '-22.37', '-22.67', '-22.27']


 91%|█████████ | 5439/6000 [1:56:01<12:37,  1.35s/it]

Episode 5440/6000
Fitnesses: ['-5.56', '-43.41', '-18.17', '-35.78']
100 fitness avgs: ['-21.64', '-22.02', '-21.87', '-22.12']


 91%|█████████ | 5459/6000 [1:56:26<11:26,  1.27s/it]

Episode 5460/6000
Fitnesses: ['-12.83', '-2.19', '-17.68', '-24.67']
100 fitness avgs: ['-21.51', '-21.41', '-22.04', '-21.63']


 91%|█████████▏| 5479/6000 [1:56:51<11:19,  1.30s/it]

Episode 5480/6000
Fitnesses: ['-29.46', '-15.53', '-18.57', '-25.06']
100 fitness avgs: ['-21.66', '-21.74', '-21.55', '-21.72']


 92%|█████████▏| 5499/6000 [1:57:16<10:44,  1.29s/it]

Episode 5500/6000
Fitnesses: ['-91.40', '-69.09', '-45.84', '-70.08']
100 fitness avgs: ['-22.57', '-22.32', '-22.11', '-22.27']


 92%|█████████▏| 5519/6000 [1:57:42<10:32,  1.31s/it]

Episode 5520/6000
Fitnesses: ['-14.94', '-47.44', '-27.22', '-25.39']
100 fitness avgs: ['-22.14', '-22.62', '-22.26', '-22.24']


 92%|█████████▏| 5539/6000 [1:58:08<10:18,  1.34s/it]

Episode 5540/6000
Fitnesses: ['-10.57', '-12.73', '-52.16', '-1.33']
100 fitness avgs: ['-21.53', '-21.66', '-21.95', '-21.92']


 93%|█████████▎| 5559/6000 [1:58:34<09:45,  1.33s/it]

Episode 5560/6000
Fitnesses: ['-19.06', '-21.48', '-18.82', '-63.14']
100 fitness avgs: ['-22.09', '-22.12', '-21.70', '-22.27']


 93%|█████████▎| 5579/6000 [1:59:00<09:14,  1.32s/it]

Episode 5580/6000
Fitnesses: ['-50.06', '-101.31', '-24.52', '-41.46']
100 fitness avgs: ['-21.87', '-22.38', '-21.61', '-21.78']


 93%|█████████▎| 5599/6000 [1:59:25<09:03,  1.36s/it]

Episode 5600/6000
Fitnesses: ['-4.14', '-11.96', '-23.81', '-2.47']
100 fitness avgs: ['-21.21', '-21.29', '-21.41', '-21.37']


 94%|█████████▎| 5619/6000 [1:59:51<08:21,  1.32s/it]

Episode 5620/6000
Fitnesses: ['-36.68', '-5.80', '-5.74', '-44.61']
100 fitness avgs: ['-21.73', '-21.42', '-21.27', '-21.66']


 94%|█████████▍| 5639/6000 [2:00:17<07:57,  1.32s/it]

Episode 5640/6000
Fitnesses: ['-9.78', '-25.64', '-17.52', '-9.46']
100 fitness avgs: ['-21.10', '-21.26', '-21.34', '-21.25']


 94%|█████████▍| 5659/6000 [2:00:42<07:19,  1.29s/it]

Episode 5660/6000
Fitnesses: ['-21.59', '-54.54', '-87.71', '-14.07']
100 fitness avgs: ['-21.14', '-21.48', '-21.80', '-20.92']


 95%|█████████▍| 5679/6000 [2:01:09<07:40,  1.43s/it]

Episode 5680/6000
Fitnesses: ['-56.78', '-393.53', '-11.50', '-39.91']
100 fitness avgs: ['-21.37', '-25.30', '-21.14', '-21.43']


 95%|█████████▍| 5699/6000 [2:01:35<06:43,  1.34s/it]

Episode 5700/6000
Fitnesses: ['-69.52', '-57.95', '-27.25', '-27.62']
100 fitness avgs: ['-21.74', '-21.85', '-21.60', '-21.32']


 95%|█████████▌| 5719/6000 [2:02:01<06:09,  1.31s/it]

Episode 5720/6000
Fitnesses: ['-53.69', '-63.68', '-29.94', '-42.31']
100 fitness avgs: ['-21.99', '-22.09', '-21.89', '-21.59']


 96%|█████████▌| 5739/6000 [2:02:27<05:47,  1.33s/it]

Episode 5740/6000
Fitnesses: ['-12.81', '-50.03', '-67.24', '-93.53']
100 fitness avgs: ['-21.99', '-22.06', '-22.23', '-22.50']


 96%|█████████▌| 5759/6000 [2:02:52<05:17,  1.32s/it]

Episode 5760/6000
Fitnesses: ['-12.60', '-21.36', '-8.12', '-21.81']
100 fitness avgs: ['-22.00', '-22.16', '-21.95', '-22.60']


 96%|█████████▋| 5779/6000 [2:03:18<04:50,  1.32s/it]

Episode 5780/6000
Fitnesses: ['-27.97', '-51.60', '-55.60', '-24.85']
100 fitness avgs: ['-22.05', '-22.29', '-22.37', '-22.02']


 97%|█████████▋| 5799/6000 [2:03:44<04:29,  1.34s/it]

Episode 5800/6000
Fitnesses: ['-36.80', '-11.07', '-3.87', '-79.86']
100 fitness avgs: ['-22.19', '-21.96', '-21.86', '-22.65']


 97%|█████████▋| 5819/6000 [2:04:08<03:48,  1.26s/it]

Episode 5820/6000
Fitnesses: ['-8.60', '-94.27', '-5.88', '-12.79']
100 fitness avgs: ['-21.40', '-22.58', '-21.37', '-21.77']


 97%|█████████▋| 5839/6000 [2:04:34<03:33,  1.33s/it]

Episode 5840/6000
Fitnesses: ['-79.33', '-42.27', '-19.18', '-72.99']
100 fitness avgs: ['-22.03', '-22.88', '-21.43', '-21.97']


 98%|█████████▊| 5859/6000 [2:05:00<03:06,  1.32s/it]

Episode 5860/6000
Fitnesses: ['-91.14', '-90.19', '-122.04', '-8.91']
100 fitness avgs: ['-22.32', '-22.31', '-22.63', '-22.94']


 98%|█████████▊| 5879/6000 [2:05:26<02:37,  1.30s/it]

Episode 5880/6000
Fitnesses: ['-8.78', '-8.51', '-42.53', '-27.48']
100 fitness avgs: ['-22.97', '-22.97', '-23.31', '-22.53']


 98%|█████████▊| 5899/6000 [2:05:51<02:15,  1.34s/it]

Episode 5900/6000
Fitnesses: ['-51.99', '-11.58', '-15.13', '-41.90']
100 fitness avgs: ['-23.22', '-22.82', '-22.85', '-23.12']


 99%|█████████▊| 5919/6000 [2:06:17<01:47,  1.33s/it]

Episode 5920/6000
Fitnesses: ['-20.55', '-21.14', '-28.42', '-16.51']
100 fitness avgs: ['-22.70', '-22.74', '-22.81', '-22.66']


 99%|█████████▉| 5939/6000 [2:06:43<01:23,  1.37s/it]

Episode 5940/6000
Fitnesses: ['-15.21', '-43.88', '-49.74', '-75.25']
100 fitness avgs: ['-22.18', '-22.51', '-22.53', '-22.94']


 99%|█████████▉| 5959/6000 [2:07:11<01:00,  1.48s/it]

Episode 5960/6000
Fitnesses: ['-54.93', '-48.93', '-19.69', '-393.58']
100 fitness avgs: ['-22.56', '-22.50', '-22.21', '-26.27']


100%|█████████▉| 5979/6000 [2:07:38<00:27,  1.33s/it]

Episode 5980/6000
Fitnesses: ['-40.08', '-71.74', '-12.97', '-50.87']
100 fitness avgs: ['-22.50', '-22.82', '-22.58', '-22.61']


100%|█████████▉| 5999/6000 [2:08:04<00:01,  1.31s/it]

Episode 6000/6000
Fitnesses: ['-14.66', '-26.78', '-52.48', '-1.72']
100 fitness avgs: ['-22.34', '-22.38', '-22.72', '-22.21']


100%|██████████| 6000/6000 [2:08:05<00:00,  1.28s/it]


In [4]:
import os

import imageio
import numpy as np
import torch
from pettingzoo.mpe import simple_speaker_listener_v4
from PIL import Image, ImageDraw

from agilerl.algorithms.matd3 import MATD3


# Define function to return image
def _label_with_episode_number(frame, episode_num):
    """Convert a rendered frame to a PIL image stamped with its episode number.

    Args:
        frame: Array accepted by ``Image.fromarray`` (presumably an RGB frame
            returned by ``env.render()`` -- confirm against the caller).
        episode_num: Zero-based episode index; the label shows ``episode_num + 1``.

    Returns:
        The PIL image built from ``frame`` with the label drawn near its
        top-left corner.
    """
    image = Image.fromarray(frame)

    # Pick a text colour that contrasts with the frame's mean brightness:
    # white on dark frames, black on bright ones.
    frame_is_dark = np.mean(frame) < 128
    label_color = (255, 255, 255) if frame_is_dark else (0, 0, 0)

    # Anchor the label at a fixed fraction of the image size so that it scales
    # with the render resolution.
    width, height = image.size
    ImageDraw.Draw(image).text(
        (width / 20, height / 18),
        f"Episode: {episode_num+1}",
        fill=label_color,
    )

    return image


if __name__ == "__main__":
    # Load a trained MATD3 agent, roll it out for a few episodes in the
    # speaker-listener environment, print the rewards and save the rendered
    # frames as a GIF.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Configure the environment
    env = simple_speaker_listener_v4.parallel_env(
        continuous_actions=True, render_mode="rgb_array"
    )
    env.reset()

    # Spaces exposing `.n` are treated as discrete/one-hot; any failure to read
    # `.n` falls back to the shape-based description of the space.
    try:
        state_dim = [env.observation_space(agent).n for agent in env.agents]
        one_hot = True
    except Exception:
        state_dim = [env.observation_space(agent).shape for agent in env.agents]
        one_hot = False
    try:
        action_dim = [env.action_space(agent).n for agent in env.agents]
        discrete_actions = True
        max_action = None
        min_action = None
    except Exception:
        action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
        discrete_actions = False
        max_action = [env.action_space(agent).high for agent in env.agents]
        min_action = [env.action_space(agent).low for agent in env.agents]

    # Number of agents and their IDs, as reported by the environment
    n_agents = env.num_agents
    agent_ids = env.agents

    # Instantiate an MATD3 object
    matd3 = MATD3(
        state_dim,
        action_dim,
        one_hot,
        n_agents,
        agent_ids,
        max_action,
        min_action,
        discrete_actions,
        device=device,
    )

    # Load the saved algorithm into the MATD3 object
    path = "./models/MATD3/MATD3_trained_agent.pt"
    matd3.loadCheckpoint(path)

    # Define test loop parameters
    episodes = 10  # Number of episodes to test agent on
    max_steps = 25  # Max number of steps to take in the environment in each episode

    rewards = []  # List to collect total episodic reward
    frames = []  # List to collect frames
    indi_agent_rewards = {
        agent_id: [] for agent_id in agent_ids
    }  # Dictionary to collect individual agent rewards

    # Test loop for inference
    for ep in range(episodes):
        state, info = env.reset()
        agent_reward = {agent_id: 0 for agent_id in agent_ids}
        for _ in range(max_steps):
            # Both entries are optional; a missing key is passed on as None
            agent_mask = info.get("agent_mask")
            env_defined_actions = info.get("env_defined_actions")

            # Get next action from agent
            cont_actions, discrete_action = matd3.getAction(
                state,
                epsilon=0,
                agent_mask=agent_mask,
                env_defined_actions=env_defined_actions,
            )
            if matd3.discrete_actions:
                action = discrete_action
            else:
                action = cont_actions

            # Save the frame for this step and append to frames list
            frame = env.render()
            frames.append(_label_with_episode_number(frame, episode_num=ep))

            # Take action in environment
            state, reward, termination, truncation, info = env.step(action)

            # Save agent's reward for this step in this episode
            for agent_id, r in reward.items():
                agent_reward[agent_id] += r

            # Stop episode if any agent has terminated or been truncated
            if any(truncation.values()) or any(termination.values()):
                break

        # Total score for the episode is the sum over all agents' returns
        score = sum(agent_reward.values())
        rewards.append(score)

        # Record agent specific episodic reward
        for agent_id in agent_ids:
            indi_agent_rewards[agent_id].append(agent_reward[agent_id])

        print("-" * 15, f"Episode: {ep}", "-" * 15)
        print("Episodic Reward: ", rewards[-1])
        for agent_id, reward_list in indi_agent_rewards.items():
            print(f"{agent_id} reward: {reward_list[-1]}")
    env.close()

    # Save the gif to specified path
    gif_path = "./videos/"
    os.makedirs(gif_path, exist_ok=True)
    # NOTE(review): the unit of `duration` (seconds vs. milliseconds per frame)
    # appears to depend on the installed imageio version -- confirm.
    imageio.mimwrite(
        os.path.join(gif_path, "speaker_listener.gif"), frames, duration=10
    )

--------------- Episode: 0 ---------------
Episodic Reward:  -57.333240274092745
speaker_0 reward: -28.666620137046372
listener_0 reward: -28.666620137046372
--------------- Episode: 1 ---------------
Episodic Reward:  -28.73107550578979
speaker_0 reward: -14.365537752894895
listener_0 reward: -14.365537752894895
--------------- Episode: 2 ---------------
Episodic Reward:  -15.869862488635734
speaker_0 reward: -7.934931244317867
listener_0 reward: -7.934931244317867
--------------- Episode: 3 ---------------
Episodic Reward:  -24.833338078791815
speaker_0 reward: -12.416669039395908
listener_0 reward: -12.416669039395908
--------------- Episode: 4 ---------------
Episodic Reward:  -27.32326055646619
speaker_0 reward: -13.661630278233096
listener_0 reward: -13.661630278233096
--------------- Episode: 5 ---------------
Episodic Reward:  -45.61750602262766
speaker_0 reward: -22.80875301131383
listener_0 reward: -22.80875301131383
--------------- Episode: 6 ---------------
Episodic Reward:

In [5]:
# Play a sound (presumably an audible cue that the cells above have finished).

import numpy as np
import IPython

rate = 48000  # Sample rate passed to the audio widget
duration = 30.0  # Multiplied by `rate` to get the number of samples

# One index per sample: 0, 1, ..., rate * duration - 1
angle_list = np.arange(rate * duration)

# Sine wave at 523 cycles per `rate` samples
phase_per_sample = 2 * np.pi * 523 / rate
sound = np.sin(phase_per_sample * angle_list)

# Last expression of the cell, so the notebook renders the audio player
IPython.display.Audio(sound, rate=rate, autoplay=True)