In [1]:
import datetime
import os
import pickle
import random

import neat
import numpy as np
from mlagents_envs.environment import UnityEnvironment  # Import Unity environment
from mlagents_envs.envs.unity_aec_env import UnityAECEnv
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

from comunication_channel import AgentLogChannel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration constants.
# MAX_STEPS is assigned again (to 1200) in the environment-setup cell further
# down, and that later assignment is the one in effect after a top-to-bottom
# run. The same value is used here so the notebook never holds two conflicting
# step limits; keep both assignments in sync if this is changed.
MAX_STEPS = 1200
NUM_RUNS = 5
MAX_GENS = 2500

In [3]:
# Side channels handed to the Unity environment: the engine configuration
# channel (time scale set to 1 here) and the project's AgentLogChannel, which
# other cells use to send the requested number of agents.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
agent_count_channel = AgentLogChannel()

# NOTE(review): neither env_path nor save_nn_destination is referenced by any
# cell visible in this notebook; the environment below is created with
# file_name=None instead of env_path.
env_path = "./Builds/train-env/autonomous-drone.exe"
save_nn_destination = 'result/best.pkl'

# NOTE(review): file_name=None presumably attaches to a running Unity Editor
# rather than launching the build at env_path - confirm against the ML-Agents docs.
env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=False, side_channels=[engine_config_channel, agent_count_channel])
# `env` is rebound to the parallel wrapper; from here on it no longer refers to
# the raw UnityEnvironment object.
env = UnityParallelEnv(env)
env.reset()
num_agents = len(env.possible_agents)

# Action and observation sizes are read from the first possible agent's spaces.
num_actions = env.action_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_actions} actions in enviroment")
num_inputs = env.observation_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_inputs} inputs in enviroment")

# MAX_STEPS is also assigned in the constants cell near the top of the notebook;
# this assignment is the one in effect after a top-to-bottom run.
MAX_STEPS = 1200
NUM_TRIES = 3

DS: 0
DS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
DS: 0
DS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
There is total of 4 actions in enviroment
There is total of 14 inputs in enviroment


In [4]:
def set_agents_and_double_reset(num_agents: int):
    """Send the requested agent count to Unity, then reset the environment twice.

    Args:
        num_agents: Value sent through ``agent_count_channel.send_int``.

    Returns:
        Whatever the second ``env.reset()`` call returns.

    NOTE(review): the second reset is presumably needed for the new agent
    count to be reflected in the wrapper - confirm against the Unity side.
    """
    agent_count_channel.send_int(data=num_agents)
    env.reset()  # result of the first reset is intentionally discarded
    return env.reset()

In [5]:
def get_observation_for_agent(agent: int, observations):
    """Look up the observation belonging to a numeric agent id.

    Each key of ``observations`` is split on ``"="`` and its third piece is
    parsed as an integer (e.g. ``"RotorControl?team=0?agent_id=1"`` -> ``1``).

    Args:
        agent: Numeric agent id to look for.
        observations: Mapping from agent name to that agent's observation.

    Returns:
        The observation stored under the first key whose parsed id equals
        ``agent``, or ``None`` when no key matches.
    """
    matching_names = (
        name for name in observations if int(name.split("=")[2]) == agent
    )
    first_match = next(matching_names, None)
    if first_match is None:
        return None
    return observations[first_match]

In [6]:
# Smoke check: request two agents, reset, and display the observation stored
# for agent id 1.
obs = set_agents_and_double_reset(2)
get_observation_for_agent(1, obs)

Sending: 2
DS: 0
DS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
DS: 0
DS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1


array([ 0.        ,  3.030908  ,  0.        ,  3.030908  ,  0.        ,
        0.        ,  1.        ,  0.        ,  0.        , -0.09739784,
        0.        ,  0.        , -0.19296518,  0.        ], dtype=float32)

In [7]:
def map_agents():
    """Map each agent's numeric id to its position in ``env.agents``.

    The id is the integer parsed from the third ``"="``-separated piece of
    the agent name. If the same id appears more than once, the position of
    its last occurrence is kept.

    Returns:
        dict of ``{agent_id: index_in_env_agents}``.
    """
    return {
        int(name.split("=")[2]): position
        for position, name in enumerate(env.agents)
    }

In [8]:
# Build and display the agent-id -> index mapping. A descriptive name is used
# instead of `map` so the builtin map() is not shadowed at notebook scope.
agent_index_by_id = map_agents()
agent_index_by_id

{0: 0, 1: 1}

In [9]:
# Random-action rollout: for every requested agent count, reconfigure the
# environment, step until `env.agents` is empty, and print the best return.
# (`random` is imported in the notebook's import cell.)
env.reset()
target_agents = [4, 4, 4]
possible_actions = [[1, 1, 1, 1]]
for target in target_agents:
    obs = set_agents_and_double_reset(num_agents=target)
    assert env.num_agents == target, f"Target agents do not match num_agents! expected {target} got {env.num_agents}"
    # Returns are keyed by numeric agent id (parsed from the agent name), with
    # every agent present after the reset starting at zero.
    rewards = {int(agent.split("=")[2]): 0 for agent in env.agents}
    while env.agents:
        actions = {agent: random.choice(possible_actions) for agent in env.agents}  # env.action_space(agent).sample()
        obs, reward, done, _ = env.step(actions)
        print(f"Done: {done}")
        print(f"Env agents: {env.agents}")
        # Accumulate from the reward dict returned by step() rather than from
        # env.agents: the recorded output shows env.agents can list the same
        # agent twice after a step (which would double-count its reward), and
        # it may no longer include agents that finished on this step (which
        # would drop their last reward).
        for agent, value in reward.items():
            agent_id = int(agent.split("=")[2])
            rewards[agent_id] = rewards.get(agent_id, 0) + value
    print(max(rewards.values(), default=0))
    print("\nFinished generation")

DS: 0
DS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
Sending: 4
DS: 0
DS: 1
DS: 2
DS: 3
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
RotorControl?team=0?agent_id=2
RotorControl?team=0?agent_id=3
DS: 0
DS: 1
DS: 2
DS: 3
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=1
RotorControl?team=0?agent_id=2
RotorControl?team=0?agent_id=3
DS: 0
DS: 1
DS: 2
DS: 3
TS: 1
RotorControl?team=0?agent_id=0
RotorControl?team=0?agent_id=2
RotorControl?team=0?agent_id=3
Done: {'RotorControl?team=0?agent_id=1': False, 'RotorControl?team=0?agent_id=0': False, 'RotorControl?team=0?agent_id=2': False, 'RotorControl?team=0?agent_id=3': False}
Env agents: ['RotorControl?team=0?agent_id=0', 'RotorControl?team=0?agent_id=1', 'RotorControl?team=0?agent_id=1', 'RotorControl?team=0?agent_id=2', 'RotorControl?team=0?agent_id=3']


KeyboardInterrupt: 

In [12]:
# Close the environment wrapper created in the setup cell.
env.close()

# Problems
In env_helpers, unwrap_batch_steps returns only one agent in decision id, and in termination_id after it has finished;
therefore the episode ends not when all agents are done but when the first agent is done.