In [3]:
from mlagents_envs.environment import UnityEnvironment  # Import Unity environment
from mlagents_envs.envs.unity_aec_env import UnityAECEnv
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from comunication_channel import AgentLogChannel
import neat
import os
import datetime
import pickle

In [4]:

# Side channels handed to the Unity environment below.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
# Custom channel from the local `comunication_channel` module; later cells call
# its send_int() — presumably to tell the build how many agents to spawn (confirm
# against the channel implementation).
agent_count_channel = AgentLogChannel()

# Windows-style relative path to the built executable.
env_file_name = "..\\Builds\\single-agent-env\\autonomous-drone.exe"

# Launch the build without graphics and register both side channels.
env = UnityEnvironment(file_name=env_file_name, worker_id=0, no_graphics=True, side_channels=[engine_config_channel, agent_count_channel])
# Re-bind `env` to the AEC wrapper; the rest of the notebook uses its
# agent_iter / last / step interface.
env = UnityAECEnv(env)
env.reset()
# Agent count reported by the wrapper right after the first reset.
num_agents = len(env.possible_agents)

# num_actions = env.action_space(env.possible_agents[0]).n
# observation_size = env.observation_space(env.possible_agents[0]).shape

<class 'gym.spaces.space.Space'>


In [43]:
# NOTE(review): in file order this closes the environment before the cells below use it.
# The execution counts show the notebook was run out of order; run this only when finished.
env.close()

In [40]:
# Action count = first dimension of the first agent's action-space shape.
num_actions = env.action_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_actions} actions in enviroment")

There is total of 4 actions in enviroment


In [39]:
# Draw one random action (its value is discarded: not displayed, not stored) ...
env.action_space(env.possible_agents[0]).sample()
# ... then step the environment with a second, independent random sample.
env.step(env.action_space(env.possible_agents[0]).sample())

WTF [ 0 -1 -1  0], space: int32


In [17]:
# Input count = first dimension of the first agent's observation-space shape.
num_inputs = env.observation_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_inputs} inputs in enviroment")

There is total of 14 inputs in enviroment


In [37]:
# Agent names currently reported by the environment; the bare name displays them.
agents = env.agents
agents

['RotorControl?team=0?agent_id=0']

In [7]:
def send_num_agents(num: int) -> None:
    """Send ``num`` as an integer over the module-level ``agent_count_channel``.

    Presumably the Unity build reads this as the number of agents to spawn —
    confirm against the channel implementation in ``comunication_channel``.

    Args:
        num: Value forwarded to ``agent_count_channel.send_int``.
    """
    agent_count_channel.send_int(data=num)

In [39]:
# Test sending data
# Manual check: push the value 2 through the side channel via send_num_agents().
targetAgentsCount = 2
send_num_agents(targetAgentsCount)

Sending: 2


In [30]:
def map_agents():
    """Map each live agent's numeric id to its position in ``env.agents``.

    Agent names look like ``'RotorControl?team=0?agent_id=0'``; the id is the
    text after the second ``=`` in the name.

    Returns:
        dict[int, int]: agent id -> index of that agent in ``env.agents``.
        If two names carry the same id, the later index wins.
    """
    # `enumerate` replaces the hand-maintained counter, and no local is named
    # `map` any more, so the builtin stays usable inside this function.
    return {
        int(agent_name.split("=")[2]): index
        for index, agent_name in enumerate(env.agents)
    }

In [52]:
def set_agents_and_double_reset(num_agents: int):
    """Send ``num_agents`` over the side channel, then reset the environment twice.

    Args:
        num_agents: Value forwarded to ``agent_count_channel.send_int``.
    """
    agent_count_channel.send_int(data=num_agents)
    # Two back-to-back resets, as the function name says. Presumably the first
    # delivers the queued message and the second starts an episode with the new
    # agent count — TODO confirm against the Unity side.
    for _ in range(2):
        env.reset()

In [46]:
def create_policies(genomes, cfg):
    """Build one feed-forward network per genome and zero each genome's fitness.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs.
        cfg: NEAT configuration passed to ``FeedForwardNetwork.create``.

    Returns:
        list: One network per genome, in the same order as ``genomes``.
    """
    def _build(genome):
        # Network first, fitness reset second — same per-genome order as before.
        network = neat.nn.FeedForwardNetwork.create(genome, cfg)
        genome.fitness = 0
        return network

    return [_build(genome) for _genome_id, genome in genomes]

In [63]:
MAX_STEPS = 1000  # per-agent cap on iterations of the evaluation loop
def eval_genomes(genomes, cfg):
    """NEAT fitness function: one genome controls one Unity agent for an episode.

    Builds a network per genome, asks the environment for that many agents,
    steps every agent with its own network, and finally stores the last reward
    seen for each agent as the matching genome's fitness.

    Previously the collected rewards were discarded, so every genome kept the
    fitness of 0 assigned by ``create_policies`` and selection had no signal.

    Args:
        genomes: ``(genome_id, genome)`` pairs supplied by ``Population.run``.
        cfg: NEAT configuration used to build the networks.
    """
    genomes = list(genomes)  # iterated twice: network creation and fitness write-back
    policies = create_policies(genomes, cfg)
    target_agents = len(policies)
    set_agents_and_double_reset(num_agents=target_agents)
    rewards = [0] * target_agents
    agent_index = map_agents()  # agent id -> slot in policies/rewards
    for agent in env.agent_iter(env.num_agents * MAX_STEPS):
        obs, reward, done, info = env.last(observe=True)
        if done:
            # A finished agent is stepped with None; its reward on this turn is
            # not recorded (unchanged from the original logic).
            action = None
        else:
            slot = agent_index[int(agent.split("=")[2])]
            # Overwrite rather than sum: env.last() appears to return a running
            # cumulative reward — TODO confirm against UnityAECEnv.last().
            rewards[slot] = reward
            action = policies[slot].activate(obs)
        env.step(action)
    # Hand the result back to NEAT: policies[i] / rewards[i] belong to genomes[i].
    for (_genome_id, genome), latest_reward in zip(genomes, rewards):
        genome.fitness = float(latest_reward)
    env.reset()
    print("\nFinished generation")

In [64]:
def run(config_file, run, datte, max_generations=5):
    """Run one NEAT experiment and pickle the best genome.

    Args:
        config_file: Path to the NEAT configuration file.
        run: Identifier of this run; used in the log path and passed to the
            reporter. (The parameter shadows this function's own name inside
            the body.)
        datte: Timestamp string shared by a batch of runs; used in the log path.
        max_generations: Number of generations handed to ``Population.run``.
            Defaults to 5, the value that used to be hard-coded.

    Side effects:
        Resets the module-level ``env`` and writes
        ``logs/<datte>/<run>/best.pkl``.
    """
    print(f"Running {run}")

    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    pop = neat.Population(config)

    stats = neat.StatisticsReporter()

    pop.add_reporter(stats)
    #pop.add_reporter(neat.Checkpointer(generation_interval=25, time_interval_seconds=1200, filename_prefix='NEAT/checkpoints/NEAT-checkpoint-'))
    # NOTE(review): TBReporter is presumably provided by a customised neat
    # package — confirm it exists in the installed version.
    pop.add_reporter(neat.TBReporter(True, 0, run, datte))
    #pop.add_reporter(neat.StdOutReporter(True))
    env.reset()
    best = pop.run(eval_genomes, max_generations)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(best))
    print("Finished running!")

    # Save best genome. Create the directory first so the write cannot fail
    # just because nothing else has created it yet.
    output_dir = f'logs/{datte}/{run}'
    os.makedirs(output_dir, exist_ok=True)
    with open(f'{output_dir}/best.pkl', 'wb') as f:
        pickle.dump(best, f)

In [65]:
# Timestamp (day-month-year--hour_minute) shared by every run started from this cell;
# run() uses it as part of the log path.
datte = datetime.datetime.now().strftime("%d-%m-%Y--%H_%M")
config_path = 'test_config'
runs = 1
# Each run is identified by its loop index.
for r in range(runs):
    run(config_path, r, datte)

Running 0

 ****** Running generation 0 ****** 

Sending: 50


KeyboardInterrupt: 

In [31]:
# Inspect the agent id -> index mapping for the currently live agents.
# Bound to a descriptive name so the builtin `map` is not shadowed for the rest
# of the kernel session.
agent_index_by_id = map_agents()
agent_index_by_id

{0: 0, 67: 1, 68: 2, 69: 3, 70: 4, 71: 5}

In [55]:
import random

# Smoke test: several times over, request a random number of agents, then drive
# every agent with a fixed action for a few cycles and record its latest reward.
num_trials = 5
num_cycles = 10
for _ in range(num_trials):
    target_agents = random.randint(1, 10)
    set_agents_and_double_reset(num_agents = target_agents)
    # Named `agent_index` so the builtin `map` is not shadowed at module level.
    agent_index = map_agents()
    rewards = [0] * env.num_agents
    for agent in env.agent_iter(env.num_agents * num_cycles):
        current_agent = int(agent.split("=")[2])
        prev_observe, reward, done, info = env.last(observe=True)
        print(prev_observe)
        if done:
            # Finished agents are stepped with None.
            action = None
        else:
            rewards[agent_index[current_agent]] = reward
            # Fixed action; env.action_spaces[agent].sample() is the random alternative.
            action = [1, 1, 1, 1]
        env.step(action)
    env.reset()

Sending: 6
[ 6.2357119e-10  3.0008994e-02  1.1849005e-09  3.0008994e-02
  1.1967495e-07  0.0000000e+00  1.0000000e+00  0.0000000e+00
  0.0000000e+00 -9.7397842e-02  0.0000000e+00  0.0000000e+00
 -1.9296518e-01  0.0000000e+00]
[ 1.2867581e-07  3.0021179e-02  1.7940408e-07  3.0021179e-02
  9.9836895e-04  2.2614970e-04  9.9999994e-01  3.2328634e-04
 -3.6226495e-04 -3.0797280e-02 -5.1793439e-04 -7.2450563e-04
 -6.1536241e-02 -1.0358084e-03]
[ 1.2672442e-07  3.0021179e-02  1.6996177e-07  3.0021179e-02
  9.9735567e-04  2.2613710e-04  9.9999994e-01  3.2326666e-04
 -3.6220541e-04 -3.0801032e-02 -5.1787484e-04 -7.2444609e-04
 -6.1543643e-02 -1.0357489e-03]
[ 2.9983400e-07  3.0023772e-02  4.1074091e-07  3.0023772e-02
  1.3768075e-03  3.0804062e-04  9.9999988e-01  4.4035158e-04
 -4.7649079e-04 -6.5995706e-03 -6.8123394e-04 -9.5294416e-04
 -1.3198514e-02 -1.3624914e-03]
[ 4.7537793e-07  3.0024324e-02  6.7040475e-07  3.0024324e-02
  1.4785749e-03  3.2545952e-04  9.9999982e-01  4.6525450e-04
 -4.988

KeyboardInterrupt: 

In [44]:
# Display the module-level `rewards` list left behind by the most recent loop that filled it.
rewards

[2.1692624, 2.4318953]

In [8]:
# NOTE(review): the cell after this one in file order still steps `env`;
# closing here only works because the cells were executed out of order.
env.close()

In [66]:
# Step the environment 1000 times with a fixed action, resetting whenever the
# current agent reports done.
# NOTE(review): the saved output below this cell is a list of numbers, but the
# cell contains no print call — the output appears to come from an earlier version.
for i in range(1000):
    env.step(action=[1, 1, 1, 1])
    obs, reward, done, info = env.last(observe=True)
    if done:
        # NOTE(review): next_obs is assigned but never read in the visible cells.
        next_obs = {"observation": obs}
        env.reset()
env.reset()

0
-30.016123
-30.016123
-19.988907
-19.988907
0.110297725
0.11029617
0.28060788
0.2806057
0.52117443
0.5211719
0.8311544
0.83115125
1.2097149
1.2097111
1.6560277
1.6560233
2.169267
2.1692624
2.7486057
2.748601
3.393203
3.3931966
4.1021795
4.102173
4.874565
4.8745575
5.709132
5.7091246
6.603804
6.603797
207.55208
207.55206
708.4457
708.4457
8.09593
8.095922
6.975114
6.9751086
5.766517
5.7665124
4.495189
4.495184
3.164821
3.164815
1.776923
1.7769165
0.33246565
0.33245784
-1.1677694
-1.1677785
-2.7230911
-2.7231016
-4.3328624
-4.3328724
-5.996475
-5.996484
-7.7133393
-7.7133503
-9.482898
-9.482911
-300.0
-300.0
-300.0
-300.0
-30.388927
-30.388943
-10.502142
-10.502165
-0.34160396
-0.3416267
-0.11116353
-0.11118646
0.18790588
0.18788327
0.55407983
0.55405706
0.98544323
0.9854203
1.479451
1.4794272
2.0325065
2.0324821
2.6392002
2.6391735
3.2908373
3.2908065
3.972478
3.9724407
4.656692
4.6566443
5.2905445
5.290479
5.773715
5.7736225
5.952557
5.9524336
5.704843
5.704699
5.0549965
5.054843
4.1

In [10]:
# Final cleanup: close the environment once all experiments are done.
env.close()