In [9]:
from mlagents_envs.environment import UnityEnvironment  # Import Unity environment
from mlagents_envs.envs.unity_aec_env import UnityAECEnv
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from comunication_channel import AgentLogChannel
import neat
import os
import datetime
import pickle
import numpy as np

In [10]:
# Experiment-wide settings.
# NOTE(review): MAX_STEPS is assigned again (to 1200) in the environment-setup
# cell further down, and no visible cell reads it - confirm which value is
# intended and whether the constant is still needed.
MAX_STEPS = 2_500
NUM_RUNS = 5       # number of independent runs started by the driver loop
MAX_GENS = 2_500   # generation limit passed to pop.run(...) inside run()

In [11]:
# --- Side channels ----------------------------------------------------------
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
# Project-specific channel; set_agents_and_double_reset() uses it to send the
# requested agent count.
agent_count_channel = AgentLogChannel()

# --- Paths --------------------------------------------------------------------
# NOTE(review): neither path is referenced by the visible cells (the
# environment below is created with file_name=None, and run() writes its own
# best.pkl under logs/) - confirm whether they are still needed.
env_path = "./Builds/train-env/autonomous-drone.exe"
save_nn_destination = 'result/best.pkl'

# --- Environment --------------------------------------------------------------
# NOTE(review): with file_name=None ML-Agents presumably attaches to a running
# Unity Editor instead of launching env_path - confirm this is intended.
env = UnityEnvironment(
    file_name=None,
    worker_id=0,
    no_graphics=False,
    side_channels=[engine_config_channel, agent_count_channel],
)
# Rebind `env` to the AEC wrapper around the raw environment; every later cell
# talks to this wrapper.
env = UnityAECEnv(env)
env.reset()
num_agents = len(env.possible_agents)

# Sizes are read from the first possible agent's action / observation space.
num_actions = env.action_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_actions} actions in enviroment")
num_inputs = env.observation_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_inputs} inputs in enviroment")

# NOTE(review): this rebinds MAX_STEPS, which the config cell set to 2500;
# neither MAX_STEPS nor NUM_TRIES is read by the visible cells.
MAX_STEPS = 1200
NUM_TRIES = 3

There is total of 4 actions in enviroment
There is total of 14 inputs in enviroment


In [12]:
def map_agents(agents=None):
    """Build a lookup from numeric agent id to position in the agent list.

    Each agent name is split on "=" and the piece at index 2 is parsed as the
    integer agent id. The id is mapped to the zero-based position at which the
    name appears. If two names yield the same id, the later position wins.

    Args:
        agents: Optional iterable of agent-name strings. When omitted (the
            original zero-argument call), the module-level ``env.agents`` is
            used.

    Returns:
        dict[int, int]: agent id -> position in ``agents``.

    Raises:
        IndexError: if a name contains fewer than two "=" characters.
        ValueError: if the piece after the second "=" is not an integer.
    """
    # `is None` (rather than truthiness) so an explicitly passed empty list
    # yields an empty mapping instead of falling back to the global env.
    if agents is None:
        agents = env.agents
    return {
        int(name.split("=")[2]): position
        for position, name in enumerate(agents)
    }

def set_agents_and_double_reset(num_agents: int):
    """Send the requested agent count to Unity, then reset the environment twice.

    Args:
        num_agents: Value sent over ``agent_count_channel`` via ``send_int``.
    """
    agent_count_channel.send_int(data=num_agents)
    # NOTE(review): two consecutive resets - presumably one is not enough for
    # the new agent count to be reflected in the wrapper; confirm.
    for _ in range(2):
        env.reset()

def create_policies(genomes, cfg):
    """Build one feed-forward network per genome and zero each genome's fitness.

    Args:
        genomes: Iterable of ``(key, genome)`` pairs; the key is ignored.
        cfg: Configuration object passed to ``FeedForwardNetwork.create``.

    Returns:
        list: The created networks, in the same order as ``genomes``.
    """
    networks = []
    for _key, genome in genomes:
        networks.append(neat.nn.FeedForwardNetwork.create(genome, cfg))
        # Start from 0 so a fitness value is always present on the genome.
        genome.fitness = 0
    return networks

def eval_genomes(genomes, cfg):
    """Evaluate every genome by letting its network drive one agent.

    One network is created per genome, the environment is asked for that many
    agents, and the agent iterator is then run to exhaustion. The reward
    returned by ``env.last`` on each turn is accumulated per agent, and the
    total becomes the fitness of the genome at the same position.

    Args:
        genomes: Sequence of ``(key, genome)`` pairs; ``genome.fitness`` is
            overwritten.
        cfg: Configuration forwarded to ``create_policies``.

    Raises:
        AssertionError: if the environment's agent count differs from the
            number of networks after the reset.
    """
    networks = create_policies(genomes, cfg)
    population_size = len(networks)
    set_agents_and_double_reset(num_agents=population_size)
    assert env.num_agents == population_size, "Target agents do not match num_agents!"

    totals = [0] * population_size
    slot_of = map_agents()  # agent id -> index into networks / totals
    finished = [False] * population_size

    for agent_name in env.agent_iter():
        agent_id = int(agent_name.split("=")[2])
        observation, reward, done, _info = env.last(observe=True)
        slot = slot_of[agent_id]
        if not done:
            action = np.asarray(networks[slot].activate(observation))
        else:
            # A finished agent is stepped with None instead of an action.
            action = None
            finished[slot] = True
        totals[slot] += reward
        env.step(action)

    # The i-th genome was given the i-th network, so totals line up by position.
    for (_key, genome), total in zip(genomes, totals):
        genome.fitness = total
    print(finished)
    env.reset()
    print("\nFinished generation")

def run(config_file, run, datte):
    """Run one NEAT training session and pickle the best genome.

    Builds a population from ``config_file``, attaches reporters, evolves it
    with ``eval_genomes`` for at most ``MAX_GENS`` generations and writes the
    best genome to ``logs/{datte}/{run}/best.pkl``.

    Args:
        config_file: Path to the NEAT configuration file.
        run: Identifier of this run; used in log output, passed to the
            TBReporter and used as a directory name for the saved genome.
        datte: Timestamp string shared by all runs of one session; used as the
            parent directory name under ``logs/``.
    """
    print(f"Running {run}")

    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    pop = neat.Population(config)

    stats = neat.StatisticsReporter()

    pop.add_reporter(stats)
    #pop.add_reporter(neat.Checkpointer(generation_interval=25, time_interval_seconds=1200, filename_prefix='NEAT/checkpoints/NEAT-checkpoint-'))
    pop.add_reporter(neat.TBReporter(False, 0, run, datte))
    #pop.add_reporter(neat.StdOutReporter(True))
    env.reset()
    best = pop.run(eval_genomes, MAX_GENS)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(best))
    print("Finished running!")

    # Save best genome. The target directory is created first so a finished
    # training run cannot end in FileNotFoundError when it is missing.
    best_path = f'logs/{datte}/{run}/best.pkl'
    os.makedirs(os.path.dirname(best_path), exist_ok=True)
    with open(best_path, 'wb') as f:
        pickle.dump(best, f)


In [13]:
config_path = 'test_config'
# One timestamp for the whole session so all runs share a logs/<datte>/ parent.
datte = datetime.datetime.now().strftime("%d-%m-%Y--%H_%M")

# try/finally so the Unity environment is closed even if a run raises or the
# cell is interrupted (KeyboardInterrupt); the exception still propagates.
try:
    for r in range(NUM_RUNS):
        run(config_path, r, datte)
finally:
    env.close()

Running 0

 ****** Running generation 0 ****** 

Sending: 100
[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]

Finished generation
Population's average fitness: 20.16763 stdev: 1416.02705
Best fitness: 2006.37231 - size: (5, 37) - species 1 - id 1
Best fitness ever found! : 2006.3723057806492
Total extinctions: 0
Generation time: 2.099 sec

 *****

KeyboardInterrupt: 

In [None]:
# Manual cleanup cell: closes the environment, e.g. when the training cell was
# interrupted before it reached its own env.close().
env.close()