In [21]:
import pickle
import numpy as np
import nevopy as ne
import datetime
import os
from comunication_channel import AgentLogChannel

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

In [22]:
# Base genome handed to the genetic population below (as `base_genome`):
# a fixed-topology genome with two TFDenseLayer layers, both using tanh.
BASE_GENOME = ne.fixed_topology.FixedTopologyGenome(

    # List with the layers of the base genome. Other genomes in the population
    # will have similar layers (with the same topology).
    layers=[ne.fixed_topology.layers.TFDenseLayer(28, activation="tanh"),
            ne.fixed_topology.layers.TFDenseLayer(4, activation="tanh")],

    # Shape of the input samples expected by the genome.
    input_shape=[1, 14],
)

In [23]:
# Genetic-algorithm population of size 50 built from BASE_GENOME.
population = ne.genetic_algorithm.GeneticPopulation(size=50,
                                                        base_genome=BASE_GENOME)

In [None]:
import random
import tensorflow as tf

def fitness_function(genome):
    """Placeholder fitness used to smoke-test the evolution loop.

    Feeds a single all-ones sample of shape (1, 14) through
    ``genome.process`` (the output is discarded) and returns a random
    integer between 0 and 600, both ends included.
    """
    dummy_sample = np.ones(14, dtype=int).reshape(1, 14)
    # The network output is intentionally ignored; the call only exercises
    # the genome's forward pass.
    genome.process(dummy_sample)
    return random.randint(0, 600)

In [None]:
# Early-stopping callback later passed to population.evolve().
# NOTE(review): presumably halts evolution once the fitness has stayed at or
# above 300 for 3 consecutive generations — confirm against the NEvoPy docs.
early_stopping_cb = ne.callbacks.FitnessEarlyStopping(
        fitness_threshold=300,
        min_consecutive_generations=3,
    )

In [None]:
# Run the evolution (generations=50) with the placeholder fitness function and
# the early-stopping callback; whatever evolve() returns is kept in `history`.
history = population.evolve(generations=50,
                                fitness_function=fitness_function,
                                callbacks=[early_stopping_cb])

>> GENERATION 1 SUMMARY:
. Mass extinction counter: [32m[22m0[0m / 15
. Processing time:  99.9603s

|---------------|--------------|--------------|--------------|--------------|
|     NAME      |   CURRENT    |     PAST     |   INCREASE   | INCREASE (%) |
| Best fitness  |   6.00E+02   |   0.00E+00   |[32m  +6.00E+02   [0m|[32m    +inf%     [0m|
|---------------|--------------|--------------|--------------|--------------|
|Avg population |   2.86E+02   |   0.00E+00   |[32m  +2.86E+02   [0m|[32m    +inf%     [0m|
|    fitness    |              |              |[32m              [0m|[32m              [0m|
|---------------|--------------|--------------|--------------|--------------|
|Mutation chance|    60.00%    |    0.00%     |[32m   +60.00%    [0m|      -       |
|---------------|--------------|--------------|--------------|--------------|
|    Weight     |    50.00%    |    0.00%     |[32m   +50.00%    [0m|      -       |
|mutation chance|              |             

KeyboardInterrupt: 

In [None]:
# Number of runs launched by the driver loop (`for r in range(NUM_RUNS)`).
NUM_RUNS = 1
# Generation count passed to pop.run() inside run().
MAX_GENS = 400

In [None]:
# Side channels handed to the Unity environment: the engine configuration
# channel (time scale set to 1) and the project's AgentLogChannel, which
# set_agents_and_double_reset() uses to send the desired agent count.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
agent_count_channel = AgentLogChannel()

# NOTE(review): env_path and save_nn_destination are not referenced by any
# other visible cell — confirm whether they are still needed.
env_path = "../Builds/train-env/autonomous-drone.exe"
save_nn_destination = 'result/best.pkl'

# NOTE(review): file_name=None presumably attaches to a running Unity Editor
# instead of launching the build at env_path — confirm against ML-Agents docs.
env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=False, side_channels=[engine_config_channel, agent_count_channel])
env.reset()

In [None]:
# NOTE(review): num_actions, num_inputs and out_mult are not read by any other
# visible cell — confirm whether they are still needed.
num_actions = 4 # 4
num_inputs = 13 # 14
out_mult = 1

# Take the first behavior exposed by the environment; behavior_name is the
# global that eval_genomes() later passes to env.get_steps().
behavior_specs = env.behavior_specs
print(f"Behaviour specs {behavior_specs}")
behavior_name = list(behavior_specs)[0]
spec = env.behavior_specs[behavior_name]

print(f"Name of the behavior : {behavior_name}")
print("Number of observations : ", len(spec.observation_specs)) # vector if 1

# Is the Action continuous or multi-discrete ?
if spec.action_spec.continuous_size > 0:
  print(f"There are {spec.action_spec.continuous_size} continuous actions")
if spec.action_spec.is_discrete():
  print(f"There are {spec.action_spec.discrete_size} discrete actions")

Behaviour specs <mlagents_envs.base_env.BehaviorMapping object at 0x000002517043E500>
Name of the behavior : RotorControl?team=0
Number of observations :  1
There are 4 continuous actions


In [None]:
def create_folder(file_name_prefix):
    """Create the directory part of ``file_name_prefix`` if it is missing.

    Args:
        file_name_prefix: Path prefix for files to be written later.
            Everything before the last path separator is treated as the
            directory to create (``"a/b/"`` and ``"a/b/run-"`` both create
            ``a/b``).

    A prefix without any directory component (e.g. ``"run-"``) is a no-op,
    and an already existing directory is not an error.
    """
    directory = os.path.dirname(str(file_name_prefix))
    # dirname() is "" when the prefix has no directory part; makedirs("")
    # would raise FileNotFoundError, so there is simply nothing to create.
    if directory:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

In [None]:
def set_agents_and_double_reset(num_agents: int):
    """Send the requested agent count to Unity, then reset the environment twice.

    Uses the notebook globals ``agent_count_channel`` and ``env``.
    """
    agent_count_channel.send_int(data=num_agents)
    # NOTE(review): presumably the second reset is what makes the new agent
    # count take effect on the Unity side — confirm.
    for _ in range(2):
        env.reset()

In [None]:
def get_observation_for_agent(agent: int, observations):
    """Look up the observation belonging to ``agent``.

    Each key of ``observations`` is split on ``"="`` and its third field is
    parsed as an integer agent id. The value of the first key whose id
    equals ``agent`` is returned; ``None`` is returned when no key matches.
    """
    matching_values = (
        observations[name]
        for name in observations
        if int(name.split("=")[2]) == agent
    )
    # The generator is lazy, so keys after the first match are never parsed.
    return next(matching_values, None)

In [None]:
def map_agent_ids(decision_steps):
    """
    Build the two-way id mapping between NEAT and UNITY.

    Items are numbered 0, 1, 2, ... in the order ``decision_steps`` yields
    them; that running number is the NEAT-side id.

    Args:
        decision_steps: An iterable whose items are used as Unity-side ids.

    Returns:
        A tuple of two dictionaries: (unity_to_neat_map, neat_to_unity_map)
    """
    neat_to_unity_map = dict(enumerate(decision_steps))
    unity_to_neat_map = {
        unity_id: neat_id for neat_id, unity_id in neat_to_unity_map.items()
    }
    return unity_to_neat_map, neat_to_unity_map

In [None]:
def create_policies(genomes, cfg):
    """Reset every genome's fitness to 0 and build one network per genome.

    Args:
        genomes: Iterable of ``(id, genome)`` pairs; the id is ignored.
        cfg: Configuration forwarded to ``neat.nn.FeedForwardNetwork.create``.

    Returns:
        List of networks, in the same order as ``genomes``.

    NOTE(review): ``neat`` is not imported in any visible cell — confirm it
    is imported before this cell runs on a fresh kernel.
    """
    def _build(genome):
        genome.fitness = 0
        return neat.nn.FeedForwardNetwork.create(genome, cfg)

    return [_build(genome) for _, genome in genomes]

In [None]:
import random
def eval_genomes(genomes, cfg):
    """Evaluate one generation of genomes in the Unity environment.

    One policy network is built per genome, the matching agent count is sent
    to Unity, and the simulation is stepped until every mapped agent has
    appeared in the terminal steps. Each genome's ``fitness`` is then set to
    the sum of the rewards collected by its agent.

    Args:
        genomes: ``(id, genome)`` pairs. Iterated twice (once to build the
            policies, once to assign fitness), so it must be re-iterable.
        cfg: Configuration forwarded to ``create_policies``.

    Relies on the notebook globals ``env`` and ``behavior_name``.

    Two id spaces are in play: Unity-side agent ids (what iterating
    ``decision_steps`` yields) and NEAT-side indices (position in
    ``policies`` / ``episode_rewards``). ``removed_agents`` always holds
    NEAT-side indices; Unity ids are translated before being checked
    against it.
    """
    policies = create_policies(genomes, cfg)
    set_agents_and_double_reset(len(policies))
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    agent_count = len(decision_steps.agent_id)

    unity_to_neat_map, neat_to_unity_map = map_agent_ids(decision_steps)

    # NEAT-side indices of agents whose episode has ended.
    removed_agents = set()

    episode_rewards = [0] * agent_count
    print(f"Agent count: {agent_count}")
    # NOTE(review): this reset happens after the id maps were built from the
    # pre-reset decision steps — confirm agent ids are stable across resets.
    env.reset()
    done = False
    while not done:
        for unity_id in decision_steps:
            # Translate first: removed_agents stores NEAT indices, so testing
            # the raw Unity id against it would only be right by coincidence.
            neat_id = unity_to_neat_map[unity_id]
            if neat_id in removed_agents:
                continue
            nn_input = np.asarray(decision_steps[unity_id].obs[:])
            actions = policies[neat_id].activate(nn_input[0])
            continuous_actions = np.asarray([actions])
            action_tuple = ActionTuple(discrete=None, continuous=continuous_actions)
            env.set_action_for_agent(behavior_name=behavior_name,
                                     agent_id=unity_id,
                                     action=action_tuple)
        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        for neat_id in range(agent_count):
            if neat_id in removed_agents:
                continue
            unity_id = neat_to_unity_map[neat_id]
            if unity_id in terminal_steps:
                # Final reward of the episode; the agent is finished.
                episode_rewards[neat_id] += terminal_steps[unity_id].reward
                removed_agents.add(neat_id)
            elif unity_id in decision_steps:
                episode_rewards[neat_id] += decision_steps[unity_id].reward

        if len(removed_agents) >= agent_count:
            print(".")
            done = True
    # NOTE(review): indexes episode_rewards by genome position, so this raises
    # IndexError if Unity reported fewer agents than there are genomes.
    for i, (_, genome) in enumerate(genomes):
        genome.fitness = episode_rewards[i]
    env.reset()

In [None]:
def run(config_file, run, datte):
    """Execute one evolution run and pickle the best genome found.

    Args:
        config_file: Path of the configuration file given to ``neat.Config``.
        run: Index of this run; used in the checkpoint/log paths and passed
            to the TBReporter.
        datte: Timestamp string shared by all runs; used in the
            checkpoint/log paths and passed to the TBReporter.

    Side effects:
        Creates ``checkpoints/{datte}/run-{run}/`` and writes the best genome
        to ``logs/{datte}/{run}/best.pkl``.

    Relies on the notebook globals ``MAX_GENS`` and ``eval_genomes``.
    NOTE(review): ``neat`` is not imported in any visible cell — confirm it
    is imported before this cell runs on a fresh kernel.
    """
    print(f"Running {run}")
    file_name_prefix = f"checkpoints/{datte}/run-{run}/"
    create_folder(file_name_prefix=file_name_prefix)

    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    pop = neat.Population(config)

    stats = neat.StatisticsReporter()

    pop.add_reporter(stats)
    #pop.add_reporter(neat.Checkpointer(generation_interval=5, time_interval_seconds=100000, filename_prefix=file_name_prefix))
    pop.add_reporter(neat.TBReporter(False, 0, run, datte))
    #pop.add_reporter(neat.StdOutReporter(True))
    best = pop.run(eval_genomes, MAX_GENS)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(best))
    print("Finished running!")

    # Save best genome. Only the checkpoints/ directory is created above, so
    # make sure the logs/ directory exists too; otherwise open() would fail
    # and the result of the whole run would be lost.
    best_path = f'logs/{datte}/{run}/best.pkl'
    os.makedirs(os.path.dirname(best_path), exist_ok=True)
    with open(best_path, 'wb') as f:
        pickle.dump(best, f)

In [None]:
# Configuration file path that run() passes to neat.Config.
config_path = 'test_config'
# Timestamp shared by every run; run() embeds it in the checkpoint and log paths.
datte = datetime.datetime.now().strftime("%d-%m-%Y--%H_%M")
# Launch NUM_RUNS runs one after another; run() creates its own checkpoint folder.
for r in range(NUM_RUNS):
    run(config_path, r, datte)

Running 0

 ****** Running generation 0 ****** 

Sending: 50
Agent count: 50
.
Population's average fitness: 0.06823 stdev: 0.20227
Best fitness: 1.29199 - size: (14, 111) - species 1 - id 8
Best fitness ever found! : 1.291991442439466
Total extinctions: 0
Generation time: 40.066 sec

 ****** Running generation 1 ****** 

Sending: 50
Agent count: 50
.
Population's average fitness: 0.14911 stdev: 0.32717
Best fitness: 1.59247 - size: (14, 110) - species 1 - id 66
Best fitness ever found! : 1.5924724944228286
Total extinctions: 0
Generation time: 41.801 sec (40.933 average)

 ****** Running generation 2 ****** 

Sending: 50
Agent count: 50
.
Population's average fitness: 0.27902 stdev: 0.57097
Best fitness: 2.35935 - size: (15, 112) - species 1 - id 123
Best fitness ever found! : 2.359348338907553
Total extinctions: 0
Generation time: 48.902 sec (43.590 average)

 ****** Running generation 3 ****** 

Sending: 50
Agent count: 50
.
Population's average fitness: 0.23284 stdev: 0.43513
Best 

In [None]:
# Close the Unity environment now that all runs are done.
env.close()