In [1]:
import pickle
import numpy as np
import neat
import datetime
import os
import math

from comunication_channel import AgentLogChannel

from neuro_evolution.selection import Selection
from neuro_evolution.config import Config
from neuro_evolution.space import Space
from neuro_evolution.population import Population
from neuro_evolution.feed_forward import FeedForwardNetwork
from neuro_evolution.mutation import Mutation
from neuro_evolution.crossover import Crossover

from tqdm import tqdm
import time
import numpy as np

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

In [2]:
# Number of independent evolutionary runs launched by the driver cell.
NUM_RUNS = 1
# NOTE(review): not referenced by any visible cell - run() uses its own
# generation count (200). Confirm which value is intended.
MAX_GENS = 2000

In [3]:
# Side channel for Unity engine settings; requests time_scale=10.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=10)
# Project-specific side channel; set_agents_and_double_reset() uses it to send
# the requested agent count to Unity.
agent_count_channel = AgentLogChannel()

# NOTE(review): neither path below is referenced by any visible cell.
env_path = "../Builds/train-env/autonomous-drone.exe"
save_nn_destination = 'result/best.pkl'

# NOTE(review): file_name=None means env_path is not passed here; per the
# ML-Agents Python API this presumably attaches to a running Unity Editor
# instead of launching the build - confirm this is intended.
env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=True, side_channels=[engine_config_channel, agent_count_channel])
# Initial reset so that env.behavior_specs can be queried by the next cell.
env.reset()

In [4]:
# NOTE(review): out_mult is not referenced by any visible cell.
out_mult = 1

# Take the first behavior exposed by the environment; the rest of the
# notebook drives only this one behavior.
behavior_specs = env.behavior_specs
print(f"Behaviour specs {behavior_specs}")
behavior_name = list(behavior_specs)[0]
spec = behavior_specs[behavior_name]

print(f"Name of the behavior : {behavior_name}")
print("Number of observations : ", len(spec.observation_specs)) # vector if 1

# Report continuous and discrete actions independently. ActionSpec.is_discrete()
# is only true for purely discrete action spaces, so testing discrete_size
# directly also reports the discrete branches of a hybrid action space.
if spec.action_spec.continuous_size > 0:
    print(f"There are {spec.action_spec.continuous_size} continuous actions")
if spec.action_spec.discrete_size > 0:
    print(f"There are {spec.action_spec.discrete_size} discrete actions")

Behaviour specs <mlagents_envs.base_env.BehaviorMapping object at 0x000001A8B7F3D360>
Name of the behavior : My Behavior?team=0
Number of observations :  1
There are 4 continuous actions


In [5]:
def create_folder(file_name_prefix):
    """Ensure the directory part of ``file_name_prefix`` exists.

    Args:
        file_name_prefix: Path-like prefix. Everything up to the last path
            separator is treated as the directory to create, so a prefix
            ending in a separator names the directory itself.

    A prefix without any directory component (for example ``"best.pkl"``)
    refers to the current working directory, so nothing is created.
    """
    directory = os.path.dirname(f"{file_name_prefix}")
    # dirname() returns "" for a bare file name; os.makedirs("") would raise.
    if directory and not os.path.exists(directory):
        # exist_ok guards against another process creating the directory
        # between the existence check and this call.
        os.makedirs(directory, exist_ok=True)

In [6]:
def set_agents_and_double_reset(num_agents: int):
    """Send the requested agent count to Unity, then reset the environment twice.

    Args:
        num_agents: Value sent over ``agent_count_channel`` before resetting.

    NOTE(review): presumably the second reset is needed for the Unity side to
    act on the new agent count - confirm against the environment code.
    """
    agent_count_channel.send_int(data=num_agents)
    for _ in range(2):
        env.reset()

In [7]:
def get_observation_for_agent(agent: int, observations):
    """Return the observation whose key encodes the given agent number.

    Each key of ``observations`` is split on ``"="`` and the third piece is
    parsed as an integer; the value of the first key whose number equals
    ``agent`` is returned.

    Args:
        agent: Agent number to look for.
        observations: Mapping from string keys to observations.

    Returns:
        The matching observation, or ``None`` when no key matches.
    """
    matching_values = (
        observations[name]
        for name in observations
        if int(name.split("=")[2]) == agent
    )
    return next(matching_values, None)

In [8]:
def map_agent_ids(decision_steps):
    """
    Build the two-way lookup between Unity agent ids and NEAT indices.

    Items are numbered 0, 1, 2, ... in the order ``decision_steps`` yields them.

    Args:
        decision_steps: An iterable whose items are used as the Unity-side
            keys (the caller passes the current decision steps).

    Returns:
        A tuple of two dictionaries: (unity_to_neat_map, neat_to_unity_map)
    """
    unity_ids = list(decision_steps)
    neat_to_unity_map = dict(enumerate(unity_ids))
    unity_to_neat_map = {unity_id: index for index, unity_id in enumerate(unity_ids)}
    return unity_to_neat_map, neat_to_unity_map

In [9]:
def create_policies(genomes, cfg):
    """Reset each genome's fitness and build a feed-forward network for it.

    Args:
        genomes: Iterable of ``(key, genome)`` pairs; the key is ignored.
        cfg: Configuration handed to ``neat.nn.FeedForwardNetwork.create``.

    Returns:
        A list with one network per genome, in input order.
    """
    def _build(genome):
        # Fitness is zeroed before the network is created, genome by genome.
        genome.fitness = 0
        return neat.nn.FeedForwardNetwork.create(genome, cfg)

    return [_build(genome) for _genome_key, genome in genomes]

In [10]:
import tensorflow as tf

def eval_genomes(population):
    """Run one episode per policy in Unity and return the summed rewards.

    The number of policies is sent to Unity (followed by a double reset), and
    the agents present in the first decision step are paired with the policies
    by position: the i-th agent id drives ``population[i]``. Every agent is
    stepped until it shows up in the terminal steps; rewards seen along the
    way are accumulated per agent.

    Args:
        population: Indexable collection of policies. Each policy must offer
            ``predict(inputs, verbose=0)`` returning an array with
            ``squeeze()`` (presumably Keras-style models - confirm).

    Returns:
        A list of accumulated rewards, one entry per agent reported by Unity
        after the reset, indexed like ``population``.
        NOTE(review): its length is the agent count reported by Unity, which
        is only equal to ``len(population)`` if Unity honours the request.
    """
    policies = population
    set_agents_and_double_reset(len(policies))
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    agent_count = len(decision_steps.agent_id)

    unity_to_neat_map, neat_to_unity_map = map_agent_ids(decision_steps)

    # A set keeps the per-step "already finished?" checks O(1).
    finished_agents = set()
    episode_rewards = [0] * agent_count

    while True:
        for unity_id in decision_steps:
            policy_index = unity_to_neat_map.get(unity_id)
            # Ignore agents that already finished, and ids that were not
            # present at reset (no policy is assigned to them).
            if policy_index is None or policy_index in finished_agents:
                continue

            nn_input = np.asarray(decision_steps[unity_id].obs[:])
            nn_input_tensor = tf.convert_to_tensor(nn_input, dtype=tf.float32)

            actions = policies[policy_index].predict(nn_input_tensor, verbose=0).squeeze()
            # Wrap in a leading axis (one row for this single agent) and keep
            # every action component inside [-1, 1].
            continuous_actions = np.clip(np.asarray([actions]), -1, 1)
            action_tuple = ActionTuple(discrete=None, continuous=continuous_actions)
            env.set_action_for_agent(behavior_name=behavior_name,
                                     agent_id=unity_id,
                                     action=action_tuple)

        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        for policy_index in range(agent_count):
            if policy_index in finished_agents:
                continue
            unity_id = neat_to_unity_map[policy_index]
            # Terminal steps are checked first: an agent listed there has
            # ended its episode and receives its last reward.
            if unity_id in terminal_steps:
                episode_rewards[policy_index] += terminal_steps[unity_id].reward
                finished_agents.add(policy_index)
            elif unity_id in decision_steps:
                episode_rewards[policy_index] += decision_steps[unity_id].reward

        if len(finished_agents) >= agent_count:
            break
    return episode_rewards

In [11]:
import matplotlib.pyplot as plt

def run(run, datte, max_gen=200):
    """Evolve one population and plot its best fitness per generation.

    Each generation evaluates the population with ``eval_genomes``, then forms
    the next population by concatenating three groups: the result of
    ``Selection.select_best``, a mutated ``Selection.select_rand`` group, and
    a mutated and crossed-over ``Selection.select_sus`` group.

    Args:
        run: Index of this run; used in the log line and the checkpoint path.
            (The name shadows this function inside its own body.)
        datte: Timestamp string used as the checkpoint sub-folder name.
        max_gen: Number of generations to evolve. Defaults to 200.
    """
    print(f"Running {run}")
    file_name_prefix = f"checkpoints/{datte}/run-{run}/"
    create_folder(file_name_prefix=file_name_prefix)

    nn = FeedForwardNetwork()
    population = Population(nn)
    pop = population.pop
    best_fitness_per_gen = []
    with tqdm(total=max_gen, desc="Processing") as pbar:
        for gen in range(max_gen):
            fitness = eval_genomes(pop)
            max_fitness = max(fitness)
            best_fitness_per_gen.append(max_fitness)
            pbar.set_description(f"Processing (Gen {gen+1}, Max Fitness: {max_fitness})")

            # NOTE(review): the numeric arguments below are selection and
            # crossover settings of the neuro_evolution package; their exact
            # meaning is not visible here - confirm before tuning.
            best = Selection.select_best(pop, fitness, [6, 4])
            rand = Selection.select_rand(pop, 30)
            sus = Selection.select_sus(pop, fitness, 10)

            rand = Mutation.muta(rand)
            sus = Mutation.mutx(sus)
            sus = Crossover.crossover(sus, 3, 1)
            pop = best + rand + sus
            pbar.update(1)

    fig, ax = plt.subplots()
    ax.plot(best_fitness_per_gen)
    ax.set(
        title=f"Run {run}: best fitness per generation",
        xlabel="Generation",
        ylabel="Max fitness",
    )
    plt.show()
    # TODO: persist the best individual under file_name_prefix; the folder is
    # created above but nothing is written to it yet.

In [12]:
# NOTE(review): config and space are not used by any visible cell; presumably
# get_instance() initializes shared state that the neuro_evolution classes
# read - confirm.
config = Config.get_instance("cfg")
space = Space.get_instance()

# Timestamp (day-month-year--hour_minute) shared by all runs of this session;
# run() uses it as the checkpoint sub-folder name.
datte = datetime.datetime.now().strftime("%d-%m-%Y--%H_%M")
# Execute NUM_RUNS runs one after another; each call creates its own checkpoint folder.
for r in range(NUM_RUNS):
    run(r, datte)



Running 0


Processing:   0%|          | 0/200 [00:00<?, ?it/s]



Processing (Gen 11, Max Fitness: -984.5358315706253):   5%|▌         | 10/200 [03:11<1:00:38, 19.15s/it]


KeyboardInterrupt: 

In [13]:
# Close the Unity environment once training has finished or was interrupted.
env.close()