In [1]:
from mlagents_envs.environment import UnityEnvironment  # Import Unity environment
from mlagents_envs.envs.unity_aec_env import UnityAECEnv
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from comunication_channel import AgentLogChannel
import neat
import os
import datetime
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
agent_count_channel = AgentLogChannel()

env_file_name = "..\\Builds\\single-agent-env\\autonomous-drone.exe"

env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=False, side_channels=[engine_config_channel, agent_count_channel])
env = UnityParallelEnv(env)
env.reset()
num_agents = len(env.possible_agents)

# num_actions = env.action_space(env.possible_agents[0]).n
# observation_size = env.observation_space(env.possible_agents[0]).shape

In [3]:
num_actions = env.action_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_actions} actions in enviroment")

There is total of 4 actions in enviroment


In [5]:
num_inputs = env.observation_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_inputs} inputs in enviroment")

There is total of 14 inputs in enviroment


In [6]:
agents = env.agents
agents

['RotorControl?team=0?agent_id=0']

In [8]:
def send_num_agents(num: int):
    agent_count_channel.send_int(data=num)   

In [9]:
# Test sending data
targetAgentsCount = 2
send_num_agents(targetAgentsCount)

Sending: 2


In [10]:
def map_agents():
    map_agent_to_index = {}
    map_index_to_agent = {}
    current_index = 0
    for agent in env.agents:
        map_agent_to_index[int(agent.split("=")[2])] = current_index
        map_index_to_agent[current_index] = int(agent.split("=")[2])
        current_index += 1
    return map_agent_to_index, map_index_to_agent

In [11]:
def set_agents_and_double_reset(num_agents: int):
    agent_count_channel.send_int(data=num_agents) 
    env.reset()
    env.reset()

In [12]:
def create_policies(genomes, cfg):
    policies = []
    for _, g in genomes:
        policy = neat.nn.FeedForwardNetwork.create(g, cfg)
        policies.append(policy)
        g.fitness = 0
    return policies

In [13]:
MAX_STEPS = 1000
def eval_genomes(genomes, cfg):
    policies = create_policies(genomes, cfg)
    target_agents = len(policies)
    set_agents_and_double_reset(num_agents = target_agents)
    rewards = [0] * target_agents
    map = map_agents()
    for agent in env.agent_iter(env.num_agents * MAX_STEPS):
        current_agent = int(agent.split("=")[2])
        obs, reward, done, info = env.last(observe=True)
        if done:
            action = None
        else:
            rewards[map[current_agent]] = reward
            action = policies[map[current_agent]].activate(obs)
        env.step(action)
    env.reset()
    print("\nFinished generation")

In [14]:
def run(config_file, run, datte):
    print(f"Running {run}")
    max_generations = 5

    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)
    
    pop = neat.Population(config)

    stats = neat.StatisticsReporter()

    pop.add_reporter(stats)
    #pop.add_reporter(neat.Checkpointer(generation_interval=25, time_interval_seconds=1200, filename_prefix='NEAT/checkpoints/NEAT-checkpoint-'))
    pop.add_reporter(neat.TBReporter(True, 0, run, datte))
    #pop.add_reporter(neat.StdOutReporter(True))
    env.reset()
    best = pop.run(eval_genomes, max_generations)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(best))
    print("Finished running!")
    
    # Save best genome
    with open(f'logs/{datte}/{run}/best.pkl', 'wb') as f:
        pickle.dump(best, f)

In [15]:
def add_run(data, run_id):
    if run_id not in data:
        data[run_id] = {}

def update_fitness(data, run_id, agent_id, fitness):
    if run_id in data and agent_id in data[run_id]:
        data[run_id][agent_id]["fitness"] = fitness
    else:
        print(f"Run {run_id} or Agent {agent_id} does not exist. Cannot update fitness.")
        
def update_observe(data, run_id, agent_id, prev_observe):
    if run_id in data and agent_id in data[run_id]:
        data[run_id][agent_id]["prev_observe"].append(prev_observe)
    else:
        print(f"Run {run_id} or Agent {agent_id} does not exist. Cannot update observations.")

def add_agent(data, run_id, agent_id, initial_fitness=0):
    # Ensure the run exists
    add_run(data, run_id)
    if agent_id not in data[run_id]:
        data[run_id][agent_id] = {
            "actions": [],
            "fitness": initial_fitness,
            "prev_observe": []
        }

def add_action(data, run_id, agent_id, action):
    # Ensure the agent exists
    add_agent(data, run_id, agent_id)
    data[run_id][agent_id]["actions"].append(action)

In [16]:
import random
import time

replay_buffer = {}

num_cycles = 1000
target_agents = [9, 9, 1, 1, 1, 1]
#target_agents = [4, 4, 4]
for i in range(len(target_agents)):
    set_agents_and_double_reset(num_agents = target_agents[i])
    map_agent_to_index, map_index_to_agent = map_agents()
    print(f"Agents: {env.num_agents}")
    # print(f"{map}")
    rewards = [0] * env.num_agents
    for agent in env.agent_iter(env.num_agents * num_cycles):
        current_agent = int(agent.split("=")[2])
        prev_observe, reward, done, info = env.last(observe=True)
        #print(reward)
        if done:
            action = None
        else:
            action = [1.0, 1.0, 1.0, 1.0] #env.action_spaces[agent].sample() # 
        add_action(replay_buffer, i, current_agent, action)
        update_fitness(replay_buffer, i, current_agent, reward)
        update_observe(replay_buffer, i, current_agent, prev_observe)
        rewards[map_agent_to_index[current_agent]] += reward
        env.step(action)
    env.reset()
    print(rewards, end= '\n')
    print(f"Max reward: {max(rewards)}")
    #time.sleep(5)


Sending: 9
Agents: 9
[54.707245618104935, 54.707258224487305, 54.70711626857519, 54.707258224487305, 54.70711626857519, 54.70712796412408, 54.70779267512262, 54.70779267512262, 54.70712796412408]
Max reward: 54.70779267512262
Sending: 9
Agents: 9
[54.707245618104935, 54.707258224487305, 54.70711626857519, 54.707258224487305, 54.70711626857519, 54.70712796412408, 54.70779267512262, 54.70779267512262, 54.70712796412408]
Max reward: 54.70779267512262
Sending: 1
Agents: 1
[54.707245618104935]
Max reward: 54.707245618104935
Sending: 1
Agents: 1
[54.707245618104935]
Max reward: 54.707245618104935
Sending: 1
Agents: 1
[54.707245618104935]
Max reward: 54.707245618104935
Sending: 1
Agents: 1
[54.707245618104935]
Max reward: 54.707245618104935


In [17]:
env.close()

In [41]:
reward_c = 0
for reward in rewards:
    if reward < 1652.0 or reward > 1653.0:
        reward_c+=1
print(reward_c)

52


In [23]:
import numpy as np
# Extracting observations
observation_1 = replay_buffer[0][0]['prev_observe']
observation_2 = replay_buffer[1][0]['prev_observe']
# Summing all elements in each array
sum_observation_1 = np.sum(observation_1)
sum_observation_2 = np.sum(observation_2)

# Computing the difference between the sums of observation1 and observation2
difference = sum_observation_1 - sum_observation_2

sum_observation_1, sum_observation_2, difference

(89.052246, 89.052246, 0.0)

In [12]:
replay_buffer[0][0]['prev_observe'], replay_buffer[1][0]['prev_observe']

([array([ 0.        ,  0.03000899,  0.        ,  0.03000899,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        , -0.09739784,
          0.        ,  0.        , -0.19296518,  0.        ], dtype=float32),
  array([-2.7770241e-08,  2.9964535e-02,  1.0924137e-07,  2.9964535e-02,
          1.0339580e-03, -8.9906600e-05,  9.9999994e-01,  3.3799847e-04,
          2.4557718e-05,  1.0412883e-01, -9.7086537e-05,  4.9116639e-05,
          2.0602380e-01, -1.9415423e-04], dtype=float32),
  array([-1.0457784e-07,  2.9682117e-02,  4.0343787e-07,  2.9682117e-02,
          3.5513176e-03, -3.1878724e-04,  9.9999928e-01,  1.1710123e-03,
          1.3590044e-05,  2.9493225e-01, -6.0614249e-05,  2.7180457e-05,
          5.4266113e-01, -1.2116156e-04], dtype=float32),
  array([-3.63855520e-07,  2.91645825e-02,  1.36812207e-06,  2.91645825e-02,
          7.47764297e-03, -6.84567261e-04,  9.99996662e-01,  2.49165064e-03,
         -2.15168129e-05,  4.62925792e-01,  6.01745087e-05, -4.303

In [39]:
# Now test if agents recieves the same reward for the same steps
test_run = 2
test_agent = 4
actions_to_reproduce = replay_buffer[test_run][test_agent]['actions']
expected_reward = replay_buffer[test_run][test_agent]['fitness']
expected_observe = replay_buffer[test_run][test_agent]['prev_observe']
actions_to_reproduce, expected_reward

KeyError: 4

In [None]:
observes = []
current_cycle = 0
env.reset()
set_agents_and_double_reset(num_agents = 1)
for agent in env.agent_iter(env.num_agents * num_cycles):
    current_agent = int(agent.split("=")[2])
    prev_observe, reward, done, info = env.last(observe=True)
    if done:
        action = None
    else:
        action = actions_to_reproduce[current_cycle] 
    observes.append(prev_observe)
    env.step(action)
    current_cycle+=1
env.reset()
print(reward, end= '\n')


In [None]:
def compare_observations(observes, expected_observe):
    for i, obs in enumerate(observes):
        print(f"Comparison {i+1}:")
        print(f"Normalized vector to checkpoint: obs: {obs[0:3]}, exp: {expected_observe[i][0:3]}")
        print(f"Distance to checkpoint: obs: {obs[3]}, exp: {expected_observe[i][3]}")
        print(f"Dot product with checkpoint: obs: {obs[4]}, exp: {expected_observe[i][4]}")
        print(f"Drone's inclination: obs: {obs[5:8]}, exp: {expected_observe[i][5:8]}")
        print(f"Drone's local velocity: obs: {obs[8:11]}, exp: {expected_observe[i][8:11]}")
        print(f"Drone's angular velocity: obs: {obs[11:14]}, exp: {expected_observe[i][11:14]}")
        print("\n")

In [None]:
def print_comparison_table(observes, expected_observe):
    headers = ["Component", "Observed", "Expected"]
    
    # Adjust the format here for alignment
    # > for right alignment, < for left alignment, ^ for center
    # Adjust the width as necessary
    header_format = "{:<40} {:>20} {:>20}"
    row_format = "{:<40} {:>20.6f} {:>20.6f}"  # Assuming numerical values need 6 decimal places

    # Print the header
    print(header_format.format(*headers))

    # Separator for clarity
    print("-" * 80)

    component_names = [
        "Normalized vector to checkpoint (x)",
        "Normalized vector to checkpoint (y)",
        "Normalized vector to checkpoint (z)",
        "Distance to checkpoint",
        "Dot product with checkpoint",
        "Drone's inclination (x)",
        "Drone's inclination (y)",
        "Drone's inclination (z)",
        "Drone's local velocity (x)",
        "Drone's local velocity (y)",
        "Drone's local velocity (z)",
        "Drone's angular velocity (x)",
        "Drone's angular velocity (y)",
        "Drone's angular velocity (z)"
    ]
    
    # Iterate and print each observation in a formatted manner
    for i, (obs, exp) in enumerate(zip(observes, expected_observe)):
        print(f"\nComparison {i+1}:")
        for j, component in enumerate(component_names):
            if j < len(obs):  # Ensure index is within bounds
                print(row_format.format(component, obs[j], exp[j]))


In [None]:
print_comparison_table(observes, expected_observe)

In [None]:
compare_observations(observes=observes, expected_observe=expected_observe)
# here we must assume that the angular velocity and velocity are not the same...

In [24]:
env.close()

In [None]:
for i in range(1000):
    env.step(action=[1, 1, 1, 1])
    obs, reward, done, info = env.last(observe=True)
    if done:
        next_obs = {"observation": obs}
        env.reset()
env.reset()

In [None]:
env.close()