In [110]:
from mlagents_envs.environment import UnityEnvironment  # Import Unity environment
from mlagents_envs.envs.unity_aec_env import UnityAECEnv
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from comunication_channel import AgentLogChannel
import neat
import os
import datetime
import pickle

In [122]:

# Side channels handed to the Unity environment below: one for engine settings
# (time scale set to 1) and the project's AgentLogChannel, which later cells use
# to send the requested agent count.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=1)
agent_count_channel = AgentLogChannel()

# NOTE(review): this path is defined but never passed to UnityEnvironment below
# (file_name=None is used instead) — confirm whether the build or the Unity
# Editor is the intended target.
env_file_name = "..\\Builds\\single-agent-env\\autonomous-drone.exe"

# Create the low-level environment, wrap it in the AEC interface, and reset once
# so that env.possible_agents can be read.
env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=True, side_channels=[engine_config_channel, agent_count_channel])
env = UnityAECEnv(env)
env.reset()
num_agents = len(env.possible_agents)

# num_actions = env.action_space(env.possible_agents[0]).n
# observation_size = env.observation_space(env.possible_agents[0]).shape

In [67]:
# Size of the action vector: first dimension of the action-space shape of the
# first possible agent.
num_actions = env.action_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_actions} actions in enviroment")

There is total of 4 actions in enviroment


In [68]:
# Smoke test: draw a random action (this first sample is discarded), then step
# the environment once with a second, independently drawn sample.
env.action_space(env.possible_agents[0]).sample()
env.step(env.action_space(env.possible_agents[0]).sample())

In [69]:
# Size of the observation vector: first dimension of the observation-space shape
# of the first possible agent.
num_inputs = env.observation_space(env.possible_agents[0]).shape[0]
print(f"There is total of {num_inputs} inputs in enviroment")

There is total of 14 inputs in enviroment


In [None]:
# Show the agent names currently reported by the environment.
agents = env.agents
agents

In [None]:
def send_num_agents(num: int):
    """Send ``num`` to Unity as an integer over the notebook's ``agent_count_channel``.

    Args:
        num: Value forwarded unchanged as the ``data`` argument of ``send_int``.
    """
    agent_count_channel.send_int(data=num)

In [None]:
# Test sending data: push an agent count of 2 through the side channel.
targetAgentsCount = 2
send_num_agents(targetAgentsCount)

In [None]:
def map_agents(agents=None):
    """Build lookup tables between agent ids and dense, zero-based indices.

    Each agent name is split on ``"="`` and the third field (index 2) is parsed
    as the integer agent id. Ids are numbered in iteration order, so the
    returned indices can be used to address per-agent lists.

    Args:
        agents: Optional iterable of agent-name strings. When omitted, the
            notebook-global ``env.agents`` is used, which matches the original
            zero-argument behaviour.

    Returns:
        A tuple ``(map_agent_to_index, map_index_to_agent)`` of two dicts:
        agent id -> index, and index -> agent id.

    Raises:
        IndexError: If a name has fewer than two ``"="`` separators.
        ValueError: If the third field is not an integer.
    """
    if agents is None:
        agents = env.agents
    # Parse every id once, then derive both directions from the same list so
    # the two tables cannot drift apart.
    agent_ids = [int(name.split("=")[2]) for name in agents]
    map_agent_to_index = {agent_id: index for index, agent_id in enumerate(agent_ids)}
    map_index_to_agent = dict(enumerate(agent_ids))
    return map_agent_to_index, map_index_to_agent

In [None]:
def set_agents_and_double_reset(num_agents: int):
    """Send the requested agent count to Unity, then reset the environment twice.

    Args:
        num_agents: Value sent as ``data`` over ``agent_count_channel``.
    """
    agent_count_channel.send_int(data=num_agents)
    # NOTE(review): two consecutive resets are issued on purpose; presumably the
    # first delivers the side-channel message and the second picks up the new
    # agent set — confirm against the Unity side.
    for _ in range(2):
        env.reset()

In [None]:
def create_policies(genomes, cfg):
    """Create one feed-forward network per genome and zero each genome's fitness.

    Args:
        genomes: Iterable of ``(key, genome)`` pairs; the key is ignored.
        cfg: Configuration object passed to ``FeedForwardNetwork.create``.

    Returns:
        List of networks in the same order as ``genomes``.
    """
    networks = []
    for _genome_key, genome in genomes:
        networks.append(neat.nn.FeedForwardNetwork.create(genome, cfg))
        # Fitness is reset only after the network was built successfully.
        genome.fitness = 0
    return networks

In [None]:
MAX_STEPS = 1000
def eval_genomes(genomes, cfg):
    """Evaluate one NEAT generation with one environment agent per genome.

    A network is built for every genome, the environment is asked for the same
    number of agents, and each agent is driven by its network for at most
    ``MAX_STEPS`` turns per agent. The reward recorded for each agent is then
    written to the corresponding ``genome.fitness``.

    Args:
        genomes: Iterable of ``(key, genome)`` pairs, as passed to the fitness
            function by ``Population.run``.
        cfg: NEAT configuration used to build the networks.

    Returns:
        None. Results are stored on the genomes' ``fitness`` attribute.
    """
    # Materialize so the pairs can be walked again when writing fitness back.
    genomes = list(genomes)
    policies = create_policies(genomes, cfg)
    target_agents = len(policies)
    set_agents_and_double_reset(num_agents=target_agents)
    rewards = [0] * target_agents
    # map_agents() returns a pair (agent id -> index, index -> agent id).
    # Only the first table is needed; indexing the pair itself with an agent id
    # (as the previous code did) raises TypeError/IndexError.
    agent_to_index, _ = map_agents()
    for agent in env.agent_iter(env.num_agents * MAX_STEPS):
        current_agent = int(agent.split("=")[2])
        obs, reward, done, info = env.last(observe=True)
        if done:
            # A finished agent must be stepped with None.
            action = None
        else:
            index = agent_to_index[current_agent]
            # NOTE(review): keeps only the most recent reward per agent, as the
            # original did; confirm whether env.last() reports a cumulative or
            # per-step reward and whether the terminal reward should count too.
            rewards[index] = reward
            action = policies[index].activate(obs)
        env.step(action)
    env.reset()
    # create_policies() zeroes every fitness; without this write-back every
    # genome would be reported to NEAT with fitness 0.
    for (_, genome), fitness in zip(genomes, rewards):
        genome.fitness = float(fitness)
    print("\nFinished generation")

In [None]:
def run(config_file, run, datte):
    """Run one NEAT experiment and pickle the best genome.

    Args:
        config_file: Path to the NEAT configuration file.
        run: Identifier of this run; used in the log message, passed to the
            reporter, and used as a directory name for the saved genome.
        datte: Timestamp string shared by all runs of a session; used as the
            parent directory name under ``logs/``.

    Returns:
        None. The best genome is written to ``logs/<datte>/<run>/best.pkl``.
    """
    print(f"Running {run}")
    max_generations = 5

    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    pop = neat.Population(config)

    stats = neat.StatisticsReporter()

    pop.add_reporter(stats)
    #pop.add_reporter(neat.Checkpointer(generation_interval=25, time_interval_seconds=1200, filename_prefix='NEAT/checkpoints/NEAT-checkpoint-'))
    # NOTE(review): TBReporter is not visible in this file; presumably it comes
    # from a customized neat package — confirm its argument meaning there.
    pop.add_reporter(neat.TBReporter(True, 0, run, datte))
    #pop.add_reporter(neat.StdOutReporter(True))
    env.reset()
    best = pop.run(eval_genomes, max_generations)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(best))
    print("Finished running!")

    # Save best genome. The target directory is created first so the write does
    # not depend on some other component having created it already.
    best_path = f'logs/{datte}/{run}/best.pkl'
    os.makedirs(os.path.dirname(best_path), exist_ok=True)
    with open(best_path, 'wb') as f:
        pickle.dump(best, f)

In [None]:
# One timestamp (day-month-year--hour_minute) is shared by every run started
# from this cell; it is passed to run() together with the run index.
datte = datetime.datetime.now().strftime("%d-%m-%Y--%H_%M")
config_path = 'test_config'
runs = 1
for r in range(runs):
    run(config_path, r, datte)

In [70]:
# Inspect the (agent id -> index, index -> agent id) tables for the current agents.
# NOTE(review): this global rebinds the builtin name `map` for the rest of the
# session.
map = map_agents()
map

({0: 0}, {0: 0})

In [144]:
def add_run(data, run_id):
    """Make sure ``data`` has an entry for ``run_id``.

    An empty dict is inserted when the run is missing; an existing entry is
    left untouched.
    """
    data.setdefault(run_id, {})

def update_fitness(data, run_id, agent_id, fitness):
    """Overwrite the stored fitness of an existing agent record.

    If the run or the agent is unknown, nothing is modified and a message is
    printed instead.
    """
    if run_id not in data or agent_id not in data[run_id]:
        print(f"Run {run_id} or Agent {agent_id} does not exist. Cannot update fitness.")
        return
    data[run_id][agent_id]["fitness"] = fitness
        
def update_observe(data, run_id, agent_id, prev_observe):
    """Append an observation to an existing agent record's ``prev_observe`` list.

    If the run or the agent is unknown, nothing is modified and a message is
    printed instead.
    """
    if run_id not in data or agent_id not in data[run_id]:
        print(f"Run {run_id} or Agent {agent_id} does not exist. Cannot update observations.")
        return
    data[run_id][agent_id]["prev_observe"].append(prev_observe)

def add_agent(data, run_id, agent_id, initial_fitness=0):
    """Create an empty record for ``agent_id`` inside ``run_id`` if none exists.

    The run entry is created first when necessary. A new record holds an empty
    action list, ``initial_fitness``, and an empty observation list; an
    existing record is left unchanged.
    """
    add_run(data, run_id)
    run_agents = data[run_id]
    if agent_id in run_agents:
        return
    run_agents[agent_id] = {
        "actions": [],
        "fitness": initial_fitness,
        "prev_observe": [],
    }

def add_action(data, run_id, agent_id, action):
    """Append ``action`` to the agent's action list, creating the record if needed."""
    add_agent(data, run_id, agent_id)
    agent_record = data[run_id][agent_id]
    agent_record["actions"].append(action)

In [145]:
import random
import time

# Record random-action episodes: for each entry of target_agents, request that
# many agents, step every agent with sampled actions, and store the actions,
# the latest reward and the observations per agent in replay_buffer
# (keyed by run index, then by agent id).
replay_buffer = {}

# Upper bound on turns per agent; agent_iter is limited to num_agents * num_cycles.
num_cycles = 50
# target_agents = [4, 2, 3, 1, 5, 10, 3, 50, 50, 48]
target_agents = [4, 4, 4]
for i in range(len(target_agents)):
    set_agents_and_double_reset(num_agents = target_agents[i])
    map_agent_to_index, map_index_to_agent = map_agents()
    # print(f"Agents: {env.num_agents}")
    # print(f"{map}")
    rewards = [0] * env.num_agents
    for agent in env.agent_iter(env.num_agents * num_cycles):
        # Agent id is the third "="-separated field of the agent name.
        current_agent = int(agent.split("=")[2])
        prev_observe, reward, done, info = env.last(observe=True)
        if done:
            action = None
        else:
            action = env.action_spaces[agent].sample() 
        # add_action creates the agent record on first use, so the two
        # update_* calls below always find it. A None action is recorded too.
        add_action(replay_buffer, i, current_agent, action)
        update_fitness(replay_buffer, i, current_agent, reward)
        update_observe(replay_buffer, i, current_agent, prev_observe)
        rewards[map_agent_to_index[current_agent]] = reward
        env.step(action)
    env.reset()
    print(rewards, end= '\n')
    #time.sleep(5)


Sending: 4
[-0.07325278, -0.056901477, -0.046763003, -0.052060045]
Sending: 4
[-0.09530868, -0.05167399, -0.046863355, -0.04441011]
Sending: 4
[-0.53272086, -0.08948949, 0.008481759, -0.055401575]


In [146]:
# Display everything recorded for the run with index 2 (all agents, all steps).
replay_buffer[2]

{0: {'actions': [array([-0.7635634 ,  0.9115681 , -0.09875052, -0.6852374 ], dtype=float32),
   array([ 0.90206575,  0.9399406 ,  0.20010617, -0.21662143], dtype=float32),
   array([-0.51005465,  0.49794143,  0.9211159 , -0.5105349 ], dtype=float32),
   array([-0.7974769 , -0.11899433,  0.62209594, -0.3052135 ], dtype=float32),
   array([-0.29024842, -0.26167548,  0.24861804,  0.37220126], dtype=float32),
   array([-0.31198916, -0.7606082 ,  0.21448123,  0.6313666 ], dtype=float32),
   array([-0.5315264 , -0.8036878 ,  0.6350819 , -0.83322704], dtype=float32),
   array([0.85001945, 0.53848207, 0.8701935 , 0.521115  ], dtype=float32),
   array([-0.13659932, -0.92604667,  0.22905634, -0.217378  ], dtype=float32),
   array([ 0.78839034,  0.7724198 ,  0.76016563, -0.6784161 ], dtype=float32),
   array([ 0.15642734,  0.3754027 , -0.20272411,  0.53965926], dtype=float32),
   array([-0.7948681 ,  0.52607346, -0.5025904 , -0.38855475], dtype=float32),
   array([ 0.3886722 , -0.61684656,  0.045

In [148]:
# Now test whether an agent receives the same reward for the same steps.
# Pick one recorded agent and pull out its actions, last stored reward and
# observations as the reference for the replay below.
test_run = 2
test_agent = 9
actions_to_reproduce = replay_buffer[test_run][test_agent]['actions']
expected_reward = replay_buffer[test_run][test_agent]['fitness']
expected_observe = replay_buffer[test_run][test_agent]['prev_observe']
actions_to_reproduce, expected_reward

([array([-0.6316958 ,  0.34470174,  0.31601465, -0.7531064 ], dtype=float32),
  array([-0.64759654,  0.6965028 ,  0.6392375 ,  0.57287365], dtype=float32),
  array([-0.35911146,  0.34913555, -0.6137058 ,  0.15920123], dtype=float32),
  array([ 0.02544273, -0.31945324, -0.4940518 ,  0.33634764], dtype=float32),
  array([ 0.9211581 ,  0.67919225, -0.43141252,  0.93811935], dtype=float32),
  array([ 0.9652176 , -0.13962476,  0.5075995 , -0.74758846], dtype=float32),
  array([ 0.00117609, -0.0548018 , -0.5977862 , -0.8609914 ], dtype=float32),
  array([-0.12594754,  0.94712543,  0.0578706 ,  0.5863376 ], dtype=float32),
  array([ 0.40001526,  0.78482985,  0.2821162 , -0.2857918 ], dtype=float32),
  array([-0.45955426, -0.5491068 ,  0.42273828,  0.07438755], dtype=float32),
  array([-0.8253426 ,  0.5487444 , -0.38663423,  0.1496226 ], dtype=float32),
  array([-0.8139322 , -0.7234193 , -0.66033226, -0.21705988], dtype=float32),
  array([-0.9941285 ,  0.1933844 , -0.47037652, -0.67544544], dt

In [150]:
# Replay the recorded actions with a single agent and collect the observations
# seen before each step, for comparison with expected_observe.
observes = []
current_cycle = 0
env.reset()
set_agents_and_double_reset(num_agents = 1)
for agent in env.agent_iter(env.num_agents * num_cycles):
    current_agent = int(agent.split("=")[2])
    prev_observe, reward, done, info = env.last(observe=True)
    if done:
        action = None
    else:
        # Actions are consumed in recording order, one per loop iteration.
        action = actions_to_reproduce[current_cycle] 
    observes.append(prev_observe)
    env.step(action)
    # Advanced on every iteration, including ones where the agent was done.
    current_cycle+=1
env.reset()
# Reward reported on the final iteration of the loop above.
print(reward, end= '\n')


Sending: 1
-0.068639934


In [152]:
# Print each replayed observation next to the one recorded for the same step.
# Indexing (rather than zip) is kept so a shorter reference list raises instead
# of silently truncating the comparison.
for i, obs in enumerate(observes):
    print(f"OBS: {obs}, expected: {expected_observe[i]}")

[ 0.          0.03000899  0.          0.03000899  0.          0.
  1.          0.          0.         -0.09739784  0.          0.
 -0.19296518  0.        ] [ 0.          0.03000899  0.          0.03000899  0.          0.
  1.          0.          0.         -0.09739784  0.          0.
 -0.19296518  0.        ]
[ 2.9167836e-06  3.0028811e-02  3.6788301e-06  3.0028811e-02
  1.9053735e-02  4.8828735e-03  9.9996912e-01  6.1598220e-03
 -3.8895130e-03 -2.9841156e-03 -4.3098344e-03 -7.7788732e-03
 -5.9681432e-03 -8.6195273e-03] [ 2.5213981e-06  3.0028803e-02  3.1726195e-06  3.0028803e-02
  1.9018704e-02  4.8879352e-03  9.9996907e-01  6.1651305e-03
 -3.4687773e-03 -2.9581045e-03 -3.8340348e-03 -6.9374973e-03
 -5.9161950e-03 -7.6680128e-03]
[ 7.4195509e-06  3.0023452e-02  4.6324030e-06  3.0023454e-02
  2.3922291e-02  1.2328210e-02  9.9989408e-01  7.7347080e-03
 -3.4996152e-03  1.5099761e-03  1.2359378e-03 -6.9990954e-03
  3.0199618e-03  2.4718796e-03] [ 6.4649680e-06  3.0023454e-02  4.0460341e-

In [120]:
# Close the environment once the experiments above are finished.
env.close()

In [None]:
# Step the environment 1000 times with a fixed action and reset whenever the
# current agent reports done.
# NOTE(review): in file order this cell follows env.close(); it presumably needs
# a freshly created env to run — confirm the intended execution order.
for i in range(1000):
    env.step(action=[1, 1, 1, 1])
    obs, reward, done, info = env.last(observe=True)
    if done:
        # next_obs is assigned here but not read anywhere in this cell.
        next_obs = {"observation": obs}
        env.reset()
env.reset()

In [None]:
# Close the environment at the end of the notebook.
env.close()