In [26]:
from evotorch.neuroevolution import GymNE, NEProblem, SupervisedNE, VecGymNE
from evotorch.algorithms import CEM, CMAES, PGPE, SNES, XNES, Cosyne
import torch
from torch import nn
from evotorch.decorators import pass_info

In [None]:
import pickle
import numpy as np
import neat
import datetime
import os
import math

from comunication_channel import AgentLogChannel

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

In [None]:
# Paths kept together as notebook configuration. Neither name is read again in
# this cell (the environment below is created with file_name=None).
env_path = "../Builds/train-env/autonomous-drone.exe"
save_nn_destination = 'result/best.pkl'

# Side channels are created (and the engine one configured) before the
# environment so they can be handed to the UnityEnvironment constructor.
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(time_scale=10)
agent_count_channel = AgentLogChannel()

# NOTE(review): file_name=None does not launch the build at `env_path`; per the
# ML-Agents docs it connects to a running Unity Editor instead -- confirm this
# is the intended training setup.
env = UnityEnvironment(
    file_name=None,
    worker_id=0,
    no_graphics=True,
    side_channels=[
        engine_config_channel,
        agent_count_channel,
    ],
)
env.reset()

In [None]:
def eval_genomes(genomes, cfg):
    """Run one episode per genome in the Unity environment and set each genome's fitness.

    Every genome is turned into a policy (via ``create_policies``), each policy
    drives one Unity agent, and the rewards an agent collects until it shows up
    in the terminal steps are summed into that genome's ``fitness``.

    Args:
        genomes: Sequence of ``(key, genome)`` pairs; ``genome.fitness`` is
            written for every entry.
        cfg: Configuration object, forwarded unchanged to ``create_policies``.

    Side effects:
        - Increments the module-level ``generation`` counter.
        - Steps the module-level ``env`` until every agent has terminated.
        - Every ``SAVE_INTERVAL`` generations, pickles the best ``(key, genome)``
          pair of this generation to ``<file_name_prefix>best-<generation>.pkl``.

    NOTE(review): fitness assignment indexes ``episode_rewards`` by genome
    position, so it assumes the environment reports at least ``len(genomes)``
    agents after the reset -- confirm against ``set_agents_and_double_reset``.
    """
    global generation
    generation += 1

    policies = create_policies(genomes, cfg)
    set_agents_and_double_reset(len(policies))
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    agent_count = len(decision_steps.agent_id)

    unity_to_neat_map, neat_to_unity_map = map_agent_ids(decision_steps)

    # NEAT-side indices of agents whose episode has ended. A set keeps the
    # per-agent, per-step membership checks O(1).
    finished_agents = set()

    episode_rewards = [0] * agent_count
    print(f"Agent count: {agent_count}")

    done = False
    while not done:
        # Queue an action for every agent that requests a decision and is still running.
        for agent in decision_steps:
            neat_index = unity_to_neat_map[agent]
            if neat_index in finished_agents:
                continue
            nn_input = np.asarray(decision_steps[agent].obs[:])
            # Only the first observation of the agent is fed to the policy.
            actions = policies[neat_index].activate(nn_input[0])
            # Wrap in a list so the action array has a leading axis of size 1.
            # NOTE(review): outputs are passed on unclipped; an
            # np.clip(..., -1, 1) call was previously commented out here.
            continuous_actions = np.asarray([actions])
            action_tuple = ActionTuple(discrete=None, continuous=continuous_actions)
            env.set_action_for_agent(
                behavior_name=behavior_name,
                agent_id=agent,
                action=action_tuple,
            )

        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        # Accumulate rewards; terminal steps take precedence and retire the agent.
        for agent in range(agent_count):
            if agent in finished_agents:
                continue
            local_agent = neat_to_unity_map[agent]
            if local_agent in terminal_steps:
                episode_rewards[agent] += terminal_steps[local_agent].reward
                finished_agents.add(agent)
            elif local_agent in decision_steps:
                episode_rewards[agent] += decision_steps[local_agent].reward

        if len(finished_agents) >= agent_count:
            done = True

    for i, (_, genome) in enumerate(genomes):
        genome.fitness = episode_rewards[i]

    if generation % SAVE_INTERVAL == 0:
        # The whole (key, genome) pair is pickled, not just the genome.
        best_genome_current_generation = max(genomes, key=lambda x: x[1].fitness)
        with open(f"{file_name_prefix}best-{generation}.pkl", 'wb') as f:
            pickle.dump(best_genome_current_generation, f)
      

In [91]:
import random
from evotorch.tools import dtype_of, device_of

def random_fitness(network):
    """Placeholder fitness: run one random input through `network`, print it, return a random score.

    A single random sample of 16 features is created with the network's dtype
    and device, the first row of the network output is printed, and the
    returned fitness is a random integer in [0, 10] that does not depend on
    the network at all.
    """
    probe = torch.randn((1, 16), dtype=dtype_of(network), device=device_of(network))
    first_row = network(probe)[0, :]

    print(first_row)
    return random.randint(0, 10)

In [103]:
def my_network():
    """Build a fresh 16 -> 10 -> 10 -> 4 fully connected network with Tanh after every layer."""
    layer_sizes = (16, 10, 10, 4)

    modules = []
    for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
        modules.append(torch.nn.Linear(in_features, out_features))
        modules.append(torch.nn.Tanh())

    return torch.nn.Sequential(*modules)

In [104]:
# Smoke-test problem: evolve the parameters of `my_network`, scoring each
# candidate with `random_fitness` (higher is better).
# With num_actors=1 the evotorch log printed by this cell reports that the
# actor count is dropped to 0, i.e. no parallel actors are allocated.
problem = NEProblem(
    network=my_network,
    network_eval_func=random_fitness,
    objective_sense="max",
    num_actors=1,
)

[2024-04-13 10:02:37] INFO     <27636> evotorch.core: Instance of `NEProblem` (id:2606897599472) -- The `dtype` for the problem's decision variables is set as torch.float32
[2024-04-13 10:02:37] INFO     <27636> evotorch.core: Instance of `NEProblem` (id:2606897599472) -- `eval_dtype` (the dtype of the fitnesses and evaluation data) is set as torch.float32
[2024-04-13 10:02:37] INFO     <27636> evotorch.core: Instance of `NEProblem` (id:2606897599472) -- The `device` of the problem is set as cpu
[2024-04-13 10:02:37] INFO     <27636> evotorch.core: Instance of `NEProblem` (id:2606897599472) -- The number of actors that will be allocated for parallelized evaluation was encountered as 1. This number is automatically dropped to 0, because having only 1 actor does not bring any benefit in terms of parallelization.
[2024-04-13 10:02:37] INFO     <27636> evotorch.core: Instance of `NEProblem` (id:2606897599472) -- The number of actors that will be allocated for parallelized evaluation is 0
[

In [105]:
from evotorch.logging import StdOutLogger

# PGPE run on the smoke-test problem. The StdOutLogger is attached before
# running so the per-iteration status (iter, mean_eval, ...) is printed.
searcher = PGPE(
    problem,
    popsize=10,
    center_learning_rate=0.2,
    stdev_learning_rate=0.1,
    radius_init=2.25,
)
StdOutLogger(searcher)
searcher.run(50)

[2024-04-13 10:02:38] INFO     <27636> evotorch.optimizers: Instance of `ClipUp` (id:2606897595968) -- The maximum speed for the ClipUp optimizer is set as 0.4 which is two times the given step size.
tensor([-0.0148,  0.0411,  0.2568, -0.0322], grad_fn=<SliceBackward0>)
tensor([ 0.0829, -0.0783, -0.2401,  0.0188], grad_fn=<SliceBackward0>)
tensor([ 0.0919,  0.1610, -0.0968, -0.0449], grad_fn=<SliceBackward0>)
tensor([-0.0887, -0.0964,  0.0421,  0.0535], grad_fn=<SliceBackward0>)
tensor([-0.1351,  0.0135, -0.1631, -0.0086], grad_fn=<SliceBackward0>)
tensor([ 0.0689, -0.0174,  0.1159,  0.0321], grad_fn=<SliceBackward0>)
tensor([-0.0970,  0.0595, -0.0751, -0.0401], grad_fn=<SliceBackward0>)
tensor([0.2886, 0.1951, 0.0553, 0.0912], grad_fn=<SliceBackward0>)
tensor([-0.1340,  0.1067,  0.2656,  0.1770], grad_fn=<SliceBackward0>)
tensor([ 0.1773, -0.1391, -0.1320, -0.2505], grad_fn=<SliceBackward0>)
         iter : 1
    mean_eval : 4.800000190734863
pop_best_eval : 10.0
  median_eval : 4.0
 

In [95]:
# Cosyne run on the same problem. This re-binds `searcher`, so later cells
# inspect this instance rather than the PGPE one created earlier.
searcher = Cosyne(
    problem,
    popsize=50,
    num_elites=1,
    tournament_size=4,
    mutation_probability=0.2,
    mutation_stdev=0.3,
    permute_all=True,
)
searcher.run(2)

tensor([-2.6421e-05, -2.2069e-05,  8.0357e-06,  1.8062e-05],
       grad_fn=<SliceBackward0>)
tensor([ 1.5590e-05,  6.5495e-07,  2.1302e-05, -2.8095e-05],
       grad_fn=<SliceBackward0>)
tensor([ 1.8517e-05, -2.0806e-05, -1.6830e-05,  1.7190e-05],
       grad_fn=<SliceBackward0>)
tensor([ 3.6074e-05, -1.8285e-06,  2.0540e-05,  8.7725e-06],
       grad_fn=<SliceBackward0>)
tensor([ 5.1415e-08,  3.8556e-06, -3.2350e-06,  2.2093e-05],
       grad_fn=<SliceBackward0>)
tensor([-6.5760e-06,  3.8907e-05,  3.9599e-05, -5.6665e-07],
       grad_fn=<SliceBackward0>)
tensor([-5.8583e-05, -3.6116e-05,  1.5483e-05,  5.6683e-06],
       grad_fn=<SliceBackward0>)
tensor([ 1.9822e-05,  2.5852e-05, -1.6865e-05, -2.8431e-05],
       grad_fn=<SliceBackward0>)
tensor([ 6.7319e-06, -8.3912e-06, -3.6203e-06, -7.8810e-06],
       grad_fn=<SliceBackward0>)
tensor([ 1.4365e-05,  3.2318e-06, -3.6008e-05, -2.8695e-05],
       grad_fn=<SliceBackward0>)
tensor([-3.8861e-06,  7.4871e-06, -2.1927e-05,  6.0536e-06],

In [96]:
# Sanity checks on the most recent `searcher` run. The messages make a failure
# self-explanatory, e.g. when cells were executed out of order and `searcher`
# is not the two-step Cosyne instance.
assert "best" in searcher.status, "searcher.status has no 'best' entry"
assert searcher.step_count == 2, f"expected 2 completed steps, got {searcher.step_count}"
searcher.status

<LazyStatusDict
    pop_best_eval = <not yet computed>
    median_eval = <not yet computed>
    mean_eval = <not yet computed>
    pop_best = <not yet computed>
    iter = 2
    best = <Solution values=tensor([-0.1217, -0.3311,  0.0279,  0.1033,  0.1419, -0.0108,  0.0059, -0.2931,
        -0.2138,  0.5110, -0.6378, -0.2342, -0.5099,  0.0813, -0.2301,  0.0565,
        -0.0100,  0.5154,  0.2469,  0.0763,  0.1570,  0.1610,  0.2701,  0.4292,
        -0.2301,  0.1243,  0.2021, -0.5293,  0.1255, -0.2119,  0.5665,  0.0635,
        -0.0081,  0.3961, -0.3672,  0.0729,  0.0649,  0.1263,  0.1780, -0.1621,
         0.1167, -0.3656,  0.0939, -0.0914,  0.0825, -0.3129, -0.1075,  0.4635,
        -0.1483, -0.0018, -0.5026, -0.6031, -0.2275, -0.0860,  0.1527, -0.1529,
        -0.5318,  0.1757,  0.3927, -0.2155, -0.0407,  0.4612, -0.3914, -0.1300,
        -0.1937, -0.3573,  0.0764, -0.2507]), evals=tensor([10.])>
    worst = <Solution values=tensor([-0.4684, -0.3147,  0.3008,  0.2142,  0.3818,  0.3227, 

# Individual solution

In [97]:
import torch
import torch.nn as nn
from evotorch import Problem
from mlagents_envs.environment import UnityEnvironment
from typing import Any, Callable, Iterable, List, Optional, Union
from collections.abc import Mapping
from evotorch.core import BoundsPairLike, Solution, SolutionBatch


In [98]:
class UnityGymNE(NEProblem):
    def __init__(
        self,
        env: UnityEnvironment = None,
        network: Optional[Union[str, nn.Module, Callable[[], nn.Module]]] = None,
        *,
        network_args: Optional[dict] = None,
        num_episodes: int = 1,
        num_actors: Optional[Union[int, str]] = None,
        initial_bounds: Optional[BoundsPairLike] = (-0.00001, 0.00001),
    ):
        """Set up a maximization neuroevolution problem around a Unity environment.

        Observation and action sizes are read from the FIRST behavior exposed
        by `env` and from that behavior's FIRST observation spec; any further
        behaviors or observation specs are ignored.

        Args:
            env: An already created UnityEnvironment. Required.
            network: The policy, given as a string, an `nn.Module`, or a
                callable returning an `nn.Module`. Required.
            network_args: Optional keyword arguments forwarded to `NEProblem`
                for instantiating the network.
            num_episodes: Stored (as int) on `self._num_episode`.
            num_actors: Forwarded to `NEProblem`.
            initial_bounds: Forwarded to `NEProblem`.

        Raises:
            ValueError: If `env` or `network` is None, or if the environment
                currently exposes no behaviors.
        """
        if env is None:
            raise ValueError("Please specify environment")
        self._env = env

        if network is None:
            # Trailing space after the first sentence: adjacent literals are
            # concatenated verbatim.
            raise ValueError(
                "Received None via the argument `network`. "
                "Please provide the network as a string, or as a `Callable`, or as a `torch.nn.Module` instance."
            )

        self._num_episode = int(num_episodes)

        behavior_specs = self._env.behavior_specs
        behavior_names = list(behavior_specs)
        if not behavior_names:
            # Without this check the lookup below fails with a bare IndexError.
            raise ValueError(
                "The environment exposes no behaviors (`env.behavior_specs` is empty). "
                "Make sure the environment has been reset and contains at least one agent."
            )
        spec = behavior_specs[behavior_names[0]]

        first_observation_spec = spec.observation_specs[0]
        self._obs_length = first_observation_spec.shape[0]
        self._act_length = spec.action_spec.continuous_size
        self._obs_space = first_observation_spec
        self._obs_shape = first_observation_spec.shape

        super().__init__(
            objective_sense="max",  # RL is maximization
            network=network,  # Using the policy as the network
            network_args=network_args,
            initial_bounds=initial_bounds,
            num_actors=num_actors,
            device="cpu",
        )
        self.after_eval_hook.append(self._extra_status)
    @property
    def _network_constants(self) -> dict:
        """Observation/action lengths, spaces and shapes of the environment, as a dict.

        NOTE(review): `self._box_act_space` is not assigned anywhere in the
        visible `__init__`; unless it is defined elsewhere in the class, reading
        this property raises AttributeError -- confirm.
        """
        obs_length, act_length = self._obs_length, self._act_length
        obs_space, act_space = self._obs_space, self._box_act_space
        return dict(
            obs_length=obs_length,
            act_length=act_length,
            obs_space=obs_space,
            act_space=act_space,
            obs_shape=obs_space.shape,
            act_shape=act_space.shape,
        )
    @property
    def _str_network_constants(self) -> dict:
        """Shapes of the observation and action spaces under the keys `obs_space` / `act_space`.

        Presumably these are the constants offered to networks specified as
        strings -- TODO confirm against the evotorch `NEProblem` base class.
        """
        return dict(
            obs_space=self._obs_space.shape,
            act_space=self._box_act_space.shape,
        )