In [3]:
import torch
from torch import nn
from evotorch.decorators import pass_info


# The decorator `@pass_info` used below tells the problem class `GymNE`
# to pass information regarding the gym environment via keyword arguments
# such as `obs_length` and `act_length`.
@pass_info
class LinearPolicy(nn.Module):
    """Policy network consisting of a single linear layer.

    Observations are mapped to actions by one `nn.Linear` layer,
    with no hidden layers and no activation function.
    """

    def __init__(
        self,
        obs_length: int,
        act_length: int,
        bias: bool = True,
        **kwargs,
    ):
        """Build the linear layer of the policy.

        Args:
            obs_length: Number of observations from the environment
                (input size of the linear layer).
            act_length: Number of actions of the environment
                (output size of the linear layer).
            bias: Whether the policy should use biases.
            **kwargs: Any other keyword arguments that are passed;
                they are accepted and ignored.
        """
        # nn.Module requires its own initializer to run before
        # any submodule is assigned.
        super().__init__()
        self.linear = nn.Linear(
            in_features=obs_length,
            out_features=act_length,
            bias=bias,
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Return the output of the linear layer for the given observations."""
        actions = self.linear(obs)
        return actions

In [6]:
# `GymNE` is imported within this cell so that the cell does not raise a
# NameError when the notebook is executed top to bottom on a fresh kernel.
from evotorch.neuroevolution import GymNE

# Neuroevolution problem: evaluate `LinearPolicy` networks on the
# continuous-action Lunar Lander gym environment.
problem = GymNE(
    env="LunarLanderContinuous-v2",
    network=LinearPolicy,
    # Keyword arguments for the network; here the policy is built without biases
    network_args={'bias': False},
    # Number of actors allocated for parallelized evaluation
    num_actors=4,
    observation_normalization=False,
    num_episodes=3,
    initial_bounds=(-0.3, 0.3),
)

[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- The `dtype` for the problem's decision variables is set as torch.float32
[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- `eval_dtype` (the dtype of the fitnesses and evaluation data) is set as torch.float32
[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- The `device` of the problem is set as cpu
[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- The number of actors that will be allocated for parallelized evaluation is 4
[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- Number of GPUs that will be allocated per actor is None
[2024-04-13 10:37:51] INFO     <63928> evotorch.core: Instance of `GymNE` (id:2081106583440) -- `eval_dtype` (the dtype of the fitnesses and evaluation data) is set as torch.float32
[202

In [7]:
from evotorch.algorithms import Cosyne

# Set up the Cosyne search algorithm on the problem defined earlier.
searcher = Cosyne(
    problem,
    # Population settings
    popsize=50,
    num_elites=1,
    tournament_size=4,
    # Mutation settings
    mutation_probability=0.5,
    mutation_stdev=0.3,
    # Permutation setting
    permute_all=True,
)

In [10]:
from evotorch.logging import StdOutLogger

# Attach a logger to the searcher so that the status of each
# iteration is printed to the standard output.
StdOutLogger(searcher)

# Run the evolutionary search for a fixed number of generations.
num_generations = 50
searcher.run(num_generations)

2024-04-13 10:38:14,132	INFO worker.py:1724 -- Started a local Ray instance.


                   iter : 1
            median_eval : -140.58065795898438
              mean_eval : -163.625
          pop_best_eval : -34.220916748046875
              best_eval : -34.220916748046875
             worst_eval : -1476.3692626953125
total_interaction_count : 38388
    total_episode_count : 375

                   iter : 2
            median_eval : -133.28033447265625
              mean_eval : -130.7618408203125
          pop_best_eval : 20.381149291992188
              best_eval : 20.381149291992188
             worst_eval : -1476.3692626953125
total_interaction_count : 61363
    total_episode_count : 600

                   iter : 3
            median_eval : -131.8590850830078
              mean_eval : -124.07872009277344
          pop_best_eval : -10.64992618560791
              best_eval : 20.381149291992188
             worst_eval : -1476.3692626953125
total_interaction_count : 82846
    total_episode_count : 825

                   iter : 4
            median_eval : 

KeyboardInterrupt: 