# Evolving a Lunar Lander with differentiable Genetic Programming

## Installation
To install the required libraries run the command:

In [1]:
#!pip install -r requirements.txt


Imports from the standard genepro-multi library are done here. Any adjustments (e.g. different operators) should be made in the notebook. For example:

```
class SmoothOperator(Node):
  # Illustrative template for a custom operator node; it is not meant to run as-is.
  def __init__(self):
    super(SmoothOperator,self).__init__()
    # Number of child inputs -- presumably how genepro reads `arity`; TODO confirm.
    self.arity = 1
    self.symb = "SmoothOperator"

  def _get_args_repr(self, args):
    # Delegates to the inherited helper; 'before' presumably places the symbol
    # ahead of its argument -- TODO confirm against genepro's Node.
    return self._get_typical_repr(args,'before')

  def get_output(self, X):
    # NumPy evaluation path: apply the operation to the first child's output.
    c_outs = self._get_child_outputs(X)
    # NOTE: `np.smoothOperation` is a placeholder name, not a real NumPy function.
    return np.smoothOperation(c_outs[0])

  def get_output_pt(self, X):
    # PyTorch evaluation path: same operation on the first child's output.
    c_outs = self._get_child_outputs_pt(X)
    # NOTE: `torch.smoothOperation` is a placeholder name, not a real PyTorch function.
    return torch.smoothOperation(c_outs[0])

In [2]:
import gymnasium as gym

from genepro.node_impl import *
from genepro.evo import Evolution
from genepro.node_impl import Constant

import torch
import torch.optim as optim

import random
import os
import copy
from collections import namedtuple, deque

import matplotlib.pyplot as plt
from matplotlib import animation

## Reinforcement Learning Setup
Here we first set up the Gymnasium environment. Please see https://gymnasium.farama.org/environments/box2d/lunar_lander/ for more information on the environment. 

Then a memory buffer is made. This is a buffer in which state transitions are stored. When the buffer reaches its maximum capacity old transitions are replaced by new ones.

A frame buffer is also initialised; it is used later to store animation frames of the environment.

In [3]:
# Module-level environment used by fitness_function_pt below. The "rgb_array"
# render mode is requested so that env.render() can be collected as animation frames.
env = gym.make("LunarLander-v2", render_mode="rgb_array")

In [4]:
# One environment step: the state acted on, the action taken, the resulting
# state and the reward received.
Transition = namedtuple("Transition", ("state", "action", "next_state", "reward"))


class ReplayMemory(object):
    """Bounded buffer of :class:`Transition` records.

    Transitions are kept in a ``deque`` created with ``maxlen=capacity``, so
    once the buffer is full each new transition displaces the oldest one.
    """

    def __init__(self, capacity):
        """Create an empty buffer holding at most ``capacity`` transitions."""
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition built from ``(state, action, next_state, reward)``."""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        """Return ``batch_size`` distinct transitions chosen at random.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

    def __iadd__(self, other):
        """Append ``other``'s transitions to this buffer in place (``a += b``)."""
        self.memory += other.memory
        return self

    def __add__(self, other):
        """Return a new buffer holding this buffer's transitions, then ``other``'s.

        The result keeps this buffer's capacity. Neither operand is modified:
        ``a + b`` must not change ``a`` (use ``a += b`` for in-place merging).
        """
        merged = ReplayMemory(self.memory.maxlen)
        merged.memory.extend(self.memory)
        merged.memory.extend(other.memory)
        return merged

## Fitness Function

Here you get to be creative. The default setup evaluates 5 episodes of at most 300 frames each. Think about which action to pick and which fitness function to use. The multi-tree takes an input of shape $n \times d$, where $n$ is the batch size (here 1) and $d$ is the number of observation features.

In [5]:
def fitness_function_pt(multitree, num_episodes=5, episode_duration=300, ignore_done=False, render=False):
    """Roll out ``multitree`` on the module-level ``env`` using its PyTorch outputs.

    At every step the action is the arg-max of ``multitree.get_output_pt`` and
    the transition is recorded in a fresh ``ReplayMemory`` of capacity 10000.

    Args:
        multitree: individual exposing ``get_output_pt``.
        num_episodes: number of episodes to play.
        episode_duration: maximum number of steps per episode.
        ignore_done: if True, keep stepping after the environment reports
            termination or truncation.
        render: if True, also collect one rendered frame per step.

    Returns:
        ``(episode_returns, memory)`` or, when ``render`` is True,
        ``(episode_returns, memory, frames)``. ``episode_returns`` is a list
        with the summed reward of each episode (not an average).
    """
    replay = ReplayMemory(10000)
    returns_per_episode = []
    if render:
        captured_frames = []

    for _episode in range(num_episodes):
        # env.reset() returns a tuple whose first element is the initial observation
        state = env.reset()[0]
        step_rewards = []
        for _step in range(episode_duration):
            if render:
                captured_frames.append(env.render())

            state_pt = torch.from_numpy(state.reshape((1, -1))).float()
            tree_outputs = multitree.get_output_pt(state_pt)
            chosen = torch.argmax(tree_outputs).detach().item()

            state, reward, terminated, truncated, _info = env.step(chosen)
            step_rewards.append(reward)

            next_state_pt = torch.from_numpy(state.reshape((1, -1))).float()
            replay.push(state_pt, torch.tensor([[chosen]]), next_state_pt, torch.tensor([reward]))

            episode_over = terminated or truncated
            if episode_over and not ignore_done:
                break
        returns_per_episode.append(np.sum(step_rewards))

    if render:
        return returns_per_episode, replay, captured_frames
    return returns_per_episode, replay

def rollout_function(multitree, num_episodes=5, episode_duration=300, ignore_done=False, render=False):
    """Roll out ``multitree`` in a private LunarLander environment (NumPy path).

    A new environment is created for each call and is always closed before
    returning, so repeated calls do not accumulate open environments.

    Args:
        multitree: individual exposing ``get_output``; the action taken is the
            arg-max of its outputs.
        num_episodes: number of episodes to play.
        episode_duration: maximum number of steps per episode.
        ignore_done: if True, keep stepping after the environment reports
            termination or truncation.
        render: if True, also collect one rendered frame per step.

    Returns:
        ``(episode_rewards, [])`` or, when ``render`` is True,
        ``(episode_rewards, [], frames)``. ``episode_rewards`` is a NumPy array
        with the summed reward of each episode; the empty list stands in the
        position where ``fitness_function_pt`` returns its replay memory.
    """
    env = gym.make("LunarLander-v2", render_mode="rgb_array")

    episode_rewards = []
    frames = []
    try:
        for _ in range(num_episodes):
            # env.reset() returns a tuple whose first element is the initial observation
            observation = env.reset()[0]
            rewards = []
            for _ in range(episode_duration):
                if render:
                    frames.append(env.render())
                input_sample = observation.reshape((1, -1))
                action = np.argmax(multitree.get_output(input_sample))
                observation, reward, terminated, truncated, info = env.step(action.item())
                rewards.append(reward)
                if (terminated or truncated) and not ignore_done:
                    break
            episode_rewards.append(np.sum(rewards))
    finally:
        # Release the environment even if the individual raises mid-rollout.
        env.close()

    # Per-episode returns; aggregation is left to the fitness function.
    episode_rewards = np.array(episode_rewards)
    if render:
        return episode_rewards, [], frames
    return episode_rewards, []

def fitness_baseline(episode_rewards, pop):
    """Return the mean episode reward; ``pop`` is accepted but not used."""
    mean_reward = np.mean(episode_rewards)
    return mean_reward

def fitness_function_mean_len_sqrt_std(episode_rewards, pop):
    """Mean episode reward penalised by ``len(pop)`` and the square root of the reward std.

    ``pop`` only needs to support ``len()``; presumably it is the individual
    being scored, so its length acts as a size penalty -- TODO confirm with caller.
    """
    mean_reward = np.mean(episode_rewards)
    size_penalty = len(pop)
    spread_penalty = np.std(episode_rewards) ** 0.5
    return mean_reward - size_penalty - spread_penalty

def fitness_function_mean_len_std(episode_rewards, pop):
    """Mean episode reward penalised by ``len(pop)`` and the reward standard deviation.

    ``pop`` only needs to support ``len()``; presumably it is the individual
    being scored, so its length acts as a size penalty -- TODO confirm with caller.
    """
    mean_reward = np.mean(episode_rewards)
    size_penalty = len(pop)
    spread_penalty = np.std(episode_rewards)
    return mean_reward - size_penalty - spread_penalty

## Fitness function: symmetric approach

To exploit the symmetry of the side-correction thrusters, both side thrusters share a single tree. The right-thruster tree is derived from the left-thruster tree; the only difference is that the feature nodes affected by the symmetry are negated (multiplied by -1), as follows:

f(2)  
$\to$  
```
   *
  / \
-1   f(2) 
```

In [6]:
from copy import deepcopy
from genepro.thruster_symmetry import traverse_and_invert_iter


def rollout_function_symmetry(multitree, num_episodes=5, episode_duration=300, ignore_done=False, render=False):
    """Roll out ``multitree`` with a mirrored copy of ``children[1]`` as an extra output.

    The extra tree is built once per call by passing a deep copy of
    ``multitree.children[1]`` (the left-thruster tree) through
    ``traverse_and_invert_iter`` for feature ids ``[0, 2, 4, 5, 6, 7]``. Its
    output is appended after the multitree's own outputs, and the action taken
    is the arg-max over the combined vector.

    A new environment is created for each call and is always closed before
    returning.

    Args:
        multitree: individual exposing ``children`` and ``get_output``.
        num_episodes: number of episodes to play.
        episode_duration: maximum number of steps per episode.
        ignore_done: if True, keep stepping after the environment reports
            termination or truncation.
        render: if True, also collect one rendered frame per step.

    Returns:
        ``(episode_rewards, [])`` or, when ``render`` is True,
        ``(episode_rewards, [], frames)``. ``episode_rewards`` is a NumPy array
        with the summed reward of each episode.
    """
    env = gym.make("LunarLander-v2", render_mode="rgb_array")

    # Derive the right-thruster tree from a deep copy of the left-thruster tree,
    # so the individual itself is never modified by the inversion.
    right_thruster_tree = traverse_and_invert_iter(
        deepcopy(multitree.children[1]), feature_ids=[0, 2, 4, 5, 6, 7]
    )

    episode_rewards = []
    frames = []
    try:
        for _ in range(num_episodes):
            # env.reset() returns a tuple whose first element is the initial observation
            observation = env.reset()[0]
            rewards = []
            for _ in range(episode_duration):
                if render:
                    frames.append(env.render())
                input_sample = observation.reshape((1, -1))

                # Evolved outputs first, mirrored-tree output last.
                outputs = multitree.get_output(input_sample)
                outputs = np.append(outputs, right_thruster_tree.get_output(input_sample))

                action = np.argmax(outputs)
                observation, reward, terminated, truncated, info = env.step(action.item())
                rewards.append(reward)
                if (terminated or truncated) and not ignore_done:
                    break
            episode_rewards.append(np.sum(rewards))
    finally:
        # Release the environment even if the individual raises mid-rollout.
        env.close()

    # Per-episode returns; aggregation is left to the fitness function.
    episode_rewards = np.array(episode_rewards)
    if render:
        return episode_rewards, [], frames
    return episode_rewards, []

ImportError: cannot import name 'traverse_and_invert' from 'genepro.thruster_symmetry' (c:\Users\caspa\Documents\01_SoftwareDevelopmentLocation\VS Code\genepromulti\genepro\thruster_symmetry.py)

In [None]:
### USED TO STORE THE EXPERIMENT DICTIONARY
import inspect
import itertools
import pickle


def serialize_functions_in_dict(dictionary):
    """Replace functions and nodes in ``dictionary`` by their names, in place.

    Function or method values become their ``__name__``. Inside list values,
    dicts are processed recursively, functions/methods become their
    ``__name__`` and ``Node`` instances become their ``symb``. Dict values are
    processed recursively. Everything else is left untouched.

    Returns:
        The same ``dictionary`` object, after modification.
    """
    def _is_function_or_method(obj):
        return inspect.isfunction(obj) or inspect.ismethod(obj)

    for key, entry in dictionary.items():
        if _is_function_or_method(entry):
            dictionary[key] = entry.__name__
            continue

        if isinstance(entry, list):
            for position, element in enumerate(entry):
                if isinstance(element, dict):
                    entry[position] = serialize_functions_in_dict(element)
                elif _is_function_or_method(element):
                    entry[position] = element.__name__
                elif isinstance(element, Node):
                    entry[position] = element.symb
            continue

        if isinstance(entry, dict):
            dictionary[key] = serialize_functions_in_dict(entry)

    return dictionary

### USED TO CREATE THE EXPERIMENT DICTIONARY
def grid_search_params(params_dict):
    """
    Yield one settings dict per point of the hyperparameter grid.

    Every value of ``params_dict`` that is a list is treated as the set of
    alternatives for its key; any other value is a single fixed choice. The
    generator yields the Cartesian product of all alternatives, keeping the
    key order of ``params_dict``.
    """
    names = params_dict.keys()
    alternatives = []
    for value in params_dict.values():
        alternatives.append(value if isinstance(value, list) else [value])

    for choice in itertools.product(*alternatives):
        yield dict(zip(names, choice))

In [None]:
# Save the gen as a pickle file in the gens folder
def save_and_evaluate_evo_generations(evo, rollout_function, experiment_name, num_episodes=10):
    """Re-evaluate, pickle and summarise the best individual of each generation.

    For every entry of ``evo.best_of_gens`` except the first, the individual is
    rolled out with ``rollout_function``, a one-line summary is printed, and
    the individual is pickled to
    ``./experiments/<experiment_name>/gen/gen_<i>_<evo mean>_<test mean>.pickle``.
    The collected fitness lists are also saved as ``.npy`` files in the
    experiment folder.

    Args:
        evo: object exposing ``best_of_gens``; each entry must expose ``fitnesses``.
        rollout_function: callable ``(individual, num_episodes=...)`` returning
            ``(episode_rewards, _)``.
        experiment_name: sub-folder of ``./experiments`` to write to.
        num_episodes: number of test episodes per individual.

    Returns:
        ``(generation_evo_fitnesses, generation_test_fitnesses)``: two lists
        with one entry per evaluated generation.
    """
    experiment_dir = f"./experiments/{experiment_name}"
    gen_dir = f"{experiment_dir}/gen/"
    # Create the output folders once, up front: the np.save calls below need
    # the experiment folder even when no generation ends up being evaluated.
    os.makedirs(gen_dir, exist_ok=True)

    generation_evo_fitnesses = []
    generation_test_fitnesses = []
    for i, gen in enumerate(evo.best_of_gens):
        # The first entry is skipped, as in the original notebook
        # (presumably it is the pre-evolution individual -- TODO confirm).
        if i == 0:
            continue

        episode_rewards, _ = rollout_function(gen, num_episodes=num_episodes)
        evo_fitness_mean, evo_fitness_std = round(np.mean(gen.fitnesses), 3), round(np.std(gen.fitnesses), 3)
        test_fitness_mean, test_fitness_std = round(np.mean(episode_rewards), 3), round(np.std(episode_rewards), 3)
        print(f"Best of Generation {i}: evo fitness:{evo_fitness_mean}+/-{evo_fitness_std} \t test_fitness:{test_fitness_mean}+/-{test_fitness_std}")

        generation_evo_fitnesses.append(gen.fitnesses)
        generation_test_fitnesses.append(episode_rewards)
        with open(f"{gen_dir}gen_{i}_{evo_fitness_mean}_{test_fitness_mean}.pickle", "wb") as f:
            pickle.dump(gen, f)

    np.save(f"{experiment_dir}/generation_evo_fitnesses.npy", generation_evo_fitnesses)
    np.save(f"{experiment_dir}/generation_test_fitnesses.npy", generation_test_fitnesses)
    return generation_evo_fitnesses, generation_test_fitnesses

def plot_evo_test_fitnesses(evo_fitnesses, test_fitnesses, experiment_name):
    """Plot mean +/- one standard deviation of evolution and test fitness per generation.

    Each argument is a sequence with one collection of fitness values per
    generation. The figure is written to
    ``./experiments/<experiment_name>/<experiment_name>.png`` and then closed.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title(f"Fitnesses: {experiment_name}")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")

    curves = (
        (evo_fitnesses, "evo_fitness", "tab:blue"),
        (test_fitnesses, "test_fitness", "tab:orange"),
    )
    for per_generation, label, colour in curves:
        generations = np.arange(len(per_generation))
        centre = [np.mean(values) for values in per_generation]
        lower = [np.mean(values) - np.std(values) for values in per_generation]
        upper = [np.mean(values) + np.std(values) for values in per_generation]
        # Line for the mean, shaded band for one standard deviation either side.
        ax.plot(generations, centre, label=label, color=colour)
        ax.fill_between(generations, lower, upper, alpha=0.2, color=colour)

    ax.legend()
    plt.savefig(f"./experiments/{experiment_name}/{experiment_name}.png")
    plt.close()

## Evolution Setup
Here the leaf and internal nodes are defined. Think about the odds of sampling a constant in this default configuration. Also think about any operators that could be useful and add them here. 

Adjust the population size (multiple of 8 if you want to use the standard tournament selection), max generations and max tree size to taste. Be aware that each of these settings can increase the runtime.

#### BASELINE

In [None]:
from copy import deepcopy
import json
from genepro.selection import elitism_selection, tournament_selection
from genepro.variation import coeff_mutation, subtree_crossover, subtree_mutation

# Base name of the output folders created under ./experiments by hpo_evolve.
experiment_name = "fitness_symmetry"
# One feature leaf per entry of the environment's observation vector.
num_features = env.observation_space.shape[0]
# Settings handed to grid_search_params() and then to Evolution(**settings).
# NOTE: grid_search_params() treats every list-valued entry as a set of
# alternatives to sweep over. Settings that are themselves lists are therefore
# wrapped in one extra outer list ([[...]]) so they pass through as one option.
evo_settings = {
    "rollout_function": rollout_function_symmetry,
    "fitness_function": fitness_baseline,
    "internal_nodes": [[Plus(), Times(), Div(), Sin(), Sqrt(), Square()]],
    "leaf_nodes": [[Feature(i) for i in range(num_features)] + [Constant()]],
    # rollout_function_symmetry appends one extra output, mirrored from children[1].
    "n_trees": 3,
    "pop_size": 32,
    "max_gens": 500,
    "init_max_depth": 4,
    "max_tree_size": 32,
    "crossovers": [[{"fun": subtree_crossover, "rate": 0.5}]],
    "mutations": [[{"fun": subtree_mutation, "rate": 0.5}]],
    "coeff_opts": [[{"fun": coeff_mutation, "rate": 0.5}]],
    "selection": {"fun": tournament_selection, "kwargs": {"tournament_size": 4}},
    "n_jobs": 8,
    "verbose": True
}

def hpo_evolve(evo_settings, experiment_name):
    """Run one evolution per hyperparameter combination and store the results.

    ``evo_settings`` is expanded with ``grid_search_params``. All combinations
    are printed first. Then, for each combination, the settings are written as
    JSON, an ``Evolution`` is created and evolved, the evolved object is
    pickled, and the best individual of each generation is re-evaluated and
    plotted. Everything is written below ``./experiments/<specific name>``,
    where the specific name is ``experiment_name`` plus a suffix encoding
    population size, generations, tree size and the first crossover, mutation
    and coefficient-optimisation rates.

    NOTE(review): combinations that differ only in settings not encoded in
    that suffix map to the same folder and overwrite each other.

    Args:
        evo_settings: settings dict, possibly with list-valued entries to sweep.
        experiment_name: prefix for the per-combination output folder.
    """
    hpo_settings = list(grid_search_params(evo_settings))
    for settings in hpo_settings:
        # Serialise a deep copy so the live functions/nodes in `settings` survive.
        print(serialize_functions_in_dict(deepcopy(settings)))

    for settings in hpo_settings:
        specific_experiment_name = experiment_name + f"_pops{settings['pop_size']}_gens{settings['max_gens']}_mts{settings['max_tree_size']}_cor{settings['crossovers'][0]['rate']}_mutr{settings['mutations'][0]['rate']}_coeffr{settings['coeff_opts'][0]['rate']}"
        os.makedirs(f"./experiments/{specific_experiment_name}", exist_ok=True)
        with open(f"./experiments/{specific_experiment_name}/evo_settings.json", "w") as f:
            json.dump(serialize_functions_in_dict(deepcopy(settings)), f)

        evo_baseline = Evolution(**settings)
        evo_baseline.evolve()

        with open(f"./experiments/{specific_experiment_name}/evolution_class.pickle", "wb") as f:
            pickle.dump(evo_baseline, f)

        # Evaluate with this combination's rollout function: the un-expanded
        # evo_settings entry would be a list if rollout functions were swept.
        generation_evo_fitnesses, generation_test_fitnesses = save_and_evaluate_evo_generations(
            evo_baseline, settings['rollout_function'], specific_experiment_name, num_episodes=5
        )
        plot_evo_test_fitnesses(generation_evo_fitnesses, generation_test_fitnesses, specific_experiment_name)

# Repeat the full hyperparameter sweep several times, each run in its own
# "<experiment_name>_exp<i>" family of folders.
n_experiments = 4
for i in range(n_experiments):
    hpo_evolve(evo_settings, experiment_name=f"{experiment_name}_exp{i}")

{'rollout_function': 'rollout_function_symmetry', 'fitness_function': 'fitness_baseline', 'internal_nodes': ['+', '*', '/', 'sin', 'sqrt', '**2'], 'leaf_nodes': ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'const?'], 'n_trees': 3, 'pop_size': 32, 'max_gens': 500, 'init_max_depth': 4, 'max_tree_size': 32, 'crossovers': [{'fun': 'subtree_crossover', 'rate': 0.5}], 'mutations': [{'fun': 'subtree_mutation', 'rate': 0.5}], 'coeff_opts': [{'fun': 'coeff_mutation', 'rate': 0.5}], 'selection': {'fun': 'tournament_selection', 'kwargs': {'tournament_size': 4}}, 'n_jobs': 8, 'verbose': True}
gen: 1 (4.2s),	best of gen fitness: -86.102; reward:-86.102+/-64.373,	best of gen size: 23
gen: 2 (6.0s),	best of gen fitness: -97.560; reward:-97.560+/-46.339,	best of gen size: 19
gen: 3 (7.9s),	best of gen fitness: -71.124; reward:-71.124+/-61.961,	best of gen size: 28
gen: 4 (9.9s),	best of gen fitness: -85.269; reward:-85.269+/-79.345,	best of gen size: 11
gen: 5 (12.0s),	best of gen fitness:

  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 17 (43.0s),	best of gen fitness: -96.934; reward:-96.934+/-47.523,	best of gen size: 31


  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 18 (46.0s),	best of gen fitness: -49.603; reward:-49.603+/-54.222,	best of gen size: 31


  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 19 (48.8s),	best of gen fitness: -88.560; reward:-88.560+/-61.429,	best of gen size: 31


  return c_outs[0]**2
  return np.sin(c_outs[0])
  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 20 (51.8s),	best of gen fitness: -64.261; reward:-64.261+/-38.035,	best of gen size: 31


  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 21 (55.2s),	best of gen fitness: -95.429; reward:-95.429+/-18.638,	best of gen size: 31
gen: 22 (59.2s),	best of gen fitness: -91.262; reward:-91.262+/-6.251,	best of gen size: 26


  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 23 (63.1s),	best of gen fitness: -93.848; reward:-93.848+/-14.559,	best of gen size: 31


  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 24 (67.0s),	best of gen fitness: -89.297; reward:-89.297+/-46.836,	best of gen size: 26
gen: 25 (70.9s),	best of gen fitness: -90.429; reward:-90.429+/-27.748,	best of gen size: 31
gen: 26 (74.1s),	best of gen fitness: -99.214; reward:-99.214+/-5.998,	best of gen size: 29
gen: 27 (77.6s),	best of gen fitness: -93.369; reward:-93.369+/-16.443,	best of gen size: 26
gen: 28 (81.0s),	best of gen fitness: 54.347; reward:54.347+/-79.629,	best of gen size: 29
gen: 29 (85.3s),	best of gen fitness: -33.800; reward:-33.800+/-99.670,	best of gen size: 29
gen: 30 (89.6s),	best of gen fitness: 12.976; reward:12.976+/-114.988,	best of gen size: 29
gen: 31 (92.9s),	best of gen fitness: -31.403; reward:-31.403+/-40.069,	best of gen size: 29
gen: 32 (96.8s),	best of gen fitness: 25.341; reward:25.341+/-119.791,	best of gen size: 29
gen: 33 (101.5s),	best of gen fitness: 1.265; reward:1.265+/-68.284,	best of gen size: 29
gen: 34 (107.2s),	best of gen fitness: 53.133; reward:53.133+/-153.701,	best o

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 65 (276.4s),	best of gen fitness: 58.042; reward:58.042+/-123.560,	best of gen size: 32
gen: 66 (281.8s),	best of gen fitness: 17.636; reward:17.636+/-77.610,	best of gen size: 32
gen: 67 (287.7s),	best of gen fitness: 135.388; reward:135.388+/-77.987,	best of gen size: 32
gen: 68 (293.5s),	best of gen fitness: 27.604; reward:27.604+/-134.588,	best of gen size: 32
gen: 69 (299.0s),	best of gen fitness: 62.278; reward:62.278+/-153.246,	best of gen size: 32
gen: 70 (304.4s),	best of gen fitness: 73.985; reward:73.985+/-121.948,	best of gen size: 32
gen: 71 (311.1s),	best of gen fitness: 31.297; reward:31.297+/-81.115,	best of gen size: 32
gen: 72 (317.4s),	best of gen fitness: 101.168; reward:101.168+/-123.107,	best of gen size: 32
gen: 73 (323.0s),	best of gen fitness: 26.830; reward:26.830+/-84.217,	best of gen size: 32
gen: 74 (328.2s),	best of gen fitness: 38.720; reward:38.720+/-84.206,	best of gen size: 32
gen: 75 (334.3s),	best of gen fitness: 42.390; reward:42.390+/-109.803,

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 96 (479.8s),	best of gen fitness: 86.591; reward:86.591+/-119.644,	best of gen size: 32
gen: 97 (486.4s),	best of gen fitness: 67.730; reward:67.730+/-142.149,	best of gen size: 32
gen: 98 (494.1s),	best of gen fitness: 141.460; reward:141.460+/-23.306,	best of gen size: 32
gen: 99 (501.9s),	best of gen fitness: 71.537; reward:71.537+/-93.290,	best of gen size: 32
gen: 100 (508.7s),	best of gen fitness: 68.364; reward:68.364+/-110.861,	best of gen size: 32
gen: 101 (515.2s),	best of gen fitness: 125.144; reward:125.144+/-73.464,	best of gen size: 32


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 102 (521.6s),	best of gen fitness: 69.998; reward:69.998+/-118.982,	best of gen size: 32
gen: 103 (527.7s),	best of gen fitness: 47.578; reward:47.578+/-94.217,	best of gen size: 32
gen: 104 (534.4s),	best of gen fitness: 85.142; reward:85.142+/-103.792,	best of gen size: 32
gen: 105 (540.1s),	best of gen fitness: 143.035; reward:143.035+/-108.263,	best of gen size: 32
gen: 106 (546.3s),	best of gen fitness: 165.551; reward:165.551+/-72.572,	best of gen size: 32
gen: 107 (553.1s),	best of gen fitness: 59.258; reward:59.258+/-107.547,	best of gen size: 32
gen: 108 (558.8s),	best of gen fitness: 123.978; reward:123.978+/-96.369,	best of gen size: 32


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 109 (565.7s),	best of gen fitness: 88.725; reward:88.725+/-96.552,	best of gen size: 31
gen: 110 (571.6s),	best of gen fitness: 76.853; reward:76.853+/-97.334,	best of gen size: 31
gen: 111 (577.6s),	best of gen fitness: 100.111; reward:100.111+/-57.554,	best of gen size: 32
gen: 112 (583.8s),	best of gen fitness: 89.148; reward:89.148+/-71.176,	best of gen size: 32
gen: 113 (589.4s),	best of gen fitness: 53.428; reward:53.428+/-159.287,	best of gen size: 32
gen: 114 (595.4s),	best of gen fitness: 86.815; reward:86.815+/-88.171,	best of gen size: 32
gen: 115 (601.1s),	best of gen fitness: 85.012; reward:85.012+/-119.959,	best of gen size: 32
gen: 116 (607.0s),	best of gen fitness: 131.641; reward:131.641+/-91.796,	best of gen size: 32
gen: 117 (613.2s),	best of gen fitness: 61.306; reward:61.306+/-132.168,	best of gen size: 32
gen: 118 (619.3s),	best of gen fitness: 35.147; reward:35.147+/-124.386,	best of gen size: 32
gen: 119 (625.5s),	best of gen fitness: 66.189; reward:66.189+

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 122 (644.2s),	best of gen fitness: 78.482; reward:78.482+/-158.809,	best of gen size: 32
gen: 123 (649.7s),	best of gen fitness: 78.829; reward:78.829+/-88.560,	best of gen size: 29


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 124 (656.2s),	best of gen fitness: 92.022; reward:92.022+/-125.130,	best of gen size: 29
gen: 125 (661.9s),	best of gen fitness: 80.369; reward:80.369+/-130.515,	best of gen size: 32
gen: 126 (667.4s),	best of gen fitness: 55.832; reward:55.832+/-91.602,	best of gen size: 32
gen: 127 (673.7s),	best of gen fitness: 104.754; reward:104.754+/-131.613,	best of gen size: 32
gen: 128 (679.6s),	best of gen fitness: 61.625; reward:61.625+/-112.628,	best of gen size: 30
gen: 129 (685.8s),	best of gen fitness: 54.572; reward:54.572+/-121.262,	best of gen size: 29
gen: 130 (692.0s),	best of gen fitness: 79.537; reward:79.537+/-79.835,	best of gen size: 29
gen: 131 (698.8s),	best of gen fitness: 51.092; reward:51.092+/-72.749,	best of gen size: 32
gen: 132 (704.6s),	best of gen fitness: 87.670; reward:87.670+/-54.344,	best of gen size: 31
gen: 133 (710.2s),	best of gen fitness: 46.022; reward:46.022+/-113.685,	best of gen size: 32
gen: 134 (716.3s),	best of gen fitness: 51.828; reward:51.828+

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 138 (740.6s),	best of gen fitness: 45.975; reward:45.975+/-200.385,	best of gen size: 31
gen: 139 (747.4s),	best of gen fitness: 79.072; reward:79.072+/-118.335,	best of gen size: 32


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 140 (752.9s),	best of gen fitness: 67.591; reward:67.591+/-111.871,	best of gen size: 29
gen: 141 (759.0s),	best of gen fitness: 80.641; reward:80.641+/-65.865,	best of gen size: 29
gen: 142 (764.2s),	best of gen fitness: 86.105; reward:86.105+/-122.096,	best of gen size: 30
gen: 143 (770.2s),	best of gen fitness: 125.341; reward:125.341+/-106.070,	best of gen size: 29


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 144 (775.9s),	best of gen fitness: 46.419; reward:46.419+/-37.348,	best of gen size: 29
gen: 145 (782.5s),	best of gen fitness: 76.772; reward:76.772+/-124.424,	best of gen size: 31


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))
  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 146 (787.9s),	best of gen fitness: 102.928; reward:102.928+/-105.328,	best of gen size: 28
gen: 147 (793.3s),	best of gen fitness: 37.102; reward:37.102+/-57.240,	best of gen size: 28
gen: 148 (799.2s),	best of gen fitness: 128.163; reward:128.163+/-77.464,	best of gen size: 29
gen: 149 (805.1s),	best of gen fitness: 70.542; reward:70.542+/-126.043,	best of gen size: 31
gen: 150 (810.8s),	best of gen fitness: 84.790; reward:84.790+/-131.105,	best of gen size: 28
gen: 151 (817.1s),	best of gen fitness: 84.330; reward:84.330+/-129.262,	best of gen size: 28
gen: 152 (823.4s),	best of gen fitness: 56.548; reward:56.548+/-86.520,	best of gen size: 31
gen: 153 (830.2s),	best of gen fitness: 26.430; reward:26.430+/-81.978,	best of gen size: 32
gen: 154 (836.1s),	best of gen fitness: 85.917; reward:85.917+/-110.109,	best of gen size: 31
gen: 155 (843.1s),	best of gen fitness: 48.919; reward:48.919+/-119.178,	best of gen size: 32
gen: 156 (850.2s),	best of gen fitness: 82.605; reward:82.60

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 172 (961.8s),	best of gen fitness: 56.177; reward:56.177+/-117.247,	best of gen size: 32
gen: 173 (968.6s),	best of gen fitness: 46.037; reward:46.037+/-82.140,	best of gen size: 32
gen: 174 (975.1s),	best of gen fitness: 38.791; reward:38.791+/-80.199,	best of gen size: 31
gen: 175 (982.3s),	best of gen fitness: 73.725; reward:73.725+/-91.550,	best of gen size: 32
gen: 176 (987.9s),	best of gen fitness: 51.548; reward:51.548+/-114.208,	best of gen size: 31
gen: 177 (994.8s),	best of gen fitness: 51.534; reward:51.534+/-111.376,	best of gen size: 31
gen: 178 (1001.5s),	best of gen fitness: 54.310; reward:54.310+/-83.013,	best of gen size: 30
gen: 179 (1007.8s),	best of gen fitness: 92.790; reward:92.790+/-122.607,	best of gen size: 30
gen: 180 (1014.7s),	best of gen fitness: 63.002; reward:63.002+/-143.030,	best of gen size: 31
gen: 181 (1020.7s),	best of gen fitness: 57.005; reward:57.005+/-77.773,	best of gen size: 30
gen: 182 (1027.7s),	best of gen fitness: 71.671; reward:71.67

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))
  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 206 (1175.5s),	best of gen fitness: 76.580; reward:76.580+/-45.334,	best of gen size: 31
gen: 207 (1181.1s),	best of gen fitness: 53.751; reward:53.751+/-96.993,	best of gen size: 28


  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 208 (1187.4s),	best of gen fitness: 80.342; reward:80.342+/-104.590,	best of gen size: 29
gen: 209 (1192.8s),	best of gen fitness: 104.013; reward:104.013+/-92.184,	best of gen size: 29
gen: 210 (1199.4s),	best of gen fitness: 48.650; reward:48.650+/-109.262,	best of gen size: 32
gen: 211 (1205.2s),	best of gen fitness: 57.834; reward:57.834+/-98.490,	best of gen size: 31
gen: 212 (1211.9s),	best of gen fitness: 43.458; reward:43.458+/-82.241,	best of gen size: 32
gen: 213 (1218.5s),	best of gen fitness: 96.122; reward:96.122+/-96.615,	best of gen size: 31
gen: 214 (1225.0s),	best of gen fitness: 72.443; reward:72.443+/-132.146,	best of gen size: 32
gen: 215 (1231.4s),	best of gen fitness: 118.205; reward:118.205+/-113.107,	best of gen size: 32
gen: 216 (1237.8s),	best of gen fitness: 66.748; reward:66.748+/-99.656,	best of gen size: 32
gen: 217 (1243.9s),	best of gen fitness: 22.726; reward:22.726+/-29.761,	best of gen size: 32
gen: 218 (1250.1s),	best of gen fitness: 65.297; rew

  return np.multiply(c_outs[0], c_outs[1])


gen: 222 (1275.4s),	best of gen fitness: 66.686; reward:66.686+/-106.623,	best of gen size: 32
gen: 223 (1282.3s),	best of gen fitness: 114.355; reward:114.355+/-132.452,	best of gen size: 30
gen: 224 (1289.2s),	best of gen fitness: 40.510; reward:40.510+/-91.213,	best of gen size: 28
gen: 225 (1295.0s),	best of gen fitness: 84.341; reward:84.341+/-115.766,	best of gen size: 30
gen: 226 (1301.7s),	best of gen fitness: 44.722; reward:44.722+/-125.320,	best of gen size: 28
gen: 227 (1308.7s),	best of gen fitness: 72.445; reward:72.445+/-108.531,	best of gen size: 30
gen: 228 (1314.8s),	best of gen fitness: 61.556; reward:61.556+/-114.834,	best of gen size: 32
gen: 229 (1322.1s),	best of gen fitness: 46.302; reward:46.302+/-128.431,	best of gen size: 30
gen: 230 (1328.7s),	best of gen fitness: 68.069; reward:68.069+/-99.589,	best of gen size: 32
gen: 231 (1335.5s),	best of gen fitness: 57.682; reward:57.682+/-57.988,	best of gen size: 32
gen: 232 (1342.2s),	best of gen fitness: 52.948; re

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 250 (1459.6s),	best of gen fitness: 115.379; reward:115.379+/-126.270,	best of gen size: 31
gen: 251 (1465.1s),	best of gen fitness: 71.388; reward:71.388+/-105.406,	best of gen size: 29
gen: 252 (1470.4s),	best of gen fitness: 39.431; reward:39.431+/-112.938,	best of gen size: 32
gen: 253 (1476.1s),	best of gen fitness: 53.725; reward:53.725+/-91.031,	best of gen size: 29
gen: 254 (1482.8s),	best of gen fitness: 39.363; reward:39.363+/-115.195,	best of gen size: 29
gen: 255 (1489.0s),	best of gen fitness: 63.705; reward:63.705+/-153.351,	best of gen size: 32
gen: 256 (1494.9s),	best of gen fitness: 68.272; reward:68.272+/-100.097,	best of gen size: 30
gen: 257 (1501.5s),	best of gen fitness: 73.572; reward:73.572+/-114.788,	best of gen size: 32
gen: 258 (1508.4s),	best of gen fitness: 97.885; reward:97.885+/-117.071,	best of gen size: 32
gen: 259 (1515.5s),	best of gen fitness: 83.182; reward:83.182+/-107.003,	best of gen size: 30
gen: 260 (1522.5s),	best of gen fitness: 102.339;

  return np.multiply(c_outs[0], c_outs[1])


gen: 297 (1737.7s),	best of gen fitness: 69.747; reward:69.747+/-108.654,	best of gen size: 30
gen: 298 (1743.5s),	best of gen fitness: 96.664; reward:96.664+/-113.728,	best of gen size: 30
gen: 299 (1749.1s),	best of gen fitness: 53.979; reward:53.979+/-79.053,	best of gen size: 30
gen: 300 (1754.8s),	best of gen fitness: 68.457; reward:68.457+/-127.135,	best of gen size: 30
gen: 301 (1760.6s),	best of gen fitness: 101.416; reward:101.416+/-104.332,	best of gen size: 30
gen: 302 (1767.2s),	best of gen fitness: 110.572; reward:110.572+/-101.229,	best of gen size: 32
gen: 303 (1774.0s),	best of gen fitness: 95.541; reward:95.541+/-95.304,	best of gen size: 31
gen: 304 (1780.4s),	best of gen fitness: 91.065; reward:91.065+/-94.638,	best of gen size: 30
gen: 305 (1786.6s),	best of gen fitness: 67.347; reward:67.347+/-132.968,	best of gen size: 29
gen: 306 (1793.5s),	best of gen fitness: 76.141; reward:76.141+/-66.014,	best of gen size: 30
gen: 307 (1799.7s),	best of gen fitness: 118.741; 

  return np.multiply(c_outs[0], c_outs[1])


gen: 350 (2109.9s),	best of gen fitness: 121.474; reward:121.474+/-134.759,	best of gen size: 29
gen: 351 (2116.9s),	best of gen fitness: 65.934; reward:65.934+/-83.555,	best of gen size: 29
gen: 352 (2124.0s),	best of gen fitness: 91.694; reward:91.694+/-129.633,	best of gen size: 29
gen: 353 (2130.8s),	best of gen fitness: 100.830; reward:100.830+/-117.409,	best of gen size: 32
gen: 354 (2140.3s),	best of gen fitness: 162.235; reward:162.235+/-110.779,	best of gen size: 29
gen: 355 (2150.7s),	best of gen fitness: 164.481; reward:164.481+/-111.923,	best of gen size: 29
gen: 356 (2160.9s),	best of gen fitness: 109.477; reward:109.477+/-87.162,	best of gen size: 29
gen: 357 (2168.4s),	best of gen fitness: 138.696; reward:138.696+/-105.298,	best of gen size: 29
gen: 358 (2176.4s),	best of gen fitness: 116.025; reward:116.025+/-119.392,	best of gen size: 29
gen: 359 (2184.5s),	best of gen fitness: 85.402; reward:85.402+/-78.662,	best of gen size: 32


  return np.multiply(c_outs[0], c_outs[1])


gen: 360 (2192.8s),	best of gen fitness: 85.271; reward:85.271+/-123.311,	best of gen size: 29
gen: 361 (2199.9s),	best of gen fitness: 133.851; reward:133.851+/-91.264,	best of gen size: 32
gen: 362 (2209.3s),	best of gen fitness: 87.324; reward:87.324+/-83.757,	best of gen size: 32
gen: 363 (2218.8s),	best of gen fitness: 73.213; reward:73.213+/-72.244,	best of gen size: 32
gen: 364 (2228.3s),	best of gen fitness: 63.353; reward:63.353+/-53.030,	best of gen size: 32
gen: 365 (2237.4s),	best of gen fitness: 117.788; reward:117.788+/-48.755,	best of gen size: 32
gen: 366 (2245.8s),	best of gen fitness: 101.756; reward:101.756+/-69.236,	best of gen size: 32
gen: 367 (2255.5s),	best of gen fitness: 50.181; reward:50.181+/-86.215,	best of gen size: 32
gen: 368 (2264.6s),	best of gen fitness: 92.848; reward:92.848+/-104.278,	best of gen size: 32
gen: 369 (2274.0s),	best of gen fitness: 118.323; reward:118.323+/-23.563,	best of gen size: 30
gen: 370 (2282.8s),	best of gen fitness: 93.978; r

  return np.multiply(c_outs[0], c_outs[1])


gen: 386 (2401.9s),	best of gen fitness: 134.455; reward:134.455+/-87.451,	best of gen size: 32
gen: 387 (2408.5s),	best of gen fitness: 109.507; reward:109.507+/-88.124,	best of gen size: 30
gen: 388 (2414.9s),	best of gen fitness: 102.283; reward:102.283+/-100.576,	best of gen size: 32
gen: 389 (2421.5s),	best of gen fitness: 88.437; reward:88.437+/-91.674,	best of gen size: 29
gen: 390 (2428.5s),	best of gen fitness: 132.353; reward:132.353+/-86.549,	best of gen size: 31
gen: 391 (2436.3s),	best of gen fitness: 117.577; reward:117.577+/-75.999,	best of gen size: 32
gen: 392 (2443.6s),	best of gen fitness: 70.238; reward:70.238+/-93.573,	best of gen size: 32
gen: 393 (2451.0s),	best of gen fitness: 70.883; reward:70.883+/-147.839,	best of gen size: 32
gen: 394 (2458.4s),	best of gen fitness: 89.079; reward:89.079+/-53.699,	best of gen size: 32
gen: 395 (2465.0s),	best of gen fitness: 139.828; reward:139.828+/-105.685,	best of gen size: 32
gen: 396 (2472.6s),	best of gen fitness: 107.

  return np.multiply(c_outs[0], c_outs[1])


gen: 443 (2844.0s),	best of gen fitness: 98.301; reward:98.301+/-118.448,	best of gen size: 32
gen: 444 (2853.3s),	best of gen fitness: 81.028; reward:81.028+/-53.961,	best of gen size: 32
gen: 445 (2862.1s),	best of gen fitness: 100.165; reward:100.165+/-79.546,	best of gen size: 32
gen: 446 (2870.5s),	best of gen fitness: 109.492; reward:109.492+/-92.956,	best of gen size: 31
gen: 447 (2879.6s),	best of gen fitness: 117.595; reward:117.595+/-101.575,	best of gen size: 32
gen: 448 (2887.9s),	best of gen fitness: 143.486; reward:143.486+/-43.024,	best of gen size: 32
gen: 449 (2896.7s),	best of gen fitness: 125.048; reward:125.048+/-81.278,	best of gen size: 32
gen: 450 (2905.6s),	best of gen fitness: 78.648; reward:78.648+/-78.415,	best of gen size: 32
gen: 451 (2915.2s),	best of gen fitness: 116.458; reward:116.458+/-70.916,	best of gen size: 32
gen: 452 (2923.8s),	best of gen fitness: 99.835; reward:99.835+/-87.319,	best of gen size: 31
gen: 453 (2931.8s),	best of gen fitness: 184.7

  return np.multiply(c_outs[0], c_outs[1])


gen: 468 (3061.2s),	best of gen fitness: 135.857; reward:135.857+/-53.590,	best of gen size: 32
gen: 469 (3069.6s),	best of gen fitness: 107.107; reward:107.107+/-129.282,	best of gen size: 32
gen: 470 (3078.7s),	best of gen fitness: 112.630; reward:112.630+/-103.836,	best of gen size: 32
gen: 471 (3087.9s),	best of gen fitness: 119.910; reward:119.910+/-65.158,	best of gen size: 32
gen: 472 (3096.6s),	best of gen fitness: 74.944; reward:74.944+/-105.974,	best of gen size: 32
gen: 473 (3104.5s),	best of gen fitness: 84.039; reward:84.039+/-103.780,	best of gen size: 32
gen: 474 (3112.3s),	best of gen fitness: 78.643; reward:78.643+/-83.827,	best of gen size: 32
gen: 475 (3121.9s),	best of gen fitness: 163.759; reward:163.759+/-48.391,	best of gen size: 32
gen: 476 (3128.7s),	best of gen fitness: 75.877; reward:75.877+/-140.855,	best of gen size: 32
gen: 477 (3137.4s),	best of gen fitness: 129.998; reward:129.998+/-70.293,	best of gen size: 32
gen: 478 (3146.8s),	best of gen fitness: 98

  return np.multiply(c_outs[0], c_outs[1])


gen: 491 (3263.3s),	best of gen fitness: 159.098; reward:159.098+/-96.273,	best of gen size: 31
gen: 492 (3272.3s),	best of gen fitness: 183.860; reward:183.860+/-65.365,	best of gen size: 32
gen: 493 (3281.1s),	best of gen fitness: 93.904; reward:93.904+/-104.162,	best of gen size: 32
gen: 494 (3291.0s),	best of gen fitness: 154.707; reward:154.707+/-107.852,	best of gen size: 32
gen: 495 (3300.8s),	best of gen fitness: 157.915; reward:157.915+/-94.648,	best of gen size: 32
gen: 496 (3309.6s),	best of gen fitness: 155.248; reward:155.248+/-106.388,	best of gen size: 32
gen: 497 (3318.3s),	best of gen fitness: 152.806; reward:152.806+/-110.777,	best of gen size: 32
gen: 498 (3326.1s),	best of gen fitness: 184.193; reward:184.193+/-57.562,	best of gen size: 31
gen: 499 (3334.0s),	best of gen fitness: 127.306; reward:127.306+/-77.470,	best of gen size: 31
gen: 500 (3342.1s),	best of gen fitness: 132.208; reward:132.208+/-69.573,	best of gen size: 31
Best of Generation 1: evo fitness:-86.

  return c_outs[0]**2


Best of Generation 28: evo fitness:54.347+/-79.629 	 test_fitness:-52.23+/-8.549
Best of Generation 29: evo fitness:-33.8+/-99.67 	 test_fitness:-89.509+/-52.862
Best of Generation 30: evo fitness:12.976+/-114.988 	 test_fitness:-73.785+/-30.572
Best of Generation 31: evo fitness:-31.403+/-40.069 	 test_fitness:-37.181+/-32.692
Best of Generation 32: evo fitness:25.341+/-119.791 	 test_fitness:-98.407+/-90.332
Best of Generation 33: evo fitness:1.265+/-68.284 	 test_fitness:-44.496+/-23.245
Best of Generation 34: evo fitness:53.133+/-153.701 	 test_fitness:-41.045+/-22.758
Best of Generation 35: evo fitness:20.601+/-45.881 	 test_fitness:3.181+/-70.522
Best of Generation 36: evo fitness:38.941+/-119.956 	 test_fitness:-64.13+/-79.667
Best of Generation 37: evo fitness:11.736+/-107.024 	 test_fitness:-176.274+/-301.2
Best of Generation 38: evo fitness:38.273+/-80.334 	 test_fitness:-598.5+/-1189.48
Best of Generation 39: evo fitness:25.706+/-101.031 	 test_fitness:-52.557+/-17.871
Best 

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


Best of Generation 205: evo fitness:45.362+/-62.524 	 test_fitness:-32.784+/-149.461
Best of Generation 206: evo fitness:76.58+/-45.334 	 test_fitness:-16.891+/-78.156
Best of Generation 207: evo fitness:53.751+/-96.993 	 test_fitness:-43.057+/-18.412
Best of Generation 208: evo fitness:80.342+/-104.59 	 test_fitness:12.28+/-71.928
Best of Generation 209: evo fitness:104.013+/-92.184 	 test_fitness:-23.278+/-27.573
Best of Generation 210: evo fitness:48.65+/-109.262 	 test_fitness:30.795+/-151.589
Best of Generation 211: evo fitness:57.834+/-98.49 	 test_fitness:-294.402+/-531.189
Best of Generation 212: evo fitness:43.458+/-82.241 	 test_fitness:-59.094+/-22.684
Best of Generation 213: evo fitness:96.122+/-96.615 	 test_fitness:-16.933+/-66.916
Best of Generation 214: evo fitness:72.443+/-132.146 	 test_fitness:-47.876+/-35.066
Best of Generation 215: evo fitness:118.205+/-113.107 	 test_fitness:-24.03+/-112.417
Best of Generation 216: evo fitness:66.748+/-99.656 	 test_fitness:-22.69

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


Best of Generation 287: evo fitness:72.514+/-63.465 	 test_fitness:-130.983+/-178.52
Best of Generation 288: evo fitness:52.049+/-80.872 	 test_fitness:35.746+/-98.262
Best of Generation 289: evo fitness:33.174+/-128.058 	 test_fitness:-13.713+/-130.675
Best of Generation 290: evo fitness:53.331+/-71.419 	 test_fitness:-41.242+/-35.278
Best of Generation 291: evo fitness:107.374+/-97.232 	 test_fitness:35.91+/-108.253
Best of Generation 292: evo fitness:80.406+/-102.38 	 test_fitness:-31.586+/-49.021
Best of Generation 293: evo fitness:71.992+/-143.427 	 test_fitness:-41.881+/-40.624
Best of Generation 294: evo fitness:74.675+/-95.899 	 test_fitness:84.549+/-95.178
Best of Generation 295: evo fitness:110.806+/-141.723 	 test_fitness:-50.002+/-20.007
Best of Generation 296: evo fitness:102.425+/-116.646 	 test_fitness:37.479+/-104.583
Best of Generation 297: evo fitness:69.747+/-108.654 	 test_fitness:-21.685+/-72.408
Best of Generation 298: evo fitness:96.664+/-113.728 	 test_fitness:-

  return np.sin(c_outs[0])


Best of Generation 444: evo fitness:81.028+/-53.961 	 test_fitness:116.727+/-89.432
Best of Generation 445: evo fitness:100.165+/-79.546 	 test_fitness:85.266+/-82.127
Best of Generation 446: evo fitness:109.492+/-92.956 	 test_fitness:32.743+/-99.933
Best of Generation 447: evo fitness:117.595+/-101.575 	 test_fitness:44.272+/-81.815
Best of Generation 448: evo fitness:143.486+/-43.024 	 test_fitness:-18.227+/-70.014
Best of Generation 449: evo fitness:125.048+/-81.278 	 test_fitness:-33.137+/-79.866
Best of Generation 450: evo fitness:78.648+/-78.415 	 test_fitness:42.276+/-105.978
Best of Generation 451: evo fitness:116.458+/-70.916 	 test_fitness:-12.71+/-58.378
Best of Generation 452: evo fitness:99.835+/-87.319 	 test_fitness:61.146+/-74.882
Best of Generation 453: evo fitness:184.764+/-57.063 	 test_fitness:-276.243+/-668.447
Best of Generation 454: evo fitness:81.683+/-98.758 	 test_fitness:75.274+/-32.791
Best of Generation 455: evo fitness:141.333+/-28.793 	 test_fitness:33.1

  return np.multiply(c_outs[0], c_outs[1])


gen: 420 (2361.3s),	best of gen fitness: 2.079; reward:2.079+/-46.935,	best of gen size: 32
gen: 421 (2364.6s),	best of gen fitness: 48.193; reward:48.193+/-115.237,	best of gen size: 22
gen: 422 (2369.3s),	best of gen fitness: -4.861; reward:-4.861+/-127.822,	best of gen size: 26
gen: 423 (2373.3s),	best of gen fitness: 34.848; reward:34.848+/-117.432,	best of gen size: 25
gen: 424 (2377.2s),	best of gen fitness: -14.818; reward:-14.818+/-42.104,	best of gen size: 25
gen: 425 (2381.2s),	best of gen fitness: 49.323; reward:49.323+/-109.334,	best of gen size: 32
gen: 426 (2384.1s),	best of gen fitness: 12.680; reward:12.680+/-137.518,	best of gen size: 32
gen: 427 (2388.1s),	best of gen fitness: -11.627; reward:-11.627+/-26.549,	best of gen size: 19
gen: 428 (2392.2s),	best of gen fitness: 79.002; reward:79.002+/-151.476,	best of gen size: 19
gen: 429 (2396.1s),	best of gen fitness: 15.144; reward:15.144+/-120.819,	best of gen size: 32
gen: 430 (2400.0s),	best of gen fitness: 29.970; re

  return c_outs[0]**2


gen: 470 (2585.9s),	best of gen fitness: 41.711; reward:41.711+/-112.346,	best of gen size: 29
gen: 471 (2590.0s),	best of gen fitness: 96.174; reward:96.174+/-135.119,	best of gen size: 32
gen: 472 (2594.9s),	best of gen fitness: 66.280; reward:66.280+/-161.742,	best of gen size: 32
gen: 473 (2599.8s),	best of gen fitness: 48.471; reward:48.471+/-118.988,	best of gen size: 32
gen: 474 (2604.5s),	best of gen fitness: 37.022; reward:37.022+/-118.238,	best of gen size: 32
gen: 475 (2610.3s),	best of gen fitness: 50.523; reward:50.523+/-125.396,	best of gen size: 25
gen: 476 (2615.9s),	best of gen fitness: 32.923; reward:32.923+/-122.049,	best of gen size: 32
gen: 477 (2622.0s),	best of gen fitness: 55.145; reward:55.145+/-119.848,	best of gen size: 32
gen: 478 (2627.6s),	best of gen fitness: 20.279; reward:20.279+/-131.945,	best of gen size: 32
gen: 479 (2633.5s),	best of gen fitness: 73.283; reward:73.283+/-118.340,	best of gen size: 32
gen: 480 (2640.3s),	best of gen fitness: -1.937; r

  return c_outs[0]**2


gen: 481 (2646.9s),	best of gen fitness: 46.601; reward:46.601+/-248.431,	best of gen size: 32


  return c_outs[0]**2
  return c_outs[0]**2


gen: 482 (2653.5s),	best of gen fitness: 41.385; reward:41.385+/-111.853,	best of gen size: 32
gen: 483 (2660.5s),	best of gen fitness: 50.754; reward:50.754+/-112.683,	best of gen size: 32


  return c_outs[0]**2


gen: 484 (2666.7s),	best of gen fitness: 38.185; reward:38.185+/-115.973,	best of gen size: 32


  return c_outs[0]**2
  return c_outs[0]**2
  return c_outs[0]**2


gen: 485 (2674.5s),	best of gen fitness: 29.442; reward:29.442+/-137.897,	best of gen size: 32
gen: 486 (2680.3s),	best of gen fitness: 37.546; reward:37.546+/-228.570,	best of gen size: 32
gen: 487 (2686.3s),	best of gen fitness: 28.870; reward:28.870+/-129.043,	best of gen size: 32
gen: 488 (2691.6s),	best of gen fitness: 35.288; reward:35.288+/-131.393,	best of gen size: 32
gen: 489 (2697.0s),	best of gen fitness: 48.428; reward:48.428+/-106.581,	best of gen size: 32
gen: 490 (2702.2s),	best of gen fitness: 43.531; reward:43.531+/-118.442,	best of gen size: 32
gen: 491 (2707.5s),	best of gen fitness: 22.897; reward:22.897+/-123.377,	best of gen size: 32
gen: 492 (2712.1s),	best of gen fitness: -5.412; reward:-5.412+/-132.598,	best of gen size: 32
gen: 493 (2716.3s),	best of gen fitness: 29.475; reward:29.475+/-131.677,	best of gen size: 32
gen: 494 (2721.3s),	best of gen fitness: 15.842; reward:15.842+/-122.180,	best of gen size: 32
gen: 495 (2726.5s),	best of gen fitness: 50.521; r

  return np.multiply(c_outs[0], c_outs[1])


gen: 62 (199.4s),	best of gen fitness: -9.368; reward:-9.368+/-18.087,	best of gen size: 29
gen: 63 (202.9s),	best of gen fitness: -8.117; reward:-8.117+/-16.296,	best of gen size: 29
gen: 64 (207.5s),	best of gen fitness: 3.640; reward:3.640+/-26.830,	best of gen size: 29
gen: 65 (211.1s),	best of gen fitness: 14.834; reward:14.834+/-51.802,	best of gen size: 30
gen: 66 (215.3s),	best of gen fitness: 17.686; reward:17.686+/-82.241,	best of gen size: 30
gen: 67 (219.5s),	best of gen fitness: -15.226; reward:-15.226+/-40.998,	best of gen size: 30
gen: 68 (223.8s),	best of gen fitness: -13.977; reward:-13.977+/-15.535,	best of gen size: 30
gen: 69 (227.7s),	best of gen fitness: -2.451; reward:-2.451+/-20.857,	best of gen size: 30
gen: 70 (231.7s),	best of gen fitness: -13.413; reward:-13.413+/-22.877,	best of gen size: 26
gen: 71 (235.7s),	best of gen fitness: -10.764; reward:-10.764+/-17.145,	best of gen size: 27
gen: 72 (239.4s),	best of gen fitness: -12.720; reward:-12.720+/-22.530,	b

  return np.sin(c_outs[0])


gen: 4 (9.9s),	best of gen fitness: -101.781; reward:-101.781+/-60.796,	best of gen size: 12
gen: 5 (12.4s),	best of gen fitness: -66.126; reward:-66.126+/-30.185,	best of gen size: 27
gen: 6 (15.2s),	best of gen fitness: -94.128; reward:-94.128+/-43.485,	best of gen size: 29
gen: 7 (18.4s),	best of gen fitness: -82.680; reward:-82.680+/-38.298,	best of gen size: 27
gen: 8 (21.8s),	best of gen fitness: -67.894; reward:-67.894+/-82.763,	best of gen size: 27
gen: 9 (25.5s),	best of gen fitness: -49.574; reward:-49.574+/-20.399,	best of gen size: 29
gen: 10 (29.1s),	best of gen fitness: -49.722; reward:-49.722+/-33.738,	best of gen size: 27
gen: 11 (33.9s),	best of gen fitness: -41.968; reward:-41.968+/-45.327,	best of gen size: 30
gen: 12 (39.0s),	best of gen fitness: -66.832; reward:-66.832+/-21.774,	best of gen size: 30
gen: 13 (43.2s),	best of gen fitness: -33.115; reward:-33.115+/-79.073,	best of gen size: 30
gen: 14 (49.7s),	best of gen fitness: -47.764; reward:-47.764+/-86.764,	bes

  return np.sin(c_outs[0])
  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 125 (1198.0s),	best of gen fitness: 102.990; reward:102.990+/-40.241,	best of gen size: 30
gen: 126 (1208.5s),	best of gen fitness: 103.682; reward:103.682+/-12.792,	best of gen size: 32
gen: 127 (1220.0s),	best of gen fitness: 103.639; reward:103.639+/-52.695,	best of gen size: 32
gen: 128 (1230.7s),	best of gen fitness: 91.734; reward:91.734+/-27.585,	best of gen size: 32
gen: 129 (1243.2s),	best of gen fitness: 121.710; reward:121.710+/-10.692,	best of gen size: 32
gen: 130 (1255.1s),	best of gen fitness: 113.514; reward:113.514+/-51.192,	best of gen size: 32
gen: 131 (1265.4s),	best of gen fitness: 95.835; reward:95.835+/-24.802,	best of gen size: 32
gen: 132 (1278.3s),	best of gen fitness: 101.328; reward:101.328+/-55.068,	best of gen size: 32
gen: 133 (1289.5s),	best of gen fitness: 93.030; reward:93.030+/-21.463,	best of gen size: 32
gen: 134 (1300.4s),	best of gen fitness: 95.155; reward:95.155+/-17.390,	best of gen size: 32
gen: 135 (1310.1s),	best of gen fitness: 108.659

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 172 (1839.5s),	best of gen fitness: 104.390; reward:104.390+/-18.700,	best of gen size: 32
gen: 173 (1864.2s),	best of gen fitness: 88.358; reward:88.358+/-52.831,	best of gen size: 32
gen: 174 (1891.7s),	best of gen fitness: 90.627; reward:90.627+/-33.758,	best of gen size: 32
gen: 175 (1913.8s),	best of gen fitness: 109.310; reward:109.310+/-11.601,	best of gen size: 32
gen: 176 (1933.1s),	best of gen fitness: 102.318; reward:102.318+/-20.091,	best of gen size: 32
gen: 177 (1951.7s),	best of gen fitness: 101.492; reward:101.492+/-24.698,	best of gen size: 32
gen: 178 (1972.3s),	best of gen fitness: 94.119; reward:94.119+/-12.790,	best of gen size: 32
gen: 179 (1993.5s),	best of gen fitness: 84.351; reward:84.351+/-13.101,	best of gen size: 32
gen: 180 (2011.1s),	best of gen fitness: 117.746; reward:117.746+/-23.712,	best of gen size: 32
gen: 181 (2028.9s),	best of gen fitness: 99.451; reward:99.451+/-16.610,	best of gen size: 32
gen: 182 (2045.6s),	best of gen fitness: 104.043; 

  protected_div = sign_b * c_outs[0] / (1e-9 + np.abs(c_outs[1]))


gen: 187 (2135.5s),	best of gen fitness: 102.155; reward:102.155+/-13.489,	best of gen size: 31
gen: 188 (2153.4s),	best of gen fitness: 99.992; reward:99.992+/-17.265,	best of gen size: 32
gen: 189 (2171.0s),	best of gen fitness: 115.917; reward:115.917+/-21.929,	best of gen size: 32
gen: 190 (2188.9s),	best of gen fitness: 111.836; reward:111.836+/-21.848,	best of gen size: 31
gen: 191 (2207.0s),	best of gen fitness: 114.230; reward:114.230+/-28.673,	best of gen size: 31
gen: 192 (2225.8s),	best of gen fitness: 109.604; reward:109.604+/-22.046,	best of gen size: 32
gen: 193 (2242.7s),	best of gen fitness: 92.720; reward:92.720+/-67.474,	best of gen size: 31
gen: 194 (2261.7s),	best of gen fitness: 105.383; reward:105.383+/-42.614,	best of gen size: 31
gen: 195 (2279.9s),	best of gen fitness: 114.656; reward:114.656+/-16.730,	best of gen size: 31
gen: 196 (2298.8s),	best of gen fitness: 97.510; reward:97.510+/-47.951,	best of gen size: 31
gen: 197 (2317.9s),	best of gen fitness: 115.0

  return c_outs[0]**2
  return np.sin(c_outs[0])


gen: 459 (5701.8s),	best of gen fitness: 99.720; reward:99.720+/-23.816,	best of gen size: 31
gen: 460 (5711.1s),	best of gen fitness: 114.666; reward:114.666+/-14.528,	best of gen size: 31
gen: 461 (5721.1s),	best of gen fitness: 121.302; reward:121.302+/-55.058,	best of gen size: 29
gen: 462 (5730.7s),	best of gen fitness: 101.888; reward:101.888+/-12.299,	best of gen size: 31
gen: 463 (5740.3s),	best of gen fitness: 112.598; reward:112.598+/-25.963,	best of gen size: 29
gen: 464 (5750.0s),	best of gen fitness: 100.200; reward:100.200+/-19.717,	best of gen size: 31
gen: 465 (5759.0s),	best of gen fitness: 105.068; reward:105.068+/-31.934,	best of gen size: 31
gen: 466 (5768.8s),	best of gen fitness: 115.945; reward:115.945+/-34.661,	best of gen size: 31
gen: 467 (5777.9s),	best of gen fitness: 95.695; reward:95.695+/-18.694,	best of gen size: 29
gen: 468 (5786.4s),	best of gen fitness: 106.887; reward:106.887+/-36.594,	best of gen size: 29
gen: 469 (5794.6s),	best of gen fitness: 110

# Visualize the different experiments.

## Make an animation
Here the best evolved individual is selected and one episode is rendered. Make sure to save your lunar landers over time to track progress and make comparisons.

In [None]:
# # Disabled cell: uncomment one level to render the last best-of-generation
# # individual and write the collected frames to a GIF.
# # gist to save gif from https://gist.github.com/botforge/64cbb71780e6208172bbf03cd9293553
# def save_frames_as_gif(frames, path="./", filename="evolved_lander.gif"):
#     # Write `frames` as an animated GIF at `path + filename`.
#     # `frames` is indexed with frames[i] and frames[0].shape, so it must be a
#     # non-empty sequence of image arrays (presumably the rgb_array renders
#     # of the environment - confirm against the producer of `frames`).
#     # `path` is joined by plain string concatenation, so it needs its own
#     # trailing separator.
#     # Figure size is frame pixels / 72 at dpi=72, i.e. one figure pixel per frame pixel.
#     plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72)
#     patch = plt.imshow(frames[0])
#     plt.axis("off")

#     # Per-frame callback: swap the image data shown by the existing artist.
#     def animate(i):
#         patch.set_data(frames[i])

#     anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
#     # NOTE(review): the "imagemagick" writer presumably needs ImageMagick
#     # installed on the machine - confirm before relying on this cell.
#     anim.save(path + filename, writer="imagemagick", fps=60)


# # Driver: score the most recent best-of-generation individual with rendering
# # enabled, report the average fitness, then save the returned frames.
# frames = []
# avg_fitness, frames = get_test_score(evo.best_of_gens[-1], num_episodes=5, episode_duration=300, seed=5, render=True)
# print("Average fitness of the render is: ", avg_fitness)
# env.close()
# save_frames_as_gif(frames)

## Play animation

<img src="evolved_lander.gif" width="750">

## Optimisation
The coefficients in the multi-tree aren't optimised. Here Q-learning (taken from https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html) is used to optimise the weights further. Incorporate coefficient optimisation into the training of your agent(s). Coefficient optimisation can be expensive, so think about how often you want to optimise, when, and for which individuals.

In [None]:
# # Disabled cell: Q-learning style optimisation of the constants of `best`,
# # using transitions sampled from the replay memory `evo.memory`.
# # NOTE(review): `best` is not defined in this cell - it must be assigned
# # earlier in the notebook (presumably a best-of-generation individual).
# batch_size = 128
# GAMMA = 0.99

# # Constants of the multi-tree; these are the parameters handed to the optimizer.
# constants = best.get_subtrees_consts()

# # The optimizer only exists when there is at least one constant; the loop
# # below guards on the same condition before using it.
# if len(constants) > 0:
#     optimizer = optim.AdamW(constants, lr=1e-3, amsgrad=True)

# for _ in range(500):
#     if len(constants) > 0 and len(evo.memory) > batch_size:
#         # The target tree is a fresh deep copy of `best` on every iteration,
#         # so targets are computed from a snapshot of the current constants.
#         target_tree = copy.deepcopy(best)

#         # Turn a list of Transition tuples into one Transition of tuples.
#         transitions = evo.memory.sample(batch_size)
#         batch = Transition(*zip(*transitions))

#         # True where the transition has a next state (next_state is not None).
#         non_final_mask = torch.tensor(
#             tuple(map(lambda s: s is not None, batch.next_state)), dtype=torch.bool
#         )

#         # NOTE(review): torch.cat on an empty list raises, so this presumably
#         # fails if every sampled transition has next_state None - confirm.
#         non_final_next_states = torch.cat(
#             [s for s in batch.next_state if s is not None]
#         )
#         state_batch = torch.cat(batch.state)
#         action_batch = torch.cat(batch.action)
#         reward_batch = torch.cat(batch.reward)

#         # Output of `best` for the action actually taken in each sampled state.
#         state_action_values = best.get_output_pt(state_batch).gather(1, action_batch)
#         # Entries without a next state keep the value 0; the others get the
#         # maximum output of the target tree, computed without gradient tracking.
#         next_state_values = torch.zeros(batch_size, dtype=torch.float)
#         with torch.no_grad():
#             next_state_values[non_final_mask] = (
#                 target_tree.get_output_pt(non_final_next_states).max(1)[0].float()
#             )

#         # Target: discounted next-state value plus the observed reward.
#         expected_state_action_values = (next_state_values * GAMMA) + reward_batch

#         # NOTE(review): `nn` is not imported by name in the notebook's import
#         # cell (only `torch` and `torch.optim as optim`); unless the star import
#         # from genepro.node_impl provides it, use torch.nn.SmoothL1Loss - confirm.
#         criterion = nn.SmoothL1Loss()
#         loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

#         # Optimize the model
#         optimizer.zero_grad()
#         loss.backward()
#         # Clamp each gradient element to [-100, 100] before the update.
#         torch.nn.utils.clip_grad_value_(constants, 100)
#         optimizer.step()

# # Report the optimised individual and its test score.
# print(best.get_readable_repr())
# print(get_test_score(best))

In [None]:
# # Disabled cell: run one rendered episode with the optimised individual
# # `best` and save it as "evolved_lander_RL.gif".
# # NOTE(review): `frames` is set to [] here and never reassigned in this cell,
# # and the return value of fitness_function_pt is discarded. Unless
# # fitness_function_pt appends to the global `frames` (not visible here),
# # save_frames_as_gif receives an empty list and fails on frames[0] - confirm.
# frames = []
# fitness_function_pt(
#     best, num_episodes=1, episode_duration=500, render=True, ignore_done=False
# )
# env.close()
# save_frames_as_gif(frames, filename="evolved_lander_RL.gif")

<img src="evolved_lander_RL.gif" width="750">