In [1]:
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    

In [2]:
from __future__ import annotations
import abc
import typing
import math
import functools

from typing import Optional
from typing import Tuple
from typing import Dict
from typing import Generic
from typing import Type
from dataclasses import dataclass

from typing import Union
from typing import List
from typing import Any
from typing import Self
from typing import Callable
from inspect import signature

from core.globals import LogLevel
from core.globals import report
from core.variator import Variator
from core.selector import ElitistSimpleSelector, SimpleSelector
from core.selector import Elitist
from core.selector import TournamentSelector
from core.evaluator import Evaluator
from core.controller import Controller
from core.population import Population
from core.population import Genome
from evolvables.expression import Program
from evolvables.expression import ProgramCrossoverVariator

from evolvables.expression import ProgramFactory


import random



import gymnasium as gym

from random import choice



In [3]:

def neg(x):
    return -x

def add(x, y):
    return x+y

def sub(x, y):
    return x-y

def mul(x, y):
    return x*y

def div(x, y):
    return 0 if y==0 else x/y

def log(x):
    abs_x = abs(x)
    return 0 if (abs_x == 0) else math.log(abs(x))

def lim(x, a, b):
    return min(max(min(a,b), x), max(a,b))

def avg(x, y):
    return (x+y)/2

def val0():
    return 0

def val1():
    return 1

def protectedDiv(left, right):
    try: return truncate(left, 8) / truncate(right, 8)
    except ZeroDivisionError: return 0
    
def if_then_else(input, output1, output2):
    if input: return output1
    else: return output2

def limit(input, minimum, maximum):
    return min(max(input,minimum), maximum)

# helper function to limit decimal places
def truncate(number, decimals=0):
    if not isinstance(decimals, int):
        raise TypeError("decimal places must be an integer.")
    elif decimals < 0:
        raise ValueError("decimal places has to be 0 or more.")
    elif decimals == 0:
        return math.trunc(number)
    factor = 10.0 ** decimals
    return math.trunc(number * factor) / factor

In [29]:

class GymEvaluator(Evaluator[Program[float]]):
    def __init__(self, env, wrapper: Callable[[float], float], episode_count: int, step_count: int, score_wrapper: Callable[[float], float] = lambda x : x):
        super().__init__()
        self.env = env
        self.wrapper = wrapper
        self.episode_count = episode_count
        self.step_count = step_count
        self.score_wrapper = score_wrapper

    def evaluate(self, s1: Program[float]) -> float:
        score = self.evaluate_episode(s1, self.env, self.wrapper, self.episode_count, self.step_count)
        # score = s1.evaluate(1,4,8,16)
        
        #score = GymEvaluator.evaluate_episode(s1, self.env, self.wrapper, self.episode_count, self.step_count) 
        return self.score_wrapper(score)

    @staticmethod
    def evaluate_episode(s1: Program[float], env, wrapper: Callable[[float], float], episode_count: int, step_count: int) -> float:
        score = 0.
        
        for i in range(0, episode_count):
            score = score + GymEvaluator.evaluate_step(s1, env, wrapper, step_count)
        return score / episode_count

    @staticmethod
    def evaluate_step(s1: Program[float], env, wrapper: Callable[[float], float], step_count: int) -> float:
        
        step_result = env.reset()
        score = 0.
        # hard coded - an episode consists of 10 evaluations.
        for i in range(0, step_count):
            if (len(step_result)>=5 and (step_result[2] or step_result[3])):
                break
            step_result = env.step(wrapper(s1.evaluate(*step_result[0]))) #type: ignore
            if (step_result[2]):
                break
            score = score + step_result[1] #type: ignore
        return score


# ########## Begin setup :) ########## #

# Size of the population. Affects the size of the initial initial population, also enforced by selectors.
pop_size = 100

# Depth constraint of the expression tree
tree_depth = 5
# Node budget of the expression tree
node_budget = 20

# The number of episodes for each evaluation. The actual score should be the mean of these scores.
# The length of each episode is hard-coded to be 10 (see `evaluate_step`)
step_bound = 10
episode_bound = 25


# Build the population of ternary programs. The arity (4) should match the size of the observation space (4 for cartpole)
progf = ProgramFactory((add, sub, mul, div, math.sin, lim), 4)

# Declare and populate the population
pops: Population[Program[float]] = Population()
for i in range(0, pop_size):
    pops.append(progf.build(tree_depth, node_budget))

# Prepare the variator
variator = ProgramCrossoverVariator(arity = 2, coarity = 3)

# The evaluaor is ready. Feed the custom wrapper and the environment to GymEvaluator
def pendulum_wrapper(f: float):
    return [round(max(min(2, f), -2))]

def cartpole_wrapper(f: float) -> int:
    result = round(max(min(1, f), 0))
    return result

eval = gym.make('CartPole-v1')
evaluator = GymEvaluator(eval, cartpole_wrapper, step_bound, episode_bound, score_wrapper = lambda x : x)

# Prepare the selector.
import gymnasium as gym
selp = SimpleSelector[Program[float]](coarity = 2, budget = pop_size)
selc = Elitist(TournamentSelector[Program[float]](coarity = 2, budget = pop_size))

ctrl = Controller[Program[float]](
    population = pops,
    evaluator = evaluator,
    parent_selector = selc,
    variator = variator,
    survivor_selector = selp
)

best_solutions: List[Program] = []
best_scores: List[Program] = []

def score_keeper(best_scores, best_solutions, c: Controller[Program[T]]):
    best_solutions = best_solutions.append(c.population[0])
    best_scores = best_scores.append(c.population[0].score)

from functools import partial
for i in range(0, 20):
    ctrl.step(partial(score_keeper, best_scores, best_solutions))


print ([str(x) for x in best_solutions])
print (str(best_scores))

sin(lim(sym_1, sym_2, sub(sym_1, lim(mul(sym_2, sym_2), sub(sym_3, sym_2), lim(sym_4, sym_4, sym_4)))))-----add(sin(lim(sym_3, sym_4, sub(lim(sym_1, sym_3, sym_1), lim(sym_2, sym_2, sym_3)))), lim(sym_3, sub(lim(sym_4, lim(sym_3, sym_4, sym_1), sin(sym_2)), sin(sin(sym_2))), sym_2))
sym_4-----sin(sin(div(div(mul(sym_1, sym_4), sub(sym_4, sym_1)), add(lim(sym_4, sym_1, sym_3), add(sym_2, sym_1)))))
lim(lim(sin(sym_1), lim(lim(lim(sym_4, sym_4, sym_2), sub(sym_3, sym_1), sym_4), sym_4, sym_3), add(sin(sub(sym_4, sym_2)), sym_4)), sub(lim(sym_4, sin(lim(sym_1, sym_1, sym_2)), sin(sym_4)), sym_2), sin(sin(sym_4)))-----div(div(sin(lim(lim(sym_3, sym_1, sym_3), sin(sym_2), sym_1)), sin(add(lim(sym_1, sym_2, sym_3), mul(sym_1, sym_2)))), sin(sym_2))
div(lim(lim(sin(add(sym_3, sub(sym_4, sym_2))), sub(sin(mul(sym_3, sym_1)), sym_3), lim(sym_4, lim(mul(sym_3, sym_2), sin(sym_3), sin(sym_3)), sym_3)), sym_2, sym_4), sin(lim(sym_2, sym_4, sym_3)))-----lim(lim(sin(sin(sym_2)), lim(sym_4, div(sym_1

In [28]:
print(str(best_solutions[-1]))
print(str(best_scores[-1]))

lim(sym_1, lim(sym_1, lim(sin(sub(sym_3, sym_4)), sym_1, sub(lim(sym_1, sym_1, sym_2), sym_1)), sym_4), lim(sym_1, sin(sym_1), lim(sym_4, sym_2, sym_2)))
-8.0


In [30]:
print(str(best_solutions[-1]))
print(str(best_scores[-1]))

lim(sub(lim(sym_4, sin(lim(sym_1, sym_1, sym_2)), sin(sym_4)), sym_2), sin(lim(sin(div(sym_1, sym_1)), sin(sym_1), add(sin(sub(sym_4, sym_2)), sym_4))), lim(add(sin(sub(sym_4, sym_2)), sym_4), sym_4, sin(div(sym_1, sym_1))))
25.0


In [55]:
import gymnasium as gym
sin = math.sin
step_count = 4000
best_program = lambda sym_1, sym_2, sym_3, sym_4: lim(sym_1, lim(sym_1, lim(math.sin(sub(sym_3, sym_4)), sym_1, sub(lim(sym_1, sym_1, sym_2), sym_1)), sym_4), lim(sym_1, sin(sym_1), lim(sym_4, sym_2, sym_2)))


visible = True
if visible:
    env = gym.make("CartPole-v1", render_mode="human")
else:
    env = gym.make("CartPole-v1")
env.reset()


step_result = env.reset()
score = 0.
# hard coded - an episode consists of 10 evaluations.
for i in range(0, step_count):
    if (len(step_result)>=5 and (step_result[2] or step_result[3])):
        
        break
    step_result = env.step(cartpole_wrapper(best_program(*step_result[0]))) #type: ignore
    print (step_result)
    if (step_result[4]):
        
        break
    score = score + step_result[1] #type: ignore

print (score)
# print(score)
env.close()

(array([-0.04889956, -0.19121426,  0.03136238,  0.34358838], dtype=float32), 1.0, False, False, {})
(array([-0.05272384, -0.386768  ,  0.03823415,  0.64599377], dtype=float32), 1.0, False, False, {})
(array([-0.0604592 , -0.5824013 ,  0.05115402,  0.95046735], dtype=float32), 1.0, False, False, {})
(array([-0.07210723, -0.7781731 ,  0.07016337,  1.2587736 ], dtype=float32), 1.0, False, False, {})
(array([-0.08767069, -0.9741191 ,  0.09533884,  1.5725806 ], dtype=float32), 1.0, False, False, {})
(array([-0.10715307, -1.1702404 ,  0.12679045,  1.8934138 ], dtype=float32), 1.0, False, False, {})
(array([-0.13055788, -1.3664894 ,  0.16465873,  2.222601  ], dtype=float32), 1.0, False, False, {})
(array([-0.15788767, -1.5627545 ,  0.20911075,  2.5612078 ], dtype=float32), 1.0, False, False, {})
(array([-0.18914276, -1.7588419 ,  0.2603349 ,  2.9099631 ], dtype=float32), 1.0, True, False, {})
9.0
