In [12]:
import gymnasium as gym
import numpy as np
from deap import base, creator, tools, gp, algorithms
import operator
import math

In [14]:
def evaluate(individual, env, toolbox):
    func = toolbox.compile(expr=individual)
    observation, _ = env.reset(seed=42)
    total_reward = 0.0
    max_steps = 999

    for _ in range(max_steps):
        position, velocity = observation
        action = [np.clip(func(position, velocity), -1, 1)]
        observation, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        if terminated or truncated:
            break
    return (total_reward,) 

In [13]:
env = gym.make("MountainCarContinuous-v0")

pset = gp.PrimitiveSet("MAIN", arity=2)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.sin, 1)
pset.addPrimitive(math.cos, 1)
pset.renameArguments(ARG0="position")
pset.renameArguments(ARG1="velocity")

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", lambda individual: evaluate(individual, env, toolbox))
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)
toolbox.register("select", tools.selTournament, tournsize=3)


In [None]:
pop = toolbox.population(n=600)
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)

pop, logbook = algorithms.eaSimple(
    pop,
    toolbox,
    cxpb=0.5,
    mutpb=0.2,
    ngen=50,
    stats=stats,
    halloffame=hof,
    verbose=True,
)

print("Best individual:", hof[0])
print(f"Total reward for the best individual: {evaluate(hof[0], env, toolbox)[0]}")

gen	nevals	avg     	std    	min  	max
0  	600   	-39.5844	39.1861	-99.9	0  
1  	380   	-18.5964	29.0289	-99.9	0  
2  	358   	-9.80636	23.3149	-99.9	0  
3  	381   	-7.38397	19.6267	-99.9	0  
4  	347   	-7.11044	19.9802	-99.9	0  
5  	347   	-7.79035	21.4318	-99.9	0  
6  	357   	-7.32269	20.944 	-99.9	0  
7  	359   	-6.16678	19.4211	-99.9	0  
8  	349   	-6.11781	18.814 	-99.9	0  
9  	353   	-6.09616	19.8579	-99.9	0  
10 	331   	-4.02986	15.6388	-99.9	0  
11 	361   	-4.2568 	16.7429	-99.9	0  
12 	358   	-3.0105 	12.6002	-99.9	0  
13 	406   	-4.35182	16.5079	-99.9	0  
14 	346   	-2.86788	12.7749	-99.9	0  
15 	343   	-3.04341	14.8062	-99.9	0  
16 	344   	-3.06534	13.9421	-99.9	0  
17 	350   	-3.04139	14.119 	-99.9	0  
18 	339   	-1.47443	9.56219	-99.9	0  
19 	345   	-2.43231	12.6716	-99.9	0  
20 	348   	-1.37041	9.40546	-99.894	0  
21 	381   	-2.37645	13.1973	-99.9  	0  
22 	366   	-2.48209	13.1498	-99.9  	0  
23 	349   	-1.97685	11.3776	-99.9  	0  
24 	346   	-0.864419	7.46337	-98.2667	0  
