# CartPole Example

In [None]:
import csv
import json
from pathlib import Path

import gymnasium
import pandas as pd
import torch
import yaml

from examples.cartpole.direct_evaluator import CartPoleEvaluator
from examples.cartpole.cartpole_prescriptor import CartPolePrescriptor
from presp.prescriptor import NNPrescriptorFactory
from presp.evolution import Evolution

## Load Config
First we load the config using pyyaml.
We can view the config here and see we have evolution params, prescriptor params, and eval params.
The evolution params go directly into the evolution object from presp, the prescriptor params are used in the prescriptor factory to generate candidate prescriptors, and the eval params are used to customize evaluation.

In [None]:
# Parse the YAML experiment configuration into a plain Python object.
config_path = Path("examples/cartpole/config.yml")
with config_path.open(encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Pretty-print the parsed config; json.dumps does not sort keys by default,
# so the sections appear in the order they were loaded.
print(json.dumps(config, indent=4))

## Factory and Evaluator
These are the two objects that must be implemented in order to run evolution. Please see the corresponding files to see how they are implemented. This is just a simple implementation to show the full evolution process.

In [None]:
# Pull out the config sections consumed by the factory and the evaluator.
prescriptor_params = config["prescriptor_params"]
eval_params = config["eval_params"]

# The factory produces CartPolePrescriptor candidates; the evaluator scores them.
factory = NNPrescriptorFactory(CartPolePrescriptor, **prescriptor_params)
evaluator = CartPoleEvaluator(**eval_params)

## Evolution
Finally, we call presp with the config, factory, and evaluator to run the evolution process. With just 10 generations we should be able to solve a simple problem like CartPole.

In [None]:
# Every entry of the "evolution_params" config section is forwarded to
# Evolution as a keyword argument, alongside the factory and evaluator.
evolution_params = config["evolution_params"]
evolution = Evolution(
    evaluator=evaluator,
    prescriptor_factory=factory,
    **evolution_params,
)
evolution.run_evolution()

## Visualization
Now we can look at the results of evolution.

This snippet runs the environment using a randomly acting agent.

In [None]:
env = gymnasium.make("CartPole-v1", render_mode="human")

# Random agent: sample an action from the action space at every step and
# report the (zero-based) index of the step on which the episode ended.
try:
    obs, _ = env.reset()
    for i in range(300):
        action = env.action_space.sample()
        # step() returns (obs, reward, terminated, truncated, info). Either
        # flag means the episode is over and the env must not be stepped again.
        # With render_mode="human" the frame is drawn inside step(), so no
        # explicit env.render() call is needed.
        obs, _, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            print(f"Random actions lasted {i} steps")
            break
finally:
    # Always release the render window, even if the rollout raised.
    env.close()

And this one uses our best prescriptor. We can see it (more or less) solved the problem in just 10 generations.

In [None]:
# Our agent
save_path = Path(config["evolution_params"]["save_path"])
results_df = pd.read_csv(save_path / "results.csv")

# Take the first row of the last generation as the best candidate.
# NOTE(review): this assumes results.csv lists each generation best-first -
# confirm against how Evolution writes the file.
final_gen_df = results_df[results_df["gen"] == results_df["gen"].max()]
best_cand_id = final_gen_df["cand_id"].values[0]
print(best_cand_id)

pop_dict = factory.load_population(save_path / "population")
best_cand = pop_dict[best_cand_id]

env = gymnasium.make("CartPole-v1", render_mode="human")
try:
    obs, _ = env.reset()
    for i in range(300):
        action = best_cand.forward(obs)
        # step() returns (obs, reward, terminated, truncated, info). Either
        # flag means the episode is over and the env must not be stepped again.
        # With render_mode="human" the frame is drawn inside step(), so no
        # explicit env.render() call is needed.
        obs, _, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            print(f"Agent lasted {i} steps")
            break
    else:
        # Reached only when the loop ran all 300 steps without a break, so a
        # failure on the very last step is no longer reported as a win.
        print("Agent won!")
finally:
    # Always release the render window, even if the rollout raised.
    env.close()