In [None]:
# GEPA Behavior Demo

This notebook loads the default configuration, initializes the simulation environment and the behavior model, runs a short rollout, logs the resulting trajectory, and plots the per-step rewards.


In [None]:
%load_ext autoreload
%autoreload 2
import os, sys
sys.path.append(os.path.abspath('..'))
import torch, numpy as np

from gepa.utils.config import Config, ConfigLoader
from gepa.utils.logging_utils import Logger
from gepa.utils.plotting import plot_trajectory
from gepa.sim import BulletSimEnv
from gepa.models import TorchBehaviorModel
from gepa.gepa import GEPAOptimizer, MockLLM, Prompt


In [None]:
# --- Setup: config, simulation environment, behavior model, logger ---

# Seed the global RNGs before anything is constructed so that a fresh
# "Restart Kernel -> Run All" is as repeatable as possible.
# NOTE(review): whether the model / environment draw from these global RNGs
# is not visible from this notebook -- confirm, and seed any others they use.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

cfg = ConfigLoader.from_yaml('../configs/default.yaml')

# Headless simulation (no GUI window).
env = BulletSimEnv(urdf_path=cfg.simulation.robot_urdf, gui=False)
obs = env.reset()

# Overwrite the model dimensions in the config with values read from the live
# environment: the length of the first observation and the robot's joint count.
cfg.model.input_dim = int(obs.shape[0])
cfg.model.action_dim = int(env.num_joints)

model = TorchBehaviorModel(
    architecture=cfg.model.architecture,
    input_dim=cfg.model.input_dim,
    action_dim=cfg.model.action_dim,
    prompt_conditioning=cfg.model.prompt_conditioning,
)

logger = Logger('../runs/notebook_demo')

# Prompt passed to the model on every `select_action` call during the rollout.
prompt = cfg.gepa.base_prompt


In [None]:
# --- Rollout: step the environment with the model's actions and record everything ---
# Consumes `obs`, `env`, `model`, `prompt` and `logger` from the setup cell.
# The environment is closed at the end of this cell, so (presumably) the setup
# cell must be re-run before this cell can be executed again.

NUM_STEPS = 100  # number of environment steps in the demo rollout

rewards = []
observations = []
actions = []
try:
    for t in range(NUM_STEPS):
        observations.append(obs.copy())
        # Add a leading batch dimension of size 1 for the model.
        obs_t = torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
        # Inference only: no autograd graph is needed for action selection.
        with torch.no_grad():
            action_t = model.select_action(obs_t, prompt=prompt).action
        # `.detach()` guards against a tensor that still requires grad (calling
        # `.numpy()` on one raises); `.cpu()` is a no-op for CPU tensors.
        action = action_t.squeeze(0).detach().cpu().numpy()
        actions.append(action.copy())
        res = env.step(action)
        obs = res.observation
        rewards.append(res.reward)
    # Only a rollout that ran to completion is logged.
    logger.log_trajectory('demo_episode', observations, actions, rewards)
finally:
    # Release the simulator even if a step above raised.
    env.close()


In [None]:
# --- Results: plot the per-step rewards collected during the rollout ---
# `plot_trajectory` is imported with the other dependencies in the import cell
# at the top of the notebook.
plot_trajectory(rewards)
