From 57f2a7af21c29a2a6c3844264b129059929fb702 Mon Sep 17 00:00:00 2001 From: Kyle Corbitt Date: Thu, 23 Oct 2025 17:56:27 -0700 Subject: [PATCH 1/3] feat: Add OpenEnv integration example with echo environment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Demonstrates how to use OpenEnv environments with ART for training. The example shows a simple echo environment that rewards longer messages. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- examples/openenv_echo.py | 94 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 examples/openenv_echo.py diff --git a/examples/openenv_echo.py b/examples/openenv_echo.py new file mode 100644 index 00000000..d42e38f5 --- /dev/null +++ b/examples/openenv_echo.py @@ -0,0 +1,94 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "openenv", +# "openpipe-art", +# "weave", +# ] +# +# [tool.uv.sources] +# openenv = { git = "https://github.com/meta-pytorch/OpenEnv.git", rev = "6083a47" } +# /// +import asyncio +from datetime import datetime + +import art +from art.serverless.backend import ServerlessBackend +from dotenv import load_dotenv +from envs.echo_env import EchoAction, EchoEnv +import weave + +PROMPT = "Use at most 100 tokens; maximize the total character length of the output." +NUM_STEPS = 50 +ROLLOUTS_PER_GROUP = 4 + + +# In ART, the rollout function +async def rollout(model: art.TrainableModel, env_client: EchoEnv) -> art.Trajectory: + # For the simple echo environment there's no internal state to reset, but we show resetting anyway to demonstrate the pattern. + await asyncio.to_thread(env_client.reset) + + # We create an art.Trajectory object to store our messages as well as the final reward. + traj = art.Trajectory( + messages_and_choices=[{"role": "system", "content": PROMPT}], reward=0.0 + ) + + # We use the model we're training to generate the next action to send to the environment. 
For this simple echo environment, the action is a single message. + choice = ( + await model.openai_client().chat.completions.create( + model=model.inference_model_name, + messages=traj.messages(), + max_completion_tokens=100, + timeout=30, + ) + ).choices[0] + reply = (choice.message.content or "").strip() + + # We send the action to the environment. + result = await asyncio.to_thread(env_client.step, EchoAction(message=reply)) + + # We need to record the actual message we produced so we can use it for training later. + traj.messages_and_choices.append(choice) + + # The environment gives us back a reward (in this case it's simply the length of the message we sent divided by 10). We record it so we can use it for training later. + traj.reward = result.reward + + # We return the completed trajectory to the trainer. + return traj.finish() + + +async def main() -> None: + load_dotenv() + + weave.init("openenv-demo") + + # The ServerlessBackend requires a `WANDB_API_KEY` environment variable to be set. You can also use the ART `LocalBackend` to train on a local GPU. + backend = ServerlessBackend() + + # We define a model that we'll train. The model is a LoRA adapter on top of Qwen3-14B. + model = art.TrainableModel( + name=f"openenv-echo-{datetime.now().strftime('%Y-%m-%d-%H%M%S')}", + project="openenv-demo", + base_model="OpenPipe/Qwen3-14B-Instruct", + ) + await model.register(backend) + + # We create a shared pool of environment clients for training, to avoid starting up and tearing down docker containers for each rollout. + env_pool = [ + EchoEnv.from_docker_image("quixote13/echo-env:latest") + for _ in range(ROLLOUTS_PER_GROUP) + ] + + # We train the model for a fixed number of steps. 
+ for _step in range(await model.get_step(), NUM_STEPS): + print(f"Gathering groups for step {_step}") + + # We gather one group of rollouts, one per environment client in the pool. + groups = await art.gather_trajectory_groups( + [art.TrajectoryGroup(rollout(model, env_client) for env_client in env_pool)] + ) + + await model.train(groups) + + +asyncio.run(main()) From bafa8781d8ca554333d1ea7930c07bd5a564e81f Mon Sep 17 00:00:00 2001 From: Kyle Corbitt Date: Thu, 23 Oct 2025 18:30:24 -0700 Subject: [PATCH 2/3] docs: Add OpenEnv integration documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add concise documentation page for OpenEnv integration - Link to OpenEnv GitHub repository - Emphasize automatic integration due to ART's flexible architecture - Include complete code example demonstrating usage 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/docs.json | 3 +- docs/integrations/openenv-integration.mdx | 103 ++++++++++++++++++++++ examples/openenv_echo.py | 7 +- 3 files changed, 107 insertions(+), 6 deletions(-) create mode 100644 docs/integrations/openenv-integration.mdx diff --git a/docs/docs.json b/docs/docs.json index 23409152..4f07591a 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -72,7 +72,8 @@ { "group": "Integrations", "pages": [ - "integrations/langgraph-integration" + "integrations/langgraph-integration", + "integrations/openenv-integration" ] }, { diff --git a/docs/integrations/openenv-integration.mdx b/docs/integrations/openenv-integration.mdx new file mode 100644 index 00000000..4303fe69 --- /dev/null +++ b/docs/integrations/openenv-integration.mdx @@ -0,0 +1,103 @@ +--- +title: "🌍 OpenEnv" +description: "Train AI agents in isolated execution environments using OpenEnv with ART's reinforcement learning" +--- + +# OpenEnv Integration + +[OpenEnv](https://github.com/meta-pytorch/OpenEnv) provides a standard for interacting with agentic execution environments via simple Gymnasium-style APIs, making it easy to create 
reproducible training scenarios for code generation, tool usage, and other complex tasks. Because ART is unopinionated about the shape of your environment and rollout function, integration with OpenEnv is automatic - you can use any OpenEnv environment with ART without any special adapters or configuration. + +## Code Example + +Here's a complete example showing how to train an agent using OpenEnv's echo environment with ART: + +```python +import asyncio +from datetime import datetime + +import art +from art.serverless.backend import ServerlessBackend +from dotenv import load_dotenv +from envs.echo_env import EchoAction, EchoEnv +import weave + +PROMPT = "Use at most 100 tokens; maximize the total character length of the output." +NUM_STEPS = 50 +ROLLOUTS_PER_GROUP = 4 + + +# The rollout function defines how your agent interacts with the environment +async def rollout(model: art.TrainableModel, env_client: EchoEnv) -> art.Trajectory: + # Reset the environment to get initial state + await asyncio.to_thread(env_client.reset) + + # Create a trajectory to store interactions and rewards + traj = art.Trajectory( + messages_and_choices=[{"role": "system", "content": PROMPT}], + reward=0.0 + ) + + # Use the model to generate an action + choice = ( + await model.openai_client().chat.completions.create( + model=model.inference_model_name, + messages=traj.messages(), + max_completion_tokens=100, + timeout=30, + ) + ).choices[0] + reply = (choice.message.content or "").strip() + + # Send the action to the environment and get observation/reward + result = await asyncio.to_thread( + env_client.step, + EchoAction(message=reply) + ) + + # Record the model's output and reward + traj.messages_and_choices.append(choice) + traj.reward = result.reward + + return traj.finish() + + +async def main() -> None: + load_dotenv() + weave.init("openenv-demo") + + # Set up the training backend + backend = ServerlessBackend() + + # Define the model to train + model = art.TrainableModel( + 
name=f"openenv-echo-{datetime.now().strftime('%Y-%m-%d-%H%M%S')}", + project="openenv-demo", + base_model="OpenPipe/Qwen3-14B-Instruct", + ) + await model.register(backend) + + # Create a pool of environment clients for efficient training + env_pool = [ + EchoEnv.from_docker_image("quixote13/echo-env:latest") + for _ in range(ROLLOUTS_PER_GROUP) + ] + + # Training loop + for step in range(await model.get_step(), NUM_STEPS): + print(f"Gathering groups for step {step}") + + # Run multiple rollouts in parallel + groups = await art.gather_trajectory_groups([ + art.TrajectoryGroup( + rollout(model, env_client) + for env_client in env_pool + ) + ]) + + # Train the model on collected trajectories + await model.train(groups) + + +if __name__ == "__main__": + asyncio.run(main()) +``` \ No newline at end of file diff --git a/examples/openenv_echo.py b/examples/openenv_echo.py index d42e38f5..2e59a141 100644 --- a/examples/openenv_echo.py +++ b/examples/openenv_echo.py @@ -1,13 +1,10 @@ # /// script # requires-python = ">=3.10" # dependencies = [ -# "openenv", -# "openpipe-art", -# "weave", +# "openenv-core==0.1.13", +# "openpipe-art==0.5.1", # ] # -# [tool.uv.sources] -# openenv = { git = "https://github.com/meta-pytorch/OpenEnv.git", rev = "6083a47" } # /// import asyncio from datetime import datetime From 7ecc7121aa2bae1e495f8baf9f4ae43f85eb252a Mon Sep 17 00:00:00 2001 From: Kyle Corbitt Date: Thu, 23 Oct 2025 18:30:45 -0700 Subject: [PATCH 3/3] chore: Add package-lock.json to docs .gitignore --- docs/.gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/.gitignore b/docs/.gitignore index b512c09d..25c8fdba 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,2 @@ -node_modules \ No newline at end of file +node_modules +package-lock.json \ No newline at end of file