# Environment Development ("BabyAI-Text")

This notebook builds the BabyAI ACT-PRM environment, inspects the real state, renders the prompt with tools, and shows the gold trajectory. It is a compact setup similar to the example notebook.



In [1]:
import os

# Point the Hugging Face libraries at per-user cache directories. This runs
# before the next cell imports `transformers`, so the settings are in place
# when that library is first loaded.
# NOTE(review): recent `transformers` releases appear to warn that
# TRANSFORMERS_CACHE is deprecated in favour of HF_HOME -- confirm whether the
# second entry is still needed.
os.environ.update(
    {
        "HF_HOME": os.path.expanduser("~/.cache/huggingface"),
        "TRANSFORMERS_CACHE": os.path.expanduser("~/.cache/huggingface/transformers"),
    }
)

In [2]:
from omegaconf import OmegaConf
from rich import print as rich_print

from transformers import AutoTokenizer

# Read the model config, then redirect its cache_dir to a writable per-user
# location (the checked-in config defaults to /scr/...).
tokenizer_cache_dir = os.path.expanduser("~/.cache/huggingface/models")
model_config = OmegaConf.load("../configs/model/hf_qwen3_4b_inst_2507.yaml")
model_config.model_config["cache_dir"] = tokenizer_cache_dir

# The nested `model_config` mapping is unpacked as keyword arguments when
# building the tokenizer.
tokenizer_kwargs = model_config.model_config
hf_tokenizer = AutoTokenizer.from_pretrained(**tokenizer_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def rich_print_messages(
    msg_text: str,
    bos_token: str = "<|im_start|>",
    eos_token: str = "<|im_end|>\n",
    tool_call_bos_token: str = "<tool_call>",
    tool_call_eos_token: str = "</tool_call>",
    tool_response_bos_token: str = "<tool_response>",
    tool_response_eos_token: str = "</tool_response>",
) -> None:
    """Pretty-print a rendered chat transcript with per-role colours.

    The transcript is split on ``eos_token``. Each piece is coloured by the
    role that follows ``bos_token`` (system: yellow, user: red, assistant:
    green); pieces with any other prefix are left uncoloured. Within a piece,
    tool-call spans are highlighted in cyan and tool-response spans in
    magenta, but only when both the opening and the closing token occur in
    that piece.

    Args:
        msg_text: Full chat transcript as a single string.
        bos_token: Token that starts every message; the role name is expected
            to follow it immediately.
        eos_token: Token that ends every message; used as the split separator.
        tool_call_bos_token: Opening marker of a tool call.
        tool_call_eos_token: Closing marker of a tool call.
        tool_response_bos_token: Opening marker of a tool response.
        tool_response_eos_token: Closing marker of a tool response.

    Returns:
        None. The transcript is written to the console via ``rich_print``; if
        rendering fails, the original, un-annotated ``msg_text`` is printed
        with the built-in ``print`` instead.
    """
    # Local import: `rich` is already a dependency of this notebook (it
    # provides `rich_print`), so this adds no new requirement.
    from rich.markup import escape

    role_colors = {
        f"{bos_token}system": "bright_yellow",
        f"{bos_token}user": "bright_red",
        f"{bos_token}assistant": "bright_green",
    }
    # Tokens are escaped the same way as the message text so they can still be
    # located after the content has been escaped.
    span_colors = (
        (escape(tool_call_bos_token), escape(tool_call_eos_token), "bright_cyan"),
        (escape(tool_response_bos_token), escape(tool_response_eos_token), "bright_magenta"),
    )

    styled_messages = []
    for message in msg_text.split(eos_token):
        # Escape the content first: otherwise bracketed text inside a message
        # (e.g. "[red]" or "[/x]") is interpreted as Rich markup and either
        # restyles the output, disappears, or raises a markup error.
        text = escape(message)

        # Role colouring: the first matching prefix wins.
        for prefix, color in role_colors.items():
            if message.startswith(prefix):
                text = f"[{color}]{text}[/{color}]"
                break

        # Tool-call / tool-response highlighting, only for complete pairs.
        for open_token, close_token, color in span_colors:
            if open_token in text and close_token in text:
                text = text.replace(open_token, f"[{color}]{open_token}")
                text = text.replace(close_token, f"{close_token}[/{color}]")

        styled_messages.append(text)

    try:
        rich_print(escape(eos_token).join(styled_messages))
    except Exception:
        # Deliberate best-effort fallback (e.g. unbalanced tool tokens produce
        # unbalanced markup). Show the original transcript rather than text
        # polluted with the colour tags injected above.
        print(msg_text)

## BabyAI-Text (ACT-PRM)

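We now build the BabyAI environment and inspect the real state plus the tool schema. This requires the `babyai_text` package to be installed (see https://github.com/flowersteam/Grounding_LLMs_with_online_RL/tree/main/babyai-text); without it, the next cell raises an `ImportError`.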


In [4]:
from act_prm.environments import get_env

# Settings for the BabyAI-Text environment, gathered in one place so they are
# easy to tweak.
babyai_env_kwargs = {
    "name": "babyai_text",
    "env_name": "BabyAI-MixedTestLocal-v0",
    "num_train_samples": 3,
    "num_val_samples": 1,
    "num_test_samples": 1,
    "max_turns": 20,
    "seed": 0,
}
babyai_env = get_env(**babyai_env_kwargs)

# Reset the environment to obtain the initial state.
babyai_state = babyai_env.reset()

# Show the system prompt together with the first 400 characters of the first
# new message.
first_message_preview = babyai_state.new_messages[0]["content"][:400]
(babyai_state.system_prompt, first_message_preview)

  fn()
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


ImportError: babyai_text package is required for BabyAI-Text. See https://github.com/flowersteam/Grounding_LLMs_with_online_RL/tree/main/babyai-text

In [None]:
# Prepend the system prompt to the state's new messages, render the whole
# conversation (including the tool schema) as text rather than token ids, and
# pretty-print it.
system_message = dict(role="system", content=babyai_state.system_prompt)
conversation = [system_message] + babyai_state.new_messages

messages = hf_tokenizer.apply_chat_template(
    conversation,
    tools=babyai_state.tools,
    tokenize=False,
)
rich_print_messages(messages)

In [None]:
# Peek at the gold trajectory: show only its first six entries
# (user -> assistant -> tool -> ...) so the cell output stays short.
babyai_state.action_trajectory[:6]