# Environment Development ("BabyAI-Text")

This notebook builds the BabyAI-Text (ACT-PRM) environment, inspects the real state returned by `reset()`, renders the chat prompt together with its tool schema, and prints the gold action trajectories for the first five samples.



In [1]:
import numpy as np

# Compatibility aliases for older gym / minigrid stacks running on NumPy>=2:
# each legacy name is added only when this NumPy build does not already expose it.
for legacy_alias, stand_in in (("bool8", np.bool_), ("bool", bool)):
    if not hasattr(np, legacy_alias):
        setattr(np, legacy_alias, stand_in)  # for "bool", np.bool_ would also work

In [2]:
import os

# Point the Hugging Face caches at directories under the user's home.
os.environ.update(
    {
        "HF_HOME": os.path.expanduser("~/.cache/huggingface"),
        "TRANSFORMERS_CACHE": os.path.expanduser("~/.cache/huggingface/transformers"),
    }
)

In [3]:
from omegaconf import OmegaConf
from rich import print as rich_print

from transformers import AutoTokenizer

# Load the model config; its `model_config` section is forwarded as-is to
# `AutoTokenizer.from_pretrained` below.
model_config = OmegaConf.load("../configs/model/hf_qwen3_4b_inst_2507.yaml")

# The config's default cache_dir points at /scr/..., so redirect it to a
# writable location under the user's home before loading anything.
writable_cache_dir = os.path.expanduser("~/.cache/huggingface/models")
model_config.model_config.cache_dir = writable_cache_dir

# Build the tokenizer from the (patched) config section
hf_tokenizer = AutoTokenizer.from_pretrained(**model_config.model_config)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def rich_print_messages(
    msg_text: str,
    bos_token: str = "<|im_start|>",
    eos_token: str = "<|im_end|>\n",
    tool_call_bos_token: str = "<tool_call>",
    tool_call_eos_token: str = "</tool_call>",
    tool_response_bos_token: str = "<tool_response>",
    tool_response_eos_token: str = "</tool_response>",
) -> None:
    """Print a rendered chat transcript with role-based colors.

    The transcript is split on ``eos_token``. Each piece that starts with
    ``bos_token`` followed by ``system`` / ``user`` / ``assistant`` is wrapped
    in a rich color tag (yellow / red / green). Inside every piece, tool-call
    spans are additionally wrapped in cyan and tool-response spans in magenta,
    but only when both the opening and closing token occur in that piece.

    Args:
        msg_text: Chat transcript as a single string (e.g. the output of
            ``apply_chat_template(..., tokenize=False)``).
        bos_token: Token that precedes the role name of each message.
        eos_token: Token that terminates each message; used as the split
            and re-join delimiter.
        tool_call_bos_token: Opening token of a tool call.
        tool_call_eos_token: Closing token of a tool call.
        tool_response_bos_token: Opening token of a tool response.
        tool_response_eos_token: Closing token of a tool response.

    Returns:
        None. The colored transcript is written via ``rich_print``; if that
        fails for any reason, the original ``msg_text`` is printed unchanged.
    """
    # (message prefix, rich color) -- checked in order, first match wins
    role_colors = (
        (f"{bos_token}system", "bright_yellow"),
        (f"{bos_token}user", "bright_red"),
        (f"{bos_token}assistant", "bright_green"),
    )
    # (opening token, closing token, rich color) for spans inside a message
    span_colors = (
        (tool_call_bos_token, tool_call_eos_token, "bright_cyan"),
        (tool_response_bos_token, tool_response_eos_token, "bright_magenta"),
    )

    colored_messages = []
    for message in msg_text.split(eos_token):
        for prefix, color in role_colors:
            if message.startswith(prefix):
                message = f"[{color}]{message}[/{color}]"
                break

        for open_token, close_token, color in span_colors:
            # Only color a span when both ends are present in this message
            if open_token in message and close_token in message:
                message = message.replace(open_token, f"[{color}]{open_token}")
                message = message.replace(close_token, f"{close_token}[/{color}]")

        colored_messages.append(message)

    try:
        rich_print(eos_token.join(colored_messages))
    except Exception:
        # Best-effort fallback: show the untouched transcript rather than the
        # markup-injected string, whose raw "[bright_...]" tags would otherwise
        # be dumped verbatim into the plain-text output.
        print(msg_text)

## BabyAI-Text (ACT-PRM)

We now build the BabyAI-Text environment and inspect the real state returned by `reset()`, along with the tool schema. The player prompt instructs the model to call exactly one of the available tools each turn.


In [5]:
from act_prm.environments import get_env

# Keyword arguments for the BabyAI-Text environment factory
babyai_env_kwargs = dict(
    name="babyai_text",
    env_name="BabyAI-MixedTestLocal-v0",
    num_train_samples=3,
    num_val_samples=1,
    num_test_samples=1,
    max_turns=20,
    seed=0,
)
babyai_env = get_env(**babyai_env_kwargs)

# Reset once and keep the resulting state for the cells that follow
babyai_state = babyai_env.reset()

# Cell output: the system prompt plus the first 400 characters of the first message
(babyai_state.system_prompt, babyai_state.new_messages[0]["content"][:400])

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.deprecation(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")


('You are a helpful assistant that can answer questions and call tools.\n\nYou are an agent playing BabyAI-Text, a grid-based game with text observations.\nYour goal is to follow the mission and reach the win condition.\n\n## Rules:\n- You must call exactly one tool per turn to choose an action.\n- Only the listed tools are valid actions.\n- You have 20 steps to complete the task. Restarting is forbidden.',
 "You are playing 'BabyAI-Text'.\nYour goal is to open the red door.\nAvailable actions are turn left, turn right, go forward, pick up, drop, toggle.\nYou see a wall 4 steps forward. You see a yellow key 2 steps left and 1 step forward. You see a red key 1 step left and 3 steps forward. You see a yellow ball 1 step left. You see a blue box 1 step right and 3 steps forward. You see a blue ball 2 steps r")

In [6]:
# Put the system prompt in front of the state's messages as its own chat turn
system_message = dict(role="system", content=babyai_state.system_prompt)
conversation = [system_message, *babyai_state.new_messages]

# Render to a string (no tokenization) with the state's tools, then color-print it
messages = hf_tokenizer.apply_chat_template(conversation, tokenize=False, tools=babyai_state.tools)
rich_print_messages(messages)

In [7]:
# Show 5 color-coded *trajectories* (user -> assistant(tool_call) -> tool -> ...)
from act_prm.environments import get_env

# Rebuild the environment with a train-only split (20 train, no val/test samples)
trajectory_env_kwargs = dict(
    name="babyai_text",
    env_name="BabyAI-MixedTestLocal-v0",
    num_train_samples=20,
    num_val_samples=0,
    num_test_samples=0,
    max_turns=20,
    seed=0,
)
babyai_env = get_env(**trajectory_env_kwargs)

for sample_idx in range(5):
    # Reset onto a specific sample and render its stored action trajectory
    state = babyai_env.reset(sample_idx=sample_idx)
    system_message = dict(role="system", content=state.system_prompt)
    conversation = [system_message, *state.action_trajectory]
    msg_text = hf_tokenizer.apply_chat_template(conversation, tokenize=False, tools=state.tools)

    # Separator rule and header, then the color-coded transcript
    print("\n" + "=" * 120, f"TRAJECTORY EXAMPLE {sample_idx}", sep="\n")
    rich_print_messages(msg_text)


TRAJECTORY EXAMPLE 0



TRAJECTORY EXAMPLE 1



TRAJECTORY EXAMPLE 2



TRAJECTORY EXAMPLE 3



TRAJECTORY EXAMPLE 4


In [10]:
import numpy as np
# Sanity check: report the NumPy version and whether the `bool8` alias is available
print(np.__version__, hasattr(np, "bool8"), sep="\n")

2.4.1
True
