# Test FoldAgent React Agent Loop with Local Search

This notebook tests your custom React agent loop from `verl/experimental/agent_loop/FoldAgent` by:
- Launching a stub LocalSearch FastAPI server (`/search`, `/open`).
- Registering FoldAgent's `react_agent` and composing VERL config via Hydra.
- Starting a standalone rollout server and driving one sample through the agent loop.
- Inspecting the generated messages, reward, and metrics.

In [1]:
import os
import asyncio
import socket
import json
import numpy as np

import ray
import fastapi
import uvicorn
from starlette.requests import Request
from starlette.responses import JSONResponse

from hydra import compose, initialize_config_dir

import verl
from verl import DataProto
from verl.experimental.agent_loop.agent_loop import AgentLoopManager
from verl.experimental.agent_loop.FoldAgent import ReactAgentLoop  # Ensures @register("react_agent") runs

from verl.workers.rollout.replica import get_rollout_replica_class

# Speed-focused Ray init; adjust as needed
ray.init(runtime_env={"env_vars": {"VLLM_USE_V1": "1"}}, ignore_reinit_error=True)
verl_config_dir = os.path.join(os.path.dirname(verl.__file__), "trainer/config")

rollout_name = "vllm"  # or "sglang"

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/_inductor/compile_worker/__main__.py", line 10, in <module>
    from torch._inductor.async_compile import pre_fork_setup
  File "/usr/local/lib/python3.11/dist-packages/torch/__init__.py", line 2611, in <module>
    from torch import _meta_registrations
  File "/usr/local/lib/python3.11/dist-packages/torch/_meta_registrations.py", line 12, in <module>
    from torch._decomp import (
  File "/usr/local/lib/python3.11/dist-packages/torch/_decomp/__init__.py", line 276, in <module>
    import torch._decomp.decompositions
  File "/usr/local/lib/python3.11/dist-packages/torch/_decomp/decompositions.py", line 16, in <module>
    import torch._prims as prims
  File "/usr/local/lib/python3.11/dist-packages/torch/_prims/__init__.py", line 525, in <module>
    abs = _make_elementwise_unary_prim(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/_prims/__init__.py",

KeyboardInterrupt: 

## 1) Launch a stub LocalSearch FastAPI service
Implements `/search` and `/open` endpoints expected by `LocalSearch` env. We set `LOCAL_SEARCH_URL` to point the agent to this server.

In [2]:
@ray.remote(num_cpus=1)
class SearchServer:
    """Minimal LocalSearch server with /search and /open endpoints."""

    def __init__(self):
        self.address = ray._private.services.get_node_ip_address()
        self.port = self._get_free_port()
        # Simple corpus
        self.pages = {
            "wiki:elon": {
                "docid": "wiki:elon",
                "url": "https://en.wikipedia.org/wiki/Elon_Musk",
                "text": "Elon Musk is the CEO of Tesla, Inc. He also leads SpaceX, xAI, and other ventures."
            },
            "tesla:leadership": {
                "docid": "tesla:leadership",
                "url": "https://www.tesla.com/leadership",
                "text": "Tesla's CEO is Elon Musk. The leadership page lists executive roles and bios."
            },
            "news:tesla": {
                "docid": "news:tesla",
                "url": "https://example.com/news/tesla",
                "text": "Breaking: Tesla maintains its leadership under CEO Elon Musk, focusing on EV innovation."
            }
        }
        asyncio.create_task(self._start_fastapi_server())

    def _get_free_port(self):
        with socket.socket() as sock:
            sock.bind(("", 0))
            return sock.getsockname()[1]

    async def _start_fastapi_server(self):
        app = fastapi.FastAPI()

        @app.post("/search")
        async def search(request: Request):
            req = await request.json()
            query = (req.get("query") or "").lower()
            k = int(req.get("k", 10))
            # naive keyword filter
            def match(p):
                txt = (p.get("text") or "").lower()
                return ("elon" in query or "tesla" in query) and ("elon" in txt or "tesla" in txt)
            results = [p for p in self.pages.values() if match(p)][:k]
            return JSONResponse(content={"results": results})

        @app.post("/open")
        async def open_page(request: Request):
            req = await request.json()
            docid = req.get("docid")
            url = req.get("url")
            page = None
            if docid and docid in self.pages:
                page = self.pages[docid]
            elif url:
                for p in self.pages.values():
                    if p.get("url") == url:
                        page = p
                        break
            return JSONResponse(content={"results": [page] if page else []})

        config = uvicorn.Config(app, host=["::", "0.0.0.0"], port=self.port, log_level="warning")
        server = uvicorn.Server(config)
        await server.serve()

    async def get_server_address(self) -> str:
        return f"{self.address}:{self.port}"

search_server = SearchServer.remote()
search_address = ray.get(search_server.get_server_address.remote())
os.environ["LOCAL_SEARCH_URL"] = f"http://{search_address}"  # used by LocalSearch env
print("LOCAL_SEARCH_URL:", os.environ["LOCAL_SEARCH_URL"])

LOCAL_SEARCH_URL: http://10.122.252.202:60219


## 2) Compose VERL config and start standalone rollout
We set the rollout engine (vLLM or SGLang), model path, and select our agent loop name `react_agent`.

In [3]:
from huggingface_hub import snapshot_download

# Download a small-ish instruct model (adjust if you already have one).
model_path = os.path.expanduser("~/Qwen/Qwen3-1.7B")
snapshot_download(repo_id="Qwen/Qwen3-1.7B", repo_type="model", local_dir=model_path)

with initialize_config_dir(config_dir=verl_config_dir):
    config = compose(
        config_name="ppo_trainer",
        overrides=[
            # rollout engine
            "actor_rollout_ref.rollout.name=" + rollout_name,
            "actor_rollout_ref.rollout.mode=async",
            "actor_rollout_ref.rollout.tensor_model_parallel_size=1",
            "actor_rollout_ref.rollout.data_parallel_size=1",
            "actor_rollout_ref.rollout.pipeline_model_parallel_size=1",
            "actor_rollout_ref.rollout.skip_tokenizer_init=False",
            "actor_rollout_ref.rollout.prompt_length=4096",
            "actor_rollout_ref.rollout.response_length=4096",
            # model
            "actor_rollout_ref.model.path=" + model_path,
            # agent loop: use our FoldAgent React agent
            "actor_rollout_ref.rollout.agent.default_agent_loop=react_agent",
            "actor_rollout_ref.rollout.agent.num_workers=1",
            # trainer sizing
            "trainer.n_gpus_per_node=2",
            "trainer.nnodes=1",
            "trainer.logger=['console']",
            "trainer.project_name=verl",
            "trainer.experiment_name=" + os.path.basename(model_path)
        ],
    )

from omegaconf import OmegaConf

# Make a safe copy of the trainer config to attach plugin without affecting rollout server instantiation
trainer_config_with_plugin = OmegaConf.create(OmegaConf.to_container(config, resolve=False))
OmegaConf.set_struct(trainer_config_with_plugin.actor_rollout_ref.rollout, False)

# Inject FoldAgent plugin fields on the copied config
trainer_config_with_plugin.actor_rollout_ref.rollout.plugin = OmegaConf.create({
    "workflow": "search",
    "max_turn": 32,
    "retry_cjk": 0,
    "turn_max_new_tokens": 1024,
    "max_session": 3,
    "val_max_session": 3,
    "session_timeout": 3600,
    "enable_summary": True,
    "branch_len": 256,
    "process_reward": "flat,scope",
    "max_traj": 2,
    "must_finish": False,
    "double_check": False,
    "must_search": True,
    "val_max_turn": 32,
    "val_response_length": 1024,
})

print("Plugin config:", OmegaConf.to_container(trainer_config_with_plugin.actor_rollout_ref.rollout.plugin, resolve=True))

# Start a standalone rollout server (same as in the tutorial)
rollout_server_class = get_rollout_replica_class(config.actor_rollout_ref.rollout.name)
rollout_server = rollout_server_class(
    replica_rank=0,
    config=config.actor_rollout_ref.rollout,
    model_config=config.actor_rollout_ref.model,
    gpus_per_node=config.trainer.n_gpus_per_node,
)
await rollout_server.init_standalone()
print("Rollout server address:", rollout_server.server_address)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize_config_dir(config_dir=verl_config_dir):


Plugin config: {'workflow': 'search', 'max_turn': 32, 'retry_cjk': 0, 'turn_max_new_tokens': 1024, 'max_session': 3, 'val_max_session': 3, 'session_timeout': 3600, 'enable_summary': True, 'branch_len': 256, 'process_reward': 'flat,scope', 'max_traj': 2, 'must_finish': False, 'double_check': False, 'must_search': True, 'val_max_turn': 32, 'val_response_length': 1024}
INFO 12-13 07:19:42 [__init__.py:235] Automatically detected platform cuda.




(pid=1201697, ip=10.122.252.202) INFO 12-13 07:19:54 [__init__.py:235] Automatically detected platform cuda.
(pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:04 [__init__.py:235] Automatically detected platform cuda.


(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO:2025-12-13 07:20:10,395:vLLMHttpServer, replica_rank: 0, master address: 10.122.252.202, master port: 38159, data parallel master port: 47337
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO:2025-12-13 07:20:10,401:override_generation_config: {'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'repetition_penalty': 1.0, 'max_new_tokens': 4096}


(vLLMHttpServer pid=1201918, ip=10.122.252.202) ['serve',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '/home/tiger/Qwen/Qwen3-1.7B',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--dtype',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  'bfloat16',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--load_format',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  'auto',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--max_model_len',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '8192',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--max_num_seqs',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '1024',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--enable_chunked_prefill',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--max_num_batched_tokens',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '8192',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--enable_prefix_caching',
(vLLMHttpServer pid=1201918, ip=10.122.252.202)  '--enable_sleep_mode',
(vLLMHttpS

(vLLMHttpServer pid=1201918, ip=10.122.252.202) Using blocking ray.get inside async actor. This blocks the event loop. Please use `await` on object ref with asyncio.gather if you want to yield execution to the event loop instead.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO:2025-12-13 07:20:11,116:replica_rank=0, node_rank=0, nnodes=1, get worker zmq addresses: ['ipc:///tmp/verl_vllm_zmq_1201697_tiger.ipc']


(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:17 [config.py:1604] Using max model len 8192
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:17 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=8192.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:23 [__init__.py:235] Automatically detected platform cuda.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:28 [core.py:572] Waiting for init message from front-end.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:20:28 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='/home/tiger/Qwen/Qwen3-1.7B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-1.7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00,  3.25it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00,  3.25it/s]
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) 


(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:20:33 [default_loader.py:262] Loading weights took 0.71 seconds
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:20:34 [gpu_model_runner.py:1892] Model loading took 3.2152 GiB and 0.856083 seconds
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:20:42 [backends.py:530] Using cache directory: /home/tiger/.cache/vllm/torch_compile_cache/3f4beb645d/rank_0_0/backbone for vLLM's torch.compile
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:20:42 [backends.py:541] Dynamo bytecode transform time: 7.65 s
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:20:47 [backends.py:194] Cache the graph for dynamic shape for later use
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:21:15 [backends.py:215] Compiling a graph for dynamic shape takes 33.24 s
(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:21:23 [monitor.py:34] torch.compile takes 40.88 s 

Capturing CUDA graph shapes:   0%|          | 0/67 [00:00<?, ?it/s]
Capturing CUDA graph shapes:   6%|▌         | 4/67 [00:00<00:01, 35.77it/s]
Capturing CUDA graph shapes:  12%|█▏        | 8/67 [00:00<00:01, 36.59it/s]
Capturing CUDA graph shapes:  18%|█▊        | 12/67 [00:00<00:01, 36.43it/s]
Capturing CUDA graph shapes:  24%|██▍       | 16/67 [00:00<00:01, 36.67it/s]
Capturing CUDA graph shapes:  30%|██▉       | 20/67 [00:00<00:01, 37.13it/s]
Capturing CUDA graph shapes:  36%|███▌      | 24/67 [00:00<00:01, 37.41it/s]
Capturing CUDA graph shapes:  42%|████▏     | 28/67 [00:00<00:01, 37.18it/s]
Capturing CUDA graph shapes:  48%|████▊     | 32/67 [00:00<00:00, 37.04it/s]
Capturing CUDA graph shapes:  54%|█████▎    | 36/67 [00:00<00:00, 35.87it/s]
Capturing CUDA graph shapes:  60%|█████▉    | 40/67 [00:01<00:00, 35.64it/s]
Capturing CUDA graph shapes:  66%|██████▌   | 44/67 [00:01<00:00, 35.65it/s]
Capturing CUDA graph shapes:  72%|███████▏  | 48/67 [00:01<00:00, 34.58it/s]
Capturing 

(vLLMAsyncRollout pid=1201697, ip=10.122.252.202) INFO 12-13 07:21:27 [gpu_model_runner.py:2485] Graph capturing finished in 2 secs, took 0.49 GiB
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:27 [core.py:193] init engine (profile, create kv cache, warmup model) took 53.36 seconds


(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO:2025-12-13 07:21:27,926:Initializing a V1 LLM engine with config: model='/home/tiger/Qwen/Qwen3-1.7B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-1.7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=/home/tiger/Qwen/Qwen3-1.7B, num_scheduler_steps=1, multi_step

(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:27 [serving_responses.py:89] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:27 [serving_chat.py:122] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:27 [serving_completion.py:77] Using default completion sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
Rollout server address: 10.122.252.202:47645


## 3) Build one test sample and run the agent loop
We create a `DataProto` with `ability=LocalSearch` and the fields expected by your env and agent logic.

In [4]:
from pprint import pprint

# Minimal raw prompt (env will replace via its create_chat flow, but we include for completeness)
raw_prompt = [
    {"role": "system", "content": "You are a helpful agent that uses ReAct-style tool calls."},
    {"role": "user", "content": "Who is the CEO of Tesla? Provide citations."}
]

extra_info = {
    "query": "Who is the CEO of Tesla?",
    "answer": "Elon Musk",  # label for reward checking
    "prompt": raw_prompt,
    "workflow": "search"
}

uid = "test-0001"
reward_model = "default"

batch = DataProto.from_dict(
    tensors={},
    non_tensors={
        "raw_prompt": np.array([raw_prompt], dtype=object),
        "extra_info": np.array([extra_info], dtype=object),
        "uid": np.array([uid], dtype=object),
        "reward_model": np.array([reward_model], dtype=object),
        "ability": np.array(["LocalSearch"], dtype=object),
        "agent_name": np.array(["react_agent"], dtype=object),
        "index": np.array([0], dtype=object)
    },
    meta_info={"validate": False, "global_steps": 0}
)


# Use AgentLoopWorker directly to avoid nested asyncio.run issues in notebooks
from verl.experimental.agent_loop import AgentLoopWorker

alm_worker = AgentLoopWorker.options(
        name="notebook_agent_loop_worker",
        runtime_env={"env_vars": {"LOCAL_SEARCH_URL": f"http://{search_address}"}},
).remote(
        trainer_config_with_plugin,
        [rollout_server.server_handle],  # reuse the already-started standalone server
        None,
)
output = ray.get(alm_worker.generate_sequences.remote(batch))

print("Reward score:", output.batch.get("rm_scores", None))
print("Num turns:", output.non_tensor_batch["__num_turns__"][0])
print("Extra fields keys:", list(output.non_tensor_batch.keys()))
pprint(output.non_tensor_batch.get("env_stats", [None])[0])

(AgentLoopWorker pid=1203418, ip=10.122.252.202) INFO 12-13 07:21:42 [__init__.py:235] Automatically detected platform cuda.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:44 [async_llm.py:269] Added request d762184fdc284ef2ba40a5fd9d89d6b7.


(AgentLoopWorker pid=1203418, ip=10.122.252.202) ERROR:2025-12-13 07:21:44,355:Error getting data from env: 'NoneType' object has no attribute 'is_train'


(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:46 [async_llm.py:269] Added request 0497c552ecaa46d6bc1eb6aaf1d45f26.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:46 [async_llm.py:269] Added request d98a120d33484b05ac47faa2ce122f48.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:46 [async_llm.py:269] Added request 1daaa291709f406d962aa01d718e3745.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:46 [async_llm.py:269] Added request f91a6fbcaf294a4b917d14547add3dd2.
(vLLMHttpServer pid=1201918, ip=10.122.252.202) INFO 12-13 07:21:47 [async_llm.py:269] Added request ca288248d7c940bc9a3589a92efdafde.
Reward score: tensor([[0., 0., 0.,  ..., 0., 0., 0.]])
Num turns: 13
Extra fields keys: ['__num_turns__', 'env_stats', 'raw_prompt', 'messages']
Counter({'action': 6,
         'open_page': 3,
         'visit_pages': 3,
         'search': 2,
         'is_search': 1,
         'is_open': 1,
         'is_finish': 1,
         'f

(AgentLoopWorker pid=1203418, ip=10.122.252.202) [Judged] score=1
(AgentLoopWorker pid=1203418, ip=10.122.252.202) Label: Elon Musk
(AgentLoopWorker pid=1203418, ip=10.122.252.202) Model: Elon Musk


(AgentLoopWorker pid=1203418, ip=10.122.252.202) You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [8]:
# If messages were captured by the agent loop, they are included under extra_fields
messages = output.non_tensor_batch.get("messages", [None])[0]
if messages:
    print("\nConversation transcript:")
    print(f"Num turns: {output.non_tensor_batch['__num_turns__'][0]}")
    for m in messages:
        print("=" * 100)
        role = m.get("role")
        content = m.get("content")
        print(f"- {role}: {content}" if content else f"- {role}")


Conversation transcript:
Num turns: 13
- system: You are a meticulous and strategic research agent. Your primary function is to conduct comprehensive, multi-step research to deliver a thorough, accurate, and well-supported report in response to the user's query.

Your operation is guided by these core principles:
* **Rigor:** Execute every step of the research process with precision and attention to detail.
* **Objectivity:** Synthesize information based on the evidence gathered, not on prior assumptions. Note and investigate conflicting information.
* **Thoroughness:** Never settle for a surface-level answer. Always strive to uncover the underlying details, context, and data.
* **Transparency:** Your reasoning process should be clear at every step, linking evidence from your research directly to your conclusions.

Follow this structured protocol for to find the answer

### Phase 1: Deconstruction & Strategy

1.  **Deconstruct the Query:**
    * Analyze the user's prompt to identify t

Summary of output fields:

In [10]:
# output = ray.get(alm_worker.generate_sequences.remote(batch))

# Detailed summary of DataProto output

def summarize_dataproto(dp):
    print("=== DataProto Summary ===")

    # Meta info
    print("\nMeta Info:")
    pprint(dp.meta_info)

    # Batch (TensorDict)
    print("\nBatch (TensorDict):")
    for k in list(dp.batch.keys()):
        t = dp.batch[k]
        try:
            shape = tuple(t.shape)
            dtype = t.dtype
        except Exception:
            shape = getattr(t, "shape", None)
            dtype = getattr(t, "dtype", None)
        print(f"- {k}: shape={shape}, dtype={dtype}")

    # Non-tensor batch
    print("\nNon-Tensor Batch:")
    for k, v in dp.non_tensor_batch.items():
        if isinstance(v, np.ndarray):
            print(f"- {k}: dtype={v.dtype}, shape={v.shape}")
            if v.dtype == object and v.size > 0:
                sample = v[0]
                if k == "multi_modal_inputs" and isinstance(sample, dict):
                    print("  multi_modal_inputs sample tensors:")
                    for kk, val in sample.items():
                        if hasattr(val, "shape"):
                            print(f"    {kk}: shape={tuple(val.shape)}, dtype={getattr(val, 'dtype', None)}")
                elif isinstance(sample, dict):
                    print(f"  sample keys: {list(sample.keys())}")
                else:
                    s = str(sample)
                    print(f"  sample: {s[:500]}{'...' if len(s) > 500 else ''}")
            elif v.size > 0 and v.ndim == 1:
                head = v[:min(5, v.shape[0])]
                print(f"  head: {head}")
        else:
            print(f"- {k}: type={type(v)}")

    # Messages preview (first sample)
    # messages = dp.non_tensor_batch.get("messages", None)
    # if messages is not None and len(messages) > 0:
    #     first = messages[0]
    #     if isinstance(first, list):
    #         print("\nTranscript (first sample):")
    #         for m in first:
    #             role = m.get("role")
    #             content = m.get("content")
    #             preview = content[:200] if isinstance(content, str) else content
    #             print(f"  - {role}: {preview}")
    #     else:
    #         print("\nMessages[0] type:", type(first))

    print("\n=== End Summary ===")

summarize_dataproto(output)

=== DataProto Summary ===

Meta Info:
{'metrics': [{'generate_sequences': 4.131077413447201,
              'tool_calls': 0.02602100116200745}],
 'reward_extra_keys': []}

Batch (TensorDict):
- prompts: shape=(1, 4096), dtype=torch.int64
- responses: shape=(1, 4096), dtype=torch.int64
- response_mask: shape=(1, 4096), dtype=torch.int64
- input_ids: shape=(1, 8192), dtype=torch.int64
- attention_mask: shape=(1, 8192), dtype=torch.int64
- position_ids: shape=(1, 8192), dtype=torch.int64
- rollout_log_probs: shape=(1, 4096), dtype=torch.float32
- rm_scores: shape=(1, 4096), dtype=torch.float32

Non-Tensor Batch:
- __num_turns__: dtype=int32, shape=(1,)
  head: [32]
- env_stats: dtype=object, shape=(1,)
  sample keys: ['finish', 'search', 'open_page', 'change_answer', 'is_search', 'is_open', 'is_finish', 'visit_pages', 'action']
- raw_prompt: dtype=object, shape=(1,)
  sample: [{'role': 'system', 'content': 'You are a helpful agent that uses ReAct-style tool calls.'}
 {'role': 'user', 'cont