# Test FoldAgent Agent Loop with Local Search

This notebook tests your custom FoldAgent loop from `verl/experimental/agent_loop/FoldAgent` by:
- Launching a stub LocalSearch FastAPI server (`/search`, `/open`).
- Registering FoldAgent's `fold_agent` and composing VERL config via Hydra.
- Starting a standalone rollout server and driving one sample through the agent loop.
- Inspecting the generated messages, reward, metrics, and branch-related fields.


In [1]:
import os
import asyncio
import socket
import json
import numpy as np

import ray
import fastapi
import uvicorn
from starlette.requests import Request
from starlette.responses import JSONResponse

from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf
from huggingface_hub import snapshot_download

import verl
from verl import DataProto
from verl.experimental.agent_loop import AgentLoopWorker
from verl.experimental.agent_loop.FoldAgent import FoldAgentLoop  # Ensures @register("fold_agent") runs
from verl.workers.rollout.replica import get_rollout_replica_class

# Environment variables forwarded to Ray worker processes through the runtime env.
ray_worker_env = {"VLLM_USE_V1": "1"}
ray.init(
    ignore_reinit_error=True,  # tolerate re-running this cell against a live Ray session
    runtime_env={"env_vars": ray_worker_env},
)

# Hydra config directory that ships inside the installed verl package.
verl_package_dir = os.path.dirname(verl.__file__)
verl_config_dir = os.path.join(verl_package_dir, "trainer/config")

# Rollout backend selected when composing the config further down.
rollout_name = "vllm"  # or "sglang"


[2025-12-16 14:25:07,302 I 347038 347038] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:62931
[2025-12-16 14:25:07,587 I 347038 347038] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:62931
[2025-12-16 14:25:10,075 I 347038 347038] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:62931
[2025-12-16 14:25:10,076 I 347038 347038] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:62931
2025-12-16 14:25:10,078	INFO worker.py:1887 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[2025-12-16 14:25:10,080 I 347038 347038] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:62931


Logs are printed to python-core-driver-01000000ffffffffffffffffffffffffffffffffffffffffffffffff_347038.log


## 1) Launch a stub LocalSearch FastAPI service
Implements the `/search` and `/open` endpoints expected by the `LocalSearch` env. We set `LOCAL_SEARCH_URL` so that the agent points at this server.

In [2]:
@ray.remote(num_cpus=1)
class SearchServer:
    """Minimal LocalSearch stub serving POST /search and POST /open from an in-memory corpus.

    The actor picks a free port on its node, starts a uvicorn/FastAPI server as a
    background asyncio task during construction, and reports ``ip:port`` through
    ``get_server_address``.
    """

    def __init__(self):
        # Public alias of the helper that lives under ray._private.
        self.address = ray.util.get_node_ip_address()
        self.port = self._get_free_port()
        # Simple corpus, keyed by docid.
        self.pages = {
            "wiki:elon": {
                "docid": "wiki:elon",
                "url": "https://en.wikipedia.org/wiki/Elon_Musk",
                "text": "Elon Musk is the CEO of Tesla, Inc. He also leads SpaceX, xAI, and other ventures."
            },
            "tesla:leadership": {
                "docid": "tesla:leadership",
                "url": "https://www.tesla.com/leadership",
                "text": "Tesla's CEO is Elon Musk. The leadership page lists executive roles and bios."
            },
            "news:tesla": {
                "docid": "news:tesla",
                "url": "https://example.com/news/tesla",
                "text": "Breaking: Tesla maintains its leadership under CEO Elon Musk, focusing on EV innovation."
            }
        }
        # Keep a strong reference to the task: the event loop only holds weak
        # references, so an unreferenced task may be garbage-collected mid-run.
        self._server_task = asyncio.create_task(self._start_fastapi_server())

    def _get_free_port(self):
        """Return a port number the OS reported as free at the time of the call.

        The probing socket is closed before returning, so the port is not reserved;
        another process could claim it before the HTTP server binds.
        """
        with socket.socket() as sock:
            sock.bind(("", 0))
            return sock.getsockname()[1]

    async def _start_fastapi_server(self):
        """Build the FastAPI app with both endpoints and serve it until the server exits."""
        app = fastapi.FastAPI()

        @app.post("/search")
        async def search(request: Request):
            """Return up to ``k`` corpus pages for a JSON body ``{"query": ..., "k": ...}``."""
            req = await request.json()
            query = (req.get("query") or "").lower()
            # A missing or null "k" falls back to 10; negative values are clamped so
            # the slice below cannot silently drop results from the end of the list.
            raw_k = req.get("k")
            k = 10 if raw_k is None else max(int(raw_k), 0)

            # Naive keyword filter: the query-side check is the same for every page,
            # so evaluate it once instead of inside the per-page predicate.
            if "elon" in query or "tesla" in query:
                results = [
                    page
                    for page in self.pages.values()
                    if any(word in (page.get("text") or "").lower() for word in ("elon", "tesla"))
                ][:k]
            else:
                results = []
            return JSONResponse(content={"results": results})

        @app.post("/open")
        async def open_page(request: Request):
            """Look a page up by ``docid`` first, then by exact ``url``; empty list when not found."""
            req = await request.json()
            docid = req.get("docid")
            url = req.get("url")
            page = None
            if docid and docid in self.pages:
                page = self.pages[docid]
            elif url:
                page = next((p for p in self.pages.values() if p.get("url") == url), None)
            return JSONResponse(content={"results": [page] if page else []})

        # NOTE(review): host is given as a list of addresses (IPv6 + IPv4 wildcard);
        # presumably forwarded as-is to the asyncio server creation call -- confirm
        # if the uvicorn version changes.
        config = uvicorn.Config(app, host=["::", "0.0.0.0"], port=self.port, log_level="warning")
        server = uvicorn.Server(config)
        await server.serve()

    async def get_server_address(self) -> str:
        """Return ``"<node ip>:<port>"`` for the stub server."""
        return f"{self.address}:{self.port}"

# Spin up the stub actor and ask it where the HTTP server is listening.
search_server = SearchServer.remote()
address_ref = search_server.get_server_address.remote()
search_address = ray.get(address_ref)

# Export the endpoint for the LocalSearch env running in this process.
local_search_url = f"http://{search_address}"
os.environ["LOCAL_SEARCH_URL"] = local_search_url  # used by LocalSearch env
print("LOCAL_SEARCH_URL:", os.environ["LOCAL_SEARCH_URL"])

LOCAL_SEARCH_URL: http://10.122.253.153:34943


## 2) Compose VERL config and start standalone rollout
We set the rollout engine (vLLM or SGLang), model path, and select our agent loop name `fold_agent`.

In [3]:
# Download a small-ish instruct model (adjust if you already have one).
model_path = os.path.expanduser("~/Qwen/Qwen3-1.7B")
snapshot_download(repo_id="Qwen/Qwen3-1.7B", repo_type="model", local_dir=model_path)

with initialize_config_dir(config_dir=verl_config_dir):
    config = compose(
        config_name="ppo_trainer",
        overrides=[
            # rollout engine
            "actor_rollout_ref.rollout.name=" + rollout_name,
            "actor_rollout_ref.rollout.mode=async",
            "actor_rollout_ref.rollout.tensor_model_parallel_size=1",
            "actor_rollout_ref.rollout.data_parallel_size=1",
            "actor_rollout_ref.rollout.pipeline_model_parallel_size=1",
            "actor_rollout_ref.rollout.skip_tokenizer_init=False",
            "actor_rollout_ref.rollout.prompt_length=4096",
            "actor_rollout_ref.rollout.response_length=4096",
            # model
            "actor_rollout_ref.model.path=" + model_path,
            # agent loop: use our FoldAgent
            "actor_rollout_ref.rollout.agent.default_agent_loop=fold_agent",
            "actor_rollout_ref.rollout.agent.num_workers=1",
            # trainer sizing
            "trainer.n_gpus_per_node=2",
            "trainer.nnodes=1",
            "trainer.logger=['console']",
            "trainer.project_name=verl",
            "trainer.experiment_name=" + os.path.basename(model_path)
        ],
    )

# Make a safe copy of the trainer config to attach plugin without affecting rollout server instantiation
trainer_config_with_plugin = OmegaConf.create(OmegaConf.to_container(config, resolve=False))
OmegaConf.set_struct(trainer_config_with_plugin.actor_rollout_ref.rollout, False)

# Inject FoldAgent plugin fields on the copied config
trainer_config_with_plugin.actor_rollout_ref.rollout.plugin = OmegaConf.create({
    "workflow": "search",
    "max_turn": 32,
    "retry_cjk": 0,
    "turn_max_new_tokens": 1024,
    "max_session": 3,
    "val_max_session": 3,
    "session_timeout": 3600,
    "enable_summary": True,
    "branch_len": 256,
    "process_reward": "flat,scope",
    "max_traj": 2,
    "must_finish": False,
    "double_check": False,
    "must_search": True,
    "val_max_turn": 32,
    "val_response_length": 1024,
})

print("Plugin config:", OmegaConf.to_container(trainer_config_with_plugin.actor_rollout_ref.rollout.plugin, resolve=True))

# Start a standalone rollout server
rollout_server_class = get_rollout_replica_class(config.actor_rollout_ref.rollout.name)
rollout_server = rollout_server_class(
    replica_rank=0,
    config=config.actor_rollout_ref.rollout,
    model_config=config.actor_rollout_ref.model,
    gpus_per_node=config.trainer.n_gpus_per_node,
)
await rollout_server.init_standalone()
print("Rollout server address:", rollout_server.server_address)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize_config_dir(config_dir=verl_config_dir):


Plugin config: {'workflow': 'search', 'max_turn': 32, 'retry_cjk': 0, 'turn_max_new_tokens': 1024, 'max_session': 3, 'val_max_session': 3, 'session_timeout': 3600, 'enable_summary': True, 'branch_len': 256, 'process_reward': 'flat,scope', 'max_traj': 2, 'must_finish': False, 'double_check': False, 'must_search': True, 'val_max_turn': 32, 'val_response_length': 1024}
INFO 12-16 14:25:56 [__init__.py:235] Automatically detected platform cuda.




(pid=358607, ip=10.122.253.153) INFO 12-16 14:26:08 [__init__.py:235] Automatically detected platform cuda.
(pid=358804, ip=10.122.253.153) INFO 12-16 14:26:18 [__init__.py:235] Automatically detected platform cuda.


(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO:2025-12-16 14:26:24,269:vLLMHttpServer, replica_rank: 0, master address: 10.122.253.153, master port: 35093, data parallel master port: 39923
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO:2025-12-16 14:26:24,276:override_generation_config: {'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'repetition_penalty': 1.0, 'max_new_tokens': 4096}


(vLLMHttpServer pid=358804, ip=10.122.253.153) ['serve',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '/home/tiger/Qwen/Qwen3-1.7B',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--dtype',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  'bfloat16',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--load_format',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  'auto',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--max_model_len',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '8192',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--max_num_seqs',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '1024',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--enable_chunked_prefill',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--max_num_batched_tokens',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '8192',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--enable_prefix_caching',
(vLLMHttpServer pid=358804, ip=10.122.253.153)  '--enable_sleep_mode',
(vLLMHttpServer pid=35880

(vLLMHttpServer pid=358804, ip=10.122.253.153) Using blocking ray.get inside async actor. This blocks the event loop. Please use `await` on object ref with asyncio.gather if you want to yield execution to the event loop instead.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO:2025-12-16 14:26:24,999:replica_rank=0, node_rank=0, nnodes=1, get worker zmq addresses: ['ipc:///tmp/verl_vllm_zmq_358607_tiger.ipc']


(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:26:30 [config.py:1604] Using max model len 8192
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:26:30 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=8192.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:26:36 [__init__.py:235] Automatically detected platform cuda.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:26:41 [core.py:572] Waiting for init message from front-end.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:26:41 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='/home/tiger/Qwen/Qwen3-1.7B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-1.7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_paral

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  1.81it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00,  3.62it/s]
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) 


(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:26:46 [default_loader.py:262] Loading weights took 0.73 seconds
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:26:47 [gpu_model_runner.py:1892] Model loading took 3.2152 GiB and 0.879546 seconds
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:26:55 [backends.py:530] Using cache directory: /home/tiger/.cache/vllm/torch_compile_cache/3f4beb645d/rank_0_0/backbone for vLLM's torch.compile
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:26:55 [backends.py:541] Dynamo bytecode transform time: 7.67 s
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:27:01 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 6.164 s
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:27:02 [monitor.py:34] torch.compile takes 7.67 s in total
(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:27:03 [gpu_worker.py:255] Available KV

Capturing CUDA graph shapes:   0%|          | 0/67 [00:00<?, ?it/s]
Capturing CUDA graph shapes:   6%|▌         | 4/67 [00:00<00:01, 36.95it/s]
Capturing CUDA graph shapes:  12%|█▏        | 8/67 [00:00<00:01, 37.72it/s]
Capturing CUDA graph shapes:  18%|█▊        | 12/67 [00:00<00:01, 37.62it/s]
Capturing CUDA graph shapes:  24%|██▍       | 16/67 [00:00<00:01, 37.68it/s]
Capturing CUDA graph shapes:  30%|██▉       | 20/67 [00:00<00:01, 38.03it/s]
Capturing CUDA graph shapes:  36%|███▌      | 24/67 [00:00<00:01, 38.25it/s]
Capturing CUDA graph shapes:  42%|████▏     | 28/67 [00:00<00:01, 38.58it/s]
Capturing CUDA graph shapes:  48%|████▊     | 32/67 [00:00<00:00, 37.49it/s]
Capturing CUDA graph shapes:  54%|█████▎    | 36/67 [00:00<00:00, 35.17it/s]
Capturing CUDA graph shapes:  60%|█████▉    | 40/67 [00:01<00:00, 34.25it/s]
Capturing CUDA graph shapes:  66%|██████▌   | 44/67 [00:01<00:00, 35.20it/s]
Capturing CUDA graph shapes:  72%|███████▏  | 48/67 [00:01<00:00, 34.95it/s]
Capturing 

(vLLMAsyncRollout pid=358607, ip=10.122.253.153) INFO 12-16 14:27:06 [gpu_model_runner.py:2485] Graph capturing finished in 2 secs, took 0.49 GiB
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:06 [core.py:193] init engine (profile, create kv cache, warmup model) took 19.35 seconds


(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO:2025-12-16 14:27:06,900:Initializing a V1 LLM engine with config: model='/home/tiger/Qwen/Qwen3-1.7B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-1.7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=/home/tiger/Qwen/Qwen3-1.7B, num_scheduler_steps=1, multi_step_

(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:06 [serving_responses.py:89] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:06 [serving_chat.py:122] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:06 [serving_completion.py:77] Using default completion sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 4096}
Rollout server address: 10.122.253.153:37523


## 3) Build one test sample and run the agent loop
We create a `DataProto` with `ability=LocalSearch` and the fields expected by your env and agent logic.

In [4]:
from pprint import pprint

# Placeholder chat prompt; the env builds the real conversation through its create_chat flow.
raw_prompt = [
    {"role": "system", "content": "You are a helpful agent that uses FoldAgent-style tool calls."},
    {"role": "user", "content": "Who is the CEO of Tesla? Provide citations."}
]

extra_info = dict(
    query="Who is the CEO of Tesla?",
    answer="Elon Musk",  # label for reward checking
    prompt=raw_prompt,
    workflow="search",
)

uid = "test-fold-0001"
reward_model = "default"

# One entry per non-tensor field; each value is wrapped below into a length-1 object array.
sample_fields = {
    "raw_prompt": raw_prompt,
    "extra_info": extra_info,
    "uid": uid,
    "reward_model": reward_model,
    "ability": "LocalSearch",
    "agent_name": "fold_agent",
    "index": 0,
}

batch = DataProto.from_dict(
    tensors={},
    non_tensors={key: np.array([value], dtype=object) for key, value in sample_fields.items()},
    meta_info={"validate": False, "global_steps": 0},
)

# Forward the stub search endpoint into the worker's process environment.
worker_env = {"LOCAL_SEARCH_URL": f"http://{search_address}"}

# NOTE(review): the actor name is fixed, so re-running this cell in the same Ray
# session may fail because the name is already taken -- confirm, or restart first.
worker_factory = AgentLoopWorker.options(
    name="notebook_agent_loop_worker_fold",
    runtime_env={"env_vars": worker_env},
)
alm_worker = worker_factory.remote(
    trainer_config_with_plugin,
    [rollout_server.server_handle],  # reuse the already-started standalone server
    None,
)

# Drive the single-sample batch through the agent loop and block until it returns.
generation_ref = alm_worker.generate_sequences.remote(batch)
output = ray.get(generation_ref)

print("Reward score tensor present?", 'rm_scores' in output.batch)
print("Samples in batch (trajectories):", output.batch['responses'].shape[0])
extra_fields = output.non_tensor_batch
print("Extra fields keys:", list(extra_fields.keys()))
pprint(extra_fields.get("env_stats", [None])[0])

(AgentLoopWorker pid=359664, ip=10.122.253.153) INFO 12-16 14:27:41 [__init__.py:235] Automatically detected platform cuda.


(AgentLoopWorker pid=359664, ip=10.122.253.153) ERROR:2025-12-16 14:27:43,560:Error getting data from env: 'NoneType' object has no attribute 'is_train'


(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:43 [async_llm.py:269] Added request 3d262a401115422ebd2f59569edd20ca.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:45 [async_llm.py:269] Added request e770fa9e34184b4eb0e00b8f309c0ba7.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:45 [async_llm.py:269] Added request 0358a20261fc4392aeed6fb05161dd23.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:45 [async_llm.py:269] Added request f2eff71e56a140dc9b536daed9f65fab.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:46 [async_llm.py:269] Added request 7471592a2e534d0496ffb4ca2fb7b9d0.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:27:46 [async_llm.py:269] Added request 82d64171d84b434880b1892b2cbe0ee7.
(AgentLoopWorker pid=359664, ip=10.122.253.153) [Judged] score=1
(AgentLoopWorker pid=359664, ip=10.122.253.153) Label: Elon Musk
(AgentLoopWorker pid=359664, ip=10.122.253.153) Model: Elon Musk
R

(AgentLoopWorker pid=359664, ip=10.122.253.153) You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


## 4) Inspect conversation and branch details
FoldAgent returns one output per trajectory (main + branches during training). We iterate and print summaries.

In [5]:
def summarize_fold_output(dp, max_messages=12, max_chars=600):
    """Print a readable summary of an agent-loop result, one section per trajectory.

    Args:
        dp: Result object exposing ``meta_info``, ``batch['responses']`` (its first
            dimension is used as the number of trajectories) and a dict-like
            ``non_tensor_batch`` of per-trajectory fields.
        max_messages: Maximum number of messages printed per trajectory;
            ``None`` prints all of them.
        max_chars: Maximum number of characters shown for a string message
            content; ``None`` disables truncation.
    """
    fields = dp.non_tensor_batch

    def field_at(key, idx, default=None):
        # Return `default` when the field is absent. Indexing a one-element
        # placeholder list instead would raise IndexError for every idx >= 1.
        column = fields.get(key)
        return default if column is None else column[idx]

    print("=== DataProto Summary ===")
    print("\nMeta Info:")
    print(dp.meta_info)
    n = dp.batch['responses'].shape[0]
    for i in range(n):
        print("\n--- Sample", i, "---")
        agent_name = field_at("agent_name", i)
        num_turns = field_at("__num_turns__", i)
        env_stats = field_at("env_stats", i)
        num_branches = field_at("num_branches", i, 0)
        branch_names = field_at("branch_names", i, [])
        mask_rollout = field_at("mask_rollout", i)
        is_finish = field_at("is_finish", i)
        print(f"agent_name={agent_name}, num_turns={num_turns}, num_branches={num_branches}, mask_rollout={mask_rollout}, is_finish={is_finish}")
        if isinstance(env_stats, dict):
            print("env_stats keys:", list(env_stats.keys()))
        # Length check rather than truthiness, which is ambiguous for multi-element arrays.
        if branch_names is not None and len(branch_names) > 0:
            print("branch_names:", branch_names)
        messages = field_at("messages", i)
        if isinstance(messages, list):
            print("Conversation transcript (truncated):")
            for m in messages[:max_messages]:
                role = m.get("role")
                content = m.get("content")
                if isinstance(content, str) and max_chars is not None:
                    preview = content[:max_chars]
                else:
                    preview = content
                print(f"- {role}: {preview}")
    print("\n=== End Summary ===")

# Summarize the result of the agent-loop run stored in `output`.
summarize_fold_output(output)

=== DataProto Summary ===

Meta Info:
{'metrics': [{'generate_sequences': 2.8060615863651037, 'tool_calls': 0.01573640131391585}], 'reward_extra_keys': []}

--- Sample 0 ---
agent_name=main, num_turns=13, num_branches=0, mask_rollout=False, is_finish=True
env_stats keys: ['finish', 'search', 'open_page', 'change_answer', 'is_search', 'is_open', 'is_finish', 'visit_pages', 'action', 'session_time', 'get_final_score', 'traj_num', 'main_len', 'total_token', 'main_turn', 'is_branch', 'branch_success', 'use_all_branch', 'scope_judge']
Conversation transcript (truncated):
- system: You are a meticulous and strategic research agent. Your primary function is to conduct comprehensive, multi-step research to deliver a thorough, accurate, and well-supported report in response to the user's query.

Your operation is guided by these core principles:
* **Rigor:** Execute every step of the research process with precision and attention to detail.
* **Objectivity:** Synthesize information based on the 

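The previous `DataProto` used the wrong `workflow` value (`"search"`), and the run above never branched (`num_branches=0`). It should be `"search_branch"`; the cells below rebuild the sample with that value and run the agent loop again.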

In [6]:
# Placeholder chat prompt; the env builds the real conversation through its create_chat flow.
raw_prompt = [
    {"role": "system", "content": "You are a helpful agent that uses FoldAgent-style tool calls."},
    {"role": "user", "content": "Who is the CEO of Tesla? Provide citations."}
]

extra_info = dict(
    query="Who is the CEO of Tesla?",
    answer="Elon Musk",  # label for reward checking
    prompt=raw_prompt,
    workflow="search_branch",
)

uid = "test-fold-0001"
reward_model = "default"

# One entry per non-tensor field; each value is wrapped below into a length-1 object array.
sample_fields = {
    "raw_prompt": raw_prompt,
    "extra_info": extra_info,
    "uid": uid,
    "reward_model": reward_model,
    "ability": "LocalSearch",
    "agent_name": "fold_agent",
    "index": 0,
}

batch = DataProto.from_dict(
    tensors={},
    non_tensors={key: np.array([value], dtype=object) for key, value in sample_fields.items()},
    meta_info={"validate": False, "global_steps": 0},
)


In [8]:
# Send the rebuilt batch through the existing worker and wait for the result.
generation_ref = alm_worker.generate_sequences.remote(batch)
output = ray.get(generation_ref)

print("Reward score tensor present?", 'rm_scores' in output.batch)
print("Samples in batch (trajectories):", output.batch['responses'].shape[0])
extra_fields = output.non_tensor_batch
print("Extra fields keys:", list(extra_fields.keys()))
pprint(extra_fields.get("env_stats", [None])[0])

(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:20 [async_llm.py:269] Added request b4621e3e6fe549c8ba63deaf402a094e.


(AgentLoopWorker pid=359664, ip=10.122.253.153) ERROR:2025-12-16 14:51:20,470:Error getting data from env: 'NoneType' object has no attribute 'is_train'


(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:22 [async_llm.py:269] Added request 8b29e128d55f4a41a6ddd5a29d25b1c6.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:24 [async_llm.py:269] Added request c33f6e529bcc4d23b0fd1ba33e855216.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:25 [async_llm.py:269] Added request 955d80ba456242a296f7f35ff6820426.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:25 [async_llm.py:269] Added request 93337e4b593445dda521ef50612cb091.
(vLLMHttpServer pid=358804, ip=10.122.253.153) INFO 12-16 14:51:26 [async_llm.py:269] Added request 6492eaf366cd4052bc3182eb502883c3.
(AgentLoopWorker pid=359664, ip=10.122.253.153) [Judged] score=1
(AgentLoopWorker pid=359664, ip=10.122.253.153) Label: Elon Musk
(AgentLoopWorker pid=359664, ip=10.122.253.153) Model: Elon Musk
(AgentLoopWorker pid=359664, ip=10.122.253.153) [CALL OPENAI] Error after 3 attempts: Invalid type for url.  Expected str or httpx.URL,

(AgentLoopWorker pid=359664, ip=10.122.253.153) [CALL OPENAI] Error after 3 attempts: Invalid type for url.  Expected str or httpx.URL, got <class 'NoneType'>: None


In [10]:
def summarize_fold_output(dp, max_messages=12, max_chars=None):
    """Print a readable summary of an agent-loop result, one section per trajectory.

    This redefinition shadows the earlier ``summarize_fold_output`` in the notebook;
    by default it prints message contents in full instead of cutting them short.

    Args:
        dp: Result object exposing ``meta_info``, ``batch['responses']`` (its first
            dimension is used as the number of trajectories) and a dict-like
            ``non_tensor_batch`` of per-trajectory fields.
        max_messages: Maximum number of messages printed per trajectory;
            ``None`` prints all of them.
        max_chars: Maximum number of characters shown for a string message
            content; ``None`` (the default) prints the full content.
    """
    fields = dp.non_tensor_batch

    def field_at(key, idx, default=None):
        # Return `default` when the field is absent. Indexing a one-element
        # placeholder list instead would raise IndexError for every idx >= 1.
        column = fields.get(key)
        return default if column is None else column[idx]

    print("=== DataProto Summary ===")
    print("\nMeta Info:")
    print(dp.meta_info)
    n = dp.batch['responses'].shape[0]
    for i in range(n):
        print("\n--- Sample", i, "---")
        agent_name = field_at("agent_name", i)
        num_turns = field_at("__num_turns__", i)
        env_stats = field_at("env_stats", i)
        num_branches = field_at("num_branches", i, 0)
        branch_names = field_at("branch_names", i, [])
        mask_rollout = field_at("mask_rollout", i)
        is_finish = field_at("is_finish", i)
        print(f"agent_name={agent_name}, num_turns={num_turns}, num_branches={num_branches}, mask_rollout={mask_rollout}, is_finish={is_finish}")
        if isinstance(env_stats, dict):
            print("env_stats keys:", list(env_stats.keys()))
        # Length check rather than truthiness, which is ambiguous for multi-element arrays.
        if branch_names is not None and len(branch_names) > 0:
            print("branch_names:", branch_names)
        messages = field_at("messages", i)
        if isinstance(messages, list):
            print("Conversation transcript (truncated):")
            for m in messages[:max_messages]:
                role = m.get("role")
                content = m.get("content")
                if isinstance(content, str) and max_chars is not None:
                    preview = content[:max_chars]
                else:
                    preview = content
                print(f"- {role}: {preview}")
    print("\n=== End Summary ===")

# Summarize the result of the re-run stored in `output`.
summarize_fold_output(output)

=== DataProto Summary ===

Meta Info:
{'metrics': [{'generate_sequences': 6.561201705830172, 'tool_calls': 0.002620855113491416}, {'generate_sequences': 6.561201705830172, 'tool_calls': 0.002620855113491416}], 'reward_extra_keys': []}

--- Sample 0 ---
agent_name=main, num_turns=9, num_branches=2, mask_rollout=False, is_finish=True
env_stats keys: ['finish', 'search', 'open_page', 'change_answer', 'is_search', 'is_open', 'is_finish', 'visit_pages', 'action', 'session_time', 'get_final_score', 'traj_num', 'main_len', 'total_token', 'main_turn', 'is_branch', 'branch_success', 'use_all_branch', 'scope_judge']
branch_names: ['#0-Agent', '#1-Agent']
Conversation transcript (truncated):
- system: You are a **Multi-Role Research Agent**, an advanced AI designed to conduct comprehensive, multi-step research. Your purpose is to deliver a thorough, accurate, and well-supported report in response to a user's query.

You operate in one of two modes: **MAIN** or **BRANCH**. Your current role will b