# FoldAgent LocalSearch Test (bc_test_emh.parquet)

This notebook tests your FoldAgent loop in the LocalSearch environment:
- Starts a standalone rollout server (vLLM by default).
- Reads a sample from `/opt/tiger/verl_context_folding/bc_test_emh.parquet`.
- Uses `LOCAL_SEARCH_URL` (set below) to hit your local search server.
- Feeds one sample to `FoldAgentLoop` and prints a compact summary.

Note: Replace `LOCAL_SEARCH_URL` with your actual local search server endpoint (e.g., `http://127.0.0.1:8000`).

In [1]:
import os
import asyncio
import numpy as np
import pandas as pd

import ray
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

import verl
from verl import DataProto
from verl.experimental.agent_loop import AgentLoopWorker
from verl.experimental.agent_loop.FoldAgent import FoldAgentLoop  # Ensures @register("fold_agent") runs
from verl.workers.rollout.replica import get_rollout_replica_class
from huggingface_hub import snapshot_download


# Fast Ray init; tweak as needed
ray.init(runtime_env={"env_vars": {"VLLM_USE_V1": "1"}}, ignore_reinit_error=True)
verl_config_dir = os.path.join(os.path.dirname(verl.__file__), "trainer/config")

# Rollout engine: vllm or sglang
rollout_name = "vllm"  # or "sglang"

# Download a small-ish instruct model (adjust if you already have one).
model_path = os.path.expanduser("~/Qwen/Qwen3-8B")
snapshot_download(repo_id="Qwen/Qwen3-8B", repo_type="model", local_dir=model_path)

with initialize_config_dir(config_dir=verl_config_dir):
    config = compose(
        config_name="ppo_trainer",
        overrides=[
            # rollout engine
            "actor_rollout_ref.rollout.name=" + rollout_name,
            "actor_rollout_ref.rollout.mode=async",
            "actor_rollout_ref.rollout.tensor_model_parallel_size=8",
            "actor_rollout_ref.rollout.data_parallel_size=1",
            "actor_rollout_ref.rollout.pipeline_model_parallel_size=1",
            "actor_rollout_ref.rollout.skip_tokenizer_init=False",
            "actor_rollout_ref.rollout.prompt_length=4096",
            "actor_rollout_ref.rollout.response_length=32768",
            # model
            "actor_rollout_ref.model.path=" + model_path,
            # agent loop: use our FoldAgent
            "actor_rollout_ref.rollout.agent.default_agent_loop=fold_agent",
            "actor_rollout_ref.rollout.agent.num_workers=1",
            # trainer sizing
            "trainer.n_gpus_per_node=8",
            "trainer.nnodes=1",
            "trainer.logger=['console']",
            "trainer.project_name=verl",
            "trainer.experiment_name=" + os.path.basename(model_path)
        ],
    )

# Make a safe copy of the trainer config to attach plugin without affecting rollout server instantiation
trainer_config_with_plugin = OmegaConf.create(OmegaConf.to_container(config, resolve=False))
OmegaConf.set_struct(trainer_config_with_plugin.actor_rollout_ref.rollout, False)

# Inject FoldAgent plugin fields on the copied config
trainer_config_with_plugin.actor_rollout_ref.rollout.plugin = OmegaConf.create({
    "workflow": "search",
    "max_turn": 5,
    "retry_cjk": 0,
    "turn_max_new_tokens": 2048,
    "max_session": 3,
    "val_max_session": 3,
    "session_timeout": 3600,
    "enable_summary": True,
    "branch_len": 256,
    "process_reward": "flat,scope",
    "max_traj": 4,
    "must_finish": False,
    "double_check": False,
    "must_search": True,
    "val_max_turn": 32,
    "val_response_length": 1024,
})

print("Plugin config:", OmegaConf.to_container(trainer_config_with_plugin.actor_rollout_ref.rollout.plugin, resolve=True))
print("Model:", config.actor_rollout_ref.model.path)
print("Rollout:", config.actor_rollout_ref.rollout.name)
print("Agent loop:", config.actor_rollout_ref.rollout.agent.default_agent_loop)

[2025-12-29 12:44:42,762 I 431471 431471] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:63180
[2025-12-29 12:44:43,046 I 431471 431471] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:63180
[2025-12-29 12:44:45,531 I 431471 431471] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:63180
[2025-12-29 12:44:45,532 I 431471 431471] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:63180
2025-12-29 12:44:45,534	INFO worker.py:1887 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[2025-12-29 12:44:45,536 I 431471 431471] gcs_rpc_client.h:648: successful connect gcs: 10.122.253.153:63180


Logs are printed to python-core-driver-01000000ffffffffffffffffffffffffffffffffffffffffffffffff_431471.log


Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

generation_config.json: 0.00B [00:00, ?B/s]

LICENSE: 0.00B [00:00, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/3.19G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

## Start standalone rollout server
Initializes a single-node rollout and exposes `server_handle` for token generation.

In [2]:
rollout_server_class = get_rollout_replica_class(config.actor_rollout_ref.rollout.name)
rollout_server = rollout_server_class(
    replica_rank=0,
    config=config.actor_rollout_ref.rollout,
    model_config=config.actor_rollout_ref.model,
    gpus_per_node=config.trainer.n_gpus_per_node,
)
await rollout_server.init_standalone()
print("Rollout server address:", rollout_server.server_address)

INFO 12-29 09:45:35 [__init__.py:235] Automatically detected platform cuda.




(pid=525452, ip=10.122.253.153) INFO 12-29 09:45:48 [__init__.py:235] Automatically detected platform cuda.
(pid=528682, ip=10.122.253.153) INFO 12-29 09:45:58 [__init__.py:235] Automatically detected platform cuda.


(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO:2025-12-29 09:46:03,623:vLLMHttpServer, replica_rank: 0, master address: 10.122.253.153, master port: 44041, data parallel master port: 44615
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO:2025-12-29 09:46:03,628:override_generation_config: {'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'repetition_penalty': 1.0, 'max_new_tokens': 32768}


(vLLMHttpServer pid=528682, ip=10.122.253.153) ['serve',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '/home/tiger/Qwen/Qwen3-4B',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--dtype',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  'bfloat16',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--load_format',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  'auto',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--max_model_len',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '36864',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--max_num_seqs',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '1024',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--enable_chunked_prefill',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--max_num_batched_tokens',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '8192',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--enable_prefix_caching',
(vLLMHttpServer pid=528682, ip=10.122.253.153)  '--enable_sleep_mode',
(vLLMHttpServer pid=528682

(vLLMHttpServer pid=528682, ip=10.122.253.153) Using blocking ray.get inside async actor. This blocks the event loop. Please use `await` on object ref with asyncio.gather if you want to yield execution to the event loop instead.
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO:2025-12-29 09:46:04,371:replica_rank=0, node_rank=0, nnodes=1, get worker zmq addresses: ['ipc:///tmp/verl_vllm_zmq_525452_tiger.ipc']


(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:10 [config.py:1604] Using max model len 36864
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:10 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=8192.
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:16 [__init__.py:235] Automatically detected platform cuda.
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:21 [core.py:572] Waiting for init message from front-end.
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:21 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='/home/tiger/Qwen/Qwen3-4B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-4B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=36864, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_paralle

Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  33% Completed | 1/3 [00:00<00:01,  1.38it/s]
Loading safetensors checkpoint shards:  67% Completed | 2/3 [00:01<00:00,  1.23it/s]


(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:26 [default_loader.py:262] Loading weights took 1.75 seconds


Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:01<00:00,  1.96it/s]
Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:01<00:00,  1.71it/s]
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) 


(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:27 [gpu_model_runner.py:1892] Model loading took 7.5552 GiB and 1.925920 seconds
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:37 [backends.py:530] Using cache directory: /home/tiger/.cache/vllm/torch_compile_cache/bd66293048/rank_0_0/backbone for vLLM's torch.compile
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:37 [backends.py:541] Dynamo bytecode transform time: 9.89 s
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:46 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 7.454 s
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:49 [monitor.py:34] torch.compile takes 9.89 s in total
(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:50 [gpu_worker.py:255] Available KV cache memory: 26.52 GiB
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:50 [kv_cache_utils.py:833] GPU KV cach

Capturing CUDA graph shapes:   0%|          | 0/67 [00:00<?, ?it/s]
Capturing CUDA graph shapes:   4%|▍         | 3/67 [00:00<00:02, 23.60it/s]
Capturing CUDA graph shapes:   9%|▉         | 6/67 [00:00<00:02, 23.89it/s]
Capturing CUDA graph shapes:  13%|█▎        | 9/67 [00:00<00:02, 23.88it/s]
Capturing CUDA graph shapes:  18%|█▊        | 12/67 [00:00<00:02, 23.90it/s]
Capturing CUDA graph shapes:  22%|██▏       | 15/67 [00:00<00:02, 23.49it/s]
Capturing CUDA graph shapes:  27%|██▋       | 18/67 [00:00<00:02, 23.24it/s]
Capturing CUDA graph shapes:  31%|███▏      | 21/67 [00:00<00:01, 23.43it/s]
Capturing CUDA graph shapes:  36%|███▌      | 24/67 [00:01<00:01, 23.55it/s]
Capturing CUDA graph shapes:  40%|████      | 27/67 [00:01<00:01, 23.81it/s]
Capturing CUDA graph shapes:  45%|████▍     | 30/67 [00:01<00:01, 23.45it/s]
Capturing CUDA graph shapes:  49%|████▉     | 33/67 [00:01<00:01, 23.51it/s]
Capturing CUDA graph shapes:  54%|█████▎    | 36/67 [00:01<00:01, 24.28it/s]
Capturing C

(vLLMAsyncRollout pid=525452, ip=10.122.253.153) INFO 12-29 09:46:53 [gpu_model_runner.py:2485] Graph capturing finished in 3 secs, took 0.61 GiB
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:53 [core.py:193] init engine (profile, create kv cache, warmup model) took 26.04 seconds


Capturing CUDA graph shapes: 100%|██████████| 67/67 [00:02<00:00, 24.79it/s]
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO:2025-12-29 09:46:54,200:Initializing a V1 LLM engine with config: model='/home/tiger/Qwen/Qwen3-4B', speculative_config=None, tokenizer='/home/tiger/Qwen/Qwen3-4B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=36864, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_

(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:54 [serving_responses.py:89] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 32768}
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:54 [serving_chat.py:122] Using default chat sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 32768}
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 09:46:54 [serving_completion.py:77] Using default completion sampling params from model: {'repetition_penalty': 1.0, 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'max_tokens': 32768}
Rollout server address: 10.122.253.153:36891


## Read dataset and build one test sample
Extracts `query` and `answer` from `bc_test_emh.parquet`, sets LocalSearch URL, and builds `DataProto`.

In [3]:
dataset_path = "/opt/tiger/verl_context_folding/bc_test_emh.parquet"
assert os.path.exists(dataset_path), f"Dataset not found: {dataset_path}"
df = pd.read_parquet(dataset_path)
required_cols = {"prompt", "answer"}
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise RuntimeError(f"Dataset missing required columns: {missing}. Columns: {list(df.columns)}")

# Choose a sample
sample_idx = 0
raw_prompt = df.iloc[sample_idx]["prompt"]  # raw prompt
# query = str(df.iloc[sample_idx]["prompt"])  # question text
answer = str(df.iloc[sample_idx]["answer"])  # ground-truth label

print(answer)

extra_info = {
    "workflow": "search_branch",
    "raw_prompt": raw_prompt,
    "answer": answer,
}
uid = f"bc_test_emh:{sample_idx}"
reward_model = "default"

batch = DataProto.from_dict(
    tensors={},
    non_tensors={
        "raw_prompt": np.array([raw_prompt], dtype=object),
        "extra_info": np.array([extra_info], dtype=object),
        "uid": np.array([uid], dtype=object),
        "reward_model": np.array([reward_model], dtype=object),
        "ability": np.array(["LocalSearch"], dtype=object),
        "agent_name": np.array(["fold_agent"], dtype=object),
        "index": np.array([0], dtype=object),
    },
    meta_info={"validate": False, "global_steps": 0},
)
batch.non_tensor_batch['extra_info'][0]['answer']

Emmanuel Kwesi Danso Arthur Junior 


'Emmanuel Kwesi Danso Arthur Junior '

## Run the agent loop and summarize output
Creates a worker bound to the rollout server, runs `generate_sequences`, and prints a concise summary.

In [4]:
# Inject FoldAgent plugin fields on the copied config
trainer_config_with_plugin.actor_rollout_ref.rollout.plugin = OmegaConf.create({
    "workflow": "search",
    "max_turn": 10,
    "retry_cjk": 0,
    "turn_max_new_tokens": 2048,
    "max_session": 3,
    "val_max_session": 3,
    "session_timeout": 3600,
    "enable_summary": True,
    "branch_len": 256,
    "process_reward": "flat,scope",
    "max_traj": 4,
    "must_finish": False,
    "double_check": False,
    "must_search": True,
    "val_max_turn": 32,
    "val_response_length": 1024,
})

print("Plugin config:", OmegaConf.to_container(trainer_config_with_plugin.actor_rollout_ref.rollout.plugin, resolve=True))


Plugin config: {'workflow': 'search', 'max_turn': 10, 'retry_cjk': 0, 'turn_max_new_tokens': 2048, 'max_session': 3, 'val_max_session': 3, 'session_timeout': 3600, 'enable_summary': True, 'branch_len': 256, 'process_reward': 'flat,scope', 'max_traj': 4, 'must_finish': False, 'double_check': False, 'must_search': True, 'val_max_turn': 32, 'val_response_length': 1024}


In [5]:
# LOCAL_SEARCH_URL = os.environ.get("LOCAL_SEARCH_URL", "http://[2605:340:cd51:7700:3900:9815:f3ac:c6d2]:8000").rstrip("/")
LOCAL_SEARCH_URL = "http://[2605:340:cd51:7700:912f:284d:9dd7:367f]:8000"
print("Using LOCAL_SEARCH_URL:", LOCAL_SEARCH_URL)

alm_worker = AgentLoopWorker.options(
    name="fold_agent_local_search_worker_1",
    runtime_env={"env_vars": {"LOCAL_SEARCH_URL": LOCAL_SEARCH_URL}},
).remote(
    trainer_config_with_plugin,
    [rollout_server.server_handle],
    None,
)
output = ray.get(alm_worker.generate_sequences.remote(batch))

print("Reward score tensor present?", 'rm_scores' in output.batch)
print("Trajectories:", output.batch['responses'].shape[0])

Using LOCAL_SEARCH_URL: http://[2605:340:cd51:7700:912f:284d:9dd7:367f]:8000


(AgentLoopWorker pid=609975, ip=10.122.253.153) INFO 12-29 12:27:03 [__init__.py:235] Automatically detected platform cuda.
(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 12:27:05 [async_llm.py:269] Added request 9b9f89c1eafb489e9f8f4e879e57ddcc.
(AgentLoopWorker pid=609975, ip=10.122.253.153) {'extra_info': [{'workflow': 'search_branch', 'raw_prompt': array([{'content': "You are an expert research agent focused on comprehensive research strategy, execution, and final report writing. Your core goal is to be maximally helpful to the user by researching their query thoroughly and creating an excellent research report that answers the query very well.\n\nYou have access to the following functions:\n\n---- BEGIN FUNCTION #1: search ----\nDescription: Performs a web search: supply a string 'query' and optional 'topk'. The tool retrieves the top 'topk' results (default 10) for the query, returning their docid, url, and document content (may be truncated based on token limits).\nPa

(AgentLoopWorker pid=609975, ip=10.122.253.153) You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [24]:

def summarize_fold_output(dp):
    print("\n=== Summary ===")
    n = dp.batch['responses'].shape[0]
    for i in range(n):
        print(f"-- trajectory {i} --")
        env_stats = dp.non_tensor_batch.get("env_stats", [None])[i]
        agent_name = dp.non_tensor_batch.get("agent_name", [None])[i]
        mask_rollout = dp.non_tensor_batch.get("mask_rollout", [None])[i]
        is_finish = dp.non_tensor_batch.get("is_finish", [None])[i]
        branch_names = dp.non_tensor_batch.get("branch_names", [None])[i]
        if isinstance(env_stats, dict):
            print("env_stats keys:", list(env_stats.keys()))
        print("agent_name:", agent_name, "mask_rollout:", mask_rollout, "is_finish:", is_finish)
        if branch_names:
            print("branch_names:", branch_names)
        messages = dp.non_tensor_batch.get("messages", [None])[i]
        if isinstance(messages, list):
            print("Transcript preview:")
            for m in messages:
                role = m.get("role")
                content = m.get("content")
                snippet = content.replace("\n", " ") if isinstance(content, str) else str(content)[:200]
                print(f"- {role}: {snippet}")
    print("=== End Summary ===")

summarize_fold_output(output)


=== Summary ===
-- trajectory 0 --
env_stats keys: ['finish', 'search', 'open_page', 'change_answer', 'is_search', 'is_open', 'is_finish', 'visit_pages', 'action', 'session_time', 'get_final_score', 'traj_num', 'main_len', 'total_token', 'main_turn', 'is_branch', 'branch_success', 'use_all_branch']
agent_name: main mask_rollout: False is_finish: True
Transcript preview:
- system: You are a meticulous and strategic research agent. Your primary function is to conduct comprehensive, multi-step research to deliver a thorough, accurate, and well-supported report in response to the user's query.  Your operation is guided by these core principles: * **Rigor:** Execute every step of the research process with precision and attention to detail. * **Objectivity:** Synthesize information based on the evidence gathered, not on prior assumptions. Note and investigate conflicting information. * **Thoroughness:** Never settle for a surface-level answer. Always strive to uncover the underlying details

In [12]:
# Choose a sample
sample_idx = 1
raw_prompt = df.iloc[sample_idx]["prompt"]  # raw prompt
# query = str(df.iloc[sample_idx]["prompt"])  # question text
answer = str(df.iloc[sample_idx]["answer"])  # ground-truth label

print(answer)

extra_info = {
    "workflow": "search_branch",
    "raw_prompt": raw_prompt,
    "answer": answer,
}
uid = f"bc_test_emh:{sample_idx}"
reward_model = "default"

batch = DataProto.from_dict(
    tensors={},
    non_tensors={
        "raw_prompt": np.array([raw_prompt], dtype=object),
        "extra_info": np.array([extra_info], dtype=object),
        "uid": np.array([uid], dtype=object),
        "reward_model": np.array([reward_model], dtype=object),
        "ability": np.array(["LocalSearch"], dtype=object),
        "agent_name": np.array(["fold_agent"], dtype=object),
        "index": np.array([0], dtype=object),
    },
    meta_info={"validate": False, "global_steps": 0},
)
batch.non_tensor_batch['extra_info'][0]['answer']

Lebo


'Lebo'

In [17]:
output = ray.get(alm_worker.generate_sequences.remote(batch))

print("Reward score tensor present?", 'rm_scores' in output.batch)
print("Trajectories:", output.batch['responses'].shape[0])

(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 12:42:00 [async_llm.py:269] Added request 8c86348b91464963a005a104c1c16d1c.
(AgentLoopWorker pid=609975, ip=10.122.253.153) {'extra_info': [{'workflow': 'search_branch', 'raw_prompt': array([{'content': "You are an expert research agent focused on comprehensive research strategy, execution, and final report writing. Your core goal is to be maximally helpful to the user by researching their query thoroughly and creating an excellent research report that answers the query very well.\n\nYou have access to the following functions:\n\n---- BEGIN FUNCTION #1: search ----\nDescription: Performs a web search: supply a string 'query' and optional 'topk'. The tool retrieves the top 'topk' results (default 10) for the query, returning their docid, url, and document content (may be truncated based on token limits).\nParameters:\n  (1) query (string, required): The query string for the search.\n  (2) topk (integer, optional): Return the top 

(vLLMHttpServer pid=528682, ip=10.122.253.153) INFO 12-29 12:42:09 [async_llm.py:269] Added request c0953239d4bb4691a0e91f3adb6a6f29.


In [16]:

def summarize_fold_output(dp):
    print("\n=== Summary ===")
    n = dp.batch['responses'].shape[0]
    for i in range(n):
        print(f"-- trajectory {i} --")
        env_stats = dp.non_tensor_batch.get("env_stats", [None])[i]
        agent_name = dp.non_tensor_batch.get("agent_name", [None])[i]
        mask_rollout = dp.non_tensor_batch.get("mask_rollout", [None])[i]
        is_finish = dp.non_tensor_batch.get("is_finish", [None])[i]
        branch_names = dp.non_tensor_batch.get("branch_names", [None])[i]
        if isinstance(env_stats, dict):
            print("env_stats keys:", list(env_stats.keys()))
        print("agent_name:", agent_name, "mask_rollout:", mask_rollout, "is_finish:", is_finish)
        if branch_names:
            print("branch_names:", branch_names)
        messages = dp.non_tensor_batch.get("messages", [None])[i]
        if isinstance(messages, list):
            print("Transcript preview:")
            for m in messages:
                role = m.get("role")
                content = m.get("content")
                # snippet = content.replace("\n", " ") if isinstance(content, str) else str(content)[:200]
                print(f"- {role}: {content}")
    print("=== End Summary ===")

summarize_fold_output(output)


=== Summary ===
-- trajectory 0 --
env_stats keys: ['finish', 'search', 'open_page', 'change_answer', 'is_search', 'is_open', 'is_finish', 'visit_pages', 'action', 'session_time', 'get_final_score', 'traj_num', 'main_len', 'total_token', 'main_turn', 'is_branch', 'branch_success', 'use_all_branch']
agent_name: main mask_rollout: False is_finish: True
Transcript preview:
- system: You are a **Multi-Role Research Agent**, an advanced AI designed to conduct comprehensive, multi-step research. Your purpose is to deliver a thorough, accurate, and well-supported report in response to a user's query.

You operate in one of two modes: **MAIN** or **BRANCH**. Your current role will be clearly stated at the beginning of each turn. You must follow ONLY the instructions for your assigned role.

---

### **Global Rules (Apply to Both Roles)**

* **Tool Integrity:** You have access to tools like `search` and `open_page`. Never simulate tool outputs. Always use the provided tools for research.
* **F