Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions renderers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
ToolCallParseStatus,
ToolSpec,
VideoPart,
attribute_text_segments,
build_training_sample,
build_trajectory_step,
create_renderer,
Expand Down Expand Up @@ -90,6 +91,7 @@
"ToolSpec",
"VideoPart",
"__version__",
"attribute_text_segments",
"build_training_sample",
"build_trajectory_step",
"create_renderer",
Expand Down
316 changes: 314 additions & 2 deletions renderers/base.py

Large diffs are not rendered by default.

45 changes: 41 additions & 4 deletions renderers/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from renderers.base import (
Message,
MultiModalData,
RenderedTokens,
Renderer,
RendererPool,
ToolCallParseStatus,
Expand Down Expand Up @@ -127,6 +128,7 @@ async def generate(
model: str,
prompt_ids: list[int] | None = None,
multi_modal_data: MultiModalData | None = None,
prompt_attribution: RenderedTokens | None = None,
tools: list[ToolSpec] | None = None,
sampling_params: dict[str, Any] | None = None,
cache_salt: str | None = None,
Expand All @@ -141,7 +143,11 @@ async def generate(
renderer) and ``logprobs=1`` (we always emit completion_logprobs). Pass
``prompt_ids`` to skip rendering and use a prebuilt token sequence —
pair it with ``multi_modal_data`` when the prebuilt prompt has image /
video placeholders that need engine-side mm payload.
video placeholders that need engine-side mm payload, and with
``prompt_attribution`` (a :class:`RenderedTokens` whose ``token_ids``
match the passed-in ``prompt_ids``) to carry the renderer's per-token
attribution (``is_content`` / ``sampled_mask`` / ``message_indices`` /
``message_roles``) into the result without re-rendering.

For multimodal renderers (e.g. ``Qwen3VLRenderer``), the call goes
through ``renderer.render(...)`` to recover the ``multi_modal_data``
Expand All @@ -161,7 +167,19 @@ async def generate(

Returns a dict with: request_id, prompt_ids, completion_ids,
completion_logprobs, content, reasoning_content, tool_calls,
finish_reason, routed_experts.
finish_reason, routed_experts, multi_modal_data, prompt_attribution.

``prompt_attribution`` is the renderer's :class:`RenderedTokens` for
the prompt — either the one this call computed via
``renderer.render(...)`` or the one the caller threaded in alongside
``prompt_ids``. Carries ``token_ids``, ``message_indices``,
``sampled_mask``, ``is_content``, ``message_roles``, and
``multi_modal_data``, so downstream consumers (verifiers
``RendererClient`` → prime-rl) can build per-token loss masks
(``content_mask_for_roles({"tool"})`` for SFT-on-tool-body,
``sampled_mask`` for RL trainable spans) without a second render
pass. ``None`` when the caller passed pre-built ``prompt_ids``
without attribution.
"""
if tools and not getattr(renderer, "supports_tools", True):
raise ValueError(
Expand All @@ -171,15 +189,26 @@ async def generate(

def _prepare():
if prompt_ids is not None:
return list(prompt_ids), renderer.get_stop_token_ids(), multi_modal_data
# Caller-supplied prompt; if they also gave us pre-computed
# attribution (e.g. the bridge path in verifiers), thread it
# through unchanged.
return (
list(prompt_ids),
renderer.get_stop_token_ids(),
multi_modal_data,
prompt_attribution,
)
rendered = renderer.render(messages, tools=tools, add_generation_prompt=True)
return (
rendered.token_ids,
renderer.get_stop_token_ids(),
rendered.multi_modal_data,
rendered,
)

prompt_ids, stop_token_ids, mm_data = await _maybe_offload(renderer, _prepare)
prompt_ids, stop_token_ids, mm_data, prompt_attr = await _maybe_offload(
renderer, _prepare
)

if max_prompt_len is None:
max_prompt_len = await _resolve_max_prompt_len(client, model)
Expand Down Expand Up @@ -279,6 +308,14 @@ def _prepare():
# callers can persist it on the trajectory step for downstream
# multi-turn bridging and training-sample construction.
"multi_modal_data": mm_data,
# The renderer's per-token attribution for the prompt — either
# the RenderedTokens computed here via renderer.render(...) or
# the one threaded in by the caller alongside prompt_ids (the
# bridge path). Lets downstream consumers (verifiers
# RendererClient → prime-rl) build SFT-on-tool-body and other
# selective loss masks without a second render pass. ``None``
# when the caller passed prompt_ids without attribution.
"prompt_attribution": prompt_attr,
}


Expand Down
Loading
Loading