Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions conformance.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@
# between pinned-spec and spec-head is the consumer's job (e.g., the
# spec docs site computes the difference and renders accordingly).
#
# Convention: this file is only updated as part of release PRs. Between
# releases, the manifest reflects the most-recently-published version
# so external readers never see a `since` referring to an unreleased
# pre-tag commit.
# Convention: this file is updated as part of release PRs AND as part
# of feature PRs that bump the spec submodule pin (the manifest guard
# requires entries for every Accepted proposal in the pinned spec, so
# a submodule bump forces this file to update too). Such bump PRs set
# `since` to the upcoming release version; between the bump PR and the
# matching tag, external readers will see a `since` referring to the
# upcoming, unreleased version.

[manifest]
implementation = "openarmature-python"
spec_pin = "v0.27.1"
spec_pin = "v0.31.0"

# Status values:
# implemented — shipped behavior matches the proposal's contract
Expand Down Expand Up @@ -175,3 +178,19 @@ since = "0.10.0"
[proposals."0036"]
status = "implemented"
since = "0.10.0"

# Spec v0.28.0-v0.31.0 (proposals 0037, 0039, 0040, 0041). Proposal
# 0038 (Gemini) is still mid-acceptance on the spec side and is not
# part of v0.31.0, so it has no entry here yet.
[proposals."0037"]
status = "not-yet"

[proposals."0039"]
status = "implemented"
since = "0.11.0"
Comment thread
chris-colinsky marked this conversation as resolved.

[proposals."0040"]
status = "not-yet"

[proposals."0041"]
status = "implemented"
since = "0.11.0"
Comment thread
chris-colinsky marked this conversation as resolved.
2 changes: 1 addition & 1 deletion openarmature-spec
Submodule openarmature-spec updated 56 files
+62 −0 CHANGELOG.md
+5 −5 README.md
+14 −11 docs/proposals.md
+1 −0 docs/proposals/0039-observability-caller-supplied-invocation-id.md
+1 −0 docs/proposals/0040-observability-mid-invocation-metadata-open-span-update.md
+1 −0 docs/proposals/0041-observability-langfuse-metadata-key-collision.md
+161 −79 proposals/0037-llm-provider-anthropic-messages-mapping.md
+248 −0 proposals/0039-observability-caller-supplied-invocation-id.md
+370 −0 proposals/0040-observability-mid-invocation-metadata-open-span-update.md
+203 −0 proposals/0041-observability-langfuse-metadata-key-collision.md
+35 −10 spec/graph-engine/spec.md
+30 −0 spec/llm-provider/conformance/033-anthropic-basic-message-round-trip.md
+44 −0 spec/llm-provider/conformance/033-anthropic-basic-message-round-trip.yaml
+29 −0 spec/llm-provider/conformance/034-anthropic-tool-call-flow.md
+65 −0 spec/llm-provider/conformance/034-anthropic-tool-call-flow.yaml
+23 −0 spec/llm-provider/conformance/035-anthropic-image-content-blocks.md
+54 −0 spec/llm-provider/conformance/035-anthropic-image-content-blocks.yaml
+31 −0 spec/llm-provider/conformance/036-anthropic-tool-choice-modes.md
+179 −0 spec/llm-provider/conformance/036-anthropic-tool-choice-modes.yaml
+23 −0 spec/llm-provider/conformance/037-anthropic-runtime-config-mapping.md
+57 −0 spec/llm-provider/conformance/037-anthropic-runtime-config-mapping.yaml
+21 −0 spec/llm-provider/conformance/038-anthropic-max-tokens-required.md
+32 −0 spec/llm-provider/conformance/038-anthropic-max-tokens-required.yaml
+29 −0 spec/llm-provider/conformance/039-anthropic-error-mapping.md
+103 −0 spec/llm-provider/conformance/039-anthropic-error-mapping.yaml
+23 −0 spec/llm-provider/conformance/040-anthropic-structured-output-native.md
+51 −0 spec/llm-provider/conformance/040-anthropic-structured-output-native.yaml
+27 −0 spec/llm-provider/conformance/041-anthropic-structured-output-fallback.md
+54 −0 spec/llm-provider/conformance/041-anthropic-structured-output-fallback.yaml
+36 −0 spec/llm-provider/conformance/042-anthropic-thinking-block-round-trip.md
+77 −0 spec/llm-provider/conformance/042-anthropic-thinking-block-round-trip.yaml
+28 −0 spec/llm-provider/conformance/043-openai-strips-thinking-blocks.md
+45 −0 spec/llm-provider/conformance/043-openai-strips-thinking-blocks.yaml
+311 −12 spec/llm-provider/spec.md
+2 −2 spec/observability/conformance/022-langfuse-basic-trace.yaml
+2 −2 spec/observability/conformance/023-langfuse-generation-rendering.yaml
+2 −2 spec/observability/conformance/024-langfuse-prompt-linkage.yaml
+1 −1 spec/observability/conformance/027-langfuse-caller-supplied-metadata.yaml
+27 −4 spec/observability/conformance/028-caller-metadata-namespace-rejection.md
+110 −0 spec/observability/conformance/028-caller-metadata-namespace-rejection.yaml
+8 −5 spec/observability/conformance/029-caller-metadata-fan-out-per-instance.md
+31 −10 spec/observability/conformance/029-caller-metadata-fan-out-per-instance.yaml
+6 −1 spec/observability/conformance/030-caller-metadata-parallel-branches-per-branch.md
+21 −7 spec/observability/conformance/030-caller-metadata-parallel-branches-per-branch.yaml
+2 −2 spec/observability/conformance/031-langfuse-subgraph-span-hierarchy.yaml
+2 −2 spec/observability/conformance/032-langfuse-fan-out-per-instance-spans.yaml
+61 −0 spec/observability/conformance/034-caller-metadata-open-span-update-serial.md
+81 −0 spec/observability/conformance/034-caller-metadata-open-span-update-serial.yaml
+34 −0 spec/observability/conformance/035-caller-invocation-id-uuid.md
+45 −0 spec/observability/conformance/035-caller-invocation-id-uuid.yaml
+41 −0 spec/observability/conformance/036-caller-invocation-id-non-uuid.md
+52 −0 spec/observability/conformance/036-caller-invocation-id-non-uuid.yaml
+111 −17 spec/observability/spec.md
+39 −0 spec/pipeline-utilities/conformance/057-resume-mints-fresh-invocation-id.md
+56 −0 spec/pipeline-utilities/conformance/057-resume-mints-fresh-invocation-id.yaml
+6 −2 spec/pipeline-utilities/spec.md
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
openarmature = "openarmature.cli:main"

[tool.openarmature]
spec_version = "0.27.1"
spec_version = "0.31.0"

[dependency-groups]
dev = [
Expand Down
9 changes: 5 additions & 4 deletions src/openarmature/AGENTS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# OpenArmature — Agent documentation

*This is the agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.27.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
*This is the agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.31.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*

## TL;DR

Expand All @@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents

## Capability contracts

_Sourced from openarmature-spec v0.27.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
_Sourced from openarmature-spec v0.31.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._

### Capability: `graph-engine`

Expand Down Expand Up @@ -327,8 +327,9 @@ parent invocation's trace as nested spans. Implementations MUST also support an
its own trace and the parent's dispatch span carries an OTel `Link` to that new trace.

**Correlation ID.** A per-invocation identifier that flows across observability backends.
Distinct from `invocation_id` — the framework-generated `invocation_id` correlates spans within
a single backend, while `correlation_id` is application-supplied (or auto-generated when absent)
Distinct from `invocation_id` — the `invocation_id` (caller-supplied or framework-generated, per
§5.1) correlates spans within a single backend, while `correlation_id` is application-supplied
(or auto-generated when absent)
and is intended to be visible in every backend the implementation emits to. A user running an
LLM workflow with both an OTel backend (system traces, logs) and a Langfuse backend
(LLM-specific traces) uses the `correlation_id` as a join key between them: find a slow request
Expand Down
2 changes: 1 addition & 1 deletion src/openarmature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
"""

__version__ = "0.10.0"
__spec_version__ = "0.27.1"
__spec_version__ = "0.31.0"
55 changes: 55 additions & 0 deletions tests/conformance/test_fixture_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,61 @@ def _id(case: tuple[str, Path]) -> str:
"prompt-management/016-prompt-observability-entities-propagation": (
"Cases shape models live in the PM-specific capability harness"
),
# Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0,
# but the Python implementation marks it not-yet in conformance.toml:
# the Anthropic provider isn't implemented in this release. Defer the
# cross-capability parse tests for fixtures 033-042 until the provider
# lands; fixture 043 (openai-strips-thinking-blocks) is deferred
# separately, in test_llm_provider.py's own deferral table.
"llm-provider/033-anthropic-basic-message-round-trip": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/034-anthropic-tool-call-flow": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/035-anthropic-image-content-blocks": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/036-anthropic-tool-choice-modes": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/037-anthropic-runtime-config-mapping": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/038-anthropic-max-tokens-required": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/039-anthropic-error-mapping": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/040-anthropic-structured-output-native": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/041-anthropic-structured-output-fallback": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
"llm-provider/042-anthropic-thinking-block-round-trip": (
"Anthropic provider not implemented (0037 not-yet in conformance.toml)"
),
# Proposal 0040 (open-span metadata update): task #22 implements the
# §6 augmentation-event mechanism and un-defers fixtures 029, 030,
# and 034.
"observability/034-caller-metadata-open-span-update-serial": (
"Open-span augmentation-event mechanism lands with #22 (0040 not-yet)"
),
# Proposal 0039 (caller-supplied invocation_id): the Langfuse
# trace.id derivation fixtures use the `langfuse_trace` expected
# shape, which the cross-capability parser doesn't model. The
# derivation itself is pinned by unit tests in
# test_observability_langfuse_adapter.py against the same spec
# vector that fixture 036 uses
# (sha256("run_abc123")[:16].hex == 29b50a6c08dabfeaeb1696301f4fabe1);
# wiring these fixtures into the Langfuse-specific conformance
# harness is a follow-up.
"observability/035-caller-invocation-id-uuid": (
"Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests"
),
"observability/036-caller-invocation-id-non-uuid": (
"Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests"
),
}


Expand Down
18 changes: 17 additions & 1 deletion tests/conformance/test_llm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,23 @@
# Skip-marked here so a green test run at this commit means "everything we
# claim to implement passes." Each subsequent PR drops its own rows as it
# lands the underlying support.
_DEFERRED_FIXTURES: dict[str, str] = {}
_DEFERRED_FIXTURES: dict[str, str] = {
# Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0,
# but the Python implementation marks it not-yet in conformance.toml:
# the Anthropic provider isn't implemented in this release. Fixture
# 043 (the OpenAI side stripping Anthropic thinking-block content)
# waits with it.
"033-anthropic-basic-message-round-trip": "Anthropic provider not implemented (0037 not-yet)",
"034-anthropic-tool-call-flow": "Anthropic provider not implemented (0037 not-yet)",
"035-anthropic-image-content-blocks": "Anthropic provider not implemented (0037 not-yet)",
"036-anthropic-tool-choice-modes": "Anthropic provider not implemented (0037 not-yet)",
"037-anthropic-runtime-config-mapping": "Anthropic provider not implemented (0037 not-yet)",
"038-anthropic-max-tokens-required": "Anthropic provider not implemented (0037 not-yet)",
"039-anthropic-error-mapping": "Anthropic provider not implemented (0037 not-yet)",
"040-anthropic-structured-output-native": "Anthropic provider not implemented (0037 not-yet)",
"041-anthropic-structured-output-fallback": "Anthropic provider not implemented (0037 not-yet)",
"042-anthropic-thinking-block-round-trip": "Anthropic provider not implemented (0037 not-yet)",
"043-openai-strips-thinking-blocks": "Anthropic provider not implemented (0037 not-yet)",
}


def _fixture_paths() -> list[Path]:
Expand Down
25 changes: 24 additions & 1 deletion tests/conformance/test_observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,25 @@ async def _run_fixture_028(spec: Mapping[str, Any]) -> None:
cases = cast("list[dict[str, Any]]", spec["cases"])
for case in cases:
case_name = cast("str", case["name"])
# Cases using the `augment_metadata` directive exercise §3.4
# mid-invocation rejection at set_invocation_metadata. The
# augment_metadata harness primitive (per fixture 034) lands
# with proposal 0040 / task #22. Until then, skip such cases:
# surface each deferral via warnings.warn so pytest's end-of-run
# summary lists it (rather than letting it pass silently), then
# continue with the remaining cases.
nodes_check = cast("dict[str, Any]", case.get("nodes", {}))
if any(
isinstance(n, dict) and "augment_metadata" in cast("dict[str, Any]", n)
for n in nodes_check.values()
):
import warnings # noqa: PLC0415

warnings.warn(
f"028 case {case_name!r} deferred: augment_metadata harness primitive "
f"lands with proposal 0040 / #22",
stacklevel=2,
)
continue
Comment thread
chris-colinsky marked this conversation as resolved.
try:
# Build a minimal graph from the case's nodes/edges. The
# fixture's node is a noop update — we never expect it to
Expand Down Expand Up @@ -921,7 +940,11 @@ async def _body(_s: Any) -> dict[str, Any]:

caller_metadata = cast("dict[str, Any]", case["caller_metadata"])
try:
with pytest.raises(ValueError, match="reserved namespace prefix"):
# Covers both rejection paths: the prefix-namespace
# rejection (openarmature.* / gen_ai.*, from 0034) and
# the exact-key-name rejection (0041's §8.4 reserved
# set). Both error messages contain "reserved".
with pytest.raises(ValueError, match="reserved"):
await graph.invoke(state_cls(), metadata=caller_metadata)
finally:
otel_observer.shutdown()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_package_versions() -> None:
assert openarmature.__version__ == "0.10.0"
assert openarmature.__spec_version__ == "0.27.1"
assert openarmature.__spec_version__ == "0.31.0"


def test_spec_version_matches_pyproject() -> None:
Expand Down