LunarCommand · chris-colinsky · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/conformance.toml b/conformance.toml
@@ -22,14 +22,17 @@
 # between pinned-spec and spec-head is the consumer's job (e.g., the
 # spec docs site computes the difference and renders accordingly).
 #
-# Convention: this file is only updated as part of release PRs. Between
-# releases, the manifest reflects the most-recently-published version
-# so external readers never see a `since` referring to an unreleased
-# pre-tag commit.
+# Convention: this file is updated as part of release PRs AND as part
+# of feature PRs that bump the spec submodule pin (the manifest guard
+# requires entries for every Accepted proposal in the pinned spec, so
+# a submodule bump forces this file to update too). Such bump PRs set
+# `since` to the upcoming release version; between the bump PR and the
+# matching tag, external readers will see a `since` referring to the
+# upcoming, unreleased version.
 
 [manifest]
 implementation = "openarmature-python"
-spec_pin = "v0.27.1"
+spec_pin = "v0.31.0"
 
 # Status values:
 #   implemented   — shipped behavior matches the proposal's contract
@@ -175,3 +178,19 @@ since = "0.10.0"
 [proposals."0036"]
 status = "implemented"
 since = "0.10.0"
+
+# Spec v0.28.0-v0.31.0 added proposals 0037, 0039, 0040, and 0041.
+# Proposal 0038 (Gemini) is mid-accept spec-side and not in v0.31.0 yet.
+[proposals."0037"]
+status = "not-yet"
+
+[proposals."0039"]
+status = "implemented"
+since = "0.11.0"
+
+[proposals."0040"]
+status = "not-yet"
+
+[proposals."0041"]
+status = "implemented"
+since = "0.11.0"
diff --git a/openarmature-spec b/openarmature-spec
diff --git a/pyproject.toml b/pyproject.toml
@@ -58,7 +58,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
 openarmature = "openarmature.cli:main"
 
 [tool.openarmature]
-spec_version = "0.27.1"
+spec_version = "0.31.0"
 
 [dependency-groups]
 dev = [

diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md
@@ -1,6 +1,6 @@
 # OpenArmature — Agent documentation
 
-*This is the agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.27.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
+*This is the agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.31.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
 
 ## TL;DR
 
@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents
 
 ## Capability contracts
 
-_Sourced from openarmature-spec v0.27.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
+_Sourced from openarmature-spec v0.31.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
 
 ### Capability: `graph-engine`
 
@@ -327,8 +327,9 @@ parent invocation's trace as nested spans. Implementations MUST also support an
 its own trace and the parent's dispatch span carries an OTel `Link` to that new trace.
 
 **Correlation ID.** A per-invocation identifier that flows across observability backends.
-Distinct from `invocation_id` — the framework-generated `invocation_id` correlates spans within
-a single backend, while `correlation_id` is application-supplied (or auto-generated when absent)
+Distinct from `invocation_id` — the `invocation_id` (caller-supplied or framework-generated, per
+§5.1) correlates spans within a single backend, while `correlation_id` is application-supplied
+(or auto-generated when absent)
 and is intended to be visible in every backend the implementation emits to. A user running an
 LLM workflow with both an OTel backend (system traces, logs) and a Langfuse backend
 (LLM-specific traces) uses the `correlation_id` as a join key between them: find a slow request

diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py
@@ -25,4 +25,4 @@
 """
 
 __version__ = "0.10.0"
-__spec_version__ = "0.27.1"
+__spec_version__ = "0.31.0"
diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py
@@ -115,6 +115,61 @@ def _id(case: tuple[str, Path]) -> str:
     "prompt-management/016-prompt-observability-entities-propagation": (
         "Cases shape models live in the PM-specific capability harness"
     ),
+    # Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0,
+    # but conformance.toml marks it not-yet for the Python implementation:
+    # the Anthropic provider isn't implemented in this release. Defer the
+    # cross-capability parse tests for the 033-042 fixtures until that
+    # lands; the OpenAI-side fixture (043, openai-strips-thinking-blocks)
+    # is deferred separately in test_llm_provider.py.
+    "llm-provider/033-anthropic-basic-message-round-trip": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/034-anthropic-tool-call-flow": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/035-anthropic-image-content-blocks": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/036-anthropic-tool-choice-modes": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/037-anthropic-runtime-config-mapping": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/038-anthropic-max-tokens-required": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/039-anthropic-error-mapping": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/040-anthropic-structured-output-native": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/041-anthropic-structured-output-fallback": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    "llm-provider/042-anthropic-thinking-block-round-trip": (
+        "Anthropic provider not implemented (0037 not-yet in conformance.toml)"
+    ),
+    # Proposal 0040 (open-span metadata update): task #22 implements the
+    # §6 augmentation-event mechanism and un-defers fixtures 029, 030, 034.
+    "observability/034-caller-metadata-open-span-update-serial": (
+        "Open-span augmentation-event mechanism lands with #22 (0040 not-yet)"
+    ),
+    # Proposal 0039 (caller-supplied invocation_id): the Langfuse
+    # trace.id derivation fixtures use a `langfuse_trace` expected shape
+    # that the cross-capability parser doesn't model. The derivation
+    # itself is pinned by unit tests in
+    # test_observability_langfuse_adapter.py against the same spec vector
+    # that fixture 036 uses
+    # (sha256("run_abc123")[:16].hex == 29b50a6c08dabfeaeb1696301f4fabe1);
+    # wiring into the Langfuse-specific conformance harness is a follow-up.
+    "observability/035-caller-invocation-id-uuid": (
+        "Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests"
+    ),
+    "observability/036-caller-invocation-id-non-uuid": (
+        "Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests"
+    ),
 }
 
 

diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py
@@ -65,7 +65,23 @@
 # Skip-marked here so a green test run at this commit means "everything we
 # claim to implement passes." Each subsequent PR drops its own rows as it
 # lands the underlying support.
-_DEFERRED_FIXTURES: dict[str, str] = {}
+_DEFERRED_FIXTURES: dict[str, str] = {
+    # Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0,
+    # but conformance.toml marks it not-yet for the Python implementation:
+    # the Anthropic provider isn't implemented in this release. Fixture 043
+    # (OpenAI side strips Anthropic thinking-block content) waits with it.
+    "033-anthropic-basic-message-round-trip": "Anthropic provider not implemented (0037 not-yet)",
+    "034-anthropic-tool-call-flow": "Anthropic provider not implemented (0037 not-yet)",
+    "035-anthropic-image-content-blocks": "Anthropic provider not implemented (0037 not-yet)",
+    "036-anthropic-tool-choice-modes": "Anthropic provider not implemented (0037 not-yet)",
+    "037-anthropic-runtime-config-mapping": "Anthropic provider not implemented (0037 not-yet)",
+    "038-anthropic-max-tokens-required": "Anthropic provider not implemented (0037 not-yet)",
+    "039-anthropic-error-mapping": "Anthropic provider not implemented (0037 not-yet)",
+    "040-anthropic-structured-output-native": "Anthropic provider not implemented (0037 not-yet)",
+    "041-anthropic-structured-output-fallback": "Anthropic provider not implemented (0037 not-yet)",
+    "042-anthropic-thinking-block-round-trip": "Anthropic provider not implemented (0037 not-yet)",
+    "043-openai-strips-thinking-blocks": "Anthropic provider not implemented (0037 not-yet)",
+}
 
 
 def _fixture_paths() -> list[Path]:

diff --git a/tests/conformance/test_observability.py b/tests/conformance/test_observability.py
@@ -884,6 +884,25 @@ async def _run_fixture_028(spec: Mapping[str, Any]) -> None:
     cases = cast("list[dict[str, Any]]", spec["cases"])
     for case in cases:
         case_name = cast("str", case["name"])
+        # Cases using the `augment_metadata` directive exercise §3.4
+        # mid-invocation rejection at set_invocation_metadata. The
+        # augment_metadata harness primitive (per fixture 034) lands
+        # with proposal 0040 / task #22; surface the deferral via
+        # warnings.warn so pytest's end-of-run summary lists it (rather
+        # than silently passing) and continue to the other cases.
+        nodes_check = cast("dict[str, Any]", case.get("nodes", {}))
+        if any(
+            isinstance(n, dict) and "augment_metadata" in cast("dict[str, Any]", n)
+            for n in nodes_check.values()
+        ):
+            import warnings  # noqa: PLC0415
+
+            warnings.warn(
+                f"028 case {case_name!r} deferred: augment_metadata harness primitive "
+                f"lands with proposal 0040 / #22",
+                stacklevel=2,
+            )
+            continue
         try:
             # Build a minimal graph from the case's nodes/edges. The
             # fixture's node is a noop update — we never expect it to
@@ -921,7 +940,11 @@ async def _body(_s: Any) -> dict[str, Any]:
 
             caller_metadata = cast("dict[str, Any]", case["caller_metadata"])
             try:
-                with pytest.raises(ValueError, match="reserved namespace prefix"):
+                # Covers both rejection paths: the prefix-namespace
+                # rejection (openarmature.* / gen_ai.*, from 0034) and
+                # the exact-key-name rejection (0041's §8.4 reserved
+                # set). Both error messages contain "reserved".
+                with pytest.raises(ValueError, match="reserved"):
                     await graph.invoke(state_cls(), metadata=caller_metadata)
             finally:
                 otel_observer.shutdown()

diff --git a/tests/test_smoke.py b/tests/test_smoke.py
@@ -9,7 +9,7 @@
 
 def test_package_versions() -> None:
     assert openarmature.__version__ == "0.10.0"
-    assert openarmature.__spec_version__ == "0.27.1"
+    assert openarmature.__spec_version__ == "0.31.0"
 
 
 def test_spec_version_matches_pyproject() -> None:
+62 −0		CHANGELOG.md
+5 −5		README.md
+14 −11		docs/proposals.md
+1 −0		docs/proposals/0039-observability-caller-supplied-invocation-id.md
+1 −0		docs/proposals/0040-observability-mid-invocation-metadata-open-span-update.md
+1 −0		docs/proposals/0041-observability-langfuse-metadata-key-collision.md
+161 −79		proposals/0037-llm-provider-anthropic-messages-mapping.md
+248 −0		proposals/0039-observability-caller-supplied-invocation-id.md
+370 −0		proposals/0040-observability-mid-invocation-metadata-open-span-update.md
+203 −0		proposals/0041-observability-langfuse-metadata-key-collision.md
+35 −10		spec/graph-engine/spec.md
+30 −0		spec/llm-provider/conformance/033-anthropic-basic-message-round-trip.md
+44 −0		spec/llm-provider/conformance/033-anthropic-basic-message-round-trip.yaml
+29 −0		spec/llm-provider/conformance/034-anthropic-tool-call-flow.md
+65 −0		spec/llm-provider/conformance/034-anthropic-tool-call-flow.yaml
+23 −0		spec/llm-provider/conformance/035-anthropic-image-content-blocks.md
+54 −0		spec/llm-provider/conformance/035-anthropic-image-content-blocks.yaml
+31 −0		spec/llm-provider/conformance/036-anthropic-tool-choice-modes.md
+179 −0		spec/llm-provider/conformance/036-anthropic-tool-choice-modes.yaml
+23 −0		spec/llm-provider/conformance/037-anthropic-runtime-config-mapping.md
+57 −0		spec/llm-provider/conformance/037-anthropic-runtime-config-mapping.yaml
+21 −0		spec/llm-provider/conformance/038-anthropic-max-tokens-required.md
+32 −0		spec/llm-provider/conformance/038-anthropic-max-tokens-required.yaml
+29 −0		spec/llm-provider/conformance/039-anthropic-error-mapping.md
+103 −0		spec/llm-provider/conformance/039-anthropic-error-mapping.yaml
+23 −0		spec/llm-provider/conformance/040-anthropic-structured-output-native.md
+51 −0		spec/llm-provider/conformance/040-anthropic-structured-output-native.yaml
+27 −0		spec/llm-provider/conformance/041-anthropic-structured-output-fallback.md
+54 −0		spec/llm-provider/conformance/041-anthropic-structured-output-fallback.yaml
+36 −0		spec/llm-provider/conformance/042-anthropic-thinking-block-round-trip.md
+77 −0		spec/llm-provider/conformance/042-anthropic-thinking-block-round-trip.yaml
+28 −0		spec/llm-provider/conformance/043-openai-strips-thinking-blocks.md
+45 −0		spec/llm-provider/conformance/043-openai-strips-thinking-blocks.yaml
+311 −12		spec/llm-provider/spec.md
+2 −2		spec/observability/conformance/022-langfuse-basic-trace.yaml
+2 −2		spec/observability/conformance/023-langfuse-generation-rendering.yaml
+2 −2		spec/observability/conformance/024-langfuse-prompt-linkage.yaml
+1 −1		spec/observability/conformance/027-langfuse-caller-supplied-metadata.yaml
+27 −4		spec/observability/conformance/028-caller-metadata-namespace-rejection.md
+110 −0		spec/observability/conformance/028-caller-metadata-namespace-rejection.yaml
+8 −5		spec/observability/conformance/029-caller-metadata-fan-out-per-instance.md
+31 −10		spec/observability/conformance/029-caller-metadata-fan-out-per-instance.yaml
+6 −1		spec/observability/conformance/030-caller-metadata-parallel-branches-per-branch.md
+21 −7		spec/observability/conformance/030-caller-metadata-parallel-branches-per-branch.yaml
+2 −2		spec/observability/conformance/031-langfuse-subgraph-span-hierarchy.yaml
+2 −2		spec/observability/conformance/032-langfuse-fan-out-per-instance-spans.yaml
+61 −0		spec/observability/conformance/034-caller-metadata-open-span-update-serial.md
+81 −0		spec/observability/conformance/034-caller-metadata-open-span-update-serial.yaml
+34 −0		spec/observability/conformance/035-caller-invocation-id-uuid.md
+45 −0		spec/observability/conformance/035-caller-invocation-id-uuid.yaml
+41 −0		spec/observability/conformance/036-caller-invocation-id-non-uuid.md
+52 −0		spec/observability/conformance/036-caller-invocation-id-non-uuid.yaml
+111 −17		spec/observability/spec.md
+39 −0		spec/pipeline-utilities/conformance/057-resume-mints-fresh-invocation-id.md
+56 −0		spec/pipeline-utilities/conformance/057-resume-mints-fresh-invocation-id.yaml
+6 −2		spec/pipeline-utilities/spec.md