diff --git a/contracts/steering-artifact-receipt.schema.json b/contracts/steering-artifact-receipt.schema.json new file mode 100644 index 0000000..72247c5 --- /dev/null +++ b/contracts/steering-artifact-receipt.schema.json @@ -0,0 +1,75 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:srcos:agent-machine:schema:steering-artifact-receipt:v0.1.0", + "title": "SteeringArtifactReceipt", + "description": "Receipt for resolved local steering model and SAE artifacts. Each artifact record requires source repo, exact file path, resolved revision, and SHA-256 digest.", + "type": "object", + "additionalProperties": false, + "required": ["specVersion", "id", "kind", "sourcesetId", "status", "generatedAt", "artifactRecords", "receiptSafety"], + "properties": { + "specVersion": { "type": "string", "const": "0.1.0" }, + "id": { "type": "string", "pattern": "^urn:srcos:agent-machine:steering-artifact-receipt:[a-z0-9][a-z0-9.-]*$" }, + "kind": { "type": "string", "const": "SteeringArtifactReceipt" }, + "sourcesetId": { "type": "string", "pattern": "^[a-z0-9][a-z0-9.-]*$" }, + "status": { "type": "string", "enum": ["pending", "complete", "failed"] }, + "generatedAt": { "type": "string" }, + "activationIssue": { "type": "string" }, + "artifactRecords": { "type": "array", "items": { "$ref": "#/$defs/artifactRecord" }, "uniqueItems": true }, + "missing": { "type": "array", "items": { "type": "string" }, "uniqueItems": true }, + "storageReceiptRefs": { "type": "array", "items": { "type": "string" }, "uniqueItems": true }, + "policyRefs": { "type": "array", "items": { "type": "string" }, "uniqueItems": true }, + "agentRegistryGrantRefs": { "type": "array", "items": { "type": "string" }, "uniqueItems": true }, + "receiptSafety": { + "type": "object", + "additionalProperties": false, + "required": ["includeRawArtifacts", "includeAuthMaterial"], + "properties": { + "includeRawArtifacts": { "type": "boolean", "const": false }, + "includeAuthMaterial": { 
"type": "boolean", "const": false } + } + }, + "notes": { "type": "array", "items": { "type": "string" } } + }, + "$defs": { + "artifactRecord": { + "type": "object", + "additionalProperties": false, + "required": ["role", "source", "storage", "digest"], + "properties": { + "role": { "type": "string", "enum": ["model-config", "model-weight", "tokenizer", "sae-artifact", "sae-config", "other"] }, + "source": { + "type": "object", + "additionalProperties": false, + "required": ["type", "repo", "filePath", "resolvedRevision"], + "properties": { + "type": { "type": "string", "enum": ["huggingface", "local", "other"] }, + "repo": { "type": "string" }, + "filePath": { "type": "string", "minLength": 1 }, + "resolvedRevision": { "type": "string", "minLength": 1 }, + "url": { "type": "string" } + } + }, + "storage": { + "type": "object", + "additionalProperties": false, + "required": ["localPath", "sizeBytes"], + "properties": { + "localPath": { "type": "string", "minLength": 1 }, + "sizeBytes": { "type": "integer", "minimum": 0 }, + "storageReceiptRef": { "type": ["string", "null"] } + } + }, + "digest": { + "type": "object", + "additionalProperties": false, + "required": ["algorithm", "sha256", "verified"], + "properties": { + "algorithm": { "type": "string", "const": "sha256" }, + "sha256": { "type": "string", "pattern": "^[a-f0-9]{64}$" }, + "verified": { "type": "boolean" } + } + } + } + } + } +} diff --git a/docs/index.md b/docs/index.md index 65d6a53..d5f298e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,6 +14,7 @@ Agent Machine is a bootstrap runtime-control substrate for SourceOS agent worklo | [Local SAE steering inference readiness](inference-local-steering.md) | Inspection record for Neuronpedia-compatible local steering readiness and current gaps. | | [Local /steer endpoint contract](local-steer-endpoint.md) | Noetica-compatible local steering endpoint contract and stub behavior. 
| | [Steering sourceset registry](steering-sourcesets.md) | Registered model/SAE sourceset records for local steering work. | +| [Steering artifact receipts](steering-artifact-receipts.md) | Artifact-resolution receipt contract for model and SAE files. | | [GPT-2 Small steering activation path](steering-activation-path.md) | Fail-closed real-path entrypoint and remaining blockers for controlled activation. | ## Architecture @@ -72,6 +73,7 @@ Important contract families: | `AgentPod` | Schedulable local or Kubernetes workload envelope. | | `InferenceProvider` | Backend-neutral inference provider declaration. | | `SteeringSourceset` | Model/SAE artifact registry record for local steering work. | +| `SteeringArtifactReceipt` | Model/SAE artifact-resolution receipt for local steering work. | | `CacheTier` | Model/cache/scratch/evidence storage tier declaration. | | `StorageReceipt` | Secret-free storage/cache/evidence proof. | | `DeploymentReceipt` | Proof that an artifact was derived from a typed source by a generator. | diff --git a/docs/steering-activation-path.md b/docs/steering-activation-path.md index e80b81d..4564f43 100644 --- a/docs/steering-activation-path.md +++ b/docs/steering-activation-path.md @@ -42,12 +42,38 @@ agent-machine steer serve-stub --host 127.0.0.1 --port 8080 --status not_configu It must not return `status: "applied"` until a real forward pass and feature injection succeed. +## Artifact receipt gate + +Before any local smoke can be accepted, Agent Machine must emit a complete `SteeringArtifactReceipt` for `gpt2-small.res-jb`. 
+ +The receipt must include, for every model, tokenizer, and SAE file used by the runtime: + +- source repository +- exact file path +- resolved revision, commit SHA, or immutable tag +- local path +- file size in bytes +- SHA-256 digest +- digest verification status + +The receipt contract is defined in: + +```text +contracts/steering-artifact-receipt.schema.json +``` + +and documented in: + +```text +docs/steering-artifact-receipts.md +``` + ## Remaining blockers before #34 can close - optional ML dependencies installed from `requirements-steering.txt` - verified GPT-2 Small model artifacts - verified SAE artifacts for SAELens release `gpt2-small-res-jb`, SAE id `blocks.6.hook_resid_pre` -- digest locks for model and SAE artifacts +- artifact receipt with exact repo, file path, resolved revision, and SHA-256 digest for each model, tokenizer, and SAE file - storage receipt for the resolved artifact locations - policy admission and agent-registry grant records - real activation injection implementation diff --git a/docs/steering-artifact-receipts.md b/docs/steering-artifact-receipts.md new file mode 100644 index 0000000..7b22638 --- /dev/null +++ b/docs/steering-artifact-receipts.md @@ -0,0 +1,68 @@ +# Steering Artifact Receipts + +Status: contract scaffold for Issue #34. + +This document defines the artifact receipt shape required before Agent Machine may claim that a local steering run used specific GPT-2 Small model and SAE artifacts. + +## Purpose + +A local steering smoke record is not sufficient unless the artifact chain is auditable. The receipt must prove which exact model and SAE files were resolved and verified. 
+ +A complete `SteeringArtifactReceipt` must include, for each resolved file: + +- source repository +- exact file path inside that source +- resolved revision, commit SHA, or immutable tag +- local path where the file was used +- file size in bytes +- SHA-256 digest +- whether the digest was verified + +The receipt must not include raw model data, raw SAE tensors, credentials, or tokens. + +## Schema and examples + +Schema: + +```text +contracts/steering-artifact-receipt.schema.json +``` + +Pending fixture: + +```text +examples/steering-artifact-receipts/gpt2-small-res-jb.pending.steering-artifact-receipt.json +``` + +The pending fixture deliberately contains no artifact records. It exists to validate the receipt envelope and to record the missing fields before artifact resolution. + +## Complete receipt requirement + +A complete receipt for `gpt2-small.res-jb` must include artifact records for all model, tokenizer, and SAE files used by the runtime. Each artifact record must contain this minimum shape: + +```json +{ + "role": "model-weight", + "source": { + "type": "huggingface", + "repo": "openai-community/gpt2", + "filePath": "model.safetensors", + "resolvedRevision": "<resolved-revision>", + "url": "https://huggingface.co/openai-community/gpt2/blob/<resolved-revision>/model.safetensors" + }, + "storage": { + "localPath": "/var/lib/agent-machine/models/.../model.safetensors", + "sizeBytes": 0, + "storageReceiptRef": "urn:srcos:agent-machine:storage-receipt:..." + }, + "digest": { + "algorithm": "sha256", + "sha256": "<64 lowercase hex characters>", + "verified": true + } +} +``` + +## Boundary + +This contract does not download artifacts and does not close Issue #34. It defines the audit requirement that the real artifact resolver must satisfy before `status: applied` can be accepted. 
diff --git a/examples/steering-artifact-receipts/gpt2-small-res-jb.pending.steering-artifact-receipt.json b/examples/steering-artifact-receipts/gpt2-small-res-jb.pending.steering-artifact-receipt.json new file mode 100644 index 0000000..fccc396 --- /dev/null +++ b/examples/steering-artifact-receipts/gpt2-small-res-jb.pending.steering-artifact-receipt.json @@ -0,0 +1,31 @@ +{ + "specVersion": "0.1.0", + "id": "urn:srcos:agent-machine:steering-artifact-receipt:gpt2-small.res-jb.pending", + "kind": "SteeringArtifactReceipt", + "sourcesetId": "gpt2-small.res-jb", + "status": "pending", + "generatedAt": "1970-01-01T00:00:00Z", + "activationIssue": "https://github.com/SourceOS-Linux/agent-machine/issues/34", + "artifactRecords": [], + "missing": [ + "resolved model file paths", + "resolved model revisions", + "model sha256 digests", + "resolved SAE file paths", + "resolved SAE revisions", + "SAE sha256 digests", + "storage receipts" + ], + "storageReceiptRefs": [], + "policyRefs": [], + "agentRegistryGrantRefs": [], + "receiptSafety": { + "includeRawArtifacts": false, + "includeAuthMaterial": false + }, + "notes": [ + "This pending fixture validates the receipt envelope before artifacts are resolved.", + "A complete receipt must include one artifact record per resolved model, tokenizer, and SAE file.", + "Each complete artifact record must include repo, exact filePath, resolvedRevision, localPath, sizeBytes, and SHA-256 digest." 
+ ] +} diff --git a/src/agent_machine/contracts.py b/src/agent_machine/contracts.py index f33bea3..df74039 100644 --- a/src/agent_machine/contracts.py +++ b/src/agent_machine/contracts.py @@ -57,6 +57,7 @@ def schema_by_kind(root: Path | None = None) -> dict[str, Path]: "PolicyAdmission": base / "policy-admission.schema.json", "ReleaseEvidenceBundle": base / "release-evidence-bundle.schema.json", "SignedReleaseBundleEnvelope": base / "signed-release-bundle-envelope.schema.json", + "SteeringArtifactReceipt": base / "steering-artifact-receipt.schema.json", "SteeringSourceset": base / "steering-sourceset.schema.json", "StorageReceipt": base / "storage-receipt.schema.json", }