From 3d2b2d716930a71cb02020fc23a8bdc6d68fe5e3 Mon Sep 17 00:00:00 2001 From: Jascha Date: Thu, 14 May 2026 16:09:29 -0700 Subject: [PATCH 1/2] Protocol v2: wire-format break addressing 2026-05 security audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit-identified protocol issues that require a coordinated wire-format break across all reference implementations: - v not in signed payload (downgrade vector) - kid not in signed payload (cross-key swap) - no domain separator (cross-protocol signature lift) - float canonicalization underspecified (NaN, +/-0, denormals) - string canonicalization underspecified (NFC only for source) - extra value types underspecified - timestamp format underspecified - unknown top-level fields permitted - size limits absent Spec (docs/spec.md): - Bump protocol version field to 2; the wire format is not compatible with v1. - Domain separator: signed_bytes = b"vectorpin/v2\x00" || canonical_json (13-byte tag, exact). - Canonical header now includes EVERY field except sig — v, kid, and all others are signed. - NFC normalization mandatory for every string-typed field; control chars and bidi overrides rejected. - Vectors with NaN/Inf rejected at sign time; +0.0 and -0.0 distinct. - extra is strictly map; non-string values reject. - ts must match exactly /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/. - Unknown top-level fields rejected by parsers. - §4.3 size limits MUST: 64 KiB pin, 32 extra entries, etc. - §5 verifier failure mode list expanded with KEY_EXPIRED, PARSE_ERROR, RECORD/COLLECTION/TENANT_MISMATCH. - §12 new: explicit migration notes from v1 to v2. Python (src/vectorpin/, tests/): - PinHeader carries kid; canonicalize() prepends DOMAIN_TAG and emits sorted-keys NFC-normalized JSON over all fields including v and kid. - Signer rejects NaN/Inf; formats ts strictly; NFC-normalizes inputs. - Default Verifier accepts only v == 2. 
- LegacyV1Verifier (opt-in) byte-for-byte preserves v1 canonicalization for migration of existing pins. - KeyEntry carries (valid_from, valid_until); KEY_EXPIRED fires when pin.ts is outside the registered window. - verify(...) accepts expected_record_id/collection_id/tenant_id and enforces them per §5 step 8. - 120 tests pass (4 new test files cover v2 canonicalization, legacy v1 verification, replay protection, and the test-vector fixtures). Rust (rust/vectorpin/): - Mirror of the Python wire format; same DOMAIN_TAG bytes, same canonicalization, same failure-mode taxonomy. - Cross-language anchor: rust/vectorpin/tests/cross_lang.rs loads testvectors/v2.json and asserts byte-for-byte equality of canonical bytes, pin_json, and Ed25519 signatures against the Python reference, for every fixture. - VerifyOptions builder for replay-protection inputs. - LegacyV1Verifier mirrors the Python opt-in path. - 50 tests pass; clippy --all-targets -- -D warnings clean. TypeScript (typescript/): - Async signing/verifying API throughout (carried from P2 hardening branch). - DOMAIN_TAG enforced at module load. NFC normalization via built-in String.normalize. No new dependencies. - Cross-language anchor: typescript/test/cross-lang.test.ts asserts byte-for-byte equality against testvectors/v2.json on canonical bytes, pin_json, and Ed25519 signature. - 83 tests pass; npm run typecheck clean. Test vectors: - testvectors/v2.json — 4 positive fixtures (f32, f64, model_hash, extra+record_id). - testvectors/negative_v2.json — 17 fixtures covering tampered vector, source mismatch, model mismatch, wrong v, wrong kid, bit-flipped sig, wrong sig length, unknown top-level field, non-string extra value, NaN in vector, NFD source, fractional-seconds ts, offset ts, lowercase t/z ts, record_id mismatch, oversize JSON. - testvectors/v1.json and testvectors/negative_v1.json left as-is for LegacyV1Verifier coverage. - Deterministic seed; reproducible via scripts/generate_test_vectors.py. 
Migration: v1 pins do not verify under the strict v2 verifier in any of the three languages. Use LegacyV1Verifier (Python, Rust, TypeScript) to read v1 pins during migration; re-sign with the v2 Signer to produce new v2 pins. --- docs/spec.md | 173 +++- rust/vectorpin/examples/basic_usage.rs | 25 +- rust/vectorpin/src/attestation.rs | 861 +++++++++++++++----- rust/vectorpin/src/lib.rs | 9 +- rust/vectorpin/src/signer.rs | 179 ++-- rust/vectorpin/src/verifier.rs | 534 ++++++++---- rust/vectorpin/tests/cross_lang.rs | 370 ++++++--- rust/vectorpin/tests/legacy_v1.rs | 104 +++ rust/vectorpin/tests/v2_canonicalization.rs | 277 +++++++ scripts/generate_test_vectors.py | 548 ++++++++++--- src/vectorpin/__init__.py | 23 +- src/vectorpin/attestation.py | 370 ++++++++- src/vectorpin/signer.py | 109 ++- src/vectorpin/verifier.py | 285 ++++++- tests/test_attestation.py | 156 +++- tests/test_legacy_v1_verifier.py | 99 +++ tests/test_replay_protection.py | 194 +++++ tests/test_signer_verifier.py | 26 +- tests/test_v2_canonicalization.py | 327 ++++++++ tests/test_v2_test_vectors.py | 181 ++++ testvectors/README.md | 74 +- testvectors/negative_v2.json | 121 +++ testvectors/v2.json | 79 ++ typescript/README.md | 11 +- typescript/src/attestation.ts | 481 +++++++++-- typescript/src/index.ts | 33 +- typescript/src/signer.ts | 180 +++- typescript/src/verifier.ts | 298 ++++++- typescript/test/cross-lang.test.ts | 220 +++-- typescript/test/legacy-v1.test.ts | 86 ++ typescript/test/signer-verifier.test.ts | 119 +-- typescript/test/v2-canonicalization.test.ts | 245 ++++++ 32 files changed, 5748 insertions(+), 1049 deletions(-) create mode 100644 rust/vectorpin/tests/legacy_v1.rs create mode 100644 rust/vectorpin/tests/v2_canonicalization.rs create mode 100644 tests/test_legacy_v1_verifier.py create mode 100644 tests/test_replay_protection.py create mode 100644 tests/test_v2_canonicalization.py create mode 100644 tests/test_v2_test_vectors.py create mode 100644 testvectors/negative_v2.json 
create mode 100644 testvectors/v2.json create mode 100644 typescript/test/legacy-v1.test.ts create mode 100644 typescript/test/v2-canonicalization.test.ts diff --git a/docs/spec.md b/docs/spec.md index 9985416..639d584 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -1,11 +1,13 @@ # VectorPin Protocol Specification -**Version:** 1 +**Version:** 2 **Status:** Draft **License:** Apache 2.0 This document specifies the wire format, canonicalization, and verification rules for VectorPin attestations. Anyone implementing VectorPin in another language should be able to read this document, ignore the Python reference implementation, and produce signatures and verifications that interoperate. +**Protocol version 2 is a wire-format break over v1.** It is not backwards-compatible with v1 pins. The break is motivated by a security audit (2026-05) that identified four cross-implementation issues: protocol version not bound to the signature, `kid` not bound to the signature, no domain separator, and underspecified canonicalization of floats, strings, and timestamps. See §12. + ## 1. Goals A VectorPin Pin is a compact attestation that travels with an embedding through a vector database. It guarantees that: @@ -14,18 +16,20 @@ A VectorPin Pin is a compact attestation that travels with an embedding through - The embedding was produced by a specific model. - The pin was issued by a specific producer. - None of the above has changed since issuance. +- Cross-protocol signature reuse with sister Trust-Stack protocols is prevented by a domain separator (§4.2). -Non-goals: confidentiality, access control, anti-replay across collections. +Non-goals: confidentiality, access control, anti-replay across collections without explicit caller cooperation (§8, §5 step 8). ## 2.
Cryptographic primitives | Primitive | Algorithm | |---|---| | Hash | SHA-256 | -| Signature | Ed25519 | +| Signature | Ed25519 over `domain_tag || canonical_json` | +| Domain separator | exact ASCII bytes `vectorpin/v2\x00` (13 bytes) | | Encoding | URL-safe base64, no padding | -These are fixed for protocol version 1. Future versions MAY introduce alternatives but MUST bump the version field. +These are fixed for protocol version 2. Future versions MAY introduce alternatives but MUST bump the version field AND change the domain separator. ## 3. Canonical hashes @@ -37,6 +41,13 @@ hash_text(s) := "sha256:" || hex(SHA-256(UTF-8(NFC(s)))) Text MUST be normalized to Unicode NFC before encoding. Implementations MUST reject input that cannot be normalized. +The same NFC requirement applies to **every string-typed field** in the pin (`model`, `kid`, `ts`, each `extra` key, each `extra` value). Implementations MUST normalize these to NFC before signing and MUST reject parsed pins whose string fields are not already in NFC form. + +Implementations MUST reject any string field containing: + +- Control characters in `U+0000`–`U+001F`, with no exceptions (tab, line feed, and carriage return are rejected too). +- Bidirectional overrides `U+202A`–`U+202E`, `U+2066`–`U+2069`. + ### 3.2 Vector hashing ``` @@ -49,96 +60,141 @@ Where `canonical_bytes` produces: 2. Stored in little-endian byte order. 3. Packed contiguously, 1-D. +Implementations MUST reject vectors containing NaN, positive infinity, or negative infinity at sign time. `-0.0` and `+0.0` are distinct values and both valid; FTZ/DAZ floating-point modes MUST be disabled or vectors normalized before hashing. + Other dtypes are reserved for future protocol versions. ## 4. Pin format ### 4.1 Wire form -A Pin is a JSON object with the following fields: +A Pin is a JSON object with the following fields. **No other top-level fields are permitted.** Implementations MUST reject pins containing unknown top-level keys.
| Field | Type | Required | Description | |---|---|---|---| -| `v` | integer | yes | Protocol version. Must equal `1`. | +| `v` | integer | yes | Protocol version. Must equal `2`. | +| `kid` | string | yes | Identifier of the signing key. | | `model` | string | yes | Embedding model identifier. | -| `model_hash` | string | no | Optional content hash of the model weights. | +| `model_hash` | string | no | Optional content hash of the model weights. When present, MUST match `"sha256:" || hex(SHA-256(weights))` where the input is the concatenation of model weight shards in sorted filename order. Implementations that cannot meet this convention MUST omit the field. | | `source_hash` | string | yes | Hash of the source text (§3.1). | | `vec_hash` | string | yes | Hash of the embedding (§3.2). | | `vec_dtype` | string | yes | One of `"f32"` or `"f64"`. | -| `vec_dim` | integer | yes | Embedding dimensionality. | -| `ts` | string | yes | RFC 3339 / ISO 8601 timestamp in UTC, e.g. `"2026-05-05T12:00:00Z"`. | -| `extra` | object | no | String-to-string map of producer-defined fields. | -| `kid` | string | yes | Identifier of the signing key. | -| `sig` | string | yes | Ed25519 signature, URL-safe base64 with no padding. | +| `vec_dim` | integer | yes | Embedding dimensionality. Must be a positive integer ≤ 1,048,576. | +| `ts` | string | yes | UTC timestamp matching exactly the pattern `^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$`. No fractional seconds, no timezone offsets, no lowercase `t`/`z`. | +| `extra` | object | no | Map of UTF-8 string keys to UTF-8 string values. Values that are not strings MUST cause a parse error. Reserved keys: see §8. | +| `sig` | string | yes | Ed25519 signature, URL-safe base64 with no padding. Decoded length MUST be exactly 64 bytes. | ### 4.2 Canonicalization for signing -The signature in `sig` is produced over a canonical byte sequence that excludes `kid` and `sig` themselves. 
The canonical form is JSON with: +The signed byte sequence is: + +``` +signed_bytes := b"vectorpin/v2\x00" || canonical_json(header) +``` + +Where `header` is a JSON object containing **exactly** the following fields, in the order they would appear after lexicographic sorting: `extra` (if present and non-empty), `kid`, `model`, `model_hash` (if present), `source_hash`, `ts`, `v`, `vec_dim`, `vec_dtype`, `vec_hash`. + +The `sig` field is excluded from the signed bytes. **Every other field, including `v` and `kid`, is included.** Including `v` defeats downgrade attacks (cannot strip new fields and present remainder to an older verifier); including `kid` defeats cross-key swap attacks (cannot re-attribute a signed pin to a different producer). + +`canonical_json` is JSON with: + +- All keys sorted lexicographically by Unicode code point (equivalent to ASCII sort for the well-formed v2 field set). +- No whitespace between tokens (separators are `,` and `:` with no surrounding spaces). +- UTF-8 encoding, NFC-normalized strings. +- `extra`, if present, with its keys sorted by the same rule. `extra` MUST be omitted if empty. +- `model_hash` omitted entirely if not set. +- Integers serialized in their minimal JSON form (no leading zeros, no exponent notation). +- Strings emit the JSON-standard escapes (`\"`, `\\`, `\b`, `\f`, `\n`, `\r`, `\t`, `\uXXXX` for U+0000–U+001F and U+007F). All other characters MUST be emitted as raw UTF-8 bytes (not as `\u` escapes). Non-ASCII NFC code points are emitted directly. + +The 13-byte domain tag is prepended to `canonical_json(header)` and the concatenation is fed to Ed25519 signing. Verifiers reconstruct the same bytes from the parsed pin. + +### 4.3 Size limits -- All keys sorted lexicographically. -- No whitespace (separators `","` and `":"`). -- UTF-8 encoding. -- `extra`, if present, with its keys also sorted. -- `model_hash` and `extra` omitted entirely if not set.
+To bound parser resource consumption and prevent DoS through hostile pins, conforming v2 implementations MUST enforce: -This canonical form is fed directly into Ed25519 signing. +| Limit | Maximum | +|---|---| +| Total pin JSON, UTF-8 byte length | 64 KiB (65,536 bytes) | +| `extra` entry count | 32 | +| Any `extra` key, UTF-8 byte length | 128 bytes | +| Any `extra` value, UTF-8 byte length | 1 KiB (1,024 bytes) | +| `vec_dim` | 1,048,576 (2^20) | +| `sig`, decoded byte length | exactly 64 (Ed25519 signature) | + +Verifiers MUST reject oversized pins before parsing the signature. -### 4.3 Example +### 4.4 Example ```json { - "v": 1, + "v": 2, + "kid": "prod-2026-05", "model": "text-embedding-3-large", "source_hash": "sha256:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08", "vec_hash": "sha256:0123...", "vec_dtype": "f32", "vec_dim": 3072, "ts": "2026-05-05T12:00:00Z", - "kid": "prod-2026-05", "sig": "MEUCIQD..." } ``` +The canonical bytes for this pin (excluding `sig`) begin with the 13-byte `vectorpin/v2\x00` tag, followed by `{"kid":"prod-2026-05","model":"text-embedding-3-large","source_hash":"sha256:...","ts":"2026-05-05T12:00:00Z","v":2,"vec_dim":3072,"vec_dtype":"f32","vec_hash":"sha256:..."}`. + ## 5. Verification -A verifier MUST: +A verifier MUST, in order: -1. Reject pins whose `v` field is unknown to it. -2. Reject pins whose `kid` is not in its key registry. -3. Reconstruct the canonical byte sequence (§4.2) and verify `sig` against the registered public key for `kid`. -4. If a ground-truth source string was supplied, recompute `hash_text(source)` and compare to `source_hash`. -5. If a ground-truth vector was supplied, recompute `hash_vector(vector, vec_dtype)` and compare to `vec_hash`. Also check that the supplied vector's shape matches `vec_dim`. -6. If an expected model identifier was supplied, compare to `model`. +0. Reject pins whose serialized JSON exceeds the size limits in §4.3 before parsing. +1.
Reject pins whose `v` field is unknown to it. A strict v2 verifier rejects any `v != 2`. A migration-mode verifier MAY dispatch `v == 1` pins to a legacy v1 verifier; legacy mode MUST be opt-in and SHOULD be disabled by default. +2. Reject pins whose `kid` is not in its key registry, OR whose registry entry's `(valid_from, valid_until)` window excludes `ts` (see §7). +3. Reject pins containing unknown top-level fields, non-string `extra` values, or any string field that is not in NFC form. +4. Reconstruct the canonical byte sequence (§4.2) — including the domain tag — and verify `sig` against the registered public key for `kid`. +5. If a ground-truth source string was supplied, recompute `hash_text(source)` and compare to `source_hash`. +6. If a ground-truth vector was supplied, recompute `hash_vector(vector, vec_dtype)` and compare to `vec_hash`. Also check that the supplied vector's shape matches `vec_dim`. The vector MUST contain no NaN/Inf — if it does, reject before hashing. +7. If an expected model identifier was supplied, compare to `model`. +8. If the caller supplied an expected `vectorpin.record_id` / `vectorpin.collection_id` / `vectorpin.tenant_id`, the verifier MUST compare against the value in `extra` and reject on mismatch. -Verifiers MUST distinguish at least these failure modes (the reference implementation uses the names below; other implementations MAY use different names but MUST distinguish the cases): +Verifiers MUST distinguish at least these failure modes (the reference implementations use the names below; other implementations MAY use different names but MUST distinguish the cases): - `UNSUPPORTED_VERSION` - `UNKNOWN_KEY` +- `KEY_EXPIRED` +- `PARSE_ERROR` — pin JSON exceeds size limits, contains unknown top-level fields, has non-string `extra` values, or fails type/format validation. 
- `SIGNATURE_INVALID` - `VECTOR_TAMPERED` - `SOURCE_MISMATCH` - `MODEL_MISMATCH` - `SHAPE_MISMATCH` +- `RECORD_MISMATCH` / `COLLECTION_MISMATCH` / `TENANT_MISMATCH` ## 6. Storage conventions Adapter implementations SHOULD store pins under the metadata key `vectorpin`. Backends without free-form metadata fields are out of scope for this version of the protocol — provenance must travel with the data. -## 7. Key rotation +## 7. Key rotation and revocation -Verifiers MUST support multiple `kid` -> public key mappings simultaneously. Issuers rotate by: +Verifiers MUST support multiple `kid` -> public key mappings simultaneously, each with an optional validity window `(valid_from, valid_until)` of RFC 3339 timestamps. Issuers rotate by: 1. Generating a new keypair with a fresh `kid`. -2. Adding the new public key to all relevant verifier registries. +2. Adding the new public key to all relevant verifier registries, with a `valid_from` no earlier than the moment the new private key becomes operational. 3. Switching production signing to the new private key. 4. Optionally re-pinning the corpus over time. -5. Removing the old public key from registries once re-pinning is complete or the rotation policy expires. +5. Setting `valid_until` on the old key entry to the rotation cutover instant (do not remove the entry — historical pins must continue to verify against it). + +Old pins continue to verify against the old public key as long as their `ts` falls within the old key's `(valid_from, valid_until)` window. + +### Revocation distinct from rotation + +If a private key is **compromised** (as opposed to merely rotated for hygiene), the corresponding `kid` entry MUST be marked with `valid_until` set to the latest moment the key is believed to have been uncompromised. Pins with `ts` after that instant return `KEY_EXPIRED`; pins with `ts` before it continue to verify. 
This preserves the integrity of historical pins while immediately invalidating anything an attacker could produce post-compromise. -Old pins continue to verify against the old public key during this window. +Operators SHOULD pair this with a transparency-log entry (e.g., sigstore Rekor or a project-specific append-only log) for the revocation event itself, so that downstream verifiers can detect a malicious registry rollback. + +The protocol does not specify a revocation file format in v2; this is intentionally out of band so deployments can integrate with existing PKI / sigstore infrastructure. The minimum requirement on a v2 verifier is to honor the `(valid_from, valid_until)` window however it is delivered. ## 8. Reserved `extra` keys -The `vectorpin.` prefix is reserved by this specification and MUST NOT be used by implementations for any purpose other than the keys defined here. Reserved v1 keys, all optional: +The `vectorpin.` prefix is reserved by this specification and MUST NOT be used by implementations for any purpose other than the keys defined here. Reserved v2 keys, all optional: | Key | Type | Meaning | |---|---|---| @@ -146,23 +202,50 @@ The `vectorpin.` prefix is reserved by this specification and MUST NOT be used b | `vectorpin.record_id` | string | Identifier of the specific record this pin attests. | | `vectorpin.tenant_id` | string | Identifier of the multi-tenant logical namespace the pin lives in. | -Implementations that need replay protection (cross-record, cross-collection, or cross-tenant) SHOULD use these reserved keys rather than inventing private names. Because every `extra` entry is signed, the values are tamper-evident. +Implementations that need replay protection (cross-record, cross-collection, or cross-tenant) SHOULD use these reserved keys, and verifiers MUST enforce them when the caller supplies an expected value (§5 step 8). Because every `extra` entry is signed, the values are tamper-evident. 
-A v1.1 candidate spec promotes `record_id`, `collection_id`, and `tenant_id` to top-level fields. v1.1 verifiers will accept v1 pins; v1 verifiers will reject v1.1 pins because the protocol-version field changes. +A future v3 may promote these to required top-level fields. The current v2 design keeps them inside `extra` so operators can adopt replay-protection incrementally per collection. ## 9. Security considerations -- **Replay**: Pins are not bound to a specific record id at the wire format level. An attacker who copies a pin from one record to another can pass verification only if the vector and source they paste alongside match the pin. Implementations that need stronger replay protection SHOULD use the reserved `vectorpin.collection_id` / `vectorpin.record_id` / `vectorpin.tenant_id` keys defined in §8. -- **Time**: The `ts` field is informational. Verifiers MAY reject pins outside an acceptable time window but the protocol does not require it. -- **Key custody**: An attacker with the private signing key can produce arbitrary pins. Treat the signing key as a high-value secret. -- **Source-time integrity**: VectorPin attests to the relationship between source and vector at pin time. It does not attest that the source itself was authentic at ingestion. +- **Replay**: Pins are not bound to a specific record id at the wire format level. An attacker who copies a pin from one record to another can pass verification only if the vector and source they paste alongside match the pin. Implementations that need stronger replay protection SHOULD use the reserved `vectorpin.collection_id` / `vectorpin.record_id` / `vectorpin.tenant_id` keys defined in §8, and verifiers MUST enforce them when the caller supplies an expected value (§5 step 8). +- **Time**: The `ts` field is informational *for the pin* but load-bearing for revocation: verifiers MUST consult `(valid_from, valid_until)` on the `kid` registration (§7) and reject pins whose `ts` falls outside that window. 
+- **Key custody**: An attacker with the private signing key can produce arbitrary pins. Treat the signing key as a high-value secret. Reference implementations write private keys with mode `0600`; production deployments SHOULD use a KMS or hardware-backed signer rather than file-system keys. +- **Source-time integrity**: VectorPin attests to the relationship between source and vector at pin time. It does not attest that the source itself was authentic at ingestion. Pair VectorPin with source-side controls (signed ingestion logs, document provenance) where this matters. +- **DoS via malformed pins**: Without the §4.3 size limits, a single hostile pin can exhaust verifier resources. Implementations MUST enforce these limits before reaching the signature path. +- **Domain separation**: The `vectorpin/v2\x00` tag prevents cross-protocol signature lift attacks. A signature produced by a VectorPin signer cannot validate against any non-VectorPin verifier (and vice-versa) even if the same Ed25519 key is used. Operators are NOT required to use VectorPin-only keys, but doing so is RECOMMENDED. + +## 10. Key distribution + +The protocol assumes a verifier has access to a registry mapping `kid` to `(public_key, valid_from, valid_until)`. How that registry is populated is out of scope, but the following SHOULD apply to any production deployment: + +- **Fingerprint format**: Operators identifying a key out of band (Slack, email, ticket) SHOULD use `SHA-256(pubkey_bytes)` truncated to the first 16 hex digits, formatted as four colon-separated quads, e.g. `1f3a:7b22:9e0d:c4f1`. +- **Production registries SHOULD reference a transparency log entry** (e.g., sigstore Rekor) for each `kid` registration and revocation. The log entry binds the key material to a publicly observable, append-only history, allowing downstream verifiers to detect a malicious registry rollback. +- **Trust-on-first-use (TOFU) is NOT RECOMMENDED for new pins** unless the operator has explicitly opted in. 
A verifier that auto-registers any `kid` it encounters provides no integrity guarantee — it is a checksum, not a signature. +- **Per-tenant key separation**: Multi-tenant deployments SHOULD issue separate `kid`s per tenant rather than share a single producer key, so that compromise of one tenant's environment cannot forge pins for another tenant. -## 10. Versioning +## 11. Versioning -This is protocol version 1. Future versions MAY: +This is protocol version 2. Future versions MAY: - Add new optional fields under `extra`-style namespaces. - Add new dtype identifiers. - Add new signature/hash algorithms (with corresponding identifiers). -A change is breaking iff a v1 verifier would silently accept a v2 pin as valid when the v2 pin's additional semantics matter. Such changes MUST bump the major version. +A change is breaking iff a v2 verifier would silently accept a v3 pin as valid when the v3 pin's additional semantics matter. Such changes MUST bump the major version AND change the domain separator (§2). Including `v` in the signed canonical bytes (§4.2) plus the size limits (§4.3) prevent downgrade attacks where an attacker strips new fields and presents the remainder to an older verifier. + +## 12. Changes from v1 + +The v1 → v2 wire-format break addresses these audit-identified issues: + +1. **`v` was not in the signed payload** — v1 verifiers could not detect a downgrade where an attacker flipped `v: 2` to `v: 1` on a v2 pin. v2 includes `v` in the canonical bytes (§4.2). +2. **`kid` was not in the signed payload** — an attacker who could swap `(kid, sig)` pairs could re-attribute a pin to a different producer. v2 includes `kid` in the canonical bytes. +3. **No domain separator** — a VectorPin signature was structurally identical to any other "signed canonical JSON" message, enabling cross-protocol signature reuse against sister Trust-Stack protocols. v2 prepends a 13-byte `vectorpin/v2\x00` tag. +4.
**Float canonicalization underspecified** — v1 said "little-endian, contiguous" but didn't specify NaN/Inf handling. v2 requires rejection of NaN/Inf at sign time; `-0.0` and `+0.0` are distinct. +5. **String canonicalization underspecified** — v1 required NFC for `source` only. v2 requires NFC for every string field, plus rejection of control characters and bidi overrides. +6. **`extra` value types underspecified** — v1 said "object" with no constraint. v2 is strictly `map<string, string>`, with rejection of non-string values at parse. +7. **Timestamp format underspecified** — v1 allowed any RFC 3339 string. v2 requires exact pattern `YYYY-MM-DDTHH:MM:SSZ`. +8. **Unknown top-level fields permitted** — v1 was silent. v2 verifiers MUST reject unknown top-level keys. +9. **Size limits absent** — v1 had no DoS protection. v2 specifies size limits (§4.3) as a verifier MUST. + +v1 pins are not directly verifiable by a strict v2 verifier. Implementations MAY ship a legacy v1 verifier for migration purposes; it MUST be opt-in. diff --git a/rust/vectorpin/examples/basic_usage.rs b/rust/vectorpin/examples/basic_usage.rs index 0eb2b68..1f7b9c1 100644 --- a/rust/vectorpin/examples/basic_usage.rs +++ b/rust/vectorpin/examples/basic_usage.rs @@ -1,9 +1,10 @@ // Copyright 2025 Jascha Wanger / Tarnover, LLC // SPDX-License-Identifier: Apache-2.0 -//! Mirror of `examples/basic_usage.py` — runs the same four scenarios. +//! Mirror of `examples/basic_usage.py` — runs the same scenarios against +//! the v2 wire format. -use vectorpin::{Pin, Signer, Verifier}; +use vectorpin::{Pin, Signer, VerifyOptions, Verifier}; fn main() { let embedding: Vec = (0..128).map(|i| (i as f32) * 0.01).collect(); @@ -48,6 +49,26 @@ fn main() { let r = verifier.verify_signature(&rogue_pin); println!("4. forged with wrong key -> {:?}", r); + // 5. replay-protection: pin with a record_id; verifier enforces a + // different expected record_id.
+ let opts = vectorpin::signer::PinOptions { + extra: [("vectorpin.record_id".to_string(), "rec-1".to_string())] + .into_iter() + .collect(), + ..vectorpin::signer::PinOptions::default() + }; + let scoped_pin = signer + .pin_with_options(source, "m", embedding.as_slice(), opts) + .expect("scoped pin"); + let r = verifier.verify( + &scoped_pin, + VerifyOptions { + expected_record_id: Some("rec-other"), + ..VerifyOptions::default() + }, + ); + println!("5. record_id mismatch -> {:?}", r); + let restored = Pin::from_json(&pin.to_json()).expect("round trip"); assert_eq!(restored, pin); println!("\nPin round-trip via JSON: OK"); diff --git a/rust/vectorpin/src/attestation.rs b/rust/vectorpin/src/attestation.rs index 7ebd65a..11f191b 100644 --- a/rust/vectorpin/src/attestation.rs +++ b/rust/vectorpin/src/attestation.rs @@ -1,66 +1,82 @@ // Copyright 2025 Jascha Wanger / Tarnover, LLC // SPDX-License-Identifier: Apache-2.0 -//! Pin attestation data structures and canonical serialization. +//! Pin attestation data structures and canonical serialization (v2). //! -//! A [`Pin`] is a JSON object with a header (the signed payload) plus a -//! key id and a signature. The header canonicalizes to a deterministic -//! byte sequence — sorted keys, no whitespace, raw UTF-8 (non-ASCII -//! is *not* escaped to `\uXXXX`) — that the Python, Rust, and -//! TypeScript reference implementations agree on byte-for-byte. +//! A [`Pin`] is a JSON object whose **header** (every field except `sig`) +//! canonicalizes to a deterministic byte sequence the Ed25519 signature +//! commits to. Protocol v2 — implemented here — prepends a 13-byte +//! domain separator ([`DOMAIN_TAG`]) and binds both `v` and `kid` into +//! the signed bytes to defeat downgrade and key-swap attacks. //! -//! That deterministic byte sequence is what gets signed by Ed25519, not -//! the JSON wire form. Re-serializing a pin (different whitespace, -//! different key order) therefore does *not* invalidate the signature -//! 
as long as the canonical form is recoverable. -//! -//! For the full wire-format specification — every field, every supported -//! dtype, the exact canonicalization algorithm — see -//! [`docs/spec.md`](https://github.com/ThirdKeyAI/VectorPin/blob/main/docs/spec.md). -//! -//! # Example -//! -//! ``` -//! use vectorpin::{Pin, Signer}; -//! -//! let signer = Signer::generate("demo".to_string()); -//! let v: Vec = vec![1.0, 2.0, 3.0]; -//! let pin = signer.pin("hello", "test-model", v.as_slice()).unwrap(); -//! -//! // Compact JSON for storage in your vector DB metadata. -//! let json: String = pin.to_json(); -//! assert!(!json.contains(": ")); -//! assert!(!json.contains(", ")); -//! -//! // Round-trip through wire form preserves the pin exactly. -//! let parsed = Pin::from_json(&json).unwrap(); -//! assert_eq!(pin, parsed); -//! ``` +//! See [`docs/spec.md`](https://github.com/ThirdKeyAI/VectorPin/blob/main/docs/spec.md) +//! §4.2 for the exact canonicalization rules. v2 is a wire-format break +//! with v1; a [`legacy_v1`] submodule re-emits the older canonical bytes +//! for migration verifiers only. use std::collections::BTreeMap; use base64::Engine; use serde::{Deserialize, Serialize}; +use unicode_normalization::{is_nfc, UnicodeNormalization}; -/// Protocol version implemented by this crate. Verifiers reject pins -/// whose `v` field does not match. -pub const PROTOCOL_VERSION: u32 = 1; +/// Protocol version implemented by this crate. +pub const PROTOCOL_VERSION: u32 = 2; -/// The signed portion of a [`Pin`]. +/// Domain separator prepended to canonical JSON before signing. /// -/// Two pins are considered equivalent iff their headers canonicalize to -/// identical bytes. Optional fields ([`model_hash`](Self::model_hash), -/// [`extra`](Self::extra)) are omitted from the canonical form when -/// unset, never written as `null` — this matters because adding a -/// `null` would change the byte sequence the signature commits to. 
/// Domain-separation prefix placed in front of the canonical JSON
/// before the bytes are signed.
///
/// The tag is the 12 ASCII bytes of `"vectorpin/v2"` followed by a single
/// NUL, 13 bytes in total. The spec text calls this "14 bytes" in §2 and
/// §4.2; that is a known typo and this byte literal is the contract.
/// Ports in other languages MUST emit exactly these bytes.
pub const DOMAIN_TAG: &[u8] = b"vectorpin/v2\x00";

// Build-time guard: resizing the tag by accident breaks compilation.
const _: () = assert!(DOMAIN_TAG.len() == 13);

// ---- size limits (spec §4.3) ----

/// Largest accepted JSON wire form of a pin, in bytes (64 KiB).
pub const MAX_PIN_JSON_BYTES: usize = 64 * 1024;
/// Largest accepted number of `extra` entries.
pub const MAX_EXTRA_ENTRIES: usize = 32;
/// Largest accepted UTF-8 byte length of one `extra` key.
pub const MAX_EXTRA_KEY_BYTES: usize = 128;
/// Largest accepted UTF-8 byte length of one `extra` value.
pub const MAX_EXTRA_VALUE_BYTES: usize = 1024;
/// Upper bound on `vec_dim` (2^20).
pub const MAX_VEC_DIM: u32 = 1 << 20;
/// Byte length of an Ed25519 signature.
pub const SIG_LEN: usize = 64;

/// Top-level keys a v2 pin may carry (§4.1); the parser rejects any
/// key that is not listed here.
const ALLOWED_TOP_LEVEL: &[&str] = &[
    "v",
    "kid",
    "model",
    "model_hash",
    "source_hash",
    "vec_hash",
    "vec_dtype",
    "vec_dim",
    "ts",
    "extra",
    "sig",
];

/// Accepted values of `vec_dtype`.
const ALLOWED_DTYPES: &[&str] = &["f32", "f64"];
pub v: u32, + /// Signing-key identifier — bound into the signature in v2. + pub kid: String, /// Embedding model identifier. pub model: String, /// Optional content hash of the model weights. @@ -74,9 +90,9 @@ pub struct PinHeader { pub vec_dtype: String, /// Embedding dimensionality. pub vec_dim: u32, - /// RFC 3339 / ISO 8601 timestamp in UTC, e.g. `"2026-05-05T12:00:00Z"`. + /// UTC timestamp matching `YYYY-MM-DDTHH:MM:SSZ` exactly. pub ts: String, - /// Producer-defined string-to-string metadata, signed alongside the + /// Producer-defined string-to-string metadata. Signed alongside the /// rest of the header. Omitted from the canonical form when empty. #[serde(skip_serializing_if = "BTreeMap::is_empty", default)] pub extra: BTreeMap, @@ -85,17 +101,33 @@ pub struct PinHeader { impl PinHeader { /// Stable byte representation for signing/verifying. /// - /// JSON with sorted keys and no whitespace. `BTreeMap` gives us - /// sorted `extra` for free; field order is fixed by hand below to - /// match the Python reference. + /// Returns `DOMAIN_TAG || canonical_json(header)` where the JSON has + /// lexicographically sorted keys, no whitespace, and raw UTF-8 (not + /// `\uXXXX`) for any non-ASCII code points that are not in the + /// JSON-mandatory escape set. NFC normalization is applied to every + /// string field at canonicalization time so the bytes match what a + /// fresh-from-spec implementation would emit. pub fn canonicalize(&self) -> Vec { - // Manually build a sorted JSON object — this is the contract the - // Python implementation also follows. Using `serde_json::to_vec` - // on the struct directly would emit fields in declaration order, - // not lexicographic order, which would break compatibility. 
- let mut entries: Vec<(&'static str, serde_json::Value)> = Vec::new(); + let mut out = Vec::with_capacity(DOMAIN_TAG.len() + 256); + out.extend_from_slice(DOMAIN_TAG); + out.extend_from_slice(&self.canonical_json_body()); + out + } + + fn canonical_json_body(&self) -> Vec { + // serde_json::Map sorts its keys in the default build and keeps + // insertion order under `preserve_order`; inserting in + // lexicographic order ourselves yields sorted output either way. + let mut entries: Vec<(&str, serde_json::Value)> = Vec::new(); entries.push(("v", serde_json::Value::Number(self.v.into()))); - entries.push(("model", serde_json::Value::String(self.model.clone()))); + entries.push(( + "kid", + serde_json::Value::String(nfc_string(&self.kid)), + )); + entries.push(( + "model", + serde_json::Value::String(nfc_string(&self.model)), + )); if let Some(h) = &self.model_hash { entries.push(("model_hash", serde_json::Value::String(h.clone()))); } @@ -103,26 +135,33 @@ impl PinHeader { "source_hash", serde_json::Value::String(self.source_hash.clone()), )); - entries.push(("vec_hash", serde_json::Value::String(self.vec_hash.clone()))); + entries.push(( + "vec_hash", + serde_json::Value::String(self.vec_hash.clone()), + )); entries.push(( "vec_dtype", serde_json::Value::String(self.vec_dtype.clone()), )); entries.push(("vec_dim", serde_json::Value::Number(self.vec_dim.into()))); - entries.push(("ts", serde_json::Value::String(self.ts.clone()))); + entries.push(( + "ts", + serde_json::Value::String(nfc_string(&self.ts)), + )); if !self.extra.is_empty() { - // BTreeMap iterates in sorted key order; preserve that as a - // serde_json::Map (which is also sorted in our build via the - // preserve_order feature being OFF — the default). + // NFC each key and value, then re-sort by NFC'd key.
+ let mut nfc_entries: Vec<(String, String)> = self + .extra + .iter() + .map(|(k, v)| (nfc_string(k), nfc_string(v))) + .collect(); + nfc_entries.sort_by(|a, b| a.0.cmp(&b.0)); let mut m = serde_json::Map::new(); - for (k, val) in &self.extra { - m.insert(k.clone(), serde_json::Value::String(val.clone())); + for (k, val) in nfc_entries { + m.insert(k, serde_json::Value::String(val)); } entries.push(("extra", serde_json::Value::Object(m))); } - - // Sort the top-level entries lexicographically. This is the rule - // the Python implementation follows via `sort_keys=True`. entries.sort_by(|a, b| a.0.cmp(b.0)); let mut map = serde_json::Map::with_capacity(entries.len()); for (k, v) in entries { @@ -133,183 +172,615 @@ impl PinHeader { } } +fn nfc_string(s: &str) -> String { + s.nfc().collect() +} + /// A signed VectorPin attestation. /// -/// Serialize with [`Pin::to_json`] and store the resulting string -/// alongside the embedding in vector-store metadata. On read, parse -/// with [`Pin::from_json`] and hand to [`Verifier::verify_full`](crate::Verifier::verify_full). -/// -/// # Example -/// -/// ``` -/// use vectorpin::{Pin, Signer, Verifier}; -/// -/// let signer = Signer::generate("k1".to_string()); -/// let v: Vec = vec![1.0, 2.0, 3.0]; -/// let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); -/// -/// let mut verifier = Verifier::new(); -/// verifier.add_key(signer.key_id(), signer.public_key_bytes()); -/// assert!(verifier.verify_signature(&Pin::from_json(&pin.to_json()).unwrap()).is_ok()); -/// ``` +/// Serialize with [`Pin::to_json`] and store alongside the embedding in +/// vector-store metadata. On read, parse with [`Pin::from_json`] and +/// hand to [`Verifier::verify`](crate::Verifier::verify) (or one of its +/// convenience wrappers). #[derive(Debug, Clone, PartialEq, Eq)] pub struct Pin { - /// The signed payload. + /// The signed payload, including `v` and `kid`. 
pub header: PinHeader, - /// Identifier of the signing key — verifiers route to a public key by `kid`. - pub kid: String, - /// Raw Ed25519 signature bytes (64 bytes). + /// Raw Ed25519 signature bytes (exactly 64 bytes). pub sig: Vec, } -/// URL-safe base64, padding stripped, matching the Python encoder. +impl Pin { + /// Convenience accessor — `kid` lives on the header in v2. + pub fn kid(&self) -> &str { + &self.header.kid + } + + /// Compact JSON encoding suitable for vector-DB metadata. + pub fn to_json(&self) -> String { + let mut entries: Vec<(&str, serde_json::Value)> = Vec::new(); + entries.push(("v", serde_json::Value::Number(self.header.v.into()))); + entries.push(("kid", serde_json::Value::String(self.header.kid.clone()))); + entries.push(( + "model", + serde_json::Value::String(self.header.model.clone()), + )); + if let Some(h) = &self.header.model_hash { + entries.push(("model_hash", serde_json::Value::String(h.clone()))); + } + entries.push(( + "source_hash", + serde_json::Value::String(self.header.source_hash.clone()), + )); + entries.push(( + "vec_hash", + serde_json::Value::String(self.header.vec_hash.clone()), + )); + entries.push(( + "vec_dtype", + serde_json::Value::String(self.header.vec_dtype.clone()), + )); + entries.push(( + "vec_dim", + serde_json::Value::Number(self.header.vec_dim.into()), + )); + entries.push(("ts", serde_json::Value::String(self.header.ts.clone()))); + if !self.header.extra.is_empty() { + let mut m = serde_json::Map::new(); + for (k, val) in &self.header.extra { + m.insert(k.clone(), serde_json::Value::String(val.clone())); + } + entries.push(("extra", serde_json::Value::Object(m))); + } + entries.push(("sig", serde_json::Value::String(b64url_encode(&self.sig)))); + entries.sort_by(|a, b| a.0.cmp(b.0)); + let mut map = serde_json::Map::with_capacity(entries.len()); + for (k, v) in entries { + map.insert(k.to_string(), v); + } + serde_json::to_string(&serde_json::Value::Object(map)) + .expect("JSON serialization of 
well-formed map cannot fail") + } + + /// Parse a pin from its compact JSON wire form, enforcing every v2 + /// wire-format rule (§4.1, §4.3, §3.1). + pub fn from_json(s: &str) -> Result { + // Pre-parse size check (§4.3). Reject before allocating the parse tree. + if s.len() > MAX_PIN_JSON_BYTES { + return Err(AttestationError::SizeLimit { + limit: MAX_PIN_JSON_BYTES, + got: s.len(), + }); + } + let value: serde_json::Value = + serde_json::from_str(s).map_err(AttestationError::Json)?; + Self::from_value(value) + } + + /// Parse a pin from a parsed `serde_json::Value`. Same validation + /// rules as [`Pin::from_json`]. + pub fn from_value(value: serde_json::Value) -> Result { + parse_pin_strict(value, PROTOCOL_VERSION) + } +} + +/// URL-safe base64, padding stripped — matches every other reference port. pub(crate) fn b64url_encode(data: &[u8]) -> String { base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(data) } -/// Inverse of [`b64url_encode`]. Restores stripped padding internally. pub(crate) fn b64url_decode(s: &str) -> Result, AttestationError> { base64::engine::general_purpose::URL_SAFE_NO_PAD .decode(s.as_bytes()) .map_err(AttestationError::Base64) } -/// Errors produced when parsing or serializing pins. +/// Errors produced when parsing, serializing, or canonicalizing pins. #[derive(Debug, thiserror::Error)] pub enum AttestationError { - /// Pin uses a protocol version this crate does not support. + /// Pin uses a protocol version this code path does not accept. #[error("unsupported pin version: got {got}, expected {expected}")] UnsupportedVersion { /// Version number found in the pin. got: u32, - /// Version number this build supports. + /// Version this build accepts. expected: u32, }, /// JSON parsing failure. #[error("malformed pin JSON: {0}")] - Json(#[from] serde_json::Error), + Json(#[source] serde_json::Error), /// Base64 decode failure (signature or related field). 
/// Report whether `s` is a well-formed hash string: the literal prefix
/// `sha256:` followed by exactly 64 lowercase hexadecimal digits.
///
/// Uppercase hex digits, any other prefix, and any digest length other
/// than 64 are rejected.
fn is_valid_hash_string(s: &str) -> bool {
    // `strip_prefix` hands back the digest only when the prefix matched,
    // so there is no hand-counted byte offset to keep in sync with it.
    match s.strip_prefix("sha256:") {
        Some(hex) => {
            hex.len() == 64 && hex.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
        }
        None => false,
    }
}
+fn is_valid_ts(s: &str) -> bool { + let b = s.as_bytes(); + if b.len() != 20 { + return false; + } + let dig = |i: usize| b[i].is_ascii_digit(); + dig(0) + && dig(1) + && dig(2) + && dig(3) + && b[4] == b'-' + && dig(5) + && dig(6) + && b[7] == b'-' + && dig(8) + && dig(9) + && b[10] == b'T' + && dig(11) + && dig(12) + && b[13] == b':' + && dig(14) + && dig(15) + && b[16] == b':' + && dig(17) + && dig(18) + && b[19] == b'Z' +} + +/// Reject control chars (U+0000-U+001F) and bidi overrides +/// (U+202A-U+202E, U+2066-U+2069) per §3.1. +pub(crate) fn check_string_safe(value: &str, field: &str) -> Result<(), AttestationError> { + for ch in value.chars() { + let cp = ch as u32; + if cp < 0x20 { + return Err(AttestationError::ControlChar(format!( + "{field}: U+{cp:04X}" + ))); + } + if (0x202A..=0x202E).contains(&cp) || (0x2066..=0x2069).contains(&cp) { + return Err(AttestationError::BidiOverride(format!( + "{field}: U+{cp:04X}" + ))); + } + } + Ok(()) +} + +/// Reject strings not already in NFC form. +pub(crate) fn check_nfc(value: &str, field: &str) -> Result<(), AttestationError> { + if !is_nfc(value) { + Err(AttestationError::NotNfc(field.to_string())) + } else { + Ok(()) + } +} + +fn parse_pin_strict( + value: serde_json::Value, + expected_version: u32, +) -> Result { + let obj = match value { + serde_json::Value::Object(m) => m, + _ => { + return Err(AttestationError::InvalidField { + field: "(root)", + detail: "pin must be a JSON object".into(), + }) + } + }; + + // 1. Reject unknown top-level fields (§4.1). + for k in obj.keys() { + if !ALLOWED_TOP_LEVEL.iter().any(|allowed| allowed == k) { + return Err(AttestationError::UnknownTopLevelField(k.clone())); + } + } + + // 2. Version check. + let v_raw = obj + .get("v") + .ok_or(AttestationError::MissingField("v"))? 
+ .as_u64() + .ok_or(AttestationError::InvalidField { + field: "v", + detail: "must be an unsigned integer".into(), + })?; + let v = v_raw as u32; + if v != expected_version { + return Err(AttestationError::UnsupportedVersion { + got: v, + expected: expected_version, + }); + } + + // 3. String fields with NFC + control-char + bidi checks. + let kid = string_field(&obj, "kid")?; + check_string_safe(&kid, "kid")?; + check_nfc(&kid, "kid")?; + + let model = string_field(&obj, "model")?; + check_string_safe(&model, "model")?; + check_nfc(&model, "model")?; + + let ts = string_field(&obj, "ts")?; + if !is_valid_ts(&ts) { + return Err(AttestationError::BadTimestamp(ts)); + } + check_string_safe(&ts, "ts")?; + check_nfc(&ts, "ts")?; + + // 4. Hashes. + let source_hash = string_field(&obj, "source_hash")?; + if !is_valid_hash_string(&source_hash) { + return Err(AttestationError::InvalidField { + field: "source_hash", + detail: "must match 'sha256:<64 lowercase hex>'".into(), + }); + } + let vec_hash = string_field(&obj, "vec_hash")?; + if !is_valid_hash_string(&vec_hash) { + return Err(AttestationError::InvalidField { + field: "vec_hash", + detail: "must match 'sha256:<64 lowercase hex>'".into(), + }); + } + let model_hash = match obj.get("model_hash") { + None | Some(serde_json::Value::Null) => None, + Some(serde_json::Value::String(s)) => { + if !is_valid_hash_string(s) { + return Err(AttestationError::InvalidField { + field: "model_hash", + detail: "must match 'sha256:<64 lowercase hex>'".into(), + }); + } + Some(s.clone()) + } + Some(_) => { + return Err(AttestationError::InvalidField { + field: "model_hash", + detail: "must be a string when present".into(), + }) + } + }; + + // 5. vec_dtype / vec_dim. 
+ let vec_dtype = string_field(&obj, "vec_dtype")?; + if !ALLOWED_DTYPES.contains(&vec_dtype.as_str()) { + return Err(AttestationError::InvalidField { + field: "vec_dtype", + detail: format!("must be one of {ALLOWED_DTYPES:?}; got {vec_dtype:?}"), + }); + } + + // bool is JSON-distinct from integer in serde_json, but be defensive. + let vec_dim_raw = obj + .get("vec_dim") + .ok_or(AttestationError::MissingField("vec_dim"))?; + if vec_dim_raw.is_boolean() { + return Err(AttestationError::InvalidField { + field: "vec_dim", + detail: "must be an integer, not a boolean".into(), + }); + } + let vec_dim_u64 = vec_dim_raw.as_u64().ok_or(AttestationError::InvalidField { + field: "vec_dim", + detail: "must be a positive integer".into(), + })?; + if vec_dim_u64 == 0 || vec_dim_u64 > MAX_VEC_DIM as u64 { + return Err(AttestationError::InvalidField { + field: "vec_dim", + detail: format!("must be in (0, {MAX_VEC_DIM}]; got {vec_dim_u64}"), + }); + } + let vec_dim = vec_dim_u64 as u32; + + // 6. extra: map, bounded. 
+ let extra = match obj.get("extra") { + None | Some(serde_json::Value::Null) => BTreeMap::new(), + Some(serde_json::Value::Object(m)) => { + if m.len() > MAX_EXTRA_ENTRIES { + return Err(AttestationError::SizeLimit { + limit: MAX_EXTRA_ENTRIES, + got: m.len(), + }); + } + let mut out = BTreeMap::new(); + for (k, val) in m { + let val_s = val.as_str().ok_or(AttestationError::InvalidField { + field: "extra", + detail: format!("value for key {k:?} must be a string"), + })?; + if k.len() > MAX_EXTRA_KEY_BYTES { + return Err(AttestationError::SizeLimit { + limit: MAX_EXTRA_KEY_BYTES, + got: k.len(), + }); + } + if val_s.len() > MAX_EXTRA_VALUE_BYTES { + return Err(AttestationError::SizeLimit { + limit: MAX_EXTRA_VALUE_BYTES, + got: val_s.len(), + }); + } + check_string_safe(k, "extra key")?; + check_nfc(k, "extra key")?; + check_string_safe(val_s, "extra value")?; + check_nfc(val_s, "extra value")?; + out.insert(k.clone(), val_s.to_string()); + } + out + } + Some(_) => { + return Err(AttestationError::InvalidField { + field: "extra", + detail: "must be a JSON object".into(), + }) + } + }; + + // 7. Signature: base64, exactly 64 bytes. 
+ let sig_str = string_field(&obj, "sig")?; + let sig = b64url_decode(&sig_str)?; + if sig.len() != SIG_LEN { + return Err(AttestationError::InvalidField { + field: "sig", + detail: format!("must decode to exactly {SIG_LEN} bytes; got {}", sig.len()), + }); + } + + Ok(Pin { + header: PinHeader { + v, + kid, + model, + model_hash, + source_hash, + vec_hash, + vec_dtype, + vec_dim, + ts, + extra, + }, + sig, + }) +} + +fn string_field( + obj: &serde_json::Map, + name: &'static str, +) -> Result { + match obj.get(name) { + Some(serde_json::Value::String(s)) if !s.is_empty() => Ok(s.clone()), + Some(serde_json::Value::String(_)) => Err(AttestationError::InvalidField { + field: name, + detail: "must be a non-empty string".into(), + }), + Some(_) => Err(AttestationError::InvalidField { + field: name, + detail: "must be a string".into(), + }), + None => Err(AttestationError::MissingField(name)), + } +} + +/// Legacy v1 canonicalization, kept exclusively for the opt-in migration +/// verifier. v1 pins are NOT accepted by the default parser. +pub mod legacy_v1 { + use super::*; + + /// v1 protocol version constant. Distinct from [`PROTOCOL_VERSION`] + /// so callers cannot accidentally confuse the two. + pub const V1_PROTOCOL_VERSION: u32 = 1; + + /// Reconstruct v1 canonical bytes for a parsed v1 pin. /// - /// Output is sorted-key, whitespace-free JSON exactly matching what - /// the Python implementation emits, so `to_json()` is deterministic - /// across implementations. - pub fn to_json(&self) -> String { + /// v1 differed from v2 in three load-bearing ways: + /// - No domain-tag prefix. + /// - `kid` was NOT in the signed bytes. + /// - No strict NFC / control-char / bidi enforcement at parse time. + /// + /// The byte sequence emitted here matches what the Python v1 reference + /// emitted, so historical pins continue to verify against their + /// original signatures. 
+ pub fn canonicalize_v1(header: &PinHeader) -> Vec { let mut entries: Vec<(&str, serde_json::Value)> = Vec::new(); - entries.push(("v", serde_json::Value::Number(self.header.v.into()))); + entries.push(("v", serde_json::Value::Number(header.v.into()))); entries.push(( "model", - serde_json::Value::String(self.header.model.clone()), + serde_json::Value::String(header.model.clone()), )); - if let Some(h) = &self.header.model_hash { + if let Some(h) = &header.model_hash { entries.push(("model_hash", serde_json::Value::String(h.clone()))); } entries.push(( "source_hash", - serde_json::Value::String(self.header.source_hash.clone()), + serde_json::Value::String(header.source_hash.clone()), )); entries.push(( "vec_hash", - serde_json::Value::String(self.header.vec_hash.clone()), + serde_json::Value::String(header.vec_hash.clone()), )); entries.push(( "vec_dtype", - serde_json::Value::String(self.header.vec_dtype.clone()), + serde_json::Value::String(header.vec_dtype.clone()), )); - entries.push(( - "vec_dim", - serde_json::Value::Number(self.header.vec_dim.into()), - )); - entries.push(("ts", serde_json::Value::String(self.header.ts.clone()))); - if !self.header.extra.is_empty() { + entries.push(("vec_dim", serde_json::Value::Number(header.vec_dim.into()))); + entries.push(("ts", serde_json::Value::String(header.ts.clone()))); + if !header.extra.is_empty() { let mut m = serde_json::Map::new(); - for (k, val) in &self.header.extra { + for (k, val) in &header.extra { m.insert(k.clone(), serde_json::Value::String(val.clone())); } entries.push(("extra", serde_json::Value::Object(m))); } - entries.push(("kid", serde_json::Value::String(self.kid.clone()))); - entries.push(("sig", serde_json::Value::String(b64url_encode(&self.sig)))); entries.sort_by(|a, b| a.0.cmp(b.0)); let mut map = serde_json::Map::with_capacity(entries.len()); for (k, v) in entries { map.insert(k.to_string(), v); } - serde_json::to_string(&serde_json::Value::Object(map)) + 
serde_json::to_vec(&serde_json::Value::Object(map)) .expect("JSON serialization of well-formed map cannot fail") } - /// Parse a pin from its compact JSON wire form. - pub fn from_json(s: &str) -> Result { - let value: serde_json::Value = serde_json::from_str(s)?; - Self::from_value(value) + /// Parse a v1 pin JSON string into a [`Pin`] under the looser v1 + /// rules: no strict NFC / control-char / ts enforcement. + /// + /// Used only by the opt-in [`LegacyV1Verifier`](crate::verifier::LegacyV1Verifier). + pub fn parse_v1_pin(s: &str) -> Result { + if s.len() > MAX_PIN_JSON_BYTES { + return Err(AttestationError::SizeLimit { + limit: MAX_PIN_JSON_BYTES, + got: s.len(), + }); + } + let value: serde_json::Value = + serde_json::from_str(s).map_err(AttestationError::Json)?; + parse_v1_value(value) } - /// Parse a pin from a parsed `serde_json::Value`. - pub fn from_value(value: serde_json::Value) -> Result { - let obj = value - .as_object() - .ok_or(AttestationError::MissingField("(root)"))?; + fn parse_v1_value(value: serde_json::Value) -> Result { + let obj = match value { + serde_json::Value::Object(m) => m, + _ => { + return Err(AttestationError::InvalidField { + field: "(root)", + detail: "pin must be a JSON object".into(), + }) + } + }; - let v = obj + let v_raw = obj .get("v") - .and_then(|x| x.as_u64()) - .ok_or(AttestationError::MissingField("v"))? as u32; - if v != PROTOCOL_VERSION { + .ok_or(AttestationError::MissingField("v"))? + .as_u64() + .ok_or(AttestationError::InvalidField { + field: "v", + detail: "must be an unsigned integer".into(), + })?; + let v = v_raw as u32; + if v != V1_PROTOCOL_VERSION { return Err(AttestationError::UnsupportedVersion { got: v, - expected: PROTOCOL_VERSION, + expected: V1_PROTOCOL_VERSION, }); } - fn s_field( - obj: &serde_json::Map, - name: &'static str, - ) -> Result { - obj.get(name) - .and_then(|x| x.as_str()) - .map(str::to_owned) - .ok_or(AttestationError::MissingField(name)) - } + // v1: minimal validation. 
Pull each field by name, accept what's there. + let model = obj + .get("model") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("model"))? + .to_owned(); + let kid = obj + .get("kid") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("kid"))? + .to_owned(); + let source_hash = obj + .get("source_hash") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("source_hash"))? + .to_owned(); + let vec_hash = obj + .get("vec_hash") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("vec_hash"))? + .to_owned(); + let vec_dtype = obj + .get("vec_dtype") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("vec_dtype"))? + .to_owned(); + let vec_dim = obj + .get("vec_dim") + .and_then(|x| x.as_u64()) + .ok_or(AttestationError::MissingField("vec_dim"))? as u32; + let ts = obj + .get("ts") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("ts"))? + .to_owned(); + let model_hash = obj + .get("model_hash") + .and_then(|x| x.as_str()) + .map(String::from); - let header = PinHeader { - v, - model: s_field(obj, "model")?, - model_hash: obj - .get("model_hash") - .and_then(|x| x.as_str()) - .map(String::from), - source_hash: s_field(obj, "source_hash")?, - vec_hash: s_field(obj, "vec_hash")?, - vec_dtype: s_field(obj, "vec_dtype")?, - vec_dim: obj - .get("vec_dim") - .and_then(|x| x.as_u64()) - .ok_or(AttestationError::MissingField("vec_dim"))? 
as u32, - ts: s_field(obj, "ts")?, - extra: obj - .get("extra") - .and_then(|x| x.as_object()) - .map(|m| { - m.iter() - .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_owned()))) - .collect() - }) - .unwrap_or_default(), - }; + let extra: BTreeMap = obj + .get("extra") + .and_then(|x| x.as_object()) + .map(|m| { + m.iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_owned()))) + .collect() + }) + .unwrap_or_default(); - let kid = s_field(obj, "kid")?; - let sig = b64url_decode(s_field(obj, "sig")?.as_str())?; + let sig_str = obj + .get("sig") + .and_then(|x| x.as_str()) + .ok_or(AttestationError::MissingField("sig"))?; + let sig = b64url_decode(sig_str)?; + if sig.len() != SIG_LEN { + return Err(AttestationError::InvalidField { + field: "sig", + detail: format!("must decode to exactly {SIG_LEN} bytes; got {}", sig.len()), + }); + } - Ok(Pin { header, kid, sig }) + Ok(Pin { + header: PinHeader { + v, + kid, + model, + model_hash, + source_hash, + vec_hash, + vec_dtype, + vec_dim, + ts, + extra, + }, + sig, + }) } } @@ -320,6 +791,7 @@ mod tests { fn header() -> PinHeader { PinHeader { v: PROTOCOL_VERSION, + kid: "k1".into(), model: "test-model".into(), model_hash: None, source_hash: format!("sha256:{}", "0".repeat(64)), @@ -332,55 +804,26 @@ mod tests { } #[test] - fn canonicalize_is_deterministic() { - let h = header(); - assert_eq!(h.canonicalize(), h.canonicalize()); - } - - #[test] - fn canonicalize_omits_optional_when_unset() { - let raw = String::from_utf8(header().canonicalize()).unwrap(); - assert!(!raw.contains("model_hash")); - assert!(!raw.contains("extra")); + fn domain_tag_is_13_bytes() { + assert_eq!(DOMAIN_TAG.len(), 13); + assert_eq!(DOMAIN_TAG, b"vectorpin/v2\x00"); } #[test] - fn pin_round_trip_via_json() { - let pin = Pin { - header: header(), - kid: "k".into(), - sig: vec![1u8; 64], - }; - let restored = Pin::from_json(&pin.to_json()).unwrap(); - assert_eq!(pin, restored); + fn canonicalize_starts_with_domain_tag() { + 
let c = header().canonicalize(); + assert!(c.starts_with(DOMAIN_TAG)); } #[test] - fn pin_rejects_unsupported_version() { - let bad = serde_json::json!({ - "v": 99, - "model": "x", - "source_hash": format!("sha256:{}", "0".repeat(64)), - "vec_hash": format!("sha256:{}", "1".repeat(64)), - "vec_dtype": "f32", - "vec_dim": 1, - "ts": "2026-05-05T12:00:00Z", - "kid": "k", - "sig": "AA", - }); - let err = Pin::from_value(bad).unwrap_err(); - assert!(matches!(err, AttestationError::UnsupportedVersion { .. })); + fn canonicalize_includes_kid() { + let body = String::from_utf8(header().canonicalize()[DOMAIN_TAG.len()..].to_vec()).unwrap(); + assert!(body.contains("\"kid\":\"k1\"")); } #[test] - fn pin_to_json_is_compact() { - let pin = Pin { - header: header(), - kid: "k".into(), - sig: vec![1u8; 64], - }; - let j = pin.to_json(); - assert!(!j.contains(": ")); - assert!(!j.contains(", ")); + fn canonicalize_is_deterministic() { + let h = header(); + assert_eq!(h.canonicalize(), h.canonicalize()); } } diff --git a/rust/vectorpin/src/lib.rs b/rust/vectorpin/src/lib.rs index b558620..f6d4cf2 100644 --- a/rust/vectorpin/src/lib.rs +++ b/rust/vectorpin/src/lib.rs @@ -10,10 +10,12 @@ //! [steganographic exfiltration attacks](https://doi.org/10.5281/zenodo.20058256) //! that current vector databases ingest without complaint. //! -//! This crate is the **Rust reference implementation** of protocol version 1. +//! This crate is the **Rust reference implementation** of protocol version 2. //! It is byte-for-byte compatible with the Python reference (`pip install //! vectorpin`) and the TypeScript reference (`npm install vectorpin`); a pin //! produced by any of the three implementations verifies on the other two. +//! v2 is a wire-format break with v1; pass through [`verifier::LegacyV1Verifier`] +//! when migrating historical pins. //! Compatibility is enforced by shared test vectors in //! [`testvectors/`](https://github.com/ThirdKeyAI/VectorPin/tree/main/testvectors) //! 
consumed by every port's test suite. @@ -49,6 +51,7 @@ //! None, //! ); //! assert!(result.is_ok()); +//! # let _ = stored; //! ``` //! //! # What a Pin commits to @@ -131,7 +134,7 @@ pub mod hash; pub mod signer; pub mod verifier; -pub use attestation::{Pin, PinHeader, PROTOCOL_VERSION}; +pub use attestation::{AttestationError, Pin, PinHeader, DOMAIN_TAG, PROTOCOL_VERSION}; pub use hash::{canonical_vector_bytes, hash_text, hash_vector, VecDtype}; pub use signer::{Signer, SignerError}; -pub use verifier::{Verifier, VerifyError}; +pub use verifier::{KeyEntry, LegacyV1Verifier, VerifyError, VerifyOptions, Verifier}; diff --git a/rust/vectorpin/src/signer.rs b/rust/vectorpin/src/signer.rs index 5e8d343..893a85e 100644 --- a/rust/vectorpin/src/signer.rs +++ b/rust/vectorpin/src/signer.rs @@ -1,50 +1,30 @@ // Copyright 2025 Jascha Wanger / Tarnover, LLC // SPDX-License-Identifier: Apache-2.0 -//! Pin signing. +//! Pin signing (protocol v2). //! -//! Wraps an Ed25519 signing key plus a `kid` (key id) so verifiers can -//! route signatures during key rotation. Use [`Signer::generate`] for -//! tests and demos; load production keys from a managed secret store -//! via [`Signer::from_private_bytes`]. +//! Wraps an Ed25519 signing key plus a `kid` (key id) that is bound into +//! every pin's canonical bytes. Use [`Signer::generate`] for tests and +//! demos; load production keys via [`Signer::from_private_bytes`] from a +//! managed secret store. //! -//! # Examples +//! v2 sign-time rules (mirroring [`docs/spec.md`](https://github.com/ThirdKeyAI/VectorPin/blob/main/docs/spec.md)): //! -//! ``` -//! use vectorpin::Signer; -//! -//! let signer = Signer::generate("prod-2026-05".to_string()); -//! let v: Vec = vec![0.1, 0.2, 0.3]; -//! let pin = signer.pin("hello", "text-embedding-3-large", v.as_slice()).unwrap(); -//! assert_eq!(pin.kid, "prod-2026-05"); -//! assert_eq!(pin.sig.len(), 64); // Ed25519 signature -//! ``` -//! -//! 
For deterministic signing (test fixtures, reproducible CI builds), -//! use [`PinOptions`] to supply an explicit timestamp and dtype: -//! -//! ``` -//! use vectorpin::signer::{PinOptions, Signer}; -//! use vectorpin::VecDtype; -//! -//! let signer = Signer::generate("test".to_string()); -//! let v: Vec = vec![0.1, 0.2, 0.3]; -//! let opts = PinOptions { -//! dtype: Some(VecDtype::F32), -//! timestamp: Some("2026-05-05T12:00:00Z".to_string()), -//! ..PinOptions::default() -//! }; -//! let pin = signer -//! .pin_with_options("hello", "test-model", v.as_slice(), opts) -//! .unwrap(); -//! assert_eq!(pin.header.ts, "2026-05-05T12:00:00Z"); -//! ``` +//! - All string inputs are NFC-normalized before signing. +//! - Control characters (U+0000..U+001F) and bidi overrides +//! (U+202A..U+202E, U+2066..U+2069) are rejected. +//! - Vectors containing NaN/Inf are rejected — `-0.0` and `+0.0` are +//! distinct values and both valid. +//! - `ts` is always written in the strict `YYYY-MM-DDTHH:MM:SSZ` form. use std::collections::BTreeMap; use ed25519_dalek::{Signer as _, SigningKey, VerifyingKey}; +use unicode_normalization::UnicodeNormalization; -use crate::attestation::{Pin, PinHeader, PROTOCOL_VERSION}; +use crate::attestation::{ + check_nfc, check_string_safe, AttestationError, Pin, PinHeader, PROTOCOL_VERSION, +}; use crate::hash::{hash_text, hash_vector, VecDtype, VectorRef}; /// Errors raised by signer construction or pinning. @@ -56,28 +36,15 @@ pub enum SignerError { /// Private key bytes were the wrong length (must be 32). #[error("private key must be exactly 32 bytes, got {0}")] BadKeyLength(usize), - /// Vector was empty or otherwise unrepresentable. + /// Vector was empty, malformed, or contained NaN/Inf. #[error("invalid vector: {0}")] - InvalidVector(&'static str), + InvalidVector(String), + /// A caller-supplied string violated v2 wire-format rules. 
+ #[error("invalid string input: {0}")] + InvalidString(#[from] AttestationError), } /// Produces signed [`Pin`] attestations. -/// -/// A `Signer` holds one Ed25519 private key plus a stable identifier -/// (`kid`) that gets embedded in every pin it produces. Verifiers use -/// the `kid` to look up the matching public key in their registry, so -/// rotating signing keys is a matter of issuing a new `(kid, key)` pair -/// and accepting both the old and new `kid` values during the rotation -/// window — no protocol changes required. -/// -/// # Secret material -/// -/// [`Signer::generate`] is for tests, demos, and one-off CLI tools. -/// In production, hold the 32-byte private seed in a managed secrets -/// store (HSM, KMS, sealed env var) and instantiate via -/// [`Signer::from_private_bytes`]. [`Signer::private_key_bytes`] is -/// provided for backup/key-export workflows; treat its output as -/// secret. pub struct Signer { signing_key: SigningKey, key_id: String, @@ -85,13 +52,16 @@ pub struct Signer { impl Signer { /// Generate a fresh Ed25519 signer. Tests and demos only. + /// + /// Panics if `key_id` is empty (the API for new pins requires a kid + /// — tests are the only generation path and a panic is acceptable + /// there). pub fn generate(key_id: String) -> Self { - if key_id.is_empty() { - // Match the contract documented for `from_private_bytes`. - // Generation in tests is the only path here so this panic is - // acceptable; from_private_bytes returns Result. - panic!("key_id must be non-empty"); - } + assert!(!key_id.is_empty(), "key_id must be non-empty"); + // Validate the kid against v2 string rules so a generated signer + // can never produce a pin a strict verifier would reject. 
+ check_string_safe(&key_id, "key_id").expect("key_id contains unsafe chars"); + check_nfc(&key_id, "key_id").expect("key_id is not NFC"); let mut rng = rand::rngs::OsRng; Signer { signing_key: SigningKey::generate(&mut rng), @@ -104,12 +74,14 @@ impl Signer { if key_id.is_empty() { return Err(SignerError::EmptyKeyId); } + let nfc: String = key_id.nfc().collect(); + check_string_safe(&nfc, "key_id")?; let bytes: [u8; 32] = raw .try_into() .map_err(|_| SignerError::BadKeyLength(raw.len()))?; Ok(Signer { signing_key: SigningKey::from_bytes(&bytes), - key_id, + key_id: nfc, }) } @@ -129,10 +101,6 @@ impl Signer { } /// Create a [`Pin`] for `(source, model, vector)`. - /// - /// `vector` accepts anything that converts into a [`VectorRef`], - /// which includes `&[f32]` and `&[f64]`. The dtype written into - /// the header defaults to the native dtype of the slice. pub fn pin<'a>( &self, source: &str, @@ -153,37 +121,78 @@ impl Signer { ) -> Result { let vector = vector.into(); if vector.is_empty() { - return Err(SignerError::InvalidVector("empty vector")); + return Err(SignerError::InvalidVector("empty vector".into())); + } + + // NaN/Inf rejection per §3.2. + if !vector_is_finite(vector) { + return Err(SignerError::InvalidVector( + "vector contains NaN or infinity".into(), + )); } let dtype = opts.dtype.unwrap_or_else(|| vector.native_dtype()); + + // NFC normalize string inputs and reject structurally hostile chars. + // The signer tolerates non-NFC input (it normalizes) but rejects + // control chars and bidi overrides so the produced pin always + // parses under the strict verifier. 
+ let model_nfc: String = model.nfc().collect(); + check_string_safe(&model_nfc, "model")?; + if model_nfc.is_empty() { + return Err(SignerError::InvalidString(AttestationError::InvalidField { + field: "model", + detail: "must be non-empty".into(), + })); + } + + let source_nfc: String = source.nfc().collect(); + + let mut extra_nfc: BTreeMap = BTreeMap::new(); + for (k, val) in &opts.extra { + let k_nfc: String = k.nfc().collect(); + let v_nfc: String = val.nfc().collect(); + check_string_safe(&k_nfc, "extra key")?; + check_string_safe(&v_nfc, "extra value")?; + extra_nfc.insert(k_nfc, v_nfc); + } + let ts = opts.timestamp.unwrap_or_else(now_utc_iso8601); + // Defensive: even when we generate the timestamp ourselves, run it + // through the same validators a parser would so locale bugs cannot + // sneak a malformed string through. + check_string_safe(&ts, "ts")?; + check_nfc(&ts, "ts")?; let header = PinHeader { v: PROTOCOL_VERSION, - model: model.to_owned(), + kid: self.key_id.clone(), + model: model_nfc, model_hash: opts.model_hash, - source_hash: hash_text(source), + source_hash: hash_text(&source_nfc), vec_hash: hash_vector(vector, dtype), vec_dtype: dtype.as_str().to_owned(), vec_dim: vector.len() as u32, ts, - extra: opts.extra, + extra: extra_nfc, }; let signature = self.signing_key.sign(&header.canonicalize()); Ok(Pin { header, - kid: self.key_id.clone(), sig: signature.to_bytes().to_vec(), }) } } +fn vector_is_finite(v: VectorRef<'_>) -> bool { + match v { + VectorRef::F32(xs) => xs.iter().all(|x| x.is_finite()), + VectorRef::F64(xs) => xs.iter().all(|x| x.is_finite()), + } +} + /// Optional knobs for [`Signer::pin_with_options`]. -/// -/// Defaults match what `Signer::pin` uses: native dtype, no model hash, -/// current UTC time, no extra metadata. #[derive(Debug, Default, Clone)] pub struct PinOptions { /// Override the canonical dtype the vector is hashed under. 
@@ -199,8 +208,6 @@ pub struct PinOptions { fn now_utc_iso8601() -> String { use std::time::{SystemTime, UNIX_EPOCH}; - // We avoid pulling in `chrono` for one timestamp; this gives the - // same `YYYY-MM-DDTHH:MM:SSZ` format the Python reference emits. let secs = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_secs()) @@ -210,14 +217,13 @@ fn now_utc_iso8601() -> String { } fn unix_to_ymdhms(t: i64) -> (i32, u32, u32, u32, u32, u32) { - // Days since 1970-01-01. let days = (t.div_euclid(86400)) as i32; let secs_of_day = t.rem_euclid(86400) as u32; let h = secs_of_day / 3600; let mi = (secs_of_day % 3600) / 60; let se = secs_of_day % 60; - // Civil from days, see http://howardhinnant.github.io/date_algorithms.html + // Civil from days — howardhinnant.github.io/date_algorithms.html let z = days + 719468; let era = if z >= 0 { z } else { z - 146096 } / 146097; let doe = (z - era * 146097) as u32; @@ -240,7 +246,8 @@ mod tests { let signer = Signer::generate("test".into()); let v: Vec = vec![1.0, 2.0, 3.0]; let pin = signer.pin("hello", "model", v.as_slice()).unwrap(); - assert_eq!(pin.kid, "test"); + assert_eq!(pin.kid(), "test"); + assert_eq!(pin.header.v, PROTOCOL_VERSION); assert_eq!(pin.header.vec_dim, 3); assert_eq!(pin.header.vec_dtype, "f32"); assert_eq!(pin.sig.len(), 64); @@ -259,10 +266,18 @@ mod tests { } #[test] - fn private_seed_round_trip() { + fn signer_rejects_nan() { + let signer = Signer::generate("k".into()); + let v: Vec = vec![1.0, f32::NAN, 3.0]; + let err = signer.pin("x", "m", v.as_slice()).unwrap_err(); + assert!(matches!(err, SignerError::InvalidVector(_))); + } + + #[test] + fn signer_rejects_infinity() { let signer = Signer::generate("k".into()); - let seed = signer.private_key_bytes(); - let restored = Signer::from_private_bytes(&seed, "k".into()).unwrap(); - assert_eq!(signer.public_key_bytes(), restored.public_key_bytes()); + let v: Vec = vec![1.0, f64::INFINITY]; + let err = signer.pin("x", "m", v.as_slice()).unwrap_err(); 
+ assert!(matches!(err, SignerError::InvalidVector(_))); } } diff --git a/rust/vectorpin/src/verifier.rs b/rust/vectorpin/src/verifier.rs index e798c9a..b0a61c8 100644 --- a/rust/vectorpin/src/verifier.rs +++ b/rust/vectorpin/src/verifier.rs @@ -1,123 +1,66 @@ // Copyright 2025 Jascha Wanger / Tarnover, LLC // SPDX-License-Identifier: Apache-2.0 -//! Pin verification. +//! Pin verification (protocol v2). //! -//! Mirrors the Python and TypeScript verifiers: same failure-mode -//! enum, same matching semantics, same support for partial verification -//! (signature-only, signature + vector, full). +//! The default [`Verifier`] accepts only v2 pins. [`LegacyV1Verifier`] +//! is an opt-in migration aid that additionally accepts v1 pins by +//! dispatching them to the legacy canonicalization in +//! [`crate::attestation::legacy_v1`]. //! -//! [`Verifier`] holds a registry of `kid -> public key` so it can verify -//! pins signed under multiple key ids during rotation. Add keys with -//! [`Verifier::add_key`] before calling [`Verifier::verify_full`] or -//! [`Verifier::verify_signature`]; missing-key errors surface as -//! [`VerifyError::UnknownKey`]. -//! -//! # Examples -//! -//! Full verification — signature, vector hash, source hash: -//! -//! ``` -//! use vectorpin::{Signer, Verifier}; -//! -//! let signer = Signer::generate("k1".to_string()); -//! let v: Vec = vec![1.0, 2.0, 3.0]; -//! let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); -//! -//! let mut verifier = Verifier::new(); -//! verifier.add_key(signer.key_id(), signer.public_key_bytes()); -//! verifier -//! .verify_full(&pin, Some("hello"), Some(v.as_slice()), None) -//! .expect("honest verify must pass"); -//! ``` -//! -//! Signature-only verification (when ground-truth source/vector are not -//! on hand but producer identity still matters): -//! -//! ``` -//! # use vectorpin::{Signer, Verifier}; -//! # let signer = Signer::generate("k1".to_string()); -//! # let v: Vec = vec![1.0, 2.0, 3.0]; -//! 
# let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); -//! # let mut verifier = Verifier::new(); -//! # verifier.add_key(signer.key_id(), signer.public_key_bytes()); -//! verifier.verify_signature(&pin).unwrap(); -//! ``` -//! -//! Tampered vector — caught by [`VerifyError::VectorTampered`]: -//! -//! ``` -//! # use vectorpin::{Signer, Verifier, VerifyError}; -//! # let signer = Signer::generate("k1".to_string()); -//! # let v: Vec = vec![1.0, 2.0, 3.0]; -//! # let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); -//! # let mut verifier = Verifier::new(); -//! # verifier.add_key(signer.key_id(), signer.public_key_bytes()); -//! let mut tampered = v.clone(); -//! tampered[0] += 1e-5; -//! let err = verifier -//! .verify_full(&pin, None::<&str>, Some(tampered.as_slice()), None) -//! .unwrap_err(); -//! assert_eq!(err, VerifyError::VectorTampered); -//! ``` -//! -//! Key rotation — accept both old and new `kid` during the rollover -//! window: -//! -//! ``` -//! use vectorpin::{Signer, Verifier}; -//! -//! let old = Signer::generate("2026-04".to_string()); -//! let new = Signer::generate("2026-05".to_string()); -//! let mut verifier = Verifier::new(); -//! verifier.add_key(old.key_id(), old.public_key_bytes()); -//! verifier.add_key(new.key_id(), new.public_key_bytes()); -//! -//! let v: Vec = vec![1.0, 2.0]; -//! let pin_old = old.pin("hello", "m", v.as_slice()).unwrap(); -//! let pin_new = new.pin("hello", "m", v.as_slice()).unwrap(); -//! verifier.verify_signature(&pin_old).unwrap(); -//! verifier.verify_signature(&pin_new).unwrap(); -//! ``` +//! [`VerifyError`] mirrors the failure-mode set in spec §5 so callers +//! can route distinct outcomes (forgery, tamper, mismatch, parse error) +//! to different handlers. 
use std::collections::HashMap; use ed25519_dalek::{Signature, Verifier as _, VerifyingKey}; -use crate::attestation::{Pin, PROTOCOL_VERSION}; +use crate::attestation::{ + legacy_v1::{canonicalize_v1, parse_v1_pin, V1_PROTOCOL_VERSION}, + AttestationError, Pin, PROTOCOL_VERSION, +}; use crate::hash::{hash_text, hash_vector, VecDtype, VectorRef}; -/// Distinct verification failure modes. Callers route on this so a -/// signature-invalid result (potential forgery) can be handled -/// differently from a vector-tampered result (potential steganography -/// kill shot). +/// Distinct verification failure modes (spec §5). #[derive(Debug, Clone, PartialEq, Eq)] pub enum VerifyError { /// Pin uses a protocol version this verifier does not understand. UnsupportedVersion(u32), /// `kid` not present in the verifier's key registry. UnknownKey(String), + /// `kid` is registered but its validity window excludes the pin's `ts`. + KeyExpired, + /// Pin failed wire-format / size / format validation before any + /// cryptographic work was attempted. + ParseError(String), /// Ed25519 signature did not verify against the canonical header. SignatureInvalid, - /// Vector hash mismatch — the embedding has been modified after pinning. + /// Vector hash mismatch — embedding modified after pinning. VectorTampered, /// Source text hash mismatch. SourceMismatch, - /// Pin was issued for a different model than the caller expected. + /// Pin issued for a different model than the caller expected. ModelMismatch { /// Model identifier in the pin. pin_model: String, - /// Model identifier the caller asked us to require. + /// Identifier the caller required. expected: String, }, - /// Supplied vector's dim did not match the dim in the pin header. + /// Supplied vector's dim did not match the pin's `vec_dim`. ShapeMismatch { /// Length of the supplied vector. supplied: usize, /// `vec_dim` from the pin header. expected: u32, }, - /// Pin failed to parse one of its dtype-related fields. 
+ /// Caller's expected `vectorpin.record_id` did not match the pin. + RecordMismatch, + /// Caller's expected `vectorpin.collection_id` did not match the pin. + CollectionMismatch, + /// Caller's expected `vectorpin.tenant_id` did not match the pin. + TenantMismatch, + /// Pin's `vec_dtype` is not understood by this build. UnsupportedDtype(String), } @@ -126,24 +69,26 @@ impl std::fmt::Display for VerifyError { match self { VerifyError::UnsupportedVersion(v) => write!(f, "unsupported pin version: {v}"), VerifyError::UnknownKey(k) => write!(f, "unknown signing key id: {k}"), + VerifyError::KeyExpired => { + write!(f, "pin ts falls outside the key's validity window") + } + VerifyError::ParseError(s) => write!(f, "pin parse error: {s}"), VerifyError::SignatureInvalid => write!(f, "ed25519 signature did not verify"), - VerifyError::VectorTampered => write!( - f, - "vector hash mismatch — embedding has been modified after pinning" - ), + VerifyError::VectorTampered => { + write!(f, "vector hash mismatch — embedding modified after pinning") + } VerifyError::SourceMismatch => write!(f, "source hash mismatch"), VerifyError::ModelMismatch { pin_model, expected, - } => { - write!(f, "pin model {pin_model:?} != expected {expected:?}") - } - VerifyError::ShapeMismatch { supplied, expected } => { - write!( - f, - "vector shape mismatch: supplied len {supplied}, pin dim {expected}" - ) - } + } => write!(f, "pin model {pin_model:?} != expected {expected:?}"), + VerifyError::ShapeMismatch { supplied, expected } => write!( + f, + "vector shape mismatch: supplied len {supplied}, pin dim {expected}" + ), + VerifyError::RecordMismatch => write!(f, "vectorpin.record_id mismatch"), + VerifyError::CollectionMismatch => write!(f, "vectorpin.collection_id mismatch"), + VerifyError::TenantMismatch => write!(f, "vectorpin.tenant_id mismatch"), VerifyError::UnsupportedDtype(s) => write!(f, "unsupported canonical dtype: {s}"), } } @@ -151,28 +96,105 @@ impl std::fmt::Display for VerifyError { 
impl std::error::Error for VerifyError {} -/// Holds the public-key registry for one or more `kid` values and runs -/// pin verification against supplied ground truth. +impl From for VerifyError { + fn from(e: AttestationError) -> Self { + match e { + AttestationError::UnsupportedVersion { got, .. } => VerifyError::UnsupportedVersion(got), + other => VerifyError::ParseError(other.to_string()), + } + } +} + +/// A registered public key plus an optional validity window (§7). +/// +/// `valid_from` is inclusive, `valid_until` is exclusive. If both are +/// `None`, the key validates pins of any timestamp. +#[derive(Debug, Clone)] +pub struct KeyEntry { + /// 32-byte Ed25519 public key. + pub public_key: VerifyingKey, + /// Earliest `ts` (inclusive) accepted under this key. UNIX epoch seconds. + pub valid_from: Option, + /// Latest `ts` (exclusive) accepted under this key. UNIX epoch seconds. + pub valid_until: Option, +} + +impl KeyEntry { + /// Create a `KeyEntry` with no validity window. + pub fn new(public_key: VerifyingKey) -> Self { + Self { + public_key, + valid_from: None, + valid_until: None, + } + } + + /// Construct from raw 32-byte public-key material. + pub fn from_public_bytes(raw: [u8; 32]) -> Result { + VerifyingKey::from_bytes(&raw) + .map(Self::new) + .map_err(|_| VerifyError::ParseError("invalid Ed25519 public key bytes".into())) + } + + /// Builder: attach a `valid_from` lower bound (inclusive). + pub fn with_valid_from(mut self, ts_unix_seconds: i64) -> Self { + self.valid_from = Some(ts_unix_seconds); + self + } + + /// Builder: attach a `valid_until` upper bound (exclusive). + pub fn with_valid_until(mut self, ts_unix_seconds: i64) -> Self { + self.valid_until = Some(ts_unix_seconds); + self + } +} + +/// Optional caller-supplied ground truth and replay-protection IDs. +/// +/// Each field is independent: leave it `None` to skip that check. +#[derive(Debug, Default, Clone)] +pub struct VerifyOptions<'a> { + /// Ground-truth source text. 
If supplied, must hash to `source_hash`. + pub source: Option<&'a str>, + /// Ground-truth vector. If supplied, must hash to `vec_hash`. + pub vector: Option>, + /// Expected model identifier. + pub expected_model: Option<&'a str>, + /// Expected `vectorpin.record_id` in `extra`. + pub expected_record_id: Option<&'a str>, + /// Expected `vectorpin.collection_id` in `extra`. + pub expected_collection_id: Option<&'a str>, + /// Expected `vectorpin.tenant_id` in `extra`. + pub expected_tenant_id: Option<&'a str>, +} + +/// Holds the public-key registry and runs pin verification against +/// supplied ground truth. #[derive(Default)] pub struct Verifier { - keys: HashMap, + keys: HashMap, + accept_v1: bool, } impl Verifier { - /// Construct an empty verifier. Add public keys with [`Self::add_key`]. + /// Construct an empty default verifier (v2-only). pub fn new() -> Self { Self::default() } - /// Register a public key under `kid`. Multiple keys may live in - /// the registry simultaneously to support rotation. + /// Register a public key under `kid` with no validity window. pub fn add_key(&mut self, kid: &str, public_key_bytes: [u8; 32]) { if let Ok(vk) = VerifyingKey::from_bytes(&public_key_bytes) { - self.keys.insert(kid.to_owned(), vk); + self.keys.insert(kid.to_owned(), KeyEntry::new(vk)); } } - /// Number of registered keys (sanity check for tests). + /// Register a fully-specified [`KeyEntry`] under `kid`. + pub fn add_key_entry(&mut self, kid: &str, entry: KeyEntry) { + self.keys.insert(kid.to_owned(), entry); + } + + /// Number of registered keys. pub fn key_count(&self) -> usize { self.keys.len() } @@ -180,51 +202,100 @@ impl Verifier { /// Verify just the signature — useful when ground-truth source/vector /// are unavailable but producer identity still matters. 
pub fn verify_signature(&self, pin: &Pin) -> Result<(), VerifyError> { - if pin.header.v != PROTOCOL_VERSION { - return Err(VerifyError::UnsupportedVersion(pin.header.v)); - } - let key = self - .keys - .get(&pin.kid) - .ok_or_else(|| VerifyError::UnknownKey(pin.kid.clone()))?; - let sig_bytes: [u8; 64] = pin - .sig - .as_slice() - .try_into() - .map_err(|_| VerifyError::SignatureInvalid)?; - let signature = Signature::from_bytes(&sig_bytes); - key.verify(&pin.header.canonicalize(), &signature) - .map_err(|_| VerifyError::SignatureInvalid) + self.verify(pin, VerifyOptions::default()) } - /// Verify signature + any supplied ground truth. + /// Convenience: verify with ground-truth source/vector/model. /// - /// Pass `Some(...)` for whichever components you have on hand: - /// the signature is always checked; source/vector/model checks run - /// only when the corresponding argument is present. This mirrors - /// the Python reference verifier so callers can do partial - /// verification (e.g. signature-only at retrieval time, full - /// verification at audit time). + /// Preserved for parity with v1 callers; equivalent to building a + /// [`VerifyOptions`] and calling [`Self::verify`]. pub fn verify_full<'a, V>( &self, pin: &Pin, - source: Option<&str>, + source: Option<&'a str>, vector: Option, - expected_model: Option<&str>, + expected_model: Option<&'a str>, ) -> Result<(), VerifyError> where V: Into>, { - self.verify_signature(pin)?; + self.verify( + pin, + VerifyOptions { + source, + vector: vector.map(Into::into), + expected_model, + ..VerifyOptions::default() + }, + ) + } + + /// Full verification: signature + any supplied ground truth + any + /// supplied replay-protection identifiers. + pub fn verify(&self, pin: &Pin, opts: VerifyOptions<'_>) -> Result<(), VerifyError> { + // Step 1: version dispatch. 
+ if pin.header.v != PROTOCOL_VERSION + && !(self.accept_v1 && pin.header.v == V1_PROTOCOL_VERSION) + { + return Err(VerifyError::UnsupportedVersion(pin.header.v)); + } + + // Defensive: sig length is checked at parse time, but verify + // before any signature work too so a hand-built Pin can't crash + // the ed25519 library. + if pin.sig.len() != 64 { + return Err(VerifyError::ParseError(format!( + "sig must be exactly 64 bytes; got {}", + pin.sig.len() + ))); + } - if let Some(vec) = vector { - let vec = vec.into(); + // Step 2: kid lookup + validity window. + let entry = self + .keys + .get(&pin.header.kid) + .ok_or_else(|| VerifyError::UnknownKey(pin.header.kid.clone()))?; + + if entry.valid_from.is_some() || entry.valid_until.is_some() { + let pin_ts = parse_v2_ts_unix(&pin.header.ts).ok_or(VerifyError::KeyExpired)?; + if let Some(vf) = entry.valid_from { + if pin_ts < vf { + return Err(VerifyError::KeyExpired); + } + } + if let Some(vu) = entry.valid_until { + if pin_ts >= vu { + return Err(VerifyError::KeyExpired); + } + } + } + + // Step 4: signature. + let canonical = canonical_for(pin); + let sig_bytes: [u8; 64] = pin + .sig + .as_slice() + .try_into() + .map_err(|_| VerifyError::SignatureInvalid)?; + let signature = Signature::from_bytes(&sig_bytes); + entry + .public_key + .verify(&canonical, &signature) + .map_err(|_| VerifyError::SignatureInvalid)?; + + // Step 6: vector check. + if let Some(vec) = opts.vector { if vec.len() as u32 != pin.header.vec_dim { return Err(VerifyError::ShapeMismatch { supplied: vec.len(), expected: pin.header.vec_dim, }); } + if !vector_is_finite(vec) { + return Err(VerifyError::ParseError( + "supplied vector contains NaN or infinity".into(), + )); + } let dtype = VecDtype::parse(&pin.header.vec_dtype) .map_err(|_| VerifyError::UnsupportedDtype(pin.header.vec_dtype.clone()))?; if hash_vector(vec, dtype) != pin.header.vec_hash { @@ -232,13 +303,15 @@ impl Verifier { } } - if let Some(s) = source { + // Step 5: source check. 
+ if let Some(s) = opts.source { if hash_text(s) != pin.header.source_hash { return Err(VerifyError::SourceMismatch); } } - if let Some(em) = expected_model { + // Step 7: model check. + if let Some(em) = opts.expected_model { if pin.header.model != em { return Err(VerifyError::ModelMismatch { pin_model: pin.header.model.clone(), @@ -247,10 +320,154 @@ impl Verifier { } } + // Step 8: replay-protection identifier checks. + if let Some(expected) = opts.expected_record_id { + if pin.header.extra.get("vectorpin.record_id").map(|s| s.as_str()) != Some(expected) { + return Err(VerifyError::RecordMismatch); + } + } + if let Some(expected) = opts.expected_collection_id { + if pin + .header + .extra + .get("vectorpin.collection_id") + .map(|s| s.as_str()) + != Some(expected) + { + return Err(VerifyError::CollectionMismatch); + } + } + if let Some(expected) = opts.expected_tenant_id { + if pin.header.extra.get("vectorpin.tenant_id").map(|s| s.as_str()) != Some(expected) { + return Err(VerifyError::TenantMismatch); + } + } + Ok(()) } } +/// Verifier that additionally accepts protocol-v1 pins via legacy +/// canonicalization. Opt-in per spec §5 step 1. +pub struct LegacyV1Verifier { + inner: Verifier, +} + +impl LegacyV1Verifier { + /// Construct an empty legacy verifier. + pub fn new() -> Self { + let mut inner = Verifier::new(); + inner.accept_v1 = true; + Self { inner } + } + + /// Forwarded: register a public key. + pub fn add_key(&mut self, kid: &str, public_key_bytes: [u8; 32]) { + self.inner.add_key(kid, public_key_bytes); + } + + /// Forwarded: register a [`KeyEntry`] with optional validity window. + pub fn add_key_entry(&mut self, kid: &str, entry: KeyEntry) { + self.inner.add_key_entry(kid, entry); + } + + /// Verify a parsed pin (v1 or v2). + pub fn verify(&self, pin: &Pin, opts: VerifyOptions<'_>) -> Result<(), VerifyError> { + self.inner.verify(pin, opts) + } + + /// Parse a v1 or v2 pin JSON string. 
/// Parse a v2-format `ts` string (`YYYY-MM-DDTHH:MM:SSZ`) to UNIX epoch seconds.
///
/// Returns `None` on any format violation (callers map this to
/// [`VerifyError::KeyExpired`] to avoid leaking parser-internal errors from
/// the validity-window path). A format violation is any of:
///
/// - length other than 20 bytes, or a wrong separator (`-`, `-`, `T`, `:`,
///   `:`, `Z` at their fixed offsets);
/// - a non-ASCII-digit byte inside a numeric field;
/// - a calendar field out of range: month outside `1..=12`, day outside
///   `1..=days_in_month` (leap years honoured), hour > 23, minute > 59,
///   second > 59.
///
/// The range checks matter: without them `2026-03-00T00:00:00Z` reached the
/// day arithmetic with `d == 0`, and impossible dates such as `2026-02-30`
/// silently aliased to a different day, shifting the instant compared against
/// a key's validity window.
fn parse_v2_ts_unix(ts: &str) -> Option<i64> {
    /// Byte offset and expected value of every fixed separator.
    const SEPARATORS: [(usize, u8); 6] = [
        (4, b'-'),
        (7, b'-'),
        (10, b'T'),
        (13, b':'),
        (16, b':'),
        (19, b'Z'),
    ];

    /// Number of days in `month` of `year` (proleptic Gregorian); 0 for an
    /// invalid month so that every day value is rejected.
    fn days_in_month(year: u32, month: u32) -> u32 {
        match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) => 29,
            2 => 28,
            _ => 0,
        }
    }

    let b = ts.as_bytes();
    // Length is checked first so the fixed-offset indexing below cannot go
    // out of bounds.
    if b.len() != 20 || SEPARATORS.iter().any(|&(i, sep)| b[i] != sep) {
        return None;
    }

    // Decode a run of ASCII digits; any other byte aborts with `None`.
    let field = |range: std::ops::Range<usize>| -> Option<u32> {
        b[range].iter().try_fold(0u32, |acc, &c| {
            c.is_ascii_digit().then(|| acc * 10 + u32::from(c - b'0'))
        })
    };

    let year = field(0..4)?;
    let month = field(5..7)?;
    let day = field(8..10)?;
    let hour = field(11..13)?;
    let minute = field(14..16)?;
    let second = field(17..19)?;

    // `days_in_month` returns 0 for an invalid month, so the day check also
    // rejects months outside 1..=12. Leap seconds (`:60`) are rejected.
    if day == 0 || day > days_in_month(year, month) || hour > 23 || minute > 59 || second > 59 {
        return None;
    }

    let year = i32::try_from(year).ok()?;
    Some(civil_to_unix(year, month, day, hour, minute, second))
}

/// Convert a proleptic-Gregorian civil date-time (UTC) to UNIX epoch seconds.
///
/// Inverse of `unix_to_ymdhms` (Howard Hinnant's `days_from_civil`). All
/// arithmetic is done in `i64`, so out-of-range inputs (e.g. `d == 0`) yield
/// the arithmetically adjacent instant instead of underflowing an unsigned
/// intermediate; callers that need strict validation must check ranges first.
fn civil_to_unix(y: i32, m: u32, d: u32, h: u32, mi: u32, s: u32) -> i64 {
    // Shift the year so it starts on 1 March; the leap day is then the last
    // day of the shifted year.
    let y = i64::from(y) - i64::from(m <= 2);
    let era = y.div_euclid(400); // floor division, correct for negative years
    let yoe = y.rem_euclid(400); // year of era, in [0, 399]
    let mp = (i64::from(m) + 9) % 12; // March = 0, ..., February = 11
    let doy = (153 * mp + 2) / 5 + i64::from(d) - 1; // day of shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era
    let days = era * 146_097 + doe - 719_468; // days since 1970-01-01
    days * 86_400 + i64::from(h) * 3_600 + i64::from(mi) * 60 + i64::from(s)
}
+ assert_eq!(unix, 1_777_982_400); } #[test] - fn key_rotation_works() { - let old = Signer::generate("2026-04".into()); - let new = Signer::generate("2026-05".into()); - let mut verifier = Verifier::new(); - verifier.add_key(old.key_id(), old.public_key_bytes()); - verifier.add_key(new.key_id(), new.public_key_bytes()); + fn key_expired_lower_bound() { + let signer = Signer::generate("k".into()); let v: Vec = vec![1.0, 2.0]; - verifier - .verify_signature(&old.pin("x", "m", v.as_slice()).unwrap()) - .unwrap(); - verifier - .verify_signature(&new.pin("x", "m", v.as_slice()).unwrap()) - .unwrap(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let pin_unix = parse_v2_ts_unix(&pin.header.ts).unwrap(); + + let mut verifier = Verifier::new(); + let vk = VerifyingKey::from_bytes(&signer.public_key_bytes()).unwrap(); + verifier.add_key_entry( + signer.key_id(), + KeyEntry::new(vk).with_valid_from(pin_unix + 1), + ); + let err = verifier.verify_signature(&pin).unwrap_err(); + assert_eq!(err, VerifyError::KeyExpired); } } diff --git a/rust/vectorpin/tests/cross_lang.rs b/rust/vectorpin/tests/cross_lang.rs index 315cb1c..ee1ebe6 100644 --- a/rust/vectorpin/tests/cross_lang.rs +++ b/rust/vectorpin/tests/cross_lang.rs @@ -1,56 +1,62 @@ // Copyright 2025 Jascha Wanger / Tarnover, LLC // SPDX-License-Identifier: Apache-2.0 -//! Cross-language compatibility tests. +//! Cross-language compatibility tests for protocol v2. //! -//! Loads the JSON fixtures generated by `scripts/generate_test_vectors.py` -//! and asserts that this Rust port produces byte-for-byte identical -//! canonical bytes, hashes, and signatures. If the Python and Rust -//! implementations disagree, these tests catch it before any pin -//! signed by Python is rejected by Rust (or vice versa). +//! Loads `testvectors/v2.json` (positive fixtures) and +//! `testvectors/negative_v2.json` (negative fixtures) generated by the +//! Python reference. Asserts: +//! +//! 
- Rust produces byte-for-byte identical canonical bytes for every +//! positive fixture (`expected_canonical_bytes_b64`). +//! - Rust produces byte-for-byte identical pin JSON (Ed25519 is +//! deterministic, so the signature must also match). +//! - Rust verifies every Python-produced pin under the same key. +//! - Each negative fixture maps to the expected `VerifyError` variant. use std::path::PathBuf; use base64::Engine; use serde::Deserialize; +use serde_json::Value; use vectorpin::{ hash::{hash_text, hash_vector, VecDtype, VectorRef}, - signer::PinOptions, - Pin, Signer, Verifier, VerifyError, + KeyEntry, Pin, Signer, VerifyError, VerifyOptions, Verifier, }; +// ---- v2 positive fixtures ---------------------------------------------- + #[derive(Debug, Deserialize)] -struct FixtureBundle { +struct V2Bundle { public_key_b64: String, - private_seed_b64: String, + private_key_b64: String, key_id: String, - fixtures: Vec, + fixtures: Vec, } #[derive(Debug, Deserialize)] -struct Fixture { +struct V2Fixture { name: String, - input: FixtureInput, - expected: FixtureExpected, + input: V2Input, + pin_json: String, + expected_canonical_bytes_b64: String, + expected_vec_hash: String, + expected_source_hash: String, } #[derive(Debug, Deserialize)] -struct FixtureInput { +struct V2Input { source: String, model: String, - vector_b64: String, + vec_b64: String, vec_dtype: String, vec_dim: usize, timestamp: String, -} - -#[derive(Debug, Deserialize)] -struct FixtureExpected { - pin_json: String, - canonical_header_b64: String, - vec_hash: String, - source_hash: String, + #[serde(default)] + model_hash: Option, + #[serde(default)] + extra: std::collections::BTreeMap, } fn b64(s: &str) -> Vec { @@ -59,56 +65,41 @@ fn b64(s: &str) -> Vec { .expect("base64 fixture input") } -fn fixtures_path() -> PathBuf { - // The `cargo test` cwd is the crate root; testvectors live two levels up. 
- PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("..") - .join("..") - .join("testvectors") - .join("v1.json") -} - -fn negative_path() -> PathBuf { +fn testvectors_dir() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("..") .join("..") .join("testvectors") - .join("negative_v1.json") } fn parse_vec_f32(bytes: &[u8], dim: usize) -> Vec { assert_eq!(bytes.len(), dim * 4, "f32 fixture length sanity check"); - let mut out = Vec::with_capacity(dim); - for chunk in bytes.chunks_exact(4) { - let arr: [u8; 4] = chunk.try_into().unwrap(); - out.push(f32::from_le_bytes(arr)); - } - out + bytes + .chunks_exact(4) + .map(|c| f32::from_le_bytes(c.try_into().unwrap())) + .collect() } fn parse_vec_f64(bytes: &[u8], dim: usize) -> Vec { assert_eq!(bytes.len(), dim * 8, "f64 fixture length sanity check"); - let mut out = Vec::with_capacity(dim); - for chunk in bytes.chunks_exact(8) { - let arr: [u8; 8] = chunk.try_into().unwrap(); - out.push(f64::from_le_bytes(arr)); - } - out + bytes + .chunks_exact(8) + .map(|c| f64::from_le_bytes(c.try_into().unwrap())) + .collect() } -fn run_fixture(bundle: &FixtureBundle, fx: &Fixture) { - eprintln!("running fixture: {}", fx.name); +fn run_v2_fixture(bundle: &V2Bundle, fx: &V2Fixture) { + eprintln!("running v2 fixture: {}", fx.name); let dtype = VecDtype::parse(&fx.input.vec_dtype).expect("known dtype"); - let raw_bytes = b64(&fx.input.vector_b64); + let raw_bytes = b64(&fx.input.vec_b64); - // 1. Hash equality on text and vector — agnostic to signing. - let computed_source_hash = hash_text(&fx.input.source); + // 1. Hash equality on text and vector. 
assert_eq!( - computed_source_hash, fx.expected.source_hash, + hash_text(&fx.input.source), + fx.expected_source_hash, "source_hash mismatch for {}", fx.name ); - let computed_vec_hash = match dtype { VecDtype::F32 => { let v = parse_vec_f32(&raw_bytes, fx.input.vec_dim); @@ -120,73 +111,61 @@ fn run_fixture(bundle: &FixtureBundle, fx: &Fixture) { } }; assert_eq!( - computed_vec_hash, fx.expected.vec_hash, + computed_vec_hash, fx.expected_vec_hash, "vec_hash mismatch for {}", fx.name ); - // 2. Reproduce the pin from the deterministic seed and confirm the - // resulting JSON is byte-for-byte identical to the Python output. - let signer = Signer::from_private_bytes(&b64(&bundle.private_seed_b64), bundle.key_id.clone()) - .expect("seed loads"); + // 2. Reproduce the pin from the deterministic seed. + let signer = + Signer::from_private_bytes(&b64(&bundle.private_key_b64), bundle.key_id.clone()) + .expect("seed loads"); assert_eq!( signer.public_key_bytes().to_vec(), - b64(&bundle.public_key_b64) + b64(&bundle.public_key_b64), + "public key disagreement" ); + let opts = vectorpin::signer::PinOptions { + dtype: Some(dtype), + timestamp: Some(fx.input.timestamp.clone()), + model_hash: fx.input.model_hash.clone(), + extra: fx.input.extra.clone(), + }; let pin = match dtype { VecDtype::F32 => { let v = parse_vec_f32(&raw_bytes, fx.input.vec_dim); signer - .pin_with_options( - &fx.input.source, - &fx.input.model, - v.as_slice(), - PinOptions { - dtype: Some(dtype), - timestamp: Some(fx.input.timestamp.clone()), - ..PinOptions::default() - }, - ) + .pin_with_options(&fx.input.source, &fx.input.model, v.as_slice(), opts) .unwrap() } VecDtype::F64 => { let v = parse_vec_f64(&raw_bytes, fx.input.vec_dim); signer - .pin_with_options( - &fx.input.source, - &fx.input.model, - v.as_slice(), - PinOptions { - dtype: Some(dtype), - timestamp: Some(fx.input.timestamp.clone()), - ..PinOptions::default() - }, - ) + .pin_with_options(&fx.input.source, &fx.input.model, v.as_slice(), opts) 
.unwrap() } }; - // Canonical bytes must match what Python produced. + // Canonical bytes must match Python output (domain tag + JSON body). let canonical = pin.header.canonicalize(); - let expected_canonical = b64(&fx.expected.canonical_header_b64); + let expected_canonical = b64(&fx.expected_canonical_bytes_b64); assert_eq!( canonical, expected_canonical, "canonical header bytes mismatch for {}", fx.name ); - // Pin JSON must match exactly (deterministic signing means the - // signature is also identical). - let produced_json = pin.to_json(); + // Pin JSON must match exactly — Ed25519 is deterministic. assert_eq!( - produced_json, fx.expected.pin_json, + pin.to_json(), + fx.pin_json, "pin JSON mismatch for {}", fx.name ); - // Round-trip back through from_json and confirm the verifier accepts. - let parsed = Pin::from_json(&produced_json).expect("rust parses its own JSON"); + // Round-trip and verify. + let parsed = Pin::from_json(&pin.to_json()).expect("rust parses its own JSON"); let mut verifier = Verifier::new(); verifier.add_key(&bundle.key_id, signer.public_key_bytes()); verifier @@ -194,50 +173,209 @@ fn run_fixture(bundle: &FixtureBundle, fx: &Fixture) { .expect("rust verifies own pin"); // Also verify the Python-produced JSON directly. 
- let python_pin = Pin::from_json(&fx.expected.pin_json).expect("rust parses python JSON"); + let python_pin = Pin::from_json(&fx.pin_json).expect("rust parses python JSON"); verifier .verify_full::<&[f32]>(&python_pin, Some(&fx.input.source), None, None) .expect("rust verifies python-produced pin"); } #[test] -fn cross_language_positive_fixtures() { - let raw = std::fs::read_to_string(fixtures_path()).expect("read v1.json"); - let bundle: FixtureBundle = serde_json::from_str(&raw).expect("parse v1.json"); - assert!(!bundle.fixtures.is_empty(), "no fixtures to test"); +fn cross_language_v2_positive_fixtures() { + let path = testvectors_dir().join("v2.json"); + let raw = std::fs::read_to_string(&path).expect("read v2.json"); + let bundle: V2Bundle = serde_json::from_str(&raw).expect("parse v2.json"); + assert!(!bundle.fixtures.is_empty(), "no v2 fixtures to test"); for fx in &bundle.fixtures { - run_fixture(&bundle, fx); + run_v2_fixture(&bundle, fx); } } +// ---- v2 negative fixtures ---------------------------------------------- + +#[derive(Debug, Deserialize)] +struct V2NegativeBundle { + public_key_b64: String, + key_id: String, + fixtures: Vec, +} + #[derive(Debug, Deserialize)] -struct NegativeFixture { +struct V2NegativeFixture { + name: String, + expected_failure: String, pin_json: String, - tampered_vector_b64: String, - expected_error: String, + #[serde(default)] + tampered_vec_b64: Option, + #[serde(default)] + nan_vec_b64: Option, + #[serde(default)] + #[allow(dead_code)] + vec_dtype: Option, + #[serde(default)] + vec_dim: Option, + #[serde(default)] + tampered_source: Option, + #[serde(default)] + expected_model: Option, + #[serde(default)] + expected_record_id: Option, } -#[test] -fn cross_language_negative_tampered_vector() { - let raw = std::fs::read_to_string(negative_path()).expect("read negative_v1.json"); - let neg: NegativeFixture = serde_json::from_str(&raw).expect("parse negative_v1.json"); - assert_eq!(neg.expected_error, "vector_tampered"); 
- - let pin = Pin::from_json(&neg.pin_json).expect("parse pin"); - let tampered = parse_vec_f32(&b64(&neg.tampered_vector_b64), pin.header.vec_dim as usize); - - // We need ANY public key registered to even attempt verification. - // Reload the key from the positive bundle since they share a key id. - let raw_pos = std::fs::read_to_string(fixtures_path()).expect("read v1.json"); - let bundle: FixtureBundle = serde_json::from_str(&raw_pos).expect("parse v1.json"); +fn classify(err: &VerifyError) -> &'static str { + match err { + VerifyError::UnsupportedVersion(_) => "UNSUPPORTED_VERSION", + VerifyError::UnknownKey(_) => "UNKNOWN_KEY", + VerifyError::KeyExpired => "KEY_EXPIRED", + VerifyError::ParseError(_) => "PARSE_ERROR", + VerifyError::SignatureInvalid => "SIGNATURE_INVALID", + VerifyError::VectorTampered => "VECTOR_TAMPERED", + VerifyError::SourceMismatch => "SOURCE_MISMATCH", + VerifyError::ModelMismatch { .. } => "MODEL_MISMATCH", + VerifyError::ShapeMismatch { .. } => "SHAPE_MISMATCH", + VerifyError::RecordMismatch => "RECORD_MISMATCH", + VerifyError::CollectionMismatch => "COLLECTION_MISMATCH", + VerifyError::TenantMismatch => "TENANT_MISMATCH", + VerifyError::UnsupportedDtype(_) => "PARSE_ERROR", + } +} + +fn run_negative(bundle: &V2NegativeBundle, fx: &V2NegativeFixture) { + eprintln!( + "running negative_v2 fixture: {} ({})", + fx.name, fx.expected_failure + ); let mut verifier = Verifier::new(); - verifier.add_key( + let pk: [u8; 32] = b64(&bundle.public_key_b64) + .try_into() + .expect("public key 32 bytes"); + verifier.add_key(&bundle.key_id, pk); + + // The pin may fail to parse — that itself is a PARSE_ERROR outcome. 
+ let parsed = match Pin::from_json(&fx.pin_json) { + Ok(p) => p, + Err(e) => { + let mapped: VerifyError = e.into(); + assert_eq!( + classify(&mapped), + fx.expected_failure, + "{}: parse error class mismatch (got {mapped:?})", + fx.name + ); + return; + } + }; + + let dtype = parsed.header.vec_dtype.clone(); + let vec_holder_f32: Vec; + let vec_holder_f64: Vec; + let vec_ref: Option> = if let Some(b) = &fx.tampered_vec_b64 { + match dtype.as_str() { + "f32" => { + vec_holder_f32 = parse_vec_f32(&b64(b), fx.vec_dim.unwrap()); + Some(VectorRef::F32(&vec_holder_f32)) + } + "f64" => { + vec_holder_f64 = parse_vec_f64(&b64(b), fx.vec_dim.unwrap()); + Some(VectorRef::F64(&vec_holder_f64)) + } + _ => panic!("unknown dtype {dtype}"), + } + } else if let Some(b) = &fx.nan_vec_b64 { + // NaN-bearing vector — used for the nan_in_vector_at_verify case. + match dtype.as_str() { + "f32" => { + vec_holder_f32 = parse_vec_f32(&b64(b), fx.vec_dim.unwrap()); + Some(VectorRef::F32(&vec_holder_f32)) + } + "f64" => { + vec_holder_f64 = parse_vec_f64(&b64(b), fx.vec_dim.unwrap()); + Some(VectorRef::F64(&vec_holder_f64)) + } + _ => panic!("unknown dtype {dtype}"), + } + } else { + None + }; + + let opts = VerifyOptions { + source: fx.tampered_source.as_deref(), + vector: vec_ref, + expected_model: fx.expected_model.as_deref(), + expected_record_id: fx.expected_record_id.as_deref(), + ..VerifyOptions::default() + }; + let err = verifier + .verify(&parsed, opts) + .expect_err("negative fixture must fail"); + assert_eq!( + classify(&err), + fx.expected_failure, + "{}: error class mismatch (got {err:?})", + fx.name + ); +} + +#[test] +fn cross_language_v2_negative_fixtures() { + let path = testvectors_dir().join("negative_v2.json"); + let raw = std::fs::read_to_string(&path).expect("read negative_v2.json"); + + // Some negative fixtures intentionally exceed the JSON size cap, so + // the bundle is large; parse it as Value first then pick out the + // fields we need. 
+ let val: Value = serde_json::from_str(&raw).expect("parse negative_v2.json root"); + let bundle = V2NegativeBundle { + public_key_b64: val["public_key_b64"].as_str().unwrap().to_string(), + key_id: val["key_id"].as_str().unwrap().to_string(), + fixtures: serde_json::from_value(val["fixtures"].clone()) + .expect("parse negative fixtures"), + }; + + assert!(!bundle.fixtures.is_empty()); + for fx in &bundle.fixtures { + run_negative(&bundle, fx); + } +} + +// ---- v1 legacy verifier (migration path) ------------------------------- + +#[derive(Debug, Deserialize)] +struct V1Bundle { + public_key_b64: String, + key_id: String, + fixtures: Vec, +} + +#[derive(Debug, Deserialize)] +struct V1Fixture { + name: String, + expected: V1Expected, +} + +#[derive(Debug, Deserialize)] +struct V1Expected { + pin_json: String, +} + +#[test] +fn legacy_v1_verifier_accepts_all_v1_fixtures() { + let path = testvectors_dir().join("v1.json"); + let raw = std::fs::read_to_string(&path).expect("read v1.json"); + let bundle: V1Bundle = serde_json::from_str(&raw).expect("parse v1.json"); + + let pk: [u8; 32] = b64(&bundle.public_key_b64).try_into().expect("32 bytes"); + let mut verifier = vectorpin::LegacyV1Verifier::new(); + verifier.add_key_entry( &bundle.key_id, - b64(&bundle.public_key_b64).try_into().unwrap(), + KeyEntry::from_public_bytes(pk).expect("valid key"), ); - let err = verifier - .verify_full::<&[f32]>(&pin, None, Some(tampered.as_slice()), None) - .expect_err("tampered vector must fail"); - assert_eq!(err, VerifyError::VectorTampered); + for fx in &bundle.fixtures { + eprintln!("legacy v1 fixture: {}", fx.name); + let pin = vectorpin::LegacyV1Verifier::parse_pin(&fx.expected.pin_json) + .expect("parse v1 pin"); + verifier + .verify(&pin, VerifyOptions::default()) + .expect("legacy verifier accepts v1"); + } } diff --git a/rust/vectorpin/tests/legacy_v1.rs b/rust/vectorpin/tests/legacy_v1.rs new file mode 100644 index 0000000..57bbaf8 --- /dev/null +++ 
b/rust/vectorpin/tests/legacy_v1.rs @@ -0,0 +1,104 @@ +// Copyright 2025 Jascha Wanger / Tarnover, LLC +// SPDX-License-Identifier: Apache-2.0 + +//! Opt-in legacy v1 verifier tests. +//! +//! Confirms that `LegacyV1Verifier` accepts every entry in +//! `testvectors/v1.json` (the original v1 fixtures) without modification, +//! while the default v2 verifier rejects them with UNSUPPORTED_VERSION. + +use std::path::PathBuf; + +use base64::Engine; +use serde::Deserialize; + +use vectorpin::{KeyEntry, LegacyV1Verifier, Pin, VerifyError, VerifyOptions, Verifier}; + +fn b64(s: &str) -> Vec { + base64::engine::general_purpose::URL_SAFE_NO_PAD + .decode(s.as_bytes()) + .expect("base64 fixture") +} + +fn v1_path() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("testvectors") + .join("v1.json") +} + +#[derive(Debug, Deserialize)] +struct V1Bundle { + public_key_b64: String, + key_id: String, + fixtures: Vec, +} + +#[derive(Debug, Deserialize)] +struct V1Fixture { + name: String, + expected: V1Expected, +} + +#[derive(Debug, Deserialize)] +struct V1Expected { + pin_json: String, +} + +#[test] +fn legacy_verifier_accepts_v1_fixtures() { + let raw = std::fs::read_to_string(v1_path()).expect("read v1.json"); + let bundle: V1Bundle = serde_json::from_str(&raw).expect("parse v1.json"); + + let pk: [u8; 32] = b64(&bundle.public_key_b64) + .try_into() + .expect("public key 32 bytes"); + let mut verifier = LegacyV1Verifier::new(); + verifier.add_key_entry(&bundle.key_id, KeyEntry::from_public_bytes(pk).unwrap()); + + assert!(!bundle.fixtures.is_empty()); + for fx in &bundle.fixtures { + eprintln!("legacy v1 fixture: {}", fx.name); + let pin = LegacyV1Verifier::parse_pin(&fx.expected.pin_json).expect("parse v1"); + verifier + .verify(&pin, VerifyOptions::default()) + .expect("legacy verifier accepts v1"); + } +} + +#[test] +fn default_v2_verifier_rejects_v1_fixtures() { + let raw = std::fs::read_to_string(v1_path()).expect("read v1.json"); 
+ let bundle: V1Bundle = serde_json::from_str(&raw).expect("parse v1.json"); + + // Strict v2 verifier must NOT accept v1 — confirms wire-format break. + let mut verifier = Verifier::new(); + verifier.add_key( + &bundle.key_id, + b64(&bundle.public_key_b64).try_into().unwrap(), + ); + + for fx in &bundle.fixtures { + // The strict parser rejects v1 pins outright, before reaching + // the verifier. That maps to a PARSE_ERROR for the caller. + match Pin::from_json(&fx.expected.pin_json) { + Ok(pin) => { + let err = verifier + .verify(&pin, VerifyOptions::default()) + .expect_err("v2 verifier must reject v1 pins"); + assert!(matches!(err, VerifyError::UnsupportedVersion(1))); + } + Err(e) => { + // Strict v2 parser rejects pins whose `v != 2`. + let msg = format!("{e:?}"); + assert!( + msg.contains("UnsupportedVersion") + || msg.contains("unsupported") + || msg.contains("got: 1"), + "unexpected parse error: {msg}" + ); + } + } + } +} diff --git a/rust/vectorpin/tests/v2_canonicalization.rs b/rust/vectorpin/tests/v2_canonicalization.rs new file mode 100644 index 0000000..fb25ff3 --- /dev/null +++ b/rust/vectorpin/tests/v2_canonicalization.rs @@ -0,0 +1,277 @@ +// Copyright 2025 Jascha Wanger / Tarnover, LLC +// SPDX-License-Identifier: Apache-2.0 + +//! v2-specific hardening tests: domain tag, kid binding, NFC enforcement, +//! ts strictness, NaN rejection, unknown-field rejection, size caps. 
+ +use vectorpin::attestation::DOMAIN_TAG; +use vectorpin::{ + signer::PinOptions, AttestationError, Pin, Signer, VerifyError, VerifyOptions, Verifier, +}; + +fn v2_signer(kid: &str) -> Signer { + Signer::generate(kid.into()) +} + +fn small_vec() -> Vec { + (0..8).map(|i| (i as f32) * 0.1).collect() +} + +#[test] +fn domain_tag_is_exactly_13_bytes() { + assert_eq!(DOMAIN_TAG.len(), 13); + assert_eq!(DOMAIN_TAG, b"vectorpin/v2\x00"); +} + +#[test] +fn canonical_bytes_start_with_domain_tag() { + let signer = v2_signer("k1"); + let v = small_vec(); + let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); + let canonical = pin.header.canonicalize(); + assert!(canonical.starts_with(DOMAIN_TAG)); +} + +#[test] +fn kid_is_in_signed_bytes() { + let signer = v2_signer("kid-a"); + let v = small_vec(); + let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); + + // Mutate the kid in a copy of the pin; the registered key won't match + // anyway, but more importantly: the cross-key swap signature attempt + // must fail signature check against the original key when the kid changes. + let mut tampered = pin.clone(); + tampered.header.kid = "kid-b".into(); + + let mut verifier = Verifier::new(); + verifier.add_key("kid-b", signer.public_key_bytes()); + let err = verifier.verify_signature(&tampered).unwrap_err(); + assert_eq!(err, VerifyError::SignatureInvalid); +} + +#[test] +fn v_is_in_signed_bytes() { + let signer = v2_signer("k1"); + let v = small_vec(); + let pin = signer.pin("hello", "m", v.as_slice()).unwrap(); + + let mut tampered = pin.clone(); + tampered.header.v = 99; + let mut verifier = Verifier::new(); + verifier.add_key("k1", signer.public_key_bytes()); + let err = verifier.verify_signature(&tampered).unwrap_err(); + // Unsupported version is rejected before reaching signature, but in + // either case the pin must NOT verify. 
/// The strict v2 parser must reject a `model` string that is not in NFC.
///
/// The fixture starts from a genuinely valid signed pin and changes only the
/// `model` field, so the NFC check is the only thing that can fail. The
/// previous version used a hand-written pin with an invalid `sig` and accepted
/// `InvalidField` as success, which meant the test passed even with NFC
/// enforcement removed.
#[test]
fn parser_rejects_nfd_model_field() {
    let signer = v2_signer("k");
    let v = small_vec();
    let pin = signer.pin("x", "m", v.as_slice()).unwrap();
    let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap();
    // 'café' in NFD: 'e' followed by COMBINING ACUTE ACCENT (U+0301). The
    // escape keeps the decomposed form explicit, so an editor or formatter
    // that normalizes source text cannot silently turn it into NFC.
    value["model"] = serde_json::Value::String("cafe\u{0301}".to_string());
    let err = Pin::from_value(value).unwrap_err();
    assert!(matches!(err, AttestationError::NotNfc(_)), "got {err:?}");
}
+ let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["kid"] = serde_json::Value::String(bad); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::ControlChar(_)), "got {err:?}"); +} + +#[test] +fn parser_rejects_bidi_override() { + let bad = "kid\u{202E}".to_string(); // RIGHT-TO-LEFT OVERRIDE + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["kid"] = serde_json::Value::String(bad); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::BidiOverride(_)), "got {err:?}"); +} + +#[test] +fn parser_rejects_ts_with_fractional_seconds() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["ts"] = serde_json::Value::String("2026-05-05T12:00:00.123Z".to_string()); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::BadTimestamp(_)), "got {err:?}"); +} + +#[test] +fn parser_rejects_ts_with_offset() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["ts"] = serde_json::Value::String("2026-05-05T12:00:00+00:00".to_string()); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::BadTimestamp(_)), "got {err:?}"); +} + +#[test] +fn parser_rejects_unknown_top_level_field() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = 
serde_json::from_str(&pin.to_json()).unwrap(); + value["bogus"] = serde_json::Value::String("x".into()); + let err = Pin::from_value(value).unwrap_err(); + assert!( + matches!(err, AttestationError::UnknownTopLevelField(ref s) if s == "bogus"), + "got {err:?}" + ); +} + +#[test] +fn parser_rejects_non_string_extra_value() { + let signer = v2_signer("k"); + let v = small_vec(); + let opts = PinOptions { + extra: [("region".to_string(), "us-east".to_string())].into_iter().collect(), + ..PinOptions::default() + }; + let pin = signer.pin_with_options("x", "m", v.as_slice(), opts).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["extra"]["region"] = serde_json::json!(5); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::InvalidField { field: "extra", .. }), "got {err:?}"); +} + +#[test] +fn parser_rejects_oversize_pin_json() { + let oversize = "x".repeat(vectorpin::attestation::MAX_PIN_JSON_BYTES + 1); + let err = Pin::from_json(&oversize).unwrap_err(); + assert!(matches!(err, AttestationError::SizeLimit { .. }), "got {err:?}"); +} + +#[test] +fn parser_rejects_sig_wrong_length() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + // Replace sig with a base64 string that decodes to fewer bytes. + value["sig"] = serde_json::Value::String("AAAA".to_string()); + let err = Pin::from_value(value).unwrap_err(); + assert!( + matches!(err, AttestationError::InvalidField { field: "sig", .. 
}), + "got {err:?}" + ); +} + +#[test] +fn parser_rejects_vec_dim_zero() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); + value["vec_dim"] = serde_json::json!(0); + let err = Pin::from_value(value).unwrap_err(); + assert!(matches!(err, AttestationError::InvalidField { field: "vec_dim", .. }), "got {err:?}"); +} + +#[test] +fn verify_nan_vector_rejected_as_parse_error() { + let signer = v2_signer("k"); + let v = small_vec(); + let pin = signer.pin("x", "m", v.as_slice()).unwrap(); + + let mut verifier = Verifier::new(); + verifier.add_key(signer.key_id(), signer.public_key_bytes()); + + let mut nan_vec = v.clone(); + nan_vec[0] = f32::NAN; + let err = verifier + .verify( + &pin, + VerifyOptions { + vector: Some(vectorpin::hash::VectorRef::F32(&nan_vec)), + ..VerifyOptions::default() + }, + ) + .unwrap_err(); + assert!(matches!(err, VerifyError::ParseError(_)), "got {err:?}"); +} + +#[test] +fn round_trip_with_extra_and_model_hash() { + let signer = v2_signer("k"); + let v = small_vec(); + let mut extra = std::collections::BTreeMap::new(); + extra.insert("vectorpin.record_id".to_string(), "rec-1".to_string()); + let opts = PinOptions { + model_hash: Some(format!("sha256:{}", "a".repeat(64))), + extra, + ..PinOptions::default() + }; + let pin = signer + .pin_with_options("hi", "m", v.as_slice(), opts) + .unwrap(); + let json = pin.to_json(); + let parsed = Pin::from_json(&json).unwrap(); + assert_eq!(parsed, pin); + + let mut verifier = Verifier::new(); + verifier.add_key(signer.key_id(), signer.public_key_bytes()); + verifier + .verify( + &parsed, + VerifyOptions { + expected_record_id: Some("rec-1"), + ..VerifyOptions::default() + }, + ) + .expect("valid record_id matches"); +} diff --git a/scripts/generate_test_vectors.py b/scripts/generate_test_vectors.py index d5856aa..86ec821 100644 --- 
a/scripts/generate_test_vectors.py +++ b/scripts/generate_test_vectors.py @@ -1,164 +1,510 @@ #!/usr/bin/env python3 # Copyright 2025 Jascha Wanger / Tarnover, LLC # SPDX-License-Identifier: Apache-2.0 -"""Generate cross-language test vectors. +"""Generate cross-language test vectors for VectorPin protocol v2. -Every language port (Rust, JS, Go) consumes the JSON fixtures this +Every language port (Rust, TypeScript) consumes the JSON fixtures this script writes and asserts that: - Recomputing canonical bytes / hashes matches. - Signature verification succeeds against the published public key. - - Negative cases (tampered vector, wrong source) fail with the - correct error code. + - Negative cases fail with the correct error code from the spec §5 + failure list. The fixtures use a deterministic signing key seed so output is -reproducible. The seed and key id are NOT secrets — they exist solely -to make the fixtures verifiable across implementations. +reproducible byte-for-byte across runs. The seed and key id are NOT +secrets — they exist solely to make the fixtures verifiable across +implementations. + +DETERMINISTIC SEED: bytes(range(32)) — i.e. 0x00..0x1f. Do NOT change +this seed without bumping every cross-language compat test. Run from the repo root: python scripts/generate_test_vectors.py -Outputs land in testvectors/. +Outputs v2.json + negative_v2.json under testvectors/. The legacy v1 +fixtures (v1.json, negative_v1.json) are deliberately left untouched — +they remain the contract for the opt-in LegacyV1Verifier. """ from __future__ import annotations import base64 +import copy import json -from datetime import UTC +from datetime import UTC, datetime from pathlib import Path import numpy as np -from vectorpin import Signer +from vectorpin import ( + DOMAIN_TAG, + PROTOCOL_VERSION, + Pin, + Signer, +) OUT_DIR = Path(__file__).resolve().parent.parent / "testvectors" -# Deterministic key material — fixture purposes only. 
+# ---- deterministic key material ---- + DETERMINISTIC_SEED = bytes(range(32)) # 0x00..0x1f -KEY_ID = "test-vectors-2026-05" +KEY_ID = "test-vectors-v2-2026-05" + +# Fixed timestamp so signatures are bit-for-bit reproducible. +FIXED_TS = datetime(2026, 5, 5, 12, 0, 0, tzinfo=UTC) +FIXED_TS_ISO = "2026-05-05T12:00:00Z" def b64url(data: bytes) -> str: + """URL-safe base64, no padding.""" return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii") -def make_vector(seed: int, dim: int = 16, dtype: str = "f32") -> np.ndarray: +def b64url_dec(s: str) -> bytes: + pad = "=" * (-len(s) % 4) + return base64.urlsafe_b64decode(s + pad) + + +def make_vector(seed: int, dim: int, dtype: str) -> np.ndarray: + """Reproducible vector. The same (seed, dim, dtype) always yields + the same byte sequence across machines and Python versions.""" rng = np.random.default_rng(seed) arr = rng.normal(0, 1, size=dim) return arr.astype(np.float32 if dtype == "f32" else np.float64) -def main() -> None: - OUT_DIR.mkdir(exist_ok=True) - signer = Signer.from_private_bytes(DETERMINISTIC_SEED, key_id=KEY_ID) +def vec_canonical_bytes(vec: np.ndarray, dtype: str) -> bytes: + target = " dict: + return { + "name": name, + "description": description, + "input": { + "source": source, + "model": model, + "vec_b64": b64url(vec_canonical_bytes(vec, dtype)), + "vec_dtype": dtype, + "vec_dim": int(vec.shape[0]), + "timestamp": FIXED_TS_ISO, + }, + "pin_json": pin.to_json(), + "expected_canonical_bytes_b64": b64url(pin.header.canonicalize()), + "expected_vec_hash": pin.header.vec_hash, + "expected_source_hash": pin.header.source_hash, + } - fixtures = [] - for i, (text, model, dim, dtype) in enumerate( - [ - ("hello world", "test-model-v1", 16, "f32"), - ("The quick brown fox jumps over the lazy dog.", "text-embedding-3-large", 32, "f32"), - ("café", "unicode-test-v1", 8, "f32"), # NFC normalization fixture - ("multi\nline\ntext", "test-model-v1", 4, "f64"), - ] - ): - vec = make_vector(seed=i, dim=dim, 
dtype=dtype) - # Use a fixed timestamp so the pin (and therefore the signature) is - # bit-for-bit reproducible across runs. - from datetime import datetime - ts = datetime(2026, 5, 5, 12, 0, 0, tzinfo=UTC) - pin = signer.pin( - source=text, - model=model, - vector=vec, - vec_dtype=dtype, - timestamp=ts, + +def build_positive_fixtures(signer: Signer) -> list[dict]: + fixtures: list[dict] = [] + + # 1. Small f32. + vec = make_vector(seed=0, dim=16, dtype="f32") + pin = signer.pin( + source="hello world", + model="test-model-v1", + vector=vec, + vec_dtype="f32", + timestamp=FIXED_TS, + ) + fixtures.append( + _fixture( + name="vector_0_f32_small", + description="Small f32 vector, no extras, no model_hash.", + source="hello world", + model="test-model-v1", + vec=vec, + dtype="f32", + pin=pin, + ) + ) + + # 2. Small f64. + vec = make_vector(seed=1, dim=8, dtype="f64") + pin = signer.pin( + source="multi\nline\ntext", + model="test-model-v1", + vector=vec, + vec_dtype="f64", + timestamp=FIXED_TS, + ) + fixtures.append( + _fixture( + name="vector_1_f64_small", + description="Small f64 vector with multi-line source.", + source="multi\nline\ntext", + model="test-model-v1", + vec=vec, + dtype="f64", + pin=pin, ) - np_dtype = " list[dict]: + """One fixture per failure mode listed in spec §5 / task spec.""" + + fixtures: list[dict] = [] + base_json = base_pin.to_json() + base_dict = json.loads(base_json) + + def _emit(name: str, *, description: str, expected_failure: str, **extra_fields): fixtures.append( { - "name": f"vector_{i}", - "input": { - "source": text, - "model": model, - "vector_b64": b64url(vec.astype(np_dtype).tobytes()), - "vec_dtype": dtype, - "vec_dim": dim, - "timestamp": "2026-05-05T12:00:00Z", - }, - "expected": { - "pin_json": pin.to_json(), - "canonical_header_b64": b64url(pin.header.canonicalize()), - "vec_hash": pin.header.vec_hash, - "source_hash": pin.header.source_hash, - }, + "name": name, + "description": description, + "expected_failure": 
expected_failure, + **extra_fields, } ) - public_key = b64url(signer.public_key_bytes()) - private_seed = b64url(DETERMINISTIC_SEED) + # a. Tampered f32 vector — vector bytes differ from the pin's vec_hash. + tampered = base_vec.copy() + tampered[0] += 1e-3 + _emit( + "tampered_vector", + description="The vector bytes have been modified after pinning.", + expected_failure="VECTOR_TAMPERED", + pin_json=base_json, + tampered_vec_b64=b64url(vec_canonical_bytes(tampered, "f32")), + vec_dtype="f32", + vec_dim=int(base_vec.shape[0]), + ) + + # b. Tampered source — caller supplies a different source than what was signed. + _emit( + "tampered_source", + description=( + "The caller supplies a source string that does not match the pin's source_hash." + ), + expected_failure="SOURCE_MISMATCH", + pin_json=base_json, + tampered_source="goodbye world", + original_source="hello world", + ) + + # c. Wrong expected_model — caller asks for a different model name. + _emit( + "wrong_expected_model", + description="Caller asks the verifier to enforce a model name that does not match.", + expected_failure="MODEL_MISMATCH", + pin_json=base_json, + expected_model="some-other-model", + ) + + # d. Wrong vec_dim — flipping vec_dim breaks the signature because it's signed. + d_dict = copy.deepcopy(base_dict) + d_dict["vec_dim"] = 17 # was 16 + _emit( + "wrong_vec_dim_breaks_signature", + description=( + "vec_dim is part of the signed canonical bytes; changing it " + "breaks the ed25519 signature verification (not a SHAPE_MISMATCH " + "because the verifier reaches signature check first)." + ), + expected_failure="SIGNATURE_INVALID", + pin_json=json.dumps(d_dict, sort_keys=True, separators=(",", ":")), + ) + + # e. Wrong v — UNSUPPORTED_VERSION (regardless of signature). 
+ e_dict = copy.deepcopy(base_dict) + e_dict["v"] = 99 + _emit( + "wrong_version", + description="Pin with v=99; strict v2 verifier rejects before signature check.", + expected_failure="UNSUPPORTED_VERSION", + pin_json=json.dumps(e_dict, sort_keys=True, separators=(",", ":")), + ) + + # f. Wrong kid — UNKNOWN_KEY when registry doesn't have it. + # We deliberately keep the original signature (no re-signing) and only + # swap kid in the JSON. The signature decodes fine but the verifier + # gates on UNKNOWN_KEY before signature verification (§5 step 2). + f_dict = copy.deepcopy(base_dict) + f_dict["kid"] = "no-such-kid" + _emit( + "wrong_kid_unknown_key", + description=( + "Pin's kid is not in the verifier's registry. UNKNOWN_KEY " + "fires before signature check per spec §5 step 2." + ), + expected_failure="UNKNOWN_KEY", + pin_json=json.dumps(f_dict, sort_keys=True, separators=(",", ":")), + ) + + # g. Sig bit-flipped — same length, but invalid signature. + sig_bytes = bytearray(base_pin.sig) + sig_bytes[0] ^= 0x01 + g_dict = copy.deepcopy(base_dict) + g_dict["sig"] = b64url(bytes(sig_bytes)) + _emit( + "sig_bit_flipped", + description="One bit of the signature has been flipped; ed25519 rejects.", + expected_failure="SIGNATURE_INVALID", + pin_json=json.dumps(g_dict, sort_keys=True, separators=(",", ":")), + ) + + # h. Sig wrong length — decodes to 32 bytes instead of 64. + h_dict = copy.deepcopy(base_dict) + h_dict["sig"] = b64url(b"\x00" * 32) + _emit( + "sig_wrong_length", + description="Signature decodes to 32 bytes; parser rejects before crypto.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(h_dict, sort_keys=True, separators=(",", ":")), + ) + + # i. Unknown top-level key.
+ i_dict = copy.deepcopy(base_dict) + i_dict["sig2"] = "extra" + _emit( + "unknown_top_level_field", + description="Pin contains a top-level field outside the v2 allowed set.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(i_dict, sort_keys=True, separators=(",", ":")), + ) + + # j. Non-string extra value. + j_dict = copy.deepcopy(base_dict) + j_dict["extra"] = {"region": 5} + _emit( + "non_string_extra_value", + description="extra map contains a value that is not a string.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(j_dict, sort_keys=True, separators=(",", ":")), + ) + + # k. NaN in vector at verify time. + nan_vec = base_vec.copy().astype(np.float32) + nan_vec[0] = float("nan") + _emit( + "nan_in_vector_at_verify", + description=( + "Caller supplies a vector containing NaN. Verifier rejects " + "before hashing per spec §5 step 6." + ), + expected_failure="PARSE_ERROR", + pin_json=base_json, + # Manually serialize the NaN-bearing bytes; JSON doesn't allow + # NaN, so we encode as raw little-endian f32 bytes via b64. + nan_vec_b64=b64url(nan_vec.tobytes()), + vec_dtype="f32", + vec_dim=int(base_vec.shape[0]), + ) + + # l. NFD string in the model field — non-NFC strings are rejected. + # We construct NFD explicitly from code points so the distinction + # survives editor normalization: 'cafe' + U+0301 COMBINING ACUTE + # ACCENT is the NFD form of 'café' (which in NFC is U+00E9). + nfd_model = "cafe\u0301" + assert nfd_model != "caf\u00e9", "NFD/NFC distinction collapsed" + nfd_dict = copy.deepcopy(base_dict) + nfd_dict["model"] = nfd_model + _emit( + "nfd_model_string", + description=( + "model field is in NFD form ('café' as e + COMBINING ACUTE), " + "not NFC. Parser rejects per spec §3.1." + ), + expected_failure="PARSE_ERROR", + pin_json=json.dumps(nfd_dict, sort_keys=True, separators=(",", ":"), ensure_ascii=False), + ) + + # m. Timestamp with fractional seconds.
+ m_dict = copy.deepcopy(base_dict) + m_dict["ts"] = "2026-05-05T12:00:00.000Z" + _emit( + "ts_fractional_seconds", + description="Timestamp has fractional seconds; strict regex rejects.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(m_dict, sort_keys=True, separators=(",", ":")), + ) + + # n. Timestamp with offset. + n_dict = copy.deepcopy(base_dict) + n_dict["ts"] = "2026-05-05T12:00:00+00:00" + _emit( + "ts_with_offset", + description="Timestamp has an explicit offset instead of trailing Z.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(n_dict, sort_keys=True, separators=(",", ":")), + ) + + # o. Timestamp with lowercase t / z. + o_dict = copy.deepcopy(base_dict) + o_dict["ts"] = "2026-05-05t12:00:00z" + _emit( + "ts_lowercase_tz", + description="Timestamp uses lowercase t/z separators; strict regex rejects.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(o_dict, sort_keys=True, separators=(",", ":")), + ) + + # p. RECORD_MISMATCH — pin has a record_id, caller expects a different one. + p_signer = Signer.from_private_bytes(DETERMINISTIC_SEED, key_id=KEY_ID) + p_pin = p_signer.pin( + source="hello world", + model="test-model-v1", + vector=base_vec, + vec_dtype="f32", + timestamp=FIXED_TS, + extra={"vectorpin.record_id": "rec-real"}, + ) + _emit( + "record_id_mismatch", + description=( + "Pin commits to vectorpin.record_id=rec-real but the caller " + "asks the verifier to enforce rec-other." + ), + expected_failure="RECORD_MISMATCH", + pin_json=p_pin.to_json(), + expected_record_id="rec-other", + ) + + # q. Oversize pin JSON — synthetic 70 KiB. + q_dict = copy.deepcopy(base_dict) + # Pad the model field with a long filler string. The parser rejects
+ q_dict["model"] = "x" * 70_000 + _emit( + "oversize_pin_json", + description="Pin JSON exceeds the §4.3 64 KiB cap; rejected pre-parse.", + expected_failure="PARSE_ERROR", + pin_json=json.dumps(q_dict, sort_keys=True, separators=(",", ":")), + ) + + return fixtures + + +def main() -> None: + OUT_DIR.mkdir(exist_ok=True) + signer = Signer.from_private_bytes(DETERMINISTIC_SEED, key_id=KEY_ID) + + # ---- positive fixtures ---- + fixtures = build_positive_fixtures(signer) out = { - "version": 1, + "version": PROTOCOL_VERSION, + "domain_tag_b64": b64url(DOMAIN_TAG), "comment": ( - "Cross-language test vectors for VectorPin protocol v1. " + "Cross-language test vectors for VectorPin protocol v2. " "The signing key seed is intentionally public — it exists " "only to make these fixtures reproducible. Do not use in " "production." ), "key_id": KEY_ID, - "public_key_b64": public_key, - "private_seed_b64": private_seed, + "public_key_b64": b64url(signer.public_key_bytes()), + "private_key_b64": b64url(DETERMINISTIC_SEED), "fixtures": fixtures, } - fixtures_path = OUT_DIR / "v1.json" - fixtures_path.write_text(json.dumps(out, indent=2) + "\n") - - # Also dump a NEGATIVE-case fixture: same pin, but the vector has been - # tampered with. Implementations should detect VECTOR_TAMPERED. - legit = fixtures[0] - tampered_vec = make_vector(seed=0, dim=16, dtype="f32") - tampered_vec[0] += 1e-3 # any change at all - negative = { - "name": "tampered_vector", - "pin_json": legit["expected"]["pin_json"], - "tampered_vector_b64": b64url(tampered_vec.astype(" hash strings used in source_hash, +# vec_hash, model_hash. +_HASH_RE = re.compile(r"^sha256:[0-9a-f]{64}$") + +# Allowed vec_dtype values. Mirrors hash.CanonicalDtype but kept local +# to avoid an import cycle. +_ALLOWED_DTYPES = frozenset({"f32", "f64"}) + +# Strict v2 timestamp pattern: YYYY-MM-DDTHH:MM:SSZ, exactly. No +# fractional seconds, no offsets, no lowercase t/z. 
+_TS_RE = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$") + +# Top-level keys permitted in a v2 pin (§4.1). Anything else MUST be +# rejected at parse time to defeat field-injection attacks. +_ALLOWED_TOP_LEVEL_KEYS = frozenset( + { + "v", + "kid", + "model", + "model_hash", + "source_hash", + "vec_hash", + "vec_dtype", + "vec_dim", + "ts", + "extra", + "sig", + } +) def _b64(data: bytes) -> str: @@ -41,15 +117,42 @@ def _b64dec(s: str) -> bytes: return base64.urlsafe_b64decode(s + pad) +def _check_string_safe(value: str, field_name: str) -> None: + """Reject control chars (U+0000-U+001F) and bidi overrides. + + Per §3.1, any string-typed field must be free of: + - Control characters U+0000-U+001F + - Bidirectional overrides U+202A-U+202E, U+2066-U+2069 + """ + for ch in value: + cp = ord(ch) + if cp < 0x20: + raise ValueError( + f"{field_name} contains control character U+{cp:04X}" + ) + if 0x202A <= cp <= 0x202E or 0x2066 <= cp <= 0x2069: + raise ValueError( + f"{field_name} contains bidi-override character U+{cp:04X}" + ) + + +def _check_nfc(value: str, field_name: str) -> None: + """Reject strings that are not already in Unicode NFC form.""" + if unicodedata.normalize("NFC", value) != value: + raise ValueError(f"{field_name} is not NFC-normalized") + + @dataclass(frozen=True) class PinHeader: - """The signed portion of a Pin. + """The signed portion of a Pin (everything except `sig`). - Everything except `sig` and `kid` lives here. Two Pins are - equivalent iff their headers canonicalize to identical bytes. + Two Pins are equivalent iff their headers canonicalize to identical + bytes. In v2, the header includes `v` and `kid` so both are bound + by the signature. """ v: int + kid: str model: str source_hash: str vec_hash: str @@ -60,8 +163,14 @@ class PinHeader: extra: dict[str, str] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: + """Plain-dict view of the header, including `v` and `kid`. 
+ + Used both for JSON serialization (via Pin.to_dict, which then + adds `sig`) and for canonicalization (via canonicalize()). + """ out: dict[str, Any] = { "v": self.v, + "kid": self.kid, "model": self.model, "source_hash": self.source_hash, "vec_hash": self.vec_hash, @@ -78,29 +187,62 @@ def to_dict(self) -> dict[str, Any]: def canonicalize(self) -> bytes: """Stable byte representation for signing/verifying. - Uses JSON with sorted keys, no whitespace. This is the form of - canonicalization that has the best library support across - languages while still being deterministic. + Returns ``DOMAIN_TAG || canonical_json(header)`` where + ``canonical_json`` is JSON with sorted keys, no whitespace, + UTF-8 encoded, NFC-normalized strings, and `ensure_ascii=False` + so non-ASCII NFC code points are emitted as raw UTF-8. + + All header fields — including `v` and `kid` — are included. + The 13-byte domain tag prevents cross-protocol signature reuse. """ - return json.dumps( - self.to_dict(), + # NFC every string field at canonicalization time so the bytes + # match what a fresh-from-spec implementation would emit. This + # step only normalizes; it does not validate. Control-char and + # bidi checks are done by the signer's input normalization and + # by the parser (Pin.from_dict). + d = self.to_dict() + # NFC-normalize the string-typed fields in place. + d["kid"] = unicodedata.normalize("NFC", d["kid"]) + d["model"] = unicodedata.normalize("NFC", d["model"]) + d["ts"] = unicodedata.normalize("NFC", d["ts"]) + if "extra" in d: + d["extra"] = { + unicodedata.normalize("NFC", k): unicodedata.normalize("NFC", v) + for k, v in d["extra"].items() + } + # Re-sort after NFC to keep canonical key order stable + # under NFC composition (composed forms may differ in code + # point ordering from decomposed forms).
+ d["extra"] = dict(sorted(d["extra"].items())) + + body = json.dumps( + d, sort_keys=True, separators=(",", ":"), ensure_ascii=False, ).encode("utf-8") + return DOMAIN_TAG + body @dataclass(frozen=True) class Pin: - """An attestation binding an embedding to its source and producer.""" + """An attestation binding an embedding to its source and producer. + + The `kid` and `v` fields live on the header (because they are + signed) but the wire-format JSON still flattens them at the top + level for compactness and readability. + """ header: PinHeader - kid: str sig: bytes # raw signature bytes (ed25519 = 64 bytes) + @property + def kid(self) -> str: + """Convenience accessor — kid is part of the header.""" + return self.header.kid + def to_dict(self) -> dict[str, Any]: d = self.header.to_dict() - d["kid"] = self.kid d["sig"] = _b64(self.sig) return d @@ -109,22 +251,188 @@ def to_json(self) -> str: return json.dumps(self.to_dict(), sort_keys=True, separators=(",", ":")) @classmethod - def from_dict(cls, d: dict[str, Any]) -> Pin: - if d.get("v") != PROTOCOL_VERSION: - raise ValueError(f"unsupported pin version {d.get('v')!r}; expected {PROTOCOL_VERSION}") + def from_dict( + cls, d: dict[str, Any], *, accept_versions: frozenset[int] | None = None + ) -> Pin: + """Parse a pin from a dict, enforcing all v2 wire-format rules. + + `accept_versions` is for internal use by the legacy verifier; + callers should generally not pass it. + """ + if not isinstance(d, dict): + raise ValueError("pin must be a JSON object") + + # 1. Reject unknown top-level fields (§4.1). + unknown = set(d.keys()) - _ALLOWED_TOP_LEVEL_KEYS + if unknown: + raise ValueError( + f"pin contains unknown top-level field(s): {sorted(unknown)}" + ) + + # 2. Version check. + v = d.get("v") + allowed = accept_versions if accept_versions is not None else frozenset( + {PROTOCOL_VERSION} + ) + if v not in allowed: + raise ValueError( + f"unsupported pin version {v!r}; expected {sorted(allowed)}" + ) + + # 3. 
String field validation: type, NFC, control chars, bidi. + model = d.get("model") + if not isinstance(model, str) or not model: + raise ValueError("model must be a non-empty string") + _check_string_safe(model, "model") + _check_nfc(model, "model") + + kid = d.get("kid") + if not isinstance(kid, str) or not kid: + raise ValueError("kid must be a non-empty string") + _check_string_safe(kid, "kid") + _check_nfc(kid, "kid") + + vec_dtype = d.get("vec_dtype") + if vec_dtype not in _ALLOWED_DTYPES: + raise ValueError( + f"vec_dtype must be one of {sorted(_ALLOWED_DTYPES)}; got {vec_dtype!r}" + ) + + # 4. vec_dim: int (and not bool), in (0, MAX_VEC_DIM]. + vec_dim_raw = d.get("vec_dim") + if not isinstance(vec_dim_raw, int) or isinstance(vec_dim_raw, bool): + raise ValueError( + f"vec_dim must be an int; got {type(vec_dim_raw).__name__}" + ) + if not (0 < vec_dim_raw <= MAX_VEC_DIM): + raise ValueError( + f"vec_dim must be in (0, {MAX_VEC_DIM}]; got {vec_dim_raw}" + ) + + # 5. Hashes. + source_hash = d.get("source_hash") + if not isinstance(source_hash, str) or not _HASH_RE.match(source_hash): + raise ValueError("source_hash must match 'sha256:<64 hex chars>'") + + vec_hash = d.get("vec_hash") + if not isinstance(vec_hash, str) or not _HASH_RE.match(vec_hash): + raise ValueError("vec_hash must match 'sha256:<64 hex chars>'") + + model_hash = d.get("model_hash") + if model_hash is not None: + if not isinstance(model_hash, str) or not _HASH_RE.match(model_hash): + raise ValueError("model_hash must match 'sha256:<64 hex chars>'") + + # 6. Timestamp: strict regex; v1 mode allows any non-empty string. + ts = d.get("ts") + if not isinstance(ts, str) or not ts: + raise ValueError("ts must be a non-empty string") + if PROTOCOL_VERSION in allowed and v == PROTOCOL_VERSION: + if not _TS_RE.match(ts): + raise ValueError( + "ts must match 'YYYY-MM-DDTHH:MM:SSZ' exactly" + ) + _check_string_safe(ts, "ts") + _check_nfc(ts, "ts") + + # 7. extra: map, bounded. 
+ extra_raw = d.get("extra", {}) + if not isinstance(extra_raw, dict): + raise ValueError("extra must be an object") + if len(extra_raw) > MAX_EXTRA_ENTRIES: + raise ValueError( + f"extra exceeds maximum {MAX_EXTRA_ENTRIES} entries" + ) + extra: dict[str, str] = {} + for k, val in extra_raw.items(): + if not isinstance(k, str): + raise ValueError("extra keys must be strings") + if not isinstance(val, str): + raise ValueError("extra values must be strings") + if len(k.encode("utf-8")) > MAX_EXTRA_KEY_BYTES: + raise ValueError( + f"extra key exceeds maximum {MAX_EXTRA_KEY_BYTES} bytes" + ) + if len(val.encode("utf-8")) > MAX_EXTRA_VALUE_BYTES: + raise ValueError( + f"extra value exceeds maximum {MAX_EXTRA_VALUE_BYTES} bytes" + ) + if v == PROTOCOL_VERSION: + _check_string_safe(k, f"extra key {k!r}") + _check_nfc(k, f"extra key {k!r}") + _check_string_safe(val, f"extra[{k!r}]") + _check_nfc(val, f"extra[{k!r}]") + extra[k] = val + + # 8. Signature. + sig_raw = d.get("sig") + if not isinstance(sig_raw, str): + raise ValueError("sig must be a base64-encoded string") + try: + sig_bytes = _b64dec(sig_raw) + except (binascii.Error, ValueError) as e: + raise ValueError(f"sig is not valid base64: {e}") from e + if len(sig_bytes) != SIG_LEN: + raise ValueError( + f"sig must decode to exactly {SIG_LEN} bytes; got {len(sig_bytes)}" + ) + header = PinHeader( - v=d["v"], - model=d["model"], - source_hash=d["source_hash"], - vec_hash=d["vec_hash"], - vec_dtype=d["vec_dtype"], - vec_dim=int(d["vec_dim"]), - ts=d["ts"], - model_hash=d.get("model_hash"), - extra=dict(d.get("extra", {})), + v=int(v), + kid=kid, + model=model, + source_hash=source_hash, + vec_hash=vec_hash, + vec_dtype=vec_dtype, + vec_dim=int(vec_dim_raw), + ts=ts, + model_hash=model_hash, + extra=extra, ) - return cls(header=header, kid=d["kid"], sig=_b64dec(d["sig"])) + return cls(header=header, sig=sig_bytes) @classmethod - def from_json(cls, s: str) -> Pin: - return cls.from_dict(json.loads(s)) + def from_json( + 
cls, s: str, *, accept_versions: frozenset[int] | None = None + ) -> Pin: + # Measure the raw byte size before json.loads runs so we cap + # parser memory use, not just the resulting object. + s_bytes = s.encode("utf-8") if isinstance(s, str) else s + if len(s_bytes) > MAX_PIN_JSON_BYTES: + raise ValueError("pin JSON too large") + return cls.from_dict(json.loads(s), accept_versions=accept_versions) + + +# ---- legacy v1 canonicalization (migration-only) ---- + + +def _canonicalize_v1(header: PinHeader) -> bytes: + """Reconstruct v1 canonical bytes for a header. + + v1 canonicalization differed from v2 in three ways: + - No domain tag prefix. + - `kid` was NOT included in the signed payload (it lived only at + the Pin level, not the PinHeader level). + - No strict NFC / control-char / bidi enforcement on string + fields (these were silently passed through). + + The header dict emitted here matches v1.json's `canonical_header_b64` + byte-for-byte so that pins generated by the v1 reference + implementation continue to verify under LegacyV1Verifier. 
+ """ + out: dict[str, Any] = { + "v": header.v, + "model": header.model, + "source_hash": header.source_hash, + "vec_hash": header.vec_hash, + "vec_dtype": header.vec_dtype, + "vec_dim": header.vec_dim, + "ts": header.ts, + } + if header.model_hash is not None: + out["model_hash"] = header.model_hash + if header.extra: + out["extra"] = dict(sorted(header.extra.items())) + return json.dumps( + out, sort_keys=True, separators=(",", ":"), ensure_ascii=False + ).encode("utf-8") diff --git a/src/vectorpin/signer.py b/src/vectorpin/signer.py index 50702fc..07606f2 100644 --- a/src/vectorpin/signer.py +++ b/src/vectorpin/signer.py @@ -16,6 +16,8 @@ from __future__ import annotations +import math +import unicodedata from datetime import UTC, datetime import numpy as np @@ -25,10 +27,28 @@ Ed25519PublicKey, ) -from vectorpin.attestation import PROTOCOL_VERSION, Pin, PinHeader +from vectorpin.attestation import ( + PROTOCOL_VERSION, + Pin, + PinHeader, + _check_nfc, + _check_string_safe, +) from vectorpin.hash import CanonicalDtype, hash_text, hash_vector +def _normalize_str(value: str, field_name: str) -> str: + """NFC-normalize + reject control/bidi characters in a string input. + + Signers tolerate non-NFC input (they normalize before signing) but + still reject structurally hostile characters so the signed pin is + always parseable by a strict verifier. + """ + nfc = unicodedata.normalize("NFC", value) + _check_string_safe(nfc, field_name) + return nfc + + class Signer: """Produces Pin attestations for embeddings. @@ -40,8 +60,11 @@ class Signer: def __init__(self, private_key: Ed25519PrivateKey, key_id: str): if not key_id: raise ValueError("key_id must be non-empty") + # Normalize/validate the key id once at construction so every + # subsequent Pin emits a header parseable by a strict verifier. 
+ normalized = _normalize_str(key_id, "key_id") self._private_key = private_key - self._key_id = key_id + self._key_id = normalized @classmethod def generate(cls, key_id: str) -> Signer: @@ -58,8 +81,27 @@ def from_private_bytes(cls, raw: bytes, key_id: str) -> Signer: return cls(Ed25519PrivateKey.from_private_bytes(raw), key_id) @classmethod - def from_pem(cls, pem: bytes, key_id: str, password: bytes | None = None) -> Signer: - """Load a signer from PEM-encoded PKCS#8 ed25519 key material.""" + def from_pem( + cls, + pem: bytes, + key_id: str, + password: bytes | None = None, + *, + allow_unencrypted: bool = False, + ) -> Signer: + """Load a signer from PEM-encoded PKCS#8 ed25519 key material. + + Callers must either provide a `password` to decrypt an + encrypted PEM, or set `allow_unencrypted=True` to opt in to + loading an unencrypted file. The default is to refuse: + unencrypted private keys on disk are a footgun, and we want a + positive confirmation that the caller knew the file lacked + encryption. + """ + if password is None and not allow_unencrypted: + raise ValueError( + "PEM is unencrypted; pass allow_unencrypted=True to confirm" + ) key = serialization.load_pem_private_key(pem, password=password) if not isinstance(key, Ed25519PrivateKey): raise TypeError(f"expected Ed25519PrivateKey, got {type(key).__name__}") @@ -100,6 +142,10 @@ def pin( ) -> Pin: """Create a Pin for (source, model, vector). + Per §3.2, vectors containing NaN, +inf, or -inf are rejected at + sign time. Per §3.1, every string-typed input is NFC-normalized + and checked for control characters and bidi overrides. + Args: source: The exact source text the embedding was produced from. Hashed and committed to; the verifier needs the same text @@ -118,20 +164,63 @@ def pin( A signed Pin. Serialize with `pin.to_json()` and store alongside the vector in the DB metadata. """ + # Reject NaN / Inf at sign time so a signer never commits to a + # vector value with ambiguous hash semantics. 
The cast to the + # canonical dtype happens here too so we catch overflows that + # would silently become +inf in f32. + target = np.dtype(" str" + ) + k_norm = _normalize_str(k, f"extra key {k!r}") + v_norm = _normalize_str(val, f"extra[{k!r}]") + extra_norm[k_norm] = v_norm + if timestamp is None: timestamp = datetime.now(UTC) ts_iso = timestamp.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + # Defensive: the strftime above should always emit the v2 ts + # format, but verify so a buggy platform locale can't sneak + # something else through. + _check_nfc(ts_iso, "ts") header = PinHeader( v=PROTOCOL_VERSION, - model=model, + kid=self._key_id, + model=model_norm, model_hash=model_hash, - source_hash=hash_text(source), - vec_hash=hash_vector(vector, vec_dtype), + source_hash=hash_text(source_norm), + vec_hash=hash_vector(cast, vec_dtype), vec_dtype=vec_dtype, - vec_dim=int(vector.shape[0]), + vec_dim=int(cast.shape[0]), ts=ts_iso, - extra=dict(extra) if extra else {}, + extra=extra_norm, ) sig = self._private_key.sign(header.canonicalize()) - return Pin(header=header, kid=self._key_id, sig=sig) + return Pin(header=header, sig=sig) diff --git a/src/vectorpin/verifier.py b/src/vectorpin/verifier.py index 855d744..e33569e 100644 --- a/src/vectorpin/verifier.py +++ b/src/vectorpin/verifier.py @@ -2,15 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 """Pin verification. -Three failure modes, in order of severity: +Failure modes, in rough order of severity: - 1. SignatureInvalid — the producer is not who the pin claims, or the - attestation has been re-signed by an attacker. - 2. VectorTampered — the vector in the store does not match what the - producer attested to. This is the steganography kill shot. - 3. SourceMismatch — the source text the verifier is checking against - does not match what the producer pinned. Either the verifier has - the wrong text, or the source corpus drifted. + 1. 
PARSE_ERROR — pin JSON failed wire-format validation before any + cryptographic work was attempted. + 2. UNSUPPORTED_VERSION / UNKNOWN_KEY / KEY_EXPIRED — registry-level + problems; signature was never checked. + 3. SIGNATURE_INVALID — the producer is not who the pin claims, or the + attestation has been re-signed by an attacker. + 4. VECTOR_TAMPERED — the vector in the store does not match what the + producer attested to. The steganography kill shot. + 5. SOURCE_MISMATCH — the source text the verifier is checking against + does not match what the producer pinned. + 6. MODEL_MISMATCH / SHAPE_MISMATCH — provided ground truth does not + match what the pin attested to. + 7. RECORD_MISMATCH / COLLECTION_MISMATCH / TENANT_MISMATCH — caller + supplied an expected replay-protection identifier + that did not match the pin's `extra` value (§5.8). The Verifier returns a structured VerificationResult so callers can distinguish these and route them differently (alert vs. quarantine vs. @@ -20,13 +28,18 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime from enum import Enum import numpy as np from cryptography.exceptions import InvalidSignature from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey -from vectorpin.attestation import PROTOCOL_VERSION, Pin +from vectorpin.attestation import ( + PROTOCOL_VERSION, + Pin, + _canonicalize_v1, +) from vectorpin.hash import hash_text, hash_vector @@ -36,11 +49,16 @@ class VerifyError(Enum): OK = "ok" UNKNOWN_KEY = "unknown_key" UNSUPPORTED_VERSION = "unsupported_version" + KEY_EXPIRED = "key_expired" + PARSE_ERROR = "parse_error" SIGNATURE_INVALID = "signature_invalid" VECTOR_TAMPERED = "vector_tampered" SOURCE_MISMATCH = "source_mismatch" MODEL_MISMATCH = "model_mismatch" SHAPE_MISMATCH = "shape_mismatch" + RECORD_MISMATCH = "record_mismatch" + COLLECTION_MISMATCH = "collection_mismatch" + TENANT_MISMATCH = "tenant_mismatch" @dataclass(frozen=True) @@ -55,10 
+73,67 @@ def __bool__(self) -> bool: return self.ok +@dataclass(frozen=True) +class KeyEntry: + """A registered public key, with an optional validity window (§7). + + `valid_from` / `valid_until` are inclusive of `valid_from` and + exclusive of `valid_until` per common convention. If both are None, + the key validates pins of any `ts`. + """ + + public_key: Ed25519PublicKey + valid_from: datetime | None = None + valid_until: datetime | None = None + + def _public_key_from_bytes(raw: bytes) -> Ed25519PublicKey: return Ed25519PublicKey.from_public_bytes(raw) +def _coerce_entry( + key: Ed25519PublicKey | bytes | KeyEntry, kid: str +) -> KeyEntry: + if isinstance(key, KeyEntry): + return key + if isinstance(key, bytes): + return KeyEntry(public_key=_public_key_from_bytes(key)) + if isinstance(key, Ed25519PublicKey): + return KeyEntry(public_key=key) + raise TypeError( + f"public key for {kid!r} must be Ed25519PublicKey, bytes, or KeyEntry" + ) + + +def _parse_ts(ts: str) -> datetime | None: + """Parse a pin's `ts` to UTC datetime. Returns None on failure. + + Used only for validity-window checks; a malformed ts here means + the registry can't decide whether the key was valid at pin time, + so we conservatively reject as KEY_EXPIRED rather than allowing. + """ + try: + # v2 ts is YYYY-MM-DDTHH:MM:SSZ; fromisoformat needs +00:00 or + # a stripped Z. Strip the trailing Z to keep semantics + # consistent across Python versions. + if ts.endswith("Z"): + ts = ts[:-1] + "+00:00" + return datetime.fromisoformat(ts) + except (ValueError, TypeError): + return None + + +@dataclass +class _ReplayCheck: + """Bundle of optional replay-protection identifiers the caller + can supply to verify(). Each maps to a reserved `extra` key (§8). + """ + + record_id: str | None = None + collection_id: str | None = None + tenant_id: str | None = None + + class Verifier: """Verifies Pin attestations against a key registry. 
@@ -66,23 +141,54 @@ class Verifier: hold multiple keys at once to support rotation: when a new signing key is introduced, both the old and new public keys live in the registry until the rotation window closes. + + The default verifier accepts only protocol v2 pins. To accept + legacy v1 pins for migration purposes, use ``LegacyV1Verifier`` or + pass ``accept_v1_legacy=True`` here. Legacy mode is opt-in + because v1 pins lack the §12 audit fixes. """ - def __init__(self, public_keys: dict[str, Ed25519PublicKey | bytes]): - self._keys: dict[str, Ed25519PublicKey] = {} - for kid, key in public_keys.items(): - if isinstance(key, bytes): - self._keys[kid] = _public_key_from_bytes(key) - elif isinstance(key, Ed25519PublicKey): - self._keys[kid] = key - else: - raise TypeError(f"public key for {kid!r} must be Ed25519PublicKey or bytes") + _accepted_versions: frozenset[int] = frozenset({PROTOCOL_VERSION}) + + def __init__( + self, + public_keys: dict[str, Ed25519PublicKey | bytes | KeyEntry], + *, + accept_v1_legacy: bool = False, + ): + self._keys: dict[str, KeyEntry] = { + kid: _coerce_entry(key, kid) for kid, key in public_keys.items() + } + self._accept_v1_legacy = accept_v1_legacy - def add_key(self, kid: str, key: Ed25519PublicKey | bytes) -> None: + def add_key( + self, + kid: str, + key: Ed25519PublicKey | bytes | KeyEntry, + *, + valid_from: datetime | None = None, + valid_until: datetime | None = None, + ) -> None: """Register an additional public key — used during rotation.""" - if isinstance(key, bytes): - key = _public_key_from_bytes(key) - self._keys[kid] = key + entry = _coerce_entry(key, kid) + if valid_from is not None or valid_until is not None: + entry = KeyEntry( + public_key=entry.public_key, + valid_from=valid_from if valid_from is not None else entry.valid_from, + valid_until=valid_until if valid_until is not None else entry.valid_until, + ) + self._keys[kid] = entry + + def _canonical_for(self, pin: Pin) -> bytes: + """Reconstruct the canonical 
bytes a signer would have signed. + + Dispatches on the pin's `v` field so the legacy verifier can + share most of this code path. v2 emits `DOMAIN_TAG || json`; + v1 emits just the (kid-less) json. + """ + if pin.header.v == 1: + return _canonicalize_v1(pin.header) + return pin.header.canonicalize() def verify( self, @@ -91,6 +197,9 @@ def verify( source: str | None = None, vector: np.ndarray | None = None, expected_model: str | None = None, + expected_record_id: str | None = None, + expected_collection_id: str | None = None, + expected_tenant_id: str | None = None, ) -> VerificationResult: """Verify a Pin against optional ground-truth source/vector. @@ -98,24 +207,71 @@ def verify( the corresponding ground truth is supplied — letting callers do partial verification (e.g. signature-only when the source text is unavailable but the producer identity still matters). + + Replay-protection identifiers (§5 step 8): if any of + ``expected_record_id``, ``expected_collection_id``, or + ``expected_tenant_id`` is supplied, the verifier compares it + against the value at ``vectorpin.record_id`` / + ``vectorpin.collection_id`` / ``vectorpin.tenant_id`` in the + pin's ``extra`` map. A missing or mismatched value rejects. """ - if pin.header.v != PROTOCOL_VERSION: + # Step 1: version dispatch. + accepted = self._accepted_versions + if self._accept_v1_legacy: + accepted = accepted | {1} + if pin.header.v not in accepted: return VerificationResult( False, VerifyError.UNSUPPORTED_VERSION, f"pin version {pin.header.v} not supported by this verifier", ) - public_key = self._keys.get(pin.kid) - if public_key is None: + # Pre-check signature shape before any cryptographic work so a + # malformed pin produces a structured SIGNATURE_INVALID rather + # than letting a downstream exception escape. 
+ if not isinstance(pin.sig, (bytes, bytearray)) or len(pin.sig) != 64: + if isinstance(pin.sig, (bytes, bytearray)): + detail = f"signature must be exactly 64 bytes; got {len(pin.sig)}" + else: + detail = ( + f"signature must be exactly 64 bytes; " + f"got {type(pin.sig).__name__}" + ) + return VerificationResult(False, VerifyError.SIGNATURE_INVALID, detail) + + # Step 2: kid lookup + validity window. + entry = self._keys.get(pin.kid) + if entry is None: return VerificationResult( False, VerifyError.UNKNOWN_KEY, f"no registered public key for kid={pin.kid!r}", ) + if entry.valid_from is not None or entry.valid_until is not None: + pin_ts = _parse_ts(pin.header.ts) + if pin_ts is None: + return VerificationResult( + False, + VerifyError.KEY_EXPIRED, + "pin ts unparseable; cannot evaluate key validity window", + ) + if entry.valid_from is not None and pin_ts < entry.valid_from: + return VerificationResult( + False, + VerifyError.KEY_EXPIRED, + f"pin ts {pin.header.ts} predates key valid_from", + ) + if entry.valid_until is not None and pin_ts >= entry.valid_until: + return VerificationResult( + False, + VerifyError.KEY_EXPIRED, + f"pin ts {pin.header.ts} is at or past key valid_until", + ) + + # Step 4: signature. try: - public_key.verify(pin.sig, pin.header.canonicalize()) + entry.public_key.verify(pin.sig, self._canonical_for(pin)) except InvalidSignature: return VerificationResult( False, @@ -123,6 +279,8 @@ def verify( "ed25519 signature did not verify", ) + # Step 6: vector check. Vector NaN/Inf at verify time is a + # parse error per §5 step 6 — reject before hashing. 
if vector is not None: if vector.ndim != 1 or vector.shape[0] != pin.header.vec_dim: return VerificationResult( @@ -130,6 +288,12 @@ def verify( VerifyError.SHAPE_MISMATCH, f"vector shape {vector.shape} does not match pin (dim={pin.header.vec_dim})", ) + if not np.isfinite(vector).all(): + return VerificationResult( + False, + VerifyError.PARSE_ERROR, + "supplied vector contains NaN or infinity", + ) if hash_vector(vector, pin.header.vec_dtype) != pin.header.vec_hash: return VerificationResult( False, @@ -137,6 +301,7 @@ def verify( "vector hash mismatch — embedding has been modified after pinning", ) + # Step 5: source check. if source is not None and hash_text(source) != pin.header.source_hash: return VerificationResult( False, @@ -144,6 +309,7 @@ def verify( "source hash mismatch — pinned source differs from supplied source", ) + # Step 7: model check. if expected_model is not None and pin.header.model != expected_model: return VerificationResult( False, @@ -151,4 +317,71 @@ def verify( f"pin model {pin.header.model!r} != expected {expected_model!r}", ) + # Step 8: replay-protection identifier checks. Reserved + # `vectorpin.*` keys are tamper-evident because every `extra` + # entry is signed. + replay = ( + ("vectorpin.record_id", expected_record_id, VerifyError.RECORD_MISMATCH), + ( + "vectorpin.collection_id", + expected_collection_id, + VerifyError.COLLECTION_MISMATCH, + ), + ( + "vectorpin.tenant_id", + expected_tenant_id, + VerifyError.TENANT_MISMATCH, + ), + ) + for key, expected, err in replay: + if expected is None: + continue + actual = pin.header.extra.get(key) + if actual != expected: + return VerificationResult( + False, + err, + f"pin extra[{key!r}]={actual!r} != expected {expected!r}", + ) + return VerificationResult(True, VerifyError.OK) + + +class LegacyV1Verifier(Verifier): + """Verifier that accepts protocol v1 pins for migration purposes. 
+ + This dispatches v1 pins to the v1 canonicalization (no domain tag, + no kid in the signed bytes) so historical pins continue to verify + byte-for-byte against their original signatures. v2 pins are still + accepted by this verifier so a migration window can verify both + formats from the same registry. + + Per spec §5 step 1: legacy mode MUST be opt-in and SHOULD be + disabled by default. Use this class explicitly, do not enable it + in shared infrastructure paths. + + To parse a v1 pin JSON string under this verifier, call + ``LegacyV1Verifier.parse_pin(s)`` — the default ``Pin.from_json`` + rejects v1 pins because the strict v2 wire-format rules don't + apply to them. + """ + + _accepted_versions = frozenset({1, PROTOCOL_VERSION}) + _ACCEPT_PARSE_VERSIONS = frozenset({1, PROTOCOL_VERSION}) + + def __init__( + self, public_keys: dict[str, Ed25519PublicKey | bytes | KeyEntry] + ): + super().__init__(public_keys, accept_v1_legacy=True) + + @classmethod + def parse_pin(cls, s: str | dict) -> Pin: + """Parse a v1 or v2 pin JSON string / dict. + + v1 pins are subject to the v1 parser's looser rules (no NFC + enforcement, no strict ts regex), so historical artifacts + load. v2 pins still go through the strict v2 parser. 
+ """ + if isinstance(s, dict): + return Pin.from_dict(s, accept_versions=cls._ACCEPT_PARSE_VERSIONS) + return Pin.from_json(s, accept_versions=cls._ACCEPT_PARSE_VERSIONS) diff --git a/tests/test_attestation.py b/tests/test_attestation.py index 77d575f..0f21474 100644 --- a/tests/test_attestation.py +++ b/tests/test_attestation.py @@ -1,17 +1,19 @@ # Copyright 2025 Jascha Wanger / Tarnover, LLC # SPDX-License-Identifier: Apache-2.0 -"""Attestation format and round-trip tests.""" +"""Attestation format and round-trip tests (v2 protocol).""" +import base64 import json import pytest -from vectorpin.attestation import PROTOCOL_VERSION, Pin, PinHeader +from vectorpin.attestation import DOMAIN_TAG, PROTOCOL_VERSION, Pin, PinHeader def _header(**overrides) -> PinHeader: base = { "v": PROTOCOL_VERSION, + "kid": "prod-2026-05", "model": "text-embedding-3-large", "source_hash": "sha256:" + "0" * 64, "vec_hash": "sha256:" + "1" * 64, @@ -23,6 +25,13 @@ def _header(**overrides) -> PinHeader: return PinHeader(**base) +def test_canonicalize_starts_with_domain_tag(): + """The whole point of v2: signed bytes begin with the domain tag.""" + raw = _header().canonicalize() + assert raw.startswith(DOMAIN_TAG) + assert raw.startswith(b"vectorpin/v2\x00") + + def test_canonicalize_is_deterministic(): h = _header() assert h.canonicalize() == h.canonicalize() @@ -48,23 +57,38 @@ def test_canonicalize_includes_optional_fields_when_set(): assert "us-west" in raw +def test_canonicalize_includes_v_and_kid(): + """v2: both `v` and `kid` are part of the signed canonical bytes.""" + raw = _header(kid="prod-2026-05").canonicalize().decode() + # The domain tag is binary so split on it. 
+ body = raw.split("\x00", 1)[1] + parsed = json.loads(body) + assert parsed["v"] == 2 + assert parsed["kid"] == "prod-2026-05" + + def test_pin_to_json_round_trip(): - pin = Pin(header=_header(), kid="prod-2026-05", sig=b"\x01" * 64) + pin = Pin(header=_header(), sig=b"\x01" * 64) restored = Pin.from_json(pin.to_json()) assert restored == pin +def test_pin_kid_property(): + pin = Pin(header=_header(kid="my-key"), sig=b"\x01" * 64) + assert pin.kid == "my-key" + + def test_pin_from_dict_rejects_unsupported_version(): bad = { "v": 99, + "kid": "k", "model": "x", "source_hash": "sha256:" + "0" * 64, "vec_hash": "sha256:" + "1" * 64, "vec_dtype": "f32", "vec_dim": 1, "ts": "2026-05-05T12:00:00Z", - "kid": "k", - "sig": "AA", + "sig": base64.urlsafe_b64encode(b"\x01" * 64).rstrip(b"=").decode("ascii"), } with pytest.raises(ValueError, match="version"): Pin.from_dict(bad) @@ -72,10 +96,128 @@ def test_pin_from_dict_rejects_unsupported_version(): def test_pin_json_is_compact(): """Pin JSON must fit in vector DB metadata fields without fuss.""" - pin = Pin(header=_header(), kid="k", sig=b"\x01" * 64) + pin = Pin(header=_header(), sig=b"\x01" * 64) j = pin.to_json() parsed = json.loads(j) assert "model" in parsed - # No whitespace, sorted keys + # No whitespace, sorted keys. assert ": " not in j assert ", " not in j + + +# ---- strict validation in from_dict / from_json ---- + + +def _valid_pin_dict(**overrides): + """A baseline dict that passes from_dict, plus an override hook.""" + d = { + "v": PROTOCOL_VERSION, + "kid": "k", + "model": "m", + "source_hash": "sha256:" + "0" * 64, + "vec_hash": "sha256:" + "1" * 64, + "vec_dtype": "f32", + "vec_dim": 16, + "ts": "2026-05-13T00:00:00Z", + "sig": base64.urlsafe_b64encode(b"\x01" * 64).rstrip(b"=").decode("ascii"), + } + d.update(overrides) + return d + + +def test_from_json_rejects_oversized_payload(): + # MAX_PIN_JSON_BYTES is 64 KiB; anything bigger must be refused + # before json.loads runs. 
+ huge = '{"v":2,"junk":"' + ("a" * 70_000) + '"}' + with pytest.raises(ValueError, match="too large"): + Pin.from_json(huge) + + +def test_from_dict_rejects_wrong_version(): + with pytest.raises(ValueError, match="version"): + Pin.from_dict(_valid_pin_dict(v=1)) + + +def test_from_dict_rejects_bad_vec_dtype(): + with pytest.raises(ValueError, match="vec_dtype"): + Pin.from_dict(_valid_pin_dict(vec_dtype="f16")) + + +def test_from_dict_rejects_negative_vec_dim(): + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim=-1)) + + +def test_from_dict_rejects_zero_vec_dim(): + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim=0)) + + +def test_from_dict_rejects_huge_vec_dim(): + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim=10_000_000)) + + +def test_from_dict_rejects_non_int_vec_dim(): + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim="3072")) + + +def test_from_dict_rejects_bool_vec_dim(): + # bool is technically a subclass of int — we explicitly reject it. + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim=True)) + + +def test_from_dict_rejects_malformed_source_hash(): + with pytest.raises(ValueError, match="source_hash"): + Pin.from_dict(_valid_pin_dict(source_hash="md5:beef")) + + +def test_from_dict_rejects_malformed_vec_hash(): + with pytest.raises(ValueError, match="vec_hash"): + Pin.from_dict(_valid_pin_dict(vec_hash="sha256:short")) + + +def test_from_dict_rejects_uppercase_hash_hex(): + # Lowercase hex only — matches what hash.py produces. 
+ with pytest.raises(ValueError, match="source_hash"): + Pin.from_dict(_valid_pin_dict(source_hash="sha256:" + "A" * 64)) + + +def test_from_dict_rejects_wrong_sig_length(): + short_sig = base64.urlsafe_b64encode(b"\x01" * 32).rstrip(b"=").decode("ascii") + with pytest.raises(ValueError, match="sig"): + Pin.from_dict(_valid_pin_dict(sig=short_sig)) + + +def test_from_dict_rejects_non_base64_sig(): + with pytest.raises(ValueError, match="sig"): + Pin.from_dict(_valid_pin_dict(sig="!!!not_base64!!!")) + + +def test_from_dict_rejects_empty_model(): + with pytest.raises(ValueError, match="model"): + Pin.from_dict(_valid_pin_dict(model="")) + + +def test_from_dict_rejects_empty_kid(): + with pytest.raises(ValueError, match="kid"): + Pin.from_dict(_valid_pin_dict(kid="")) + + +def test_from_dict_rejects_non_string_extra_value(): + with pytest.raises(ValueError, match="extra values"): + Pin.from_dict(_valid_pin_dict(extra={"region": 5})) + + +def test_from_dict_rejects_non_string_extra_key(): + with pytest.raises(ValueError, match="extra keys"): + Pin.from_dict(_valid_pin_dict(extra={5: "x"})) + + +def test_from_dict_accepts_valid_pin(): + # Sanity check that the baseline isn't accidentally rejected. + pin = Pin.from_dict(_valid_pin_dict()) + assert pin.header.vec_dim == 16 + assert pin.kid == "k" diff --git a/tests/test_legacy_v1_verifier.py b/tests/test_legacy_v1_verifier.py new file mode 100644 index 0000000..728f1da --- /dev/null +++ b/tests/test_legacy_v1_verifier.py @@ -0,0 +1,99 @@ +# Copyright 2025 Jascha Wanger / Tarnover, LLC +# SPDX-License-Identifier: Apache-2.0 +"""Migration: the opt-in LegacyV1Verifier must verify v1 test vectors. + +This proves the v1 → v2 wire-format break does not orphan +historical pins. Production verifiers default to strict v2; operators +running a migration can opt in to legacy mode and continue verifying +v1 corpora until the re-pin pass is complete. 
+""" + +from __future__ import annotations + +import base64 +import json +from pathlib import Path + +import numpy as np +import pytest + +from vectorpin import LegacyV1Verifier, Verifier, VerifyError + +V1_TESTVECTORS = Path(__file__).resolve().parent.parent / "testvectors" / "v1.json" + + +def _b64dec(s: str) -> bytes: + pad = "=" * (-len(s) % 4) + return base64.urlsafe_b64decode(s + pad) + + +@pytest.fixture(scope="module") +def v1_corpus() -> dict: + if not V1_TESTVECTORS.exists(): + pytest.skip("testvectors/v1.json not present") + return json.loads(V1_TESTVECTORS.read_text()) + + +def test_legacy_verifier_validates_every_v1_fixture(v1_corpus: dict): + """Every entry in testvectors/v1.json verifies under LegacyV1Verifier.""" + pub = _b64dec(v1_corpus["public_key_b64"]) + verifier = LegacyV1Verifier({v1_corpus["key_id"]: pub}) + + for fixture in v1_corpus["fixtures"]: + pin = LegacyV1Verifier.parse_pin(fixture["expected"]["pin_json"]) + # Pure signature check (the v1 vector_b64 lets us go further if + # we want, but the signature alone is the legacy proof). 
+ result = verifier.verify(pin) + assert result.ok, ( + f"{fixture['name']}: expected OK, got " + f"{result.error.value} ({result.detail})" + ) + + +def test_legacy_verifier_v1_full_check_with_vector(v1_corpus: dict): + """Fully verify a v1 fixture including the vector — proves the v1 + canonical bytes path is byte-for-byte equivalent to the original + v1 implementation.""" + pub = _b64dec(v1_corpus["public_key_b64"]) + verifier = LegacyV1Verifier({v1_corpus["key_id"]: pub}) + + fixture = v1_corpus["fixtures"][0] + pin = LegacyV1Verifier.parse_pin(fixture["expected"]["pin_json"]) + vec_bytes = _b64dec(fixture["input"]["vector_b64"]) + dtype = " Signer: + return Signer.generate(key_id="prod") + + +@pytest.fixture +def verifier(signer: Signer) -> Verifier: + return Verifier({signer.key_id: signer.public_key()}) + + +@pytest.fixture +def vector() -> np.ndarray: + return np.arange(8, dtype=np.float32) + + +# ---- record_id ---- + + +def test_record_id_match_passes(signer: Signer, verifier: Verifier, vector: np.ndarray): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.record_id": "rec-42"}, + ) + result = verifier.verify(pin, expected_record_id="rec-42") + assert result.ok + + +def test_record_id_mismatch_returns_record_mismatch( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.record_id": "rec-42"}, + ) + result = verifier.verify(pin, expected_record_id="rec-99") + assert not result.ok + assert result.error is VerifyError.RECORD_MISMATCH + + +def test_record_id_missing_returns_record_mismatch( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + """If the caller expects a record_id but the pin has none, fail.""" + pin = signer.pin(source="x", model="m", vector=vector) + result = verifier.verify(pin, expected_record_id="rec-42") + assert not result.ok + assert result.error is VerifyError.RECORD_MISMATCH + + +# ---- collection_id 
---- + + +def test_collection_id_match_passes(signer: Signer, verifier: Verifier, vector: np.ndarray): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.collection_id": "col-A"}, + ) + result = verifier.verify(pin, expected_collection_id="col-A") + assert result.ok + + +def test_collection_id_mismatch_returns_collection_mismatch( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.collection_id": "col-A"}, + ) + result = verifier.verify(pin, expected_collection_id="col-B") + assert not result.ok + assert result.error is VerifyError.COLLECTION_MISMATCH + + +# ---- tenant_id ---- + + +def test_tenant_id_match_passes(signer: Signer, verifier: Verifier, vector: np.ndarray): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.tenant_id": "tenant-1"}, + ) + result = verifier.verify(pin, expected_tenant_id="tenant-1") + assert result.ok + + +def test_tenant_id_mismatch_returns_tenant_mismatch( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.tenant_id": "tenant-1"}, + ) + result = verifier.verify(pin, expected_tenant_id="tenant-2") + assert not result.ok + assert result.error is VerifyError.TENANT_MISMATCH + + +# ---- combined ---- + + +def test_all_three_identifiers_pass_when_matched( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={ + "vectorpin.record_id": "rec-1", + "vectorpin.collection_id": "col-1", + "vectorpin.tenant_id": "ten-1", + }, + ) + result = verifier.verify( + pin, + expected_record_id="rec-1", + expected_collection_id="col-1", + expected_tenant_id="ten-1", + ) + assert result.ok + + +def test_no_expected_identifiers_skips_replay_check( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + """When the caller 
supplies none of the replay identifiers, the + pin's `extra` is signed but not enforced — backward-compatible + behaviour for callers that don't opt in.""" + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.record_id": "rec-1"}, + ) + result = verifier.verify(pin) + assert result.ok + + +def test_replay_check_runs_after_signature_check( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + """A tampered signature must surface as SIGNATURE_INVALID, not + RECORD_MISMATCH, even if a replay identifier mismatches. + + Order of errors matters: callers route on the first failure mode + and we want signature failures to take precedence over policy + mismatches. + """ + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.record_id": "rec-1"}, + ) + # Flip a bit in the signature. + bad_sig = bytearray(pin.sig) + bad_sig[0] ^= 0x01 + from vectorpin import Pin + + forged = Pin(header=pin.header, sig=bytes(bad_sig)) + result = verifier.verify(forged, expected_record_id="rec-99") + assert result.error is VerifyError.SIGNATURE_INVALID diff --git a/tests/test_signer_verifier.py b/tests/test_signer_verifier.py index 1c87256..725536a 100644 --- a/tests/test_signer_verifier.py +++ b/tests/test_signer_verifier.py @@ -79,7 +79,7 @@ def test_re_signed_pin_with_wrong_kid_is_caught(verifier: Verifier, vector: np.n # Attacker re-signs the modified body but the verifier registry has # only the legit public key for this kid, so signature fails. 
forged_sig = other_signer._private_key.sign(pin.header.canonicalize()) - forged = Pin(header=pin.header, kid=pin.kid, sig=forged_sig) + forged = Pin(header=pin.header, sig=forged_sig) verifier_with_legit = Verifier({"test-key-1": legit_signer.public_key()}) result = verifier_with_legit.verify(forged) assert not result.ok @@ -132,3 +132,27 @@ def test_pin_json_round_trip_with_verification( json_str = pin.to_json() restored = Pin.from_json(json_str) assert verifier.verify(restored, source="hello", vector=vector) + + +def test_verify_rejects_wrong_length_sig(signer: Signer, verifier: Verifier, vector: np.ndarray): + """A Pin assembled by hand with a too-short sig must fail signature_invalid. + + We bypass Pin.from_dict (which would catch this earlier) by + constructing the dataclass directly, mirroring what would happen + if a caller pulled the dataclass straight out of a partially-validated + pipeline. + """ + pin = signer.pin(source="x", model="m", vector=vector) + bad = Pin(header=pin.header, sig=b"\x00" * 32) + result = verifier.verify(bad) + assert not result.ok + assert result.error is VerifyError.SIGNATURE_INVALID + assert "64 bytes" in result.detail + + +def test_verify_rejects_non_bytes_sig(signer: Signer, verifier: Verifier, vector: np.ndarray): + pin = signer.pin(source="x", model="m", vector=vector) + bad = Pin(header=pin.header, sig="not bytes") # type: ignore[arg-type] + result = verifier.verify(bad) + assert not result.ok + assert result.error is VerifyError.SIGNATURE_INVALID diff --git a/tests/test_v2_canonicalization.py b/tests/test_v2_canonicalization.py new file mode 100644 index 0000000..8910e23 --- /dev/null +++ b/tests/test_v2_canonicalization.py @@ -0,0 +1,327 @@ +# Copyright 2025 Jascha Wanger / Tarnover, LLC +# SPDX-License-Identifier: Apache-2.0 +"""Protocol v2 canonicalization, signing, and parse-time guarantees. 
+ +These tests pin down the v2 wire-format break: domain tag, v / kid in +the signed bytes, strict NFC + control-char + bidi-override rejection, +strict timestamp regex, no unknown top-level fields, no non-string +extra values, NaN/Inf rejection at sign time, and §4.3 size limits. +""" + +from __future__ import annotations + +import base64 +import json + +import numpy as np +import pytest + +from vectorpin import ( + DOMAIN_TAG, + PROTOCOL_VERSION, + Pin, + PinHeader, + Signer, + Verifier, + VerifyError, +) + +# ---- fixtures ---- + + +@pytest.fixture +def signer() -> Signer: + return Signer.generate(key_id="prod-2026-05") + + +@pytest.fixture +def verifier(signer: Signer) -> Verifier: + return Verifier({signer.key_id: signer.public_key()}) + + +@pytest.fixture +def vector() -> np.ndarray: + rng = np.random.default_rng(0) + return rng.normal(0, 1, size=16).astype(np.float32) + + +# ---- domain tag ---- + + +def test_protocol_version_is_2(): + assert PROTOCOL_VERSION == 2 + + +def test_domain_tag_bytes(): + assert DOMAIN_TAG == b"vectorpin/v2\x00" + + +def test_signed_bytes_start_with_domain_tag(signer: Signer, vector: np.ndarray): + pin = signer.pin(source="hello", model="m", vector=vector) + canonical = pin.header.canonicalize() + assert canonical.startswith(DOMAIN_TAG) + # And the rest is JSON. + body = canonical[len(DOMAIN_TAG):] + decoded = json.loads(body) + assert decoded["v"] == 2 + assert decoded["kid"] == "prod-2026-05" + + +# ---- v and kid bound to signature ---- + + +def test_changing_v_breaks_signature(signer: Signer, verifier: Verifier, vector: np.ndarray): + """Flipping `v` from 2 to anything else MUST break verification. + + This is the audit-finding fix: in v1, `v` was unsigned, so a + downgrade-and-re-version attack was possible. v2 includes `v` in + the canonical bytes; any change breaks the ed25519 signature. 
+ """ + pin = signer.pin(source="x", model="m", vector=vector) + tampered_header = PinHeader( + v=99, # changed from 2 + kid=pin.header.kid, + model=pin.header.model, + source_hash=pin.header.source_hash, + vec_hash=pin.header.vec_hash, + vec_dtype=pin.header.vec_dtype, + vec_dim=pin.header.vec_dim, + ts=pin.header.ts, + ) + tampered = Pin(header=tampered_header, sig=pin.sig) + result = verifier.verify(tampered) + assert not result.ok + # The version filter fires first. + assert result.error is VerifyError.UNSUPPORTED_VERSION + + +def test_changing_kid_breaks_signature(signer: Signer, vector: np.ndarray): + """Re-attributing a pin to a different `kid` MUST break verification. + + The audit-finding fix for cross-key swap: in v1, `kid` was + unsigned, so an attacker could lift (header, sig) and re-attribute + it to a different producer. v2 binds `kid` to the signature. + """ + pin = signer.pin(source="x", model="m", vector=vector) + other_signer = Signer.generate(key_id="other-kid") + swapped_header = PinHeader( + v=pin.header.v, + kid="other-kid", # changed + model=pin.header.model, + source_hash=pin.header.source_hash, + vec_hash=pin.header.vec_hash, + vec_dtype=pin.header.vec_dtype, + vec_dim=pin.header.vec_dim, + ts=pin.header.ts, + ) + swapped = Pin(header=swapped_header, sig=pin.sig) + # Verifier knows both keys; the swap must still fail signature. 
+ verifier = Verifier( + { + "prod-2026-05": signer.public_key(), + "other-kid": other_signer.public_key(), + } + ) + result = verifier.verify(swapped) + assert not result.ok + assert result.error is VerifyError.SIGNATURE_INVALID + + +# ---- NaN/Inf rejection at sign time ---- + + +def test_nan_in_vector_rejected_at_sign_time(signer: Signer): + vec = np.array([1.0, 2.0, float("nan"), 4.0], dtype=np.float32) + with pytest.raises(ValueError, match="NaN or infinity"): + signer.pin(source="x", model="m", vector=vec) + + +def test_positive_inf_in_vector_rejected_at_sign_time(signer: Signer): + vec = np.array([1.0, 2.0, float("inf"), 4.0], dtype=np.float32) + with pytest.raises(ValueError, match="NaN or infinity"): + signer.pin(source="x", model="m", vector=vec) + + +def test_negative_inf_in_vector_rejected_at_sign_time(signer: Signer): + vec = np.array([1.0, 2.0, float("-inf"), 4.0], dtype=np.float32) + with pytest.raises(ValueError, match="NaN or infinity"): + signer.pin(source="x", model="m", vector=vec) + + +def test_signed_negative_zero_accepted(signer: Signer, verifier: Verifier): + """Spec §3.2: -0.0 and +0.0 are distinct and both valid.""" + vec_pos = np.array([0.0, 1.0], dtype=np.float32) + vec_neg = np.array([-0.0, 1.0], dtype=np.float32) + pin_pos = signer.pin(source="x", model="m", vector=vec_pos) + pin_neg = signer.pin(source="x", model="m", vector=vec_neg) + # Both verify against their own vectors. + assert verifier.verify(pin_pos, vector=vec_pos) + assert verifier.verify(pin_neg, vector=vec_neg) + # And the hashes ARE different — +0.0 vs -0.0 has distinct bytes. 
+ assert pin_pos.header.vec_hash != pin_neg.header.vec_hash + + +# ---- NFC enforcement ---- + + +def _valid_pin_dict(**overrides): + d = { + "v": PROTOCOL_VERSION, + "kid": "k", + "model": "m", + "source_hash": "sha256:" + "0" * 64, + "vec_hash": "sha256:" + "1" * 64, + "vec_dtype": "f32", + "vec_dim": 16, + "ts": "2026-05-13T00:00:00Z", + "sig": base64.urlsafe_b64encode(b"\x01" * 64).rstrip(b"=").decode("ascii"), + } + d.update(overrides) + return d + + +def test_nfd_model_string_rejected_at_parse(): + """An NFD-form composed character must be rejected. + + 'cafe' + U+0301 COMBINING ACUTE is the NFD form of 'café'; + 'caf' + U+00E9 (precomposed) is the NFC form. The parser must + reject NFD inputs. + """ + nfd_cafe = "cafe\u0301" + # Sanity: this string IS in NFD, not NFC. + import unicodedata + assert unicodedata.normalize("NFC", nfd_cafe) != nfd_cafe + assert unicodedata.normalize("NFC", nfd_cafe) == "caf\u00e9" + with pytest.raises(ValueError, match="NFC"): + Pin.from_dict(_valid_pin_dict(model=nfd_cafe)) + + +def test_nfd_kid_string_rejected_at_parse(): + nfd = "cafe\u0301" # NFD form + with pytest.raises(ValueError, match="NFC"): + Pin.from_dict(_valid_pin_dict(kid=nfd)) + + +def test_control_character_in_model_rejected(): + with pytest.raises(ValueError, match="control character"): + Pin.from_dict(_valid_pin_dict(model="bad\x07name")) + + +def test_bidi_override_in_model_rejected(): + # U+202E RIGHT-TO-LEFT OVERRIDE + with pytest.raises(ValueError, match="bidi"): + Pin.from_dict(_valid_pin_dict(model="evil‮name")) + + +# ---- timestamp strictness ---- + + +def test_ts_fractional_seconds_rejected(): + with pytest.raises(ValueError, match="ts"): + Pin.from_dict(_valid_pin_dict(ts="2026-05-05T12:00:00.000Z")) + + +def test_ts_offset_rejected(): + with pytest.raises(ValueError, match="ts"): + Pin.from_dict(_valid_pin_dict(ts="2026-05-05T12:00:00+00:00")) + + +def test_ts_lowercase_t_rejected(): + with pytest.raises(ValueError, match="ts"): + 
Pin.from_dict(_valid_pin_dict(ts="2026-05-05t12:00:00Z")) + + +def test_ts_lowercase_z_rejected(): + with pytest.raises(ValueError, match="ts"): + Pin.from_dict(_valid_pin_dict(ts="2026-05-05T12:00:00z")) + + +# ---- unknown top-level field ---- + + +def test_unknown_top_level_field_rejected(): + bad = _valid_pin_dict() + bad["sig2"] = "extra" + with pytest.raises(ValueError, match="unknown top-level"): + Pin.from_dict(bad) + + +# ---- non-string extra ---- + + +def test_non_string_extra_value_rejected(): + with pytest.raises(ValueError, match="extra values"): + Pin.from_dict(_valid_pin_dict(extra={"k": 42})) + + +def test_non_string_extra_value_list_rejected(): + with pytest.raises(ValueError, match="extra values"): + Pin.from_dict(_valid_pin_dict(extra={"k": ["a", "b"]})) + + +# ---- size limits (§4.3) ---- + + +def test_oversize_pin_json_rejected(): + """Spec §4.3: total pin JSON > 64 KiB MUST be rejected.""" + huge = '{"v":2,"junk":"' + ("a" * 70_000) + '"}' + with pytest.raises(ValueError, match="too large"): + Pin.from_json(huge) + + +def test_extra_entries_limit_enforced(): + """Spec §4.3: extra entry count limit is 32.""" + big_extra = {f"k{i}": "v" for i in range(33)} + with pytest.raises(ValueError, match="32"): + Pin.from_dict(_valid_pin_dict(extra=big_extra)) + + +def test_extra_key_size_limit_enforced(): + """Spec §4.3: extra keys cap at 128 bytes.""" + long_key = "k" * 129 + with pytest.raises(ValueError, match="128"): + Pin.from_dict(_valid_pin_dict(extra={long_key: "v"})) + + +def test_extra_value_size_limit_enforced(): + """Spec §4.3: extra values cap at 1 KiB.""" + long_val = "v" * 1025 + with pytest.raises(ValueError, match="1024"): + Pin.from_dict(_valid_pin_dict(extra={"k": long_val})) + + +def test_vec_dim_at_limit_accepted(): + """Spec §4.3: vec_dim <= 2^20 is acceptable; > 2^20 rejected.""" + # The header isn't bound to a vector here; just round-trip a dict. 
+ pin = Pin.from_dict(_valid_pin_dict(vec_dim=1_048_576)) + assert pin.header.vec_dim == 1_048_576 + + +def test_vec_dim_over_limit_rejected(): + with pytest.raises(ValueError, match="vec_dim"): + Pin.from_dict(_valid_pin_dict(vec_dim=1_048_577)) + + +# ---- end-to-end sign/verify ---- + + +def test_v2_pin_signs_and_verifies(signer: Signer, verifier: Verifier, vector: np.ndarray): + pin = signer.pin(source="hello", model="text-model", vector=vector) + assert pin.header.v == 2 + result = verifier.verify(pin, source="hello", vector=vector) + assert result.ok + + +def test_v2_pin_with_extra_includes_record_id( + signer: Signer, verifier: Verifier, vector: np.ndarray +): + pin = signer.pin( + source="x", + model="m", + vector=vector, + extra={"vectorpin.record_id": "rec-1"}, + ) + # Reserved keys round-trip and live in extra. + assert pin.header.extra["vectorpin.record_id"] == "rec-1" + # Untouched, verifies. + assert verifier.verify(pin) diff --git a/tests/test_v2_test_vectors.py b/tests/test_v2_test_vectors.py new file mode 100644 index 0000000..f73d304 --- /dev/null +++ b/tests/test_v2_test_vectors.py @@ -0,0 +1,181 @@ +# Copyright 2025 Jascha Wanger / Tarnover, LLC +# SPDX-License-Identifier: Apache-2.0 +"""Load testvectors/v2.json and testvectors/negative_v2.json and check. + +This is the cross-language compat anchor: the same test ports verbatim +to Rust and TypeScript implementations, asserting that each language +produces identical canonical bytes and verification verdicts. + +If this file is updated, the Rust + TS test fixtures MUST be re-run too. 
+""" + +from __future__ import annotations + +import base64 +import json +from pathlib import Path + +import numpy as np +import pytest + +from vectorpin import DOMAIN_TAG, Pin, Verifier, VerifyError + +V2_VECTORS = Path(__file__).resolve().parent.parent / "testvectors" / "v2.json" +NEG_V2_VECTORS = Path(__file__).resolve().parent.parent / "testvectors" / "negative_v2.json" + + +def _b64dec(s: str) -> bytes: + pad = "=" * (-len(s) % 4) + return base64.urlsafe_b64decode(s + pad) + + +@pytest.fixture(scope="module") +def v2_corpus() -> dict: + if not V2_VECTORS.exists(): + pytest.skip("testvectors/v2.json not present; run scripts/generate_test_vectors.py") + return json.loads(V2_VECTORS.read_text()) + + +@pytest.fixture(scope="module") +def neg_corpus() -> dict: + if not NEG_V2_VECTORS.exists(): + pytest.skip("testvectors/negative_v2.json not present") + return json.loads(NEG_V2_VECTORS.read_text()) + + +# ---- positive ---- + + +def test_corpus_metadata_is_consistent(v2_corpus: dict): + assert v2_corpus["version"] == 2 + assert _b64dec(v2_corpus["domain_tag_b64"]) == DOMAIN_TAG + + +def test_each_positive_fixture_verifies(v2_corpus: dict): + pub = _b64dec(v2_corpus["public_key_b64"]) + verifier = Verifier({v2_corpus["key_id"]: pub}) + + for fixture in v2_corpus["fixtures"]: + pin = Pin.from_json(fixture["pin_json"]) + result = verifier.verify(pin) + assert result.ok, ( + f"{fixture['name']} sig-only: expected OK, got " + f"{result.error.value}: {result.detail}" + ) + + +def test_each_positive_fixture_full_verify_with_vector(v2_corpus: dict): + """End-to-end: verify signature + supplied source + supplied vector.""" + pub = _b64dec(v2_corpus["public_key_b64"]) + verifier = Verifier({v2_corpus["key_id"]: pub}) + + for fixture in v2_corpus["fixtures"]: + pin = Pin.from_json(fixture["pin_json"]) + inp = fixture["input"] + vec_bytes = _b64dec(inp["vec_b64"]) + dtype = " tuple[Pin | None, str | None]: + """Return (pin, None) on parse success or (None, error_message) on 
failure.""" + try: + return Pin.from_json(pin_json), None + except (ValueError, json.JSONDecodeError) as e: + return None, str(e) + + +def test_each_negative_fixture_fails_correctly(neg_corpus: dict): + pub = _b64dec(neg_corpus["public_key_b64"]) + verifier = Verifier({neg_corpus["key_id"]: pub}) + + for fixture in neg_corpus["fixtures"]: + name = fixture["name"] + expected_str = fixture["expected_failure"] + expected_err = _FAIL_MAP[expected_str] + + # Some fixtures fail at parse, others at verify. We unify by + # mapping parse failures to VerifyError.PARSE_ERROR. + pin, parse_err = _try_parse(fixture["pin_json"]) + + if parse_err is not None: + # Distinguish a version-rejection at parse time (which + # protocol-wise is UNSUPPORTED_VERSION) from other + # structural failures (PARSE_ERROR). The spec leaves the + # boundary up to the implementation; both error codes + # are valid for "v not equal to my supported version". + if "version" in parse_err.lower(): + actual_err = VerifyError.UNSUPPORTED_VERSION + else: + actual_err = VerifyError.PARSE_ERROR + else: + assert pin is not None + # Build verify kwargs based on what the fixture supplies. + kwargs: dict = {} + if "tampered_source" in fixture: + kwargs["source"] = fixture["tampered_source"] + if "tampered_vec_b64" in fixture: + vec_bytes = _b64dec(fixture["tampered_vec_b64"]) + dtype = " = new Set([ + 'v', + 'kid', + 'model', + 'model_hash', + 'source_hash', + 'vec_hash', + 'vec_dtype', + 'vec_dim', + 'ts', + 'extra', + 'sig', +]); + +/** Keys that are forbidden as own properties — blocks prototype pollution. */ +const FORBIDDEN_KEYS: ReadonlySet = new Set([ + '__proto__', + 'constructor', + 'prototype', +]); + +const SHA256_RE = /^sha256:[0-9a-f]{64}$/; +const B64URL_RE = /^[A-Za-z0-9_-]+={0,2}$/; + +/** Strict v2 timestamp: `YYYY-MM-DDTHH:MM:SSZ`, exactly. */ +const TS_RE_V2 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/; + +const UTF8_ENCODER = new TextEncoder(); /** - * The signed portion of a Pin. 
+ * The signed portion of a Pin (everything except `sig`). * - * Everything except `sig` and `kid` lives here. Two Pins are - * equivalent iff their headers canonicalize to identical bytes. + * Two Pins are equivalent iff their headers canonicalize to identical + * bytes. In v2, the header includes `v` and `kid` so both are bound + * by the signature. */ export interface PinHeader { readonly v: number; + readonly kid: string; readonly model: string; readonly source_hash: string; readonly vec_hash: string; @@ -42,27 +120,46 @@ export interface PinHeader { readonly extra?: Readonly> | undefined; } +/** A signed pin attestation. */ +export interface Pin { + readonly header: PinHeader; + /** Raw signature bytes (Ed25519 = 64 bytes). */ + readonly sig: Uint8Array; +} + +/** + * Convenience accessor — `kid` is part of the header in v2. + * + * Provided so callers that previously read `pin.kid` (v1 era) keep + * working. New code should reach into `pin.header.kid` directly. + */ +export function pinKid(pin: Pin): string { + return pin.header.kid; +} + /** * Build the dict form of a header for JSON serialization. Keys are - * sorted alphabetically inside `canonicalize`; this function only - * decides which fields are present. + * sorted lexicographically inside `canonicalJsonStringify`; this + * function only decides which fields are present. + * + * The returned object has a null prototype so that no accidental + * inheritance from `Object.prototype` can sneak in. 
+ */ export function headerToDict(h: PinHeader): Record { - const out: Record = { - v: h.v, - model: h.model, - source_hash: h.source_hash, - vec_hash: h.vec_hash, - vec_dtype: h.vec_dtype, - vec_dim: h.vec_dim, - ts: h.ts, - }; + const out: Record = Object.create(null); + out['v'] = h.v; + out['kid'] = h.kid; + out['model'] = h.model; + out['source_hash'] = h.source_hash; + out['vec_hash'] = h.vec_hash; + out['vec_dtype'] = h.vec_dtype; + out['vec_dim'] = h.vec_dim; + out['ts'] = h.ts; if (h.model_hash !== undefined && h.model_hash !== null) { out['model_hash'] = h.model_hash; } if (h.extra && Object.keys(h.extra).length > 0) { - // Sort extra by key to match the Python reference output. - const sortedExtra: Record = {}; + const sortedExtra: Record = Object.create(null); for (const k of Object.keys(h.extra).sort()) { sortedExtra[k] = h.extra[k]!; } @@ -72,23 +169,75 @@ export function headerToDict(h: PinHeader): Record { } /** - * Stable byte representation for signing/verifying. + * Stable byte representation for signing/verifying a v2 pin. * - * Uses JSON with sorted keys, no whitespace, raw UTF-8 (non-ASCII - * passes through unescaped). This is the canonicalization form - * with the best library support across languages while still being - * deterministic. + * Returns `DOMAIN_TAG || canonical_json(header)`. The `kid`, `model`, + * `ts`, `model_hash` and `extra` strings must already be NFC; non-NFC + * input is rejected so signers cannot silently emit pins a strict + * verifier would later refuse. */ export function canonicalizeHeader(h: PinHeader): Uint8Array { - return new TextEncoder().encode(canonicalJsonStringify(headerToDict(h))); + // Require NFC on each free-form string field (nfcOrThrow throws on
+ // anything else), then rebuild `extra` with its keys sorted. + const d = headerToDict(h); + d['kid'] = nfcOrThrow(d['kid'] as string, 'kid'); + d['model'] = nfcOrThrow(d['model'] as string, 'model'); + d['ts'] = nfcOrThrow(d['ts'] as string, 'ts'); + if ('model_hash' in d && typeof d['model_hash'] === 'string') { + d['model_hash'] = nfcOrThrow(d['model_hash'], 'model_hash'); + } + if ('extra' in d && d['extra'] && typeof d['extra'] === 'object') { + const e = d['extra'] as Record; + const normalized: Record = Object.create(null); + for (const k of Object.keys(e)) { + const kn = nfcOrThrow(k, `extra key ${JSON.stringify(k)}`); + const vn = nfcOrThrow(e[k]!, `extra[${JSON.stringify(k)}]`); + normalized[kn] = vn; + } + const sorted: Record = Object.create(null); + for (const k of Object.keys(normalized).sort()) { + sorted[k] = normalized[k]!; + } + d['extra'] = sorted; + } + const body = UTF8_ENCODER.encode(canonicalJsonStringify(d)); + const out = new Uint8Array(DOMAIN_TAG.length + body.length); + out.set(DOMAIN_TAG, 0); + out.set(body, DOMAIN_TAG.length); + return out; } -/** A signed pin attestation. */ -export interface Pin { - readonly header: PinHeader; - readonly kid: string; - /** Raw signature bytes (Ed25519 = 64 bytes). */ - readonly sig: Uint8Array; +/** + * Legacy v1 canonicalization, kept for `LegacyV1Verifier` only. + * + * v1 differed from v2 in three ways: + * - No domain-tag prefix. + * - `kid` was NOT included in the signed payload. + * - No NFC / control-char / bidi enforcement on string fields. + * + * The output here is byte-for-byte identical to what the v1 reference + * implementation produced, so historical pins continue to verify.
+ */ +export function canonicalizeHeaderV1(h: PinHeader): Uint8Array { + const out: Record = Object.create(null); + out['v'] = h.v; + out['model'] = h.model; + out['source_hash'] = h.source_hash; + out['vec_hash'] = h.vec_hash; + out['vec_dtype'] = h.vec_dtype; + out['vec_dim'] = h.vec_dim; + out['ts'] = h.ts; + if (h.model_hash !== undefined && h.model_hash !== null) { + out['model_hash'] = h.model_hash; + } + if (h.extra && Object.keys(h.extra).length > 0) { + const sortedExtra: Record = Object.create(null); + for (const k of Object.keys(h.extra).sort()) { + sortedExtra[k] = h.extra[k]!; + } + out['extra'] = sortedExtra; + } + return UTF8_ENCODER.encode(canonicalJsonStringify(out)); } /** Compact JSON encoding suitable for vector DB metadata fields. */ @@ -99,52 +248,233 @@ export function pinToJSON(pin: Pin): string { /** Plain-object representation; mirrors `Pin.to_dict` in Python. */ export function pinToDict(pin: Pin): Record { const d = headerToDict(pin.header); - d['kid'] = pin.kid; d['sig'] = b64UrlEncodeNoPad(pin.sig); return d; } -export function pinFromJSON(s: string): Pin { - return pinFromDict(JSON.parse(s) as Record); +/** Options for the parser; legacy mode is for `LegacyV1Verifier` only. */ +export interface PinParseOptions { + /** If true, accept v1 pins with the looser v1 validation rules. */ + acceptV1Legacy?: boolean; +} + +export function pinFromJSON(s: string, opts: PinParseOptions = {}): Pin { + if (typeof s !== 'string') { + throw new Error('pin JSON must be a string'); + } + // Measure the raw UTF-8 byte size before JSON.parse runs so we cap + // parser memory use, not just the resulting object. 
+ if (Buffer.byteLength(s, 'utf8') > MAX_PIN_JSON_BYTES) { + throw new Error(`pin JSON exceeds maximum size of ${MAX_PIN_JSON_BYTES} bytes`); + } + const parsed: unknown = JSON.parse(s); + if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error('pin JSON root must be an object'); + } + return pinFromDict(parsed as Record, opts); } -export function pinFromDict(d: Record): Pin { - if (d['v'] !== PROTOCOL_VERSION) { +export function pinFromDict(d: Record, opts: PinParseOptions = {}): Pin { + if (d === null || typeof d !== 'object' || Array.isArray(d)) { + throw new Error('pin dict must be a plain object'); + } + const acceptV1 = opts.acceptV1Legacy === true; + + // Reject prototype-pollution payloads + unknown top-level keys. + const ownKeys = Object.keys(d); + for (const k of ownKeys) { + if (FORBIDDEN_KEYS.has(k)) { + throw new Error(`forbidden key in pin: ${JSON.stringify(k)}`); + } + if (!ALLOWED_PIN_KEYS.has(k)) { + throw new Error(`unknown pin field: ${JSON.stringify(k)}`); + } + } + + // Version. + const v = d['v']; + if (typeof v !== 'number' || !Number.isInteger(v)) { + throw new Error(`pin.v must be an integer; got ${JSON.stringify(v)}`); + } + const isV2 = v === PROTOCOL_VERSION; + if (!isV2 && !(acceptV1 && v === 1)) { throw new Error( - `unsupported pin version ${JSON.stringify(d['v'])}; expected ${PROTOCOL_VERSION}`, + `unsupported pin version ${JSON.stringify(v)}; expected ${PROTOCOL_VERSION}`, ); } + + // Required string scalars. 
+ if (typeof d['model'] !== 'string' || d['model'].length === 0) { + throw new Error('pin.model must be a non-empty string'); + } + if (typeof d['kid'] !== 'string' || d['kid'].length === 0) { + throw new Error('pin.kid must be a non-empty string'); + } + if (typeof d['ts'] !== 'string' || d['ts'].length === 0) { + throw new Error('pin.ts must be a non-empty string'); + } + if (typeof d['sig'] !== 'string') { + throw new Error('pin.sig must be a string'); + } + + const model = d['model']; + const kid = d['kid']; + const ts = d['ts']; + + // v2-strict string validation. + if (isV2) { + assertSafeString(model, 'model'); + assertSafeString(kid, 'kid'); + assertSafeString(ts, 'ts'); + assertNfc(model, 'model'); + assertNfc(kid, 'kid'); + assertNfc(ts, 'ts'); + if (!TS_RE_V2.test(ts)) { + throw new Error("pin.ts must match 'YYYY-MM-DDTHH:MM:SSZ' exactly"); + } + } + const dtype = d['vec_dtype']; - if (dtype !== 'f32' && dtype !== 'f64') { + if (typeof dtype !== 'string' || (dtype !== 'f32' && dtype !== 'f64')) { throw new Error(`unsupported vec_dtype ${JSON.stringify(dtype)}`); } - const extraRaw = d['extra']; - const extra = - extraRaw && typeof extraRaw === 'object' - ? Object.fromEntries(Object.entries(extraRaw as Record).map( - ([k, v]) => [k, String(v)], - )) - : undefined; + + const vecDim = d['vec_dim']; + if ( + typeof vecDim !== 'number' || + !Number.isInteger(vecDim) || + vecDim <= 0 || + vecDim > MAX_VEC_DIM + ) { + throw new Error( + `pin.vec_dim must be an integer in (0, ${MAX_VEC_DIM}]; got ${JSON.stringify(vecDim)}`, + ); + } + + // Hash fields. + if (typeof d['source_hash'] !== 'string' || !SHA256_RE.test(d['source_hash'])) { + throw new Error('pin.source_hash must match /^sha256:[0-9a-f]{64}$/'); + } + if (typeof d['vec_hash'] !== 'string' || !SHA256_RE.test(d['vec_hash'])) { + throw new Error('pin.vec_hash must match /^sha256:[0-9a-f]{64}$/'); + } + + // Optional model_hash. 
+ let modelHash: string | undefined; + if ('model_hash' in d && d['model_hash'] !== undefined && d['model_hash'] !== null) { + if (typeof d['model_hash'] !== 'string' || !SHA256_RE.test(d['model_hash'])) { + throw new Error('pin.model_hash must match /^sha256:[0-9a-f]{64}$/'); + } + modelHash = d['model_hash']; + } + + // Optional extra map: strictly string -> string, bounded. + let extra: Record | undefined; + if ('extra' in d && d['extra'] !== undefined && d['extra'] !== null) { + const extraRaw = d['extra']; + if (typeof extraRaw !== 'object' || Array.isArray(extraRaw)) { + throw new Error('pin.extra must be an object of string values'); + } + const extraKeys = Object.keys(extraRaw as Record); + if (extraKeys.length > MAX_EXTRA_ENTRIES) { + throw new Error( + `pin.extra has ${extraKeys.length} entries; maximum is ${MAX_EXTRA_ENTRIES}`, + ); + } + const sanitized: Record = Object.create(null); + for (const k of extraKeys) { + if (FORBIDDEN_KEYS.has(k)) { + throw new Error(`forbidden key in pin.extra: ${JSON.stringify(k)}`); + } + if (Buffer.byteLength(k, 'utf8') > MAX_EXTRA_KEY_BYTES) { + throw new Error(`pin.extra key exceeds ${MAX_EXTRA_KEY_BYTES} bytes`); + } + const val = (extraRaw as Record)[k]; + if (typeof val !== 'string') { + throw new Error( + `pin.extra[${JSON.stringify(k)}] must be a string; got ${typeof val}`, + ); + } + if (Buffer.byteLength(val, 'utf8') > MAX_EXTRA_VALUE_BYTES) { + throw new Error( + `pin.extra[${JSON.stringify(k)}] exceeds ${MAX_EXTRA_VALUE_BYTES} bytes`, + ); + } + if (isV2) { + assertSafeString(k, `extra key ${JSON.stringify(k)}`); + assertSafeString(val, `extra[${JSON.stringify(k)}]`); + assertNfc(k, `extra key ${JSON.stringify(k)}`); + assertNfc(val, `extra[${JSON.stringify(k)}]`); + } + sanitized[k] = val; + } + extra = sanitized; + } + + // Validate base64url BEFORE decoding so the alphabet check is + // strict (rejects standard-base64 `+`/`/` input). 
+ const sig = b64UrlDecodeStrict(d['sig']); + if (sig.length !== SIG_LEN) { + throw new Error(`pin.sig must decode to ${SIG_LEN} bytes (got ${sig.length})`); + } + const header: PinHeader = { - v: d['v'] as number, - model: String(d['model']), - source_hash: String(d['source_hash']), - vec_hash: String(d['vec_hash']), + v, + kid, + model, + source_hash: d['source_hash'], + vec_hash: d['vec_hash'], vec_dtype: dtype, - vec_dim: Number(d['vec_dim']), - ts: String(d['ts']), - model_hash: typeof d['model_hash'] === 'string' ? d['model_hash'] : undefined, + vec_dim: vecDim, + ts, + model_hash: modelHash, extra, }; - const sigStr = d['sig']; - if (typeof sigStr !== 'string') { - throw new Error('pin missing sig field'); - } - return { - header, - kid: String(d['kid']), - sig: b64UrlDecodeNoPad(sigStr), - }; + + return { header, sig }; +} + +// ---- string-safety helpers ---- + +/** + * Reject strings that contain ASCII control characters (U+0000-U+001F) + * or Unicode bidi-override code points (U+202A-U+202E, U+2066-U+2069). + * Required by spec §3.1. + */ +function assertSafeString(value: string, fieldName: string): void { + for (let i = 0; i < value.length; i++) { + const cp = value.charCodeAt(i); + if (cp < 0x20) { + throw new Error( + `${fieldName} contains control character U+${cp.toString(16).padStart(4, '0').toUpperCase()}`, + ); + } + if ((cp >= 0x202a && cp <= 0x202e) || (cp >= 0x2066 && cp <= 0x2069)) { + throw new Error( + `${fieldName} contains bidi-override character U+${cp.toString(16).padStart(4, '0').toUpperCase()}`, + ); + } + } +} + +/** + * Return `value` unchanged if it is already in NFC form; otherwise + * throw. The signer's input path normalizes before reaching here so + * this only catches a header that was constructed with non-NFC + * strings directly. 
+ */ +function nfcOrThrow(value: string, fieldName: string): string { + if (value !== value.normalize('NFC')) { + throw new Error(`${fieldName} is not NFC-normalized`); + } + return value; +} + +/** Throw if `value` is not already in NFC form. */ +function assertNfc(value: string, fieldName: string): void { + if (value !== value.normalize('NFC')) { + throw new Error(`${fieldName} is not NFC-normalized`); + } } // ---- canonical JSON ---- @@ -155,9 +485,9 @@ export function pinFromDict(d: Record): Pin { * and Rust's `serde_json` with sorted keys (see attestation::canonicalize). * * Sorts object keys at every depth, omits whitespace, and emits raw - * UTF-8 (non-ASCII is not escaped to \uXXXX). We do not need full - * canonical-JSON [RFC 8785] semantics — the protocol values are - * scalars and shallow string maps, so a small recursive walk suffices. + * UTF-8 (non-ASCII is not escaped to \uXXXX). The protocol values + * are scalars and shallow string maps, so a small recursive walk + * suffices. */ export function canonicalJsonStringify(value: unknown): string { if (value === null) return 'null'; @@ -192,12 +522,25 @@ export function canonicalJsonStringify(value: unknown): string { * `URL_SAFE_NO_PAD`. */ export function b64UrlEncodeNoPad(data: Uint8Array): string { - // Buffer is available in Node 20+ (the package's minimum); base64url - // is the standard URL-safe alphabet without padding. return Buffer.from(data).toString('base64url'); } export function b64UrlDecodeNoPad(s: string): Uint8Array { - // Buffer.from with 'base64url' tolerates missing padding. + return b64UrlDecodeStrict(s); +} + +/** + * Strict base64url decoder. Rejects standard-base64 (`+`, `/`) and + * any character outside the URL-safe alphabet. Padding is tolerated + * on input (we emit without padding) but anything else is rejected + * up front so we never feed garbage to `Buffer.from`. 
+ */ +function b64UrlDecodeStrict(s: string): Uint8Array { + if (typeof s !== 'string') { + throw new Error('base64url input must be a string'); + } + if (!B64URL_RE.test(s)) { + throw new Error('base64url input contains invalid characters'); + } return new Uint8Array(Buffer.from(s, 'base64url')); } diff --git a/typescript/src/index.ts b/typescript/src/index.ts index 42b8ed3..4439477 100644 --- a/typescript/src/index.ts +++ b/typescript/src/index.ts @@ -3,11 +3,11 @@ // // VectorPin — verifiable integrity for AI embedding stores. // -// This package is the TypeScript reference implementation of protocol -// version 1 of the VectorPin attestation format. It is byte-for-byte -// compatible with the Python and Rust ports: identical canonical -// bytes, identical signatures. Compatibility is enforced by shared -// test vectors at `testvectors/v1.json` consumed by all three ports. +// This package is the TypeScript reference implementation of +// VectorPin protocol version 2. It is byte-for-byte compatible with +// the Python and Rust ports: identical canonical bytes, identical +// signatures. Compatibility is enforced by shared test vectors at +// `testvectors/v2.json` consumed by all three ports. 
// // Quick start: // @@ -15,26 +15,40 @@ // // const signer = Signer.generate('demo-2026-05'); // const vector = new Float32Array([0.1, 0.2, 0.3]); -// const pin = signer.pin({ +// const pin = await signer.pin({ // source: 'hello', // model: 'text-embedding-3-large', // vector, // }); // -// const verifier = new Verifier({ [signer.keyId]: signer.publicKeyBytes() }); -// const result = verifier.verify(pin, { source: 'hello', vector }); +// const verifier = new Verifier({ +// [signer.keyId]: await signer.publicKeyBytes(), +// }); +// const result = await verifier.verify(pin, { source: 'hello', vector }); // if (!result.ok) throw new Error(`verify failed: ${result.error}`); export { + b64UrlDecodeNoPad, + b64UrlEncodeNoPad, canonicalizeHeader, + canonicalizeHeaderV1, canonicalJsonStringify, + DOMAIN_TAG, + MAX_EXTRA_ENTRIES, + MAX_EXTRA_KEY_BYTES, + MAX_EXTRA_VALUE_BYTES, + MAX_PIN_JSON_BYTES, + MAX_VEC_DIM, pinFromDict, pinFromJSON, + pinKid, pinToDict, pinToJSON, PROTOCOL_VERSION, + SIG_LEN, type Pin, type PinHeader, + type PinParseOptions, } from './attestation.js'; export { @@ -49,8 +63,11 @@ export { export { formatUtcIsoSecond, Signer, type SignerPinOptions } from './signer.js'; export { + LegacyV1Verifier, Verifier, VerifyErrorCode, + type KeyEntry, + type KeyRegistration, type VerificationResult, type VerifyOptions, } from './verifier.js'; diff --git a/typescript/src/signer.ts b/typescript/src/signer.ts index 2b798de..d5ffa6c 100644 --- a/typescript/src/signer.ts +++ b/typescript/src/signer.ts @@ -8,7 +8,6 @@ // and demos; load production keys from a managed secret store. import * as ed25519 from '@noble/ed25519'; -import { sha512 } from '@noble/hashes/sha2'; import { randomBytes } from '@noble/hashes/utils'; import { @@ -19,9 +18,13 @@ import { } from './attestation.js'; import { hashText, hashVector, type VecDtype, type VectorInput } from './hash.js'; -// noble/ed25519 v2 sync API requires a sha512 hookup. 
Hooking it up -// at module load is fine; it's a pure-JS function reference. -ed25519.etc.sha512Sync = (...m) => sha512(ed25519.etc.concatBytes(...m)); +// Hard requirement: a Web-Crypto-compatible CSPRNG must be available +// at module load. Every supported runtime (Node >=20, Deno, Bun, +// modern browsers, Cloudflare Workers) provides this. If it's missing +// we refuse to load rather than silently fall back to a weaker source. +if (typeof crypto === 'undefined' || typeof crypto.getRandomValues !== 'function') { + throw new Error('CSPRNG not available; VectorPin requires a runtime with Web Crypto API'); +} export interface SignerPinOptions { /** Source text the embedding was produced from. */ @@ -34,8 +37,13 @@ export interface SignerPinOptions { vecDtype?: VecDtype; /** Optional content hash of the model weights. */ modelHash?: string; - /** Optional explicit timestamp in `YYYY-MM-DDTHH:MM:SSZ` form; defaults to now (UTC). */ - timestamp?: string; + /** + * Optional explicit timestamp in `YYYY-MM-DDTHH:MM:SSZ` form, or a + * `Date` instance that will be formatted to that form. Defaults to + * now (UTC). A string with fractional seconds or a numeric offset + * is normalized to the strict form; an unparseable string throws. + */ + timestamp?: string | Date; /** Optional string-to-string metadata committed under the signature. */ extra?: Record; } @@ -48,16 +56,22 @@ export interface SignerPinOptions { * to the right key during rotation. */ export class Signer { - readonly #privateKey: Uint8Array; + #privateKey: Uint8Array; readonly #keyId: string; + #wiped = false; private constructor(privateKey: Uint8Array, keyId: string) { if (!keyId) throw new Error('keyId must be non-empty'); if (privateKey.length !== 32) { throw new Error(`private key must be 32 bytes, got ${privateKey.length}`); } - this.#privateKey = privateKey; - this.#keyId = keyId; + // NFC + safety check on key_id at construction so every emitted + // pin has a header a strict verifier accepts.
+ const normalizedKid = normalizeStringStrict(keyId, 'keyId'); + // Defensive copy so the caller cannot mutate or zero our key + // after construction. + this.#privateKey = new Uint8Array(privateKey); + this.#keyId = normalizedKid; } /** Generate a fresh Ed25519 signer. Tests and demos only. */ @@ -74,39 +88,165 @@ export class Signer { return this.#keyId; } + /** True after `wipe()` has been called; the signer is unusable. */ + get isWiped(): boolean { + return this.#wiped; + } + + /** + * Zero out the private key material and mark the signer unusable. + * Subsequent calls to `pin()` or key accessors will throw. + */ + wipe(): void { + this.#privateKey.fill(0); + this.#wiped = true; + } + /** 32-byte raw Ed25519 public key — what verifiers register. */ - publicKeyBytes(): Uint8Array { - return ed25519.getPublicKey(this.#privateKey); + async publicKeyBytes(): Promise { + this.#assertUsable(); + return ed25519.getPublicKeyAsync(this.#privateKey); } /** 32-byte raw Ed25519 private seed. Treat as a secret. */ privateKeyBytes(): Uint8Array { - // Defensive copy so the caller cannot mutate our internal state. + this.#assertUsable(); return new Uint8Array(this.#privateKey); } - /** Create a signed Pin for a (source, model, vector) triple. */ - pin(opts: SignerPinOptions): Pin { + /** + * Create a signed Pin for a (source, model, vector) triple. + * + * Per spec §3.2, vectors containing NaN, +inf, or -inf are rejected + * at sign time. Per spec §3.1, `model` and each `extra` key/value + * are NFC-normalized (silently) and rejected if they contain control + * characters or bidi overrides; `source` is only NFC-normalized + * before it is hashed. + */ + async pin(opts: SignerPinOptions): Promise { + this.#assertUsable(); if (opts.vector.length === 0) { throw new Error('cannot pin an empty vector'); } + + // Reject NaN / Inf BEFORE we compute hashes so a signer never + // commits to a vector with ambiguous semantics.
+ for (let i = 0; i < opts.vector.length; i++) { + const x = opts.vector[i] as number; + if (!Number.isFinite(x)) { + throw new Error('vector contains NaN or Inf; refusing to sign'); + } + } + const dtype: VecDtype = opts.vecDtype ?? 'f32'; - const ts = opts.timestamp ?? formatUtcIsoSecond(new Date()); + + // NFC + safety for every string-typed input. + const modelN = normalizeStringStrict(opts.model, 'model'); + // Source is hashed; NFC happens inside hashText already, but we + // still want a stable representation here. + const sourceN = opts.source.normalize('NFC'); + + const extraN: Record | undefined = (() => { + if (!opts.extra) return undefined; + const out: Record = {}; + for (const k of Object.keys(opts.extra)) { + if (typeof k !== 'string' || typeof opts.extra[k] !== 'string') { + throw new Error('extra must be a map of string -> string'); + } + const kn = normalizeStringStrict(k, `extra key ${JSON.stringify(k)}`); + const vn = normalizeStringStrict(opts.extra[k]!, `extra[${JSON.stringify(k)}]`); + out[kn] = vn; + } + return out; + })(); + + const ts = formatTimestamp(opts.timestamp); + const header: PinHeader = { v: PROTOCOL_VERSION, - model: opts.model, - source_hash: hashText(opts.source), + kid: this.#keyId, + model: modelN, + source_hash: hashText(sourceN), vec_hash: hashVector(opts.vector, dtype), vec_dtype: dtype, vec_dim: opts.vector.length, ts, model_hash: opts.modelHash, - extra: opts.extra, + extra: extraN, }; const canonical = canonicalizeHeader(header); - const sig = ed25519.sign(canonical, this.#privateKey); - return { header, kid: this.#keyId, sig }; + const sig = await ed25519.signAsync(canonical, this.#privateKey); + return { header, sig }; + } + + #assertUsable(): void { + if (this.#wiped) { + throw new Error('signer has been wiped and is no longer usable'); + } + } +} + +/** + * NFC-normalize a string and reject control characters or bidi + * overrides. 
Used on every signer input so the produced pin always + * parses under the strict v2 verifier. + */ +function normalizeStringStrict(value: string, fieldName: string): string { + if (typeof value !== 'string') { + throw new Error(`${fieldName} must be a string`); + } + const nfc = value.normalize('NFC'); + for (let i = 0; i < nfc.length; i++) { + const cp = nfc.charCodeAt(i); + if (cp < 0x20) { + throw new Error( + `${fieldName} contains control character U+${cp.toString(16).padStart(4, '0').toUpperCase()}`, + ); + } + if ((cp >= 0x202a && cp <= 0x202e) || (cp >= 0x2066 && cp <= 0x2069)) { + throw new Error( + `${fieldName} contains bidi-override character U+${cp.toString(16).padStart(4, '0').toUpperCase()}`, + ); + } + } + return nfc; +} + +/** + * Coerce a `string | Date | undefined` into the exact v2 timestamp + * pattern `YYYY-MM-DDTHH:MM:SSZ`. Accepts: + * - A string already matching the pattern (passed through). + * - A string in a related RFC 3339 form with fractional seconds or + * a numeric offset — these are normalized to the strict form. + * - A `Date` (formatted via UTC accessors). + * - undefined (uses `new Date()`). + * + * The resulting string is then validated by the strict v2 regex. If + * a caller passes a non-coercible string we throw rather than + * emitting a pin that would later fail to parse. + */ +function formatTimestamp(input: string | Date | undefined): string { + if (input === undefined) { + return formatUtcIsoSecond(new Date()); + } + if (input instanceof Date) { + return formatUtcIsoSecond(input); + } + if (typeof input !== 'string') { + throw new Error('timestamp must be a string or Date'); + } + // Fast-path: already in canonical form. + if (/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/.test(input)) { + return input; + } + // Try to coerce any RFC 3339 string into canonical form. Fractional + // seconds and offsets are dropped here so callers don't have to + // hand-format. 
+ const parsed = Date.parse(input); + if (Number.isNaN(parsed)) { + throw new Error(`timestamp must match 'YYYY-MM-DDTHH:MM:SSZ'; got ${JSON.stringify(input)}`); } + return formatUtcIsoSecond(new Date(parsed)); } /** diff --git a/typescript/src/verifier.ts b/typescript/src/verifier.ts index 0926ff6..c901b57 100644 --- a/typescript/src/verifier.ts +++ b/typescript/src/verifier.ts @@ -6,10 +6,23 @@ // Mirrors the Python and Rust verifiers: same failure-mode enum, // same matching semantics, same support for partial verification // (signature-only, signature + vector, etc). +// +// The default `Verifier` accepts only protocol v2 pins. A +// `LegacyV1Verifier` is provided for migration scenarios where +// pre-v2 pins still need to validate; legacy mode is opt-in per +// spec §5 step 1. import * as ed25519 from '@noble/ed25519'; -import { canonicalizeHeader, PROTOCOL_VERSION, type Pin } from './attestation.js'; +import { + canonicalizeHeader, + canonicalizeHeaderV1, + pinFromDict, + pinFromJSON, + PROTOCOL_VERSION, + SIG_LEN, + type Pin, +} from './attestation.js'; import { hashText, hashVector, type VectorInput } from './hash.js'; /** @@ -18,18 +31,23 @@ import { hashText, hashVector, type VectorInput } from './hash.js'; * differently from a vector-tampered result (potential steganography * kill shot). * - * Wire-form values match the Python reference (`VerifyError.OK.value` - * etc.), so a result serialized over a service boundary round-trips. + * Wire-form values match the Python reference, so a result + * serialized over a service boundary round-trips. 
*/ export const VerifyErrorCode = { OK: 'ok', UNKNOWN_KEY: 'unknown_key', UNSUPPORTED_VERSION: 'unsupported_version', + KEY_EXPIRED: 'key_expired', + PARSE_ERROR: 'parse_error', SIGNATURE_INVALID: 'signature_invalid', VECTOR_TAMPERED: 'vector_tampered', SOURCE_MISMATCH: 'source_mismatch', MODEL_MISMATCH: 'model_mismatch', SHAPE_MISMATCH: 'shape_mismatch', + RECORD_MISMATCH: 'record_mismatch', + COLLECTION_MISMATCH: 'collection_mismatch', + TENANT_MISMATCH: 'tenant_mismatch', } as const; export type VerifyErrorCode = (typeof VerifyErrorCode)[keyof typeof VerifyErrorCode]; @@ -41,6 +59,18 @@ export interface VerificationResult { readonly detail: string; } +/** + * A registered public key, with an optional validity window per + * spec §7. `validFrom` is inclusive; `validUntil` is exclusive. + */ +export interface KeyEntry { + readonly publicKey: Uint8Array; + readonly validFrom?: Date | undefined; + readonly validUntil?: Date | undefined; +} + +export type KeyRegistration = Uint8Array | KeyEntry; + export interface VerifyOptions { /** If provided, the source text is rehashed and compared to `source_hash`. */ source?: string; @@ -48,100 +78,298 @@ export interface VerifyOptions { vector?: VectorInput; /** If provided, the pin's `model` field must equal this string. */ expectedModel?: string; + /** + * Replay-protection identifiers (spec §5 step 8). If supplied, the + * verifier checks the corresponding `vectorpin.*` value in the + * pin's `extra` map and rejects on mismatch. + */ + expectedRecordId?: string; + expectedCollectionId?: string; + expectedTenantId?: string; +} + +/** Maximum length of an attacker-controlled substring in `detail`. */ +const MAX_DETAIL_FIELD = 64; + +/** + * Replace control characters (newlines included) in an attacker- + * controllable field with `?` and cap its length before embedding it + * in a `detail` string, so an attacker cannot inject log entries or + * terminal escape sequences.
+ */ +function sanitizeDetail(s: string): string { + // eslint-disable-next-line no-control-regex + const cleaned = s.replace(/[\x00-\x1f\x7f]/g, '?'); + if (cleaned.length <= MAX_DETAIL_FIELD) return cleaned; + return cleaned.slice(0, MAX_DETAIL_FIELD) + '...'; +} + +/** + * Parse a v2-style `ts` string into a UTC `Date`. Returns null if + * the string is unparseable so the caller can fail safely. + */ +function parseTs(ts: string): Date | null { + // Strict v2 form is "YYYY-MM-DDTHH:MM:SSZ"; we also accept the + // less-strict RFC 3339 form because this helper is shared with the + // legacy v1 verifier. + const t = Date.parse(ts); + if (Number.isNaN(t)) return null; + return new Date(t); } /** * Verifies Pin attestations against a key registry. * - * The registry maps key id -> 32-byte raw Ed25519 public key. - * Verifiers MUST be willing to hold multiple keys at once to support - * rotation: when a new signing key is introduced, both the old and - * new public keys live in the registry until the rotation window - * closes. + * The registry maps key id -> 32-byte raw Ed25519 public key (or a + * `KeyEntry` with optional validity window). Verifiers MUST be + * willing to hold multiple keys at once to support rotation: when a + * new signing key is introduced, both the old and new public keys + * live in the registry until the rotation window closes. + * + * The default constructor accepts only protocol v2 pins. To accept + * legacy v1 pins, use `LegacyV1Verifier` instead. */ export class Verifier { - readonly #keys = new Map(); + readonly #keys = new Map(); + readonly #acceptV1Legacy: boolean; - constructor(publicKeys: Record = {}) { + constructor( + publicKeys: Record = {}, + opts: { acceptV1Legacy?: boolean } = {}, + ) { + this.#acceptV1Legacy = opts.acceptV1Legacy === true; for (const [kid, key] of Object.entries(publicKeys)) { this.addKey(kid, key); } } /** Register an additional public key — used during rotation. 
*/ - addKey(kid: string, publicKey: Uint8Array): void { - if (publicKey.length !== 32) { - throw new Error(`public key for ${kid} must be 32 bytes, got ${publicKey.length}`); - } - // Defensive copy so callers can't mutate registered keys. - this.#keys.set(kid, new Uint8Array(publicKey)); + addKey(kid: string, key: KeyRegistration): void { + const entry = coerceEntry(key, kid); + this.#keys.set(kid, entry); } get keyCount(): number { return this.#keys.size; } + /** Internal hook used by `LegacyV1Verifier`. */ + protected get acceptV1Legacy(): boolean { + return this.#acceptV1Legacy; + } + /** - * Verify a Pin. Pass `source`/`vector`/`expectedModel` only when - * you have the corresponding ground truth on hand — the signature - * check always runs; the others are gated on what you supply. + * Verify a Pin. The signature check always runs; the others are + * gated on which ground-truth values you supply. */ - verify(pin: Pin, opts: VerifyOptions = {}): VerificationResult { - if (pin.header.v !== PROTOCOL_VERSION) { - return result(false, 'unsupported_version', `pin version ${pin.header.v} not supported`); + async verify(pin: Pin, opts: VerifyOptions = {}): Promise { + // Step 1: version dispatch. + const accepted = this.#acceptV1Legacy + ? [PROTOCOL_VERSION, 1] + : [PROTOCOL_VERSION]; + if (!accepted.includes(pin.header.v)) { + return result( + false, + VerifyErrorCode.UNSUPPORTED_VERSION, + `pin version ${pin.header.v} not supported`, + ); + } + + // Pre-check signature shape so a malformed pin returns a + // structured PARSE_ERROR rather than blowing up downstream. + if (!(pin.sig instanceof Uint8Array) || pin.sig.length !== SIG_LEN) { + return result( + false, + VerifyErrorCode.PARSE_ERROR, + `signature must be exactly ${SIG_LEN} bytes`, + ); } - const publicKey = this.#keys.get(pin.kid); - if (!publicKey) { - return result(false, 'unknown_key', `no registered public key for kid=${pin.kid}`); + // Step 2: kid lookup + validity window. 
+ const entry = this.#keys.get(pin.header.kid); + if (!entry) { + return result( + false, + VerifyErrorCode.UNKNOWN_KEY, + `no registered public key for kid=${sanitizeDetail(pin.header.kid)}`, + ); + } + if (entry.validFrom !== undefined || entry.validUntil !== undefined) { + const pinTs = parseTs(pin.header.ts); + if (pinTs === null) { + return result( + false, + VerifyErrorCode.KEY_EXPIRED, + 'pin ts unparseable; cannot evaluate key validity window', + ); + } + if (entry.validFrom !== undefined && pinTs < entry.validFrom) { + return result( + false, + VerifyErrorCode.KEY_EXPIRED, + `pin ts ${sanitizeDetail(pin.header.ts)} predates key validFrom`, + ); + } + if (entry.validUntil !== undefined && pinTs >= entry.validUntil) { + return result( + false, + VerifyErrorCode.KEY_EXPIRED, + `pin ts ${sanitizeDetail(pin.header.ts)} is at or past key validUntil`, + ); + } } - const canonical = canonicalizeHeader(pin.header); - let sigValid: boolean; + // Step 4: signature. + const canonical = canonicalFor(pin); + let sigValid = false; try { - sigValid = ed25519.verify(pin.sig, canonical, publicKey); + sigValid = await ed25519.verifyAsync(pin.sig, canonical, entry.publicKey); } catch { sigValid = false; } if (!sigValid) { - return result(false, 'signature_invalid', 'ed25519 signature did not verify'); + return result( + false, + VerifyErrorCode.SIGNATURE_INVALID, + 'ed25519 signature did not verify', + ); } + // Step 6: vector check. Vector NaN/Inf at verify time is a parse + // error per spec §5 step 6 — reject before hashing. 
if (opts.vector !== undefined) { if (opts.vector.length !== pin.header.vec_dim) { return result( false, - 'shape_mismatch', + VerifyErrorCode.SHAPE_MISMATCH, `vector length ${opts.vector.length} != pin dim ${pin.header.vec_dim}`, ); } + for (let i = 0; i < opts.vector.length; i++) { + const x = opts.vector[i] as number; + if (!Number.isFinite(x)) { + return result( + false, + VerifyErrorCode.PARSE_ERROR, + 'supplied vector contains NaN or infinity', + ); + } + } if (hashVector(opts.vector, pin.header.vec_dtype) !== pin.header.vec_hash) { return result( false, - 'vector_tampered', + VerifyErrorCode.VECTOR_TAMPERED, 'vector hash mismatch — embedding has been modified after pinning', ); } } + // Step 5: source check. if (opts.source !== undefined && hashText(opts.source) !== pin.header.source_hash) { return result( false, - 'source_mismatch', + VerifyErrorCode.SOURCE_MISMATCH, 'source hash mismatch — pinned source differs from supplied source', ); } + // Step 7: model check. if (opts.expectedModel !== undefined && pin.header.model !== opts.expectedModel) { return result( false, - 'model_mismatch', - `pin model ${pin.header.model} != expected ${opts.expectedModel}`, + VerifyErrorCode.MODEL_MISMATCH, + `pin model ${sanitizeDetail(pin.header.model)} != expected ${sanitizeDetail( + opts.expectedModel, + )}`, ); } - return result(true, 'ok', ''); + // Step 8: replay-protection identifier checks. The `vectorpin.*` + // reserved keys are tamper-evident because every `extra` entry + // is signed. 
+ const replayChecks: Array<[string, string | undefined, VerifyErrorCode]> = [ + ['vectorpin.record_id', opts.expectedRecordId, VerifyErrorCode.RECORD_MISMATCH], + ['vectorpin.collection_id', opts.expectedCollectionId, VerifyErrorCode.COLLECTION_MISMATCH], + ['vectorpin.tenant_id', opts.expectedTenantId, VerifyErrorCode.TENANT_MISMATCH], + ]; + for (const [key, expected, errCode] of replayChecks) { + if (expected === undefined) continue; + const actual = pin.header.extra?.[key]; + if (actual !== expected) { + return result( + false, + errCode, + `pin extra[${key}]=${sanitizeDetail(String(actual))} != expected ${sanitizeDetail(expected)}`, + ); + } + } + + return result(true, VerifyErrorCode.OK, ''); + } +} + +/** + * Migration-mode verifier that accepts protocol v1 pins in addition + * to v2. v1 pins are dispatched to v1 canonicalization so legacy + * artifacts continue to verify byte-for-byte against their original + * signatures. + * + * Per spec §5 step 1: legacy mode MUST be opt-in and SHOULD be + * disabled by default. Use this class explicitly; do not turn it on + * in shared infrastructure paths. + * + * To parse a v1 pin JSON, use the static helpers on this class — + * the default `pinFromJSON` rejects v1 pins. + */ +export class LegacyV1Verifier extends Verifier { + constructor(publicKeys: Record = {}) { + super(publicKeys, { acceptV1Legacy: true }); + } + + /** Parse a pin from a JSON string, accepting v1 or v2. */ + static parsePin(s: string): Pin { + return pinFromJSON(s, { acceptV1Legacy: true }); + } + + /** Parse a pin from a plain object, accepting v1 or v2. */ + static parsePinDict(d: Record): Pin { + return pinFromDict(d, { acceptV1Legacy: true }); + } +} + +/** + * Reconstruct the canonical bytes a signer would have signed. + * + * Dispatches on the pin's `v` field so the legacy verifier shares + * most of this code path. v2 emits `DOMAIN_TAG || json`; v1 emits + * just the (kid-less) JSON. 
+ */ +function canonicalFor(pin: Pin): Uint8Array { + if (pin.header.v === 1) { + return canonicalizeHeaderV1(pin.header); + } + return canonicalizeHeader(pin.header); +} + +function coerceEntry(key: KeyRegistration, kid: string): KeyEntry { + if (key instanceof Uint8Array) { + if (key.length !== 32) { + throw new Error(`public key for ${kid} must be 32 bytes, got ${key.length}`); + } + return { publicKey: new Uint8Array(key) }; + } + if (key && typeof key === 'object' && 'publicKey' in key) { + const pk = key.publicKey; + if (!(pk instanceof Uint8Array) || pk.length !== 32) { + throw new Error(`public key for ${kid} must be a 32-byte Uint8Array`); + } + return { + publicKey: new Uint8Array(pk), + validFrom: key.validFrom, + validUntil: key.validUntil, + }; } + throw new Error(`public key for ${kid} must be Uint8Array or KeyEntry`); } function result(ok: boolean, error: VerifyErrorCode, detail: string): VerificationResult { diff --git a/typescript/test/cross-lang.test.ts b/typescript/test/cross-lang.test.ts index 4c97a8d..b2a9802 100644 --- a/typescript/test/cross-lang.test.ts +++ b/typescript/test/cross-lang.test.ts @@ -24,58 +24,74 @@ import { } from '../src/attestation.js'; import { hashText, hashVector, type VecDtype } from '../src/hash.js'; import { Signer } from '../src/signer.js'; -import { Verifier } from '../src/verifier.js'; +import { Verifier, VerifyErrorCode } from '../src/verifier.js'; const HERE = dirname(fileURLToPath(import.meta.url)); const TESTVECTORS_DIR = join(HERE, '..', '..', 'testvectors'); -interface FixtureBundle { +interface V2Bundle { + version: number; + domain_tag_b64: string; public_key_b64: string; - private_seed_b64: string; + private_key_b64: string; key_id: string; - fixtures: Fixture[]; + fixtures: V2Fixture[]; } -interface Fixture { +interface V2Fixture { name: string; + description: string; input: { source: string; model: string; - vector_b64: string; + vec_b64: string; vec_dtype: VecDtype; vec_dim: number; timestamp: string; + 
model_hash?: string; + extra?: Record; }; - expected: { - pin_json: string; - canonical_header_b64: string; - vec_hash: string; - source_hash: string; - }; + pin_json: string; + expected_canonical_bytes_b64: string; + expected_vec_hash: string; + expected_source_hash: string; +} + +interface V2NegativeBundle { + version: number; + public_key_b64: string; + private_key_b64: string; + key_id: string; + fixtures: V2NegativeFixture[]; } -interface NegativeFixture { +interface V2NegativeFixture { name: string; + description: string; + expected_failure: string; pin_json: string; - tampered_vector_b64: string; - expected_error: string; + tampered_vec_b64?: string; + tampered_source?: string; + original_source?: string; + vec_dtype?: VecDtype; + vec_dim?: number; + nan_vec_b64?: string; + expected_model?: string; + expected_record_id?: string; } -function loadBundle(): FixtureBundle { - const raw = readFileSync(join(TESTVECTORS_DIR, 'v1.json'), 'utf8'); - return JSON.parse(raw) as FixtureBundle; +function loadV2(): V2Bundle { + return JSON.parse(readFileSync(join(TESTVECTORS_DIR, 'v2.json'), 'utf8')) as V2Bundle; } -function loadNegative(): NegativeFixture { - const raw = readFileSync(join(TESTVECTORS_DIR, 'negative_v1.json'), 'utf8'); - return JSON.parse(raw) as NegativeFixture; +function loadV2Negative(): V2NegativeBundle { + return JSON.parse( + readFileSync(join(TESTVECTORS_DIR, 'negative_v2.json'), 'utf8'), + ) as V2NegativeBundle; } function parseVecF32(bytes: Uint8Array, dim: number): Float32Array { assert.equal(bytes.length, dim * 4, 'f32 fixture length sanity check'); - // The fixture bytes are already little-endian, which is what Float32Array - // expects on every realistic Node host (x86_64, arm64). We copy through - // a DataView to be explicit and platform-safe. 
const out = new Float32Array(dim); const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); for (let i = 0; i < dim; i++) { @@ -94,80 +110,180 @@ function parseVecF64(bytes: Uint8Array, dim: number): Float64Array { return out; } -describe('cross-language positive fixtures (testvectors/v1.json)', () => { - const bundle = loadBundle(); +describe('cross-language v2 positive fixtures (testvectors/v2.json)', () => { + const bundle = loadV2(); + assert.equal(bundle.version, 2, 'fixture file declares v2'); assert.ok(bundle.fixtures.length > 0, 'no fixtures to test'); for (const fx of bundle.fixtures) { - it(`fixture: ${fx.name}`, () => { + it(`fixture: ${fx.name}`, async () => { const dtype = fx.input.vec_dtype; - const rawBytes = b64UrlDecodeNoPad(fx.input.vector_b64); + const rawBytes = b64UrlDecodeNoPad(fx.input.vec_b64); const vector = dtype === 'f32' ? parseVecF32(rawBytes, fx.input.vec_dim) : parseVecF64(rawBytes, fx.input.vec_dim); // 1. Component hashes line up with what Python recorded. - assert.equal(hashText(fx.input.source), fx.expected.source_hash, 'source_hash'); - assert.equal(hashVector(vector, dtype), fx.expected.vec_hash, 'vec_hash'); + assert.equal(hashText(fx.input.source), fx.expected_source_hash, 'source_hash'); + assert.equal(hashVector(vector, dtype), fx.expected_vec_hash, 'vec_hash'); // 2. Reproduce the pin from the deterministic seed and confirm // the canonical bytes + signed JSON match Python's output - // byte-for-byte. - const seed = b64UrlDecodeNoPad(bundle.private_seed_b64); + // byte-for-byte (Ed25519 is deterministic). 
+ const seed = b64UrlDecodeNoPad(bundle.private_key_b64); const signer = Signer.fromPrivateBytes(seed, bundle.key_id); const pubExpected = b64UrlDecodeNoPad(bundle.public_key_b64); assert.deepEqual( - Array.from(signer.publicKeyBytes()), + Array.from(await signer.publicKeyBytes()), Array.from(pubExpected), 'public key derivation', ); - const pin = signer.pin({ + const pin = await signer.pin({ source: fx.input.source, model: fx.input.model, vector, vecDtype: dtype, timestamp: fx.input.timestamp, + modelHash: fx.input.model_hash, + extra: fx.input.extra, }); const canonicalActual = canonicalizeHeader(pin.header); - const canonicalExpected = b64UrlDecodeNoPad(fx.expected.canonical_header_b64); + const canonicalExpected = b64UrlDecodeNoPad(fx.expected_canonical_bytes_b64); assert.equal( b64UrlEncodeNoPad(canonicalActual), b64UrlEncodeNoPad(canonicalExpected), - 'canonical header bytes', + 'canonical bytes byte-for-byte', ); const producedJson = pinToJSON(pin); - assert.equal(producedJson, fx.expected.pin_json, 'pin JSON byte-for-byte'); + assert.equal(producedJson, fx.pin_json, 'pin JSON byte-for-byte'); - // 3. Round-trip through fromJSON, verify the parsed pin. + // 3. Round-trip through pinFromJSON; verify the parsed pin. const parsed = pinFromJSON(producedJson); const verifier = new Verifier({ [bundle.key_id]: pubExpected }); - const r1 = verifier.verify(parsed, { source: fx.input.source }); + const r1 = await verifier.verify(parsed, { source: fx.input.source }); assert.equal(r1.ok, true, `parsed pin verify: ${r1.error} ${r1.detail}`); // 4. Verify the JSON Python emitted directly. 
- const pythonPin = pinFromJSON(fx.expected.pin_json); - const r2 = verifier.verify(pythonPin, { source: fx.input.source }); + const pythonPin = pinFromJSON(fx.pin_json); + const r2 = await verifier.verify(pythonPin, { source: fx.input.source }); assert.equal(r2.ok, true, `python pin verify: ${r2.error} ${r2.detail}`); }); } }); -describe('cross-language negative fixture (testvectors/negative_v1.json)', () => { - it('rejects pin against tampered vector with vector_tampered', () => { - const neg = loadNegative(); - assert.equal(neg.expected_error, 'vector_tampered'); +describe('cross-language v2 negative fixtures (testvectors/negative_v2.json)', () => { + const bundle = loadV2Negative(); + const pubKey = b64UrlDecodeNoPad(bundle.public_key_b64); - const pin = pinFromJSON(neg.pin_json); - const tampered = parseVecF32(b64UrlDecodeNoPad(neg.tampered_vector_b64), pin.header.vec_dim); + for (const fx of bundle.fixtures) { + it(`negative fixture: ${fx.name}`, async () => { + const expected = fx.expected_failure.toLowerCase(); + + // Many negative fixtures fail at parse time. We unify parse + // failures with verify failures by distinguishing the + // "version mismatch" subset (UNSUPPORTED_VERSION) from other + // structural failures (PARSE_ERROR). The spec leaves that + // boundary up to the implementation. + let pin: ReturnType | undefined; + let parseErr: string | undefined; + try { + pin = pinFromJSON(fx.pin_json); + } catch (e) { + parseErr = e instanceof Error ? e.message : String(e); + } + + if (parseErr !== undefined) { + const actual = /version/i.test(parseErr) + ? 
VerifyErrorCode.UNSUPPORTED_VERSION + : VerifyErrorCode.PARSE_ERROR; + assert.equal( + actual, + expected, + `${fx.name}: parser rejected with ${actual} but fixture expected ${expected} (msg: ${parseErr})`, + ); + return; + } - const bundle = loadBundle(); - const verifier = new Verifier({ [bundle.key_id]: b64UrlDecodeNoPad(bundle.public_key_b64) }); - const result = verifier.verify(pin, { vector: tampered }); - assert.equal(result.ok, false); - assert.equal(result.error, 'vector_tampered'); + const verifier = new Verifier({ [bundle.key_id]: pubKey }); + + // Build verify options from optional fixture fields. + const opts: Parameters[1] = {}; + if (fx.tampered_source !== undefined) { + opts.source = fx.tampered_source; + } + if (fx.expected_model !== undefined) { + opts.expectedModel = fx.expected_model; + } + if (fx.expected_record_id !== undefined) { + opts.expectedRecordId = fx.expected_record_id; + } + if (fx.tampered_vec_b64 !== undefined && fx.vec_dtype && fx.vec_dim !== undefined) { + const bytes = b64UrlDecodeNoPad(fx.tampered_vec_b64); + opts.vector = + fx.vec_dtype === 'f32' ? parseVecF32(bytes, fx.vec_dim) : parseVecF64(bytes, fx.vec_dim); + } + if (fx.nan_vec_b64 !== undefined && fx.vec_dtype && fx.vec_dim !== undefined) { + const bytes = b64UrlDecodeNoPad(fx.nan_vec_b64); + opts.vector = + fx.vec_dtype === 'f32' ? 
parseVecF32(bytes, fx.vec_dim) : parseVecF64(bytes, fx.vec_dim); + } + + const result = await verifier.verify(pin!, opts); + assert.equal(result.ok, false, `${fx.name}: expected failure but got OK`); + assert.equal( + result.error, + expected, + `${fx.name}: expected ${expected} but got ${result.error} (${result.detail})`, + ); + }); + } + + it('codespace coverage: all expected_failure codes map to a VerifyErrorCode', () => { + const codes = new Set(Object.values(VerifyErrorCode)); + for (const fx of bundle.fixtures) { + const expected = fx.expected_failure.toLowerCase(); + assert.ok( + codes.has(expected as VerifyErrorCode), + `${fx.name}: expected_failure ${fx.expected_failure} (${expected}) is not a VerifyErrorCode`, + ); + } }); }); + +describe('strict v2 verifier rejects v1 fixtures', () => { + // Strict v2 verifier should refuse every pin in v1.json as + // UNSUPPORTED_VERSION (parser may also fail earlier, which is fine). + interface V1Bundle { + public_key_b64: string; + key_id: string; + fixtures: { name: string; expected: { pin_json: string } }[]; + } + const v1 = JSON.parse(readFileSync(join(TESTVECTORS_DIR, 'v1.json'), 'utf8')) as V1Bundle; + const pubKey = b64UrlDecodeNoPad(v1.public_key_b64); + const verifier = new Verifier({ [v1.key_id]: pubKey }); + + for (const fx of v1.fixtures) { + it(`rejects v1 fixture ${fx.name}`, async () => { + // Either the parser refuses the v=1 pin, or the verifier + // returns UNSUPPORTED_VERSION. Both are acceptable; we just + // assert it is NOT accepted. + let parsed: ReturnType | undefined; + let threw = false; + try { + parsed = pinFromJSON(fx.expected.pin_json); + } catch { + threw = true; + } + if (threw) { + return; // strict parser refused — that's the contract. 
+ } + const r = await verifier.verify(parsed!); + assert.equal(r.ok, false, 'v1 pin must not verify under strict v2'); + assert.equal(r.error, VerifyErrorCode.UNSUPPORTED_VERSION); + }); + } +}); diff --git a/typescript/test/legacy-v1.test.ts b/typescript/test/legacy-v1.test.ts new file mode 100644 index 0000000..e393a45 --- /dev/null +++ b/typescript/test/legacy-v1.test.ts @@ -0,0 +1,86 @@ +// Copyright 2025 Jascha Wanger / Tarnover, LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Legacy v1 verifier tests. +// +// The strict v2 verifier rejects v1 pins. `LegacyV1Verifier` keeps +// historical artifacts verifiable during a migration window. Per +// spec §5 step 1, legacy mode is opt-in. + +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; + +import { b64UrlDecodeNoPad } from '../src/attestation.js'; +import { LegacyV1Verifier, VerifyErrorCode } from '../src/verifier.js'; + +const HERE = dirname(fileURLToPath(import.meta.url)); +const TESTVECTORS_DIR = join(HERE, '..', '..', 'testvectors'); + +interface V1Bundle { + public_key_b64: string; + key_id: string; + fixtures: V1Fixture[]; +} + +interface V1Fixture { + name: string; + input: { source: string }; + expected: { pin_json: string }; +} + +interface V1NegativeFixture { + pin_json: string; + tampered_vector_b64: string; + expected_error: string; +} + +function loadV1(): V1Bundle { + return JSON.parse(readFileSync(join(TESTVECTORS_DIR, 'v1.json'), 'utf8')) as V1Bundle; +} + +function loadV1Negative(): V1NegativeFixture { + return JSON.parse( + readFileSync(join(TESTVECTORS_DIR, 'negative_v1.json'), 'utf8'), + ) as V1NegativeFixture; +} + +describe('LegacyV1Verifier accepts v1 fixtures', () => { + const bundle = loadV1(); + const pubKey = b64UrlDecodeNoPad(bundle.public_key_b64); + + for (const fx of bundle.fixtures) { + it(`verifies v1 fixture 
${fx.name}`, async () => { + const pin = LegacyV1Verifier.parsePin(fx.expected.pin_json); + assert.equal(pin.header.v, 1, 'pin is v1'); + const verifier = new LegacyV1Verifier({ [bundle.key_id]: pubKey }); + const r = await verifier.verify(pin, { source: fx.input.source }); + assert.equal(r.ok, true, `v1 verify: ${r.error} ${r.detail}`); + }); + } +}); + +describe('LegacyV1Verifier still detects v1 tampering', () => { + it('returns VECTOR_TAMPERED on the v1 tampered fixture', async () => { + const bundle = loadV1(); + const neg = loadV1Negative(); + const pin = LegacyV1Verifier.parsePin(neg.pin_json); + + const bytes = b64UrlDecodeNoPad(neg.tampered_vector_b64); + const dim = pin.header.vec_dim; + const tampered = new Float32Array(dim); + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + for (let i = 0; i < dim; i++) { + tampered[i] = view.getFloat32(i * 4, /* littleEndian */ true); + } + + const verifier = new LegacyV1Verifier({ + [bundle.key_id]: b64UrlDecodeNoPad(bundle.public_key_b64), + }); + const r = await verifier.verify(pin, { vector: tampered }); + assert.equal(r.ok, false); + assert.equal(r.error, VerifyErrorCode.VECTOR_TAMPERED); + }); +}); diff --git a/typescript/test/signer-verifier.test.ts b/typescript/test/signer-verifier.test.ts index 990e6bd..f5bfe4e 100644 --- a/typescript/test/signer-verifier.test.ts +++ b/typescript/test/signer-verifier.test.ts @@ -4,106 +4,129 @@ import { describe, it } from 'node:test'; import { strict as assert } from 'node:assert'; -import { pinFromJSON, pinToJSON } from '../src/attestation.js'; +import { pinFromJSON, pinToJSON, PROTOCOL_VERSION } from '../src/attestation.js'; import { Signer } from '../src/signer.js'; -import { Verifier } from '../src/verifier.js'; +import { Verifier, VerifyErrorCode } from '../src/verifier.js'; describe('Signer.pin + Verifier.verify', () => { - function fixture(keyId = 'k1') { + async function fixture(keyId = 'k1') { const signer = Signer.generate(keyId); - 
const verifier = new Verifier({ [signer.keyId]: signer.publicKeyBytes() }); + const verifier = new Verifier({ [signer.keyId]: await signer.publicKeyBytes() }); const vector = new Float32Array(Array.from({ length: 16 }, (_, i) => i * 0.1)); return { signer, verifier, vector }; } - it('honest verify succeeds', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); - const result = verifier.verify(pin, { source: 'hello', vector }); + it('honest verify succeeds', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); + assert.equal(pin.header.v, PROTOCOL_VERSION); + assert.equal(pin.header.kid, signer.keyId); + const result = await verifier.verify(pin, { source: 'hello', vector }); assert.equal(result.ok, true, `unexpected error: ${result.error} - ${result.detail}`); }); - it('signature-only verify succeeds when no source/vector supplied', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); - assert.equal(verifier.verify(pin).ok, true); + it('signature-only verify succeeds when no source/vector supplied', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); + assert.equal((await verifier.verify(pin)).ok, true); }); - it('vector tamper is caught', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); + it('vector tamper is caught', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); const tampered = new Float32Array(vector); tampered[0] = vector[0]! 
+ 1e-5; - const result = verifier.verify(pin, { vector: tampered }); + const result = await verifier.verify(pin, { vector: tampered }); assert.equal(result.ok, false); - assert.equal(result.error, 'vector_tampered'); + assert.equal(result.error, VerifyErrorCode.VECTOR_TAMPERED); }); - it('source mismatch is caught', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); - const result = verifier.verify(pin, { source: 'HELLO' }); + it('source mismatch is caught', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); + const result = await verifier.verify(pin, { source: 'HELLO' }); assert.equal(result.ok, false); - assert.equal(result.error, 'source_mismatch'); + assert.equal(result.error, VerifyErrorCode.SOURCE_MISMATCH); }); - it('shape mismatch is caught', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); + it('shape mismatch is caught', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); const truncated = new Float32Array(vector.slice(0, 8)); - const result = verifier.verify(pin, { vector: truncated }); + const result = await verifier.verify(pin, { vector: truncated }); assert.equal(result.ok, false); - assert.equal(result.error, 'shape_mismatch'); + assert.equal(result.error, VerifyErrorCode.SHAPE_MISMATCH); }); - it('unknown key is caught', () => { + it('unknown key is caught', async () => { const rogue = Signer.generate('rogue'); const prod = Signer.generate('prod'); - const verifier = new Verifier({ [prod.keyId]: prod.publicKeyBytes() }); + const verifier = new Verifier({ [prod.keyId]: await prod.publicKeyBytes() }); const v = new Float32Array([1, 2, 3]); - const pin = rogue.pin({ source: 'x', model: 'm', vector: v }); - const result = 
verifier.verify(pin); + const pin = await rogue.pin({ source: 'x', model: 'm', vector: v }); + const result = await verifier.verify(pin); assert.equal(result.ok, false); - assert.equal(result.error, 'unknown_key'); + assert.equal(result.error, VerifyErrorCode.UNKNOWN_KEY); }); - it('model mismatch is caught', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'x', model: 'model-A', vector }); - const result = verifier.verify(pin, { expectedModel: 'model-B' }); + it('model mismatch is caught', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'x', model: 'model-A', vector }); + const result = await verifier.verify(pin, { expectedModel: 'model-B' }); assert.equal(result.ok, false); - assert.equal(result.error, 'model_mismatch'); + assert.equal(result.error, VerifyErrorCode.MODEL_MISMATCH); }); - it('rotation: multiple keys can verify', () => { + it('rotation: multiple keys can verify', async () => { const oldSigner = Signer.generate('2026-04'); const newSigner = Signer.generate('2026-05'); const verifier = new Verifier({ - [oldSigner.keyId]: oldSigner.publicKeyBytes(), - [newSigner.keyId]: newSigner.publicKeyBytes(), + [oldSigner.keyId]: await oldSigner.publicKeyBytes(), + [newSigner.keyId]: await newSigner.publicKeyBytes(), }); const v = new Float32Array([1, 2, 3]); - assert.equal(verifier.verify(oldSigner.pin({ source: 'x', model: 'm', vector: v })).ok, true); - assert.equal(verifier.verify(newSigner.pin({ source: 'x', model: 'm', vector: v })).ok, true); + assert.equal( + (await verifier.verify(await oldSigner.pin({ source: 'x', model: 'm', vector: v }))).ok, + true, + ); + assert.equal( + (await verifier.verify(await newSigner.pin({ source: 'x', model: 'm', vector: v }))).ok, + true, + ); }); - it('JSON round-trip preserves the pin', () => { - const { signer, verifier, vector } = fixture(); - const pin = signer.pin({ source: 'hello', model: 'm', vector }); + 
it('JSON round-trip preserves the pin', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ source: 'hello', model: 'm', vector }); const json = pinToJSON(pin); const back = pinFromJSON(json); - assert.equal(verifier.verify(back, { source: 'hello', vector }).ok, true); + assert.equal((await verifier.verify(back, { source: 'hello', vector })).ok, true); // Compact form, no whitespace. assert.ok(!json.includes('\n')); assert.ok(!json.includes(': ')); }); + it('record-id replay protection: match passes, mismatch fails', async () => { + const { signer, verifier, vector } = await fixture(); + const pin = await signer.pin({ + source: 'x', + model: 'm', + vector, + extra: { 'vectorpin.record_id': 'rec-1' }, + }); + const ok = await verifier.verify(pin, { expectedRecordId: 'rec-1' }); + assert.equal(ok.ok, true); + const bad = await verifier.verify(pin, { expectedRecordId: 'rec-2' }); + assert.equal(bad.ok, false); + assert.equal(bad.error, VerifyErrorCode.RECORD_MISMATCH); + }); + it('empty keyId is rejected', () => { assert.throws(() => Signer.generate('')); }); - it('publicKeyBytes returns 32 bytes', () => { + it('publicKeyBytes returns 32 bytes', async () => { const signer = Signer.generate('k'); - assert.equal(signer.publicKeyBytes().length, 32); + assert.equal((await signer.publicKeyBytes()).length, 32); }); }); diff --git a/typescript/test/v2-canonicalization.test.ts b/typescript/test/v2-canonicalization.test.ts new file mode 100644 index 0000000..99aee4c --- /dev/null +++ b/typescript/test/v2-canonicalization.test.ts @@ -0,0 +1,245 @@ +// Copyright 2025 Jascha Wanger / Tarnover, LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Unit tests for v2-specific canonicalization behavior. +// +// These tests are independent of the cross-language fixtures — +// they exercise the wire-format rules in isolation so a regression +// in (e.g.) 
the NFC check is caught even if the v2.json fixtures +// happen to not stress that code path. + +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; + +import { + canonicalizeHeader, + DOMAIN_TAG, + MAX_EXTRA_ENTRIES, + MAX_PIN_JSON_BYTES, + MAX_VEC_DIM, + pinFromJSON, + PROTOCOL_VERSION, + type PinHeader, +} from '../src/attestation.js'; +import { Signer } from '../src/signer.js'; + +function baseHeader(): PinHeader { + return { + v: PROTOCOL_VERSION, + kid: 'k1', + model: 'm', + source_hash: 'sha256:' + 'a'.repeat(64), + vec_hash: 'sha256:' + 'b'.repeat(64), + vec_dtype: 'f32', + vec_dim: 3, + ts: '2026-05-13T12:00:00Z', + }; +} + +describe('DOMAIN_TAG', () => { + it('is exactly 13 bytes', () => { + assert.equal(DOMAIN_TAG.length, 13); + }); + + it('spells "vectorpin/v2\\x00"', () => { + const expected = [ + 118, 101, 99, 116, 111, 114, 112, 105, 110, 47, 118, 50, 0, + ]; + assert.deepEqual(Array.from(DOMAIN_TAG), expected); + }); +}); + +describe('canonicalizeHeader (v2)', () => { + it('prepends DOMAIN_TAG to the canonical JSON', () => { + const bytes = canonicalizeHeader(baseHeader()); + assert.deepEqual(Array.from(bytes.slice(0, 13)), Array.from(DOMAIN_TAG)); + }); + + it('includes kid in the signed JSON body', () => { + const bytes = canonicalizeHeader(baseHeader()); + const json = new TextDecoder().decode(bytes.slice(13)); + const parsed = JSON.parse(json) as Record; + assert.equal(parsed['kid'], 'k1'); + assert.equal(parsed['v'], 2); + }); + + it('rejects non-NFC strings at canonicalize time', () => { + // "café" with combining acute (NFD form). 
+ const nfd = 'café'; + assert.throws(() => + canonicalizeHeader({ ...baseHeader(), model: nfd }), + ); + }); + + it('sorts extra keys after NFC normalization', () => { + const header: PinHeader = { + ...baseHeader(), + extra: { z: '1', a: '2', m: '3' }, + }; + const bytes = canonicalizeHeader(header); + const json = new TextDecoder().decode(bytes.slice(13)); + // Keys appear in lexicographic order in the JSON output. + const aIdx = json.indexOf('"a"'); + const mIdx = json.indexOf('"m"'); + const zIdx = json.indexOf('"z"'); + assert.ok(aIdx >= 0 && aIdx < mIdx && mIdx < zIdx); + }); +}); + +describe('pinFromJSON rejects v2 wire-format violations', () => { + function pinWith(overrides: Record): string { + const obj: Record = { + v: 2, + kid: 'k1', + model: 'm', + source_hash: 'sha256:' + 'a'.repeat(64), + vec_hash: 'sha256:' + 'b'.repeat(64), + vec_dtype: 'f32', + vec_dim: 3, + ts: '2026-05-13T12:00:00Z', + sig: 'A'.repeat(86), // 64 bytes -> 86 base64url chars + ...overrides, + }; + return JSON.stringify(obj); + } + + it('rejects unknown top-level field', () => { + assert.throws(() => pinFromJSON(pinWith({ stowaway: 'x' }))); + }); + + it('rejects __proto__ as a top-level key', () => { + // We can't add __proto__ via spread on object literals reliably; + // construct a raw JSON string. 
+ const raw = + '{"__proto__":"x","v":2,"kid":"k1","model":"m","source_hash":"sha256:' + + 'a'.repeat(64) + + '","vec_hash":"sha256:' + + 'b'.repeat(64) + + '","vec_dtype":"f32","vec_dim":3,"ts":"2026-05-13T12:00:00Z","sig":"' + + 'A'.repeat(86) + + '"}'; + assert.throws(() => pinFromJSON(raw)); + }); + + it('rejects non-string extra value', () => { + assert.throws(() => pinFromJSON(pinWith({ extra: { region: 5 } }))); + }); + + it('rejects NFD model string', () => { + assert.throws(() => pinFromJSON(pinWith({ model: 'café' }))); + }); + + it('rejects ts with fractional seconds', () => { + assert.throws(() => pinFromJSON(pinWith({ ts: '2026-05-13T12:00:00.000Z' }))); + }); + + it('rejects ts with an offset instead of trailing Z', () => { + assert.throws(() => pinFromJSON(pinWith({ ts: '2026-05-13T12:00:00+00:00' }))); + }); + + it('rejects lowercase t/z in ts', () => { + assert.throws(() => pinFromJSON(pinWith({ ts: '2026-05-13t12:00:00z' }))); + }); + + it('rejects sig that decodes to non-64 bytes', () => { + // 32 bytes -> 43 base64url chars + assert.throws(() => pinFromJSON(pinWith({ sig: 'A'.repeat(43) }))); + }); + + it('rejects standard-base64 alphabet in sig', () => { + // Inject a `+` which is not in the URL-safe alphabet. 
+ assert.throws(() => pinFromJSON(pinWith({ sig: '+' + 'A'.repeat(85) }))); + }); + + it('rejects pin JSON exceeding the size cap', () => { + const huge = 'x'.repeat(MAX_PIN_JSON_BYTES + 1); + assert.throws(() => pinFromJSON(huge)); + }); + + it('rejects vec_dim above MAX_VEC_DIM', () => { + assert.throws(() => pinFromJSON(pinWith({ vec_dim: MAX_VEC_DIM + 1 }))); + }); + + it('rejects extra with too many entries', () => { + const extra: Record = {}; + for (let i = 0; i <= MAX_EXTRA_ENTRIES; i++) { + extra[`k${i}`] = 'v'; + } + assert.throws(() => pinFromJSON(pinWith({ extra }))); + }); + + it('rejects v != 2 in strict mode', () => { + assert.throws(() => pinFromJSON(pinWith({ v: 1 }))); + assert.throws(() => pinFromJSON(pinWith({ v: 99 }))); + }); + + it('rejects control character in kid', () => { + assert.throws(() => pinFromJSON(pinWith({ kid: 'k\x01' }))); + }); + + it('rejects bidi-override in model', () => { + assert.throws(() => pinFromJSON(pinWith({ model: 'm‮' }))); + }); +}); + +describe('Signer.pin rejects unsafe input', () => { + it('rejects NaN in the vector', async () => { + const signer = Signer.generate('k1'); + const v = new Float32Array([0.1, NaN, 0.3]); + await assert.rejects(signer.pin({ source: 's', model: 'm', vector: v })); + }); + + it('rejects +Infinity in the vector', async () => { + const signer = Signer.generate('k1'); + const v = new Float32Array([0.1, Infinity, 0.3]); + await assert.rejects(signer.pin({ source: 's', model: 'm', vector: v })); + }); + + it('rejects -Infinity in the vector', async () => { + const signer = Signer.generate('k1'); + const v = new Float32Array([0.1, -Infinity, 0.3]); + await assert.rejects(signer.pin({ source: 's', model: 'm', vector: v })); + }); + + it('rejects control character in model', async () => { + const signer = Signer.generate('k1'); + const v = new Float32Array([0.1, 0.2, 0.3]); + await assert.rejects(signer.pin({ source: 's', model: 'm\x01', vector: v })); + }); + + it('+0.0 and -0.0 produce 
different vec_hashes', async () => { + const signer = Signer.generate('k1'); + const pos = await signer.pin({ + source: 's', + model: 'm', + vector: new Float32Array([0.0, 0.0, 0.0]), + }); + const neg = await signer.pin({ + source: 's', + model: 'm', + vector: new Float32Array([-0.0, -0.0, -0.0]), + }); + assert.notEqual(pos.header.vec_hash, neg.header.vec_hash); + }); + + it('emits v=2 timestamp format', async () => { + const signer = Signer.generate('k1'); + const pin = await signer.pin({ + source: 's', + model: 'm', + vector: new Float32Array([0.1, 0.2, 0.3]), + }); + assert.match(pin.header.ts, /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/); + }); + + it('strips fractional seconds when given an RFC 3339 timestamp', async () => { + const signer = Signer.generate('k1'); + const pin = await signer.pin({ + source: 's', + model: 'm', + vector: new Float32Array([0.1, 0.2, 0.3]), + timestamp: '2026-05-13T12:00:00.123Z', + }); + assert.equal(pin.header.ts, '2026-05-13T12:00:00Z'); + }); +}); From 1f865c511b3f4d3a147ae07140f2edd80746962e Mon Sep 17 00:00:00 2001 From: Jascha Date: Thu, 14 May 2026 21:33:20 -0700 Subject: [PATCH 2/2] Apply cargo fmt to satisfy CI fmt-check --- rust/vectorpin/examples/basic_usage.rs | 2 +- rust/vectorpin/src/attestation.rs | 31 +++-------- rust/vectorpin/src/lib.rs | 2 +- rust/vectorpin/src/verifier.rs | 26 +++++++-- rust/vectorpin/tests/cross_lang.rs | 14 ++--- rust/vectorpin/tests/legacy_v1.rs | 2 +- rust/vectorpin/tests/v2_canonicalization.rs | 61 +++++++++++++++------ 7 files changed, 81 insertions(+), 57 deletions(-) diff --git a/rust/vectorpin/examples/basic_usage.rs b/rust/vectorpin/examples/basic_usage.rs index 1f7b9c1..2cbd7ee 100644 --- a/rust/vectorpin/examples/basic_usage.rs +++ b/rust/vectorpin/examples/basic_usage.rs @@ -4,7 +4,7 @@ //! Mirror of `examples/basic_usage.py` — runs the same scenarios against //! the v2 wire format. 
-use vectorpin::{Pin, Signer, VerifyOptions, Verifier}; +use vectorpin::{Pin, Signer, Verifier, VerifyOptions}; fn main() { let embedding: Vec<f32> = (0..128).map(|i| (i as f32) * 0.01).collect(); diff --git a/rust/vectorpin/src/attestation.rs b/rust/vectorpin/src/attestation.rs index 11f191b..5c28e3c 100644 --- a/rust/vectorpin/src/attestation.rs +++ b/rust/vectorpin/src/attestation.rs @@ -120,14 +120,8 @@ impl PinHeader { // build a Map by inserting in lexicographic order ourselves. let mut entries: Vec<(&str, serde_json::Value)> = Vec::new(); entries.push(("v", serde_json::Value::Number(self.v.into()))); - entries.push(( - "kid", - serde_json::Value::String(nfc_string(&self.kid)), - )); - entries.push(( - "model", - serde_json::Value::String(nfc_string(&self.model)), - )); + entries.push(("kid", serde_json::Value::String(nfc_string(&self.kid)))); + entries.push(("model", serde_json::Value::String(nfc_string(&self.model)))); if let Some(h) = &self.model_hash { entries.push(("model_hash", serde_json::Value::String(h.clone()))); } @@ -135,19 +129,13 @@ impl PinHeader { "source_hash", serde_json::Value::String(self.source_hash.clone()), )); - entries.push(( - "vec_hash", - serde_json::Value::String(self.vec_hash.clone()), - )); + entries.push(("vec_hash", serde_json::Value::String(self.vec_hash.clone()))); entries.push(( "vec_dtype", serde_json::Value::String(self.vec_dtype.clone()), )); entries.push(("vec_dim", serde_json::Value::Number(self.vec_dim.into()))); - entries.push(( - "ts", - serde_json::Value::String(nfc_string(&self.ts)), - )); + entries.push(("ts", serde_json::Value::String(nfc_string(&self.ts)))); if !self.extra.is_empty() { // NFC each key and value, then re-sort by NFC'd key.
let mut nfc_entries: Vec<(String, String)> = self @@ -252,8 +240,7 @@ impl Pin { got: s.len(), }); } - let value: serde_json::Value = - serde_json::from_str(s).map_err(AttestationError::Json)?; + let value: serde_json::Value = serde_json::from_str(s).map_err(AttestationError::Json)?; Self::from_value(value) } @@ -624,10 +611,7 @@ pub mod legacy_v1 { pub fn canonicalize_v1(header: &PinHeader) -> Vec<u8> { let mut entries: Vec<(&str, serde_json::Value)> = Vec::new(); entries.push(("v", serde_json::Value::Number(header.v.into()))); - entries.push(( - "model", - serde_json::Value::String(header.model.clone()), - )); + entries.push(("model", serde_json::Value::String(header.model.clone()))); if let Some(h) = &header.model_hash { entries.push(("model_hash", serde_json::Value::String(h.clone()))); } @@ -672,8 +656,7 @@ pub mod legacy_v1 { got: s.len(), }); } - let value: serde_json::Value = - serde_json::from_str(s).map_err(AttestationError::Json)?; + let value: serde_json::Value = serde_json::from_str(s).map_err(AttestationError::Json)?; parse_v1_value(value) } diff --git a/rust/vectorpin/src/lib.rs b/rust/vectorpin/src/lib.rs index f6d4cf2..7e4d464 100644 --- a/rust/vectorpin/src/lib.rs +++ b/rust/vectorpin/src/lib.rs @@ -137,4 +137,4 @@ pub mod verifier; pub use attestation::{AttestationError, Pin, PinHeader, DOMAIN_TAG, PROTOCOL_VERSION}; pub use hash::{canonical_vector_bytes, hash_text, hash_vector, VecDtype}; pub use signer::{Signer, SignerError}; -pub use verifier::{KeyEntry, LegacyV1Verifier, VerifyError, VerifyOptions, Verifier}; +pub use verifier::{KeyEntry, LegacyV1Verifier, Verifier, VerifyError, VerifyOptions}; diff --git a/rust/vectorpin/src/verifier.rs b/rust/vectorpin/src/verifier.rs index b0a61c8..6eb9750 100644 --- a/rust/vectorpin/src/verifier.rs +++ b/rust/vectorpin/src/verifier.rs @@ -99,7 +99,9 @@ impl std::error::Error for VerifyError {} impl From<AttestationError> for VerifyError { fn from(e: AttestationError) -> Self { match e { - AttestationError::UnsupportedVersion {
got, .. } => VerifyError::UnsupportedVersion(got), + AttestationError::UnsupportedVersion { got, .. } => { + VerifyError::UnsupportedVersion(got) + } other => VerifyError::ParseError(other.to_string()), } } @@ -322,7 +324,13 @@ impl Verifier { // Step 8: replay-protection identifier checks. if let Some(expected) = opts.expected_record_id { - if pin.header.extra.get("vectorpin.record_id").map(|s| s.as_str()) != Some(expected) { + if pin + .header + .extra + .get("vectorpin.record_id") + .map(|s| s.as_str()) + != Some(expected) + { return Err(VerifyError::RecordMismatch); } } @@ -338,7 +346,13 @@ impl Verifier { } } if let Some(expected) = opts.expected_tenant_id { - if pin.header.extra.get("vectorpin.tenant_id").map(|s| s.as_str()) != Some(expected) { + if pin + .header + .extra + .get("vectorpin.tenant_id") + .map(|s| s.as_str()) + != Some(expected) + { return Err(VerifyError::TenantMismatch); } } @@ -421,7 +435,11 @@ fn vector_is_finite(v: VectorRef<'_>) -> bool { /// to avoid leaking parser-internal errors from the validity-window path). fn parse_v2_ts_unix(ts: &str) -> Option { let b = ts.as_bytes(); - if b.len() != 20 || b[4] != b'-' || b[7] != b'-' || b[10] != b'T' || b[13] != b':' + if b.len() != 20 + || b[4] != b'-' + || b[7] != b'-' + || b[10] != b'T' + || b[13] != b':' || b[16] != b':' || b[19] != b'Z' { diff --git a/rust/vectorpin/tests/cross_lang.rs b/rust/vectorpin/tests/cross_lang.rs index ee1ebe6..afda9e2 100644 --- a/rust/vectorpin/tests/cross_lang.rs +++ b/rust/vectorpin/tests/cross_lang.rs @@ -22,7 +22,7 @@ use serde_json::Value; use vectorpin::{ hash::{hash_text, hash_vector, VecDtype, VectorRef}, - KeyEntry, Pin, Signer, VerifyError, VerifyOptions, Verifier, + KeyEntry, Pin, Signer, Verifier, VerifyError, VerifyOptions, }; // ---- v2 positive fixtures ---------------------------------------------- @@ -117,9 +117,8 @@ fn run_v2_fixture(bundle: &V2Bundle, fx: &V2Fixture) { ); // 2. Reproduce the pin from the deterministic seed. 
- let signer = - Signer::from_private_bytes(&b64(&bundle.private_key_b64), bundle.key_id.clone()) - .expect("seed loads"); + let signer = Signer::from_private_bytes(&b64(&bundle.private_key_b64), bundle.key_id.clone()) + .expect("seed loads"); assert_eq!( signer.public_key_bytes().to_vec(), b64(&bundle.public_key_b64), @@ -327,8 +326,7 @@ fn cross_language_v2_negative_fixtures() { let bundle = V2NegativeBundle { public_key_b64: val["public_key_b64"].as_str().unwrap().to_string(), key_id: val["key_id"].as_str().unwrap().to_string(), - fixtures: serde_json::from_value(val["fixtures"].clone()) - .expect("parse negative fixtures"), + fixtures: serde_json::from_value(val["fixtures"].clone()).expect("parse negative fixtures"), }; assert!(!bundle.fixtures.is_empty()); @@ -372,8 +370,8 @@ fn legacy_v1_verifier_accepts_all_v1_fixtures() { for fx in &bundle.fixtures { eprintln!("legacy v1 fixture: {}", fx.name); - let pin = vectorpin::LegacyV1Verifier::parse_pin(&fx.expected.pin_json) - .expect("parse v1 pin"); + let pin = + vectorpin::LegacyV1Verifier::parse_pin(&fx.expected.pin_json).expect("parse v1 pin"); verifier .verify(&pin, VerifyOptions::default()) .expect("legacy verifier accepts v1"); diff --git a/rust/vectorpin/tests/legacy_v1.rs b/rust/vectorpin/tests/legacy_v1.rs index 57bbaf8..9960506 100644 --- a/rust/vectorpin/tests/legacy_v1.rs +++ b/rust/vectorpin/tests/legacy_v1.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; use base64::Engine; use serde::Deserialize; -use vectorpin::{KeyEntry, LegacyV1Verifier, Pin, VerifyError, VerifyOptions, Verifier}; +use vectorpin::{KeyEntry, LegacyV1Verifier, Pin, Verifier, VerifyError, VerifyOptions}; fn b64(s: &str) -> Vec<u8> { base64::engine::general_purpose::URL_SAFE_NO_PAD diff --git a/rust/vectorpin/tests/v2_canonicalization.rs b/rust/vectorpin/tests/v2_canonicalization.rs index fb25ff3..34bbbe0 100644 --- a/rust/vectorpin/tests/v2_canonicalization.rs +++ b/rust/vectorpin/tests/v2_canonicalization.rs @@ -6,7 +6,7 @@ use
vectorpin::attestation::DOMAIN_TAG; use vectorpin::{ - signer::PinOptions, AttestationError, Pin, Signer, VerifyError, VerifyOptions, Verifier, + signer::PinOptions, AttestationError, Pin, Signer, Verifier, VerifyError, VerifyOptions, }; fn v2_signer(kid: &str) -> Signer { @@ -71,10 +71,7 @@ fn signer_rejects_nan_in_vector() { let signer = v2_signer("k1"); let v: Vec<f32> = vec![1.0, f32::NAN, 3.0]; let err = signer.pin("x", "m", v.as_slice()).unwrap_err(); - assert!(matches!( - err, - vectorpin::SignerError::InvalidVector(_) - )); + assert!(matches!(err, vectorpin::SignerError::InvalidVector(_))); } #[test] @@ -82,10 +79,7 @@ fn signer_rejects_pos_inf() { let signer = v2_signer("k1"); let v: Vec<f64> = vec![1.0, f64::INFINITY, 3.0]; let err = signer.pin("x", "m", v.as_slice()).unwrap_err(); - assert!(matches!( - err, - vectorpin::SignerError::InvalidVector(_) - )); + assert!(matches!(err, vectorpin::SignerError::InvalidVector(_))); } #[test] @@ -123,7 +117,10 @@ fn parser_rejects_control_char_in_string_field() { let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["kid"] = serde_json::Value::String(bad); let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::ControlChar(_)), "got {err:?}"); + assert!( + matches!(err, AttestationError::ControlChar(_)), + "got {err:?}" + ); } #[test] @@ -135,7 +132,10 @@ fn parser_rejects_bidi_override() { let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["kid"] = serde_json::Value::String(bad); let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::BidiOverride(_)), "got {err:?}"); + assert!( + matches!(err, AttestationError::BidiOverride(_)), + "got {err:?}" + ); } #[test] @@ -146,7 +146,10 @@ fn parser_rejects_ts_with_fractional_seconds() { let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["ts"] = serde_json::Value::String("2026-05-05T12:00:00.123Z".to_string());
let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::BadTimestamp(_)), "got {err:?}"); + assert!( + matches!(err, AttestationError::BadTimestamp(_)), + "got {err:?}" + ); } #[test] @@ -157,7 +160,10 @@ fn parser_rejects_ts_with_offset() { let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["ts"] = serde_json::Value::String("2026-05-05T12:00:00+00:00".to_string()); let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::BadTimestamp(_)), "got {err:?}"); + assert!( + matches!(err, AttestationError::BadTimestamp(_)), + "got {err:?}" + ); } #[test] @@ -179,21 +185,31 @@ fn parser_rejects_non_string_extra_value() { let signer = v2_signer("k"); let v = small_vec(); let opts = PinOptions { - extra: [("region".to_string(), "us-east".to_string())].into_iter().collect(), + extra: [("region".to_string(), "us-east".to_string())] + .into_iter() + .collect(), ..PinOptions::default() }; - let pin = signer.pin_with_options("x", "m", v.as_slice(), opts).unwrap(); + let pin = signer + .pin_with_options("x", "m", v.as_slice(), opts) + .unwrap(); let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["extra"]["region"] = serde_json::json!(5); let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::InvalidField { field: "extra", .. }), "got {err:?}"); + assert!( + matches!(err, AttestationError::InvalidField { field: "extra", .. }), + "got {err:?}" + ); } #[test] fn parser_rejects_oversize_pin_json() { let oversize = "x".repeat(vectorpin::attestation::MAX_PIN_JSON_BYTES + 1); let err = Pin::from_json(&oversize).unwrap_err(); - assert!(matches!(err, AttestationError::SizeLimit { .. }), "got {err:?}"); + assert!( + matches!(err, AttestationError::SizeLimit { .. 
}), + "got {err:?}" + ); } #[test] @@ -219,7 +235,16 @@ fn parser_rejects_vec_dim_zero() { let mut value: serde_json::Value = serde_json::from_str(&pin.to_json()).unwrap(); value["vec_dim"] = serde_json::json!(0); let err = Pin::from_value(value).unwrap_err(); - assert!(matches!(err, AttestationError::InvalidField { field: "vec_dim", .. }), "got {err:?}"); + assert!( + matches!( + err, + AttestationError::InvalidField { + field: "vec_dim", + .. + } + ), + "got {err:?}" + ); } #[test]