diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71af1d4..bac0a47 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,6 +69,26 @@ jobs: - name: Build run: cargo build --workspace --all-features + - name: Install Go (for vexctl) + # The `vex` subcommand emits OpenVEX documents; tests/e2e_vex.rs + # validates the output with vexctl when it's on PATH. vexctl is + # a Go binary distributed via `go install`. Setting up Go here + # is the cheapest way to give every test job a usable vexctl. + # SHA pin resolved from `gh api repos/actions/setup-go/git/refs/tags/v6.4.0`. + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: '1.22' + cache: false + + - name: Install vexctl + # `go install` puts the binary in $(go env GOPATH)/bin; surface + # that path to subsequent steps so `Command::new("vexctl")` in + # the test resolves. Pinned to a tagged release rather than + # @latest for reproducibility. + run: | + go install github.com/openvex/vexctl@v0.3.0 + echo "$(go env GOPATH)/bin" >> "$GITHUB_PATH" + - name: Run tests run: cargo test --workspace --all-features diff --git a/crates/socket-patch-cli/CLI_CONTRACT.md b/crates/socket-patch-cli/CLI_CONTRACT.md index 2309628..02db1f2 100644 --- a/crates/socket-patch-cli/CLI_CONTRACT.md +++ b/crates/socket-patch-cli/CLI_CONTRACT.md @@ -16,6 +16,7 @@ This document defines the **public surface** of the `socket-patch` binary. Anyth | `remove` | — | Remove patch from manifest (rolls back first); requires positional `identifier` | | `setup` | — | Configure package.json postinstall scripts | | `repair` | `gc` | Download missing blobs + clean up unused ones | +| `vex` | — | Emit an OpenVEX 0.2.0 attestation derived from the local manifest | **Bare-UUID fallback.** `socket-patch ` is rewritten to `socket-patch get `. The UUID shape checked is the standard 8-4-4-4-12 hex pattern (case-insensitive). See [`src/lib.rs::looks_like_uuid`](src/lib.rs). 
@@ -58,6 +59,7 @@ Beyond the globals above, each subcommand defines a small set of local arguments | `get` | positional `identifier`; `--id` / `--cve` / `--ghsa` / `--package` (`-p`); `--save-only` (alias `--no-apply`); `--one-off` | `SOCKET_SAVE_ONLY`, `SOCKET_ONE_OFF` | Patch lookup + save-vs-apply mode | | `remove` | positional `identifier`; `--skip-rollback` | `SOCKET_SKIP_ROLLBACK` | Manifest entry removal | | `rollback` | optional positional `identifier`; `--one-off` | `SOCKET_ONE_OFF` | Rollback target | +| `vex` | `--output` / `-O`, `--product`, `--no-verify`, `--doc-id`, `--compact` | `SOCKET_VEX_OUTPUT`, `SOCKET_VEX_PRODUCT`, `SOCKET_VEX_NO_VERIFY`, `SOCKET_VEX_DOC_ID`, `SOCKET_VEX_COMPACT` | OpenVEX 0.2.0 document generation; see "vex output channels" below | | `repair` | `--download-only` | `SOCKET_DOWNLOAD_ONLY` | Repair-specific cleanup mode (mutually exclusive with `--offline`) | | `setup` | (none beyond globals) | — | — | @@ -324,6 +326,27 @@ Exit `1` when `status` is `partialFailure` (any `events[*].action == "failed"`) `list` returns **`0`** for an empty manifest and **`1`** for a missing manifest — these are distinct and load-bearing. +`vex` exit codes are tri-state: + +| Code | Meaning | +|---|---| +| `0` | A non-empty OpenVEX document was produced | +| `1` | No applicable patches (empty manifest, or every patch failed the on-disk verification that runs by default unless `--no-verify` is passed) | +| `2` | Hard error before document generation (manifest missing or unreadable, `--json` without `--output`, product auto-detect failed, write error) | + +### vex output channels + +The VEX document is JSON-LD, which collides with the standard `--json` envelope on stdout. 
The shape is: + +| `--output` | `--json` | VEX → | Envelope → | +|---|---|---|---| +| unset | unset | stdout | stderr (one-line summary) | +| set to `<path>` | unset | `<path>` | stdout (one-line summary) | +| set to `<path>` | set | `<path>` | stdout (full envelope, with one `verified` event per emitted subcomponent) | +| unset | set | (error: `json_requires_output`, exit `2`) | stdout (envelope-only) | + +When verification is enabled (the default) and a patch is omitted, the failed PURLs are surfaced on stderr in plain mode or as `skipped` events on the envelope in JSON mode. Status becomes `partialFailure` when at least one patch was omitted but at least one was emitted. The exit code stays `0` in that case, since a document was still produced. + ## Semver policy Versioning lives in **`Cargo.toml`** at the workspace root (`version = "..."`) and is propagated to npm, pypi, and cargo wrappers by **`scripts/version-sync.sh `**. diff --git a/crates/socket-patch-cli/src/commands/mod.rs b/crates/socket-patch-cli/src/commands/mod.rs index 269b309..4b092f0 100644 --- a/crates/socket-patch-cli/src/commands/mod.rs +++ b/crates/socket-patch-cli/src/commands/mod.rs @@ -8,3 +8,4 @@ pub mod rollback; pub mod scan; pub mod setup; pub mod unlock; +pub mod vex; diff --git a/crates/socket-patch-cli/src/commands/vex.rs b/crates/socket-patch-cli/src/commands/vex.rs new file mode 100644 index 0000000..2ee5edc --- /dev/null +++ b/crates/socket-patch-cli/src/commands/vex.rs @@ -0,0 +1,381 @@ +//! `socket-patch vex` — generate an OpenVEX 0.2.0 document. +//! +//! Reads the local manifest, optionally verifies each patch's on-disk +//! state, and emits a VEX document describing the vulnerabilities that +//! have been mitigated. Designed to be piped into vexctl, Grype, Trivy, +//! and the like. +//! +//! Output channels: +//! * Default (`--output` unset, `--json` unset): VEX JSON to stdout, +//! human-readable status to stderr. +//! * `--output <path>` (no `--json`): VEX JSON to file, one-line +//! summary to stdout. +//! * `--json` (requires `--output`): VEX JSON to file, envelope JSON +//! 
to stdout. This is the CI integration shape. + +use std::collections::HashMap; +use std::path::PathBuf; + +use clap::Args; +use socket_patch_core::crawlers::CrawlerOptions; +use socket_patch_core::manifest::operations::read_manifest; +use socket_patch_core::manifest::schema::PatchManifest; +use socket_patch_core::vex::{ + build_document, detect_product, BuildOptions, FailedPatch, VerifyOutcome, +}; + +use crate::args::{apply_env_toggles, GlobalArgs}; +use crate::ecosystem_dispatch::{find_packages_for_purls, partition_purls}; +use crate::json_envelope::{ + Command, Envelope, EnvelopeError, PatchAction, PatchEvent, +}; + +#[derive(Args)] +pub struct VexArgs { + #[command(flatten)] + pub common: GlobalArgs, + + /// Write the VEX document to this path instead of stdout. + #[arg(long = "output", short = 'O', env = "SOCKET_VEX_OUTPUT")] + pub output: Option, + + /// Override the auto-detected top-level product PURL/identifier. + /// Auto-detection probes (in order): + /// 1. `.git/config` `[remote "origin"]` — converted to + /// `pkg:github//` for github.com, similar for + /// gitlab.com/bitbucket.org, raw URL otherwise. + /// 2. `package.json` → `pkg:npm/@` + /// 3. `pyproject.toml` → `pkg:pypi/@` + /// 4. `Cargo.toml` → `pkg:cargo/@` + #[arg(long = "product", env = "SOCKET_VEX_PRODUCT")] + pub product: Option, + + /// Skip the on-disk file-hash check and trust the manifest. + /// By default every manifest entry is verified before being + /// emitted; this flag flips that off — useful when generating a + /// VEX doc on a build machine that doesn't have the patched files + /// laid out yet. + #[arg(long = "no-verify", env = "SOCKET_VEX_NO_VERIFY", default_value_t = false)] + pub no_verify: bool, + + /// Override the document `@id`. Default is `urn:uuid:`, + /// regenerated on every invocation. Pin this to get a reproducible + /// doc identifier across runs. 
+ #[arg(long = "doc-id", env = "SOCKET_VEX_DOC_ID")] + pub doc_id: Option, + + /// Emit compact JSON instead of pretty-printed. + #[arg(long = "compact", env = "SOCKET_VEX_COMPACT", default_value_t = false)] + pub compact: bool, +} + +pub async fn run(args: VexArgs) -> i32 { + apply_env_toggles(&args.common); + + // --json without --output would race the envelope and the VEX doc + // on the same stdout stream. Bail out with a clear error before + // doing any work. + if args.common.json && args.output.is_none() { + emit_envelope_error( + &args, + "json_requires_output", + "--json requires --output (the VEX document is itself JSON; \ + route it to a file so the envelope can use stdout)", + ); + return 2; + } + + let manifest_path = args.common.resolved_manifest_path(); + + let manifest = match read_manifest(&manifest_path).await { + Ok(Some(m)) => m, + Ok(None) => { + emit_envelope_error( + &args, + "manifest_not_found", + &format!("Manifest not found at {}", manifest_path.display()), + ); + return 2; + } + Err(e) => { + emit_envelope_error(&args, "manifest_unreadable", &e.to_string()); + return 2; + } + }; + + if manifest.patches.is_empty() { + emit_envelope_error( + &args, + "no_patches", + "Manifest is empty — nothing to attest. Run `socket-patch get` \ + or `socket-patch scan --sync` first.", + ); + return 1; + } + + // Resolve product. + let product_id = match resolve_product_id(&args).await { + Ok(id) => id, + Err(reason) => { + emit_envelope_error(&args, "product_undetected", &reason); + return 2; + } + }; + + // Partition manifest into applied / failed. 
+ let outcome = if args.no_verify { + VerifyOutcome { + applied: manifest.patches.keys().cloned().collect(), + failed: Vec::new(), + } + } else { + let package_paths = resolve_package_paths(&args, &manifest).await; + socket_patch_core::vex::applied_patches(&manifest, &package_paths).await + }; + + if !outcome.failed.is_empty() && !args.common.silent && !args.common.json { + for f in &outcome.failed { + eprintln!( + "Warning: omitting patch for {} from VEX ({})", + f.purl, f.reason + ); + } + } + + // Build the document. + let opts = BuildOptions { + product_id, + doc_id: args + .doc_id + .clone() + .unwrap_or_else(|| format!("urn:uuid:{}", uuid::Uuid::new_v4())), + author: "Socket".to_string(), + tooling: Some(format!("socket-patch {}", env!("CARGO_PKG_VERSION"))), + }; + + let doc = match build_document(&manifest, &outcome.applied, &opts) { + Some(doc) => doc, + None => { + emit_envelope_error_with_failures( + &args, + "no_applicable_patches", + "No applied patches with vulnerability metadata to attest.", + &outcome.failed, + ); + return 1; + } + }; + + // Serialize. + let serialized = if args.compact { + match serde_json::to_string(&doc) { + Ok(s) => s, + Err(e) => { + emit_envelope_error(&args, "serialize_failed", &e.to_string()); + return 2; + } + } + } else { + match serde_json::to_string_pretty(&doc) { + Ok(s) => s, + Err(e) => { + emit_envelope_error(&args, "serialize_failed", &e.to_string()); + return 2; + } + } + }; + + // Write. + let wrote_to_file = match &args.output { + Some(path) => { + if let Err(e) = tokio::fs::write(path, &serialized).await { + emit_envelope_error(&args, "write_failed", &e.to_string()); + return 2; + } + true + } + None => { + println!("{serialized}"); + false + } + }; + + // Status reporting. 
+ if args.common.json { + emit_envelope_success(&args, &doc, &outcome.failed); + } else if wrote_to_file { + let path = args.output.as_ref().unwrap().display(); + let stmt_count = doc.statements.len(); + if !args.common.silent { + println!( + "Wrote OpenVEX document with {stmt_count} statement(s) to {path}" + ); + } + } else if !args.common.silent && !args.common.json { + let stmt_count = doc.statements.len(); + eprintln!("Emitted {stmt_count} VEX statement(s)"); + } + + 0 +} + +/// Pick the product PURL from `--product` or by filesystem auto-detect. +async fn resolve_product_id(args: &VexArgs) -> Result { + if let Some(p) = &args.product { + return Ok(p.clone()); + } + let detect = detect_product(&args.common.cwd).await; + for w in &detect.warnings { + if !args.common.silent && !args.common.json { + eprintln!("Warning: {w}"); + } + } + detect.purl.ok_or_else(|| { + format!( + "Could not auto-detect a top-level product PURL in {}. \ + Provide one with --product (e.g. pkg:npm/my-app@1.0.0).", + args.common.cwd.display() + ) + }) +} + +/// Walk the ecosystem dispatch to build the PURL -> on-disk-path map +/// used by `vex::verify::applied_patches`. 
+async fn resolve_package_paths( + args: &VexArgs, + manifest: &PatchManifest, +) -> HashMap { + let purls: Vec = manifest.patches.keys().cloned().collect(); + let partitioned = partition_purls(&purls, args.common.ecosystems.as_deref()); + let crawler_options = CrawlerOptions { + cwd: args.common.cwd.clone(), + global: args.common.global, + global_prefix: args.common.global_prefix.clone(), + batch_size: 0, // unused for find_packages_for_purls + }; + find_packages_for_purls(&partitioned, &crawler_options, args.common.silent).await +} + +fn emit_envelope_error(args: &VexArgs, code: &str, message: &str) { + if args.common.json { + let mut env = Envelope::new(Command::Vex); + env.mark_error(EnvelopeError::new(code, message.to_string())); + println!("{}", env.to_pretty_json()); + } else { + eprintln!("Error: {message}"); + } +} + +fn emit_envelope_error_with_failures( + args: &VexArgs, + code: &str, + message: &str, + failures: &[FailedPatch], +) { + if args.common.json { + let mut env = Envelope::new(Command::Vex); + for f in failures { + env.record( + PatchEvent::new(PatchAction::Skipped, f.purl.clone()) + .with_reason(f.reason.clone(), "patch omitted from VEX"), + ); + } + env.mark_error(EnvelopeError::new(code, message.to_string())); + println!("{}", env.to_pretty_json()); + } else { + eprintln!("Error: {message}"); + for f in failures { + eprintln!(" omitted: {} ({})", f.purl, f.reason); + } + } +} + +fn emit_envelope_success( + _args: &VexArgs, + doc: &socket_patch_core::vex::Document, + failures: &[FailedPatch], +) { + let mut env = Envelope::new(Command::Vex); + for st in &doc.statements { + for prod in &st.products { + for sub in &prod.subcomponents { + env.record( + PatchEvent::new(PatchAction::Verified, sub.id.clone()) + .with_details(serde_json::json!({ + "vulnerability": st.vulnerability.name, + "aliases": st.vulnerability.aliases, + "status": "not_affected", + })), + ); + } + } + } + for f in failures { + env.record( + 
PatchEvent::new(PatchAction::Skipped, f.purl.clone()) + .with_reason(f.reason.clone(), "patch omitted from VEX"), + ); + } + if !failures.is_empty() { + env.mark_partial_failure(); + } + println!("{}", env.to_pretty_json()); +} + +#[cfg(test)] +mod tests { + //! Lightweight tests at the args/wiring layer. End-to-end behavior + //! lives in `tests/e2e_vex*.rs`. + use super::*; + use clap::Parser; + + #[derive(Parser)] + struct Wrap { + #[command(subcommand)] + cmd: Sub, + } + + #[derive(clap::Subcommand)] + enum Sub { + Vex(VexArgs), + } + + #[test] + fn parses_with_defaults() { + let w = Wrap::parse_from(["test", "vex"]); + match w.cmd { + Sub::Vex(args) => { + assert!(args.output.is_none()); + assert!(args.product.is_none()); + assert!(!args.no_verify); + assert!(args.doc_id.is_none()); + assert!(!args.compact); + } + } + } + + #[test] + fn parses_all_flags() { + let w = Wrap::parse_from([ + "test", + "vex", + "--output", + "out.vex.json", + "--product", + "pkg:npm/app@1.0.0", + "--no-verify", + "--doc-id", + "urn:uuid:fixed", + "--compact", + ]); + match w.cmd { + Sub::Vex(args) => { + assert_eq!(args.output.unwrap().to_str(), Some("out.vex.json")); + assert_eq!(args.product.as_deref(), Some("pkg:npm/app@1.0.0")); + assert!(args.no_verify); + assert_eq!(args.doc_id.as_deref(), Some("urn:uuid:fixed")); + assert!(args.compact); + } + } + } +} diff --git a/crates/socket-patch-cli/src/json_envelope.rs b/crates/socket-patch-cli/src/json_envelope.rs index b343c67..2af6d65 100644 --- a/crates/socket-patch-cli/src/json_envelope.rs +++ b/crates/socket-patch-cli/src/json_envelope.rs @@ -324,6 +324,7 @@ pub enum Command { Repair, Setup, Unlock, + Vex, } diff --git a/crates/socket-patch-cli/src/lib.rs b/crates/socket-patch-cli/src/lib.rs index bd9ffbf..0a16bbf 100644 --- a/crates/socket-patch-cli/src/lib.rs +++ b/crates/socket-patch-cli/src/lib.rs @@ -68,6 +68,10 @@ pub enum Commands { /// when free, 1 when held. 
Pass `--release` to also delete the /// lock file when it is free. Unlock(commands::unlock::UnlockArgs), + + /// Generate an OpenVEX 0.2.0 attestation describing the + /// vulnerabilities mitigated by the applied patches. + Vex(commands::vex::VexArgs), } /// Check whether `s` looks like a UUID (8-4-4-4-12 hex pattern). diff --git a/crates/socket-patch-cli/src/main.rs b/crates/socket-patch-cli/src/main.rs index e3e6b24..99222d3 100644 --- a/crates/socket-patch-cli/src/main.rs +++ b/crates/socket-patch-cli/src/main.rs @@ -24,6 +24,7 @@ async fn main() { Commands::Setup(args) => commands::setup::run(args).await, Commands::Repair(args) => commands::repair::run(args).await, Commands::Unlock(args) => commands::unlock::run(args).await, + Commands::Vex(args) => commands::vex::run(args).await, }; std::process::exit(exit_code); diff --git a/crates/socket-patch-cli/tests/e2e_vex.rs b/crates/socket-patch-cli/tests/e2e_vex.rs new file mode 100644 index 0000000..fe23104 --- /dev/null +++ b/crates/socket-patch-cli/tests/e2e_vex.rs @@ -0,0 +1,610 @@ +//! End-to-end tests for the `socket-patch vex` subcommand. +//! +//! Validates the OpenVEX document shape produced by a real invocation +//! of the compiled binary. When `vexctl` is on `PATH` the test also +//! runs the output through `vexctl merge` to confirm spec +//! conformance — the CI workflow installs vexctl before the test +//! step, so this branch is exercised in CI. +//! +//! Layered tests (no-network, no-disk-state required): +//! 1. `--no-verify` against a fixture manifest with multi-CVE vulns +//! 2. `--no-verify` with two patches sharing a GHSA (alias-merge path) +//! 3. error path: empty manifest exits non-zero with no doc +//! 4. verify-mode against patched files laid on disk +//! 5. 
verify-mode where one patch file is missing → omitted + warning + +use std::collections::HashMap; +use std::path::Path; +use std::process::Command; + +use serde_json::Value; +use socket_patch_core::hash::git_sha256::compute_git_sha256_from_bytes; +use socket_patch_core::manifest::schema::{ + PatchFileInfo, PatchManifest, PatchRecord, VulnerabilityInfo, +}; + +fn binary() -> &'static str { + env!("CARGO_BIN_EXE_socket-patch") +} + +/// Write `manifest` to `/.socket/manifest.json`. +fn write_manifest(cwd: &Path, manifest: &PatchManifest) { + let dir = cwd.join(".socket"); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write( + dir.join("manifest.json"), + serde_json::to_string_pretty(manifest).unwrap(), + ) + .unwrap(); +} + +/// Patch record with one file (whose hashes you choose) and one +/// vulnerability. +fn make_record( + uuid: &str, + file_name: &str, + before_hash: &str, + after_hash: &str, + vuln_id: &str, + cves: &[&str], +) -> PatchRecord { + let mut files = HashMap::new(); + files.insert( + file_name.to_string(), + PatchFileInfo { + before_hash: before_hash.to_string(), + after_hash: after_hash.to_string(), + }, + ); + let mut vulns = HashMap::new(); + vulns.insert( + vuln_id.to_string(), + VulnerabilityInfo { + cves: cves.iter().map(|s| s.to_string()).collect(), + summary: "test summary".to_string(), + severity: "high".to_string(), + description: "test description".to_string(), + }, + ); + PatchRecord { + uuid: uuid.to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files, + vulnerabilities: vulns, + description: format!("Patch {uuid}"), + license: "MIT".to_string(), + tier: "free".to_string(), + } +} + +// ────────────────────────────────────────────────────────────────────── +// no-verify path +// ────────────────────────────────────────────────────────────────────── + +#[test] +fn no_verify_emits_valid_openvex() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + let mut manifest = PatchManifest::new(); + 
manifest.patches.insert( + "pkg:npm/lodash@4.17.20".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-aaaa-bbbb-cccc", + &["CVE-2024-1111", "CVE-2024-1112"], + ), + ); + manifest.patches.insert( + "pkg:npm/minimist@1.2.0".to_string(), + make_record( + "22222222-2222-4222-8222-222222222222", + "package/index.js", + "c".repeat(64).as_str(), + "d".repeat(64).as_str(), + "GHSA-dddd-eeee-ffff", + &["CVE-2024-2222"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--no-verify", + "--product", + "pkg:npm/test-app@1.0.0", + "--doc-id", + "urn:uuid:fixed-test-id", + ]) + .output() + .expect("invoke vex"); + assert!( + out.status.success(), + "vex exited non-zero. stderr:\n{}", + String::from_utf8_lossy(&out.stderr) + ); + + let stdout = String::from_utf8(out.stdout).unwrap(); + let doc: Value = serde_json::from_str(&stdout) + .expect("vex stdout must be valid JSON"); + + assert_eq!(doc["@context"], "https://openvex.dev/ns/v0.2.0"); + assert_eq!(doc["@id"], "urn:uuid:fixed-test-id"); + assert_eq!(doc["author"], "Socket"); + assert_eq!(doc["version"], 1); + assert!(doc["tooling"] + .as_str() + .unwrap() + .starts_with("socket-patch ")); + + let statements = doc["statements"].as_array().unwrap(); + assert_eq!(statements.len(), 2, "one statement per GHSA"); + + // Statements are sorted by vuln id (BTreeMap order). 
+ let s0 = &statements[0]; + assert_eq!(s0["vulnerability"]["name"], "GHSA-aaaa-bbbb-cccc"); + let aliases = s0["vulnerability"]["aliases"].as_array().unwrap(); + assert_eq!(aliases.len(), 2); + assert_eq!(aliases[0], "CVE-2024-1111"); + assert_eq!(aliases[1], "CVE-2024-1112"); + assert_eq!(s0["status"], "not_affected"); + assert_eq!(s0["justification"], "inline_mitigations_already_exist"); + + let products = s0["products"].as_array().unwrap(); + assert_eq!(products.len(), 1); + assert_eq!(products[0]["@id"], "pkg:npm/test-app@1.0.0"); + let subs = products[0]["subcomponents"].as_array().unwrap(); + assert_eq!(subs.len(), 1); + assert_eq!(subs[0]["@id"], "pkg:npm/lodash@4.17.20"); + + maybe_validate_with_vexctl(&stdout); +} + +#[test] +fn two_patches_sharing_ghsa_merge_subcomponents() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/foo@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/a.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-shared", + &["CVE-SHARED"], + ), + ); + manifest.patches.insert( + "pkg:npm/bar@2.0.0".to_string(), + make_record( + "22222222-2222-4222-8222-222222222222", + "package/b.js", + "c".repeat(64).as_str(), + "d".repeat(64).as_str(), + "GHSA-shared", + &["CVE-SHARED"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--no-verify", + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(out.status.success()); + + let doc: Value = serde_json::from_slice(&out.stdout).unwrap(); + let stmts = doc["statements"].as_array().unwrap(); + assert_eq!(stmts.len(), 1, "shared GHSA collapses into one statement"); + + let subs = stmts[0]["products"][0]["subcomponents"].as_array().unwrap(); + assert_eq!(subs.len(), 2); + let ids: Vec<&str> = subs.iter().map(|s| 
s["@id"].as_str().unwrap()).collect(); + assert!(ids.contains(&"pkg:npm/foo@1.0.0")); + assert!(ids.contains(&"pkg:npm/bar@2.0.0")); +} + +#[test] +fn empty_manifest_exits_non_zero_with_no_doc() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + write_manifest(cwd, &PatchManifest::new()); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--no-verify", + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(!out.status.success(), "empty manifest must be non-zero exit"); + // Nothing on stdout — the VEX itself isn't written. + assert!( + out.stdout.is_empty(), + "stdout should be empty when no doc is produced. got: {}", + String::from_utf8_lossy(&out.stdout) + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("Error")); +} + +#[test] +fn missing_manifest_exits_non_zero() { + let tmp = tempfile::tempdir().unwrap(); + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + tmp.path().to_str().unwrap(), + "--no-verify", + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(!out.status.success()); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("Manifest not found")); +} + +#[test] +fn json_envelope_requires_output() { + let tmp = tempfile::tempdir().unwrap(); + write_manifest(tmp.path(), &PatchManifest::new()); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + tmp.path().to_str().unwrap(), + "--no-verify", + "--json", + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(!out.status.success()); + // --json forces envelope-on-stdout, which we then assert lives in stdout. 
+ let stdout = String::from_utf8_lossy(&out.stdout); + let env: Value = serde_json::from_str(&stdout).expect("envelope JSON"); + assert_eq!(env["status"], "error"); + assert_eq!(env["error"]["code"], "json_requires_output"); +} + +#[test] +fn json_envelope_with_output_emits_both() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-zzzz", + &["CVE-9999"], + ), + ); + write_manifest(cwd, &manifest); + let vex_path = cwd.join("out.vex.json"); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--no-verify", + "--json", + "--output", + vex_path.to_str().unwrap(), + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(out.status.success()); + + // Envelope on stdout. + let env: Value = serde_json::from_slice(&out.stdout).expect("envelope JSON"); + assert_eq!(env["command"], "vex"); + assert_eq!(env["status"], "success"); + assert_eq!(env["summary"]["verified"], 1); + + // VEX doc at --output. + let vex_text = std::fs::read_to_string(&vex_path).unwrap(); + let doc: Value = serde_json::from_str(&vex_text).unwrap(); + assert_eq!(doc["@context"], "https://openvex.dev/ns/v0.2.0"); + assert_eq!(doc["statements"].as_array().unwrap().len(), 1); + + maybe_validate_with_vexctl(&vex_text); +} + +#[test] +fn auto_detect_prefers_git_remote_over_package_json() { + // Both signals present; the binary must surface the git-remote PURL. 
+ let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + std::fs::write( + cwd.join("package.json"), + r#"{"name":"from-pkg","version":"1.0.0"}"#, + ) + .unwrap(); + let git_dir = cwd.join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + std::fs::write( + git_dir.join("config"), + "[remote \"origin\"]\n\turl = git@github.com:SocketDev/socket-patch.git\n", + ) + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-zz", + &["CVE-ZZ"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args(["vex", "--cwd", cwd.to_str().unwrap(), "--no-verify"]) + .output() + .expect("invoke vex"); + assert!(out.status.success()); + let doc: Value = serde_json::from_slice(&out.stdout).unwrap(); + assert_eq!( + doc["statements"][0]["products"][0]["@id"], + "pkg:github/SocketDev/socket-patch" + ); +} + +#[test] +fn auto_detect_uses_package_json() { + // When --product is omitted the binary reads `package.json` for the + // product PURL. We don't lay down node_modules so we pair this with + // --no-verify. 
+ let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + std::fs::write( + cwd.join("package.json"), + r#"{"name":"my-app","version":"7.7.7"}"#, + ) + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-z", + &["CVE-Z"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--no-verify", + ]) + .output() + .expect("invoke vex"); + assert!(out.status.success()); + let doc: Value = serde_json::from_slice(&out.stdout).unwrap(); + assert_eq!(doc["statements"][0]["products"][0]["@id"], "pkg:npm/my-app@7.7.7"); +} + +// ────────────────────────────────────────────────────────────────────── +// verify-mode tests — lay down patched files on disk and exercise the +// hash-check pipeline. We bypass ecosystem-crawler resolution by writing +// the manifest with PURLs whose npm package layout we control, then +// pointing --cwd at the synthetic node_modules. +// ────────────────────────────────────────────────────────────────────── + +#[test] +fn verify_mode_includes_applied_omits_unapplied() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + // Two npm packages — one we'll lay down "patched", one we won't. 
+ let nm = cwd.join("node_modules"); + let applied_pkg = nm.join("applied-pkg"); + std::fs::create_dir_all(&applied_pkg).unwrap(); + std::fs::write( + applied_pkg.join("package.json"), + r#"{"name":"applied-pkg","version":"1.0.0"}"#, + ) + .unwrap(); + let patched_content = b"patched index"; + let after_hash = compute_git_sha256_from_bytes(patched_content); + std::fs::write(applied_pkg.join("index.js"), patched_content).unwrap(); + + let unapplied_pkg = nm.join("unapplied-pkg"); + std::fs::create_dir_all(&unapplied_pkg).unwrap(); + std::fs::write( + unapplied_pkg.join("package.json"), + r#"{"name":"unapplied-pkg","version":"2.0.0"}"#, + ) + .unwrap(); + // No matching file on disk → verify reports file_not_found. + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/applied-pkg@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + after_hash.as_str(), + "GHSA-applied", + &["CVE-APPLIED"], + ), + ); + manifest.patches.insert( + "pkg:npm/unapplied-pkg@2.0.0".to_string(), + make_record( + "22222222-2222-4222-8222-222222222222", + "package/missing.js", + "c".repeat(64).as_str(), + "d".repeat(64).as_str(), + "GHSA-unapplied", + &["CVE-UNAPPLIED"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--product", + "pkg:npm/test-app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!( + out.status.success(), + "verify mode should succeed when at least one patch verifies. stderr:\n{}", + String::from_utf8_lossy(&out.stderr) + ); + + let doc: Value = serde_json::from_slice(&out.stdout).unwrap(); + let stmts = doc["statements"].as_array().unwrap(); + assert_eq!(stmts.len(), 1, "only the verified patch should appear"); + assert_eq!(stmts[0]["vulnerability"]["name"], "GHSA-applied"); + + // Warning surfaced on stderr. 
+ let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("unapplied-pkg") && stderr.contains("omitting"), + "stderr should warn about omitted patch. got: {stderr}" + ); + + maybe_validate_with_vexctl(&String::from_utf8_lossy(&out.stdout)); +} + +#[test] +fn verify_mode_all_failed_exits_non_zero() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/ghost@1.0.0".to_string(), + make_record( + "11111111-1111-4111-8111-111111111111", + "package/index.js", + "a".repeat(64).as_str(), + "b".repeat(64).as_str(), + "GHSA-ghost", + &["CVE-GHOST"], + ), + ); + write_manifest(cwd, &manifest); + + // No node_modules, no package directory — ecosystem dispatch returns + // empty map, every patch lands in `failed` → no statements → exit 1. + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--product", + "pkg:npm/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!(!out.status.success()); + assert!(out.stdout.is_empty()); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("No applied patches")); +} + +// ────────────────────────────────────────────────────────────────────── +// vexctl integration (run only when the binary is on PATH) +// ────────────────────────────────────────────────────────────────────── + +/// Pipe the VEX text through `vexctl` if it's on `PATH`. CI installs +/// vexctl before the test step so the validation actually runs there; +/// local devs without Go see a skip message instead of a failure. +/// +/// `vexctl merge --files=` loads, parses, and re-emits the +/// document. 
vexctl does not yet expose a dedicated `validate` +/// subcommand at v0.3.x, but a successful merge of a single file is +/// the canonical proof that the input parses cleanly against the +/// OpenVEX schema (`list` requires a selector argument, `filter` +/// requires a query expression — merge is the only no-arg parse gate). +fn maybe_validate_with_vexctl(vex_text: &str) { + let Some(vexctl) = find_vexctl_on_path() else { + eprintln!("(skipping vexctl validation — binary not on PATH)"); + return; + }; + let tmp = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(tmp.path(), vex_text).unwrap(); + + let out = Command::new(&vexctl) + .args(["merge", tmp.path().to_str().unwrap()]) + .output() + .expect("spawn vexctl"); + assert!( + out.status.success(), + "vexctl rejected the document.\nstderr:\n{}\nstdout:\n{}", + String::from_utf8_lossy(&out.stderr), + String::from_utf8_lossy(&out.stdout) + ); + // Sanity: the merge output must itself be valid OpenVEX JSON. + let _: Value = serde_json::from_slice(&out.stdout) + .expect("vexctl merge output must be valid JSON"); +} + +/// Stdlib-only `PATH` lookup for `vexctl`. Returns `None` if missing. 
/// Stdlib-only `PATH` lookup for `vexctl`.
///
/// Walks the `PATH` entries in order and, inside each directory, probes
/// `vexctl` first and `vexctl.exe` second. Returns the first candidate
/// that is a regular file, or `None` when `PATH` is unset or nothing
/// matches. Only `is_file()` is consulted — the executable bit is not
/// checked.
fn find_vexctl_on_path() -> Option<std::path::PathBuf> {
    // Probe order within a single directory; both names are tried on
    // every platform.
    const CANDIDATE_NAMES: [&str; 2] = ["vexctl", "vexctl.exe"];

    let path = std::env::var_os("PATH")?;
    std::env::split_paths(&path)
        .flat_map(|dir| CANDIDATE_NAMES.map(|name| dir.join(name)))
        .find(|candidate| candidate.is_file())
}
+#[derive(Debug, Clone)] +pub struct BuildOptions { + /// Top-level product PURL/identifier. + pub product_id: String, + /// Document `@id` (e.g. `urn:uuid:...`). Caller-controlled so the + /// CLI can honor a `--doc-id` override or default to a random UUID. + pub doc_id: String, + /// Document `author` field. Defaults to "Socket" at the CLI layer. + pub author: String, + /// Optional `tooling` string. Conventionally `socket-patch `. + pub tooling: Option, +} + +/// Build a VEX document from a manifest and a set of applied PURLs. +/// +/// `applied` is a list of PURLs that have been verified (or were +/// declared verified via `--no-verify`). Manifest entries not in +/// `applied` are silently dropped — see the design note in +/// `vex::verify` for why we never emit `affected`. +/// +/// Returns `None` when no statements can be emitted (no applied +/// patches matched the manifest). The CLI converts `None` into a +/// non-zero exit code per the agreed contract. +pub fn build_document( + manifest: &PatchManifest, + applied: &[String], + opts: &BuildOptions, +) -> Option { + let timestamp = now_rfc3339(); + let applied_set: std::collections::HashSet<&str> = + applied.iter().map(|s| s.as_str()).collect(); + + // vuln-id -> (aliases, impact-statement parts, subcomponent PURLs) + // BTreeMap keeps statement order deterministic by vuln id, which + // helps reproducibility for downstream diffs. 
+ let mut grouped: BTreeMap = BTreeMap::new(); + + for (purl, record) in &manifest.patches { + if !applied_set.contains(purl.as_str()) { + continue; + } + for (vuln_id, info) in &record.vulnerabilities { + let entry = grouped.entry(vuln_id.clone()).or_default(); + for cve in &info.cves { + if !entry.aliases.contains(cve) { + entry.aliases.push(cve.clone()); + } + } + entry.subcomponents.insert(purl.clone()); + entry + .impact_parts + .push(format!("Patched via Socket patch {}", record.uuid)); + } + } + + if grouped.is_empty() { + return None; + } + + let mut statements = Vec::with_capacity(grouped.len()); + for (vuln_id, group) in grouped { + let mut aliases = group.aliases; + aliases.sort(); + + let mut subcomponent_ids: Vec = group.subcomponents.into_iter().collect(); + subcomponent_ids.sort(); + let subcomponents = subcomponent_ids + .into_iter() + .map(|id| Subcomponent { + id, + identifiers: None, + hashes: None, + }) + .collect(); + + let mut parts = group.impact_parts; + parts.sort(); + parts.dedup(); + // The `parts.is_empty()` branch is unreachable from the + // public API: the loop above pushes one entry per applied + // (purl, vuln) pair, so every group present in `grouped` + // has ≥1 entry. The defensive `None` arm stays in case a + // future refactor decouples grouping from impact tracking. 
+ let impact_statement = if parts.is_empty() { + None + } else { + Some(parts.join("; ")) + }; + + statements.push(Statement { + id: None, + vulnerability: Vulnerability { + name: vuln_id, + aliases, + }, + timestamp: timestamp.clone(), + last_updated: None, + products: vec![Product { + id: opts.product_id.clone(), + identifiers: None, + hashes: None, + subcomponents, + }], + status: Status::NotAffected, + supplier: None, + justification: Some(Justification::InlineMitigationsAlreadyExist), + impact_statement, + action_statement: None, + }); + } + + Some(Document { + context: OPENVEX_CONTEXT_V0_2_0.to_string(), + id: opts.doc_id.clone(), + author: opts.author.clone(), + role: None, + timestamp, + last_updated: None, + version: 1, + tooling: opts.tooling.clone(), + statements, + }) +} + +#[derive(Default)] +struct VulnGroup { + aliases: Vec, + subcomponents: std::collections::HashSet, + impact_parts: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::manifest::schema::{PatchFileInfo, PatchRecord, VulnerabilityInfo}; + use std::collections::HashMap; + + fn record(uuid: &str, vulns: Vec<(&str, Vec<&str>)>) -> PatchRecord { + let mut vmap = HashMap::new(); + for (vid, cves) in vulns { + vmap.insert( + vid.to_string(), + VulnerabilityInfo { + cves: cves.into_iter().map(String::from).collect(), + summary: String::new(), + severity: "high".to_string(), + description: String::new(), + }, + ); + } + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash: "aaaa".to_string(), + after_hash: "bbbb".to_string(), + }, + ); + PatchRecord { + uuid: uuid.to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files, + vulnerabilities: vmap, + description: String::new(), + license: "MIT".to_string(), + tier: "free".to_string(), + } + } + + fn opts() -> BuildOptions { + BuildOptions { + product_id: "pkg:npm/app@1.0.0".to_string(), + doc_id: "urn:uuid:test".to_string(), + author: "Socket".to_string(), + tooling: 
Some("socket-patch 3.0.0".to_string()), + } + } + + #[test] + fn empty_applied_returns_none() { + let manifest = PatchManifest::new(); + assert!(build_document(&manifest, &[], &opts()).is_none()); + } + + #[test] + fn unapplied_patch_is_skipped() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/lodash@4.0.0".to_string(), + record("u1", vec![("GHSA-aaaa", vec!["CVE-2024-1"])]), + ); + // applied is empty → no statements → None. + assert!(build_document(&manifest, &[], &opts()).is_none()); + } + + #[test] + fn single_patch_single_vuln_produces_one_statement() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/lodash@4.0.0".to_string(), + record("u1", vec![("GHSA-aaaa", vec!["CVE-2024-1"])]), + ); + let doc = build_document( + &manifest, + &["pkg:npm/lodash@4.0.0".to_string()], + &opts(), + ) + .unwrap(); + + assert_eq!(doc.statements.len(), 1); + let st = &doc.statements[0]; + assert_eq!(st.vulnerability.name, "GHSA-aaaa"); + assert_eq!(st.vulnerability.aliases, vec!["CVE-2024-1".to_string()]); + assert_eq!(st.status, Status::NotAffected); + assert_eq!( + st.justification, + Some(Justification::InlineMitigationsAlreadyExist) + ); + assert_eq!(st.products.len(), 1); + assert_eq!(st.products[0].id, "pkg:npm/app@1.0.0"); + assert_eq!(st.products[0].subcomponents.len(), 1); + assert_eq!( + st.products[0].subcomponents[0].id, + "pkg:npm/lodash@4.0.0" + ); + assert!(st.impact_statement.as_ref().unwrap().contains("u1")); + } + + #[test] + fn cves_flatten_into_aliases() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record( + "u1", + vec![("GHSA-bbbb", vec!["CVE-2024-2", "CVE-2024-3"])], + ), + ); + let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) + .unwrap(); + let aliases = &doc.statements[0].vulnerability.aliases; + assert_eq!(aliases.len(), 2); + // Sorted for determinism. 
+ assert_eq!(aliases[0], "CVE-2024-2"); + assert_eq!(aliases[1], "CVE-2024-3"); + } + + #[test] + fn two_patches_sharing_ghsa_merge_into_one_statement() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("u1", vec![("GHSA-cccc", vec!["CVE-A"])]), + ); + manifest.patches.insert( + "pkg:npm/y@2.0.0".to_string(), + record("u2", vec![("GHSA-cccc", vec!["CVE-A"])]), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/x@1.0.0".to_string(), + "pkg:npm/y@2.0.0".to_string(), + ], + &opts(), + ) + .unwrap(); + + assert_eq!(doc.statements.len(), 1); + let subs = &doc.statements[0].products[0].subcomponents; + assert_eq!(subs.len(), 2); + let ids: Vec<&str> = subs.iter().map(|s| s.id.as_str()).collect(); + assert!(ids.contains(&"pkg:npm/x@1.0.0")); + assert!(ids.contains(&"pkg:npm/y@2.0.0")); + // Both patch UUIDs surface in the impact statement. + let imp = doc.statements[0].impact_statement.as_ref().unwrap(); + assert!(imp.contains("u1")); + assert!(imp.contains("u2")); + } + + #[test] + fn one_patch_multiple_vulns_produces_one_statement_each() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record( + "u1", + vec![ + ("GHSA-aaaa", vec!["CVE-1"]), + ("GHSA-bbbb", vec!["CVE-2"]), + ], + ), + ); + + let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) + .unwrap(); + assert_eq!(doc.statements.len(), 2); + // BTreeMap order → sorted by vuln id. 
+ assert_eq!(doc.statements[0].vulnerability.name, "GHSA-aaaa"); + assert_eq!(doc.statements[1].vulnerability.name, "GHSA-bbbb"); + } + + #[test] + fn doc_carries_caller_supplied_fields() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("u1", vec![("GHSA-aaaa", vec![])]), + ); + let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) + .unwrap(); + assert_eq!(doc.context, OPENVEX_CONTEXT_V0_2_0); + assert_eq!(doc.id, "urn:uuid:test"); + assert_eq!(doc.author, "Socket"); + assert_eq!(doc.tooling.as_deref(), Some("socket-patch 3.0.0")); + assert_eq!(doc.version, 1); + } + + // ── Edge-case coverage ──────────────────────────────────────── + + /// `applied` references a PURL the manifest doesn't have. Must + /// not panic, must not emit a statement for the missing PURL. + #[test] + fn applied_purl_absent_from_manifest_is_silently_skipped() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/in-manifest@1.0.0".to_string(), + record("u1", vec![("GHSA-aaaa", vec!["CVE-1"])]), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/in-manifest@1.0.0".to_string(), + "pkg:npm/ghost@9.9.9".to_string(), // not in manifest + ], + &opts(), + ) + .unwrap(); + + assert_eq!(doc.statements.len(), 1); + let subs = &doc.statements[0].products[0].subcomponents; + assert_eq!(subs.len(), 1); + assert_eq!(subs[0].id, "pkg:npm/in-manifest@1.0.0"); + } + + /// A patch in the manifest with zero vulnerabilities contributes + /// no statements. Important: a patch is applied to fix files + /// *without* a vuln record (rare but legal) → silently skip. 
+ #[test] + fn applied_patch_with_zero_vulnerabilities_emits_no_statement() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/with-vuln@1.0.0".to_string(), + record("u1", vec![("GHSA-aaaa", vec!["CVE-1"])]), + ); + manifest.patches.insert( + "pkg:npm/no-vuln@2.0.0".to_string(), + record("u2", vec![]), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/with-vuln@1.0.0".to_string(), + "pkg:npm/no-vuln@2.0.0".to_string(), + ], + &opts(), + ) + .unwrap(); + + assert_eq!(doc.statements.len(), 1); + let subs = &doc.statements[0].products[0].subcomponents; + assert_eq!(subs.len(), 1); + assert_eq!(subs[0].id, "pkg:npm/with-vuln@1.0.0"); + } + + /// A vulnerability with an empty CVE list → statement carries + /// no `aliases` key (omit-when-empty per the serde attribute). + #[test] + fn empty_cve_list_produces_statement_with_no_aliases_key() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("u1", vec![("GHSA-no-cves", vec![])]), + ); + let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) + .unwrap(); + assert_eq!(doc.statements[0].vulnerability.aliases.len(), 0); + + // Serialize and verify the JSON omits the `aliases` key. + let v = serde_json::to_value(&doc.statements[0]).unwrap(); + assert!(v["vulnerability"] + .as_object() + .unwrap() + .get("aliases") + .is_none()); + } + + /// Two patches share a GHSA AND share a CVE → the CVE appears + /// once in `aliases` (dedup-by-HashSet semantics). 
+ #[test] + fn duplicate_cve_across_patches_deduped_in_aliases() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record( + "u1", + vec![("GHSA-shared", vec!["CVE-SHARED", "CVE-X-ONLY"])], + ), + ); + manifest.patches.insert( + "pkg:npm/y@2.0.0".to_string(), + record( + "u2", + vec![("GHSA-shared", vec!["CVE-SHARED", "CVE-Y-ONLY"])], + ), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/x@1.0.0".to_string(), + "pkg:npm/y@2.0.0".to_string(), + ], + &opts(), + ) + .unwrap(); + + assert_eq!(doc.statements.len(), 1); + let aliases = &doc.statements[0].vulnerability.aliases; + // Three unique CVEs, sorted. + assert_eq!( + aliases.as_slice(), + &[ + "CVE-SHARED".to_string(), + "CVE-X-ONLY".to_string(), + "CVE-Y-ONLY".to_string(), + ] + ); + } + + /// Same patch UUID used by two PURLs that share a GHSA → the + /// impact_statement dedups the UUID-mention (no double-count). + #[test] + fn same_uuid_across_two_purls_deduped_in_impact_statement() { + // Two manifest entries, identical UUID and GHSA. Real world: + // the same patch package is fingerprinted against multiple + // installed versions. Builder must dedup the impact line. + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("shared-uuid", vec![("GHSA-shared", vec!["CVE-1"])]), + ); + manifest.patches.insert( + "pkg:npm/x@1.0.1".to_string(), + record("shared-uuid", vec![("GHSA-shared", vec!["CVE-1"])]), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/x@1.0.0".to_string(), + "pkg:npm/x@1.0.1".to_string(), + ], + &opts(), + ) + .unwrap(); + let imp = doc.statements[0].impact_statement.as_ref().unwrap(); + // Count occurrences of "shared-uuid" — must be exactly 1. + assert_eq!( + imp.matches("shared-uuid").count(), + 1, + "duplicate UUID must collapse: {imp}" + ); + } + + /// `BuildOptions.tooling = None` → `Document.tooling` is None and + /// the JSON output omits the key. 
Previously only `Some` was + /// asserted. + #[test] + fn tooling_none_omits_key_in_document() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("u1", vec![("GHSA-x", vec![])]), + ); + let opts = BuildOptions { + product_id: "pkg:npm/app@1.0.0".to_string(), + doc_id: "urn:uuid:t".to_string(), + author: "Socket".to_string(), + tooling: None, + }; + let doc = + build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts) + .unwrap(); + assert!(doc.tooling.is_none()); + + let v = serde_json::to_value(&doc).unwrap(); + assert!(v.as_object().unwrap().get("tooling").is_none()); + } + + /// Empty author string is allowed through unchanged. We don't + /// special-case it; the CLI layer ensures a sensible default. + #[test] + fn empty_author_is_preserved_not_substituted() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record("u1", vec![("GHSA-x", vec![])]), + ); + let opts = BuildOptions { + product_id: "pkg:npm/app@1.0.0".to_string(), + doc_id: "urn:uuid:t".to_string(), + author: String::new(), + tooling: None, + }; + let doc = + build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts) + .unwrap(); + assert_eq!(doc.author, ""); + } + + /// Two builds with the same inputs produce statements with + /// identical content and ordering. Timestamps may differ (the + /// builder calls `now_rfc3339`) but the `statements` field is + /// fully determined by the inputs. 
+ #[test] + fn build_is_deterministic_modulo_timestamps() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record( + "u1", + vec![ + ("GHSA-bbbb", vec!["CVE-2", "CVE-1"]), + ("GHSA-aaaa", vec!["CVE-3"]), + ], + ), + ); + manifest.patches.insert( + "pkg:npm/y@2.0.0".to_string(), + record("u2", vec![("GHSA-aaaa", vec!["CVE-3"])]), + ); + + let applied = vec![ + "pkg:npm/x@1.0.0".to_string(), + "pkg:npm/y@2.0.0".to_string(), + ]; + + let a = build_document(&manifest, &applied, &opts()).unwrap(); + let b = build_document(&manifest, &applied, &opts()).unwrap(); + + // Sanity-strip the per-run timestamp before comparing. + let strip = |mut d: Document| -> Document { + d.timestamp = String::new(); + for s in d.statements.iter_mut() { + s.timestamp = String::new(); + } + d + }; + assert_eq!(strip(a), strip(b)); + } + + /// Every statement's `timestamp` equals the document's `timestamp`. + /// Builder pulls `now_rfc3339()` once and clones into each + /// statement; the contract is "one wall-clock per invocation". + #[test] + fn all_statement_timestamps_equal_document_timestamp() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record( + "u1", + vec![("GHSA-a", vec!["CVE-1"]), ("GHSA-b", vec!["CVE-2"])], + ), + ); + let doc = + build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) + .unwrap(); + for st in &doc.statements { + assert_eq!(st.timestamp, doc.timestamp); + } + } + + /// Subcomponent IDs are sorted within a merged statement. Pin + /// this so downstream tools can rely on stable diff output. 
+ #[test] + fn merged_subcomponents_are_sorted_alphabetically() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/zzz@1.0.0".to_string(), + record("u-z", vec![("GHSA-shared", vec![])]), + ); + manifest.patches.insert( + "pkg:npm/aaa@1.0.0".to_string(), + record("u-a", vec![("GHSA-shared", vec![])]), + ); + manifest.patches.insert( + "pkg:npm/mmm@1.0.0".to_string(), + record("u-m", vec![("GHSA-shared", vec![])]), + ); + + let doc = build_document( + &manifest, + &[ + "pkg:npm/zzz@1.0.0".to_string(), + "pkg:npm/aaa@1.0.0".to_string(), + "pkg:npm/mmm@1.0.0".to_string(), + ], + &opts(), + ) + .unwrap(); + + let subs = &doc.statements[0].products[0].subcomponents; + assert_eq!(subs.len(), 3); + assert_eq!(subs[0].id, "pkg:npm/aaa@1.0.0"); + assert_eq!(subs[1].id, "pkg:npm/mmm@1.0.0"); + assert_eq!(subs[2].id, "pkg:npm/zzz@1.0.0"); + } +} diff --git a/crates/socket-patch-core/src/vex/conformance_tests.rs b/crates/socket-patch-core/src/vex/conformance_tests.rs new file mode 100644 index 0000000..459f7c6 --- /dev/null +++ b/crates/socket-patch-core/src/vex/conformance_tests.rs @@ -0,0 +1,482 @@ +//! Cross-cutting OpenVEX 0.2.0 spec conformance tests. +//! +//! These tests do not fit cleanly inside any single submodule — +//! they assert invariants that span the whole pipeline (schema + +//! builder + serializer). Source of truth: +//! . +//! +//! If a future schema or builder change breaks any of these, the +//! generated documents will fail external validators (Grype, Trivy, +//! `vexctl merge`) — so we want a tight failure here, not at the +//! integration boundary. 
+ +use super::*; +use crate::manifest::schema::{ + PatchFileInfo, PatchManifest, PatchRecord, VulnerabilityInfo, +}; +use std::collections::HashMap; + +fn vuln(cves: &[&str]) -> VulnerabilityInfo { + VulnerabilityInfo { + cves: cves.iter().map(|s| (*s).to_string()).collect(), + summary: String::new(), + severity: "high".to_string(), + description: String::new(), + } +} + +fn record(uuid: &str, vulns: &[(&str, &[&str])]) -> PatchRecord { + let mut vmap = HashMap::new(); + for (id, cves) in vulns { + vmap.insert((*id).to_string(), vuln(cves)); + } + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash: "aa".to_string(), + after_hash: "bb".to_string(), + }, + ); + PatchRecord { + uuid: uuid.to_string(), + exported_at: String::new(), + files, + vulnerabilities: vmap, + description: String::new(), + license: "MIT".to_string(), + tier: "free".to_string(), + } +} + +fn options() -> BuildOptions { + BuildOptions { + product_id: "pkg:npm/test-app@1.0.0".to_string(), + doc_id: "urn:uuid:11111111-1111-4111-8111-111111111111".to_string(), + author: "Socket".to_string(), + tooling: Some("socket-patch 3.0.0".to_string()), + } +} + +fn sample_doc() -> Document { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/lodash@4.17.20".to_string(), + record( + "uuid-1", + &[("GHSA-aaaa", &["CVE-2024-1", "CVE-2024-2"])], + ), + ); + manifest.patches.insert( + "pkg:npm/minimist@1.2.0".to_string(), + record("uuid-2", &[("GHSA-bbbb", &["CVE-2024-3"])]), + ); + build_document( + &manifest, + &[ + "pkg:npm/lodash@4.17.20".to_string(), + "pkg:npm/minimist@1.2.0".to_string(), + ], + &options(), + ) + .expect("build sample doc") +} + +// ── 1. 
// `@context` literal value ─────────────────────────────────

/// The exported constant, the built document, and the serialized
/// `@context` key must all carry the same v0.2.0 IRI.
#[test]
fn context_is_the_canonical_v0_2_0_iri() {
    assert_eq!(OPENVEX_CONTEXT_V0_2_0, "https://openvex.dev/ns/v0.2.0");

    let built = sample_doc();
    assert_eq!(built.context, OPENVEX_CONTEXT_V0_2_0);

    let json = serde_json::to_value(&built).unwrap();
    assert_eq!(json["@context"], OPENVEX_CONTEXT_V0_2_0);
}

// ── 2. JSON-LD `@`-prefixed keys are emitted as such ────────────

/// `context`/`id` fields serialize under their `@`-prefixed names at
/// the document, product, and subcomponent levels, never the bare names.
#[test]
fn at_prefixed_keys_use_at_sign_in_output() {
    let built = sample_doc();
    let json = serde_json::to_value(&built).unwrap();
    let top = json.as_object().unwrap();

    // Document-level.
    for key in ["@context", "@id"] {
        assert!(top.contains_key(key));
    }
    for key in ["context", "id"] {
        assert!(!top.contains_key(key));
    }

    // Product-level (every product `@id` field).
    let products = json["statements"]
        .as_array()
        .unwrap()
        .iter()
        .flat_map(|st| st["products"].as_array().unwrap());
    for product in products {
        let fields = product.as_object().unwrap();
        assert!(fields.contains_key("@id"), "product missing @id");
        assert!(!fields.contains_key("id"));

        // Subcomponents too.
        let Some(subs) = fields.get("subcomponents") else {
            continue;
        };
        for sub in subs.as_array().unwrap() {
            let sub_fields = sub.as_object().unwrap();
            assert!(sub_fields.contains_key("@id"));
            assert!(!sub_fields.contains_key("id"));
        }
    }
}

// ── 3. Status / justification literal strings ───────────────────

/// Pins each `Status` variant to its wire literal.
#[test]
fn all_four_status_literals_match_spec() {
    // Spec section: "Status enum values".
    let table = [
        (Status::NotAffected, "not_affected"),
        (Status::Affected, "affected"),
        (Status::Fixed, "fixed"),
        (Status::UnderInvestigation, "under_investigation"),
    ];
    for (status, wire) in table {
        let encoded = serde_json::to_value(status).unwrap();
        assert_eq!(encoded, serde_json::Value::from(wire));
    }
}

/// Pins each `Justification` variant to its wire literal.
#[test]
fn all_five_justification_literals_match_spec() {
    // Spec section: "Status justifications". Pin each variant to
    // the exact snake_case string the spec calls out.
    let table = [
        (Justification::ComponentNotPresent, "component_not_present"),
        (
            Justification::VulnerableCodeNotPresent,
            "vulnerable_code_not_present",
        ),
        (
            Justification::VulnerableCodeNotInExecutePath,
            "vulnerable_code_not_in_execute_path",
        ),
        (
            Justification::VulnerableCodeCannotBeControlledByAdversary,
            "vulnerable_code_cannot_be_controlled_by_adversary",
        ),
        (
            Justification::InlineMitigationsAlreadyExist,
            "inline_mitigations_already_exist",
        ),
    ];
    for (justification, wire) in table {
        let encoded = serde_json::to_value(justification).unwrap();
        assert_eq!(encoded, serde_json::Value::from(wire));
    }
}

// ── 4. Status ↔ Justification interaction ───────────────────────

/// Every built statement is `not_affected` and carries a
/// justification and an impact statement, but no action statement.
#[test]
fn builder_only_emits_not_affected_with_justification() {
    // Spec: when status == not_affected, a statement MUST carry
    // either a justification or an impact_statement. Our builder
    // always emits both.
    let built = sample_doc();
    assert!(!built.statements.is_empty());

    for statement in &built.statements {
        assert_eq!(statement.status, Status::NotAffected);
        assert!(
            statement.justification.is_some(),
            "not_affected requires a justification"
        );
        assert!(
            statement.impact_statement.is_some(),
            "not_affected requires an impact_statement (we always emit one)"
        );
        // Conversely, action_statement (canonical for `affected`)
        // MUST be absent when status is `not_affected`.
        assert!(
            statement.action_statement.is_none(),
            "action_statement is reserved for status=affected"
        );
    }
}

/// A hand-built `affected` statement serializes without the
/// `justification` and `impact_statement` keys.
#[test]
fn affected_statement_in_json_omits_justification() {
    // We never construct affected statements via the builder, but
    // we DO ship the type — pin the schema invariant that an
    // affected statement with no justification serializes without
    // emitting a `justification` key (per spec).
    let product = Product {
        id: "pkg:npm/x@1.0.0".to_string(),
        identifiers: None,
        hashes: None,
        subcomponents: Vec::new(),
    };
    let affected = Statement {
        id: None,
        vulnerability: Vulnerability {
            name: "CVE-X".to_string(),
            aliases: Vec::new(),
        },
        timestamp: "2024-01-01T00:00:00Z".to_string(),
        last_updated: None,
        products: vec![product],
        status: Status::Affected,
        supplier: None,
        justification: None,
        impact_statement: None,
        action_statement: Some("Upgrade to 1.0.1".to_string()),
    };

    let json = serde_json::to_value(&affected).unwrap();
    assert_eq!(json["status"], "affected");
    let fields = json.as_object().unwrap();
    assert!(!fields.contains_key("justification"));
    assert!(!fields.contains_key("impact_statement"));
    assert_eq!(json["action_statement"], "Upgrade to 1.0.1");
}

// ── 5. Required-field presence guarantees ───────────────────────

#[test]
fn every_required_top_level_document_field_is_serialized() {
    const REQUIRED: [&str; 6] = [
        "@context",
        "@id",
        "author",
        "timestamp",
        "version",
        "statements",
    ];
    let json = serde_json::to_value(sample_doc()).unwrap();
    let fields = json.as_object().unwrap();
    for key in REQUIRED {
        assert!(fields.contains_key(key), "required key {key:?} missing");
    }
}

#[test]
fn every_required_statement_field_is_serialized() {
    const REQUIRED: [&str; 4] = ["vulnerability", "timestamp", "products", "status"];
    let json = serde_json::to_value(sample_doc()).unwrap();
    for statement in json["statements"].as_array().unwrap() {
        let fields = statement.as_object().unwrap();
        for key in REQUIRED {
            assert!(fields.contains_key(key), "required key {key:?} missing");
        }
    }
}

#[test]
fn every_required_product_field_is_serialized() {
    let json = serde_json::to_value(sample_doc()).unwrap();
    let products = json["statements"]
        .as_array()
        .unwrap()
        .iter()
        .flat_map(|st| st["products"].as_array().unwrap());
    for product in products {
        assert!(product.as_object().unwrap().contains_key("@id"));
    }
}

// ── 6.
// Identifier non-emptiness ─────────────────────────────────

#[test]
fn vulnerability_name_is_non_empty_in_every_emitted_statement() {
    let doc = sample_doc();
    for st in &doc.statements {
        assert!(
            !st.vulnerability.name.is_empty(),
            "vulnerability.name must not be empty"
        );
    }
}

#[test]
fn product_id_is_non_empty_in_every_emitted_statement() {
    let doc = sample_doc();
    for product in doc.statements.iter().flat_map(|st| &st.products) {
        assert!(!product.id.is_empty(), "product @id must not be empty");
        for sub in &product.subcomponents {
            assert!(!sub.id.is_empty(), "subcomponent @id must not be empty");
        }
    }
}

#[test]
fn document_id_is_non_empty() {
    let doc = sample_doc();
    assert!(!doc.id.is_empty(), "document @id must not be empty");
}

// ── 7. Timestamp consistency ────────────────────────────────────

#[test]
fn all_statement_timestamps_match_document_timestamp() {
    let doc = sample_doc();
    for st in &doc.statements {
        assert_eq!(
            st.timestamp, doc.timestamp,
            "statement timestamp must match document timestamp"
        );
    }
}

/// Checks every byte of the timestamp, not just the punctuation: the
/// separators must sit at their fixed offsets and every other
/// position must be an ASCII digit.
#[test]
fn document_timestamp_is_rfc3339_z_form() {
    let doc = sample_doc();
    // Format: YYYY-MM-DDTHH:MM:SSZ — 20 chars total.
    let bytes = doc.timestamp.as_bytes();
    assert_eq!(bytes.len(), 20);
    assert!(doc.timestamp.ends_with('Z'));

    for (offset, &byte) in bytes.iter().enumerate() {
        let separator = match offset {
            4 | 7 => Some(b'-'),
            10 => Some(b'T'),
            13 | 16 => Some(b':'),
            19 => Some(b'Z'),
            _ => None,
        };
        match separator {
            Some(expected) => assert_eq!(
                byte, expected,
                "unexpected separator at offset {offset} in {:?}",
                doc.timestamp
            ),
            // Checking only the separators would let a value such as
            // `XXXX-XX-XXTXX:XX:XXZ` through.
            None => assert!(
                byte.is_ascii_digit(),
                "expected a digit at offset {offset} in {:?}",
                doc.timestamp
            ),
        }
    }
}

// ── 8. Document revision counter ────────────────────────────────

#[test]
fn newly_built_document_starts_at_version_1() {
    // Spec: "The version field starts at 1 and is incremented on
    // each update to the document."
    let doc = sample_doc();
    assert_eq!(doc.version, 1);
}

// ── 9.
// Full round-trip with every optional field populated ──────

/// Serializes a hand-built document to pretty JSON and parses it back;
/// the result must equal the input. `action_statement` is the one
/// optional field left as `None` here.
#[test]
fn fully_populated_doc_round_trips_through_serde() {
    use std::collections::BTreeMap;

    let idents = BTreeMap::from([
        ("purl".to_string(), "pkg:npm/x@1.0".to_string()),
        ("cpe23".to_string(), "cpe:2.3:a:foo:bar".to_string()),
    ]);
    let hashes = BTreeMap::from([("sha256".to_string(), "deadbeef".to_string())]);

    let subcomponent = Subcomponent {
        id: "pkg:npm/lodash@4.17.21".to_string(),
        identifiers: Some(idents.clone()),
        hashes: Some(hashes.clone()),
    };
    let product = Product {
        id: "pkg:npm/app@1.0.0".to_string(),
        identifiers: Some(idents),
        hashes: Some(hashes),
        subcomponents: vec![subcomponent],
    };
    let statement = Statement {
        id: Some("urn:uuid:stmt-1".to_string()),
        vulnerability: Vulnerability {
            name: "GHSA-xxx".to_string(),
            aliases: vec!["CVE-2024-1".to_string(), "CVE-2024-2".to_string()],
        },
        timestamp: "2024-01-01T00:00:00Z".to_string(),
        last_updated: Some("2024-06-01T00:00:00Z".to_string()),
        products: vec![product],
        status: Status::NotAffected,
        supplier: Some("https://example.com/supplier".to_string()),
        justification: Some(Justification::InlineMitigationsAlreadyExist),
        impact_statement: Some("Patched via Socket".to_string()),
        action_statement: None,
    };
    let original = Document {
        context: OPENVEX_CONTEXT_V0_2_0.to_string(),
        id: "urn:uuid:abc".to_string(),
        author: "Socket ".to_string(),
        role: Some("publisher".to_string()),
        timestamp: "2024-01-01T00:00:00Z".to_string(),
        last_updated: Some("2024-06-01T00:00:00Z".to_string()),
        version: 7,
        tooling: Some("socket-patch 3.0.0".to_string()),
        statements: vec![statement],
    };

    let encoded = serde_json::to_string_pretty(&original).unwrap();
    let decoded: Document = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded, "fully-populated doc must round-trip");
}

// ── 10.
No `null` values anywhere in builder output ───────────── + +#[test] +fn builder_output_contains_no_null_json_values() { + // skip_serializing_if invariant: every optional field is + // omitted, not serialized as `null`. Walk the entire tree. + fn assert_no_nulls(v: &serde_json::Value, path: &str) { + match v { + serde_json::Value::Null => panic!("found null at {path}"), + serde_json::Value::Object(map) => { + for (k, child) in map { + let p = format!("{path}.{k}"); + assert_no_nulls(child, &p); + } + } + serde_json::Value::Array(arr) => { + for (i, child) in arr.iter().enumerate() { + let p = format!("{path}[{i}]"); + assert_no_nulls(child, &p); + } + } + _ => {} + } + } + let v = serde_json::to_value(sample_doc()).unwrap(); + assert_no_nulls(&v, ""); +} + +// ── 11. Builder produces UTF-8-safe JSON ──────────────────────── + +#[test] +fn builder_output_is_valid_utf8_json() { + let doc = sample_doc(); + // Both encoders must succeed and produce identical parsed JSON. + let compact = serde_json::to_string(&doc).unwrap(); + let pretty = serde_json::to_string_pretty(&doc).unwrap(); + let v_compact: serde_json::Value = serde_json::from_str(&compact).unwrap(); + let v_pretty: serde_json::Value = serde_json::from_str(&pretty).unwrap(); + assert_eq!(v_compact, v_pretty); +} + +// ── 12. Each emitted statement has at least one product ───────── + +#[test] +fn every_emitted_statement_has_at_least_one_product() { + // Spec: products is required and non-empty. The builder always + // populates exactly one entry (the top-level product). + let doc = sample_doc(); + for st in &doc.statements { + assert!(!st.products.is_empty(), "products MUST NOT be empty"); + } +} + +// ── 13. 
Vulnerability aliases are unique within a statement ───── + +#[test] +fn vulnerability_aliases_are_unique_within_statement() { + let doc = sample_doc(); + for st in &doc.statements { + let mut seen = std::collections::HashSet::new(); + for alias in &st.vulnerability.aliases { + assert!( + seen.insert(alias.clone()), + "duplicate alias {alias:?} in statement" + ); + } + } +} + +// ── 14. Subcomponent @ids are unique within a product ─────────── + +#[test] +fn subcomponent_ids_are_unique_within_product() { + let doc = sample_doc(); + for st in &doc.statements { + for p in &st.products { + let mut seen = std::collections::HashSet::new(); + for sub in &p.subcomponents { + assert!( + seen.insert(sub.id.clone()), + "duplicate subcomponent {:?} in product", + sub.id + ); + } + } + } +} diff --git a/crates/socket-patch-core/src/vex/mod.rs b/crates/socket-patch-core/src/vex/mod.rs new file mode 100644 index 0000000..122d3a2 --- /dev/null +++ b/crates/socket-patch-core/src/vex/mod.rs @@ -0,0 +1,112 @@ +//! OpenVEX 0.2.0 document generation from a Socket Patch manifest. +//! +//! Self-contained so it can be lifted into its own crate later. The +//! module is organized as: +//! +//! * [`schema`] — hand-rolled OpenVEX 0.2.0 serde structs. +//! * [`build`] — manifest + applied-set → [`schema::Document`]. +//! * [`product`] — auto-detect the top-level product PURL from the +//! filesystem (package.json / pyproject.toml / Cargo.toml). +//! * [`verify`] — partition manifest entries by on-disk hash check. +//! * [`time`] — minimal RFC 3339 timestamp formatter (no chrono). +//! +//! Cross-references against the Go reference implementation +//! () live next to the affected +//! struct in [`schema`]. 
+ +pub mod build; +pub mod product; +pub mod schema; +pub mod time; +pub mod verify; + +pub use build::{build_document, BuildOptions}; +pub use product::{detect_product, DetectResult}; +pub use schema::{ + Document, Justification, Product, Statement, Status, Subcomponent, Vulnerability, + OPENVEX_CONTEXT_V0_2_0, +}; +pub use verify::{applied_patches, FailedPatch, VerifyOutcome}; + +#[cfg(test)] +mod conformance_tests; + +#[cfg(test)] +mod reexport_tests { + //! Compile-only smoke tests for the public surface. If a future + //! refactor drops a `pub use` line, this module will fail to + //! compile — the visible symptom we want. + + use super::*; + + #[test] + fn every_reexport_is_usable_from_vex_namespace() { + // Names — just touching each one keeps the linker honest. + let _: &str = OPENVEX_CONTEXT_V0_2_0; + + // Types instantiable via Default or struct literal. + let _ = DetectResult::default(); + let _ = VerifyOutcome::default(); + let _ = FailedPatch { + purl: String::new(), + reason: String::new(), + }; + let _ = BuildOptions { + product_id: String::new(), + doc_id: String::new(), + author: String::new(), + tooling: None, + }; + let _ = Vulnerability { + name: "GHSA-x".to_string(), + aliases: Vec::new(), + }; + let _ = Subcomponent { + id: "pkg:npm/x@1".to_string(), + identifiers: None, + hashes: None, + }; + let _ = Product { + id: "pkg:npm/app@1.0".to_string(), + identifiers: None, + hashes: None, + subcomponents: Vec::new(), + }; + let _ = Statement { + id: None, + vulnerability: Vulnerability { + name: "GHSA-x".to_string(), + aliases: Vec::new(), + }, + timestamp: String::new(), + last_updated: None, + products: Vec::new(), + status: Status::NotAffected, + supplier: None, + justification: Some(Justification::InlineMitigationsAlreadyExist), + impact_statement: None, + action_statement: None, + }; + let _ = Document { + context: OPENVEX_CONTEXT_V0_2_0.to_string(), + id: String::new(), + author: String::new(), + role: None, + timestamp: String::new(), + 
last_updated: None, + version: 1, + tooling: None, + statements: Vec::new(), + }; + + // Functions — reference them so an accidental rename + // surfaces here. We can't easily type async fns with + // reference parameters as `fn(_)` pointers (the lifetime + // bound goes through the returned future), so just take + // their address and discard it; the resolver will error if + // the symbol disappears. + let _ = build_document as *const (); + let _ = detect_product as *const (); + let _ = applied_patches as *const (); + } +} diff --git a/crates/socket-patch-core/src/vex/product.rs b/crates/socket-patch-core/src/vex/product.rs new file mode 100644 index 0000000..b4dc014 --- /dev/null +++ b/crates/socket-patch-core/src/vex/product.rs @@ -0,0 +1,981 @@ +//! Top-level product PURL auto-detection. +//! +//! Detection chain (first match wins): +//! 1. `.git/config` `[remote "origin"]` URL — the canonical +//! identifier when the repo IS the product. GitHub/GitLab/ +//! Bitbucket URLs are normalized to +//! `pkg://`; anything else +//! is returned as the raw URL. +//! 2. `package.json` (npm) → `pkg:npm/@` +//! 3. `pyproject.toml` (PyPI) → `pkg:pypi/@` +//! 4. `Cargo.toml` (Cargo) → `pkg:cargo/@` +//! +//! Returns `None` only when none of these sources yield a usable +//! identifier. Multiple-package-manifest case: we pick the highest +//! package-manifest priority and surface a warning via +//! [`DetectResult::warnings`] so the CLI can echo it to stderr. Git +//! remote presence does NOT trigger that warning even when alongside +//! a package manifest — the priority is documented and stable. + +use std::path::Path; + +/// Outcome of [`detect_product`]. +#[derive(Debug, Clone, Default)] +pub struct DetectResult { + /// Detected product PURL, or `None` if nothing matched. + pub purl: Option, + /// Non-fatal observations the CLI should print to stderr — e.g. + /// "found Cargo.toml AND package.json; using package.json". 
pub warnings: Vec<String>,
}

/// Detect the top-level product identifier for the project rooted at `cwd`.
///
/// Sources are tried in priority order and the first usable one wins:
///
/// 1. the git `origin` remote (via `detect_git_remote`),
/// 2. `package.json`,
/// 3. `pyproject.toml`,
/// 4. `Cargo.toml`.
///
/// When more than one package manifest exists in `cwd`, a single warning is
/// appended to [`DetectResult::warnings`]. The warning names the manifest
/// that actually supplied the PURL, or states that none did. No warning is
/// produced when the git remote wins.
///
/// Returns a [`DetectResult`] whose `purl` is `None` when no source yielded
/// a usable identifier.
pub async fn detect_product(cwd: &Path) -> DetectResult {
    let mut result = DetectResult::default();

    // 1. git remote origin (highest priority — canonical when present).
    if let Some(purl) = detect_git_remote(cwd).await {
        result.purl = Some(purl);
        return result;
    }

    let pkg_json = cwd.join("package.json");
    let pyproject = cwd.join("pyproject.toml");
    let cargo = cwd.join("Cargo.toml");

    // 2. Package manifests, highest priority first. `present` collects every
    //    manifest that exists; `used` is the one that produced the PURL. They
    //    differ when a higher-priority manifest exists but is unusable (bad
    //    JSON, missing name/version, workspace-inherited version, ...).
    let mut present: Vec<&str> = Vec::with_capacity(3);
    let mut used: Option<&str> = None;

    if tokio::fs::metadata(&pkg_json).await.is_ok() {
        present.push("package.json");
        result.purl = read_package_json(&pkg_json).await;
        if result.purl.is_some() {
            used = Some("package.json");
        }
    }
    if tokio::fs::metadata(&pyproject).await.is_ok() {
        present.push("pyproject.toml");
        if result.purl.is_none() {
            result.purl = read_pyproject(&pyproject).await;
            if result.purl.is_some() {
                used = Some("pyproject.toml");
            }
        }
    }
    if tokio::fs::metadata(&cargo).await.is_ok() {
        present.push("Cargo.toml");
        if result.purl.is_none() {
            result.purl = read_cargo_toml(&cargo).await;
            if result.purl.is_some() {
                used = Some("Cargo.toml");
            }
        }
    }

    if present.len() > 1 {
        let listed = present.join(", ");
        let warning = match used {
            Some(manifest) => format!(
                "Multiple project manifests detected ({listed}); using {manifest} for the top-level product"
            ),
            None => format!(
                "Multiple project manifests detected ({listed}); none of them provided a usable name and version"
            ),
        };
        result.warnings.push(warning);
    }

    result
}

/// Build `pkg:npm/<name>@<version>` from the `package.json` at `path`.
///
/// Returns `None` when the file cannot be read, is not valid JSON, or when
/// `name` / `version` is missing, not a string, or empty.
async fn read_package_json(path: &Path) -> Option<String> {
    let content = tokio::fs::read_to_string(path).await.ok()?;
    let v: serde_json::Value = serde_json::from_str(&content).ok()?;
    let name = v.get("name")?.as_str()?;
    let version = v.get("version")?.as_str()?;
    if name.is_empty() || version.is_empty() {
        return None;
    }
    // npm scoped packages keep their `@scope/name` form in the PURL —
    // matches how socket-patch's manifest already stores them.
    Some(format!("pkg:npm/{name}@{version}"))
}

/// Build `pkg:pypi/<name>@<version>` from the `pyproject.toml` at `path`.
///
/// Returns `None` when the file cannot be read or neither supported section
/// provides both a name and a version.
async fn read_pyproject(path: &Path) -> Option<String> {
    let content = tokio::fs::read_to_string(path).await.ok()?;
    // PEP 621 `[project]` takes precedence (newer projects favor it),
    // then fall back to Poetry's `[tool.poetry]` for legacy layouts.
    let (name, version) = scan_toml_section(&content, "project")
        .or_else(|| scan_toml_section(&content, "tool.poetry"))?;
    Some(format!("pkg:pypi/{name}@{version}"))
}

/// Build `pkg:cargo/<name>@<version>` from the `[package]` section of the
/// `Cargo.toml` at `path`.
///
/// Returns `None` when the file cannot be read or the section lacks a
/// string-valued `name` or `version`.
async fn read_cargo_toml(path: &Path) -> Option<String> {
    let content = tokio::fs::read_to_string(path).await.ok()?;
    let (name, version) = scan_toml_section(&content, "package")?;
    Some(format!("pkg:cargo/{name}@{version}"))
}

/// Minimal line-based TOML scanner for `[
]` blocks. Reads +/// `name = "..."` and `version = "..."` from the named section and +/// stops at the next `[` header. Robust enough for the well-formed +/// `pyproject.toml` / `Cargo.toml` files we expect at the top level — +/// no full TOML parser dependency. +/// +/// Returns `None` if either key is missing, both keys appear outside +/// the section, the value is empty, or the value is `version.workspace +/// = true` (matches the cargo crawler's behavior of skipping workspace +/// inheritance). +fn scan_toml_section(content: &str, section: &str) -> Option<(String, String)> { + let mut in_section = false; + let mut name: Option = None; + let mut version: Option = None; + let header = format!("[{section}]"); + + for raw in content.lines() { + let line = raw.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + if line.starts_with('[') { + in_section = line == header; + continue; + } + if !in_section { + continue; + } + if let Some(v) = parse_toml_string_kv(line, "name") { + name = Some(v); + } else if let Some(v) = parse_toml_string_kv(line, "version") { + version = Some(v); + } + } + + let name = name?; + let version = version?; + if name.is_empty() || version.is_empty() { + return None; + } + Some((name, version)) +} + +/// Walk up from `start` looking for a `.git/config` (the working tree +/// or any of its ancestors). When found, parse the +/// `[remote "origin"] url = ...` line and convert that URL to a PURL. +/// +/// Returns `None` when: +/// * `cwd` is not inside a git working tree, +/// * `.git/config` has no `[remote "origin"]` section, or +/// * the URL is empty / parsing failed catastrophically. (Otherwise +/// even unrecognized hosts fall through to the raw-URL case.) +/// +/// Worktrees (`.git` as a file pointing at a real git dir elsewhere) +/// are deliberately NOT followed — they're rare and the package- +/// manifest fallback handles them correctly. Submodules likewise: +/// only the outermost `.git/config` wins. 
+async fn detect_git_remote(start: &Path) -> Option { + let git_config_path = find_git_config(start).await?; + let content = tokio::fs::read_to_string(&git_config_path).await.ok()?; + let url = scan_remote_origin_url(&content)?; + Some(remote_url_to_purl(&url)) +} + +/// Walk ancestors looking for `/.git/config` as a regular file. +/// Returns the path to it, or `None` if we exhaust the chain. +async fn find_git_config(start: &Path) -> Option { + let mut cursor = match tokio::fs::canonicalize(start).await { + Ok(p) => p, + Err(_) => start.to_path_buf(), + }; + loop { + let candidate = cursor.join(".git").join("config"); + if tokio::fs::metadata(&candidate) + .await + .map(|m| m.is_file()) + .unwrap_or(false) + { + return Some(candidate); + } + match cursor.parent() { + Some(p) => cursor = p.to_path_buf(), + None => return None, + } + } +} + +/// Read the `url = ...` line out of the `[remote "origin"]` section of +/// a git config file. Returns the trimmed URL, or `None`. +fn scan_remote_origin_url(content: &str) -> Option { + let mut in_section = false; + for raw in content.lines() { + let line = raw.trim(); + if line.starts_with('[') && line.ends_with(']') { + in_section = line == "[remote \"origin\"]"; + continue; + } + if !in_section { + continue; + } + if let Some(rest) = line.strip_prefix("url") { + let rest = rest.trim_start(); + let rest = rest.strip_prefix('=')?.trim(); + if rest.is_empty() { + return None; + } + return Some(rest.to_string()); + } + } + None +} + +/// Convert a git remote URL to a PURL when possible, else return the +/// URL itself (OpenVEX `@id` accepts any URI). +/// +/// Handled forms: +/// * `git@github.com:owner/repo.git` → `pkg:github/owner/repo` +/// * `https://github.com/owner/repo.git` → `pkg:github/owner/repo` +/// * `https://github.com/owner/repo` → `pkg:github/owner/repo` +/// * Same shapes for `gitlab.com` (→ `pkg:gitlab`) and `bitbucket.org` +/// (→ `pkg:bitbucket`). +/// * Anything else (self-hosted gitea, generic SSH, etc.) 
→ URL as-is. +fn remote_url_to_purl(url: &str) -> String { + if let Some((host, path)) = split_remote_host_path(url) { + let cleaned = path.strip_suffix(".git").unwrap_or(path); + let cleaned = cleaned.trim_matches('/'); + let parts: Vec<&str> = cleaned.split('/').collect(); + if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { + let ecosystem = match host { + "github.com" => Some("github"), + "gitlab.com" => Some("gitlab"), + "bitbucket.org" => Some("bitbucket"), + _ => None, + }; + if let Some(eco) = ecosystem { + return format!("pkg:{eco}/{}/{}", parts[0], parts[1]); + } + } + } + url.to_string() +} + +/// Pull `(host, path)` out of a git remote URL. Returns `None` for +/// shapes we don't recognize — the caller falls back to raw-URL mode. +fn split_remote_host_path(url: &str) -> Option<(&str, &str)> { + // SSH form: `git@:`. The `:` is a path separator, NOT + // a port — git's URL parser treats this as scp-style. + if let Some(rest) = url.strip_prefix("git@") { + let (host, path) = rest.split_once(':')?; + return Some((host, path)); + } + // ssh:// or git+ssh:// form: strip both then drop the user. + let stripped = url + .strip_prefix("ssh://") + .or_else(|| url.strip_prefix("git+ssh://")) + .or_else(|| url.strip_prefix("git://")) + .or_else(|| url.strip_prefix("https://")) + .or_else(|| url.strip_prefix("http://")); + if let Some(rest) = stripped { + // Drop optional `user@` prefix. + let rest = match rest.split_once('@') { + Some((_, after)) => after, + None => rest, + }; + let (host_with_port, path) = rest.split_once('/')?; + // Strip a `:port` if present. + let host = host_with_port + .split_once(':') + .map(|(h, _)| h) + .unwrap_or(host_with_port); + return Some((host, path)); + } + None +} + +/// Parse ` = ""`. Returns `None` if the key doesn't match, +/// the value isn't a double-quoted string literal, or the value is +/// empty. Inline-table forms like `version = { workspace = true }` +/// fail this check and are skipped by the caller. 
fn parse_toml_string_kv(line: &str, key: &str) -> Option<String> {
    // Split at the first `=`; everything before it must be exactly `key`.
    let (lhs, rhs) = line.split_once('=')?;
    if lhs.trim() != key {
        return None;
    }
    // The value must open with `"`; it ends at the next `"`. Anything after
    // the closing quote (e.g. a trailing comment) is ignored.
    let quoted = rhs.trim().strip_prefix('"')?;
    let (value, _rest) = quoted.split_once('"')?;
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn detect_package_json() {
        let dir = tempfile::tempdir().unwrap();
        tokio::fs::write(
            dir.path().join("package.json"),
            r#"{"name":"my-app","version":"1.2.3"}"#,
        )
        .await
        .unwrap();

        let r = detect_product(dir.path()).await;
        assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.2.3"));
        assert!(r.warnings.is_empty());
    }

    #[tokio::test]
    async fn detect_scoped_npm_package() {
        let dir = tempfile::tempdir().unwrap();
        tokio::fs::write(
            dir.path().join("package.json"),
            r#"{"name":"@socket/foo","version":"0.1.0"}"#,
        )
        .await
        .unwrap();

        let r = detect_product(dir.path()).await;
        assert_eq!(r.purl.as_deref(), Some("pkg:npm/@socket/foo@0.1.0"));
    }

    #[tokio::test]
    async fn detect_pyproject() {
        let dir = tempfile::tempdir().unwrap();
        let content = "[project]\nname = \"my-pylib\"\nversion = \"0.4.0\"\n";
        tokio::fs::write(dir.path().join("pyproject.toml"), content)
            .await
            .unwrap();

        let r = detect_product(dir.path()).await;
        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/my-pylib@0.4.0"));
    }

    #[tokio::test]
    async fn detect_cargo_toml() {
        let dir = tempfile::tempdir().unwrap();
        let content = "[package]\nname = \"my-rust\"\nversion = \"2.0.0\"\nedition = \"2021\"\n";
        tokio::fs::write(dir.path().join("Cargo.toml"), content)
            .await
            .unwrap();

        let r = detect_product(dir.path()).await;
        assert_eq!(r.purl.as_deref(), Some("pkg:cargo/my-rust@2.0.0"));
    }

    #[tokio::test]
    async fn
cargo_workspace_inheritance_is_unsupported() { + // `version.workspace = true` is not a quoted string literal, + // so detection should report None rather than emit garbage. + let dir = tempfile::tempdir().unwrap(); + let content = "[package]\nname = \"my-rust\"\nversion.workspace = true\n"; + tokio::fs::write(dir.path().join("Cargo.toml"), content) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + #[tokio::test] + async fn multiple_manifests_warns_and_picks_package_json() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"my-app","version":"1.0.0"}"#, + ) + .await + .unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package]\nname = \"alt\"\nversion = \"9.9.9\"\n", + ) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.0.0")); + assert_eq!(r.warnings.len(), 1); + assert!(r.warnings[0].contains("Multiple")); + } + + #[tokio::test] + async fn empty_dir_returns_none() { + let dir = tempfile::tempdir().unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + assert!(r.warnings.is_empty()); + } + + #[test] + fn scan_toml_skips_other_sections() { + let toml = "[other]\nname = \"wrong\"\nversion = \"0.0.0\"\n\n[package]\nname = \"right\"\nversion = \"1.0.0\"\n"; + let (n, v) = scan_toml_section(toml, "package").unwrap(); + assert_eq!(n, "right"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn scan_toml_ignores_comments_and_blank_lines() { + let toml = "[package]\n# a comment\n\nname = \"x\"\nversion = \"1.0\"\n"; + let (n, v) = scan_toml_section(toml, "package").unwrap(); + assert_eq!(n, "x"); + assert_eq!(v, "1.0"); + } + + #[test] + fn scan_toml_missing_version_returns_none() { + let toml = "[package]\nname = \"only-name\"\n"; + assert!(scan_toml_section(toml, "package").is_none()); + } + + // ─────────────────── git-remote detection 
─────────────────── + + #[test] + fn remote_url_github_ssh_becomes_pkg_github() { + assert_eq!( + remote_url_to_purl("git@github.com:SocketDev/socket-patch.git"), + "pkg:github/SocketDev/socket-patch" + ); + } + + #[test] + fn remote_url_github_https_becomes_pkg_github() { + assert_eq!( + remote_url_to_purl("https://github.com/SocketDev/socket-patch.git"), + "pkg:github/SocketDev/socket-patch" + ); + } + + #[test] + fn remote_url_github_https_no_dot_git() { + assert_eq!( + remote_url_to_purl("https://github.com/SocketDev/socket-patch"), + "pkg:github/SocketDev/socket-patch" + ); + } + + #[test] + fn remote_url_gitlab_and_bitbucket() { + assert_eq!( + remote_url_to_purl("git@gitlab.com:foo/bar.git"), + "pkg:gitlab/foo/bar" + ); + assert_eq!( + remote_url_to_purl("https://bitbucket.org/foo/bar"), + "pkg:bitbucket/foo/bar" + ); + } + + #[test] + fn remote_url_unknown_host_returns_url_as_is() { + // Self-hosted gitea / unknown forge — VEX `@id` accepts any URI. + let raw = "https://git.example.com/team/repo.git"; + assert_eq!(remote_url_to_purl(raw), raw); + } + + #[test] + fn remote_url_ssh_protocol_form() { + assert_eq!( + remote_url_to_purl("ssh://git@github.com/foo/bar.git"), + "pkg:github/foo/bar" + ); + } + + #[test] + fn scan_origin_url_picks_url_in_section() { + let cfg = "[core]\nbare = false\n[remote \"origin\"]\nurl = git@github.com:foo/bar.git\nfetch = +refs/heads/*:refs/remotes/origin/*\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:foo/bar.git") + ); + } + + #[test] + fn scan_origin_url_ignores_other_remotes() { + // `[remote "upstream"]` must not be confused for origin. 
+ let cfg = "[remote \"upstream\"]\nurl = git@github.com:other/repo.git\n[remote \"origin\"]\nurl = git@github.com:me/repo.git\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:me/repo.git") + ); + } + + #[test] + fn scan_origin_url_returns_none_when_missing() { + assert!(scan_remote_origin_url("[core]\nbare = false\n").is_none()); + } + + #[tokio::test] + async fn detect_prefers_git_remote_over_package_manifest() { + let dir = tempfile::tempdir().unwrap(); + // package.json says "from-pkg"; git remote says "from-git". + // Git remote must win. + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"from-pkg","version":"1.0.0"}"#, + ) + .await + .unwrap(); + let git_dir = dir.path().join(".git"); + tokio::fs::create_dir_all(&git_dir).await.unwrap(); + tokio::fs::write( + git_dir.join("config"), + "[remote \"origin\"]\n\turl = git@github.com:owner/from-git.git\n", + ) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:github/owner/from-git")); + } + + #[tokio::test] + async fn detect_falls_back_to_package_manifest_when_no_git_remote() { + // Empty .git/config (no remote) → fall through to package.json. + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"pkg-only","version":"2.0.0"}"#, + ) + .await + .unwrap(); + let git_dir = dir.path().join(".git"); + tokio::fs::create_dir_all(&git_dir).await.unwrap(); + tokio::fs::write(git_dir.join("config"), "[core]\nbare = false\n") + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:npm/pkg-only@2.0.0")); + } + + #[tokio::test] + async fn detect_finds_git_config_in_parent_directory() { + // Common case: socket-patch is invoked from a subdir of the repo. 
+ let root = tempfile::tempdir().unwrap(); + let git_dir = root.path().join(".git"); + tokio::fs::create_dir_all(&git_dir).await.unwrap(); + tokio::fs::write( + git_dir.join("config"), + "[remote \"origin\"]\n\turl = git@github.com:org/proj.git\n", + ) + .await + .unwrap(); + + let nested = root.path().join("packages").join("inner"); + tokio::fs::create_dir_all(&nested).await.unwrap(); + + let r = detect_product(&nested).await; + assert_eq!(r.purl.as_deref(), Some("pkg:github/org/proj")); + } + + // ── Edge-case + branch coverage ─────────────────────────────── + + /// `.git/config` exists but lists only non-origin remotes → + /// detection must fall through to package-manifest discovery + /// (otherwise the repo would surface no identifier at all). + #[tokio::test] + async fn git_config_with_only_non_origin_remote_falls_through() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"fallback-app","version":"1.0.0"}"#, + ) + .await + .unwrap(); + let git_dir = dir.path().join(".git"); + tokio::fs::create_dir_all(&git_dir).await.unwrap(); + tokio::fs::write( + git_dir.join("config"), + "[remote \"upstream\"]\n\turl = git@github.com:other/proj.git\n", + ) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0")); + } + + /// `url =` with no value after the `=` is a malformed git config. + /// Detection must treat it as "no remote" and fall through. 
+ #[tokio::test] + async fn git_config_with_empty_url_falls_through() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"fallback-app","version":"1.0.0"}"#, + ) + .await + .unwrap(); + let git_dir = dir.path().join(".git"); + tokio::fs::create_dir_all(&git_dir).await.unwrap(); + tokio::fs::write( + git_dir.join("config"), + "[remote \"origin\"]\n\turl = \n", + ) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0")); + } + + /// CRLF line endings — Rust's `str::lines()` already handles + /// `\r\n`, but pin this so a future switch to `split('\n')` + /// would surface the regression. + #[test] + fn scan_origin_url_handles_crlf_line_endings() { + let cfg = + "[remote \"origin\"]\r\n\turl = git@github.com:foo/bar.git\r\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:foo/bar.git") + ); + } + + /// `git+ssh://` URL form → `split_remote_host_path` branch. + #[test] + fn remote_url_git_plus_ssh_form() { + assert_eq!( + remote_url_to_purl("git+ssh://git@github.com/owner/repo.git"), + "pkg:github/owner/repo" + ); + } + + /// `git://` URL form (legacy unauthenticated) — separate branch + /// from `ssh://` and `https://`. + #[test] + fn remote_url_git_protocol_form() { + assert_eq!( + remote_url_to_purl("git://github.com/owner/repo.git"), + "pkg:github/owner/repo" + ); + } + + /// `http://` (plain, not https) — exercises the + /// `strip_prefix("http://")` arm in `split_remote_host_path`. + #[test] + fn remote_url_http_form() { + assert_eq!( + remote_url_to_purl("http://github.com/owner/repo.git"), + "pkg:github/owner/repo" + ); + } + + /// `ssh://git@host:22/path` — port suffix on host must be + /// stripped so the ecosystem lookup still matches `github.com`. 
+ #[test] + fn remote_url_ssh_with_port_strips_port() { + assert_eq!( + remote_url_to_purl("ssh://git@github.com:22/owner/repo.git"), + "pkg:github/owner/repo" + ); + } + + /// Pre-`split_remote_host_path` SSH form WITH NO user prefix: + /// `ssh://github.com/foo/bar.git`. Branch where the `@` split + /// doesn't fire and the whole rest is treated as `host/path`. + #[test] + fn remote_url_ssh_no_user_prefix() { + assert_eq!( + remote_url_to_purl("ssh://github.com/foo/bar.git"), + "pkg:github/foo/bar" + ); + } + + /// Truly unrecognized URL form (no recognized scheme prefix and + /// no scp-style `git@host:path`) → returned as-is. + #[test] + fn remote_url_unknown_shape_returned_verbatim() { + let weird = "file:///srv/repos/proj.git"; + assert_eq!(remote_url_to_purl(weird), weird); + } + + /// `pyproject.toml` with `[tool.poetry]` (Poetry layout) is now + /// supported as a fallback when `[project]` is absent. + #[tokio::test] + async fn detect_pyproject_tool_poetry_layout() { + let dir = tempfile::tempdir().unwrap(); + let content = "[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n"; + tokio::fs::write(dir.path().join("pyproject.toml"), content) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:pypi/poetry-app@0.9.0")); + } + + /// When `[project]` and `[tool.poetry]` are both present, the + /// PEP-621 section wins (modern projects prefer it). + #[tokio::test] + async fn detect_pyproject_project_section_wins_over_tool_poetry() { + let dir = tempfile::tempdir().unwrap(); + let content = "[project]\nname = \"pep621-app\"\nversion = \"1.0.0\"\n\n[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n"; + tokio::fs::write(dir.path().join("pyproject.toml"), content) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:pypi/pep621-app@1.0.0")); + } + + /// Multi-manifest combo: pyproject + Cargo.toml present, no + /// package.json. 
pyproject wins per the priority list. + #[tokio::test] + async fn detect_pyproject_over_cargo_when_no_package_json() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[project]\nname = \"py-app\"\nversion = \"1.0.0\"\n", + ) + .await + .unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package]\nname = \"rust-app\"\nversion = \"2.0.0\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:pypi/py-app@1.0.0")); + assert_eq!(r.warnings.len(), 1); + assert!(r.warnings[0].contains("pyproject.toml")); + assert!(r.warnings[0].contains("Cargo.toml")); + } + + /// `package.json` with only `version` (no `name`) → None. + /// Currently the early `is_empty()` branch in `read_package_json`. + #[tokio::test] + async fn package_json_missing_name_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"version":"1.0.0"}"#, + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `package.json` with empty `name` string → None (is_empty check). + #[tokio::test] + async fn package_json_empty_name_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"","version":"1.0.0"}"#, + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `package.json` with invalid JSON → None (parse-error branch). + #[tokio::test] + async fn package_json_invalid_json_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("package.json"), "{ not json").await.unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `parse_toml_string_kv`: line without `=` → None. 
+ #[test] + fn parse_toml_kv_returns_none_when_no_equals() { + assert!(parse_toml_string_kv("name without equals", "name").is_none()); + } + + /// `parse_toml_string_kv`: key mismatch → None even if value is fine. + #[test] + fn parse_toml_kv_returns_none_when_key_mismatch() { + assert!(parse_toml_string_kv(r#"other = "value""#, "name").is_none()); + } + + /// `parse_toml_string_kv`: missing closing quote → None. + #[test] + fn parse_toml_kv_returns_none_when_unterminated_string() { + assert!(parse_toml_string_kv(r#"name = "no-close"#, "name").is_none()); + } + + /// `parse_toml_string_kv`: empty quoted value → None (we reject + /// `name = ""`). + #[test] + fn parse_toml_kv_returns_none_when_value_empty() { + assert!(parse_toml_string_kv(r#"name = """#, "name").is_none()); + } + + /// `parse_toml_string_kv`: non-string value (e.g. `key = 42`) → + /// None (we only accept quoted strings). + #[test] + fn parse_toml_kv_returns_none_when_value_not_quoted() { + assert!(parse_toml_string_kv(r#"name = 42"#, "name").is_none()); + } + + /// `split_remote_host_path`: SSH URL with no `:` separator → + /// None. Defensive — `git@` prefix without scp-style path. + #[test] + fn split_host_path_rejects_ssh_without_colon() { + assert!(split_remote_host_path("git@github.com").is_none()); + } + + /// `split_remote_host_path`: stripped scheme but no `/` → + /// host-without-path, the inner `split_once('/')` returns None. + #[test] + fn split_host_path_rejects_scheme_url_without_path() { + assert!(split_remote_host_path("https://github.com").is_none()); + } + + /// `remote_url_to_purl`: GitHub URL with 3 path segments + /// (`owner/repo/extra`) falls into the "not exactly 2 parts" + /// branch and returns the raw URL. 
+ #[test] + fn remote_url_three_path_segments_returns_url_as_is() { + let raw = "https://github.com/owner/repo/extra"; + assert_eq!(remote_url_to_purl(raw), raw); + } + + /// `remote_url_to_purl`: trailing slash on the path is trimmed + /// before splitting, so `https://github.com/owner/repo/` still + /// resolves to `pkg:github/owner/repo`. + #[test] + fn remote_url_trailing_slash_is_normalized() { + assert_eq!( + remote_url_to_purl("https://github.com/owner/repo/"), + "pkg:github/owner/repo" + ); + } + + /// `Cargo.toml` with `name` only (no `version`) → None. Exercises + /// the `version?` early-return path inside `scan_toml_section`. + #[tokio::test] + async fn cargo_toml_missing_version_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package]\nname = \"only-name\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// Pyproject without `[project]` AND without `[tool.poetry]` → + /// None. + #[tokio::test] + async fn pyproject_with_no_recognized_section_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[build-system]\nrequires = [\"setuptools\"]\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `DetectResult::default()` is empty (purl=None, warnings=[]). + #[test] + fn detect_result_default_is_empty() { + let r = DetectResult::default(); + assert!(r.purl.is_none()); + assert!(r.warnings.is_empty()); + } + + /// `find_git_config` returns None for a path that genuinely has + /// no `.git/config` on any ancestor. Tempdir on `/var/folders` (macOS) + /// or `/tmp` (linux) gives us a tree that escapes the user's home. + #[tokio::test] + async fn find_git_config_returns_none_when_no_repo_ancestor() { + // Walk up from the tempdir — none of its ancestors should + // contain `.git/config`. 
This depends on the test runner's + // tempdir living outside any git repo; both macOS + // /var/folders and Linux /tmp satisfy that. + let dir = tempfile::tempdir().unwrap(); + let r = find_git_config(dir.path()).await; + assert!(r.is_none(), "unexpected .git/config above {dir:?}: {r:?}"); + } + + /// `find_git_config` handles a non-existent start path via the + /// `canonicalize → Err` arm and still walks ancestors of the + /// raw input. Returns None when no config is found. + #[tokio::test] + async fn find_git_config_handles_non_existent_start_path() { + let dir = tempfile::tempdir().unwrap(); + let nonexistent = dir.path().join("does/not/exist"); + // No I/O panic; the fallback `start.to_path_buf()` arm of + // the `canonicalize` match runs. + let r = find_git_config(&nonexistent).await; + assert!(r.is_none()); + } + + /// `package.json` where `name` is a number, not a string → None. + /// Exercises the `.as_str()?` branch on the JSON value. + #[tokio::test] + async fn package_json_with_non_string_name_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":42,"version":"1.0.0"}"#, + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `package.json` where `version` is a number → None. + #[tokio::test] + async fn package_json_with_non_string_version_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"x","version":42}"#, + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + /// `[remote "origin"]` block has a line that starts with `url` + /// but has no `=` (e.g. `url ` then EOL). The `strip_prefix('=')?` + /// inside `scan_remote_origin_url` returns None and the scanner + /// continues — eventually exhausting the section with no url. 
+ #[test] + fn scan_origin_url_skips_url_line_without_equals_sign() { + let cfg = "[remote \"origin\"]\n\turl no-equals-here\n"; + // The `url` line has no `=`, so the scanner returns None + // from the inner `strip_prefix('=')?` — but per the code + // shape (line 224 with `?` on an Option), that propagates + // out of `scan_remote_origin_url` as None. + assert!(scan_remote_origin_url(cfg).is_none()); + } + + /// `package.json` missing the `version` key entirely. Exercises + /// the `v.get("version")?` early-return path (distinct from the + /// `.as_str()?` branch — `get` returns None, not Some(non-string)). + #[tokio::test] + async fn package_json_missing_version_key_returns_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + r#"{"name":"x"}"#, + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } +} diff --git a/crates/socket-patch-core/src/vex/schema.rs b/crates/socket-patch-core/src/vex/schema.rs new file mode 100644 index 0000000..1539b92 --- /dev/null +++ b/crates/socket-patch-core/src/vex/schema.rs @@ -0,0 +1,607 @@ +//! OpenVEX 0.2.0 schema types. +//! +//! Hand-rolled from the OpenVEX 0.2.0 spec +//! () and +//! cross-checked against the Go reference implementation +//! (). The serde +//! representation must match the spec verbatim; the `vexctl merge` +//! step in our e2e suite is what catches drift. +//! +//! Field-level notes: +//! * `@context` / `@id` use serde renames because JSON-LD requires the +//! literal `@`-prefixed keys. +//! * Optional fields use `Option` + `skip_serializing_if = "Option::is_none"` +//! so the emitted JSON omits them rather than emitting `null`. Matches +//! the Go implementation's `omitempty` behavior. +//! * `version` is the OpenVEX document revision counter (integer, +//! starts at 1). NOT the schema version. +//! * `Vec` is always present (the spec allows it to be empty +//! 
in principle, but our generator errors out before that state). +//! * `Product.identifiers` / `Product.hashes` (and same on +//! `Subcomponent`) use `BTreeMap` instead of `HashMap` for +//! deterministic key ordering — easier diffing across runs. + +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +pub const OPENVEX_CONTEXT_V0_2_0: &str = "https://openvex.dev/ns/v0.2.0"; + +/// Top-level OpenVEX document. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Document { + #[serde(rename = "@context")] + pub context: String, + #[serde(rename = "@id")] + pub id: String, + pub author: String, + /// Optional role declaration for `author`. Free-form per spec. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub role: Option, + pub timestamp: String, + /// RFC 3339 timestamp of the most recent revision of this doc. + /// Optional; absent in newly-issued documents. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub last_updated: Option, + pub version: u32, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub tooling: Option, + pub statements: Vec, +} + +/// One VEX statement — the unit of "I am asserting that vulnerability X +/// has status S relative to product P". +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Statement { + /// Optional per-statement identifier. When present, must be unique + /// within the document. Spec says it's used to track revisions. + #[serde(rename = "@id", skip_serializing_if = "Option::is_none", default)] + pub id: Option, + pub vulnerability: Vulnerability, + pub timestamp: String, + /// RFC 3339 timestamp of the most recent revision of this statement. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub last_updated: Option, + pub products: Vec, + pub status: Status, + /// Optional supplier IRI overriding the document-level author for + /// this statement. 
+ #[serde(skip_serializing_if = "Option::is_none", default)] + pub supplier: Option, + /// Required when `status == not_affected` (per spec; we don't + /// enforce at the type level — see `vex::conformance_tests`). + #[serde(skip_serializing_if = "Option::is_none", default)] + pub justification: Option, + /// Free-form explanation paired with `not_affected`. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub impact_statement: Option, + /// Canonical companion to `status == affected` (per spec). + /// We never emit `affected` today, but the field exists so the type + /// round-trips a richer doc through our parser. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub action_statement: Option, +} + +/// Vulnerability identifier. `name` is the primary ID (we use the GHSA), +/// `aliases` holds the CVE list. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Vulnerability { + pub name: String, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub aliases: Vec, +} + +/// A product the statement applies to. `@id` is a PURL or any URI; the +/// subcomponent list pinpoints the vulnerable transitive dep. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Product { + #[serde(rename = "@id")] + pub id: String, + /// Optional auxiliary identifiers (PURL, CPE 2.2, CPE 2.3, etc.). + /// Keys are the identifier type (e.g. `"purl"`, `"cpe23"`), + /// values are the literal identifier strings. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub identifiers: Option>, + /// Optional content hashes that pin the product to specific bytes. + /// Keys are hash algorithms (e.g. `"sha256"`), values are hex. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub hashes: Option>, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub subcomponents: Vec, +} + +/// A subcomponent of the product — i.e. the actual vulnerable dependency +/// the patch covers. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Subcomponent { + #[serde(rename = "@id")] + pub id: String, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub identifiers: Option>, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub hashes: Option>, +} + +/// VEX status. Spec defines exactly these four values. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum Status { + NotAffected, + Affected, + Fixed, + UnderInvestigation, +} + +/// VEX `justification` enum — only required when `status = not_affected`. +/// Spec lists five canonical values; we expose them all even though +/// `socket-patch` only emits `InlineMitigationsAlreadyExist` today. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum Justification { + ComponentNotPresent, + VulnerableCodeNotPresent, + VulnerableCodeNotInExecutePath, + VulnerableCodeCannotBeControlledByAdversary, + InlineMitigationsAlreadyExist, +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── Status enum: every variant round-trips ───────────────────── + + /// Spec strings for `Status`. The list IS the contract — keep it + /// matched against the OpenVEX 0.2.0 spec section "Statement + /// Properties → status". 
+ const STATUS_LITERALS: &[(Status, &str)] = &[ + (Status::NotAffected, "not_affected"), + (Status::Affected, "affected"), + (Status::Fixed, "fixed"), + (Status::UnderInvestigation, "under_investigation"), + ]; + + #[test] + fn every_status_variant_serializes_to_spec_literal() { + for (variant, literal) in STATUS_LITERALS { + let json = serde_json::to_string(variant).unwrap(); + assert_eq!(json, format!("\"{literal}\""), "variant {variant:?}"); + } + } + + #[test] + fn every_status_variant_deserializes_from_spec_literal() { + for (variant, literal) in STATUS_LITERALS { + let parsed: Status = + serde_json::from_str(&format!("\"{literal}\"")).unwrap(); + assert_eq!(parsed, *variant, "literal {literal:?}"); + } + } + + #[test] + fn status_rejects_unknown_literal() { + let r: Result = serde_json::from_str("\"pending\""); + assert!(r.is_err(), "unknown status literal must fail to parse"); + } + + // ── Justification enum: every variant round-trips ────────────── + + const JUSTIFICATION_LITERALS: &[(Justification, &str)] = &[ + (Justification::ComponentNotPresent, "component_not_present"), + ( + Justification::VulnerableCodeNotPresent, + "vulnerable_code_not_present", + ), + ( + Justification::VulnerableCodeNotInExecutePath, + "vulnerable_code_not_in_execute_path", + ), + ( + Justification::VulnerableCodeCannotBeControlledByAdversary, + "vulnerable_code_cannot_be_controlled_by_adversary", + ), + ( + Justification::InlineMitigationsAlreadyExist, + "inline_mitigations_already_exist", + ), + ]; + + #[test] + fn every_justification_variant_serializes_to_spec_literal() { + for (variant, literal) in JUSTIFICATION_LITERALS { + let json = serde_json::to_string(variant).unwrap(); + assert_eq!(json, format!("\"{literal}\""), "variant {variant:?}"); + } + } + + #[test] + fn every_justification_variant_deserializes_from_spec_literal() { + for (variant, literal) in JUSTIFICATION_LITERALS { + let parsed: Justification = + serde_json::from_str(&format!("\"{literal}\"")).unwrap(); + 
assert_eq!(parsed, *variant, "literal {literal:?}"); + } + } + + #[test] + fn justification_rejects_unknown_literal() { + let r: Result = + serde_json::from_str("\"hand_waving\""); + assert!(r.is_err()); + } + + // ── Document field shape ────────────────────────────────────── + + fn empty_doc() -> Document { + Document { + context: OPENVEX_CONTEXT_V0_2_0.to_string(), + id: "urn:uuid:1111".to_string(), + author: "Socket".to_string(), + role: None, + timestamp: "2024-01-01T00:00:00Z".to_string(), + last_updated: None, + version: 1, + tooling: None, + statements: Vec::new(), + } + } + + #[test] + fn document_renames_context_and_id() { + let v = serde_json::to_value(empty_doc()).unwrap(); + assert_eq!(v["@context"], OPENVEX_CONTEXT_V0_2_0); + assert_eq!(v["@id"], "urn:uuid:1111"); + let obj = v.as_object().unwrap(); + assert!(obj.get("context").is_none(), "raw `context` must not leak"); + assert!(obj.get("id").is_none(), "raw `id` must not leak"); + } + + #[test] + fn document_omits_all_optional_fields_when_none() { + let v = serde_json::to_value(empty_doc()).unwrap(); + let obj = v.as_object().unwrap(); + for key in ["role", "last_updated", "tooling"] { + assert!( + !obj.contains_key(key), + "key {key:?} must be omitted when None" + ); + } + } + + #[test] + fn document_emits_optional_fields_when_some() { + let mut doc = empty_doc(); + doc.role = Some("publisher".to_string()); + doc.last_updated = Some("2024-02-01T00:00:00Z".to_string()); + doc.tooling = Some("socket-patch 3.0.0".to_string()); + + let v = serde_json::to_value(&doc).unwrap(); + assert_eq!(v["role"], "publisher"); + assert_eq!(v["last_updated"], "2024-02-01T00:00:00Z"); + assert_eq!(v["tooling"], "socket-patch 3.0.0"); + } + + #[test] + fn document_version_round_trips_arbitrary_u32() { + for v in [1u32, 2, 7, 42, u32::MAX] { + let mut doc = empty_doc(); + doc.version = v; + let json = serde_json::to_string(&doc).unwrap(); + let parsed: Document = serde_json::from_str(&json).unwrap(); + 
assert_eq!(parsed.version, v); + } + } + + #[test] + fn document_rejects_missing_required_fields() { + // Drop the `@context` key — required field, parser must error. + let bad = r#"{ + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [] + }"#; + let r: Result = serde_json::from_str(bad); + assert!(r.is_err()); + } + + // ── Statement field shape ───────────────────────────────────── + + fn minimal_statement() -> Statement { + Statement { + id: None, + vulnerability: Vulnerability { + name: "GHSA-xxxx".to_string(), + aliases: Vec::new(), + }, + timestamp: "2024-01-01T00:00:00Z".to_string(), + last_updated: None, + products: vec![Product { + id: "pkg:npm/app@1.0.0".to_string(), + identifiers: None, + hashes: None, + subcomponents: Vec::new(), + }], + status: Status::NotAffected, + supplier: None, + justification: None, + impact_statement: None, + action_statement: None, + } + } + + #[test] + fn statement_omits_all_optional_fields_when_none() { + let v = serde_json::to_value(minimal_statement()).unwrap(); + let obj = v.as_object().unwrap(); + for key in [ + "@id", + "last_updated", + "supplier", + "justification", + "impact_statement", + "action_statement", + ] { + assert!( + !obj.contains_key(key), + "key {key:?} must be omitted when None" + ); + } + // The `aliases` key on the inner vulnerability also omits-empty. 
+ assert!( + v["vulnerability"] + .as_object() + .unwrap() + .get("aliases") + .is_none(), + "empty aliases must omit the key" + ); + } + + #[test] + fn statement_emits_id_under_at_prefix_and_other_optional_fields() { + let mut s = minimal_statement(); + s.id = Some("urn:uuid:stmt-1".to_string()); + s.last_updated = Some("2024-02-01T00:00:00Z".to_string()); + s.supplier = Some("https://example.com/supplier".to_string()); + s.justification = Some(Justification::InlineMitigationsAlreadyExist); + s.impact_statement = Some("Patched via Socket".to_string()); + s.action_statement = Some("Apply socket-patch ".to_string()); + + let v = serde_json::to_value(&s).unwrap(); + // `@id` not raw `id`. + assert_eq!(v["@id"], "urn:uuid:stmt-1"); + assert!(v.as_object().unwrap().get("id").is_none()); + + assert_eq!(v["last_updated"], "2024-02-01T00:00:00Z"); + assert_eq!(v["supplier"], "https://example.com/supplier"); + assert_eq!(v["justification"], "inline_mitigations_already_exist"); + assert_eq!(v["impact_statement"], "Patched via Socket"); + assert_eq!(v["action_statement"], "Apply socket-patch "); + } + + #[test] + fn statement_with_both_justification_and_impact_emits_both_keys() { + let mut s = minimal_statement(); + s.justification = Some(Justification::ComponentNotPresent); + s.impact_statement = Some("Component is not bundled".to_string()); + let v = serde_json::to_value(&s).unwrap(); + assert_eq!(v["justification"], "component_not_present"); + assert_eq!(v["impact_statement"], "Component is not bundled"); + } + + // ── Vulnerability shape ─────────────────────────────────────── + + #[test] + fn vulnerability_with_zero_aliases_omits_key() { + let v = serde_json::to_value(Vulnerability { + name: "GHSA-x".to_string(), + aliases: Vec::new(), + }) + .unwrap(); + assert!(v.as_object().unwrap().get("aliases").is_none()); + assert_eq!(v["name"], "GHSA-x"); + } + + #[test] + fn vulnerability_with_one_alias() { + let v = serde_json::to_value(Vulnerability { + name: 
"GHSA-x".to_string(), + aliases: vec!["CVE-2024-1".to_string()], + }) + .unwrap(); + let arr = v["aliases"].as_array().unwrap(); + assert_eq!(arr.len(), 1); + assert_eq!(arr[0], "CVE-2024-1"); + } + + #[test] + fn vulnerability_with_many_aliases_preserves_order() { + // Builder sorts aliases, but the type itself preserves input + // order — important so callers can rely on Vec semantics. + let aliases = vec![ + "CVE-Z".to_string(), + "CVE-A".to_string(), + "CVE-M".to_string(), + ]; + let v = serde_json::to_value(Vulnerability { + name: "GHSA-x".to_string(), + aliases: aliases.clone(), + }) + .unwrap(); + let arr = v["aliases"].as_array().unwrap(); + assert_eq!(arr.len(), 3); + for (i, want) in aliases.iter().enumerate() { + assert_eq!(arr[i], *want); + } + } + + // ── Product / Subcomponent shape ────────────────────────────── + + #[test] + fn product_renames_id_and_omits_empty_subcomponents() { + let p = Product { + id: "pkg:npm/app@1.0.0".to_string(), + identifiers: None, + hashes: None, + subcomponents: Vec::new(), + }; + let v = serde_json::to_value(&p).unwrap(); + assert_eq!(v["@id"], "pkg:npm/app@1.0.0"); + let obj = v.as_object().unwrap(); + assert!(obj.get("subcomponents").is_none()); + assert!(obj.get("identifiers").is_none()); + assert!(obj.get("hashes").is_none()); + } + + #[test] + fn product_serializes_identifiers_and_hashes_when_set() { + let mut idents = BTreeMap::new(); + idents.insert("purl".to_string(), "pkg:npm/app@1.0.0".to_string()); + idents.insert("cpe23".to_string(), "cpe:2.3:a:foo:bar:1.0".to_string()); + + let mut hashes = BTreeMap::new(); + hashes.insert("sha256".to_string(), "deadbeef".to_string()); + + let p = Product { + id: "pkg:npm/app@1.0.0".to_string(), + identifiers: Some(idents), + hashes: Some(hashes), + subcomponents: Vec::new(), + }; + let v = serde_json::to_value(&p).unwrap(); + // BTreeMap → keys appear in sorted order in the JSON. 
+ assert_eq!(v["identifiers"]["cpe23"], "cpe:2.3:a:foo:bar:1.0"); + assert_eq!(v["identifiers"]["purl"], "pkg:npm/app@1.0.0"); + assert_eq!(v["hashes"]["sha256"], "deadbeef"); + } + + #[test] + fn product_serializes_subcomponents_in_input_order() { + let p = Product { + id: "pkg:npm/app@1.0.0".to_string(), + identifiers: None, + hashes: None, + subcomponents: vec![ + Subcomponent { + id: "pkg:npm/z@1.0".to_string(), + identifiers: None, + hashes: None, + }, + Subcomponent { + id: "pkg:npm/a@1.0".to_string(), + identifiers: None, + hashes: None, + }, + ], + }; + let v = serde_json::to_value(&p).unwrap(); + let arr = v["subcomponents"].as_array().unwrap(); + assert_eq!(arr.len(), 2); + assert_eq!(arr[0]["@id"], "pkg:npm/z@1.0"); + assert_eq!(arr[1]["@id"], "pkg:npm/a@1.0"); + } + + #[test] + fn subcomponent_with_identifiers_and_hashes_round_trips() { + let mut idents = BTreeMap::new(); + idents.insert("purl".to_string(), "pkg:npm/lodash@4.17.21".to_string()); + let mut hashes = BTreeMap::new(); + hashes.insert("sha256".to_string(), "abc123".to_string()); + + let sub = Subcomponent { + id: "pkg:npm/lodash@4.17.21".to_string(), + identifiers: Some(idents), + hashes: Some(hashes), + }; + let json = serde_json::to_string(&sub).unwrap(); + let parsed: Subcomponent = serde_json::from_str(&json).unwrap(); + assert_eq!(sub, parsed); + } + + // ── Full-document round-trips ───────────────────────────────── + + #[test] + fn document_roundtrips_minimal() { + let doc = empty_doc(); + let json = serde_json::to_string(&doc).unwrap(); + let parsed: Document = serde_json::from_str(&json).unwrap(); + assert_eq!(doc, parsed); + } + + #[test] + fn document_roundtrips_with_all_fields_populated() { + let mut idents = BTreeMap::new(); + idents.insert("purl".to_string(), "pkg:npm/app@1.0.0".to_string()); + let mut hashes = BTreeMap::new(); + hashes.insert("sha256".to_string(), "deadbeef".to_string()); + + let doc = Document { + context: OPENVEX_CONTEXT_V0_2_0.to_string(), + id: 
"urn:uuid:abc".to_string(), + author: "Socket".to_string(), + role: Some("publisher".to_string()), + timestamp: "2024-01-01T00:00:00Z".to_string(), + last_updated: Some("2024-06-01T00:00:00Z".to_string()), + version: 3, + tooling: Some("socket-patch 3.0.0".to_string()), + statements: vec![Statement { + id: Some("urn:uuid:stmt-1".to_string()), + vulnerability: Vulnerability { + name: "GHSA-xxx".to_string(), + aliases: vec!["CVE-2024-0001".to_string()], + }, + timestamp: "2024-01-01T00:00:00Z".to_string(), + last_updated: Some("2024-06-01T00:00:00Z".to_string()), + products: vec![Product { + id: "pkg:npm/app@1.0.0".to_string(), + identifiers: Some(idents.clone()), + hashes: Some(hashes.clone()), + subcomponents: vec![Subcomponent { + id: "pkg:npm/lodash@4.17.21".to_string(), + identifiers: Some(idents.clone()), + hashes: Some(hashes.clone()), + }], + }], + status: Status::NotAffected, + supplier: Some("https://example.com/supplier".to_string()), + justification: Some(Justification::InlineMitigationsAlreadyExist), + impact_statement: Some("Patched via Socket".to_string()), + action_statement: Some("Apply socket-patch ".to_string()), + }], + }; + let json = serde_json::to_string_pretty(&doc).unwrap(); + let parsed: Document = serde_json::from_str(&json).unwrap(); + assert_eq!(doc, parsed); + } + + #[test] + fn parsing_a_doc_without_optional_fields_succeeds_via_default() { + // Spec consumers will hand us docs that omit our new optional + // fields. Defaulting must work end-to-end. 
+ let minimal = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [ + { + "vulnerability": {"name": "GHSA-x"}, + "timestamp": "2024-01-01T00:00:00Z", + "products": [{"@id": "pkg:npm/app@1.0.0"}], + "status": "not_affected" + } + ] + }"#; + let doc: Document = serde_json::from_str(minimal).unwrap(); + assert!(doc.role.is_none()); + assert!(doc.last_updated.is_none()); + assert!(doc.tooling.is_none()); + let st = &doc.statements[0]; + assert!(st.id.is_none()); + assert!(st.last_updated.is_none()); + assert!(st.supplier.is_none()); + assert!(st.action_statement.is_none()); + } +} diff --git a/crates/socket-patch-core/src/vex/time.rs b/crates/socket-patch-core/src/vex/time.rs new file mode 100644 index 0000000..dfd3537 --- /dev/null +++ b/crates/socket-patch-core/src/vex/time.rs @@ -0,0 +1,263 @@ +//! Minimal RFC 3339 timestamp formatter from `SystemTime`. +//! +//! We only need UTC output with a trailing `Z` (no timezone offsets, no +//! sub-second precision) — vexctl accepts both forms. Doing this by hand +//! avoids a chrono/jiff dependency for ~30 lines of arithmetic. + +use std::time::{SystemTime, UNIX_EPOCH}; + +/// Format the current time as RFC 3339 in UTC, e.g. `2024-05-24T12:34:56Z`. +pub fn now_rfc3339() -> String { + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + format_unix_secs_rfc3339(secs) +} + +/// Format an absolute UNIX-epoch second count as RFC 3339 UTC. +/// +/// Pulled out as its own function so the formatting can be unit-tested +/// against fixed timestamps without mocking the system clock. +pub fn format_unix_secs_rfc3339(secs: u64) -> String { + let (year, month, day, hour, minute, second) = unix_to_ymdhms(secs); + format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") +} + +/// Convert a UNIX-epoch second count into a (Y, M, D, h, m, s) tuple in UTC. 
+/// +/// Uses the civil_from_days algorithm by Howard Hinnant (public domain): +/// <https://howardhinnant.github.io/date_algorithms.html#civil_from_days>. +/// Adapted to operate on a non-negative second count — socket-patch only +/// ever stamps "now", so pre-1970 inputs are out of scope. +fn unix_to_ymdhms(secs: u64) -> (i32, u32, u32, u32, u32, u32) { + let days = (secs / 86_400) as i64; + let secs_of_day = (secs % 86_400) as u32; + let hour = secs_of_day / 3600; + let minute = (secs_of_day % 3600) / 60; + let second = secs_of_day % 60; + + // civil_from_days: days since 1970-01-01 → (Y, M, D). + // `z` is `days + 719_468`. Since `days` is derived from a `u64` + // input via `secs / 86_400` cast to `i64`, `z` is always + // non-negative for every possible input (a negative would need + // `secs / 86_400` to exceed `i64::MAX`, but `u64::MAX / 86_400` + // is only about 2.1e14, so the cast to `i64` can never + // wrap). The `else { z - 146_096 }` arm is kept + // for algorithmic correctness against the Hinnant reference, + // but is unreachable in practice and llvm-cov reports it as + // such. + let z = days + 719_468; + let era = if z >= 0 { z } else { z - 146_096 } / 146_097; + let doe = (z - era * 146_097) as u64; + let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365; + let y = (yoe as i64) + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = (doy - (153 * mp + 2) / 5 + 1) as u32; + let m = if mp < 10 { mp + 3 } else { mp - 9 } as u32; + let year = (y + if m <= 2 { 1 } else { 0 }) as i32; + + (year, m, d, hour, minute, second) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn epoch_renders_as_1970_01_01() { + assert_eq!(format_unix_secs_rfc3339(0), "1970-01-01T00:00:00Z"); + } + + #[test] + fn known_timestamp_2024_01_01() { + // 1704067200 = 2024-01-01T00:00:00Z (verified via `date -u -d ...`). 
+ assert_eq!( + format_unix_secs_rfc3339(1_704_067_200), + "2024-01-01T00:00:00Z" + ); + } + + #[test] + fn known_timestamp_with_time_of_day() { + // 1716552896 = 2024-05-24T12:14:56Z + assert_eq!( + format_unix_secs_rfc3339(1_716_552_896), + "2024-05-24T12:14:56Z" + ); + } + + #[test] + fn leap_year_feb_29() { + // 2024-02-29T00:00:00Z = 1709164800 + assert_eq!( + format_unix_secs_rfc3339(1_709_164_800), + "2024-02-29T00:00:00Z" + ); + } + + #[test] + fn now_has_z_suffix_and_t_separator() { + // Sanity check the live function — it must always have the + // `YYYY-MM-DDTHH:MM:SSZ` shape regardless of the actual clock. + let s = now_rfc3339(); + assert_eq!(s.len(), 20); + assert_eq!(&s[4..5], "-"); + assert_eq!(&s[7..8], "-"); + assert_eq!(&s[10..11], "T"); + assert_eq!(&s[13..14], ":"); + assert_eq!(&s[16..17], ":"); + assert!(s.ends_with('Z')); + } + + // ── Calendar-algorithm branch coverage ──────────────────────── + + /// Non-leap February: 2023-02-28 23:59:59 → 2023-03-01 00:00:00. + /// Year 2023 is divisible by neither 4 nor 100/400 → Feb has 28 + /// days. Pins the `doe / 1460` leap-day adjustment in the + /// civil_from_days algorithm. + #[test] + fn non_leap_year_feb_to_march_boundary() { + assert_eq!( + format_unix_secs_rfc3339(1_677_628_799), + "2023-02-28T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_677_628_800), + "2023-03-01T00:00:00Z" + ); + } + + /// Year-end roll: 2023-12-31 23:59:59 → 2024-01-01 00:00:00. + /// Exercises the `mp < 10` month wrap and the `m <= 2` year + /// increment of the March-based computational year. + #[test] + fn december_to_january_year_boundary() { + assert_eq!( + format_unix_secs_rfc3339(1_704_067_199), + "2023-12-31T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_704_067_200), + "2024-01-01T00:00:00Z" + ); + } + + /// 2100 is divisible by 100 but NOT by 400 → it is NOT a leap + /// year. Pinning this catches a bug where the algorithm forgets + /// the `doe / 36_524` century correction in the year-of-era arithmetic. 
+ /// Picked 2100-03-01 (1 day after the "would be Feb 29 in a + /// naive impl" boundary). + #[test] + fn century_year_2100_is_not_a_leap_year() { + assert_eq!( + format_unix_secs_rfc3339(4_107_542_400), + "2100-03-01T00:00:00Z" + ); + } + + /// 2000 IS a leap year (divisible by 400). Feb 29 2000 should + /// render correctly — the four-century cycle reset point. + #[test] + fn four_century_year_2000_is_a_leap_year() { + assert_eq!( + format_unix_secs_rfc3339(951_782_400), + "2000-02-29T00:00:00Z" + ); + } + + /// 31-day months → 1st of next month. January→February. + #[test] + fn january_31_to_february_1() { + assert_eq!( + format_unix_secs_rfc3339(1_675_209_599), + "2023-01-31T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_675_209_600), + "2023-02-01T00:00:00Z" + ); + } + + /// 31-day month → 30-day month: March 31 → April 1. + #[test] + fn march_31_to_april_1() { + assert_eq!( + format_unix_secs_rfc3339(1_680_307_199), + "2023-03-31T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_680_307_200), + "2023-04-01T00:00:00Z" + ); + } + + /// 30-day month → 31-day month: April 30 → May 1. + #[test] + fn april_30_to_may_1() { + assert_eq!( + format_unix_secs_rfc3339(1_682_899_199), + "2023-04-30T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_682_899_200), + "2023-05-01T00:00:00Z" + ); + } + + /// 30-day month → 31-day month, second half of year: + /// September 30 → October 1. + #[test] + fn september_30_to_october_1() { + assert_eq!( + format_unix_secs_rfc3339(1_696_118_399), + "2023-09-30T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_696_118_400), + "2023-10-01T00:00:00Z" + ); + } + + /// `u64::MAX` does not panic. Output isn't asserted byte-for-byte + /// because the year exceeds `i32::MAX` and the final `as i32` cast + /// truncates it (silently, in debug and release alike), but the function MUST + /// not crash. Exercise the path and confirm the output still + /// ends with `Z`. 
+ #[test] + fn max_u64_input_does_not_panic() { + // Wrap in `std::panic::catch_unwind` for safety even though + // the function uses pure arithmetic — a regression that + // introduced an unsafe cast would still be caught. + let result = std::panic::catch_unwind(|| { + format_unix_secs_rfc3339(u64::MAX) + }); + assert!(result.is_ok(), "u64::MAX must not panic"); + // The output shape should still end in `Z`. + let s = result.unwrap(); + assert!(s.ends_with('Z'), "output must still end with Z"); + } + + /// `now_rfc3339` must produce a string that round-trips through + /// our own `format_unix_secs_rfc3339` — i.e. the year/month/day + /// fields are within plausible ranges (years 1970..3000, months + /// 01-12, days 01-31). Smoke gate against a future regression + /// where the system clock format diverges from our manual one. + #[test] + fn now_output_parses_into_plausible_fields() { + let s = now_rfc3339(); + let year: u32 = s[0..4].parse().unwrap(); + let month: u32 = s[5..7].parse().unwrap(); + let day: u32 = s[8..10].parse().unwrap(); + let hour: u32 = s[11..13].parse().unwrap(); + let minute: u32 = s[14..16].parse().unwrap(); + let second: u32 = s[17..19].parse().unwrap(); + assert!((1970..3000).contains(&year), "year out of range: {year}"); + assert!((1..=12).contains(&month), "month out of range: {month}"); + assert!((1..=31).contains(&day), "day out of range: {day}"); + assert!(hour < 24); + assert!(minute < 60); + assert!(second < 60); + } +} diff --git a/crates/socket-patch-core/src/vex/verify.rs b/crates/socket-patch-core/src/vex/verify.rs new file mode 100644 index 0000000..c930aff --- /dev/null +++ b/crates/socket-patch-core/src/vex/verify.rs @@ -0,0 +1,411 @@ +//! On-disk verification: which manifest entries are actually applied? +//! +//! A patch is "applied" iff every file the manifest claims it modified +//! currently hashes to its `afterHash`. Anything else — missing file, +//! 
hash mismatch, even one file ahead of expectations — disqualifies +//! the patch from the VEX document. Callers feed the failures into a +//! stderr warning + `--json` envelope warning list; the spec we agreed +//! on is "never emit `affected` or `under_investigation` — just omit". +//! +//! The CLI is responsible for resolving PURL → on-disk package path +//! (it already does this for `apply` / `scan` via the ecosystem +//! dispatcher). We accept a pre-built map so this module stays free of +//! ecosystem-crawler dependencies. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use crate::manifest::schema::PatchManifest; +use crate::patch::apply::{verify_file_patch, VerifyStatus}; + +/// One entry per manifest PURL that did NOT pass verification. The +/// `reason` is a short snake_case tag the CLI can route on (matches +/// the `error_code` convention used by `json_envelope::PatchEvent`). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FailedPatch { + pub purl: String, + pub reason: String, +} + +/// Result of partitioning the manifest into applied vs failed sets. +#[derive(Debug, Clone, Default)] +pub struct VerifyOutcome { + /// PURLs whose on-disk files all hash to their `afterHash`. + pub applied: Vec, + /// PURLs whose verification failed (with a routing tag). + pub failed: Vec, +} + +/// Walk the manifest and bucket each PURL into `applied` / `failed`. +/// +/// `package_paths` is the CLI-supplied `purl -> on-disk package dir` +/// map (from `find_packages_for_purls`). A PURL absent from the map is +/// recorded as `package_not_found` and ends up in `failed`. 
+pub async fn applied_patches( + manifest: &PatchManifest, + package_paths: &HashMap, +) -> VerifyOutcome { + let mut out = VerifyOutcome::default(); + + for (purl, record) in &manifest.patches { + let pkg_path = match package_paths.get(purl) { + Some(p) => p, + None => { + out.failed.push(FailedPatch { + purl: purl.clone(), + reason: "package_not_found".to_string(), + }); + continue; + } + }; + + match verify_patch_record(pkg_path, record).await { + Ok(()) => out.applied.push(purl.clone()), + Err(reason) => out.failed.push(FailedPatch { + purl: purl.clone(), + reason, + }), + } + } + + out +} + +/// Returns `Ok(())` if every file in `record.files` is `AlreadyPatched`. +/// Otherwise returns a short routing tag describing the first failure. +async fn verify_patch_record( + pkg_path: &Path, + record: &crate::manifest::schema::PatchRecord, +) -> Result<(), String> { + for (file_name, file_info) in &record.files { + let result = verify_file_patch(pkg_path, file_name, file_info).await; + match result.status { + VerifyStatus::AlreadyPatched => continue, + VerifyStatus::Ready => return Err("not_applied".to_string()), + VerifyStatus::HashMismatch => return Err("hash_mismatch".to_string()), + VerifyStatus::NotFound => return Err("file_not_found".to_string()), + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hash::git_sha256::compute_git_sha256_from_bytes; + use crate::manifest::schema::{PatchFileInfo, PatchRecord}; + use std::collections::HashMap; + + fn record_with_one_file(after_hash: &str) -> PatchRecord { + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash: "aaaa".to_string(), + after_hash: after_hash.to_string(), + }, + ); + PatchRecord { + uuid: "u".to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + } + } + + #[tokio::test] + async fn 
applied_when_all_files_match_after_hash() { + let pkg_dir = tempfile::tempdir().unwrap(); + let patched = b"patched-content"; + let hash = compute_git_sha256_from_bytes(patched); + tokio::fs::write(pkg_dir.path().join("index.js"), patched) + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest + .patches + .insert("pkg:npm/x@1.0.0".to_string(), record_with_one_file(&hash)); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/x@1.0.0".to_string()]); + assert!(out.failed.is_empty()); + } + + #[tokio::test] + async fn missing_path_falls_into_failed() { + let mut manifest = PatchManifest::new(); + manifest + .patches + .insert("pkg:npm/x@1.0.0".to_string(), record_with_one_file("deadbeef")); + + let paths: HashMap = HashMap::new(); + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed.len(), 1); + assert_eq!(out.failed[0].reason, "package_not_found"); + } + + #[tokio::test] + async fn hash_mismatch_falls_into_failed() { + let pkg_dir = tempfile::tempdir().unwrap(); + tokio::fs::write(pkg_dir.path().join("index.js"), b"not the right content") + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record_with_one_file("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed[0].reason, "hash_mismatch"); + } + + #[tokio::test] + async fn missing_file_falls_into_failed() { + let pkg_dir = tempfile::tempdir().unwrap(); + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + 
record_with_one_file("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.failed[0].reason, "file_not_found"); + } + + #[tokio::test] + async fn partial_apply_still_fails() { + // Two files in the patch: only one is patched on disk → patch + // is not "fully" applied → reported as failed (not_applied for + // the second file). + let pkg_dir = tempfile::tempdir().unwrap(); + let patched_a = b"AAA"; + let hash_a = compute_git_sha256_from_bytes(patched_a); + let original_b = b"original-b"; + let before_b = compute_git_sha256_from_bytes(original_b); + + tokio::fs::write(pkg_dir.path().join("a.js"), patched_a) + .await + .unwrap(); + tokio::fs::write(pkg_dir.path().join("b.js"), original_b) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.js".to_string(), + PatchFileInfo { + before_hash: "aaaa".to_string(), + after_hash: hash_a, + }, + ); + files.insert( + "b.js".to_string(), + PatchFileInfo { + before_hash: before_b, + after_hash: "deadbeef".to_string(), + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed[0].reason, "not_applied"); + } + + // ── Edge-case + degenerate-input coverage ───────────────────── + + /// `VerifyOutcome::default()` is the empty outcome — defaulting + /// is used by the CLI's `--no-verify` path. 
+ #[test] + fn outcome_default_is_empty() { + let o = VerifyOutcome::default(); + assert!(o.applied.is_empty()); + assert!(o.failed.is_empty()); + } + + /// `FailedPatch` equality + clone for downstream consumers + /// (the CLI emits these in `--json` warnings). + #[test] + fn failed_patch_value_semantics() { + let a = FailedPatch { + purl: "pkg:npm/x@1".to_string(), + reason: "hash_mismatch".to_string(), + }; + let b = a.clone(); + assert_eq!(a, b); + } + + /// Empty manifest → empty outcome. No iteration, no panic. + #[tokio::test] + async fn empty_manifest_returns_empty_outcome() { + let manifest = PatchManifest::new(); + let paths: HashMap = HashMap::new(); + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert!(out.failed.is_empty()); + } + + /// A patch with `files = {}` is vacuously applied — the + /// "all files match" predicate is `true` over an empty set. + /// This is intentional behavior: a "patch" that touches no + /// files is always-applied. Documented here so a future + /// refactor that flips the predicate is forced to revisit it. + #[tokio::test] + async fn patch_record_with_zero_files_is_vacuously_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/empty@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files: HashMap::new(), + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert( + "pkg:npm/empty@1.0.0".to_string(), + pkg_dir.path().to_path_buf(), + ); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/empty@1.0.0".to_string()]); + assert!(out.failed.is_empty()); + } + + /// Extra `package_paths` entries that aren't in the manifest + /// are ignored — we iterate manifest entries, not the map. 
+ #[tokio::test] + async fn extra_package_paths_are_ignored() { + let pkg_dir = tempfile::tempdir().unwrap(); + let patched = b"patched"; + let hash = compute_git_sha256_from_bytes(patched); + tokio::fs::write(pkg_dir.path().join("index.js"), patched) + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest + .patches + .insert("pkg:npm/x@1.0.0".to_string(), record_with_one_file(&hash)); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + // Stray entry not in the manifest. + paths.insert( + "pkg:npm/stray@9.9.9".to_string(), + pkg_dir.path().to_path_buf(), + ); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied.len(), 1); + assert_eq!(out.applied[0], "pkg:npm/x@1.0.0"); + assert!(out.failed.is_empty()); + } + + /// Multi-file patch where the FIRST file fails — the iteration + /// halts after the first failure (we don't keep going to + /// surface every reason). Lock this in so future refactors + /// don't accidentally start running the second file's check. + /// + /// The patch lists two files. `a.js` has the wrong content (no + /// match for before_hash or after_hash); `b.js` is fine. Order + /// is non-deterministic across HashMap iteration, so we only + /// assert "one failure reason", not which one. + #[tokio::test] + async fn multi_file_first_failure_short_circuits() { + let pkg_dir = tempfile::tempdir().unwrap(); + // a.js: corrupt + tokio::fs::write(pkg_dir.path().join("a.js"), b"garbage") + .await + .unwrap(); + // b.js: at the right after_hash so it would pass. 
+ let patched_b = b"patched-b"; + let hash_b = compute_git_sha256_from_bytes(patched_b); + tokio::fs::write(pkg_dir.path().join("b.js"), patched_b) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.js".to_string(), + PatchFileInfo { + before_hash: "aaaa".to_string(), + after_hash: "deadbeef".to_string(), + }, + ); + files.insert( + "b.js".to_string(), + PatchFileInfo { + before_hash: "cccc".to_string(), + after_hash: hash_b, + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed.len(), 1, "first failure must short-circuit"); + // Reason depends on iteration order, but it MUST be one of + // the two failure tags (not the success path). + let reason = &out.failed[0].reason; + assert!( + matches!(reason.as_str(), "hash_mismatch" | "not_applied"), + "unexpected reason: {reason}" + ); + } +}