From 926d46655b1e4859ee434052a18af9f31adfa828 Mon Sep 17 00:00:00 2001
From: prasanth_j
Date: Mon, 2 Mar 2026 20:32:47 +0530
Subject: [PATCH] feat: implement SBOM diff engine with vulnerability delta calculation

---
Review notes (not part of the commit message):
- Line structure and generic parameters were reconstructed from a flattened
  copy of this patch; indentation of context lines is a best guess, so use
  `git apply --ignore-whitespace` if a hunk is rejected.
- Amended relative to the original commit: doc comments, last-`@` version
  split in `parse_purl_dependency`, ordered set aliases, one extra test. The
  blob ids on the `index` lines still refer to the original commit.

 crates/scanr-cli/src/main.rs | 116 +++++++++-
 crates/scanr-core/src/lib.rs | 462 +++++++++++++++++++++++++++++++++++
 2 files changed, 572 insertions(+), 6 deletions(-)

diff --git a/crates/scanr-cli/src/main.rs b/crates/scanr-cli/src/main.rs
index 1452d36..5f7bab6 100644
--- a/crates/scanr-cli/src/main.rs
+++ b/crates/scanr-cli/src/main.rs
@@ -313,12 +313,63 @@ async fn main() {
                 }
             }
             SbomCommands::Diff { old, new } => {
-                println!(
-                    "Placeholder: diffing SBOM '{}' -> '{}' ({})",
-                    old.display(),
-                    new.display(),
-                    scanr_core::placeholder_status()
-                );
+                match scanr_core::diff_cyclonedx_sbom_files(&old, &new) {
+                    Ok(diff) => {
+                        println!("SBOM Diff");
+                        println!("Old: {}", old.display());
+                        println!("New: {}", new.display());
+                        println!(
+                            "Components: {} -> {}",
+                            diff.old_components, diff.new_components
+                        );
+
+                        println!();
+                        print_dependency_delta_section("Added", &diff.added_dependencies, 100);
+                        print_dependency_delta_section("Removed", &diff.removed_dependencies, 100);
+                        print_version_change_section(&diff.version_changes, 100);
+
+                        println!();
+                        println!(
+                            "Introduced package versions: {}",
+                            diff.introduced_dependencies.len()
+                        );
+
+                        if diff.introduced_dependencies.is_empty() {
+                            println!("New Vulnerabilities: 0");
+                        } else {
+                            match scanr_core::investigate_vulnerabilities(
+                                &diff.introduced_dependencies,
+                            )
+                            .await
+                            {
+                                Ok(report) => {
+                                    let summary =
+                                        scanr_core::summarize_risk(&report.vulnerabilities);
+                                    println!(
+                                        "New Vulnerabilities: {} {}",
+                                        summary.total,
+                                        summarize_severity_for_delta(&summary.counts)
+                                    );
+                                    if report.failed_queries > 0 {
+                                        eprintln!(
+                                            "Warning: vulnerability lookup failed for {}/{} introduced dependencies.",
+                                            report.failed_queries, report.queried_dependencies
+                                        );
+                                    }
+                                }
+                                Err(error) => {
+                                    eprintln!(
+                                        "Warning: vulnerability lookup unavailable for introduced dependencies ({error})."
+                                    );
+                                }
+                            }
+                        }
+                    }
+                    Err(error) => {
+                        eprintln!("SBOM diff failed: {error}");
+                        process::exit(1);
+                    }
+                }
             }
         },
     }
@@ -417,6 +468,59 @@ fn print_upgrade_recommendations_table(recommendations: &[scanr_core::UpgradeRec
     }
 }
 
+/// Prints a labelled dependency list (`name@version [ecosystem]`), capped at
+/// `max_rows` entries with a trailing "... and N more" line when truncated.
+fn print_dependency_delta_section(
+    label: &str,
+    dependencies: &[scanr_core::Dependency],
+    max_rows: usize,
+) {
+    println!("{label}: {}", dependencies.len());
+    for dependency in dependencies.iter().take(max_rows) {
+        println!(
+            "- {}@{} [{}]",
+            dependency.name, dependency.version, dependency.ecosystem
+        );
+    }
+    if dependencies.len() > max_rows {
+        println!("- ... and {} more", dependencies.len() - max_rows);
+    }
+}
+
+/// Prints packages whose version set changed, capped at `max_rows` entries.
+fn print_version_change_section(changes: &[scanr_core::SbomVersionChange], max_rows: usize) {
+    println!("Version changes: {}", changes.len());
+    for change in changes.iter().take(max_rows) {
+        let old_versions = change.old_versions.join(", ");
+        let new_versions = change.new_versions.join(", ");
+        println!(
+            "- {} [{}]: {} -> {}",
+            change.name, change.ecosystem, old_versions, new_versions
+        );
+    }
+    if changes.len() > max_rows {
+        println!("- ... and {} more", changes.len() - max_rows);
+    }
+}
+
+/// Returns the label of the highest severity bucket with a non-zero count
+/// ("MODERATE" is the label used for `medium`), or "NONE" when all are zero.
+fn summarize_severity_for_delta(counts: &scanr_core::SeverityCounts) -> String {
+    if counts.critical > 0 {
+        return "CRITICAL".to_string();
+    }
+    if counts.high > 0 {
+        return "HIGH".to_string();
+    }
+    if counts.medium > 0 {
+        return "MODERATE".to_string();
+    }
+    if counts.low > 0 {
+        return "LOW".to_string();
+    }
+    "NONE".to_string()
+}
+
 fn package_name_from_description(description: &str) -> String {
     description
         .split_once(':')
diff --git a/crates/scanr-core/src/lib.rs b/crates/scanr-core/src/lib.rs
index 7934391..ec85efc 100644
--- a/crates/scanr-core/src/lib.rs
+++ b/crates/scanr-core/src/lib.rs
@@ -171,6 +171,32 @@ pub struct SbomDocument {
     pub json: String,
 }
 
+/// A package present in both SBOMs whose set of versions differs.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct SbomVersionChange {
+    pub ecosystem: Ecosystem,
+    pub name: String,
+    pub old_versions: Vec<String>,
+    pub new_versions: Vec<String>,
+}
+
+/// Result of comparing two CycloneDX SBOMs with [`diff_cyclonedx_sbom_files`].
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct SbomDiffReport {
+    /// Number of distinct `(ecosystem, name, version)` entries in the old SBOM.
+    pub old_components: usize,
+    /// Number of distinct `(ecosystem, name, version)` entries in the new SBOM.
+    pub new_components: usize,
+    /// Every version of each package that appears only in the new SBOM.
+    pub added_dependencies: Vec<Dependency>,
+    /// Every version of each package that appears only in the old SBOM.
+    pub removed_dependencies: Vec<Dependency>,
+    /// Packages present in both SBOMs with differing version sets.
+    pub version_changes: Vec<SbomVersionChange>,
+    /// `added_dependencies` plus the versions in `version_changes` that are new.
+    pub introduced_dependencies: Vec<Dependency>,
+}
+
 #[derive(Debug, Serialize)]
 struct CycloneDxBom {
     #[serde(rename = "bomFormat")]
@@ -214,6 +240,25 @@ struct CycloneDxDependencyEntry {
     depends_on: Vec<String>,
 }
 
+/// Subset of a CycloneDX BOM read when diffing; absent fields take defaults.
+#[derive(Debug, Deserialize, Default)]
+#[serde(default)]
+struct CycloneDxBomInput {
+    components: Vec<CycloneDxComponentInput>,
+}
+
+/// Subset of a CycloneDX component; only `purl`, `version` and `scope` are used.
+#[derive(Debug, Deserialize, Default)]
+#[serde(default)]
+struct CycloneDxComponentInput {
+    #[serde(rename = "type")]
+    _component_type: String,
+    name: String,
+    version: Option<String>,
+    scope: Option<String>,
+    purl: Option<String>,
+}
+
 #[derive(Debug)]
 pub enum ScanError {
     Io {
@@ -458,6 +503,254 @@ pub fn generate_cyclonedx_sbom(path: &Path) -> Result<SbomDocument, ScanError> {
     })
 }
 
+/// Compares two CycloneDX JSON SBOM files and reports dependency changes.
+///
+/// Components are keyed by `(ecosystem, name)`; a package found only in the new
+/// file is "added", only in the old file "removed", and in both with differing
+/// version sets a "version change". `introduced_dependencies` lists every
+/// `(package, version)` that is new in `new_path`, i.e. the set worth scanning
+/// for vulnerabilities.
+///
+/// # Errors
+///
+/// Returns [`ScanError::Io`] if a file cannot be read and [`ScanError::Json`] if
+/// it is not valid JSON for the expected shape.
+pub fn diff_cyclonedx_sbom_files(
+    old_path: &Path,
+    new_path: &Path,
+) -> Result<SbomDiffReport, ScanError> {
+    let old_dependencies = load_sbom_dependencies(old_path)?;
+    let new_dependencies = load_sbom_dependencies(new_path)?;
+
+    type FullKey = (Ecosystem, String, String);
+    type PackageKey = (Ecosystem, String);
+    // Fully qualified: the file's `use` list is not part of this patch.
+    type VersionSet = std::collections::BTreeSet<String>;
+    type PackageSet = std::collections::BTreeSet<PackageKey>;
+
+    // Distinct (ecosystem, name, version) -> direct flag, OR-ed across duplicates.
+    let mut old_direct_map: HashMap<FullKey, bool> = HashMap::new();
+    let mut new_direct_map: HashMap<FullKey, bool> = HashMap::new();
+    for dependency in &old_dependencies {
+        let key = (
+            dependency.ecosystem,
+            dependency.name.clone(),
+            dependency.version.clone(),
+        );
+        old_direct_map
+            .entry(key)
+            .and_modify(|direct| *direct = *direct || dependency.direct)
+            .or_insert(dependency.direct);
+    }
+    for dependency in &new_dependencies {
+        let key = (
+            dependency.ecosystem,
+            dependency.name.clone(),
+            dependency.version.clone(),
+        );
+        new_direct_map
+            .entry(key)
+            .and_modify(|direct| *direct = *direct || dependency.direct)
+            .or_insert(dependency.direct);
+    }
+
+    // Versions seen per package on each side (ordered, so output is stable).
+    let mut old_versions: HashMap<PackageKey, VersionSet> = HashMap::new();
+    let mut new_versions: HashMap<PackageKey, VersionSet> = HashMap::new();
+    for dependency in &old_dependencies {
+        old_versions
+            .entry((dependency.ecosystem, dependency.name.clone()))
+            .or_default()
+            .insert(dependency.version.clone());
+    }
+    for dependency in &new_dependencies {
+        new_versions
+            .entry((dependency.ecosystem, dependency.name.clone()))
+            .or_default()
+            .insert(dependency.version.clone());
+    }
+
+    let old_packages = old_versions.keys().cloned().collect::<PackageSet>();
+    let new_packages = new_versions.keys().cloned().collect::<PackageSet>();
+    let added_packages = new_packages
+        .difference(&old_packages)
+        .cloned()
+        .collect::<PackageSet>();
+    let removed_packages = old_packages
+        .difference(&new_packages)
+        .cloned()
+        .collect::<PackageSet>();
+
+    let mut added = new_direct_map
+        .iter()
+        .filter_map(|((ecosystem, name, version), direct)| {
+            if !added_packages.contains(&(*ecosystem, name.clone())) {
+                return None;
+            }
+            Some(Dependency {
+                ecosystem: *ecosystem,
+                name: name.clone(),
+                version: version.clone(),
+                direct: *direct,
+            })
+        })
+        .collect::<Vec<_>>();
+    let mut removed = old_direct_map
+        .iter()
+        .filter_map(|((ecosystem, name, version), direct)| {
+            if !removed_packages.contains(&(*ecosystem, name.clone())) {
+                return None;
+            }
+            Some(Dependency {
+                ecosystem: *ecosystem,
+                name: name.clone(),
+                version: version.clone(),
+                direct: *direct,
+            })
+        })
+        .collect::<Vec<_>>();
+    added.sort_by(|a, b| {
+        (a.ecosystem, a.name.as_str(), a.version.as_str()).cmp(&(
+            b.ecosystem,
+            b.name.as_str(),
+            b.version.as_str(),
+        ))
+    });
+    removed.sort_by(|a, b| {
+        (a.ecosystem, a.name.as_str(), a.version.as_str()).cmp(&(
+            b.ecosystem,
+            b.name.as_str(),
+            b.version.as_str(),
+        ))
+    });
+
+    // Packages on both sides whose version sets differ.
+    let mut version_changes = Vec::new();
+    for (package_key, old_set_versions) in &old_versions {
+        let Some(new_set_versions) = new_versions.get(package_key) else {
+            continue;
+        };
+        if old_set_versions != new_set_versions {
+            version_changes.push(SbomVersionChange {
+                ecosystem: package_key.0,
+                name: package_key.1.clone(),
+                old_versions: old_set_versions.iter().cloned().collect(),
+                new_versions: new_set_versions.iter().cloned().collect(),
+            });
+        }
+    }
+    version_changes
+        .sort_by(|a, b| (a.ecosystem, a.name.as_str()).cmp(&(b.ecosystem, b.name.as_str())));
+
+    // Everything new on the right-hand side: added packages plus new versions.
+    let mut introduced_map: HashMap<FullKey, bool> = HashMap::new();
+    for dependency in &added {
+        let key = (
+            dependency.ecosystem,
+            dependency.name.clone(),
+            dependency.version.clone(),
+        );
+        introduced_map
+            .entry(key)
+            .and_modify(|direct| *direct = *direct || dependency.direct)
+            .or_insert(dependency.direct);
+    }
+    for change in &version_changes {
+        let old_set_versions = old_versions
+            .get(&(change.ecosystem, change.name.clone()))
+            .cloned()
+            .unwrap_or_default();
+        for new_version in &change.new_versions {
+            if old_set_versions.contains(new_version) {
+                continue;
+            }
+            let direct = new_direct_map
+                .get(&(change.ecosystem, change.name.clone(), new_version.clone()))
+                .copied()
+                .unwrap_or(false);
+            introduced_map
+                .entry((change.ecosystem, change.name.clone(), new_version.clone()))
+                .and_modify(|flag| *flag = *flag || direct)
+                .or_insert(direct);
+        }
+    }
+    let mut introduced_dependencies = introduced_map
+        .into_iter()
+        .map(|((ecosystem, name, version), direct)| Dependency {
+            ecosystem,
+            name,
+            version,
+            direct,
+        })
+        .collect::<Vec<_>>();
+    introduced_dependencies.sort_by(|a, b| {
+        (a.ecosystem, a.name.as_str(), a.version.as_str()).cmp(&(
+            b.ecosystem,
+            b.name.as_str(),
+            b.version.as_str(),
+        ))
+    });
+
+    Ok(SbomDiffReport {
+        old_components: old_direct_map.len(),
+        new_components: new_direct_map.len(),
+        added_dependencies: added,
+        removed_dependencies: removed,
+        version_changes,
+        introduced_dependencies,
+    })
+}
+
+/// Reads a CycloneDX JSON file and returns its components as dependencies.
+///
+/// Components without a parseable purl or without any version are skipped, and
+/// `direct` is set when the component's `scope` is "required" (case-insensitive).
+fn load_sbom_dependencies(path: &Path) -> Result<Vec<Dependency>, ScanError> {
+    let contents = fs::read_to_string(path).map_err(|source| ScanError::Io {
+        path: path.to_path_buf(),
+        source,
+    })?;
+    let parsed: CycloneDxBomInput =
+        serde_json::from_str(&contents).map_err(|source| ScanError::Json {
+            path: path.to_path_buf(),
+            source,
+        })?;
+
+    let mut dependencies = Vec::new();
+    for component in parsed.components {
+        let Some(mut dependency) = dependency_from_component(&component) else {
+            continue;
+        };
+        if dependency.version.trim().is_empty() {
+            continue;
+        }
+        dependency.direct = component
+            .scope
+            .as_deref()
+            .is_some_and(|scope| scope.eq_ignore_ascii_case("required"));
+        dependencies.push(dependency);
+    }
+
+    Ok(dedupe_and_sort(dependencies))
+}
+
+/// Builds a [`Dependency`] from a component's purl, falling back to the
+/// component's `version` field when the purl carries none.
+fn dependency_from_component(component: &CycloneDxComponentInput) -> Option<Dependency> {
+    if let Some(purl) = component.purl.as_deref()
+        && let Some((ecosystem, name, version)) = parse_purl_dependency(purl)
+    {
+        return Some(Dependency {
+            name,
+            version: version.unwrap_or_else(|| component.version.clone().unwrap_or_default()),
+            ecosystem,
+            direct: false,
+        });
+    }
+
+    None
+}
+
 fn normalize_windows_verbatim_path(path: String) -> String {
     if let Some(rest) = path.strip_prefix(r"\\?\UNC\") {
         return format!(r"\\{rest}");
@@ -1176,6 +1469,86 @@ fn encode_purl_segment(raw: &str) -> String {
     raw.replace(' ', "%20")
 }
 
+/// Parses `pkg:<type>/<name>[@<version>][?qualifiers][#subpath]` for the npm,
+/// pypi and cargo ecosystems, returning `None` for anything else.
+///
+/// The version separator is the last `@`, so an npm scope written without
+/// percent-encoding (`pkg:npm/@scope/name@1.0.0`) is kept as part of the name.
+fn parse_purl_dependency(purl: &str) -> Option<(Ecosystem, String, Option<String>)> {
+    let raw = purl.strip_prefix("pkg:")?;
+    let (package_type, remainder) = raw.split_once('/')?;
+    let ecosystem = match package_type.to_ascii_lowercase().as_str() {
+        "npm" => Ecosystem::Node,
+        "pypi" => Ecosystem::Python,
+        "cargo" | "crates.io" => Ecosystem::Rust,
+        _ => return None,
+    };
+
+    let remainder = remainder
+        .split_once('?')
+        .map(|(head, _)| head)
+        .unwrap_or(remainder);
+    let remainder = remainder
+        .split_once('#')
+        .map(|(head, _)| head)
+        .unwrap_or(remainder);
+    let (name_part, version_part) = match remainder.rsplit_once('@') {
+        // The only `@` is an npm scope marker (`@scope/name`): no version present.
+        Some((_, tail)) if tail.contains('/') => (remainder, None),
+        Some((name, version)) => (name, Some(version)),
+        None => (remainder, None),
+    };
+
+    let decoded_name = decode_purl_segment(name_part);
+    if decoded_name.trim().is_empty() {
+        return None;
+    }
+
+    let decoded_version = version_part
+        .map(decode_purl_segment)
+        .filter(|version| !version.trim().is_empty());
+    Some((ecosystem, decoded_name, decoded_version))
+}
+
+/// Percent-decodes `raw`; malformed escapes are kept literally, and if the
+/// decoded bytes are not valid UTF-8 the input is returned unchanged.
+fn decode_purl_segment(raw: &str) -> String {
+    let bytes = raw.as_bytes();
+    let mut output = Vec::with_capacity(bytes.len());
+    let mut index = 0usize;
+
+    while index < bytes.len() {
+        if bytes[index] == b'%'
+            && index + 2 < bytes.len()
+            && let Some(value) = decode_hex_pair(bytes[index + 1], bytes[index + 2])
+        {
+            output.push(value);
+            index += 3;
+            continue;
+        }
+        output.push(bytes[index]);
+        index += 1;
+    }
+
+    String::from_utf8(output).unwrap_or_else(|_| raw.to_string())
+}
+
+/// Combines two ASCII hex digits into one byte, or `None` if either is not hex.
+fn decode_hex_pair(high: u8, low: u8) -> Option<u8> {
+    fn decode_nibble(byte: u8) -> Option<u8> {
+        match byte {
+            b'0'..=b'9' => Some(byte - b'0'),
+            b'a'..=b'f' => Some(byte - b'a' + 10),
+            b'A'..=b'F' => Some(byte - b'A' + 10),
+            _ => None,
+        }
+    }
+
+    let high = decode_nibble(high)?;
+    let low = decode_nibble(low)?;
+    Some((high << 4) | low)
+}
+
 fn sanitize_ref(raw: &str) -> String {
     raw.chars()
         .map(|ch| {
@@ -1917,4 +2290,93 @@ mod tests {
                 .is_some_and(|dependencies| !dependencies.is_empty())
         );
     }
+
+    #[test]
+    fn sbom_diff_detects_added_removed_and_version_changes() {
+        let unique = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .expect("valid time")
+            .as_nanos();
+        let root = std::env::temp_dir().join(format!("scanr-sbom-diff-{unique}"));
+        std::fs::create_dir_all(&root).expect("create temp root");
+
+        let old_sbom = root.join("old.cdx.json");
+        let new_sbom = root.join("new.cdx.json");
+
+        std::fs::write(
+            &old_sbom,
+            r#"{
+  "bomFormat":"CycloneDX",
+  "specVersion":"1.5",
+  "version":1,
+  "components":[
+    {"type":"library","name":"lodash","version":"4.17.20","purl":"pkg:npm/lodash@4.17.20","scope":"required"},
+    {"type":"library","name":"requests","version":"2.31.0","purl":"pkg:pypi/requests@2.31.0","scope":"required"}
+  ]
+}"#,
+        )
+        .expect("write old sbom");
+
+        std::fs::write(
+            &new_sbom,
+            r#"{
+  "bomFormat":"CycloneDX",
+  "specVersion":"1.5",
+  "version":1,
+  "components":[
+    {"type":"library","name":"lodash","version":"4.17.21","purl":"pkg:npm/lodash@4.17.21","scope":"required"},
+    {"type":"library","name":"axios","version":"1.2.0","purl":"pkg:npm/axios@1.2.0","scope":"required"}
+  ]
+}"#,
+        )
+        .expect("write new sbom");
+
+        let diff = diff_cyclonedx_sbom_files(&old_sbom, &new_sbom).expect("diff should parse");
+
+        assert!(
+            diff.added_dependencies
+                .iter()
+                .any(|dependency| dependency.name == "axios" && dependency.version == "1.2.0")
+        );
+        assert!(
+            diff.removed_dependencies
+                .iter()
+                .any(|dependency| dependency.name == "requests" && dependency.version == "2.31.0")
+        );
+        assert!(diff.version_changes.iter().any(|change| {
+            change.name == "lodash"
+                && change.old_versions == vec!["4.17.20".to_string()]
+                && change.new_versions == vec!["4.17.21".to_string()]
+        }));
+        assert!(
+            diff.introduced_dependencies
+                .iter()
+                .any(|dependency| dependency.name == "lodash" && dependency.version == "4.17.21")
+        );
+        assert!(
+            diff.introduced_dependencies
+                .iter()
+                .any(|dependency| dependency.name == "axios" && dependency.version == "1.2.0")
+        );
+    }
+
+    #[test]
+    fn parse_purl_accepts_unencoded_npm_scope() {
+        assert_eq!(
+            parse_purl_dependency("pkg:npm/@babel/core@7.24.0"),
+            Some((
+                Ecosystem::Node,
+                "@babel/core".to_string(),
+                Some("7.24.0".to_string())
+            ))
+        );
+        assert_eq!(
+            parse_purl_dependency("pkg:npm/%40babel/core@7.24.0"),
+            Some((
+                Ecosystem::Node,
+                "@babel/core".to_string(),
+                Some("7.24.0".to_string())
+            ))
+        );
+    }
 }