diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/README.md b/scientific-knowledge-graph/graph-ingestion-auditor/README.md
new file mode 100644
index 00000000..645b97dd
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/README.md
@@ -0,0 +1,24 @@
+# Graph Ingestion Auditor
+
+This module adds a deterministic, dependency-free knowledge graph ingestion auditor for SCIBASE issue #17. It checks whether extracted scientific entities, relationships, recommendations, and export settings are safe to publish into a project discovery graph.
+
+## Scope
+
+- Required entity coverage for papers, authors, datasets, methods, software, and concepts
+- DOI, ORCID, DataCite/Zenodo, MeSH/OBO/Wikidata, PubChem, UniProt, SWH, and GitHub identifier checks
+- Extraction confidence thresholds
+- Relationship endpoint, provenance, evidence, and private-content checks
+- AI recommendation evidence and private-signal visibility checks
+- JSON-LD/RDF/GraphML export readiness, license, and schema version checks
+
+All fixtures are synthetic. The module does not use private research content, user activity, live ontology services, credentials, or external APIs.
+
+## Run
+
+```bash
+node scientific-knowledge-graph/graph-ingestion-auditor/test.js
+node scientific-knowledge-graph/graph-ingestion-auditor/demo.js
+node scientific-knowledge-graph/graph-ingestion-auditor/make-demo-video.js
+```
+
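+Generated knowledge graph audit artifacts are written to `reports/`. `test.js` and `demo.js` need only Node.js; `make-demo-video.js` additionally requires the `ffmpeg` binary on your `PATH`.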
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/demo.js b/scientific-knowledge-graph/graph-ingestion-auditor/demo.js
new file mode 100644
index 00000000..65632568
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/demo.js
@@ -0,0 +1,15 @@
+const fs = require("fs");
+const path = require("path");
+const { auditKnowledgeGraph, renderMarkdownReport, renderSvgSummary } = require("./index");
+const { graphPolicy, riskyGraph } = require("./sample-data");
+
+// Demo entry point: audit the `riskyGraph` fixture and write the result as
+// JSON, Markdown, and SVG into a `reports` directory next to this script.
+const outputDir = path.join(__dirname, "reports");
+// `recursive: true` also makes this a no-op when the directory already exists.
+fs.mkdirSync(outputDir, { recursive: true });
+
+const result = auditKnowledgeGraph(riskyGraph, graphPolicy);
+// The JSON report is pretty-printed with a trailing newline.
+fs.writeFileSync(path.join(outputDir, "graph-ingestion-audit.json"), `${JSON.stringify(result, null, 2)}\n`);
+fs.writeFileSync(path.join(outputDir, "graph-ingestion-audit.md"), renderMarkdownReport(result));
+fs.writeFileSync(path.join(outputDir, "graph-ingestion-summary.svg"), renderSvgSummary(result));
+
+// One-line summary on stdout, followed by the directory the reports were written to.
+console.log(`decision=${result.decision} riskScore=${result.riskScore} findings=${result.findings.length}`);
+console.log(`reports=${outputDir}`);
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/index.js b/scientific-knowledge-graph/graph-ingestion-auditor/index.js
new file mode 100644
index 00000000..2edc48c1
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/index.js
@@ -0,0 +1,356 @@
+const crypto = require("crypto");
+
+// Points each finding contributes to the risk score, keyed by severity.
+// auditKnowledgeGraph sums these over all findings and caps the total at 100.
+const SEVERITY_WEIGHT = {
+ blocker: 33,
+ high: 17,
+ medium: 8,
+ low: 3,
+};
+
+// Entity types verifyEntityCoverage expects to see at least once in a graph.
+const REQUIRED_ENTITY_TYPES = ["paper", "author", "dataset", "method", "software", "concept"];
+
+// Pattern an entity's `identifier` must match, keyed by entity type.
+// Entity types without an entry here are not identifier-checked.
+const IDENTIFIER_RULES = {
+ // Bare DOI (no "doi:" prefix), case-insensitive.
+ paper: /^10\.\d{4,9}\/[-._;()/:A-Z0-9]+$/i,
+ // Four hyphen-separated groups of four; the final character may be a digit or "X".
+ author: /^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$/,
+ // "doi:"-prefixed DOI, "datacite:" anything, or "zenodo:" numeric id.
+ dataset: /^(doi:10\.\d{4,9}\/.+|datacite:.+|zenodo:\d+)$/i,
+ method: /^(mesh:.+|obo:.+|wikidata:Q\d+)$/i,
+ // "swh:1:" identifier, "github:owner/repo", or "doi:"-prefixed DOI.
+ software: /^(swh:1:.+|github:[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+|doi:10\.\d{4,9}\/.+)$/i,
+ concept: /^(mesh:.+|pubchem:\d+|uniprot:[A-Z0-9]+|wikidata:Q\d+)$/i,
+};
+
+/**
+ * Serialize a value to a JSON-like string whose object keys are emitted in
+ * sorted order, so structurally equal objects always yield the same text.
+ *
+ * @param {*} value - Value to serialize; arrays and plain objects are walked recursively.
+ * @returns {string} The key-ordered serialization (whatever JSON.stringify yields for primitives).
+ */
+function stableStringify(value) {
+  // Primitives and null are delegated to JSON.stringify unchanged.
+  if (value === null || typeof value !== "object") {
+    return JSON.stringify(value);
+  }
+
+  if (Array.isArray(value)) {
+    const items = value.map((item) => stableStringify(item));
+    return `[${items.join(",")}]`;
+  }
+
+  const members = Object.keys(value)
+    .sort()
+    .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
+  return `{${members.join(",")}}`;
+}
+
+/**
+ * Short content fingerprint: the first 16 hex characters of the SHA-256 of
+ * the value's key-ordered serialization.
+ *
+ * @param {*} value - Value to fingerprint.
+ * @returns {string} 16-character lowercase hex string.
+ */
+function digest(value) {
+  const hash = crypto.createHash("sha256");
+  hash.update(stableStringify(value));
+  const hex = hash.digest("hex");
+  return hex.slice(0, 16);
+}
+
+/**
+ * Build one audit finding record.
+ *
+ * @param {string} severity - Severity label (a key of SEVERITY_WEIGHT).
+ * @param {string} code - Machine-readable finding code.
+ * @param {string} title - Short human-readable title.
+ * @param {string} evidence - What was observed.
+ * @param {string} action - Suggested remediation.
+ * @returns {{severity: string, code: string, title: string, evidence: string, action: string}}
+ */
+function finding(severity, code, title, evidence, action) {
+  // Insertion order is kept so serialized reports list the fields in this order.
+  return Object.fromEntries([
+    ["severity", severity],
+    ["code", code],
+    ["title", title],
+    ["evidence", evidence],
+    ["action", action],
+  ]);
+}
+
+/**
+ * Compose the "<type>:<id>" key used to match relationship endpoints to entities.
+ *
+ * @param {{type: *, id: *}} entity - Entity to key.
+ * @returns {string} The entity's type and id joined by a colon.
+ */
+function entityKey(entity) {
+  const { type, id } = entity;
+  return `${type}:${id}`;
+}
+
+/**
+ * Report every required entity type that has no entity in the graph.
+ *
+ * @param {object} graph - Graph slice; `graph.entities` may be absent.
+ * @returns {object[]} One finding per missing type, in REQUIRED_ENTITY_TYPES order.
+ */
+function verifyEntityCoverage(graph) {
+  const presentTypes = new Set();
+  for (const entity of graph.entities || []) {
+    presentTypes.add(entity.type);
+  }
+
+  return REQUIRED_ENTITY_TYPES
+    .filter((type) => !presentTypes.has(type))
+    .map((type) => {
+      // A graph without papers or authors is blocked outright; other gaps are "high".
+      const severity = ["paper", "author"].includes(type) ? "blocker" : "high";
+      return finding(
+        severity,
+        "REQUIRED_ENTITY_TYPE_MISSING",
+        "Required knowledge graph entity type is missing",
+        `${graph.projectId} has no extracted ${type} entities.`,
+        `Add at least one reviewed ${type} entity before publishing this graph slice.`,
+      );
+    });
+}
+
+/**
+ * Check each entity for a label and type, sufficient extraction confidence,
+ * a well-formed identifier, and (for private entities) a project scope.
+ *
+ * @param {object} graph - Graph slice; `graph.entities` may be absent.
+ * @param {object} policy - `minimumEntityConfidence` overrides the 0.78 default.
+ * @returns {object[]} Findings in entity order; empty when every entity passes.
+ */
+function verifyEntityIdentifiers(graph, policy) {
+  const findings = [];
+  // `??` rather than `||`: an explicit threshold of 0 must be honored, not
+  // silently replaced by the default.
+  const minConfidence = policy.minimumEntityConfidence ?? 0.78;
+
+  (graph.entities || []).forEach((entity) => {
+    if (!entity.label || !entity.type) {
+      findings.push(finding(
+        "blocker",
+        "ENTITY_LABEL_OR_TYPE_MISSING",
+        "Entity lacks a label or type",
+        `${entity.id || "unknown"} has label=${entity.label || "missing"} type=${entity.type || "missing"}.`,
+        "Require a label and typed extraction result before graph ingestion.",
+      ));
+    }
+
+    // A missing (or otherwise falsy) confidence is treated as 0.
+    if ((entity.confidence || 0) < minConfidence) {
+      findings.push(finding(
+        entity.type === "paper" || entity.type === "dataset" ? "high" : "medium",
+        "ENTITY_CONFIDENCE_BELOW_THRESHOLD",
+        "Extracted entity confidence is below threshold",
+        `${entityKey(entity)} confidence=${entity.confidence || 0}, threshold=${minConfidence}.`,
+        "Route the entity to human review or hide it from recommendations until verified.",
+      ));
+    }
+
+    // Types without a rule in IDENTIFIER_RULES are not identifier-checked.
+    const rule = IDENTIFIER_RULES[entity.type];
+    if (rule && !rule.test(entity.identifier || "")) {
+      findings.push(finding(
+        entity.type === "paper" || entity.type === "author" ? "high" : "medium",
+        "ENTITY_IDENTIFIER_INVALID",
+        "Entity identifier does not match the expected namespace",
+        `${entityKey(entity)} identifier=${entity.identifier || "missing"}.`,
+        "Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form.",
+      ));
+    }
+
+    if (entity.visibility === "private" && !entity.privateProjectId) {
+      findings.push(finding(
+        "blocker",
+        "PRIVATE_ENTITY_SCOPE_MISSING",
+        "Private entity lacks a project scope",
+        `${entityKey(entity)} is private without a privateProjectId.`,
+        "Attach private graph nodes to a project scope or exclude them from shared graph exports.",
+      ));
+    }
+  });
+
+  return findings;
+}
+
+/**
+ * Check each relationship for resolvable endpoints, extraction provenance,
+ * enough evidence, and no public exposure of privately sourced content.
+ *
+ * @param {object} graph - Graph slice; `graph.entities` and `graph.relationships` may be absent.
+ * @param {object} policy - `minimumRelationshipEvidence` overrides the default of 2.
+ * @returns {object[]} Findings in relationship order; empty when every edge passes.
+ */
+function verifyRelationships(graph, policy) {
+  const findings = [];
+  // `??` rather than `||`: an explicit minimum of 0 must be honored, not
+  // silently replaced by the default.
+  const minEvidence = policy.minimumRelationshipEvidence ?? 2;
+  const knownEntities = new Set((graph.entities || []).map(entityKey));
+
+  (graph.relationships || []).forEach((relationship) => {
+    // Endpoints are matched with the same "<type>:<id>" shape entityKey produces.
+    const source = `${relationship.sourceType}:${relationship.sourceId}`;
+    const target = `${relationship.targetType}:${relationship.targetId}`;
+
+    if (!knownEntities.has(source) || !knownEntities.has(target)) {
+      findings.push(finding(
+        "blocker",
+        "RELATIONSHIP_ENDPOINT_MISSING",
+        "Relationship references a missing entity",
+        `${relationship.id} links ${source} -> ${target}, but at least one endpoint is absent.`,
+        "Create the missing entity or remove the relationship before graph export.",
+      ));
+    }
+
+    // An empty string counts as missing for both provenance fields.
+    if (!relationship.provenance?.sourceDocumentId || !relationship.provenance?.extractorVersion) {
+      findings.push(finding(
+        "high",
+        "RELATIONSHIP_PROVENANCE_MISSING",
+        "Relationship lacks extraction provenance",
+        `${relationship.id} has incomplete source document or extractor metadata.`,
+        "Store source document, span, extractor version, and review state for every edge.",
+      ));
+    }
+
+    if ((relationship.evidenceCount || 0) < minEvidence) {
+      findings.push(finding(
+        "medium",
+        "RELATIONSHIP_EVIDENCE_WEAK",
+        "Relationship has weak evidence support",
+        `${relationship.id} has ${relationship.evidenceCount || 0} evidence items, below ${minEvidence}.`,
+        "Require more evidence or mark the edge as tentative in graph navigation.",
+      ));
+    }
+
+    if (relationship.visibility === "public" && relationship.sourcePrivateContent) {
+      findings.push(finding(
+        "blocker",
+        "PRIVATE_CONTENT_EDGE_PUBLIC",
+        "Public relationship is derived from private content",
+        `${relationship.id} is public but cites private source content.`,
+        "Downgrade the edge to private scope or re-extract it from public evidence.",
+      ));
+    }
+  });
+
+  return findings;
+}
+
+/**
+ * Check each recommendation for enough supporting evidence edges and for
+ * private signals leaking into a public recommendation.
+ *
+ * @param {object} graph - Graph slice; `graph.recommendations` may be absent.
+ * @param {object} policy - `minimumRecommendationEvidence` overrides the default of 3.
+ * @returns {object[]} Findings in recommendation order; empty when all pass.
+ */
+function verifyRecommendations(graph, policy) {
+  const findings = [];
+  // `??` rather than `||`: an explicit minimum of 0 must be honored, not
+  // silently replaced by the default.
+  const minRecommendationEvidence = policy.minimumRecommendationEvidence ?? 3;
+
+  (graph.recommendations || []).forEach((recommendation) => {
+    // Only the number of listed edges is checked here, not whether they exist.
+    const evidenceCount = (recommendation.evidenceEdges || []).length;
+    if (evidenceCount < minRecommendationEvidence) {
+      findings.push(finding(
+        "high",
+        "RECOMMENDATION_EVIDENCE_INSUFFICIENT",
+        "AI recommendation lacks enough graph evidence",
+        `${recommendation.id} has ${evidenceCount} evidence edges.`,
+        "Hide or label the recommendation until enough verified graph evidence supports it.",
+      ));
+    }
+
+    if (recommendation.usesPrivateSignals && recommendation.visibility === "public") {
+      findings.push(finding(
+        "blocker",
+        "PRIVATE_SIGNAL_IN_PUBLIC_RECOMMENDATION",
+        "Public recommendation uses private user or project signals",
+        `${recommendation.id} combines private signals with public visibility.`,
+        "Keep private-signal recommendations in the user workspace only.",
+      ));
+    }
+  });
+
+  return findings;
+}
+
+/**
+ * Check the graph's export settings: supported format, a license for public
+ * exports, and a pinned schema version.
+ *
+ * @param {object} graph - Graph slice; `graph.exportConfig` may be absent.
+ * @returns {object[]} Findings in the order format, license, schema version.
+ */
+function verifyExportReadiness(graph) {
+  const settings = graph.exportConfig || {};
+  const supportedFormats = ["jsonld", "rdf", "graphml"];
+  const issues = [];
+
+  const formatSupported = Boolean(settings.format) && supportedFormats.includes(settings.format);
+  if (!formatSupported) {
+    issues.push(finding(
+      "medium",
+      "GRAPH_EXPORT_FORMAT_UNSUPPORTED",
+      "Graph export format is missing or unsupported",
+      `${graph.projectId} export format=${settings.format || "missing"}.`,
+      "Choose jsonld, rdf, or graphml for downstream graph consumers.",
+    ));
+  }
+
+  // Only public exports need a license; an empty string counts as missing.
+  const unlicensedPublicExport = settings.public && !settings.license;
+  if (unlicensedPublicExport) {
+    issues.push(finding(
+      "high",
+      "PUBLIC_GRAPH_LICENSE_MISSING",
+      "Public graph export lacks a license",
+      `${graph.projectId} is configured for public graph export without a license.`,
+      "Set a graph metadata license before publishing reusable linked data.",
+    ));
+  }
+
+  if (!settings.schemaVersion) {
+    issues.push(finding(
+      "medium",
+      "GRAPH_SCHEMA_VERSION_MISSING",
+      "Graph export lacks a schema version",
+      `${graph.projectId} has no schema version in exportConfig.`,
+      "Pin a schema version so downstream consumers can validate graph payloads.",
+    ));
+  }
+
+  return issues;
+}
+
+/**
+ * Run every verifier over a graph slice and assemble the audit result.
+ *
+ * @param {object} graph - Graph slice with optional `entities`, `relationships`,
+ *   `recommendations`, and `exportConfig`.
+ * @param {object} [policy={}] - Threshold overrides; `reviewDate` is copied to `reviewedAt`.
+ * @returns {object} Audit packet (decision, capped risk score, counts, findings,
+ *   remediation actions) plus `auditDigest`, the digest of that packet.
+ */
+function auditKnowledgeGraph(graph, policy = {}) {
+  const findings = [].concat(
+    verifyEntityCoverage(graph),
+    verifyEntityIdentifiers(graph, policy),
+    verifyRelationships(graph, policy),
+    verifyRecommendations(graph, policy),
+    verifyExportReadiness(graph),
+  );
+
+  let weightTotal = 0;
+  let blockers = 0;
+  let high = 0;
+  for (const item of findings) {
+    weightTotal += SEVERITY_WEIGHT[item.severity];
+    if (item.severity === "blocker") {
+      blockers += 1;
+    } else if (item.severity === "high") {
+      high += 1;
+    }
+  }
+  const riskScore = Math.min(100, weightTotal);
+
+  // The worst severity present decides the outcome.
+  let decision = "ready-for-graph-publication";
+  if (blockers > 0) {
+    decision = "block-graph-publication";
+  } else if (high > 0) {
+    decision = "manual-graph-review";
+  } else if (findings.length > 0) {
+    decision = "publish-with-graph-caveats";
+  }
+
+  const entityCounts = {};
+  for (const entity of graph.entities || []) {
+    entityCounts[entity.type] = (entityCounts[entity.type] || 0) + 1;
+  }
+
+  const packet = {
+    projectId: graph.projectId,
+    reviewedAt: policy.reviewDate,
+    decision,
+    riskScore,
+    entityCounts,
+    relationshipCount: (graph.relationships || []).length,
+    recommendationCount: (graph.recommendations || []).length,
+    exportFormat: graph.exportConfig?.format || null,
+    findings,
+    remediationActions: findings.map(({ code, action }) => ({ code, action })),
+    generatedFrom: "synthetic-knowledge-graph-only",
+  };
+
+  // The digest covers the packet only; it is attached afterwards.
+  const auditDigest = digest(packet);
+  return { ...packet, auditDigest };
+}
+
+/**
+ * Render an audit result as a Markdown report.
+ *
+ * @param {object} result - Audit result; reads `projectId`, `decision`, `riskScore`,
+ *   `relationshipCount`, `recommendationCount`, `auditDigest`, `entityCounts`, `findings`.
+ * @returns {string} Markdown text ending with a newline.
+ */
+function renderMarkdownReport(result) {
+  const summary = [
+    "# Knowledge Graph Ingestion Audit",
+    "",
+    `Project: ${result.projectId}`,
+    `Decision: ${result.decision}`,
+    `Risk score: ${result.riskScore}`,
+    `Relationships: ${result.relationshipCount}`,
+    `Recommendations: ${result.recommendationCount}`,
+    `Audit digest: ${result.auditDigest}`,
+  ];
+
+  // Entity types are listed alphabetically.
+  const entityLines = Object.keys(result.entityCounts)
+    .sort()
+    .map((type) => `- ${type}: ${result.entityCounts[type]}`);
+
+  const findingLines = result.findings.length
+    ? result.findings.flatMap((item) => [
+      `- [${item.severity}] ${item.title}: ${item.evidence}`,
+      ` Action: ${item.action}`,
+    ])
+    : ["- No knowledge graph ingestion issues detected."];
+
+  const document = [
+    ...summary,
+    "",
+    "## Entity Counts",
+    ...entityLines,
+    "",
+    "## Findings",
+    ...findingLines,
+    "",
+    "## Safety",
+    "- Synthetic graph packet only; no private research content, user activity, live ontology services, credentials, or external APIs.",
+  ];
+  return `${document.join("\n")}\n`;
+}
+
+/**
+ * Escape a value for safe inclusion in XML/SVG text content or a
+ * double-quoted attribute value.
+ *
+ * @param {*} value - Value to escape; converted with String() first.
+ * @returns {string} Text with &, <, > and " replaced by their entities.
+ */
+function escapeXml(value) {
+  return String(value)
+    // "&" must be replaced first so the entities produced below are not re-escaped.
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;");
+}
+
+/**
+ * Render an audit result as a standalone SVG summary card: a title, the
+ * project, a decision banner colored by outcome, and one horizontal bar per
+ * entity type (at most six, alphabetical) scaled to the largest count.
+ *
+ * NOTE(review): the markup in this function was lost from the reviewed copy
+ * of the patch (only the computed values survived), so the element layout
+ * below is a reconstruction. Confirm it against the committed
+ * reports/graph-ingestion-summary.svg before merging.
+ *
+ * @param {object} result - Audit result; reads `decision`, `projectId`,
+ *   `riskScore`, `findings`, and `entityCounts`.
+ * @returns {string} SVG document text ending with a newline.
+ */
+function renderSvgSummary(result) {
+  // Red for a blocked graph, green for a clean one, amber for everything in between.
+  const color = result.decision === "block-graph-publication"
+    ? "#b42318"
+    : result.decision === "ready-for-graph-publication"
+      ? "#067647"
+      : "#b54708";
+  const types = Object.keys(result.entityCounts).sort();
+  // The trailing 1 keeps the divisor positive when there are no entities.
+  const maxCount = Math.max(...types.map((type) => result.entityCounts[type]), 1);
+  const bars = types.slice(0, 6).map((type, index) => {
+    const y = 195 + index * 42;
+    const width = Math.round(430 * result.entityCounts[type] / maxCount);
+    return `  <text x="40" y="${y + 19}" font-family="Arial, sans-serif" font-size="16" fill="#344054">${escapeXml(type)}</text>
+  <rect x="170" y="${y}" width="430" height="26" rx="4" fill="#eaecf0"/>
+  <rect x="170" y="${y}" width="${width}" height="26" rx="4" fill="#12b76a"/>
+  <text x="615" y="${y + 19}" font-family="Arial, sans-serif" font-size="16" fill="#101828">${result.entityCounts[type]}</text>`;
+  }).join("\n");
+
+  return `<svg xmlns="http://www.w3.org/2000/svg" width="760" height="470" viewBox="0 0 760 470" role="img" aria-label="Knowledge graph ingestion audit summary">
+  <rect width="760" height="470" fill="#f7f8fa"/>
+  <text x="40" y="56" font-family="Arial, sans-serif" font-size="26" font-weight="700" fill="#101828">Knowledge Graph Ingestion Audit</text>
+  <text x="40" y="90" font-family="Arial, sans-serif" font-size="16" fill="#475467">Project: ${escapeXml(result.projectId)}</text>
+  <rect x="40" y="110" width="680" height="44" rx="6" fill="${color}"/>
+  <text x="56" y="138" font-family="Arial, sans-serif" font-size="18" font-weight="700" fill="#ffffff">${escapeXml(result.decision)} | risk ${escapeXml(result.riskScore)} | findings ${result.findings.length}</text>
+  <text x="40" y="182" font-family="Arial, sans-serif" font-size="18" font-weight="700" fill="#101828">Entity counts</text>
+${bars}
+</svg>
+`;
+}
+
+// Public API of the auditor. The verify* checks, `finding`, `stableStringify`,
+// and `escapeXml` are intentionally left module-private.
+module.exports = {
+ auditKnowledgeGraph,
+ digest,
+ entityKey,
+ renderMarkdownReport,
+ renderSvgSummary,
+};
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/make-demo-video.js b/scientific-knowledge-graph/graph-ingestion-auditor/make-demo-video.js
new file mode 100644
index 00000000..3a777896
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/make-demo-video.js
@@ -0,0 +1,101 @@
+const fs = require("fs");
+const path = require("path");
+const { execFileSync } = require("child_process");
+const { auditKnowledgeGraph } = require("./index");
+const { graphPolicy, riskyGraph } = require("./sample-data");
+
+// Frame size in pixels; every frame buffer holds WIDTH * HEIGHT RGB triplets.
+const WIDTH = 1280;
+const HEIGHT = 720;
+const outputDir = path.join(__dirname, "reports");
+// Scratch directory for the raw PPM frames; the script removes it again at the end.
+const frameDir = path.join(outputDir, "ppm-frames");
+// `recursive: true` creates `reports` as well when it does not exist yet.
+fs.mkdirSync(frameDir, { recursive: true });
+
+/**
+ * Split a six-digit hex color into its red, green, and blue channel values.
+ *
+ * @param {string} hex - Color such as "#b42318"; the first "#" is dropped.
+ * @returns {number[]} `[red, green, blue]`, each parsed from two hex digits.
+ */
+function rgb(hex) {
+  const digits = hex.replace("#", "");
+  // Channels start at character offsets 0, 2, and 4.
+  return [0, 2, 4].map((start) => Number.parseInt(digits.slice(start, start + 2), 16));
+}
+
+/**
+ * Allocate a WIDTH x HEIGHT RGB frame buffer filled with one color.
+ *
+ * @param {string} color - Hex color understood by `rgb`.
+ * @returns {Buffer} Buffer of WIDTH * HEIGHT * 3 bytes, three bytes per pixel.
+ */
+function canvas(color) {
+  const channels = rgb(color);
+  const pixels = Buffer.alloc(WIDTH * HEIGHT * 3);
+  // The buffer length is a multiple of 3, so index % 3 selects R, G, B in turn.
+  for (let index = 0; index < pixels.length; index += 1) {
+    pixels[index] = channels[index % 3];
+  }
+  return pixels;
+}
+
+/**
+ * Fill an axis-aligned rectangle in a frame buffer, clipped to the frame.
+ *
+ * @param {Buffer} data - Frame buffer produced by `canvas`; modified in place.
+ * @param {number} x - Left edge in pixels.
+ * @param {number} y - Top edge in pixels.
+ * @param {number} width - Rectangle width in pixels.
+ * @param {number} height - Rectangle height in pixels.
+ * @param {string} color - Hex fill color understood by `rgb`.
+ */
+function rect(data, x, y, width, height, color) {
+  const [red, green, blue] = rgb(color);
+  // Clip once up front; parts outside the frame are simply not drawn.
+  const firstRow = Math.max(0, y);
+  const endRow = Math.min(HEIGHT, y + height);
+  const firstCol = Math.max(0, x);
+  const endCol = Math.min(WIDTH, x + width);
+
+  for (let row = firstRow; row < endRow; row += 1) {
+    for (let col = firstCol; col < endCol; col += 1) {
+      const base = (row * WIDTH + col) * 3;
+      data[base] = red;
+      data[base + 1] = green;
+      data[base + 2] = blue;
+    }
+  }
+}
+
+/**
+ * Write a frame buffer to disk as a binary PPM ("P6") image.
+ *
+ * @param {string} filePath - Destination file path.
+ * @param {Buffer} data - Raw RGB bytes for a WIDTH x HEIGHT frame.
+ */
+function writePpm(filePath, data) {
+  // Header: magic number, dimensions, and maximum channel value, one per line.
+  const header = Buffer.from(`P6\n${WIDTH} ${HEIGHT}\n255\n`);
+  const image = Buffer.concat([header, data]);
+  fs.writeFileSync(filePath, image);
+}
+
+// Paint one animation frame for an audit result and write it to `filePath` as a PPM.
+// `result` supplies `riskScore`, `entityCounts`, and `findings`; `frame` is the
+// zero-based frame index that drives the small per-frame size changes below.
+// Everything is drawn as filled rectangles (no text). Later `rect` calls paint
+// over earlier ones, so the call order is the z-order.
+function renderFrame(filePath, result, frame) {
+ const data = canvas("#f7f8fa");
+ // White card with a 3px border on each of its four sides.
+ rect(data, 36, 36, 1208, 648, "#ffffff");
+ rect(data, 36, 36, 1208, 3, "#d0d5dd");
+ rect(data, 36, 681, 1208, 3, "#d0d5dd");
+ rect(data, 36, 36, 3, 648, "#d0d5dd");
+ rect(data, 1241, 36, 3, 648, "#d0d5dd");
+
+ rect(data, 80, 82, 800, 34, "#101828");
+ // Risk bar: width is riskScore percent of 800px.
+ rect(data, 80, 132, Math.round(800 * result.riskScore / 100), 42, "#b42318");
+ rect(data, 910, 82, 250, 92, "#fee4e2");
+ rect(data, 944, 112, 182, 32, "#b42318");
+
+ // One row per entity type, alphabetical, spaced 58px apart.
+ const types = Object.keys(result.entityCounts).sort();
+ // The trailing 1 keeps the divisor positive when there are no entities.
+ const maxCount = Math.max(...types.map((type) => result.entityCounts[type]), 1);
+ types.forEach((type, index) => {
+ const y = 230 + index * 58;
+ const width = Math.round(760 * result.entityCounts[type] / maxCount);
+ // Grey track first, then the green bar painted over it.
+ rect(data, 120, y, 760, 34, "#eaecf0");
+ // The green bar shrinks by 3px per frame but never below 10px.
+ rect(data, 120, y, Math.max(10, width - frame * 3), 34, "#12b76a");
+ rect(data, 910, y - 6, 110, 46, "#dcfae6");
+ rect(data, 950, y + 10, 30, 14, "#12b76a");
+ });
+
+ // One dark block per blocker finding; each grows upward by 3px per frame
+ // while its bottom edge stays at y = 646.
+ const blockerCount = result.findings.filter((item) => item.severity === "blocker").length;
+ for (let i = 0; i < blockerCount; i += 1) {
+ rect(data, 120 + i * 82, 610 - frame * 3, 56, 36 + frame * 3, "#7a271a");
+ }
+ rect(data, 80, 652, 1080, 12, "#98a2b3");
+ writePpm(filePath, data);
+}
+
+// Render four frames for the `riskyGraph` audit and encode them into
+// reports/demo.mp4 with ffmpeg (which must be available on PATH).
+const result = auditKnowledgeGraph(riskyGraph, graphPolicy);
+const outputPath = path.join(outputDir, "demo.mp4");
+
+try {
+  for (let frame = 0; frame < 4; frame += 1) {
+    // Frames are numbered from 001 to match the "frame-%03d.ppm" input pattern.
+    renderFrame(path.join(frameDir, `frame-${String(frame + 1).padStart(3, "0")}.ppm`), result, frame);
+  }
+
+  // Frames are read at 1 fps and the output is resampled to 12 fps in yuv420p.
+  execFileSync("ffmpeg", [
+    "-y",
+    "-framerate",
+    "1",
+    "-i",
+    path.join(frameDir, "frame-%03d.ppm"),
+    "-vf",
+    "fps=12,format=yuv420p",
+    "-movflags",
+    "+faststart",
+    outputPath,
+  ], { stdio: "inherit" });
+} finally {
+  // Always remove the scratch frames, even when ffmpeg is missing or exits
+  // non-zero (execFileSync throws in both cases).
+  fs.rmSync(frameDir, { recursive: true, force: true });
+}
+
+console.log(`demo video=${outputPath}`);
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/reports/demo.mp4 b/scientific-knowledge-graph/graph-ingestion-auditor/reports/demo.mp4
new file mode 100644
index 00000000..bb99eb30
Binary files /dev/null and b/scientific-knowledge-graph/graph-ingestion-auditor/reports/demo.mp4 differ
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.json b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.json
new file mode 100644
index 00000000..9ced4520
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.json
@@ -0,0 +1,176 @@
+{
+ "projectId": "proj-crispr-neuro-graph",
+ "reviewedAt": "2026-05-28T00:00:00Z",
+ "decision": "block-graph-publication",
+ "riskScore": 100,
+ "entityCounts": {
+ "paper": 1,
+ "author": 1,
+ "dataset": 1,
+ "method": 1,
+ "concept": 1
+ },
+ "relationshipCount": 2,
+ "recommendationCount": 1,
+ "exportFormat": "csv",
+ "findings": [
+ {
+ "severity": "high",
+ "code": "REQUIRED_ENTITY_TYPE_MISSING",
+ "title": "Required knowledge graph entity type is missing",
+ "evidence": "proj-crispr-neuro-graph has no extracted software entities.",
+ "action": "Add at least one reviewed software entity before publishing this graph slice."
+ },
+ {
+ "severity": "high",
+ "code": "ENTITY_IDENTIFIER_INVALID",
+ "title": "Entity identifier does not match the expected namespace",
+ "evidence": "paper:paper-1 identifier=missing-doi.",
+ "action": "Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form."
+ },
+ {
+ "severity": "high",
+ "code": "ENTITY_IDENTIFIER_INVALID",
+ "title": "Entity identifier does not match the expected namespace",
+ "evidence": "author:author-1 identifier=orcid-pending.",
+ "action": "Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form."
+ },
+ {
+ "severity": "high",
+ "code": "ENTITY_CONFIDENCE_BELOW_THRESHOLD",
+ "title": "Extracted entity confidence is below threshold",
+ "evidence": "dataset:dataset-1 confidence=0.7, threshold=0.78.",
+ "action": "Route the entity to human review or hide it from recommendations until verified."
+ },
+ {
+ "severity": "blocker",
+ "code": "PRIVATE_ENTITY_SCOPE_MISSING",
+ "title": "Private entity lacks a project scope",
+ "evidence": "dataset:dataset-1 is private without a privateProjectId.",
+ "action": "Attach private graph nodes to a project scope or exclude them from shared graph exports."
+ },
+ {
+ "severity": "high",
+ "code": "RELATIONSHIP_PROVENANCE_MISSING",
+ "title": "Relationship lacks extraction provenance",
+ "evidence": "edge-1 has incomplete source document or extractor metadata.",
+ "action": "Store source document, span, extractor version, and review state for every edge."
+ },
+ {
+ "severity": "medium",
+ "code": "RELATIONSHIP_EVIDENCE_WEAK",
+ "title": "Relationship has weak evidence support",
+ "evidence": "edge-1 has 1 evidence items, below 2.",
+ "action": "Require more evidence or mark the edge as tentative in graph navigation."
+ },
+ {
+ "severity": "blocker",
+ "code": "PRIVATE_CONTENT_EDGE_PUBLIC",
+ "title": "Public relationship is derived from private content",
+ "evidence": "edge-1 is public but cites private source content.",
+ "action": "Downgrade the edge to private scope or re-extract it from public evidence."
+ },
+ {
+ "severity": "blocker",
+ "code": "RELATIONSHIP_ENDPOINT_MISSING",
+ "title": "Relationship references a missing entity",
+ "evidence": "edge-2 links paper:paper-1 -> software:software-missing, but at least one endpoint is absent.",
+ "action": "Create the missing entity or remove the relationship before graph export."
+ },
+ {
+ "severity": "high",
+ "code": "RECOMMENDATION_EVIDENCE_INSUFFICIENT",
+ "title": "AI recommendation lacks enough graph evidence",
+ "evidence": "rec-1 has 1 evidence edges.",
+ "action": "Hide or label the recommendation until enough verified graph evidence supports it."
+ },
+ {
+ "severity": "blocker",
+ "code": "PRIVATE_SIGNAL_IN_PUBLIC_RECOMMENDATION",
+ "title": "Public recommendation uses private user or project signals",
+ "evidence": "rec-1 combines private signals with public visibility.",
+ "action": "Keep private-signal recommendations in the user workspace only."
+ },
+ {
+ "severity": "medium",
+ "code": "GRAPH_EXPORT_FORMAT_UNSUPPORTED",
+ "title": "Graph export format is missing or unsupported",
+ "evidence": "proj-crispr-neuro-graph export format=csv.",
+ "action": "Choose jsonld, rdf, or graphml for downstream graph consumers."
+ },
+ {
+ "severity": "high",
+ "code": "PUBLIC_GRAPH_LICENSE_MISSING",
+ "title": "Public graph export lacks a license",
+ "evidence": "proj-crispr-neuro-graph is configured for public graph export without a license.",
+ "action": "Set a graph metadata license before publishing reusable linked data."
+ },
+ {
+ "severity": "medium",
+ "code": "GRAPH_SCHEMA_VERSION_MISSING",
+ "title": "Graph export lacks a schema version",
+ "evidence": "proj-crispr-neuro-graph has no schema version in exportConfig.",
+ "action": "Pin a schema version so downstream consumers can validate graph payloads."
+ }
+ ],
+ "remediationActions": [
+ {
+ "code": "REQUIRED_ENTITY_TYPE_MISSING",
+ "action": "Add at least one reviewed software entity before publishing this graph slice."
+ },
+ {
+ "code": "ENTITY_IDENTIFIER_INVALID",
+ "action": "Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form."
+ },
+ {
+ "code": "ENTITY_IDENTIFIER_INVALID",
+ "action": "Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form."
+ },
+ {
+ "code": "ENTITY_CONFIDENCE_BELOW_THRESHOLD",
+ "action": "Route the entity to human review or hide it from recommendations until verified."
+ },
+ {
+ "code": "PRIVATE_ENTITY_SCOPE_MISSING",
+ "action": "Attach private graph nodes to a project scope or exclude them from shared graph exports."
+ },
+ {
+ "code": "RELATIONSHIP_PROVENANCE_MISSING",
+ "action": "Store source document, span, extractor version, and review state for every edge."
+ },
+ {
+ "code": "RELATIONSHIP_EVIDENCE_WEAK",
+ "action": "Require more evidence or mark the edge as tentative in graph navigation."
+ },
+ {
+ "code": "PRIVATE_CONTENT_EDGE_PUBLIC",
+ "action": "Downgrade the edge to private scope or re-extract it from public evidence."
+ },
+ {
+ "code": "RELATIONSHIP_ENDPOINT_MISSING",
+ "action": "Create the missing entity or remove the relationship before graph export."
+ },
+ {
+ "code": "RECOMMENDATION_EVIDENCE_INSUFFICIENT",
+ "action": "Hide or label the recommendation until enough verified graph evidence supports it."
+ },
+ {
+ "code": "PRIVATE_SIGNAL_IN_PUBLIC_RECOMMENDATION",
+ "action": "Keep private-signal recommendations in the user workspace only."
+ },
+ {
+ "code": "GRAPH_EXPORT_FORMAT_UNSUPPORTED",
+ "action": "Choose jsonld, rdf, or graphml for downstream graph consumers."
+ },
+ {
+ "code": "PUBLIC_GRAPH_LICENSE_MISSING",
+ "action": "Set a graph metadata license before publishing reusable linked data."
+ },
+ {
+ "code": "GRAPH_SCHEMA_VERSION_MISSING",
+ "action": "Pin a schema version so downstream consumers can validate graph payloads."
+ }
+ ],
+ "generatedFrom": "synthetic-knowledge-graph-only",
+ "auditDigest": "0589aebec7ba5969"
+}
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.md b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.md
new file mode 100644
index 00000000..6e5f915e
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-audit.md
@@ -0,0 +1,48 @@
+# Knowledge Graph Ingestion Audit
+
+Project: proj-crispr-neuro-graph
+Decision: block-graph-publication
+Risk score: 100
+Relationships: 2
+Recommendations: 1
+Audit digest: 0589aebec7ba5969
+
+## Entity Counts
+- author: 1
+- concept: 1
+- dataset: 1
+- method: 1
+- paper: 1
+
+## Findings
+- [high] Required knowledge graph entity type is missing: proj-crispr-neuro-graph has no extracted software entities.
+ Action: Add at least one reviewed software entity before publishing this graph slice.
+- [high] Entity identifier does not match the expected namespace: paper:paper-1 identifier=missing-doi.
+ Action: Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form.
+- [high] Entity identifier does not match the expected namespace: author:author-1 identifier=orcid-pending.
+ Action: Normalize the entity to DOI, ORCID, DataCite, MeSH, PubChem, UniProt, Wikidata, SWH, or GitHub form.
+- [high] Extracted entity confidence is below threshold: dataset:dataset-1 confidence=0.7, threshold=0.78.
+ Action: Route the entity to human review or hide it from recommendations until verified.
+- [blocker] Private entity lacks a project scope: dataset:dataset-1 is private without a privateProjectId.
+ Action: Attach private graph nodes to a project scope or exclude them from shared graph exports.
+- [high] Relationship lacks extraction provenance: edge-1 has incomplete source document or extractor metadata.
+ Action: Store source document, span, extractor version, and review state for every edge.
+- [medium] Relationship has weak evidence support: edge-1 has 1 evidence items, below 2.
+ Action: Require more evidence or mark the edge as tentative in graph navigation.
+- [blocker] Public relationship is derived from private content: edge-1 is public but cites private source content.
+ Action: Downgrade the edge to private scope or re-extract it from public evidence.
+- [blocker] Relationship references a missing entity: edge-2 links paper:paper-1 -> software:software-missing, but at least one endpoint is absent.
+ Action: Create the missing entity or remove the relationship before graph export.
+- [high] AI recommendation lacks enough graph evidence: rec-1 has 1 evidence edges.
+ Action: Hide or label the recommendation until enough verified graph evidence supports it.
+- [blocker] Public recommendation uses private user or project signals: rec-1 combines private signals with public visibility.
+ Action: Keep private-signal recommendations in the user workspace only.
+- [medium] Graph export format is missing or unsupported: proj-crispr-neuro-graph export format=csv.
+ Action: Choose jsonld, rdf, or graphml for downstream graph consumers.
+- [high] Public graph export lacks a license: proj-crispr-neuro-graph is configured for public graph export without a license.
+ Action: Set a graph metadata license before publishing reusable linked data.
+- [medium] Graph export lacks a schema version: proj-crispr-neuro-graph has no schema version in exportConfig.
+ Action: Pin a schema version so downstream consumers can validate graph payloads.
+
+## Safety
+- Synthetic graph packet only; no private research content, user activity, live ontology services, credentials, or external APIs.
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-summary.svg b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-summary.svg
new file mode 100644
index 00000000..4d237aa7
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/reports/graph-ingestion-summary.svg
@@ -0,0 +1,30 @@
+
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/sample-data.js b/scientific-knowledge-graph/graph-ingestion-auditor/sample-data.js
new file mode 100644
index 00000000..acfb071f
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/sample-data.js
@@ -0,0 +1,221 @@
+// Audit policy shared by the demo, the video script, and the tests.
+const graphPolicy = {
+ // Copied into the audit result as `reviewedAt`; fixed so results are reproducible.
+ reviewDate: "2026-05-28T00:00:00Z",
+ // Thresholds; these equal the auditor's built-in fallbacks (0.78, 2, 3).
+ minimumEntityConfidence: 0.78,
+ minimumRelationshipEvidence: 2,
+ minimumRecommendationEvidence: 3,
+};
+
+// Synthetic fixture built to trip the auditor. It has no `software` entity at
+// all, which yields REQUIRED_ENTITY_TYPE_MISSING; the inline notes mark the
+// other fields that produce findings under `graphPolicy`.
+const riskyGraph = {
+ projectId: "proj-crispr-neuro-graph",
+ entities: [
+ {
+ id: "paper-1",
+ type: "paper",
+ label: "CRISPR screen in neural organoids",
+ // Not a DOI -> ENTITY_IDENTIFIER_INVALID.
+ identifier: "missing-doi",
+ confidence: 0.91,
+ visibility: "public",
+ },
+ {
+ id: "author-1",
+ type: "author",
+ label: "R. Singh",
+ // Does not match the author identifier pattern -> ENTITY_IDENTIFIER_INVALID.
+ identifier: "orcid-pending",
+ confidence: 0.86,
+ visibility: "public",
+ },
+ {
+ id: "dataset-1",
+ type: "dataset",
+ label: "Organoid expression matrix",
+ identifier: "zenodo:123456",
+ // Below the 0.78 threshold -> ENTITY_CONFIDENCE_BELOW_THRESHOLD.
+ confidence: 0.7,
+ // Private with no `privateProjectId` -> PRIVATE_ENTITY_SCOPE_MISSING.
+ visibility: "private",
+ },
+ {
+ id: "method-1",
+ type: "method",
+ label: "single-cell clustering",
+ identifier: "mesh:D012345",
+ confidence: 0.81,
+ visibility: "public",
+ },
+ {
+ id: "concept-1",
+ type: "concept",
+ label: "dopamine receptor",
+ identifier: "uniprot:P14416",
+ confidence: 0.9,
+ visibility: "public",
+ },
+ ],
+ relationships: [
+ {
+ id: "edge-1",
+ sourceType: "paper",
+ sourceId: "paper-1",
+ targetType: "dataset",
+ targetId: "dataset-1",
+ predicate: "usesDataset",
+ // Below the minimum of 2 -> RELATIONSHIP_EVIDENCE_WEAK.
+ evidenceCount: 1,
+ // Public edge derived from private content -> PRIVATE_CONTENT_EDGE_PUBLIC.
+ visibility: "public",
+ sourcePrivateContent: true,
+ provenance: {
+ sourceDocumentId: "doc-1",
+ // Empty extractor version -> RELATIONSHIP_PROVENANCE_MISSING.
+ extractorVersion: "",
+ },
+ },
+ {
+ id: "edge-2",
+ sourceType: "paper",
+ sourceId: "paper-1",
+ // No such entity in `entities` -> RELATIONSHIP_ENDPOINT_MISSING.
+ targetType: "software",
+ targetId: "software-missing",
+ predicate: "usesSoftware",
+ evidenceCount: 2,
+ visibility: "public",
+ sourcePrivateContent: false,
+ provenance: {
+ sourceDocumentId: "doc-1",
+ extractorVersion: "kg-extract-0.4.1",
+ },
+ },
+ ],
+ recommendations: [
+ {
+ id: "rec-1",
+ label: "Recommend related CRISPR dataset",
+ // One edge, below the minimum of 3 -> RECOMMENDATION_EVIDENCE_INSUFFICIENT.
+ evidenceEdges: ["edge-1"],
+ // Private signals on a public recommendation -> PRIVATE_SIGNAL_IN_PUBLIC_RECOMMENDATION.
+ usesPrivateSignals: true,
+ visibility: "public",
+ },
+ ],
+ exportConfig: {
+ // Public export with an empty license -> PUBLIC_GRAPH_LICENSE_MISSING.
+ public: true,
+ // Not jsonld/rdf/graphml -> GRAPH_EXPORT_FORMAT_UNSUPPORTED.
+ format: "csv",
+ license: "",
+ // Empty -> GRAPH_SCHEMA_VERSION_MISSING.
+ schemaVersion: "",
+ },
+};
+
+// Synthetic fixture the tests expect to audit cleanly (zero findings) under
+// `graphPolicy`: all six required entity types are present, every identifier
+// matches its type's pattern, every edge resolves and carries provenance, and
+// the export settings are complete.
+const readyGraph = {
+ projectId: "proj-catalyst-kg",
+ entities: [
+ {
+ id: "paper-1",
+ type: "paper",
+ label: "Catalyst reuse in flow reactors",
+ identifier: "10.1234/scibase.synthetic.42",
+ confidence: 0.95,
+ visibility: "public",
+ },
+ {
+ id: "author-1",
+ type: "author",
+ label: "Morgan Rivera",
+ identifier: "0000-0001-5000-0007",
+ confidence: 0.93,
+ visibility: "public",
+ },
+ {
+ id: "dataset-1",
+ type: "dataset",
+ label: "Catalyst reuse yields",
+ identifier: "doi:10.5281/zenodo.1234567",
+ confidence: 0.91,
+ visibility: "public",
+ },
+ {
+ id: "method-1",
+ type: "method",
+ label: "flow chemistry",
+ identifier: "wikidata:Q902470",
+ confidence: 0.89,
+ visibility: "public",
+ },
+ {
+ id: "software-1",
+ type: "software",
+ label: "scibase-analysis",
+ identifier: "github:SCIBASE-AI/scibase-analysis",
+ confidence: 0.88,
+ visibility: "public",
+ },
+ {
+ id: "concept-1",
+ type: "concept",
+ label: "palladium",
+ identifier: "pubchem:23938",
+ confidence: 0.94,
+ visibility: "public",
+ },
+ ],
+ relationships: [
+ {
+ id: "edge-1",
+ sourceType: "paper",
+ sourceId: "paper-1",
+ targetType: "dataset",
+ targetId: "dataset-1",
+ predicate: "usesDataset",
+ evidenceCount: 3,
+ visibility: "public",
+ sourcePrivateContent: false,
+ provenance: {
+ sourceDocumentId: "doc-42",
+ extractorVersion: "kg-extract-1.0.0",
+ },
+ },
+ {
+ id: "edge-2",
+ sourceType: "paper",
+ sourceId: "paper-1",
+ targetType: "software",
+ targetId: "software-1",
+ predicate: "usesSoftware",
+ // Exactly at the minimum of 2, which passes (the check is strictly "less than").
+ evidenceCount: 2,
+ visibility: "public",
+ sourcePrivateContent: false,
+ provenance: {
+ sourceDocumentId: "doc-42",
+ extractorVersion: "kg-extract-1.0.0",
+ },
+ },
+ {
+ id: "edge-3",
+ sourceType: "paper",
+ sourceId: "paper-1",
+ targetType: "concept",
+ targetId: "concept-1",
+ predicate: "studiesConcept",
+ evidenceCount: 4,
+ visibility: "public",
+ sourcePrivateContent: false,
+ provenance: {
+ sourceDocumentId: "doc-42",
+ extractorVersion: "kg-extract-1.0.0",
+ },
+ },
+ ],
+ recommendations: [
+ {
+ id: "rec-1",
+ label: "Recommend related flow chemistry method",
+ // Three edges, exactly the minimum required by `graphPolicy`.
+ evidenceEdges: ["edge-1", "edge-2", "edge-3"],
+ usesPrivateSignals: false,
+ visibility: "public",
+ },
+ ],
+ exportConfig: {
+ public: true,
+ format: "jsonld",
+ license: "CC-BY-4.0",
+ schemaVersion: "scibase-kg-v1",
+ },
+};
+
+// Fixtures: the shared policy, one graph expected to pass, one expected to be blocked.
+module.exports = {
+ graphPolicy,
+ readyGraph,
+ riskyGraph,
+};
diff --git a/scientific-knowledge-graph/graph-ingestion-auditor/test.js b/scientific-knowledge-graph/graph-ingestion-auditor/test.js
new file mode 100644
index 00000000..d7e7067a
--- /dev/null
+++ b/scientific-knowledge-graph/graph-ingestion-auditor/test.js
@@ -0,0 +1,45 @@
+const assert = require("assert");
+const {
+ auditKnowledgeGraph,
+ digest,
+ entityKey,
+ renderMarkdownReport,
+ renderSvgSummary,
+} = require("./index");
+const { graphPolicy, readyGraph, riskyGraph } = require("./sample-data");
+
+const risky = auditKnowledgeGraph(riskyGraph, graphPolicy);
+assert.strictEqual(risky.decision, "block-graph-publication");
+assert.ok(risky.riskScore >= 90, "risky graph should produce a strong block score");
+assert.ok(risky.findings.some((item) => item.code === "REQUIRED_ENTITY_TYPE_MISSING"));
+assert.ok(risky.findings.some((item) => item.code === "ENTITY_IDENTIFIER_INVALID"));
+assert.ok(risky.findings.some((item) => item.code === "ENTITY_CONFIDENCE_BELOW_THRESHOLD"));
+assert.ok(risky.findings.some((item) => item.code === "PRIVATE_ENTITY_SCOPE_MISSING"));
+assert.ok(risky.findings.some((item) => item.code === "RELATIONSHIP_ENDPOINT_MISSING"));
+assert.ok(risky.findings.some((item) => item.code === "RELATIONSHIP_PROVENANCE_MISSING"));
+assert.ok(risky.findings.some((item) => item.code === "PRIVATE_CONTENT_EDGE_PUBLIC"));
+assert.ok(risky.findings.some((item) => item.code === "PRIVATE_SIGNAL_IN_PUBLIC_RECOMMENDATION"));
+assert.ok(risky.findings.some((item) => item.code === "GRAPH_EXPORT_FORMAT_UNSUPPORTED"));
+assert.ok(risky.findings.some((item) => item.code === "PUBLIC_GRAPH_LICENSE_MISSING"));
+
+const repeat = auditKnowledgeGraph(riskyGraph, graphPolicy);
+assert.strictEqual(risky.auditDigest, repeat.auditDigest, "audit digest must be deterministic");
+
+const ready = auditKnowledgeGraph(readyGraph, graphPolicy);
+assert.strictEqual(ready.decision, "ready-for-graph-publication");
+assert.strictEqual(ready.findings.length, 0);
+assert.strictEqual(ready.entityCounts.software, 1);
+
+assert.strictEqual(entityKey({ type: "paper", id: "p1" }), "paper:p1");
+assert.strictEqual(digest({ b: 2, a: 1 }), digest({ a: 1, b: 2 }));
+
+const markdown = renderMarkdownReport(risky);
+assert.ok(markdown.includes("Knowledge Graph Ingestion Audit"));
+assert.ok(markdown.includes("Synthetic graph packet only"));
+assert.ok(markdown.includes("block-graph-publication"));
+
+const svg = renderSvgSummary(risky);
+assert.ok(svg.includes("