diff --git a/scientific-knowledge-graph/recommendation-path-auditor/README.md b/scientific-knowledge-graph/recommendation-path-auditor/README.md new file mode 100644 index 00000000..c996b116 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/README.md @@ -0,0 +1,25 @@ +# Recommendation Path Auditor + +This prototype audits AI research recommendations and graph navigation paths for the Scientific Knowledge Graph. + +It checks synthetic graph recommendation packets for: + +- private or embargoed node leaks in recommendation paths +- broken explanation paths that cannot be reconstructed from graph edges +- user filter violations before ranking +- retracted or blocked evidence used in paths +- weak evidence scores +- stale trend features +- duplicate recommendations across surfaces +- missing schema.org-compatible node metadata +- missing user-facing personalization reasons and feature weights + +## Run + +```sh +node scientific-knowledge-graph/recommendation-path-auditor/test.js +node scientific-knowledge-graph/recommendation-path-auditor/demo.js +node scientific-knowledge-graph/recommendation-path-auditor/make-demo-video.js +``` + +All fixtures are synthetic. The module performs no private graph, account, digest email, credential, ontology-service, or external API access. 
/**
 * Escapes the characters that are significant in XML/SVG markup so report
 * text can be embedded in element content or double-quoted attribute values.
 *
 * @param {*} value - Any value; it is coerced with String() before escaping.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeXml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
  };
  // One pass over the input: ampersands produced by the other replacements
  // are never seen again, so nothing is double-escaped.
  return String(value).replace(/[&<>"]/g, (char) => entities[char]);
}
/**
 * Audits every recommendation in a graph packet and builds a gated report.
 *
 * For each recommendation the audit checks, in order: duplicate targets,
 * missing target node, private/embargoed nodes on the path, path
 * reconstructability, user filters, retracted/blocked edges, evidence score,
 * trend-feature age, schema metadata, and personalization explainability.
 * A recommendation whose target node is missing gets a single critical
 * finding and is not checked further.
 *
 * @param {object} graphPacket - Packet with optional `nodes`, `edges`,
 *   `recommendations`, `policy`, `userContext`, `auditTime`, `id`, `title`.
 * @param {object} [options]
 * @param {string|number|Date} [options.now] - Audit reference time; falls back
 *   to `graphPacket.auditTime`, then to the current time.
 * @returns {{graph: object, summary: object, findings: object[], recommendationGates: object[]}}
 *   The report: risk score capped at 100, a decision derived from it, a short
 *   SHA-256 digest of the finding identifiers, the findings, and pass/fail gates.
 */
function auditRecommendations(graphPacket, options = {}) {
  const now = new Date(options.now || graphPacket.auditTime || new Date().toISOString());
  const nodes = new Map((graphPacket.nodes || []).map((node) => [node.id, node]));
  const edgeIndex = buildEdgeIndex(graphPacket.edges || []);
  // `??` rather than `||`: an explicit policy value of 0 is a legitimate
  // threshold and must not be replaced by the default.
  const minEvidenceScore = graphPacket.policy?.minEvidenceScore ?? 55;
  const maxTrendAgeDays = graphPacket.policy?.maxTrendAgeDays ?? 14;
  const findings = [];
  const seenTargets = new Map();

  for (const item of graphPacket.recommendations || []) {
    const target = nodes.get(item.targetNodeId);

    // The first recommendation seen for a target "owns" it; later ones are duplicates.
    if (seenTargets.has(item.targetNodeId)) {
      addFinding(findings, {
        code: "DUPLICATE_RECOMMENDATION",
        severity: "low",
        recommendationId: item.id,
        message: `${item.id} duplicates target from ${seenTargets.get(item.targetNodeId)}.`,
        evidence: item,
      });
    } else {
      seenTargets.set(item.targetNodeId, item.id);
    }

    if (!target) {
      addFinding(findings, {
        code: "EXPLANATION_PATH_BROKEN",
        severity: "critical",
        recommendationId: item.id,
        message: `${item.id} points to missing target node ${item.targetNodeId}.`,
        evidence: item,
      });
      continue;
    }

    // Path ids that do not resolve to a known node are dropped here; the
    // path-reconstruction check below covers missing edges.
    const pathNodes = (item.explanationPath || []).map((id) => nodes.get(id)).filter(Boolean);
    // Key by id so a target that is also the last hop of the path is
    // reported once instead of twice.
    const uniqueNodes = new Map([...pathNodes, target].map((node) => [node.id, node]));
    const privateNodes = [...uniqueNodes.values()].filter(
      (node) => node.visibility === "private" || node.embargoed,
    );
    if (privateNodes.length) {
      addFinding(findings, {
        code: "PRIVATE_NODE_LEAK",
        severity: "critical",
        recommendationId: item.id,
        message: `${item.id} exposes private or embargoed nodes: ${privateNodes.map((node) => node.id).join(", ")}.`,
        evidence: privateNodes,
      });
    }

    if (!pathExists(item.explanationPath || [], edgeIndex)) {
      addFinding(findings, {
        code: "EXPLANATION_PATH_BROKEN",
        severity: "high",
        recommendationId: item.id,
        message: `${item.id} explanation path cannot be reconstructed from graph edges.`,
        evidence: item.explanationPath,
      });
    }

    if (!passesFilters(target, item.appliedFilters || {}, graphPacket.userContext || {})) {
      addFinding(findings, {
        code: "FILTER_VIOLATION",
        severity: "high",
        recommendationId: item.id,
        message: `${item.id} target ${target.id} violates applied user filters.`,
        evidence: { target, filters: item.appliedFilters, userContext: graphPacket.userContext },
      });
    }

    const pathEdges = edgesForPath(item.explanationPath || [], edgeIndex);
    const retractedEdges = pathEdges.filter((edge) => edge.retracted || edge.blocked);
    if (retractedEdges.length) {
      addFinding(findings, {
        code: "RETRACTED_SOURCE_USED",
        severity: "critical",
        recommendationId: item.id,
        message: `${item.id} depends on retracted or blocked evidence edges.`,
        evidence: retractedEdges,
      });
    }

    const evidenceScore = computeEvidenceScore(pathEdges, item);
    // A non-finite score compares false against any threshold, so it is
    // treated as weak explicitly instead of slipping through.
    if (!Number.isFinite(evidenceScore) || evidenceScore < minEvidenceScore) {
      addFinding(findings, {
        code: "WEAK_EVIDENCE_SCORE",
        severity: "medium",
        recommendationId: item.id,
        message: `${item.id} evidence score ${evidenceScore} is below policy minimum.`,
        evidence: { evidenceScore, policyMinimum: minEvidenceScore },
      });
    }

    const trendAgeDays = daysBetween(item.trendFeaturesUpdatedAt, now.toISOString());
    const trendAgeKnown = Number.isFinite(trendAgeDays);
    // A missing or unparseable timestamp yields NaN; freshness cannot be
    // proven, so the signal is reported as stale rather than passing silently.
    if (!trendAgeKnown || trendAgeDays > maxTrendAgeDays) {
      addFinding(findings, {
        code: "STALE_TREND_SIGNAL",
        severity: "medium",
        recommendationId: item.id,
        message: trendAgeKnown
          ? `${item.id} uses trend features ${trendAgeDays} days old.`
          : `${item.id} has no valid trend feature timestamp.`,
        evidence: item.trendFeaturesUpdatedAt ?? null,
      });
    }

    if (!target.schemaOrgType || !target.canonicalName || !target.sourceIds?.length) {
      addFinding(findings, {
        code: "MISSING_SCHEMA_METADATA",
        severity: "medium",
        recommendationId: item.id,
        nodeId: target.id,
        message: `${target.id} is missing schema.org-compatible metadata.`,
        evidence: target,
      });
    }

    if (!item.userFacingReason || !item.featureWeights || Object.keys(item.featureWeights).length === 0) {
      addFinding(findings, {
        code: "UNEXPLAINED_PERSONALIZATION",
        severity: "high",
        recommendationId: item.id,
        message: `${item.id} lacks explainable personalization details.`,
        evidence: item,
      });
    }
  }

  const riskScore = Math.min(
    100,
    findings.reduce((score, finding) => score + WEIGHTS[finding.severity], 0),
  );
  let decision = "recommendation-ready";
  if (riskScore >= 70) {
    decision = "hold-recommendations";
  } else if (riskScore >= 35) {
    decision = "repair-graph-ranking";
  }

  // The digest covers only sorted finding identifiers, so it does not depend
  // on the order in which findings were produced or on message wording.
  const digestInput = findings
    .map((finding) => [finding.code, finding.recommendationId, finding.nodeId])
    .sort();
  const auditDigest = crypto
    .createHash("sha256")
    .update(JSON.stringify(digestInput))
    .digest("hex")
    .slice(0, 16);

  const countBySeverity = (severity) => findings.filter((finding) => finding.severity === severity).length;
  const gatePassed = (code) => !findings.some((finding) => finding.code === code);

  return {
    graph: {
      id: graphPacket.id,
      title: graphPacket.title,
      userId: graphPacket.userContext?.userId,
    },
    summary: {
      decision,
      riskScore,
      auditDigest,
      nodesReviewed: (graphPacket.nodes || []).length,
      edgesReviewed: (graphPacket.edges || []).length,
      recommendationsReviewed: (graphPacket.recommendations || []).length,
      findings: findings.length,
      criticalFindings: countBySeverity("critical"),
      highFindings: countBySeverity("high"),
    },
    findings,
    recommendationGates: [
      { gate: "Recommendation paths do not leak private or embargoed nodes.", passed: gatePassed("PRIVATE_NODE_LEAK") },
      { gate: "Every explanation path is reconstructable from graph edges.", passed: gatePassed("EXPLANATION_PATH_BROKEN") },
      { gate: "Applied filters are enforced before ranking.", passed: gatePassed("FILTER_VIOLATION") },
      { gate: "No recommendation depends on retracted or blocked evidence.", passed: gatePassed("RETRACTED_SOURCE_USED") },
      { gate: "Personalized suggestions include user-facing reasons and feature weights.", passed: gatePassed("UNEXPLAINED_PERSONALIZATION") },
    ],
  };
}
/**
 * Reports whether every consecutive pair of ids in `path` has an entry in
 * `edgeIndex` under the key `"<from>-><to>"`.
 *
 * @param {string[]} path - Ordered node ids of an explanation path.
 * @param {Map<string, object>} edgeIndex - Edge lookup keyed by `"from->to"`.
 * @returns {boolean} False for non-arrays and for paths with fewer than two
 *   ids; otherwise true only when every hop is present in the index.
 */
function pathExists(path, edgeIndex) {
  if (!Array.isArray(path) || path.length < 2) return false;
  return path
    .slice(1)
    .every((nodeId, position) => edgeIndex.has(`${path[position]}->${nodeId}`));
}

/**
 * Collects the edges found in `edgeIndex` for each consecutive hop of `path`.
 * Hops without an indexed edge are skipped, so the result may be shorter than
 * the number of hops.
 *
 * @param {string[]} path - Ordered node ids of an explanation path.
 * @param {Map<string, object>} edgeIndex - Edge lookup keyed by `"from->to"`.
 * @returns {object[]} Edges in path order; empty when `path` is not an array.
 */
function edgesForPath(path, edgeIndex) {
  const found = [];
  if (!Array.isArray(path)) return found;
  for (let hop = 1; hop < path.length; hop += 1) {
    const edge = edgeIndex.get(`${path[hop - 1]}->${path[hop]}`);
    if (edge) found.push(edge);
  }
  return found;
}

/**
 * Checks a target node against the filters applied to a recommendation.
 *
 * Domain, institution and year filters are only enforced when the target
 * carries the corresponding attribute; a target without it passes. The
 * reproducibility filter is stricter: a missing score is treated as 0.
 *
 * @param {object} target - Target node of the recommendation.
 * @param {object} filters - May contain `domain`, `minReproducibilityScore`,
 *   `institutionOnly`, `afterYear`.
 * @param {object} userContext - Supplies `institution` for `institutionOnly`.
 * @returns {boolean} True when no enforced filter rejects the target.
 */
function passesFilters(target, filters, userContext) {
  if (filters.domain && target.domain && filters.domain !== target.domain) {
    return false;
  }
  if (
    filters.minReproducibilityScore &&
    (target.reproducibilityScore || 0) < filters.minReproducibilityScore
  ) {
    return false;
  }
  if (
    filters.institutionOnly &&
    target.institution &&
    target.institution !== userContext.institution
  ) {
    return false;
  }
  if (filters.afterYear && target.year && target.year < filters.afterYear) {
    return false;
  }
  return true;
}

/**
 * Coerces a value to a finite number, substituting `fallback` when the value
 * is null/undefined or does not convert to a finite number.
 */
function toFiniteNumber(value, fallback) {
  if (value === null || value === undefined) return fallback;
  const number = Number(value);
  return Number.isFinite(number) ? number : fallback;
}

/**
 * Scores the evidence behind a recommendation.
 *
 * Each edge contributes `weight * citations`; the sum of the recommendation's
 * feature weights is multiplied by 10 and added. A missing weight counts as 0
 * and a missing citation count as 1. An explicit citation count of 0 is
 * honoured, so such an edge contributes nothing. Values that are not finite
 * numbers fall back to the same defaults so the score is never NaN.
 *
 * @param {object[]} edges - Edges along the explanation path.
 * @param {object} item - Recommendation; `featureWeights` is optional.
 * @returns {number} The rounded evidence score.
 */
function computeEvidenceScore(edges, item) {
  const edgeScore = edges.reduce(
    (sum, edge) => sum + toFiniteNumber(edge.weight, 0) * toFiniteNumber(edge.citations, 1),
    0,
  );
  const featureTotal = Object.values(item.featureWeights || {}).reduce(
    (sum, value) => sum + toFiniteNumber(value, 0),
    0,
  );
  return Math.round(edgeScore + featureTotal * 10);
}

/**
 * Returns the number of days from `leftIso` to `rightIso`, rounded to the
 * nearest whole day (positive when `rightIso` is later).
 *
 * @param {string} leftIso - Earlier timestamp, parsed with `new Date()`.
 * @param {string} rightIso - Later timestamp, parsed with `new Date()`.
 * @returns {number} Rounded day difference; NaN when either input cannot be
 *   parsed as a date, so callers must not rely on `>` comparisons alone.
 */
function daysBetween(leftIso, rightIso) {
  const millisecondsPerDay = 86400000;
  const elapsed = new Date(rightIso).getTime() - new Date(leftIso).getTime();
  return Math.round(elapsed / millisecondsPerDay);
}
"", + ]; + + for (const finding of report.findings) { + lines.push(`- [${finding.severity}] ${finding.code}${finding.recommendationId ? ` (${finding.recommendationId})` : ""}`); + lines.push(` - ${finding.message}`); + lines.push(` - Recommendation: ${finding.recommendation}`); + } + + lines.push("", "## Recommendation Gates", ""); + for (const gate of report.recommendationGates) { + lines.push(`- [${gate.passed ? "x" : "!"}] ${gate.gate}`); + } + + return `${lines.join("\n")}\n`; +} + +module.exports = { + auditRecommendations, + renderMarkdownReport, + buildEdgeIndex, + pathExists, + computeEvidenceScore, + daysBetween, +}; diff --git a/scientific-knowledge-graph/recommendation-path-auditor/make-demo-video.js b/scientific-knowledge-graph/recommendation-path-auditor/make-demo-video.js new file mode 100644 index 00000000..55c20011 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/make-demo-video.js @@ -0,0 +1,101 @@ +"use strict"; + +const fs = require("fs"); +const path = require("path"); +const { spawnSync } = require("child_process"); +const { auditRecommendations } = require("./index"); +const { recommendationPacket } = require("./sample-data"); + +const WIDTH = 1280; +const HEIGHT = 720; +const FPS = 12; +const FRAMES = 48; + +function rgb(hex) { + const value = Number.parseInt(hex.replace("#", ""), 16); + return [(value >> 16) & 255, (value >> 8) & 255, value & 255]; +} + +function canvas(fill) { + const buffer = Buffer.alloc(WIDTH * HEIGHT * 3); + const [r, g, b] = rgb(fill); + for (let i = 0; i < buffer.length; i += 3) { + buffer[i] = r; + buffer[i + 1] = g; + buffer[i + 2] = b; + } + return buffer; +} + +function rect(buffer, x, y, width, height, color) { + const [r, g, b] = rgb(color); + const left = Math.max(0, Math.floor(x)); + const top = Math.max(0, Math.floor(y)); + const right = Math.min(WIDTH, Math.floor(x + width)); + const bottom = Math.min(HEIGHT, Math.floor(y + height)); + for (let row = top; row < bottom; row += 1) { + 
for (let col = left; col < right; col += 1) { + const index = (row * WIDTH + col) * 3; + buffer[index] = r; + buffer[index + 1] = g; + buffer[index + 2] = b; + } + } +} + +function writeFrame(file, buffer) { + fs.writeFileSync(file, Buffer.concat([Buffer.from(`P6\n${WIDTH} ${HEIGHT}\n255\n`), buffer])); +} + +function main() { + const report = auditRecommendations(recommendationPacket); + const reportsDir = path.join(__dirname, "reports"); + const framesDir = path.join(reportsDir, "ppm-frames"); + fs.rmSync(framesDir, { recursive: true, force: true }); + fs.mkdirSync(framesDir, { recursive: true }); + + const bars = [ + { y: 252, width: 860 * (report.summary.riskScore / 100), color: "#ff6b6b" }, + { y: 352, width: 860 * (report.summary.criticalFindings / report.summary.findings), color: "#ffa94d" }, + { y: 452, width: 860 * (report.summary.highFindings / report.summary.findings), color: "#7cc4ff" }, + { y: 552, width: 860 * (report.summary.recommendationsReviewed / 6), color: "#ffcf5a" }, + ]; + + for (let frame = 0; frame < FRAMES; frame += 1) { + const progress = Math.min(1, (frame + 1) / 30); + const buffer = canvas("#07111f"); + rect(buffer, 54, 52, 1172, 616, "#101c2f"); + rect(buffer, 54, 52, 1172, 4, "#29415f"); + rect(buffer, 54, 664, 1172, 4, "#29415f"); + rect(buffer, 54, 52, 4, 616, "#29415f"); + rect(buffer, 1222, 52, 4, 616, "#29415f"); + rect(buffer, 88, 104, 720, 42, "#ffffff"); + rect(buffer, 88, 170, 600, 22, "#b9c8dd"); + rect(buffer, 88, 214, 610, 22, "#ffcf5a"); + for (const bar of bars) { + rect(buffer, 88, bar.y, 860, 36, "#273854"); + rect(buffer, 88, bar.y, bar.width * progress, 36, bar.color); + } + rect(buffer, 1002, 250, 72, 72, "#ff6b6b"); + rect(buffer, 1092, 250, 72, 72, "#ff6b6b"); + rect(buffer, 1002, 372, 162, 48, "#ffa94d"); + rect(buffer, 1002, 492, 162, 48, "#7cc4ff"); + writeFrame(path.join(framesDir, `frame-${String(frame).padStart(3, "0")}.ppm`), buffer); + } + + const output = path.join(reportsDir, "demo.mp4"); + const result 
= spawnSync( + "ffmpeg", + ["-y", "-framerate", String(FPS), "-i", path.join(framesDir, "frame-%03d.ppm"), "-pix_fmt", "yuv420p", "-movflags", "+faststart", output], + { stdio: "inherit" } + ); + if (result.status !== 0) { + throw new Error("ffmpeg failed to create demo video"); + } + fs.rmSync(framesDir, { recursive: true, force: true }); + console.log(`demo video=${output}`); +} + +if (require.main === module) { + main(); +} diff --git a/scientific-knowledge-graph/recommendation-path-auditor/reports/demo.mp4 b/scientific-knowledge-graph/recommendation-path-auditor/reports/demo.mp4 new file mode 100644 index 00000000..4e754039 Binary files /dev/null and b/scientific-knowledge-graph/recommendation-path-auditor/reports/demo.mp4 differ diff --git a/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.json b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.json new file mode 100644 index 00000000..5b862e59 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.json @@ -0,0 +1,337 @@ +{ + "graph": { + "id": "graph-neuro-discovery", + "title": "Neuroinflammation Discovery Graph", + "userId": "researcher-maya" + }, + "summary": { + "decision": "hold-recommendations", + "riskScore": 100, + "auditDigest": "0a7733f83578eac8", + "nodesReviewed": 6, + "edgesReviewed": 4, + "recommendationsReviewed": 5, + "findings": 15, + "criticalFindings": 3, + "highFindings": 4 + }, + "findings": [ + { + "severity": "critical", + "recommendation": "Remove private or embargoed graph nodes from recommendations and digest payloads.", + "code": "PRIVATE_NODE_LEAK", + "recommendationId": "rec-001", + "message": "rec-001 exposes private or embargoed nodes: dataset-raw-phi, dataset-raw-phi.", + "evidence": [ + { + "id": "dataset-raw-phi", + "type": "dataset", + "canonicalName": "Private clinical cytokine panel", + "schemaOrgType": "Dataset", + "domain": "immunology", + 
"year": 2025, + "visibility": "private", + "reproducibilityScore": 0.72, + "sourceIds": [ + "internal:cytokine-panel" + ] + }, + { + "id": "dataset-raw-phi", + "type": "dataset", + "canonicalName": "Private clinical cytokine panel", + "schemaOrgType": "Dataset", + "domain": "immunology", + "year": 2025, + "visibility": "private", + "reproducibilityScore": 0.72, + "sourceIds": [ + "internal:cytokine-panel" + ] + } + ] + }, + { + "severity": "high", + "recommendation": "Apply user domain, institution, date, and reproducibility filters before ranking.", + "code": "FILTER_VIOLATION", + "recommendationId": "rec-002", + "message": "rec-002 target paper-retracted violates applied user filters.", + "evidence": { + "target": { + "id": "paper-retracted", + "type": "paper", + "canonicalName": "Retracted cytokine inference model", + "schemaOrgType": "ScholarlyArticle", + "domain": "neuroscience", + "year": 2022, + "visibility": "public", + "reproducibilityScore": 0.22, + "sourceIds": [ + "doi:10.1000/retracted" + ] + }, + "filters": { + "domain": "neuroscience", + "minReproducibilityScore": 0.5 + }, + "userContext": { + "userId": "researcher-maya", + "institution": "Northstar University", + "domains": [ + "neuroscience", + "immunology" + ] + } + } + }, + { + "severity": "critical", + "recommendation": "Suppress recommendations whose path depends on retracted or blocked citations.", + "code": "RETRACTED_SOURCE_USED", + "recommendationId": "rec-002", + "message": "rec-002 depends on retracted or blocked evidence edges.", + "evidence": [ + { + "from": "paper-001", + "to": "paper-retracted", + "relation": "cites", + "citations": 4, + "weight": 3, + "directed": true, + "retracted": true + } + ] + }, + { + "severity": "medium", + "recommendation": "Require stronger citation, reuse, or co-occurrence evidence for this suggestion.", + "code": "WEAK_EVIDENCE_SCORE", + "recommendationId": "rec-002", + "message": "rec-002 evidence score 12 is below policy minimum.", + "evidence": { + 
"evidenceScore": 12, + "policyMinimum": 55 + } + }, + { + "severity": "medium", + "recommendation": "Refresh trend features before using the recommendation in a weekly digest.", + "code": "STALE_TREND_SIGNAL", + "recommendationId": "rec-002", + "message": "rec-002 uses trend features 31 days old.", + "evidence": "2026-04-28T00:00:00Z" + }, + { + "severity": "high", + "recommendation": "Store the user-facing reason and feature weights for personalized recommendations.", + "code": "UNEXPLAINED_PERSONALIZATION", + "recommendationId": "rec-002", + "message": "rec-002 lacks explainable personalization details.", + "evidence": { + "id": "rec-002", + "targetNodeId": "paper-retracted", + "surface": "weekly_digest", + "explanationPath": [ + "paper-001", + "paper-retracted" + ], + "appliedFilters": { + "domain": "neuroscience", + "minReproducibilityScore": 0.5 + }, + "trendFeaturesUpdatedAt": "2026-04-28T00:00:00Z", + "userFacingReason": "", + "featureWeights": {} + } + }, + { + "severity": "high", + "recommendation": "Apply user domain, institution, date, and reproducibility filters before ranking.", + "code": "FILTER_VIOLATION", + "recommendationId": "rec-003", + "message": "rec-003 target method-graph-rag violates applied user filters.", + "evidence": { + "target": { + "id": "method-graph-rag", + "type": "method", + "canonicalName": "Graph RAG literature ranking", + "schemaOrgType": "SoftwareSourceCode", + "domain": "computer science", + "year": 2023, + "visibility": "public", + "reproducibilityScore": 0.58, + "sourceIds": [ + "doi:10.1000/graphrag" + ] + }, + "filters": { + "domain": "neuroscience", + "institutionOnly": true + }, + "userContext": { + "userId": "researcher-maya", + "institution": "Northstar University", + "domains": [ + "neuroscience", + "immunology" + ] + } + } + }, + { + "severity": "medium", + "recommendation": "Require stronger citation, reuse, or co-occurrence evidence for this suggestion.", + "code": "WEAK_EVIDENCE_SCORE", + "recommendationId": 
"rec-003", + "message": "rec-003 evidence score 8 is below policy minimum.", + "evidence": { + "evidenceScore": 8, + "policyMinimum": 55 + } + }, + { + "severity": "medium", + "recommendation": "Refresh trend features before using the recommendation in a weekly digest.", + "code": "STALE_TREND_SIGNAL", + "recommendationId": "rec-003", + "message": "rec-003 uses trend features 19 days old.", + "evidence": "2026-05-10T00:00:00Z" + }, + { + "severity": "high", + "recommendation": "Regenerate the recommendation path from existing graph edges before surfacing it.", + "code": "EXPLANATION_PATH_BROKEN", + "recommendationId": "rec-004", + "message": "rec-004 explanation path cannot be reconstructed from graph edges.", + "evidence": [ + "paper-001", + "protocol-lps", + "tool-missing-schema" + ] + }, + { + "severity": "medium", + "recommendation": "Require stronger citation, reuse, or co-occurrence evidence for this suggestion.", + "code": "WEAK_EVIDENCE_SCORE", + "recommendationId": "rec-004", + "message": "rec-004 evidence score 35 is below policy minimum.", + "evidence": { + "evidenceScore": 35, + "policyMinimum": 55 + } + }, + { + "severity": "medium", + "recommendation": "Attach schema.org-compatible entity metadata before publishing the node.", + "code": "MISSING_SCHEMA_METADATA", + "recommendationId": "rec-004", + "nodeId": "tool-missing-schema", + "message": "tool-missing-schema is missing schema.org-compatible metadata.", + "evidence": { + "id": "tool-missing-schema", + "type": "tool", + "canonicalName": "", + "schemaOrgType": "", + "domain": "neuroscience", + "year": 2025, + "visibility": "public", + "reproducibilityScore": 0.49, + "sourceIds": [] + } + }, + { + "severity": "low", + "recommendation": "Deduplicate equivalent target nodes across workspace sidebar and email digest.", + "code": "DUPLICATE_RECOMMENDATION", + "recommendationId": "rec-005", + "message": "rec-005 duplicates target from rec-001.", + "evidence": { + "id": "rec-005", + "targetNodeId": 
"dataset-raw-phi", + "surface": "weekly_digest", + "explanationPath": [ + "paper-001", + "protocol-lps", + "dataset-raw-phi" + ], + "appliedFilters": { + "domain": "immunology" + }, + "trendFeaturesUpdatedAt": "2026-05-26T00:00:00Z", + "userFacingReason": "Duplicate digest candidate for dedupe testing.", + "featureWeights": { + "sharedProtocol": 2.1 + } + } + }, + { + "severity": "critical", + "recommendation": "Remove private or embargoed graph nodes from recommendations and digest payloads.", + "code": "PRIVATE_NODE_LEAK", + "recommendationId": "rec-005", + "message": "rec-005 exposes private or embargoed nodes: dataset-raw-phi, dataset-raw-phi.", + "evidence": [ + { + "id": "dataset-raw-phi", + "type": "dataset", + "canonicalName": "Private clinical cytokine panel", + "schemaOrgType": "Dataset", + "domain": "immunology", + "year": 2025, + "visibility": "private", + "reproducibilityScore": 0.72, + "sourceIds": [ + "internal:cytokine-panel" + ] + }, + { + "id": "dataset-raw-phi", + "type": "dataset", + "canonicalName": "Private clinical cytokine panel", + "schemaOrgType": "Dataset", + "domain": "immunology", + "year": 2025, + "visibility": "private", + "reproducibilityScore": 0.72, + "sourceIds": [ + "internal:cytokine-panel" + ] + } + ] + }, + { + "severity": "medium", + "recommendation": "Require stronger citation, reuse, or co-occurrence evidence for this suggestion.", + "code": "WEAK_EVIDENCE_SCORE", + "recommendationId": "rec-005", + "message": "rec-005 evidence score 49 is below policy minimum.", + "evidence": { + "evidenceScore": 49, + "policyMinimum": 55 + } + } + ], + "recommendationGates": [ + { + "gate": "Recommendation paths do not leak private or embargoed nodes.", + "passed": false + }, + { + "gate": "Every explanation path is reconstructable from graph edges.", + "passed": false + }, + { + "gate": "Applied filters are enforced before ranking.", + "passed": false + }, + { + "gate": "No recommendation depends on retracted or blocked evidence.", + 
"passed": false + }, + { + "gate": "Personalized suggestions include user-facing reasons and feature weights.", + "passed": false + } + ] +} \ No newline at end of file diff --git a/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.md b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.md new file mode 100644 index 00000000..d65350b2 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-audit.md @@ -0,0 +1,64 @@ +# Recommendation Path Audit: Neuroinflammation Discovery Graph + +- Decision: hold-recommendations +- Risk score: 100 +- Nodes reviewed: 6 +- Edges reviewed: 4 +- Recommendations reviewed: 5 +- Audit digest: 0a7733f83578eac8 + +## Findings + +- [critical] PRIVATE_NODE_LEAK (rec-001) + - rec-001 exposes private or embargoed nodes: dataset-raw-phi, dataset-raw-phi. + - Recommendation: Remove private or embargoed graph nodes from recommendations and digest payloads. +- [high] FILTER_VIOLATION (rec-002) + - rec-002 target paper-retracted violates applied user filters. + - Recommendation: Apply user domain, institution, date, and reproducibility filters before ranking. +- [critical] RETRACTED_SOURCE_USED (rec-002) + - rec-002 depends on retracted or blocked evidence edges. + - Recommendation: Suppress recommendations whose path depends on retracted or blocked citations. +- [medium] WEAK_EVIDENCE_SCORE (rec-002) + - rec-002 evidence score 12 is below policy minimum. + - Recommendation: Require stronger citation, reuse, or co-occurrence evidence for this suggestion. +- [medium] STALE_TREND_SIGNAL (rec-002) + - rec-002 uses trend features 31 days old. + - Recommendation: Refresh trend features before using the recommendation in a weekly digest. +- [high] UNEXPLAINED_PERSONALIZATION (rec-002) + - rec-002 lacks explainable personalization details. 
+ - Recommendation: Store the user-facing reason and feature weights for personalized recommendations. +- [high] FILTER_VIOLATION (rec-003) + - rec-003 target method-graph-rag violates applied user filters. + - Recommendation: Apply user domain, institution, date, and reproducibility filters before ranking. +- [medium] WEAK_EVIDENCE_SCORE (rec-003) + - rec-003 evidence score 8 is below policy minimum. + - Recommendation: Require stronger citation, reuse, or co-occurrence evidence for this suggestion. +- [medium] STALE_TREND_SIGNAL (rec-003) + - rec-003 uses trend features 19 days old. + - Recommendation: Refresh trend features before using the recommendation in a weekly digest. +- [high] EXPLANATION_PATH_BROKEN (rec-004) + - rec-004 explanation path cannot be reconstructed from graph edges. + - Recommendation: Regenerate the recommendation path from existing graph edges before surfacing it. +- [medium] WEAK_EVIDENCE_SCORE (rec-004) + - rec-004 evidence score 35 is below policy minimum. + - Recommendation: Require stronger citation, reuse, or co-occurrence evidence for this suggestion. +- [medium] MISSING_SCHEMA_METADATA (rec-004) + - tool-missing-schema is missing schema.org-compatible metadata. + - Recommendation: Attach schema.org-compatible entity metadata before publishing the node. +- [low] DUPLICATE_RECOMMENDATION (rec-005) + - rec-005 duplicates target from rec-001. + - Recommendation: Deduplicate equivalent target nodes across workspace sidebar and email digest. +- [critical] PRIVATE_NODE_LEAK (rec-005) + - rec-005 exposes private or embargoed nodes: dataset-raw-phi, dataset-raw-phi. + - Recommendation: Remove private or embargoed graph nodes from recommendations and digest payloads. +- [medium] WEAK_EVIDENCE_SCORE (rec-005) + - rec-005 evidence score 49 is below policy minimum. + - Recommendation: Require stronger citation, reuse, or co-occurrence evidence for this suggestion. + +## Recommendation Gates + +- [!] 
Recommendation paths do not leak private or embargoed nodes. +- [!] Every explanation path is reconstructable from graph edges. +- [!] Applied filters are enforced before ranking. +- [!] No recommendation depends on retracted or blocked evidence. +- [!] Personalized suggestions include user-facing reasons and feature weights. diff --git a/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-summary.svg b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-summary.svg new file mode 100644 index 00000000..7cd73ee7 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/reports/recommendation-path-summary.svg @@ -0,0 +1,20 @@ + + + + Recommendation Path Audit + Neuroinflammation Discovery Graph + Decision: hold-recommendations + + Audit risk + + + Critical findings + + + High findings + + + + 6 nodes • 4 edges • 5 recommendations + Audit digest 0a7733f83578eac8 + diff --git a/scientific-knowledge-graph/recommendation-path-auditor/sample-data.js b/scientific-knowledge-graph/recommendation-path-auditor/sample-data.js new file mode 100644 index 00000000..eb84b3b5 --- /dev/null +++ b/scientific-knowledge-graph/recommendation-path-auditor/sample-data.js @@ -0,0 +1,175 @@ +"use strict"; + +const recommendationPacket = { + id: "graph-neuro-discovery", + title: "Neuroinflammation Discovery Graph", + auditTime: "2026-05-28T20:20:00Z", + policy: { + minEvidenceScore: 55, + maxTrendAgeDays: 14, + }, + userContext: { + userId: "researcher-maya", + institution: "Northstar University", + domains: ["neuroscience", "immunology"], + }, + nodes: [ + { + id: "paper-001", + type: "paper", + canonicalName: "Microglia CRISPR screen in neuroinflammation", + schemaOrgType: "ScholarlyArticle", + domain: "neuroscience", + year: 2024, + visibility: "public", + reproducibilityScore: 0.81, + sourceIds: ["doi:10.1000/microglia"], + }, + { + id: "dataset-raw-phi", + type: "dataset", + canonicalName: "Private clinical 
cytokine panel", + schemaOrgType: "Dataset", + domain: "immunology", + year: 2025, + visibility: "private", + reproducibilityScore: 0.72, + sourceIds: ["internal:cytokine-panel"], + }, + { + id: "method-graph-rag", + type: "method", + canonicalName: "Graph RAG literature ranking", + schemaOrgType: "SoftwareSourceCode", + domain: "computer science", + year: 2023, + visibility: "public", + reproducibilityScore: 0.58, + sourceIds: ["doi:10.1000/graphrag"], + }, + { + id: "protocol-lps", + type: "protocol", + canonicalName: "LPS microglia activation protocol", + schemaOrgType: "HowTo", + domain: "neuroscience", + year: 2021, + visibility: "public", + reproducibilityScore: 0.66, + sourceIds: ["doi:10.1000/lps"], + }, + { + id: "paper-retracted", + type: "paper", + canonicalName: "Retracted cytokine inference model", + schemaOrgType: "ScholarlyArticle", + domain: "neuroscience", + year: 2022, + visibility: "public", + reproducibilityScore: 0.22, + sourceIds: ["doi:10.1000/retracted"], + }, + { + id: "tool-missing-schema", + type: "tool", + canonicalName: "", + schemaOrgType: "", + domain: "neuroscience", + year: 2025, + visibility: "public", + reproducibilityScore: 0.49, + sourceIds: [], + }, + ], + edges: [ + { + from: "paper-001", + to: "protocol-lps", + relation: "uses_protocol", + citations: 8, + weight: 3, + directed: false, + }, + { + from: "protocol-lps", + to: "dataset-raw-phi", + relation: "generated_dataset", + citations: 2, + weight: 2, + directed: false, + }, + { + from: "paper-001", + to: "paper-retracted", + relation: "cites", + citations: 4, + weight: 3, + directed: true, + retracted: true, + }, + { + from: "paper-001", + to: "method-graph-rag", + relation: "uses_method", + citations: 1, + weight: 1, + directed: false, + }, + ], + recommendations: [ + { + id: "rec-001", + targetNodeId: "dataset-raw-phi", + surface: "workspace_sidebar", + explanationPath: ["paper-001", "protocol-lps", "dataset-raw-phi"], + appliedFilters: { domain: "immunology", 
minReproducibilityScore: 0.7 }, + trendFeaturesUpdatedAt: "2026-05-26T00:00:00Z", + userFacingReason: "Your microglia project uses a protocol linked to this cytokine dataset.", + featureWeights: { sharedProtocol: 2.1, recentUsage: 1.4 }, + }, + { + id: "rec-002", + targetNodeId: "paper-retracted", + surface: "weekly_digest", + explanationPath: ["paper-001", "paper-retracted"], + appliedFilters: { domain: "neuroscience", minReproducibilityScore: 0.5 }, + trendFeaturesUpdatedAt: "2026-04-28T00:00:00Z", + userFacingReason: "", + featureWeights: {}, + }, + { + id: "rec-003", + targetNodeId: "method-graph-rag", + surface: "discovery_mode", + explanationPath: ["paper-001", "method-graph-rag"], + appliedFilters: { domain: "neuroscience", institutionOnly: true }, + trendFeaturesUpdatedAt: "2026-05-10T00:00:00Z", + userFacingReason: "This method appears near your recent projects.", + featureWeights: { coOccurrence: 0.7 }, + }, + { + id: "rec-004", + targetNodeId: "tool-missing-schema", + surface: "workspace_sidebar", + explanationPath: ["paper-001", "protocol-lps", "tool-missing-schema"], + appliedFilters: { domain: "neuroscience", minReproducibilityScore: 0.3 }, + trendFeaturesUpdatedAt: "2026-05-27T00:00:00Z", + userFacingReason: "A nearby protocol often appears with this tool.", + featureWeights: { coOccurrence: 1.1 }, + }, + { + id: "rec-005", + targetNodeId: "dataset-raw-phi", + surface: "weekly_digest", + explanationPath: ["paper-001", "protocol-lps", "dataset-raw-phi"], + appliedFilters: { domain: "immunology" }, + trendFeaturesUpdatedAt: "2026-05-26T00:00:00Z", + userFacingReason: "Duplicate digest candidate for dedupe testing.", + featureWeights: { sharedProtocol: 2.1 }, + }, + ], +}; + +module.exports = { + recommendationPacket, +}; diff --git a/scientific-knowledge-graph/recommendation-path-auditor/test.js b/scientific-knowledge-graph/recommendation-path-auditor/test.js new file mode 100644 index 00000000..fc1be434 --- /dev/null +++ 
// Self-checking test script for the recommendation path auditor
// (scientific-knowledge-graph/recommendation-path-auditor/test.js).
// Run it directly with `node`; the first failing assertion throws and aborts,
// so reaching the final console.log means every check passed.
"use strict";

const { ok, strictEqual } = require("assert");
const {
  auditRecommendations: runAudit,
  renderMarkdownReport: toMarkdown,
  buildEdgeIndex: indexEdges,
  pathExists: hasPath,
  computeEvidenceScore: scoreEvidence,
  daysBetween: elapsedDays,
} = require("./index");
const { recommendationPacket: packet } = require("./sample-data");

// ---------------------------------------------------------------------------
// Helper-level checks against the sample graph.
// ---------------------------------------------------------------------------
const adjacency = indexEdges(packet.edges);

// Every hop of this path is backed by an edge in the sample packet.
const connectedPath = ["paper-001", "protocol-lps", "dataset-raw-phi"];
strictEqual(hasPath(connectedPath, adjacency), true);

// The sample packet has no edge from protocol-lps to tool-missing-schema.
const disconnectedPath = ["paper-001", "protocol-lps", "tool-missing-schema"];
strictEqual(hasPath(disconnectedPath, adjacency), false);

strictEqual(elapsedDays("2026-04-28T00:00:00Z", "2026-05-28T20:20:00Z"), 31);

const firstEdge = packet.edges[0];
ok(scoreEvidence([firstEdge], { featureWeights: { a: 1 } }) > 0);

// ---------------------------------------------------------------------------
// End-to-end audit of the sample packet.
// ---------------------------------------------------------------------------
const report = runAudit(packet);
const reportedCodes = report.findings.map(({ code }) => code);

strictEqual(report.summary.decision, "hold-recommendations");
strictEqual(report.summary.riskScore, 100);

// Each finding code the sample packet is expected to trigger, paired with the
// message shown when the auditor fails to report it.
const requiredFindings = [
  ["PRIVATE_NODE_LEAK", "missing private node leak detection"],
  ["EXPLANATION_PATH_BROKEN", "missing broken path detection"],
  ["FILTER_VIOLATION", "missing filter violation detection"],
  ["RETRACTED_SOURCE_USED", "missing retracted evidence detection"],
  ["WEAK_EVIDENCE_SCORE", "missing weak evidence detection"],
  ["STALE_TREND_SIGNAL", "missing stale trend signal detection"],
  ["DUPLICATE_RECOMMENDATION", "missing duplicate recommendation detection"],
  ["MISSING_SCHEMA_METADATA", "missing schema metadata detection"],
  ["UNEXPLAINED_PERSONALIZATION", "missing explainability detection"],
];
for (const [code, failureMessage] of requiredFindings) {
  ok(reportedCodes.includes(code), failureMessage);
}

// Auditing the same packet twice must yield the same digest.
const rerun = runAudit(packet);
strictEqual(report.summary.auditDigest, rerun.summary.auditDigest, "digest should be stable");

// ---------------------------------------------------------------------------
// Markdown rendering.
// ---------------------------------------------------------------------------
const markdown = toMarkdown(report);
for (const expectedText of ["Recommendation Path Audit", "hold-recommendations"]) {
  ok(markdown.includes(expectedText));
}
ok(markdown.includes(report.summary.auditDigest));

console.log("recommendation-path-auditor tests passed");