diff --git a/notebook-preview-integrity-gate/README.md b/notebook-preview-integrity-gate/README.md new file mode 100644 index 0000000..d3a9846 --- /dev/null +++ b/notebook-preview-integrity-gate/README.md @@ -0,0 +1,36 @@ +# Notebook Preview Integrity Gate + +Self-contained Scientific Data and Code Hosting module for issue #14. It audits notebook packages before SCIBASE renders public previews, so notebooks, datasets, figures, and metadata can be shown without leaking credentials, breaking FAIR metadata expectations, or publishing unusable preview artifacts. + +## What it does + +- Parses notebook cells and outputs into a deterministic preview manifest. +- Blocks previews containing likely secrets, private keys, API tokens, or active HTML/script payloads. +- Flags oversized inline outputs that should be stored as checksum-addressed artifacts instead of embedded in notebook JSON. +- Checks execution order coherence, missing visual alt text, environment capture, artifact checksums, MIME types, licenses, and generated preview state. +- Produces DataCite and schema.org draft metadata from the project package. +- Emits a reviewer packet and SVG dashboard that can be attached to moderation, repository release, or publication workflows. 
+ +## Local usage + + node notebook-preview-integrity-gate/test.js + node notebook-preview-integrity-gate/demo.js + +The demo writes: + +- demo-output/notebook-preview-report.json +- demo-output/reviewer-packet.md +- demo-output/notebook-preview-dashboard.svg +- demo-output/notebook-preview-demo.mp4, generated separately for the Algora short demo video requirement + +## Example + + const { auditNotebookPreviewIntegrity } = require("./notebook-preview-integrity-gate"); + const { cleanNotebookPackage } = require("./notebook-preview-integrity-gate/sample-data"); + + const report = auditNotebookPreviewIntegrity(cleanNotebookPackage); + console.log(report.decision, report.score, report.fairScore.overall); + +## Integration notes + +The module is dependency-free CommonJS so it can be dropped behind a future upload worker, repository release gate, or admin review queue. The returned report is JSON-safe and includes a stable digest for audit logs. diff --git a/notebook-preview-integrity-gate/acceptance-notes.md b/notebook-preview-integrity-gate/acceptance-notes.md new file mode 100644 index 0000000..155a414 --- /dev/null +++ b/notebook-preview-integrity-gate/acceptance-notes.md @@ -0,0 +1,25 @@ +# Acceptance Notes + +## Scope + +This contribution focuses on the notebook preview slice of Scientific Data and Code Hosting. It does not introduce a web server or database. Instead, it provides a deterministic domain module that can sit behind future upload, moderation, preview, and repository release flows. + +## Why this is distinct + +Existing SCIBASE issue #14 contributions cover broad hosting foundations, artifact ledgers, generic FAIR gates, package integrity, and preview cache behavior. This module is notebook-specific: it inspects ipynb-like cells and outputs, catches unsafe preview content, builds a cell-level preview manifest, and maps notebook packages to DataCite/schema.org evidence. + +## Validation performed + +- Clean notebook package is marked ready. 
+- Risky notebook package with a leaked token, active HTML, oversized output, incomplete metadata, missing artifact checksum, and incomplete environment capture is marked blocked. +- Digest remains deterministic even when the generated timestamp changes. +- Reviewer packet and SVG dashboard generation are covered by tests. +- A short MP4 demo is included under demo-output for the Algora PR requirement. + +## Demo + +Run: + + node notebook-preview-integrity-gate/demo.js + +Then inspect notebook-preview-integrity-gate/demo-output/. diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg b/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg new file mode 100644 index 0000000..ff2b107 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg @@ -0,0 +1,19 @@ + \ No newline at end of file diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 b/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 new file mode 100644 index 0000000..61092a7 Binary files /dev/null and b/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 differ diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json b/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json new file mode 100644 index 0000000..c4d48b0 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json @@ -0,0 +1,579 @@ +{ + "generatedAt": "2026-05-20T10:00:00.000Z", + "module": "notebook-preview-integrity-gate", + "summary": [ + { + "projectId": "sci-preview-clean-001", + "title": "Cryo EM Morphology Notebook Preview", + "decision": "ready", + "score": 100, + "fairScore": 99, + "digest": "273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70" + }, + { + "projectId": "sci-preview-risk-007", + "title": "Unvetted Oncology Biomarker Notebook", + "decision": "blocked", + "score": 7, + 
"fairScore": 61, + "digest": "148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4" + } + ], + "reports": [ + { + "module": "notebook-preview-integrity-gate", + "projectId": "sci-preview-clean-001", + "title": "Cryo EM Morphology Notebook Preview", + "generatedAt": "2026-05-20T10:00:00.000Z", + "decision": "ready", + "score": 100, + "fairScore": { + "findable": 100, + "accessible": 95, + "interoperable": 100, + "reusable": 100, + "overall": 99 + }, + "digest": "273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70", + "checks": [ + { + "id": "notebook-present", + "status": "pass", + "severity": "info", + "message": "Notebook content is available for preview.", + "evidence": { + "cellCount": 3 + } + }, + { + "id": "cell-count-budget", + "status": "pass", + "severity": "info", + "message": "3 cells measured against preview limit 150.", + "evidence": { + "limit": 150, + "cellCount": 3 + } + }, + { + "id": "notebook-size-budget", + "status": "pass", + "severity": "info", + "message": "Notebook payload is 704 B.", + "evidence": { + "limitBytes": 12582912, + "notebookBytes": 704 + } + }, + { + "id": "unsafe-preview-content", + "status": "pass", + "severity": "info", + "message": "No secrets or active HTML were detected in previewable content.", + "evidence": { + "affectedCells": [] + } + }, + { + "id": "inline-output-budget", + "status": "pass", + "severity": "info", + "message": "All inline outputs fit within preview budget.", + "evidence": { + "limitBytes": 524288, + "affectedCells": [] + } + }, + { + "id": "execution-order-coherence", + "status": "pass", + "severity": "info", + "message": "Executed code cells have coherent ordering.", + "evidence": { + "warnings": [] + } + }, + { + "id": "visual-alt-text", + "status": "pass", + "severity": "info", + "message": "Visual outputs include accessible labels or alt text.", + "evidence": { + "affectedCells": [] + } + }, + { + "id": "fair-metadata", + "status": "pass", + "severity": "info", + "message": 
"Required FAIR, DataCite, and schema.org metadata is present.", + "evidence": { + "missing": [], + "present": [ + "doi", + "authors", + "license", + "keywords", + "resourceType", + "accessRights", + "schemaOrgType" + ] + } + }, + { + "id": "artifact-preview-readiness", + "status": "pass", + "severity": "info", + "message": "Artifacts are previewable and checksum-addressed.", + "evidence": { + "failures": [] + } + }, + { + "id": "execution-environment-capture", + "status": "pass", + "severity": "info", + "message": "Kernel, runtime, dependency lock, and seed policy are captured.", + "evidence": { + "missing": [] + } + } + ], + "previewManifest": [ + { + "cellIndex": 1, + "cellType": "markdown", + "executionCount": null, + "sourceBytes": 81, + "outputBytes": 0, + "outputCount": 0, + "mimeTypes": [], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + }, + { + "cellIndex": 2, + "cellType": "code", + "executionCount": 1, + "sourceBytes": 79, + "outputBytes": 78, + "outputCount": 1, + "mimeTypes": [ + "text/plain" + ], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + }, + { + "cellIndex": 3, + "cellType": "code", + "executionCount": 2, + "sourceBytes": 36, + "outputBytes": 112, + "outputCount": 1, + "mimeTypes": [ + "image/png" + ], + "hasVisualOutput": true, + "hasAltText": true, + "previewMode": "rich-preview", + "flags": [] + } + ], + "artifactSummary": [ + { + "path": "data/summary.csv", + "type": "dataset", + "sizeBytes": 18432, + "checksum": "sha256:2cb0c8f8b4c131e178e3e2f13c05b36f", + "previewGenerated": true, + "issues": [] + }, + { + "path": "results/segmentation-overlay.png", + "type": "figure", + "sizeBytes": 94421, + "checksum": "sha256:7f379b73698a4c81a39f6d1c", + "previewGenerated": true, + "issues": [] + } + ], + "dataciteDraft": { + "identifier": "10.5555/scibase.preview.001", + "creators": [ + { + "name": "Nora Patel", + "affiliation": "Cell Systems Lab", + 
"orcid": "0000-0002-1825-0097" + }, + { + "name": "Elijah Chen", + "affiliation": "Northbridge Institute", + "orcid": null + } + ], + "titles": [ + { + "title": "Cryo EM Morphology Notebook Preview" + } + ], + "publisher": "SCIBASE.AI", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "ComputationalNotebook" + }, + "rightsList": [ + { + "rights": "CC-BY-4.0" + } + ], + "subjects": [ + "cryo-em", + "segmentation", + "morphology" + ] + }, + "schemaOrgDataset": { + "@context": "https://schema.org", + "@type": "Dataset", + "name": "Cryo EM Morphology Notebook Preview", + "identifier": "10.5555/scibase.preview.001", + "license": "CC-BY-4.0", + "keywords": [ + "cryo-em", + "segmentation", + "morphology" + ], + "creator": [ + { + "@type": "Person", + "name": "Nora Patel", + "affiliation": "Cell Systems Lab" + }, + { + "@type": "Person", + "name": "Elijah Chen", + "affiliation": "Northbridge Institute" + } + ], + "distribution": [ + { + "@type": "DataDownload", + "contentUrl": "data/summary.csv", + "encodingFormat": "text/csv", + "contentSize": "18432", + "sha256": "sha256:2cb0c8f8b4c131e178e3e2f13c05b36f" + }, + { + "@type": "DataDownload", + "contentUrl": "results/segmentation-overlay.png", + "encodingFormat": "image/png", + "contentSize": "94421", + "sha256": "sha256:7f379b73698a4c81a39f6d1c" + } + ] + }, + "remediationPlan": [], + "reviewerPacket": "# Notebook Preview Integrity Packet\n\nProject: Cryo EM Morphology Notebook Preview\nDecision: ready\nPreview score: 100/100\nFAIR score: 99/100\nDigest: 273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70\n\n## Findings\n\n- No blocking or warning findings.\n\n## Remediation\n\n- Ready for public preview publication.\n\n## Preview Manifest\n\n- Cell 1: markdown, inline-text, 0 B output, flags: none\n- Cell 2: code, inline-text, 78 B output, flags: none\n- Cell 3: code, rich-preview, 112 B output, flags: none" + }, + { + "module": "notebook-preview-integrity-gate", + "projectId": 
"sci-preview-risk-007", + "title": "Unvetted Oncology Biomarker Notebook", + "generatedAt": "2026-05-20T10:00:00.000Z", + "decision": "blocked", + "score": 7, + "fairScore": { + "findable": 40, + "accessible": 65, + "interoperable": 60, + "reusable": 77, + "overall": 61 + }, + "digest": "148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4", + "checks": [ + { + "id": "notebook-present", + "status": "pass", + "severity": "info", + "message": "Notebook content is available for preview.", + "evidence": { + "cellCount": 3 + } + }, + { + "id": "cell-count-budget", + "status": "pass", + "severity": "info", + "message": "3 cells measured against preview limit 150.", + "evidence": { + "limit": 150, + "cellCount": 3 + } + }, + { + "id": "notebook-size-budget", + "status": "pass", + "severity": "info", + "message": "Notebook payload is 700.5 KB.", + "evidence": { + "limitBytes": 12582912, + "notebookBytes": 717284 + } + }, + { + "id": "unsafe-preview-content", + "status": "fail", + "severity": "critical", + "message": "1 cell(s) need redaction before public preview.", + "evidence": { + "affectedCells": [ + 1 + ] + } + }, + { + "id": "inline-output-budget", + "status": "warn", + "severity": "medium", + "message": "1 output(s) should be externalized before rendering.", + "evidence": { + "limitBytes": 524288, + "affectedCells": [ + 2 + ] + } + }, + { + "id": "execution-order-coherence", + "status": "warn", + "severity": "medium", + "message": "Execution counts are missing, duplicated, or out of order.", + "evidence": { + "warnings": [ + "execution-counts-out-of-order" + ] + } + }, + { + "id": "visual-alt-text", + "status": "warn", + "severity": "low", + "message": "2 visual output(s) need alt text before publication.", + "evidence": { + "affectedCells": [ + 1, + 2 + ] + } + }, + { + "id": "fair-metadata", + "status": "warn", + "severity": "high", + "message": "Missing metadata: doi, license, keywords, resourceType, schemaOrgType.", + "evidence": { + "missing": [ + 
"doi", + "license", + "keywords", + "resourceType", + "schemaOrgType" + ], + "present": [ + "authors", + "accessRights" + ] + } + }, + { + "id": "artifact-preview-readiness", + "status": "warn", + "severity": "medium", + "message": "1 artifact readiness issue(s) detected.", + "evidence": { + "failures": [ + { + "path": "data/patient-export.csv", + "issues": [ + "missing-checksum", + "missing-license", + "preview-not-generated" + ] + } + ] + } + }, + { + "id": "execution-environment-capture", + "status": "warn", + "severity": "medium", + "message": "Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies.", + "evidence": { + "missing": [ + "dependency-lock", + "seed-policy", + "pinned-dependencies" + ] + } + } + ], + "previewManifest": [ + { + "cellIndex": 1, + "cellType": "code", + "executionCount": 3, + "sourceBytes": 84, + "outputBytes": 91, + "outputCount": 1, + "mimeTypes": [ + "text/html" + ], + "hasVisualOutput": true, + "hasAltText": false, + "previewMode": "rich-preview", + "flags": [ + "possible-secret", + "active-html", + "missing-alt-text" + ] + }, + { + "cellIndex": 2, + "cellType": "code", + "executionCount": 2, + "sourceBytes": 26, + "outputBytes": 716816, + "outputCount": 1, + "mimeTypes": [ + "image/png" + ], + "hasVisualOutput": true, + "hasAltText": false, + "previewMode": "external-reference", + "flags": [ + "oversized-output", + "missing-alt-text" + ] + }, + { + "cellIndex": 3, + "cellType": "markdown", + "executionCount": null, + "sourceBytes": 38, + "outputBytes": 0, + "outputCount": 0, + "mimeTypes": [], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + } + ], + "artifactSummary": [ + { + "path": "data/patient-export.csv", + "type": "dataset", + "sizeBytes": 19000000, + "checksum": null, + "previewGenerated": false, + "issues": [ + "missing-checksum", + "missing-license", + "preview-not-generated" + ] + } + ], + "dataciteDraft": { + "identifier": null, + "creators": [ + { 
+ "name": "A. Reviewer", + "affiliation": null, + "orcid": null + } + ], + "titles": [ + { + "title": "Unvetted Oncology Biomarker Notebook" + } + ], + "publisher": "Translational Oncology Group", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "ComputationalNotebook" + }, + "rightsList": [ + { + "rights": "unspecified" + } + ], + "subjects": [] + }, + "schemaOrgDataset": { + "@context": "https://schema.org", + "@type": "Dataset", + "name": "Unvetted Oncology Biomarker Notebook", + "identifier": "sci-preview-risk-007", + "license": null, + "keywords": [], + "creator": [ + { + "@type": "Person", + "name": "A. Reviewer" + } + ], + "distribution": [ + { + "@type": "DataDownload", + "contentUrl": "data/patient-export.csv", + "encodingFormat": "text/csv", + "contentSize": "19000000" + } + ] + }, + "remediationPlan": [ + { + "checkId": "unsafe-preview-content", + "severity": "critical", + "action": "Redact credentials, tokens, private keys, and active HTML before publishing the preview." + }, + { + "checkId": "inline-output-budget", + "severity": "medium", + "action": "Externalize heavy outputs to artifact storage and reference them from the preview manifest." + }, + { + "checkId": "execution-order-coherence", + "severity": "medium", + "action": "Re-run the notebook from a clean kernel so execution counts are complete and monotonic." + }, + { + "checkId": "visual-alt-text", + "severity": "low", + "action": "Add alt text or figure descriptions for every visual output." + }, + { + "checkId": "fair-metadata", + "severity": "high", + "action": "Complete DOI, author, license, keyword, access, resource type, and schema.org metadata." + }, + { + "checkId": "artifact-preview-readiness", + "severity": "medium", + "action": "Add checksums, MIME types, licenses, and generated previews for every artifact." 
+ }, + { + "checkId": "execution-environment-capture", + "severity": "medium", + "action": "Capture kernel, language, runtime or container image, lockfile digest, and seed policy." + } + ], + "reviewerPacket": "# Notebook Preview Integrity Packet\n\nProject: Unvetted Oncology Biomarker Notebook\nDecision: blocked\nPreview score: 7/100\nFAIR score: 61/100\nDigest: 148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4\n\n## Findings\n\n- unsafe-preview-content (critical): 1 cell(s) need redaction before public preview.\n- inline-output-budget (medium): 1 output(s) should be externalized before rendering.\n- execution-order-coherence (medium): Execution counts are missing, duplicated, or out of order.\n- visual-alt-text (low): 2 visual output(s) need alt text before publication.\n- fair-metadata (high): Missing metadata: doi, license, keywords, resourceType, schemaOrgType.\n- artifact-preview-readiness (medium): 1 artifact readiness issue(s) detected.\n- execution-environment-capture (medium): Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies.\n\n## Remediation\n\n- unsafe-preview-content: Redact credentials, tokens, private keys, and active HTML before publishing the preview.\n- inline-output-budget: Externalize heavy outputs to artifact storage and reference them from the preview manifest.\n- execution-order-coherence: Re-run the notebook from a clean kernel so execution counts are complete and monotonic.\n- visual-alt-text: Add alt text or figure descriptions for every visual output.\n- fair-metadata: Complete DOI, author, license, keyword, access, resource type, and schema.org metadata.\n- artifact-preview-readiness: Add checksums, MIME types, licenses, and generated previews for every artifact.\n- execution-environment-capture: Capture kernel, language, runtime or container image, lockfile digest, and seed policy.\n\n## Preview Manifest\n\n- Cell 1: code, rich-preview, 91 B output, flags: possible-secret, active-html, 
missing-alt-text\n- Cell 2: code, external-reference, 700.0 KB output, flags: oversized-output, missing-alt-text\n- Cell 3: markdown, inline-text, 0 B output, flags: none" + } + ] +} \ No newline at end of file diff --git a/notebook-preview-integrity-gate/demo-output/reviewer-packet.md b/notebook-preview-integrity-gate/demo-output/reviewer-packet.md new file mode 100644 index 0000000..9e797f3 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/reviewer-packet.md @@ -0,0 +1,33 @@ +# Notebook Preview Integrity Packet + +Project: Unvetted Oncology Biomarker Notebook +Decision: blocked +Preview score: 7/100 +FAIR score: 61/100 +Digest: 148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4 + +## Findings + +- unsafe-preview-content (critical): 1 cell(s) need redaction before public preview. +- inline-output-budget (medium): 1 output(s) should be externalized before rendering. +- execution-order-coherence (medium): Execution counts are missing, duplicated, or out of order. +- visual-alt-text (low): 2 visual output(s) need alt text before publication. +- fair-metadata (high): Missing metadata: doi, license, keywords, resourceType, schemaOrgType. +- artifact-preview-readiness (medium): 1 artifact readiness issue(s) detected. +- execution-environment-capture (medium): Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies. + +## Remediation + +- unsafe-preview-content: Redact credentials, tokens, private keys, and active HTML before publishing the preview. +- inline-output-budget: Externalize heavy outputs to artifact storage and reference them from the preview manifest. +- execution-order-coherence: Re-run the notebook from a clean kernel so execution counts are complete and monotonic. +- visual-alt-text: Add alt text or figure descriptions for every visual output. +- fair-metadata: Complete DOI, author, license, keyword, access, resource type, and schema.org metadata. 
+- artifact-preview-readiness: Add checksums, MIME types, licenses, and generated previews for every artifact.
+- execution-environment-capture: Capture kernel, language, runtime or container image, lockfile digest, and seed policy.
+
+## Preview Manifest
+
+- Cell 1: code, rich-preview, 91 B output, flags: possible-secret, active-html, missing-alt-text
+- Cell 2: code, external-reference, 700.0 KB output, flags: oversized-output, missing-alt-text
+- Cell 3: markdown, inline-text, 0 B output, flags: none
\ No newline at end of file
diff --git a/notebook-preview-integrity-gate/demo.js b/notebook-preview-integrity-gate/demo.js
new file mode 100644
index 0000000..8605ffe
--- /dev/null
+++ b/notebook-preview-integrity-gate/demo.js
@@ -0,0 +1,64 @@
+"use strict";
+
+// Demo driver: audits the two bundled sample packages and writes a portfolio
+// report, the blocked package's reviewer packet, and its SVG dashboard into
+// the demo-output directory next to this script.
+
+const fs = require("fs");
+const path = require("path");
+const {
+  auditNotebookPreviewIntegrity,
+  renderDashboardSvg,
+} = require("./index");
+const {
+  cleanNotebookPackage,
+  riskyNotebookPackage,
+} = require("./sample-data");
+
+const outputDir = path.join(__dirname, "demo-output");
+fs.mkdirSync(outputDir, { recursive: true });
+
+// Fixed timestamp shared by both reports and the portfolio header.
+const generatedAt = "2026-05-20T10:00:00.000Z";
+const reports = [
+  auditNotebookPreviewIntegrity(cleanNotebookPackage, { generatedAt }),
+  auditNotebookPreviewIntegrity(riskyNotebookPackage, { generatedAt }),
+];
+
+const portfolio = {
+  generatedAt,
+  module: "notebook-preview-integrity-gate",
+  summary: reports.map(function summarize(report) {
+    return {
+      projectId: report.projectId,
+      title: report.title,
+      decision: report.decision,
+      score: report.score,
+      fairScore: report.fairScore.overall,
+      digest: report.digest,
+    };
+  }),
+  reports,
+};
+
+const riskReport = reports.find(function isBlocked(report) {
+  return report.decision === "blocked";
+});
+// Array.prototype.find returns undefined when nothing matches; fail with a
+// clear message instead of a TypeError on riskReport.reviewerPacket below.
+if (!riskReport) {
+  throw new Error("Demo expects at least one blocked report to render the reviewer packet and dashboard.");
+}
+
+fs.writeFileSync(path.join(outputDir, "notebook-preview-report.json"), JSON.stringify(portfolio, null, 2));
+fs.writeFileSync(path.join(outputDir, "reviewer-packet.md"), riskReport.reviewerPacket);
+fs.writeFileSync(path.join(outputDir, "notebook-preview-dashboard.svg"), renderDashboardSvg(riskReport));
+
+console.log("Generated demo artifacts:");
+console.log("- " + path.relative(process.cwd(), path.join(outputDir, "notebook-preview-report.json")));
+console.log("- " + path.relative(process.cwd(), path.join(outputDir, "reviewer-packet.md")));
+console.log("- " + path.relative(process.cwd(), path.join(outputDir, "notebook-preview-dashboard.svg")));
+console.log("");
+portfolio.summary.forEach(function printSummary(item) {
+  console.log(item.projectId + ": " + item.decision + " (" + String(item.score) + "/100), FAIR " + String(item.fairScore) + "/100");
+});
diff --git a/notebook-preview-integrity-gate/index.js b/notebook-preview-integrity-gate/index.js
new file mode 100644
index 0000000..827fa0a
--- /dev/null
+++ b/notebook-preview-integrity-gate/index.js
@@ -0,0 +1,577 @@
+"use strict";
+
+const crypto = require("crypto");
+
+const DEFAULT_LIMITS = Object.freeze({
+  maxCells: 150,
+  maxNotebookBytes: 12 * 1024 * 1024,
+  maxInlineOutputBytes: 512 * 1024,
+  maxArtifactBytes: 250 * 1024 * 1024,
+});
+
+const SEVERITY_WEIGHT = Object.freeze({
+  info: 0,
+  low: 4,
+  medium: 9,
+  high: 18,
+  critical: 35,
+});
+
+const SECRET_PATTERNS = [
+  /api[_-]?key\s*[:=]\s*["']?[a-z0-9_\-]{16,}/i,
+  /secret\s*[:=]\s*["']?[a-z0-9_\-]{16,}/i,
+  /token\s*[:=]\s*["']?[a-z0-9_\-]{20,}/i,
+  /password\s*[:=]\s*["']?[^"'\s]{8,}/i,
+  /-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----/i,
+];
+
+/**
+ * Audits a notebook package before its public preview is rendered.
+ *
+ * Runs ten checks (notebook presence, cell-count and size budgets, unsafe
+ * content, inline output budget, execution order, alt text, FAIR metadata,
+ * artifact readiness, environment capture) and assembles the report from the
+ * check results plus the derived score, decision, FAIR score, DataCite and
+ * schema.org drafts, remediation plan, and rendered reviewer packet.
+ *
+ * @param {object} [project] - Package with optional projectId, title,
+ *   notebook.cells, artifacts, metadata, and execution.environment. A falsy
+ *   project, or non-array cells/artifacts, is treated as empty.
+ * @param {object} [options] - Optional settings; null is treated like {}.
+ * @param {object} [options.limits] - Partial override merged over DEFAULT_LIMITS.
+ * @param {string} [options.generatedAt] - Timestamp stored on the report;
+ *   defaults to new Date().toISOString().
+ * @returns {object} The report. Its digest is computed without generatedAt,
+ *   so the digest does not change when only the timestamp differs.
+ */
+function auditNotebookPreviewIntegrity(project, options = {}) {
+  const input = project || {};
+  // The default parameter only covers `undefined`; an explicit `null` would
+  // otherwise throw on `options.limits`.
+  const settings = options || {};
+  const limits = Object.assign({}, DEFAULT_LIMITS, settings.limits || {});
+  const generatedAt = settings.generatedAt || new Date().toISOString();
+  const notebook = input.notebook || {};
+  const cells = Array.isArray(notebook.cells) ? notebook.cells : [];
+  const artifacts = Array.isArray(input.artifacts) ? input.artifacts : [];
+  const metadata = input.metadata || {};
+  const execution = input.execution || {};
+  const checks = [];
+
+  addCheck(checks, "notebook-present", cells.length > 0 ? "pass" : "fail", cells.length > 0 ? "info" : "critical", cells.length > 0 ? "Notebook content is available for preview." : "Notebook has no cells to preview.", {
+    cellCount: cells.length,
+  });
+
+  addCheck(checks, "cell-count-budget", cells.length <= limits.maxCells ? "pass" : "fail", cells.length <= limits.maxCells ? "info" : "medium", String(cells.length) + " cells measured against preview limit " + String(limits.maxCells) + ".", {
+    limit: limits.maxCells,
+    cellCount: cells.length,
+  });
+
+  const notebookBytes = byteLength(JSON.stringify(notebook));
+  addCheck(checks, "notebook-size-budget", notebookBytes <= limits.maxNotebookBytes ? "pass" : "fail", notebookBytes <= limits.maxNotebookBytes ? "info" : "high", "Notebook payload is " + formatBytes(notebookBytes) + ".", {
+    limitBytes: limits.maxNotebookBytes,
+    notebookBytes,
+  });
+
+  const previewManifest = cells.map(function summarize(cell, index) {
+    // summarizeCell dereferences `cell.source` immediately, so a null or
+    // missing entry is summarized as an empty cell instead of throwing.
+    return summarizeCell(cell || {}, index, limits);
+  });
+  const unsafeCells = previewManifest.filter(function filterUnsafe(cell) {
+    return cell.flags.includes("possible-secret") || cell.flags.includes("active-html");
+  });
+  addCheck(checks, "unsafe-preview-content", unsafeCells.length === 0 ? "pass" : "fail", unsafeCells.length === 0 ? "info" : "critical", unsafeCells.length === 0 ? "No secrets or active HTML were detected in previewable content." : String(unsafeCells.length) + " cell(s) need redaction before public preview.", {
+    affectedCells: unsafeCells.map(function mapCell(cell) { return cell.cellIndex; }),
+  });
+
+  const oversizedCells = previewManifest.filter(function filterOversized(cell) {
+    return cell.outputBytes > limits.maxInlineOutputBytes;
+  });
+  addCheck(checks, "inline-output-budget", oversizedCells.length === 0 ? "pass" : "warn", oversizedCells.length === 0 ? "info" : "medium", oversizedCells.length === 0 ? "All inline outputs fit within preview budget." : String(oversizedCells.length) + " output(s) should be externalized before rendering.", {
+    limitBytes: limits.maxInlineOutputBytes,
+    affectedCells: oversizedCells.map(function mapCell(cell) { return cell.cellIndex; }),
+  });
+
+  const executionWarnings = findExecutionOrderWarnings(previewManifest);
+  addCheck(checks, "execution-order-coherence", executionWarnings.length === 0 ? "pass" : "warn", executionWarnings.length === 0 ? "info" : "medium", executionWarnings.length === 0 ? "Executed code cells have coherent ordering." : "Execution counts are missing, duplicated, or out of order.", {
+    warnings: executionWarnings,
+  });
+
+  const altTextGaps = previewManifest.filter(function filterAlt(cell) {
+    return cell.hasVisualOutput && !cell.hasAltText;
+  });
+  addCheck(checks, "visual-alt-text", altTextGaps.length === 0 ? "pass" : "warn", altTextGaps.length === 0 ? "info" : "low", altTextGaps.length === 0 ? "Visual outputs include accessible labels or alt text." : String(altTextGaps.length) + " visual output(s) need alt text before publication.", {
+    affectedCells: altTextGaps.map(function mapCell(cell) { return cell.cellIndex; }),
+  });
+
+  const metadataResult = evaluateMetadata(metadata, input);
+  addCheck(checks, "fair-metadata", metadataResult.missing.length === 0 ? "pass" : "warn", metadataResult.missing.length === 0 ? "info" : "high", metadataResult.missing.length === 0 ? "Required FAIR, DataCite, and schema.org metadata is present." : "Missing metadata: " + metadataResult.missing.join(", ") + ".", {
+    missing: metadataResult.missing,
+    present: metadataResult.present,
+  });
+
+  const artifactResult = evaluateArtifacts(artifacts, limits);
+  addCheck(checks, "artifact-preview-readiness", artifactResult.failures.length === 0 ? "pass" : "warn", artifactResult.failures.length === 0 ? "info" : "medium", artifactResult.failures.length === 0 ? "Artifacts are previewable and checksum-addressed." : String(artifactResult.failures.length) + " artifact readiness issue(s) detected.", {
+    failures: artifactResult.failures,
+  });
+
+  const environmentResult = evaluateEnvironment(execution.environment || notebook.environment || {});
+  addCheck(checks, "execution-environment-capture", environmentResult.missing.length === 0 ? "pass" : "warn", environmentResult.missing.length === 0 ? "info" : "medium", environmentResult.missing.length === 0 ? "Kernel, runtime, dependency lock, and seed policy are captured." : "Execution environment gaps: " + environmentResult.missing.join(", ") + ".", {
+    missing: environmentResult.missing,
+  });
+
+  const score = calculateScore(checks);
+  const decision = decide(checks, score);
+  const fairScore = calculateFairScore(metadataResult, artifactResult, environmentResult);
+  const dataciteDraft = buildDataCiteDraft(metadata, input);
+  const schemaOrgDataset = buildSchemaOrgDataset(metadata, input, artifacts);
+  const remediationPlan = buildRemediationPlan(checks);
+  // generatedAt and check messages are not hashed, only ids, statuses, severities, and evidence.
+  const digest = stableHash({
+    projectId: input.projectId,
+    title: input.title,
+    checks: checks.map(function mapCheck(check) {
+      return {
+        id: check.id,
+        status: check.status,
+        severity: check.severity,
+        evidence: check.evidence,
+      };
+    }),
+    previewManifest,
+    dataciteDraft,
+    schemaOrgDataset,
+  });
+
+  const report = {
+    module: "notebook-preview-integrity-gate",
+    projectId: input.projectId || "unassigned-project",
+    title: input.title || "Untitled notebook package",
+    generatedAt,
+    decision,
+    score,
+    fairScore,
+    digest,
+    checks,
+    previewManifest,
+    artifactSummary: artifactResult.summary,
+    dataciteDraft,
+    schemaOrgDataset,
+    remediationPlan,
+  };
+
+  // The packet is rendered from the report first, then attached to that same object.
+  return Object.assign(report, {
+    reviewerPacket: renderReviewerPacket(report),
+  });
+}
+
+function summarizeCell(cell, index, limits) {
+  const source = normalizeText(cell.source);
+  const outputs = Array.isArray(cell.outputs) ?
cell.outputs : []; + const outputText = outputs.map(function mapOutput(output) { + return normalizeText(output.text || output.data || output); + }).join("\n"); + const outputBytes = byteLength(outputText); + const mimeTypes = collectMimeTypes(outputs); + const hasVisualOutput = mimeTypes.some(function visualMime(mime) { + return mime.startsWith("image/") || mime === "text/html" || mime === "application/vnd.plotly.v1+json"; + }); + const hasAltText = Boolean(cell.altText || (cell.metadata && (cell.metadata.altText || cell.metadata.description)) || outputs.some(function outputAlt(output) { + return output.altText || (output.metadata && output.metadata.altText); + })); + const flags = []; + const cellType = cell.cellType || cell.cell_type || "unknown"; + + if (containsSecret(source) || containsSecret(outputText)) flags.push("possible-secret"); + if (/
| patient_id |