diff --git a/notebook-preview-integrity-gate/README.md b/notebook-preview-integrity-gate/README.md new file mode 100644 index 0000000..d3a9846 --- /dev/null +++ b/notebook-preview-integrity-gate/README.md @@ -0,0 +1,36 @@ +# Notebook Preview Integrity Gate + +Self-contained Scientific Data and Code Hosting module for issue #14. It audits notebook packages before SCIBASE renders public previews, so notebooks, datasets, figures, and metadata can be shown without leaking credentials, breaking FAIR metadata expectations, or publishing unusable preview artifacts. + +## What it does + +- Parses notebook cells and outputs into a deterministic preview manifest. +- Blocks previews containing likely secrets, private keys, API tokens, or active HTML/script payloads. +- Flags oversized inline outputs that should be stored as checksum-addressed artifacts instead of embedded in notebook JSON. +- Checks execution order coherence, missing visual alt text, environment capture, artifact checksums, MIME types, licenses, and generated preview state. +- Produces DataCite and schema.org draft metadata from the project package. +- Emits a reviewer packet and SVG dashboard that can be attached to moderation, repository release, or publication workflows. 
+ +## Local usage + + node notebook-preview-integrity-gate/test.js + node notebook-preview-integrity-gate/demo.js + +The demo writes: + +- demo-output/notebook-preview-report.json +- demo-output/reviewer-packet.md +- demo-output/notebook-preview-dashboard.svg +- demo-output/notebook-preview-demo.mp4, generated separately for the Algora short demo video requirement + +## Example + + const { auditNotebookPreviewIntegrity } = require("./notebook-preview-integrity-gate"); + const { cleanNotebookPackage } = require("./notebook-preview-integrity-gate/sample-data"); + + const report = auditNotebookPreviewIntegrity(cleanNotebookPackage); + console.log(report.decision, report.score, report.fairScore.overall); + +## Integration notes + +The module is dependency-free CommonJS so it can be dropped behind a future upload worker, repository release gate, or admin review queue. The returned report is JSON-safe and includes a stable digest for audit logs. diff --git a/notebook-preview-integrity-gate/acceptance-notes.md b/notebook-preview-integrity-gate/acceptance-notes.md new file mode 100644 index 0000000..155a414 --- /dev/null +++ b/notebook-preview-integrity-gate/acceptance-notes.md @@ -0,0 +1,25 @@ +# Acceptance Notes + +## Scope + +This contribution focuses on the notebook preview slice of Scientific Data and Code Hosting. It does not introduce a web server or database. Instead, it provides a deterministic domain module that can sit behind future upload, moderation, preview, and repository release flows. + +## Why this is distinct + +Existing SCIBASE issue #14 contributions cover broad hosting foundations, artifact ledgers, generic FAIR gates, package integrity, and preview cache behavior. This module is notebook-specific: it inspects ipynb-like cells and outputs, catches unsafe preview content, builds a cell-level preview manifest, and maps notebook packages to DataCite/schema.org evidence. + +## Validation performed + +- Clean notebook package is marked ready. 
+- Risky notebook package with a leaked token, active HTML, oversized output, incomplete metadata, missing artifact checksum, and incomplete environment capture is marked blocked. +- Digest remains deterministic even when the generated timestamp changes. +- Reviewer packet and SVG dashboard generation are covered by tests. +- A short MP4 demo is included under demo-output for the Algora PR requirement. + +## Demo + +Run: + + node notebook-preview-integrity-gate/demo.js + +Then inspect notebook-preview-integrity-gate/demo-output/. diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg b/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg new file mode 100644 index 0000000..ff2b107 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/notebook-preview-dashboard.svg @@ -0,0 +1,19 @@ + + + +Notebook Preview Integrity Gate +Unvetted Oncology Biomarker Notebook + +7 +blocked +FAIR 61/100 - 3 cells reviewed + +notebook-presentpass +cell-count-budgetpass +notebook-size-budgetpass +unsafe-preview-contentfail +inline-output-budgetwarn +execution-order-coherencewarn +visual-alt-textwarn +Digest 148e30700c48295458d1f18e + \ No newline at end of file diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 b/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 new file mode 100644 index 0000000..61092a7 Binary files /dev/null and b/notebook-preview-integrity-gate/demo-output/notebook-preview-demo.mp4 differ diff --git a/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json b/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json new file mode 100644 index 0000000..c4d48b0 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/notebook-preview-report.json @@ -0,0 +1,579 @@ +{ + "generatedAt": "2026-05-20T10:00:00.000Z", + "module": "notebook-preview-integrity-gate", + "summary": [ + { + "projectId": "sci-preview-clean-001", + 
"title": "Cryo EM Morphology Notebook Preview", + "decision": "ready", + "score": 100, + "fairScore": 99, + "digest": "273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70" + }, + { + "projectId": "sci-preview-risk-007", + "title": "Unvetted Oncology Biomarker Notebook", + "decision": "blocked", + "score": 7, + "fairScore": 61, + "digest": "148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4" + } + ], + "reports": [ + { + "module": "notebook-preview-integrity-gate", + "projectId": "sci-preview-clean-001", + "title": "Cryo EM Morphology Notebook Preview", + "generatedAt": "2026-05-20T10:00:00.000Z", + "decision": "ready", + "score": 100, + "fairScore": { + "findable": 100, + "accessible": 95, + "interoperable": 100, + "reusable": 100, + "overall": 99 + }, + "digest": "273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70", + "checks": [ + { + "id": "notebook-present", + "status": "pass", + "severity": "info", + "message": "Notebook content is available for preview.", + "evidence": { + "cellCount": 3 + } + }, + { + "id": "cell-count-budget", + "status": "pass", + "severity": "info", + "message": "3 cells measured against preview limit 150.", + "evidence": { + "limit": 150, + "cellCount": 3 + } + }, + { + "id": "notebook-size-budget", + "status": "pass", + "severity": "info", + "message": "Notebook payload is 704 B.", + "evidence": { + "limitBytes": 12582912, + "notebookBytes": 704 + } + }, + { + "id": "unsafe-preview-content", + "status": "pass", + "severity": "info", + "message": "No secrets or active HTML were detected in previewable content.", + "evidence": { + "affectedCells": [] + } + }, + { + "id": "inline-output-budget", + "status": "pass", + "severity": "info", + "message": "All inline outputs fit within preview budget.", + "evidence": { + "limitBytes": 524288, + "affectedCells": [] + } + }, + { + "id": "execution-order-coherence", + "status": "pass", + "severity": "info", + "message": "Executed code cells have coherent 
ordering.", + "evidence": { + "warnings": [] + } + }, + { + "id": "visual-alt-text", + "status": "pass", + "severity": "info", + "message": "Visual outputs include accessible labels or alt text.", + "evidence": { + "affectedCells": [] + } + }, + { + "id": "fair-metadata", + "status": "pass", + "severity": "info", + "message": "Required FAIR, DataCite, and schema.org metadata is present.", + "evidence": { + "missing": [], + "present": [ + "doi", + "authors", + "license", + "keywords", + "resourceType", + "accessRights", + "schemaOrgType" + ] + } + }, + { + "id": "artifact-preview-readiness", + "status": "pass", + "severity": "info", + "message": "Artifacts are previewable and checksum-addressed.", + "evidence": { + "failures": [] + } + }, + { + "id": "execution-environment-capture", + "status": "pass", + "severity": "info", + "message": "Kernel, runtime, dependency lock, and seed policy are captured.", + "evidence": { + "missing": [] + } + } + ], + "previewManifest": [ + { + "cellIndex": 1, + "cellType": "markdown", + "executionCount": null, + "sourceBytes": 81, + "outputBytes": 0, + "outputCount": 0, + "mimeTypes": [], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + }, + { + "cellIndex": 2, + "cellType": "code", + "executionCount": 1, + "sourceBytes": 79, + "outputBytes": 78, + "outputCount": 1, + "mimeTypes": [ + "text/plain" + ], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + }, + { + "cellIndex": 3, + "cellType": "code", + "executionCount": 2, + "sourceBytes": 36, + "outputBytes": 112, + "outputCount": 1, + "mimeTypes": [ + "image/png" + ], + "hasVisualOutput": true, + "hasAltText": true, + "previewMode": "rich-preview", + "flags": [] + } + ], + "artifactSummary": [ + { + "path": "data/summary.csv", + "type": "dataset", + "sizeBytes": 18432, + "checksum": "sha256:2cb0c8f8b4c131e178e3e2f13c05b36f", + "previewGenerated": true, + "issues": [] + }, + { + "path": 
"results/segmentation-overlay.png", + "type": "figure", + "sizeBytes": 94421, + "checksum": "sha256:7f379b73698a4c81a39f6d1c", + "previewGenerated": true, + "issues": [] + } + ], + "dataciteDraft": { + "identifier": "10.5555/scibase.preview.001", + "creators": [ + { + "name": "Nora Patel", + "affiliation": "Cell Systems Lab", + "orcid": "0000-0002-1825-0097" + }, + { + "name": "Elijah Chen", + "affiliation": "Northbridge Institute", + "orcid": null + } + ], + "titles": [ + { + "title": "Cryo EM Morphology Notebook Preview" + } + ], + "publisher": "SCIBASE.AI", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "ComputationalNotebook" + }, + "rightsList": [ + { + "rights": "CC-BY-4.0" + } + ], + "subjects": [ + "cryo-em", + "segmentation", + "morphology" + ] + }, + "schemaOrgDataset": { + "@context": "https://schema.org", + "@type": "Dataset", + "name": "Cryo EM Morphology Notebook Preview", + "identifier": "10.5555/scibase.preview.001", + "license": "CC-BY-4.0", + "keywords": [ + "cryo-em", + "segmentation", + "morphology" + ], + "creator": [ + { + "@type": "Person", + "name": "Nora Patel", + "affiliation": "Cell Systems Lab" + }, + { + "@type": "Person", + "name": "Elijah Chen", + "affiliation": "Northbridge Institute" + } + ], + "distribution": [ + { + "@type": "DataDownload", + "contentUrl": "data/summary.csv", + "encodingFormat": "text/csv", + "contentSize": "18432", + "sha256": "sha256:2cb0c8f8b4c131e178e3e2f13c05b36f" + }, + { + "@type": "DataDownload", + "contentUrl": "results/segmentation-overlay.png", + "encodingFormat": "image/png", + "contentSize": "94421", + "sha256": "sha256:7f379b73698a4c81a39f6d1c" + } + ] + }, + "remediationPlan": [], + "reviewerPacket": "# Notebook Preview Integrity Packet\n\nProject: Cryo EM Morphology Notebook Preview\nDecision: ready\nPreview score: 100/100\nFAIR score: 99/100\nDigest: 273f93db1c5573e647af2b850c98ec303ea0b707074286aa4a18553d466fad70\n\n## Findings\n\n- No blocking or warning findings.\n\n## 
Remediation\n\n- Ready for public preview publication.\n\n## Preview Manifest\n\n- Cell 1: markdown, inline-text, 0 B output, flags: none\n- Cell 2: code, inline-text, 78 B output, flags: none\n- Cell 3: code, rich-preview, 112 B output, flags: none" + }, + { + "module": "notebook-preview-integrity-gate", + "projectId": "sci-preview-risk-007", + "title": "Unvetted Oncology Biomarker Notebook", + "generatedAt": "2026-05-20T10:00:00.000Z", + "decision": "blocked", + "score": 7, + "fairScore": { + "findable": 40, + "accessible": 65, + "interoperable": 60, + "reusable": 77, + "overall": 61 + }, + "digest": "148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4", + "checks": [ + { + "id": "notebook-present", + "status": "pass", + "severity": "info", + "message": "Notebook content is available for preview.", + "evidence": { + "cellCount": 3 + } + }, + { + "id": "cell-count-budget", + "status": "pass", + "severity": "info", + "message": "3 cells measured against preview limit 150.", + "evidence": { + "limit": 150, + "cellCount": 3 + } + }, + { + "id": "notebook-size-budget", + "status": "pass", + "severity": "info", + "message": "Notebook payload is 700.5 KB.", + "evidence": { + "limitBytes": 12582912, + "notebookBytes": 717284 + } + }, + { + "id": "unsafe-preview-content", + "status": "fail", + "severity": "critical", + "message": "1 cell(s) need redaction before public preview.", + "evidence": { + "affectedCells": [ + 1 + ] + } + }, + { + "id": "inline-output-budget", + "status": "warn", + "severity": "medium", + "message": "1 output(s) should be externalized before rendering.", + "evidence": { + "limitBytes": 524288, + "affectedCells": [ + 2 + ] + } + }, + { + "id": "execution-order-coherence", + "status": "warn", + "severity": "medium", + "message": "Execution counts are missing, duplicated, or out of order.", + "evidence": { + "warnings": [ + "execution-counts-out-of-order" + ] + } + }, + { + "id": "visual-alt-text", + "status": "warn", + "severity": 
"low", + "message": "2 visual output(s) need alt text before publication.", + "evidence": { + "affectedCells": [ + 1, + 2 + ] + } + }, + { + "id": "fair-metadata", + "status": "warn", + "severity": "high", + "message": "Missing metadata: doi, license, keywords, resourceType, schemaOrgType.", + "evidence": { + "missing": [ + "doi", + "license", + "keywords", + "resourceType", + "schemaOrgType" + ], + "present": [ + "authors", + "accessRights" + ] + } + }, + { + "id": "artifact-preview-readiness", + "status": "warn", + "severity": "medium", + "message": "1 artifact readiness issue(s) detected.", + "evidence": { + "failures": [ + { + "path": "data/patient-export.csv", + "issues": [ + "missing-checksum", + "missing-license", + "preview-not-generated" + ] + } + ] + } + }, + { + "id": "execution-environment-capture", + "status": "warn", + "severity": "medium", + "message": "Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies.", + "evidence": { + "missing": [ + "dependency-lock", + "seed-policy", + "pinned-dependencies" + ] + } + } + ], + "previewManifest": [ + { + "cellIndex": 1, + "cellType": "code", + "executionCount": 3, + "sourceBytes": 84, + "outputBytes": 91, + "outputCount": 1, + "mimeTypes": [ + "text/html" + ], + "hasVisualOutput": true, + "hasAltText": false, + "previewMode": "rich-preview", + "flags": [ + "possible-secret", + "active-html", + "missing-alt-text" + ] + }, + { + "cellIndex": 2, + "cellType": "code", + "executionCount": 2, + "sourceBytes": 26, + "outputBytes": 716816, + "outputCount": 1, + "mimeTypes": [ + "image/png" + ], + "hasVisualOutput": true, + "hasAltText": false, + "previewMode": "external-reference", + "flags": [ + "oversized-output", + "missing-alt-text" + ] + }, + { + "cellIndex": 3, + "cellType": "markdown", + "executionCount": null, + "sourceBytes": 38, + "outputBytes": 0, + "outputCount": 0, + "mimeTypes": [], + "hasVisualOutput": false, + "hasAltText": false, + "previewMode": "inline-text", + "flags": [] + 
} + ], + "artifactSummary": [ + { + "path": "data/patient-export.csv", + "type": "dataset", + "sizeBytes": 19000000, + "checksum": null, + "previewGenerated": false, + "issues": [ + "missing-checksum", + "missing-license", + "preview-not-generated" + ] + } + ], + "dataciteDraft": { + "identifier": null, + "creators": [ + { + "name": "A. Reviewer", + "affiliation": null, + "orcid": null + } + ], + "titles": [ + { + "title": "Unvetted Oncology Biomarker Notebook" + } + ], + "publisher": "Translational Oncology Group", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "ComputationalNotebook" + }, + "rightsList": [ + { + "rights": "unspecified" + } + ], + "subjects": [] + }, + "schemaOrgDataset": { + "@context": "https://schema.org", + "@type": "Dataset", + "name": "Unvetted Oncology Biomarker Notebook", + "identifier": "sci-preview-risk-007", + "license": null, + "keywords": [], + "creator": [ + { + "@type": "Person", + "name": "A. Reviewer" + } + ], + "distribution": [ + { + "@type": "DataDownload", + "contentUrl": "data/patient-export.csv", + "encodingFormat": "text/csv", + "contentSize": "19000000" + } + ] + }, + "remediationPlan": [ + { + "checkId": "unsafe-preview-content", + "severity": "critical", + "action": "Redact credentials, tokens, private keys, and active HTML before publishing the preview." + }, + { + "checkId": "inline-output-budget", + "severity": "medium", + "action": "Externalize heavy outputs to artifact storage and reference them from the preview manifest." + }, + { + "checkId": "execution-order-coherence", + "severity": "medium", + "action": "Re-run the notebook from a clean kernel so execution counts are complete and monotonic." + }, + { + "checkId": "visual-alt-text", + "severity": "low", + "action": "Add alt text or figure descriptions for every visual output." + }, + { + "checkId": "fair-metadata", + "severity": "high", + "action": "Complete DOI, author, license, keyword, access, resource type, and schema.org metadata." 
+ }, + { + "checkId": "artifact-preview-readiness", + "severity": "medium", + "action": "Add checksums, MIME types, licenses, and generated previews for every artifact." + }, + { + "checkId": "execution-environment-capture", + "severity": "medium", + "action": "Capture kernel, language, runtime or container image, lockfile digest, and seed policy." + } + ], + "reviewerPacket": "# Notebook Preview Integrity Packet\n\nProject: Unvetted Oncology Biomarker Notebook\nDecision: blocked\nPreview score: 7/100\nFAIR score: 61/100\nDigest: 148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4\n\n## Findings\n\n- unsafe-preview-content (critical): 1 cell(s) need redaction before public preview.\n- inline-output-budget (medium): 1 output(s) should be externalized before rendering.\n- execution-order-coherence (medium): Execution counts are missing, duplicated, or out of order.\n- visual-alt-text (low): 2 visual output(s) need alt text before publication.\n- fair-metadata (high): Missing metadata: doi, license, keywords, resourceType, schemaOrgType.\n- artifact-preview-readiness (medium): 1 artifact readiness issue(s) detected.\n- execution-environment-capture (medium): Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies.\n\n## Remediation\n\n- unsafe-preview-content: Redact credentials, tokens, private keys, and active HTML before publishing the preview.\n- inline-output-budget: Externalize heavy outputs to artifact storage and reference them from the preview manifest.\n- execution-order-coherence: Re-run the notebook from a clean kernel so execution counts are complete and monotonic.\n- visual-alt-text: Add alt text or figure descriptions for every visual output.\n- fair-metadata: Complete DOI, author, license, keyword, access, resource type, and schema.org metadata.\n- artifact-preview-readiness: Add checksums, MIME types, licenses, and generated previews for every artifact.\n- execution-environment-capture: Capture kernel, language, 
runtime or container image, lockfile digest, and seed policy.\n\n## Preview Manifest\n\n- Cell 1: code, rich-preview, 91 B output, flags: possible-secret, active-html, missing-alt-text\n- Cell 2: code, external-reference, 700.0 KB output, flags: oversized-output, missing-alt-text\n- Cell 3: markdown, inline-text, 0 B output, flags: none" + } + ] +} \ No newline at end of file diff --git a/notebook-preview-integrity-gate/demo-output/reviewer-packet.md b/notebook-preview-integrity-gate/demo-output/reviewer-packet.md new file mode 100644 index 0000000..9e797f3 --- /dev/null +++ b/notebook-preview-integrity-gate/demo-output/reviewer-packet.md @@ -0,0 +1,33 @@ +# Notebook Preview Integrity Packet + +Project: Unvetted Oncology Biomarker Notebook +Decision: blocked +Preview score: 7/100 +FAIR score: 61/100 +Digest: 148e30700c48295458d1f18eb28686caa666bbfff9e8c7096e1dc3b875739fb4 + +## Findings + +- unsafe-preview-content (critical): 1 cell(s) need redaction before public preview. +- inline-output-budget (medium): 1 output(s) should be externalized before rendering. +- execution-order-coherence (medium): Execution counts are missing, duplicated, or out of order. +- visual-alt-text (low): 2 visual output(s) need alt text before publication. +- fair-metadata (high): Missing metadata: doi, license, keywords, resourceType, schemaOrgType. +- artifact-preview-readiness (medium): 1 artifact readiness issue(s) detected. +- execution-environment-capture (medium): Execution environment gaps: dependency-lock, seed-policy, pinned-dependencies. + +## Remediation + +- unsafe-preview-content: Redact credentials, tokens, private keys, and active HTML before publishing the preview. +- inline-output-budget: Externalize heavy outputs to artifact storage and reference them from the preview manifest. +- execution-order-coherence: Re-run the notebook from a clean kernel so execution counts are complete and monotonic. 
+- visual-alt-text: Add alt text or figure descriptions for every visual output. +- fair-metadata: Complete DOI, author, license, keyword, access, resource type, and schema.org metadata. +- artifact-preview-readiness: Add checksums, MIME types, licenses, and generated previews for every artifact. +- execution-environment-capture: Capture kernel, language, runtime or container image, lockfile digest, and seed policy. + +## Preview Manifest + +- Cell 1: code, rich-preview, 91 B output, flags: possible-secret, active-html, missing-alt-text +- Cell 2: code, external-reference, 700.0 KB output, flags: oversized-output, missing-alt-text +- Cell 3: markdown, inline-text, 0 B output, flags: none \ No newline at end of file diff --git a/notebook-preview-integrity-gate/demo.js b/notebook-preview-integrity-gate/demo.js new file mode 100644 index 0000000..8605ffe --- /dev/null +++ b/notebook-preview-integrity-gate/demo.js @@ -0,0 +1,54 @@ +"use strict"; + +const fs = require("fs"); +const path = require("path"); +const { + auditNotebookPreviewIntegrity, + renderDashboardSvg, +} = require("./index"); +const { + cleanNotebookPackage, + riskyNotebookPackage, +} = require("./sample-data"); + +const outputDir = path.join(__dirname, "demo-output"); +fs.mkdirSync(outputDir, { recursive: true }); + +const generatedAt = "2026-05-20T10:00:00.000Z"; +const reports = [ + auditNotebookPreviewIntegrity(cleanNotebookPackage, { generatedAt }), + auditNotebookPreviewIntegrity(riskyNotebookPackage, { generatedAt }), +]; + +const portfolio = { + generatedAt, + module: "notebook-preview-integrity-gate", + summary: reports.map(function summarize(report) { + return { + projectId: report.projectId, + title: report.title, + decision: report.decision, + score: report.score, + fairScore: report.fairScore.overall, + digest: report.digest, + }; + }), + reports, +}; + +const riskReport = reports.find(function isBlocked(report) { + return report.decision === "blocked"; +}); + 
// ---- final statements of notebook-preview-integrity-gate/demo.js ----
fs.writeFileSync(path.join(outputDir, "notebook-preview-report.json"), JSON.stringify(portfolio, null, 2));
fs.writeFileSync(path.join(outputDir, "reviewer-packet.md"), riskReport.reviewerPacket);
fs.writeFileSync(path.join(outputDir, "notebook-preview-dashboard.svg"), renderDashboardSvg(riskReport));

console.log("Generated demo artifacts:");
console.log("- " + path.relative(process.cwd(), path.join(outputDir, "notebook-preview-report.json")));
console.log("- " + path.relative(process.cwd(), path.join(outputDir, "reviewer-packet.md")));
console.log("- " + path.relative(process.cwd(), path.join(outputDir, "notebook-preview-dashboard.svg")));
console.log("");
portfolio.summary.forEach(function printSummary(item) {
  console.log(item.projectId + ": " + item.decision + " (" + String(item.score) + "/100), FAIR " + String(item.fairScore) + "/100");
});

// ---- notebook-preview-integrity-gate/index.js ----
"use strict";

const crypto = require("crypto");

// Default preview budgets; callers override individual entries via options.limits.
const DEFAULT_LIMITS = Object.freeze({
  maxCells: 150,
  maxNotebookBytes: 12 * 1024 * 1024,
  maxInlineOutputBytes: 512 * 1024,
  maxArtifactBytes: 250 * 1024 * 1024,
});

// Points subtracted from the 100-point preview score for each non-passing check.
const SEVERITY_WEIGHT = Object.freeze({
  info: 0,
  low: 4,
  medium: 9,
  high: 18,
  critical: 35,
});

// Heuristic patterns for credentials that must never reach a public preview.
const SECRET_PATTERNS = [
  /api[_-]?key\s*[:=]\s*["']?[a-z0-9_\-]{16,}/i,
  /secret\s*[:=]\s*["']?[a-z0-9_\-]{16,}/i,
  /token\s*[:=]\s*["']?[a-z0-9_\-]{20,}/i,
  /password\s*[:=]\s*["']?[^"'\s]{8,}/i,
  /-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----/i,
];

/**
 * Audit a notebook package before its public preview is rendered.
 *
 * Runs ten checks (presence, cell and size budgets, unsafe content, inline
 * output budget, execution order, alt text, FAIR metadata, artifact readiness,
 * environment capture) and assembles the report object.
 *
 * @param {object} [project] Package with optional `projectId`, `title`,
 *   `notebook`, `artifacts`, `metadata`, `execution` and `license` fields.
 * @param {object} [options]
 * @param {object} [options.limits] Overrides merged over DEFAULT_LIMITS.
 * @param {string} [options.generatedAt] Timestamp recorded in the report;
 *   defaults to the current time in ISO format.
 * @returns {object} Report with decision, score, fairScore, digest, checks,
 *   previewManifest, artifactSummary, metadata drafts, remediationPlan and
 *   reviewerPacket.
 */
function auditNotebookPreviewIntegrity(project, options = {}) {
  const input = project || {};
  // The default parameter only covers `undefined`; an explicit null used to throw.
  const settings = options || {};
  const limits = Object.assign({}, DEFAULT_LIMITS, settings.limits || {});
  const generatedAt = settings.generatedAt || new Date().toISOString();
  const notebook = input.notebook || {};
  const cells = Array.isArray(notebook.cells) ? notebook.cells : [];
  const artifacts = Array.isArray(input.artifacts) ? input.artifacts : [];
  const metadata = input.metadata || {};
  const execution = input.execution || {};
  const checks = [];

  function toCellIndex(cell) {
    return cell.cellIndex;
  }

  const hasCells = cells.length > 0;
  addCheck(
    checks,
    "notebook-present",
    hasCells ? "pass" : "fail",
    hasCells ? "info" : "critical",
    hasCells ? "Notebook content is available for preview." : "Notebook has no cells to preview.",
    { cellCount: cells.length }
  );

  const withinCellBudget = cells.length <= limits.maxCells;
  addCheck(
    checks,
    "cell-count-budget",
    withinCellBudget ? "pass" : "fail",
    withinCellBudget ? "info" : "medium",
    String(cells.length) + " cells measured against preview limit " + String(limits.maxCells) + ".",
    { limit: limits.maxCells, cellCount: cells.length }
  );

  const notebookBytes = byteLength(JSON.stringify(notebook));
  const withinSizeBudget = notebookBytes <= limits.maxNotebookBytes;
  addCheck(
    checks,
    "notebook-size-budget",
    withinSizeBudget ? "pass" : "fail",
    withinSizeBudget ? "info" : "high",
    "Notebook payload is " + formatBytes(notebookBytes) + ".",
    { limitBytes: limits.maxNotebookBytes, notebookBytes }
  );

  const previewManifest = cells.map(function summarize(cell, index) {
    return summarizeCell(cell, index, limits);
  });

  const unsafeCells = previewManifest.filter(function filterUnsafe(cell) {
    return cell.flags.includes("possible-secret") || cell.flags.includes("active-html");
  });
  const contentIsSafe = unsafeCells.length === 0;
  addCheck(
    checks,
    "unsafe-preview-content",
    contentIsSafe ? "pass" : "fail",
    contentIsSafe ? "info" : "critical",
    contentIsSafe
      ? "No secrets or active HTML were detected in previewable content."
      : String(unsafeCells.length) + " cell(s) need redaction before public preview.",
    { affectedCells: unsafeCells.map(toCellIndex) }
  );

  const oversizedCells = previewManifest.filter(function filterOversized(cell) {
    return cell.outputBytes > limits.maxInlineOutputBytes;
  });
  const outputsFit = oversizedCells.length === 0;
  addCheck(
    checks,
    "inline-output-budget",
    outputsFit ? "pass" : "warn",
    outputsFit ? "info" : "medium",
    outputsFit
      ? "All inline outputs fit within preview budget."
      : String(oversizedCells.length) + " output(s) should be externalized before rendering.",
    { limitBytes: limits.maxInlineOutputBytes, affectedCells: oversizedCells.map(toCellIndex) }
  );

  const executionWarnings = findExecutionOrderWarnings(previewManifest);
  const orderIsCoherent = executionWarnings.length === 0;
  addCheck(
    checks,
    "execution-order-coherence",
    orderIsCoherent ? "pass" : "warn",
    orderIsCoherent ? "info" : "medium",
    orderIsCoherent
      ? "Executed code cells have coherent ordering."
      : "Execution counts are missing, duplicated, or out of order.",
    { warnings: executionWarnings }
  );

  const altTextGaps = previewManifest.filter(function filterAlt(cell) {
    return cell.hasVisualOutput && !cell.hasAltText;
  });
  const altTextComplete = altTextGaps.length === 0;
  addCheck(
    checks,
    "visual-alt-text",
    altTextComplete ? "pass" : "warn",
    altTextComplete ? "info" : "low",
    altTextComplete
      ? "Visual outputs include accessible labels or alt text."
      : String(altTextGaps.length) + " visual output(s) need alt text before publication.",
    { affectedCells: altTextGaps.map(toCellIndex) }
  );

  const metadataResult = evaluateMetadata(metadata, input);
  const metadataComplete = metadataResult.missing.length === 0;
  addCheck(
    checks,
    "fair-metadata",
    metadataComplete ? "pass" : "warn",
    metadataComplete ? "info" : "high",
    metadataComplete
      ? "Required FAIR, DataCite, and schema.org metadata is present."
      : "Missing metadata: " + metadataResult.missing.join(", ") + ".",
    { missing: metadataResult.missing, present: metadataResult.present }
  );

  const artifactResult = evaluateArtifacts(artifacts, limits);
  const artifactsReady = artifactResult.failures.length === 0;
  addCheck(
    checks,
    "artifact-preview-readiness",
    artifactsReady ? "pass" : "warn",
    artifactsReady ? "info" : "medium",
    artifactsReady
      ? "Artifacts are previewable and checksum-addressed."
      : String(artifactResult.failures.length) + " artifact readiness issue(s) detected.",
    { failures: artifactResult.failures }
  );

  // The environment may be declared on the execution record or on the notebook itself.
  const environmentResult = evaluateEnvironment(execution.environment || notebook.environment || {});
  const environmentCaptured = environmentResult.missing.length === 0;
  addCheck(
    checks,
    "execution-environment-capture",
    environmentCaptured ? "pass" : "warn",
    environmentCaptured ? "info" : "medium",
    environmentCaptured
      ? "Kernel, runtime, dependency lock, and seed policy are captured."
      : "Execution environment gaps: " + environmentResult.missing.join(", ") + ".",
    { missing: environmentResult.missing }
  );

  const score = calculateScore(checks);
  const decision = decide(checks, score);
  const fairScore = calculateFairScore(metadataResult, artifactResult, environmentResult);
  const dataciteDraft = buildDataCiteDraft(metadata, input);
  const schemaOrgDataset = buildSchemaOrgDataset(metadata, input, artifacts);
  const remediationPlan = buildRemediationPlan(checks);

  // generatedAt and check messages are left out of the hashed payload, so the
  // digest does not change when only the timestamp changes.
  const digest = stableHash({
    projectId: input.projectId,
    title: input.title,
    checks: checks.map(function mapCheck(check) {
      return {
        id: check.id,
        status: check.status,
        severity: check.severity,
        evidence: check.evidence,
      };
    }),
    previewManifest,
    dataciteDraft,
    schemaOrgDataset,
  });

  const report = {
    module: "notebook-preview-integrity-gate",
    projectId: input.projectId || "unassigned-project",
    title: input.title || "Untitled notebook package",
    generatedAt,
    decision,
    score,
    fairScore,
    digest,
    checks,
    previewManifest,
    artifactSummary: artifactResult.summary,
    dataciteDraft,
    schemaOrgDataset,
    remediationPlan,
  };

  // The packet is rendered from the finished report, so it is attached last.
  return Object.assign(report, {
    reviewerPacket: renderReviewerPacket(report),
  });
}

/**
 * Build the preview-manifest entry for one notebook cell.
 *
 * @param {object} cell Notebook cell; both camelCase (`cellType`,
 *   `executionCount`) and snake_case (`cell_type`, `execution_count`) spellings
 *   are read.
 * @param {number} index Zero-based position; reported as a 1-based `cellIndex`.
 * @param {object} limits Budgets; only `maxInlineOutputBytes` is read here.
 * @returns {object} Manifest entry with sizes, MIME types, preview mode and flags.
 */
function summarizeCell(cell, index, limits) {
  const current = cell || {}; // a null entry in `cells` should not abort the audit
  const source = normalizeText(current.source);
  const outputs = Array.isArray(current.outputs) ? current.outputs : [];
  const outputText = outputs.map(function mapOutput(output) {
    return normalizeText(output.text || output.data || output);
  }).join("\n");
  const outputBytes = byteLength(outputText);
  const mimeTypes = collectMimeTypes(outputs);
  const hasVisualOutput = mimeTypes.some(function visualMime(mime) {
    return mime.startsWith("image/") || mime === "text/html" || mime === "application/vnd.plotly.v1+json";
  });
  const cellHasAlt = current.altText || (current.metadata && (current.metadata.altText || current.metadata.description));
  const hasAltText = Boolean(cellHasAlt || outputs.some(function outputAlt(output) {
    return output.altText || (output.metadata && output.metadata.altText);
  }));
  const cellType = current.cellType || current.cell_type || "unknown";
  const isOversized = outputBytes > limits.maxInlineOutputBytes;

  // NOTE(review): the original active-HTML pattern was lost when this file was
  // extracted (everything between "<" and ">" was stripped). This pattern is a
  // reconstruction covering script/iframe/object/embed tags, inline event
  // handlers inside a tag, and javascript: URLs; confirm against upstream.
  const activeHtmlPattern = /<\s*(?:script|iframe|object|embed)\b|<[^>]+\son[a-z]+\s*=|javascript:/i;

  const flags = [];
  if (containsSecret(source) || containsSecret(outputText)) flags.push("possible-secret");
  if (activeHtmlPattern.test(source) || activeHtmlPattern.test(outputText)) flags.push("active-html");
  if (isOversized) flags.push("oversized-output");
  if (hasVisualOutput && !hasAltText) flags.push("missing-alt-text");
  if (cellType === "code" && outputs.length === 0) flags.push("no-output");

  let executionCount = current.executionCount;
  if (executionCount === undefined) {
    executionCount = current.execution_count === undefined ? null : current.execution_count;
  }

  let previewMode = "inline-text";
  if (isOversized) {
    previewMode = "external-reference";
  } else if (hasVisualOutput) {
    previewMode = "rich-preview";
  }

  return {
    cellIndex: index + 1,
    cellType,
    executionCount,
    sourceBytes: byteLength(source),
    outputBytes,
    outputCount: outputs.length,
    mimeTypes,
    hasVisualOutput,
    hasAltText,
    previewMode,
    flags,
  };
}
"rich-preview" : "inline-text", + flags, + }; +} + +function evaluateMetadata(metadata, project) { + const present = []; + const missing = []; + const required = [ + ["doi", metadata.doi || metadata.identifier], + ["authors", Array.isArray(metadata.authors) && metadata.authors.length > 0], + ["license", metadata.license || project.license], + ["keywords", Array.isArray(metadata.keywords) && metadata.keywords.length > 0], + ["resourceType", metadata.resourceType || metadata.resource_type], + ["accessRights", metadata.accessRights || metadata.access_rights || metadata.access], + ["schemaOrgType", metadata.schemaOrgType || (metadata.schemaOrg && metadata.schemaOrg["@type"])], + ]; + + required.forEach(function checkRequired(entry) { + if (entry[1]) present.push(entry[0]); + else missing.push(entry[0]); + }); + + return { present, missing }; +} + +function evaluateArtifacts(artifacts, limits) { + const failures = []; + const summary = artifacts.map(function mapArtifact(artifact) { + const issueIds = []; + if (!artifact.checksum) issueIds.push("missing-checksum"); + if (!artifact.mimeType && !artifact.mime) issueIds.push("missing-mime-type"); + if (!artifact.license) issueIds.push("missing-license"); + if (Number(artifact.sizeBytes || 0) > limits.maxArtifactBytes) issueIds.push("artifact-over-budget"); + if (artifact.previewGenerated === false) issueIds.push("preview-not-generated"); + if (issueIds.length > 0) { + failures.push({ + path: artifact.path || artifact.name || "unnamed-artifact", + issues: issueIds, + }); + } + return { + path: artifact.path || artifact.name || "unnamed-artifact", + type: artifact.type || inferArtifactType(artifact.path || artifact.name || ""), + sizeBytes: Number(artifact.sizeBytes || 0), + checksum: artifact.checksum || null, + previewGenerated: artifact.previewGenerated !== false, + issues: issueIds, + }; + }); + + return { failures, summary }; +} + +function evaluateEnvironment(environment) { + const missing = []; + if 
(!environment.kernel) missing.push("kernel"); + if (!environment.language) missing.push("language"); + if (!environment.runtime && !environment.containerImage) missing.push("runtime-or-container"); + if (!environment.lockfile && !environment.lockfileDigest) missing.push("dependency-lock"); + if (!environment.seedPolicy && !environment.randomSeed) missing.push("seed-policy"); + if (Array.isArray(environment.dependencies)) { + const unpinned = environment.dependencies.filter(function unpinned(dependency) { + return !/[=~<>!]=|@|\b\d+\.\d+/.test(String(dependency.version || dependency)); + }); + if (unpinned.length > 0) missing.push("pinned-dependencies"); + } + return { missing }; +} + +function findExecutionOrderWarnings(cells) { + const codeCells = cells.filter(function filterCode(cell) { + return cell.cellType === "code"; + }); + const counts = codeCells.map(function mapCount(cell) { + return cell.executionCount; + }).filter(function presentCount(count) { + return count !== null && count !== undefined; + }); + const warnings = []; + + if (codeCells.length > 0 && counts.length !== codeCells.length) warnings.push("some-code-cells-lack-execution-counts"); + if (new Set(counts).size !== counts.length) warnings.push("duplicate-execution-counts"); + for (let index = 1; index < counts.length; index += 1) { + if (counts[index] < counts[index - 1]) { + warnings.push("execution-counts-out-of-order"); + break; + } + } + return warnings; +} + +function calculateScore(checks) { + const penalty = checks.reduce(function sumPenalty(sum, check) { + if (check.status === "pass") return sum; + return sum + SEVERITY_WEIGHT[check.severity]; + }, 0); + return Math.max(0, Math.min(100, 100 - penalty)); +} + +function decide(checks, score) { + const hasCriticalFailure = checks.some(function critical(check) { + return check.status === "fail" && check.severity === "critical"; + }); + const hasHighFailure = checks.some(function high(check) { + return check.status === "fail" && check.severity 
=== "high"; + }); + const hasWarning = checks.some(function warn(check) { + return check.status === "warn"; + }); + if (hasCriticalFailure || score < 55) return "blocked"; + if (hasHighFailure || score < 85 || hasWarning) return "needs-review"; + return "ready"; +} + +function calculateFairScore(metadataResult, artifactResult, environmentResult) { + const components = { + findable: scoreFromMissing(metadataResult, ["doi", "keywords", "schemaOrgType"]), + accessible: artifactResult.failures.some(function previewFailure(failure) { + return failure.issues.includes("preview-not-generated"); + }) ? 65 : 95, + interoperable: scoreFromMissing(metadataResult, ["resourceType", "schemaOrgType"]), + reusable: Math.max(35, 100 - (artifactResult.failures.length * 8) - (environmentResult.missing.length * 5)), + }; + components.overall = Math.round((components.findable + components.accessible + components.interoperable + components.reusable) / 4); + return components; +} + +function scoreFromMissing(metadataResult, expected) { + const missingCount = expected.filter(function missing(key) { + return metadataResult.missing.includes(key); + }).length; + return Math.max(40, 100 - missingCount * 20); +} + +function buildDataCiteDraft(metadata, project) { + const authors = Array.isArray(metadata.authors) ? 
metadata.authors : []; + return { + identifier: metadata.doi || metadata.identifier || null, + creators: authors.map(function mapAuthor(author) { + return { + name: author.name || author, + affiliation: author.affiliation || null, + orcid: author.orcid || null, + }; + }), + titles: [{ title: project.title || metadata.title || "Untitled notebook package" }], + publisher: metadata.publisher || project.owner || "SCIBASE.AI", + publicationYear: metadata.publicationYear || new Date().getUTCFullYear(), + resourceType: { + resourceTypeGeneral: metadata.resourceType || "ComputationalNotebook", + }, + rightsList: [{ rights: metadata.license || project.license || "unspecified" }], + subjects: metadata.keywords || [], + }; +} + +function buildSchemaOrgDataset(metadata, project, artifacts) { + return { + "@context": "https://schema.org", + "@type": metadata.schemaOrgType || (metadata.schemaOrg && metadata.schemaOrg["@type"]) || "Dataset", + name: project.title || metadata.title || "Untitled notebook package", + identifier: metadata.doi || metadata.identifier || project.projectId || null, + license: metadata.license || project.license || null, + keywords: metadata.keywords || [], + creator: (metadata.authors || []).map(function mapCreator(author) { + return { + "@type": "Person", + name: author.name || author, + affiliation: author.affiliation || undefined, + }; + }), + distribution: artifacts.map(function mapDistribution(artifact) { + return { + "@type": "DataDownload", + contentUrl: artifact.path || artifact.name, + encodingFormat: artifact.mimeType || artifact.mime || inferMimeFromPath(artifact.path || artifact.name || ""), + contentSize: artifact.sizeBytes ? 
String(artifact.sizeBytes) : undefined, + sha256: artifact.checksum || undefined, + }; + }), + }; +} + +function buildRemediationPlan(checks) { + return checks.filter(function notPass(check) { + return check.status !== "pass"; + }).map(function mapPlan(check) { + return { + checkId: check.id, + severity: check.severity, + action: remediationFor(check), + }; + }); +} + +function remediationFor(check) { + const actions = { + "notebook-present": "Attach a valid ipynb payload or code notebook export before generating public previews.", + "cell-count-budget": "Split the notebook preview into sections or collapse low-value appendix cells.", + "notebook-size-budget": "Move large binary outputs into checksum-addressed artifacts and keep notebook JSON lightweight.", + "unsafe-preview-content": "Redact credentials, tokens, private keys, and active HTML before publishing the preview.", + "inline-output-budget": "Externalize heavy outputs to artifact storage and reference them from the preview manifest.", + "execution-order-coherence": "Re-run the notebook from a clean kernel so execution counts are complete and monotonic.", + "visual-alt-text": "Add alt text or figure descriptions for every visual output.", + "fair-metadata": "Complete DOI, author, license, keyword, access, resource type, and schema.org metadata.", + "artifact-preview-readiness": "Add checksums, MIME types, licenses, and generated previews for every artifact.", + "execution-environment-capture": "Capture kernel, language, runtime or container image, lockfile digest, and seed policy.", + }; + return actions[check.id] || "Review this finding and add the missing evidence."; +} + +function renderReviewerPacket(report) { + const failingChecks = report.checks.filter(function notPass(check) { + return check.status !== "pass"; + }); + const lines = [ + "# Notebook Preview Integrity Packet", + "", + "Project: " + report.title, + "Decision: " + report.decision, + "Preview score: " + String(report.score) + "/100", + 
"FAIR score: " + String(report.fairScore.overall) + "/100", + "Digest: " + report.digest, + "", + "## Findings", + "", + ]; + + if (failingChecks.length === 0) { + lines.push("- No blocking or warning findings."); + } else { + failingChecks.forEach(function addFinding(check) { + lines.push("- " + check.id + " (" + check.severity + "): " + check.message); + }); + } + + lines.push("", "## Remediation", ""); + if (report.remediationPlan.length === 0) { + lines.push("- Ready for public preview publication."); + } else { + report.remediationPlan.forEach(function addPlan(item) { + lines.push("- " + item.checkId + ": " + item.action); + }); + } + + lines.push("", "## Preview Manifest", ""); + report.previewManifest.forEach(function addCell(cell) { + const flags = cell.flags.length ? cell.flags.join(", ") : "none"; + lines.push("- Cell " + String(cell.cellIndex) + ": " + cell.cellType + ", " + cell.previewMode + ", " + formatBytes(cell.outputBytes) + " output, flags: " + flags); + }); + + return lines.join("\n"); +} + +function renderDashboardSvg(report) { + const width = 960; + const height = 540; + const statusColor = report.decision === "ready" ? "#147D64" : report.decision === "needs-review" ? "#A45D00" : "#B42318"; + const checkRows = report.checks.slice(0, 7).map(function mapCheckRow(check, index) { + const y = 200 + index * 36; + const color = check.status === "pass" ? "#147D64" : check.status === "warn" ? 
"#A45D00" : "#B42318"; + return "" + escapeXml(check.id) + "" + escapeXml(check.status) + ""; + }).join("\n"); + + return [ + "", + "", + "", + "Notebook Preview Integrity Gate", + "" + escapeXml(report.title) + "", + "", + "" + String(report.score) + "", + "" + escapeXml(report.decision) + "", + "FAIR " + String(report.fairScore.overall) + "/100 - " + String(report.previewManifest.length) + " cells reviewed", + "", + checkRows, + "Digest " + escapeXml(report.digest.slice(0, 24)) + "", + "", + ].join("\n"); +} + +function addCheck(checks, id, status, severity, message, evidence = {}) { + checks.push({ id, status, severity, message, evidence }); +} + +function collectMimeTypes(outputs) { + const types = new Set(); + outputs.forEach(function eachOutput(output) { + if (output.mimeType) types.add(output.mimeType); + if (output.data && typeof output.data === "object" && !Array.isArray(output.data)) { + Object.keys(output.data).forEach(function addKey(key) { + types.add(key); + }); + } + if (output.output_type === "display_data" && output.data) { + Object.keys(output.data).forEach(function addDisplayKey(key) { + types.add(key); + }); + } + if (typeof output === "string") types.add("text/plain"); + }); + return Array.from(types).sort(); +} + +function normalizeText(value) { + if (Array.isArray(value)) return value.join(""); + if (typeof value === "string") return value; + if (value === null || value === undefined) return ""; + return JSON.stringify(value); +} + +function byteLength(value) { + return Buffer.byteLength(normalizeText(value), "utf8"); +} + +function containsSecret(value) { + const text = normalizeText(value); + return SECRET_PATTERNS.some(function secret(pattern) { + return pattern.test(text); + }); +} + +function inferArtifactType(path) { + const lower = path.toLowerCase(); + if (/\.(csv|tsv|json|parquet|xlsx)$/.test(lower)) return "dataset"; + if (/\.(ipynb|py|r|jl|js)$/.test(lower)) return "code"; + if (/\.(png|jpg|jpeg|svg|webp)$/.test(lower)) return 
"figure"; + return "artifact"; +} + +function inferMimeFromPath(path) { + const lower = path.toLowerCase(); + if (lower.endsWith(".csv")) return "text/csv"; + if (lower.endsWith(".json")) return "application/json"; + if (lower.endsWith(".ipynb")) return "application/x-ipynb+json"; + if (lower.endsWith(".png")) return "image/png"; + if (lower.endsWith(".svg")) return "image/svg+xml"; + return "application/octet-stream"; +} + +function stableHash(value) { + return crypto.createHash("sha256").update(JSON.stringify(sortKeys(value))).digest("hex"); +} + +function sortKeys(value) { + if (Array.isArray(value)) return value.map(sortKeys); + if (!value || typeof value !== "object") return value; + return Object.keys(value).sort().reduce(function reduceKeys(result, key) { + result[key] = sortKeys(value[key]); + return result; + }, {}); +} + +function formatBytes(bytes) { + const value = Number(bytes || 0); + if (value < 1024) return String(value) + " B"; + if (value < 1024 * 1024) return (value / 1024).toFixed(1) + " KB"; + return (value / (1024 * 1024)).toFixed(1) + " MB"; +} + +function escapeXml(value) { + return String(value) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +module.exports = { + auditNotebookPreviewIntegrity, + renderReviewerPacket, + renderDashboardSvg, + formatBytes, +}; diff --git a/notebook-preview-integrity-gate/requirements-map.md b/notebook-preview-integrity-gate/requirements-map.md new file mode 100644 index 0000000..2f05658 --- /dev/null +++ b/notebook-preview-integrity-gate/requirements-map.md @@ -0,0 +1,20 @@ +# Requirements Map + +Issue #14 asks for a Scientific Data and Code Hosting layer that can store, preview, version, and publish research artifacts with structured metadata and reproducibility support. + +| Requirement area | Implementation coverage | +| --- | --- | +| Metadata-aware previews | previewManifest summarizes notebook cells, outputs, MIME types, preview modes, visual output state, and flags. 
| +| Notebook rendering readiness | unsafe-preview-content, inline-output-budget, execution-order-coherence, and visual-alt-text checks decide whether a notebook is ready, needs review, or is blocked. | +| Dataset and code artifacts | artifact-preview-readiness checks checksum, MIME type, license, size budget, and generated preview state for attached artifacts. | +| FAIR principles | fairScore grades findable, accessible, interoperable, and reusable readiness from metadata, artifacts, and execution environment capture. | +| DataCite metadata | dataciteDraft returns identifier, creators, title, publisher, publication year, resource type, rights, and subjects. | +| schema.org markup | schemaOrgDataset emits a JSON-safe Dataset object with distributions and checksums. | +| Reproducibility support | execution-environment-capture requires kernel, language, runtime or container, dependency lock, pinned dependencies, and seed policy. | +| Moderation workflow | reviewerPacket and renderDashboardSvg create human-readable review evidence for admins before preview publication. | +| Auditability | digest is stable across generated timestamps and can be stored in future audit logs or release records. 
| + +## Acceptance checks + + node notebook-preview-integrity-gate/test.js + node notebook-preview-integrity-gate/demo.js diff --git a/notebook-preview-integrity-gate/sample-data.js b/notebook-preview-integrity-gate/sample-data.js new file mode 100644 index 0000000..dc2c59e --- /dev/null +++ b/notebook-preview-integrity-gate/sample-data.js @@ -0,0 +1,159 @@ +"use strict"; + +const cleanNotebookPackage = { + projectId: "sci-preview-clean-001", + title: "Cryo EM Morphology Notebook Preview", + owner: "Cell Systems Lab", + license: "CC-BY-4.0", + metadata: { + doi: "10.5555/scibase.preview.001", + authors: [ + { name: "Nora Patel", affiliation: "Cell Systems Lab", orcid: "0000-0002-1825-0097" }, + { name: "Elijah Chen", affiliation: "Northbridge Institute" }, + ], + keywords: ["cryo-em", "segmentation", "morphology"], + license: "CC-BY-4.0", + resourceType: "ComputationalNotebook", + accessRights: "open", + schemaOrgType: "Dataset", + publisher: "SCIBASE.AI", + publicationYear: 2026, + }, + execution: { + environment: { + kernel: "python3", + language: "python", + runtime: "python 3.11.9", + lockfileDigest: "sha256:f2a8c2d8e3d05d2efc0104989f53c4cc36ec", + seedPolicy: "All stochastic steps use seed 4421 and are logged in run metadata.", + dependencies: [ + { name: "numpy", version: "1.26.4" }, + { name: "pandas", version: "2.2.2" }, + { name: "scikit-image", version: "0.23.2" }, + ], + }, + }, + notebook: { + path: "notebooks/morphology-preview.ipynb", + cells: [ + { + cellType: "markdown", + source: "# Cryo EM morphology QC\nThis notebook previews sample-level segmentation quality.", + }, + { + cellType: "code", + executionCount: 1, + source: "import pandas as pd\nsummary = pd.read_csv('../data/summary.csv')\nsummary.head()", + outputs: [ + { + data: { + "text/plain": "sample_id area_um2 eccentricity\nA01 12.5 0.41\nA02 13.1 0.39", + }, + }, + ], + }, + { + cellType: "code", + executionCount: 2, + source: "plot_segmentation_overlay(seed=4421)", + altText: 
"Segmentation overlay with nuclei boundaries highlighted in green.", + outputs: [ + { + data: { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIW2NkYGD4DwABBAEAgi6xWQAAAABJRU5ErkJggg==", + }, + }, + ], + }, + ], + }, + artifacts: [ + { + path: "data/summary.csv", + type: "dataset", + sizeBytes: 18432, + checksum: "sha256:2cb0c8f8b4c131e178e3e2f13c05b36f", + mimeType: "text/csv", + license: "CC-BY-4.0", + previewGenerated: true, + }, + { + path: "results/segmentation-overlay.png", + type: "figure", + sizeBytes: 94421, + checksum: "sha256:7f379b73698a4c81a39f6d1c", + mimeType: "image/png", + license: "CC-BY-4.0", + previewGenerated: true, + }, + ], +}; + +const riskyNotebookPackage = { + projectId: "sci-preview-risk-007", + title: "Unvetted Oncology Biomarker Notebook", + owner: "Translational Oncology Group", + metadata: { + authors: [{ name: "A. Reviewer" }], + keywords: [], + accessRights: "restricted", + }, + execution: { + environment: { + kernel: "python3", + language: "python", + runtime: "python 3.10", + dependencies: [ + { name: "pandas", version: "latest" }, + { name: "lifelines" }, + ], + }, + }, + notebook: { + path: "notebooks/biomarker.ipynb", + cells: [ + { + cellType: "code", + executionCount: 3, + source: "API_KEY='demo_credential_should_be_redacted_1234567890'\nload_private_patient_table()", + outputs: [ + { + data: { + "text/html": "
patient_id
", + }, + }, + ], + }, + { + cellType: "code", + executionCount: 2, + source: "render_kaplan_meier_plot()", + outputs: [ + { + data: { + "image/png": "x".repeat(700 * 1024), + }, + }, + ], + }, + { + cellType: "markdown", + source: "Final figure is pending manual upload.", + }, + ], + }, + artifacts: [ + { + path: "data/patient-export.csv", + type: "dataset", + sizeBytes: 19000000, + mimeType: "text/csv", + previewGenerated: false, + }, + ], +}; + +module.exports = { + cleanNotebookPackage, + riskyNotebookPackage, +}; diff --git a/notebook-preview-integrity-gate/test.js b/notebook-preview-integrity-gate/test.js new file mode 100644 index 0000000..bb21c1d --- /dev/null +++ b/notebook-preview-integrity-gate/test.js @@ -0,0 +1,67 @@ +"use strict"; + +const assert = require("assert"); +const { + auditNotebookPreviewIntegrity, + renderDashboardSvg, + formatBytes, +} = require("./index"); +const { + cleanNotebookPackage, + riskyNotebookPackage, +} = require("./sample-data"); + +const stableTimestamp = "2026-05-20T10:00:00.000Z"; + +function testCleanPackageIsReady() { + const report = auditNotebookPreviewIntegrity(cleanNotebookPackage, { generatedAt: stableTimestamp }); + assert.strictEqual(report.decision, "ready"); + assert(report.score >= 90, "expected high score, got " + String(report.score)); + assert.strictEqual(report.remediationPlan.length, 0); + assert.strictEqual(report.previewManifest.length, 3); + assert(report.dataciteDraft.identifier.includes("10.5555")); + assert.strictEqual(report.schemaOrgDataset.distribution.length, 2); +} + +function testRiskyPackageIsBlocked() { + const report = auditNotebookPreviewIntegrity(riskyNotebookPackage, { generatedAt: stableTimestamp }); + assert.strictEqual(report.decision, "blocked"); + assert(report.score < 55, "expected blocked score, got " + String(report.score)); + assert(report.remediationPlan.some(function hasUnsafe(item) { + return item.checkId === "unsafe-preview-content"; + })); + 
assert(report.checks.some(function hasFairWarning(check) { + return check.id === "fair-metadata" && check.status === "warn"; + })); + assert(report.previewManifest.some(function hasSecret(cell) { + return cell.flags.includes("possible-secret"); + })); + assert(report.previewManifest.some(function hasExternal(cell) { + return cell.previewMode === "external-reference"; + })); +} + +function testDigestIsDeterministic() { + const first = auditNotebookPreviewIntegrity(cleanNotebookPackage, { generatedAt: stableTimestamp }); + const second = auditNotebookPreviewIntegrity(cleanNotebookPackage, { generatedAt: "2026-05-20T11:00:00.000Z" }); + assert.strictEqual(first.digest, second.digest); +} + +function testReviewerArtifactsAreUseful() { + const report = auditNotebookPreviewIntegrity(riskyNotebookPackage, { generatedAt: stableTimestamp }); + const packet = report.reviewerPacket; + const svg = renderDashboardSvg(report); + + assert(packet.includes("Notebook Preview Integrity Packet")); + assert(packet.includes("unsafe-preview-content")); + assert(svg.includes("