diff --git a/scientific-reference-workbench/README.md b/scientific-reference-workbench/README.md new file mode 100644 index 0000000..24de160 --- /dev/null +++ b/scientific-reference-workbench/README.md @@ -0,0 +1,35 @@ +# Scientific Reference Workbench + +Dependency-free citation and cross-reference checks for a real-time scientific editor. + +This module focuses on the publication-formatting layer of issue #12. It turns manuscript sections, Markdown text, equation blocks, figures, tables, and bibliography entries into a deterministic export-readiness packet. + +## Run + +```bash +npm run check +npm test +npm run demo +``` + +## Demo Output + +```text +Manuscript: Protocol-Guided Perturbation Screening +Status: format-review-needed +Citations discovered: 3 +Missing citations: missing2026 +Cross references: 3 +Formatting tasks: 1 +Top action: Add bibliography entry for @missing2026 or remove the citation. +``` + +## Files + +- `src/reference-workbench.js` extracts citations and cross references, builds numbering, formats blocks, validates style requirements, emits reviewer tasks, and fingerprints the packet with a stable SHA-256 digest. +- `data/sample-manuscript.json` contains synthetic manuscript sections, bibliography entries, equation/figure/table labels, and references. +- `test/reference-workbench.test.js` verifies citation coverage, label numbering, formatted text, export blocking, and resolved-export behavior. +- `docs/requirement-map.md` maps the slice to issue #12. +- `docs/demo.svg` and `docs/demo.mp4` provide a short visual artifact for review. + +No external reference-manager credential, private manuscript, or third-party API is used. 
diff --git a/scientific-reference-workbench/data/sample-manuscript.json b/scientific-reference-workbench/data/sample-manuscript.json new file mode 100644 index 0000000..549c1b2 --- /dev/null +++ b/scientific-reference-workbench/data/sample-manuscript.json @@ -0,0 +1,93 @@ +{ + "manuscriptId": "collab-editor-reference-demo", + "title": "Protocol-Guided Perturbation Screening", + "style": { + "name": "SCIBASE short article", + "citationMode": "numeric", + "requiredSections": ["abstract", "methods", "results", "data-availability"] + }, + "sections": [ + { + "id": "abstract", + "heading": "Abstract", + "blocks": [ + { + "id": "abs-1", + "type": "markdown", + "text": "We combine registered protocols with live analysis notebooks [@nguyen2025] and summarize the reproducibility gates in {{fig:workflow}}." + } + ] + }, + { + "id": "methods", + "heading": "Methods", + "blocks": [ + { + "id": "meth-1", + "type": "markdown", + "text": "The loss function follows {{eq:weighted-loss}} and the metadata schema extends prior collaborative notebook work [@park2024; @missing2026]." + }, + { + "id": "meth-eq-1", + "type": "equation", + "label": "eq:weighted-loss", + "text": "L = alpha * error + beta * drift" + } + ] + }, + { + "id": "results", + "heading": "Results", + "blocks": [ + { + "id": "res-1", + "type": "markdown", + "text": "A blinded reviewer can inspect {{tbl:quality-gates}} before accepting a release candidate." + }, + { + "id": "res-fig-1", + "type": "figure", + "label": "fig:workflow", + "caption": "Reference-aware collaboration workflow." + }, + { + "id": "res-table-1", + "type": "table", + "label": "tbl:quality-gates", + "caption": "Formatting gates for reviewer-ready manuscripts." + } + ] + }, + { + "id": "data-availability", + "heading": "Data Availability", + "blocks": [ + { + "id": "data-1", + "type": "markdown", + "text": "Synthetic editor traces and bibliography fixtures are included with this module." 
+ } + ] + } + ], + "bibliography": [ + { + "key": "nguyen2025", + "title": "Live Scientific Editing with Structured Protocols", + "authors": ["Nguyen", "Sato"], + "year": 2025 + }, + { + "key": "park2024", + "title": "Notebook Review Pipelines for Collaborative Research", + "authors": ["Park"], + "year": 2024 + }, + { + "key": "unused2023", + "title": "Unused Background Citation", + "authors": ["Mendez"], + "year": 2023 + } + ] +} diff --git a/scientific-reference-workbench/docs/demo.mp4 b/scientific-reference-workbench/docs/demo.mp4 new file mode 100644 index 0000000..30ddbe5 Binary files /dev/null and b/scientific-reference-workbench/docs/demo.mp4 differ diff --git a/scientific-reference-workbench/docs/demo.svg b/scientific-reference-workbench/docs/demo.svg new file mode 100644 index 0000000..f71b897 --- /dev/null +++ b/scientific-reference-workbench/docs/demo.svg @@ -0,0 +1,34 @@ + + Scientific reference workbench demo + Dashboard-style summary of citation coverage, cross references, formatting tasks, and export readiness. + + + Scientific Reference Workbench + Citation, figure, table, equation, and publication-style checks + + + Citations + 3 found + + + + Missing + 1 key + + + + Cross refs + 3 valid + + + + Sections + passed + + + + Top formatting task + Add bibliography entry for @missing2026 + Deterministic numbering · export blocked until references are clean + + diff --git a/scientific-reference-workbench/docs/requirement-map.md b/scientific-reference-workbench/docs/requirement-map.md new file mode 100644 index 0000000..cdba634 --- /dev/null +++ b/scientific-reference-workbench/docs/requirement-map.md @@ -0,0 +1,31 @@ +# Requirement Map + +This module contributes a focused rich-formatting slice for issue #12, "Real-time collaborative research editor & interface." 
+ +| Issue area | Covered by this module | +| --- | --- | +| Markdown and LaTeX support | Tracks equation blocks and deterministic equation numbering | +| Cross-referencing figures, tables, and citations | Extracts citation keys and `{{label}}` references, validates coverage, and renders deterministic display labels | +| Reference manager integration | Verifies bibliography coverage for citation keys and surfaces unused bibliography entries | +| Publication-style templates | Checks required publication sections and blocks export when style requirements fail | +| Inline reviewer workflow | Emits owner-assigned formatting tasks for missing citations, missing cross-references, duplicate labels, and missing sections | + +## Distinctness + +Existing #12 submissions focus on broad collaborative editors, operation replay, governance, offline conflict resolution, and notebook collaboration. This module focuses on the publication-formatting layer that a scientific editor needs before export: + +- citation-key extraction from manuscript text +- bibliography coverage checks +- figure/table/equation label numbering +- unresolved cross-reference detection +- style-specific required-section checks +- reviewer-ready formatting task output + +## Verification + +```bash +cd scientific-reference-workbench +npm run check +npm test +npm run demo +``` diff --git a/scientific-reference-workbench/package.json b/scientific-reference-workbench/package.json new file mode 100644 index 0000000..8614f5f --- /dev/null +++ b/scientific-reference-workbench/package.json @@ -0,0 +1,18 @@ +{ + "name": "scientific-reference-workbench", + "version": "1.0.0", + "private": true, + "description": "Dependency-free scientific citation and cross-reference formatting workbench.", + "scripts": { + "check": "node --check src/reference-workbench.js && node --check scripts/demo.js && node --check test/reference-workbench.test.js", + "demo": "node scripts/demo.js", + "test": "node 
test/reference-workbench.test.js" + }, + "keywords": [ + "scientific-editor", + "citations", + "cross-references", + "publication-formatting" + ], + "license": "MIT" +} diff --git a/scientific-reference-workbench/scripts/demo.js b/scientific-reference-workbench/scripts/demo.js new file mode 100644 index 0000000..daa522c --- /dev/null +++ b/scientific-reference-workbench/scripts/demo.js @@ -0,0 +1,16 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const { buildReferenceWorkbench } = require("../src/reference-workbench"); + +const samplePath = path.join(__dirname, "..", "data", "sample-manuscript.json"); +const manuscript = JSON.parse(fs.readFileSync(samplePath, "utf8")); +const report = buildReferenceWorkbench(manuscript); + +console.log(`Manuscript: ${report.title}`); +console.log(`Status: ${report.dashboard.status}`); +console.log(`Citations discovered: ${report.citations.uniqueKeys.length}`); +console.log(`Missing citations: ${report.citations.missing.join(", ") || "none"}`); +console.log(`Cross references: ${report.crossReferences.occurrences.length}`); +console.log(`Formatting tasks: ${report.tasks.length}`); +console.log(`Top action: ${report.dashboard.topAction}`); +console.log(`Digest: ${report.digest}`); diff --git a/scientific-reference-workbench/src/reference-workbench.js b/scientific-reference-workbench/src/reference-workbench.js new file mode 100644 index 0000000..05b4d5c --- /dev/null +++ b/scientific-reference-workbench/src/reference-workbench.js @@ -0,0 +1,305 @@ +const crypto = require("node:crypto"); + +function buildReferenceWorkbench(manuscript) { + const validation = validateManuscript(manuscript); + const labels = collectLabels(manuscript.sections || []); + const citations = collectCitations(manuscript.sections || [], manuscript.bibliography || []); + const crossReferences = collectCrossReferences(manuscript.sections || [], labels); + const numbering = buildNumbering(labels, citations); + const formattedBlocks = 
formatBlocks(manuscript.sections || [], numbering); + const styleChecklist = buildStyleChecklist(manuscript, citations, crossReferences, labels); + const tasks = buildFormattingTasks(citations, crossReferences, labels, styleChecklist); + const outline = buildPublicationOutline(manuscript, numbering, styleChecklist); + + const report = { + manuscriptId: manuscript.manuscriptId, + title: manuscript.title, + style: manuscript.style || {}, + validation, + numbering, + citations, + crossReferences, + styleChecklist, + tasks, + formattedBlocks, + outline, + dashboard: { + status: tasks.length === 0 ? "ready-to-export" : "format-review-needed", + taskCount: tasks.length, + missingCitationCount: citations.missing.length, + missingCrossReferenceCount: crossReferences.missing.length, + duplicateLabelCount: labels.duplicates.length, + topAction: tasks[0] ? tasks[0].message : "Export formatted manuscript packet." + } + }; + + report.digest = stableDigest(report); + return report; +} + +function validateManuscript(manuscript) { + const required = [ + ["manuscriptId", manuscript.manuscriptId], + ["title", manuscript.title], + ["sections", (manuscript.sections || []).length], + ["bibliography", (manuscript.bibliography || []).length] + ]; + const missing = required.filter(([, value]) => !value).map(([field]) => field); + const malformedBlocks = (manuscript.sections || []).flatMap((section) => + (section.blocks || []).filter((block) => !block.id || !block.type).map((block) => ({ + sectionId: section.id, + blockId: block.id || "unknown" + })) + ); + + return { + status: missing.length === 0 && malformedBlocks.length === 0 ? 
"passed" : "incomplete", + score: Math.max(0, 100 - missing.length * 15 - malformedBlocks.length * 5), + missing, + malformedBlocks + }; +} + +function collectLabels(sections) { + const all = []; + for (const section of sections) { + for (const block of section.blocks || []) { + if (block.label) { + all.push({ + label: block.label, + type: block.type, + blockId: block.id, + sectionId: section.id, + caption: block.caption || block.text || "" + }); + } + } + } + const counts = countBy(all, (item) => item.label); + return { + all, + byLabel: new Map(all.map((item) => [item.label, item])), + duplicates: all.filter((item) => counts.get(item.label) > 1) + }; +} + +function collectCitations(sections, bibliography) { + const bibliographyKeys = new Set(bibliography.map((entry) => entry.key)); + const occurrences = []; + for (const section of sections) { + for (const block of section.blocks || []) { + for (const key of extractCitationKeys(block.text || "")) { + occurrences.push({ key, sectionId: section.id, blockId: block.id }); + } + } + } + const uniqueKeys = Array.from(new Set(occurrences.map((item) => item.key))); + const missing = uniqueKeys.filter((key) => !bibliographyKeys.has(key)); + const unused = bibliography.map((entry) => entry.key).filter((key) => !uniqueKeys.includes(key)); + + return { + occurrences, + uniqueKeys, + missing, + unused + }; +} + +function collectCrossReferences(sections, labels) { + const occurrences = []; + for (const section of sections) { + for (const block of section.blocks || []) { + for (const label of extractCrossReferenceLabels(block.text || "")) { + occurrences.push({ label, sectionId: section.id, blockId: block.id }); + } + } + } + const missing = occurrences.filter((item) => !labels.byLabel.has(item.label)); + return { + occurrences, + missing + }; +} + +function buildNumbering(labels, citations) { + const citationNumbers = new Map(citations.uniqueKeys.map((key, index) => [key, index + 1])); + const grouped = { + figure: 
labels.all.filter((item) => item.type === "figure"), + table: labels.all.filter((item) => item.type === "table"), + equation: labels.all.filter((item) => item.type === "equation") + }; + const crossReferenceNumbers = new Map(); + for (const [type, items] of Object.entries(grouped)) { + items.forEach((item, index) => { + crossReferenceNumbers.set(item.label, { + type, + number: index + 1, + display: displayCrossReference(type, index + 1) + }); + }); + } + return { + citationNumbers: Object.fromEntries(citationNumbers), + crossReferenceNumbers: Object.fromEntries(crossReferenceNumbers) + }; +} + +function formatBlocks(sections, numbering) { + return sections.map((section) => ({ + id: section.id, + heading: section.heading, + blocks: (section.blocks || []).map((block) => ({ + id: block.id, + type: block.type, + text: formatText(block.text || block.caption || "", numbering) + })) + })); +} + +function buildStyleChecklist(manuscript, citations, crossReferences, labels) { + const presentSections = new Set((manuscript.sections || []).map((section) => section.id)); + const requiredSections = (manuscript.style && manuscript.style.requiredSections) || []; + const missingSections = requiredSections.filter((sectionId) => !presentSections.has(sectionId)); + + return { + requiredSections: { + status: missingSections.length === 0 ? "passed" : "failed", + missingSections + }, + bibliographyCoverage: { + status: citations.missing.length === 0 ? "passed" : "failed", + missingKeys: citations.missing, + unusedKeys: citations.unused + }, + crossReferenceIntegrity: { + status: crossReferences.missing.length === 0 && labels.duplicates.length === 0 ? 
"passed" : "failed", + missingLabels: crossReferences.missing.map((item) => item.label), + duplicateLabels: labels.duplicates.map((item) => item.label) + } + }; +} + +function buildFormattingTasks(citations, crossReferences, labels, checklist) { + const tasks = []; + for (const key of citations.missing) { + tasks.push({ + type: "missing-citation", + owner: "author", + target: key, + message: `Add bibliography entry for @${key} or remove the citation.` + }); + } + for (const item of crossReferences.missing) { + tasks.push({ + type: "missing-cross-reference", + owner: "editor", + target: item.label, + message: `Resolve ${item.label} referenced in block ${item.blockId}.` + }); + } + for (const label of new Set(labels.duplicates.map((item) => item.label))) { + tasks.push({ + type: "duplicate-label", + owner: "editor", + target: label, + message: `Rename duplicate label ${label} before export.` + }); + } + for (const sectionId of checklist.requiredSections.missingSections) { + tasks.push({ + type: "missing-required-section", + owner: "author", + target: sectionId, + message: `Add required ${sectionId} section for the selected publication style.` + }); + } + return tasks; +} + +function buildPublicationOutline(manuscript, numbering, checklist) { + return { + title: manuscript.title, + styleName: manuscript.style && manuscript.style.name, + sections: (manuscript.sections || []).map((section) => ({ + id: section.id, + heading: section.heading, + blockCount: (section.blocks || []).length + })), + citationCount: Object.keys(numbering.citationNumbers).length, + crossReferenceCount: Object.keys(numbering.crossReferenceNumbers).length, + exportBlocked: Object.values(checklist).some((item) => item.status === "failed") + }; +} + +function formatText(text, numbering) { + return text + .replace(/\[@([^\]]+)\]/g, (_match, group) => { + const keys = group.split(";").map((item) => item.trim().replace(/^@/, "")); + const rendered = keys.map((key) => numbering.citationNumbers[key] || 
"?").join(","); + return `[${rendered}]`; + }) + .replace(/\{\{([^}]+)\}\}/g, (_match, label) => { + const crossRef = numbering.crossReferenceNumbers[label]; + return crossRef ? crossRef.display : `UNRESOLVED(${label})`; + }); +} + +function extractCitationKeys(text) { + const keys = []; + const citationPattern = /\[@([^\]]+)\]/g; + let match; + while ((match = citationPattern.exec(text)) !== null) { + for (const item of match[1].split(";")) { + const key = item.trim().replace(/^@/, ""); + if (key) keys.push(key); + } + } + return keys; +} + +function extractCrossReferenceLabels(text) { + const labels = []; + const labelPattern = /\{\{([^}]+)\}\}/g; + let match; + while ((match = labelPattern.exec(text)) !== null) { + labels.push(match[1].trim()); + } + return labels; +} + +function displayCrossReference(type, number) { + if (type === "figure") return `Figure ${number}`; + if (type === "table") return `Table ${number}`; + if (type === "equation") return `Equation ${number}`; + return `${type} ${number}`; +} + +function countBy(items, select) { + const counts = new Map(); + for (const item of items) { + const key = select(item); + counts.set(key, (counts.get(key) || 0) + 1); + } + return counts; +} + +function stableDigest(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function stableStringify(value) { + if (value instanceof Map) return stableStringify(Object.fromEntries(value)); + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`; + if (value && typeof value === "object") { + return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`).join(",")}}`; + } + return JSON.stringify(value); +} + +module.exports = { + buildReferenceWorkbench, + validateManuscript, + extractCitationKeys, + extractCrossReferenceLabels, + stableDigest +}; diff --git a/scientific-reference-workbench/test/reference-workbench.test.js 
b/scientific-reference-workbench/test/reference-workbench.test.js new file mode 100644 index 0000000..7e64f4d --- /dev/null +++ b/scientific-reference-workbench/test/reference-workbench.test.js @@ -0,0 +1,55 @@ +const assert = require("node:assert/strict"); +const fs = require("node:fs"); +const path = require("node:path"); +const { + buildReferenceWorkbench, + extractCitationKeys, + extractCrossReferenceLabels, + validateManuscript +} = require("../src/reference-workbench"); + +const samplePath = path.join(__dirname, "..", "data", "sample-manuscript.json"); +const manuscript = JSON.parse(fs.readFileSync(samplePath, "utf8")); +const report = buildReferenceWorkbench(manuscript); + +assert.equal(report.validation.status, "passed"); +assert.equal(report.dashboard.status, "format-review-needed"); +assert.equal(report.citations.uniqueKeys.length, 3); +assert.deepEqual(report.citations.missing, ["missing2026"]); +assert.deepEqual(report.citations.unused, ["unused2023"]); +assert.equal(report.crossReferences.missing.length, 0); +assert.equal(report.styleChecklist.requiredSections.status, "passed"); +assert.equal(report.styleChecklist.bibliographyCoverage.status, "failed"); +assert.equal(report.styleChecklist.crossReferenceIntegrity.status, "passed"); +assert.equal(report.tasks.length, 1); +assert.equal(report.tasks[0].type, "missing-citation"); +assert.equal(report.numbering.citationNumbers.nguyen2025, 1); +assert.equal(report.numbering.citationNumbers.park2024, 2); +assert.equal(report.numbering.crossReferenceNumbers["fig:workflow"].display, "Figure 1"); +assert.equal(report.numbering.crossReferenceNumbers["tbl:quality-gates"].display, "Table 1"); +assert.equal(report.numbering.crossReferenceNumbers["eq:weighted-loss"].display, "Equation 1"); +assert.ok(report.formattedBlocks[0].blocks[0].text.includes("[1]")); +assert.ok(report.formattedBlocks[1].blocks[0].text.includes("Equation 1")); +assert.equal(report.outline.exportBlocked, true); +assert.equal(report.digest, 
buildReferenceWorkbench(manuscript).digest); + +assert.deepEqual(extractCitationKeys("See [@a; @b] and [@c]."), ["a", "b", "c"]); +assert.deepEqual(extractCrossReferenceLabels("Use {{fig:a}} and {{eq:b}}."), ["fig:a", "eq:b"]); + +const incomplete = validateManuscript({ manuscriptId: "draft" }); +assert.equal(incomplete.status, "incomplete"); +assert.ok(incomplete.missing.includes("title")); + +const resolved = JSON.parse(JSON.stringify(manuscript)); +resolved.bibliography.push({ + key: "missing2026", + title: "Registered Manuscript Formatting", + authors: ["Ibrahim"], + year: 2026 +}); +const resolvedReport = buildReferenceWorkbench(resolved); +assert.equal(resolvedReport.dashboard.status, "ready-to-export"); +assert.equal(resolvedReport.tasks.length, 0); +assert.equal(resolvedReport.outline.exportBlocked, false); + +console.log("scientific-reference-workbench tests passed");