diff --git a/ai-research-mvp-tools/README.md b/ai-research-mvp-tools/README.md new file mode 100644 index 0000000..66d4328 --- /dev/null +++ b/ai-research-mvp-tools/README.md @@ -0,0 +1,80 @@ +# AI Research MVP Tools + +Self-contained AI-assisted research tools milestone for [SCIBASE.AI issue #13](https://github.com/SCIBASE-AI/SCIBASE.AI/issues/13). + +The issue asks for paper summaries, pre-review diagnostics, and citation assistance. This module provides deterministic local versions of those workflows so reviewers can run the MVP without model keys, retrieval credentials, or external services. + +## What It Adds + +- Paper summaries in abstract, executive, and layperson modes. +- Key findings, implications, next steps, and keyword extraction. +- Domain-specific review templates for biology, physics, social sciences, and general manuscripts. +- Pre-review diagnostics for missing required sections, ethics/data availability gaps, statistical reporting gaps, clarity signals, and similarity evidence. +- Similarity signal against a local open-access corpus fixture. +- Citation recommendations ranked by manuscript keyword overlap, recency, and citation signal. +- Highlighted-text citation recommendations with evidence hashes for selected manuscript spans. +- Similar-papers widget combining open-access similarity signals and citation-corpus matches with reasons and actions. +- Claim-support report that extracts manuscript claim sentences, matches citation evidence, and returns insert-or-revise actions with evidence-span hashes. +- APA, MLA, and Nature-style reference formatting. +- One-click citation insert-action metadata plus drag payloads and target anchors for manuscript insertion. +- Sample manuscript/corpus fixture, tests, requirement map, CLI demo, and short demo GIF. 
+ +## Run + +```bash +cd ai-research-mvp-tools +npm run check +npm test +npm run demo +``` + +Expected demo shape: + +```json +{ + "summaryModes": ["abstract", "executive", "layperson"], + "qualityScore": 55, + "reviewFindings": ["compliance:...", "statistics:..."], + "topCitation": { + "doi": "10.1016/j.watres.2025.120001" + }, + "highlightedCitation": { + "doi": "10.1016/j.watres.2025.120001" + }, + "citationInsertionPlan": { + "targetAnchor": "manuscript:results:p3", + "insertions": 2 + }, + "similarPapers": [ + { + "source": "open-access-corpus", + "action": { + "type": "open-similar-paper" + } + } + ], + "claimSupport": { + "claims": 4, + "unsupportedCount": 0, + "recommendedCitationCount": 3 + }, + "insertActions": 2, + "packetHash": "..." +} +``` + +## Demo Artifact + +See [docs/demo.gif](docs/demo.gif) for a short visual walkthrough. The SVG source is included at [docs/demo.svg](docs/demo.svg). + +## Files + +- `src/ai-research-mvp-tools.js` - summarization, review diagnostics, similarity, citation ranking, highlighted-text citation support, insertion planning, claim support, formatting. +- `data/sample-research.json` - reviewable manuscript, open-access corpus, and citation corpus fixture. +- `test/ai-research-mvp-tools.test.js` - dependency-free Node tests. +- `scripts/demo.js` - CLI demo. +- `docs/issue-13-requirement-map.md` - maps the implementation to bounty requirements. + +## AI-Assisted Disclosure + +This contribution was produced with AI assistance and manually verified with the local commands above. diff --git a/ai-research-mvp-tools/data/sample-research.json b/ai-research-mvp-tools/data/sample-research.json new file mode 100644 index 0000000..1a2f486 --- /dev/null +++ b/ai-research-mvp-tools/data/sample-research.json @@ -0,0 +1,63 @@ +{ + "document": { + "id": "draft-flood-microbiome", + "title": "Coastal flooding changes microbial diversity in urban waterways", + "domain": "biology", + "body": "Abstract. 
Coastal flooding is increasing in urban waterways. We analyzed 240 microbiome samples collected before and after flood exposure using Jupyter notebooks and open metadata. Methods. Samples were sequenced with 16S profiling and processed in a reproducible Docker environment. Results show a statistically significant decrease in microbial diversity after flooding with p < 0.03. The analysis found that flood exposure was associated with shifts in coastal pathogen markers. These findings suggest that climate-linked flooding could reshape urban microbial ecology. Data availability is provided through an open repository with notebooks and raw count tables. Future work should validate these results across additional cities and include confidence interval reporting.", + "references": [ + { + "doi": "10.1038/s41586-020-2649-2", + "title": "Existing microbiome atlas", + "authors": ["Reference Author"], + "year": 2020, + "venue": "Nature" + } + ] + }, + "highlightedText": "flood exposure was associated with shifts in coastal pathogen markers", + "targetAnchor": "manuscript:results:p3", + "openAccessCorpus": [ + { + "id": "oa-1", + "title": "Flood exposure and microbial diversity in estuaries", + "abstract": "Coastal flooding changes microbial diversity and pathogen markers in estuary samples after flood exposure." + }, + { + "id": "oa-2", + "title": "Quantum sensor calibration", + "abstract": "A physics calibration method for instrument uncertainty and error bars." + } + ], + "citationCorpus": [ + { + "id": "cite-1", + "doi": "10.1016/j.watres.2025.120001", + "title": "Urban floodwater microbiome dynamics", + "authors": ["M. Rivera", "S. 
Nair"], + "year": 2025, + "venue": "Water Research", + "abstract": "Urban floodwater microbiome dynamics reveal shifts in pathogen markers and microbial diversity after coastal flooding.", + "citations": 88 + }, + { + "id": "cite-2", + "doi": "10.1101/2026.04.01.123456", + "title": "Reproducible Docker workflows for environmental microbiome notebooks", + "authors": ["A. Chen", "R. Patel"], + "year": 2026, + "venue": "bioRxiv", + "abstract": "Docker and Jupyter notebook workflows improve reproducibility for environmental microbiome analysis.", + "citations": 12 + }, + { + "id": "cite-3", + "doi": "10.1038/s41586-020-2649-2", + "title": "Existing microbiome atlas", + "authors": ["Reference Author"], + "year": 2020, + "venue": "Nature", + "abstract": "A previously cited microbiome atlas.", + "citations": 300 + } + ] +} diff --git a/ai-research-mvp-tools/docs/demo.gif b/ai-research-mvp-tools/docs/demo.gif new file mode 100644 index 0000000..c346bce Binary files /dev/null and b/ai-research-mvp-tools/docs/demo.gif differ diff --git a/ai-research-mvp-tools/docs/demo.mp4 b/ai-research-mvp-tools/docs/demo.mp4 new file mode 100644 index 0000000..ce9408f Binary files /dev/null and b/ai-research-mvp-tools/docs/demo.mp4 differ diff --git a/ai-research-mvp-tools/docs/demo.svg b/ai-research-mvp-tools/docs/demo.svg new file mode 100644 index 0000000..d629e6e --- /dev/null +++ b/ai-research-mvp-tools/docs/demo.svg @@ -0,0 +1,33 @@ + + AI Research MVP Tools Demo + Visual demo for summarization, peer review diagnostics, citation recommendations, and citation insertion. 
+ + + AI Research MVP Tools + Summaries · review diagnostics · citation recommendations + + Summary Modes + 3 + abstract · executive · lay + + Review Aid + ready + stats · ethics · similarity + + Citation Tool + ranked + APA · MLA · Nature + + Reviewer Output + missing ethics + confidence interval checks, similarity evidence, citation insert actions + Deterministic local heuristics keep the MVP runnable without external model credentials. + diff --git a/ai-research-mvp-tools/docs/issue-13-requirement-map.md b/ai-research-mvp-tools/docs/issue-13-requirement-map.md new file mode 100644 index 0000000..587e937 --- /dev/null +++ b/ai-research-mvp-tools/docs/issue-13-requirement-map.md @@ -0,0 +1,30 @@ +# Issue #13 Requirement Map + +This module implements deterministic AI-assisted research MVP tools for SCIBASE issue #13. It focuses on paper summaries, pre-review diagnostics, citation recommendations, formatting, and one-click insertion metadata without requiring live model or retrieval services. + +| Issue requirement | Implementation | +| --- | --- | +| AI paper summarizer | `summarizePaper()` ranks document sentences by domain keywords and emits abstract, executive, and layperson modes. | +| Key findings, implications, and next steps | `summarizePaper()` includes extracted finding, implication, and next-step sentence lists. | +| Scientific-domain-aware output | `reviewManuscript()` selects biology, physics, social-sciences, or general templates with domain-specific checks. | +| AI peer review aid | `reviewManuscript()` emits diagnostics for missing sections, statistical reporting gaps, clarity signals, compliance issues, and similarity matches. | +| Plagiarism/similarity signal | `detectSimilarity()` compares manuscript terms against an open-access corpus fixture and reports overlapping evidence. | +| Statistical error detection | `reviewManuscript()` flags p-values without confidence intervals and missing template-specific statistical reporting. 
| +| Compliance checks | Domain templates require sections such as ethics and data availability. | +| AI citation tool | `recommendCitations()` ranks citation candidates by keyword overlap, recency, and citation signal while excluding already-cited DOIs. | +| Recommendations from highlighted text | `recommendCitationsForSelection()` ranks citations against selected manuscript text and returns highlighted-span evidence hashes. | +| Similar papers widget | `buildSimilarPapersWidget()` combines open-access similarity evidence and citation-corpus matches into ranked recommendations with user actions. | +| Context-aware citation support | `buildClaimSupportReport()` extracts claim sentences, scores supporting citation candidates, classifies support status, and returns insert-or-revise actions with evidence-span hashes. | +| Auto-formatted references | `formatReference()` supports APA, MLA, and Nature-style output. | +| Drag-and-drop or one-click insert metadata | `buildCitationInsertionPlan()` returns one-click citation insertions with target anchors and drag payloads; `buildResearchToolsPacket()` includes those insertion plans and simple `insertActions`. | +| Reviewer demo | `npm run demo` prints the document title, summary modes, quality score, review findings, top citation, highlighted-text citation, citation insertion plan, similar papers, claim-support counts, insert-action count, and packet hash. | + +## Verification + +```bash +npm run check +npm test +npm run demo +``` + +The module is dependency-free and isolated under `ai-research-mvp-tools/`.
diff --git a/ai-research-mvp-tools/package.json b/ai-research-mvp-tools/package.json new file mode 100644 index 0000000..2c39db7 --- /dev/null +++ b/ai-research-mvp-tools/package.json @@ -0,0 +1,12 @@ +{ + "name": "scibase-ai-research-mvp-tools", + "version": "0.1.0", + "private": true, + "description": "Deterministic AI-assisted research MVP tools for SCIBASE issue #13.", + "type": "commonjs", + "scripts": { + "check": "node --check src/ai-research-mvp-tools.js && node --check scripts/demo.js && node --check test/ai-research-mvp-tools.test.js", + "demo": "node scripts/demo.js", + "test": "node test/ai-research-mvp-tools.test.js" + } +} diff --git a/ai-research-mvp-tools/scripts/demo.js b/ai-research-mvp-tools/scripts/demo.js new file mode 100644 index 0000000..969ef40 --- /dev/null +++ b/ai-research-mvp-tools/scripts/demo.js @@ -0,0 +1,37 @@ +"use strict"; + +const sample = require("../data/sample-research.json"); +const { buildResearchToolsPacket } = require("../src/ai-research-mvp-tools"); + +const packet = buildResearchToolsPacket(sample); + +console.log( + JSON.stringify( + { + document: packet.document.title, + summaryModes: Object.keys(packet.summaries), + qualityScore: packet.reviewReport.qualityScore, + reviewFindings: packet.reviewReport.findings.map((finding) => `${finding.category}:${finding.message}`), + topCitation: packet.citationRecommendations[0], + highlightedCitation: packet.selectionRecommendations[0], + citationInsertionPlan: { + targetAnchor: packet.citationInsertionPlan.targetAnchor, + insertions: packet.citationInsertionPlan.insertions.length, + firstDragPayload: packet.citationInsertionPlan.insertions[0] + ? 
packet.citationInsertionPlan.insertions[0].dragPayload + : null, + }, + similarPapers: packet.similarPapersWidget, + claimSupport: { + claims: packet.claimSupportReport.claims.length, + unsupportedCount: packet.claimSupportReport.unsupportedCount, + recommendedCitationCount: packet.claimSupportReport.recommendedCitationCount, + firstAction: packet.claimSupportReport.claims[0] ? packet.claimSupportReport.claims[0].action : null, + }, + insertActions: packet.insertActions.length, + packetHash: packet.packetHash, + }, + null, + 2, + ), +); diff --git a/ai-research-mvp-tools/src/ai-research-mvp-tools.js b/ai-research-mvp-tools/src/ai-research-mvp-tools.js new file mode 100644 index 0000000..3b5601f --- /dev/null +++ b/ai-research-mvp-tools/src/ai-research-mvp-tools.js @@ -0,0 +1,511 @@ +"use strict"; + +const crypto = require("crypto"); +/** Common English function words that tokenize() removes before keywords are counted. */ +const STOPWORDS = new Set([ + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "by", + "for", + "from", + "in", + "into", + "is", + "it", + "of", + "on", + "or", + "our", + "that", + "the", + "this", + "to", + "with", +]); +/** Review templates keyed by domain: the section names a manuscript body must mention and the statistical signals it is expected to report. reviewManuscript() falls back to the general entry for a domain that is not listed. */ +const REVIEW_TEMPLATES = { + biology: { + requiredSections: ["methods", "ethics", "data availability", "results"], + statisticalChecks: ["p-value", "confidence interval", "sample size"], + }, + physics: { + requiredSections: ["methods", "uncertainty", "results"], + statisticalChecks: ["error bars", "confidence interval"], + }, + "social-sciences": { + requiredSections: ["methods", "ethics", "limitations", "data availability"], + statisticalChecks: ["sample size", "confidence interval", "p-value"], + }, + general: { + requiredSections: ["methods", "results", "data availability"], + statisticalChecks: ["sample size", "confidence interval"], + }, +}; +/** Returns value itself when it is an array, otherwise a new empty array. */ +function asArray(value) { + return Array.isArray(value) ?
value : []; +} +/** Serializes a value to text with object keys sorted at every nesting level, so records that differ only in key order produce the same string. */ +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`; + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + return JSON.stringify(value); +} +/** Returns the first 20 hex characters of the SHA-256 digest of stableStringify(value). */ +function hashRecord(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex").slice(0, 20); +} +/** Collapses whitespace runs to one space, splits after a period, exclamation mark or question mark when whitespace follows, and drops empty pieces. */ +function splitSentences(text) { + return String(text || "") + .replace(/\s+/g, " ") + .split(/(?<=[.!?])\s+/) + .map((sentence) => sentence.trim()) + .filter(Boolean); +} +/** Lower-cases the text and returns every word of three or more characters (a letter followed by letters, digits or hyphens) that is not in STOPWORDS; returns an empty array when nothing matches. */ +function tokenize(text) { + return String(text || "") + .toLowerCase() + .match(/[a-z][a-z0-9-]{2,}/g) + ?.filter((word) => !STOPWORDS.has(word)) || []; +} +/** Builds a Map from each token to the number of times tokenize(text) produced it. */ +function keywordCounts(text) { + const counts = new Map(); + for (const token of tokenize(text)) counts.set(token, (counts.get(token) || 0) + 1); + return counts; +} +/** Returns up to `limit` tokens ordered by descending count, with ties broken alphabetically. */ +function topKeywords(text, limit = 8) { + return Array.from(keywordCounts(text).entries()) + .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])) + .slice(0, limit) + .map(([keyword]) => keyword); +} +/** Scores a sentence: one point per token found in `keywords`, one extra point when the sentence contains a digit, plus the token count divided by 40 and capped at 1. */ +function sentenceScore(sentence, keywords) { + const tokens = tokenize(sentence); + const keywordSet = new Set(keywords); + const keywordHits = tokens.filter((token) => keywordSet.has(token)).length; + const numberBonus = /\d/.test(sentence) ? 1 : 0; + return keywordHits + numberBonus + Math.min(tokens.length / 40, 1); +} +/** Builds the summary record for one mode. Sentences are ranked by sentenceScore against the ten most frequent keywords of the body; layperson mode keeps 2 sentences, executive keeps 4, any other mode keeps 3. */ +function summarizePaper(documentInput, mode = "abstract") { + const document = normalizeDocument(documentInput); + const sentences = splitSentences(document.body); + const keywords = topKeywords(document.body, 10); + const ranked = sentences + .map((sentence, index) => ({ sentence, index, score: sentenceScore(sentence, keywords) })) + .sort((left, right) => right.score - left.score || left.index - right.index); + const selectedCount = mode === "layperson" ?
2 : mode === "executive" ? 4 : 3; + const selected = ranked.slice(0, selectedCount).sort((left, right) => left.index - right.index).map((item) => item.sentence); /* The picked sentences are re-sorted by their original position in the body. */ + + return { + id: `summary-${hashRecord({ documentId: document.id, mode, selected })}`, + documentId: document.id, + mode, + title: document.title, + summary: + mode === "layperson" + ? simplifyLanguage(selected.join(" ")) + : selected.join(" "), + keyFindings: extractFindings(sentences), + implications: extractImplications(sentences), + nextSteps: extractNextSteps(sentences), + keywords, + summaryHash: hashRecord({ documentId: document.id, mode, selected, keywords }), + }; +} +/** Rewrites a fixed set of technical phrases into plainer wording, case-insensitively. The longer "a statistically significant" pattern runs before the shorter one so that the article is replaced together with the phrase. */ +function simplifyLanguage(text) { + return String(text || "") + .replace(/reproducibility/gi, "ability to repeat the work") + .replace(/a statistically significant/gi, "an unlikely-to-be-random") + .replace(/statistically significant/gi, "unlikely to be due to chance") + .replace(/methodology/gi, "methods"); +} +/** Returns the first three sentences that contain a result cue (find, found, result, show, demonstrat, increase, decrease). */ +function extractFindings(sentences) { + return sentences.filter((sentence) => /find|found|result|show|demonstrat|increase|decrease/i.test(sentence)).slice(0, 3); +} +/** Returns the first three sentences that contain an implication cue (suggest, implicat, therefore, could, may, supports). */ +function extractImplications(sentences) { + return sentences.filter((sentence) => /suggest|implicat|therefore|could|may|supports/i.test(sentence)).slice(0, 3); +} +/** Returns the first three sentences that contain a follow-up cue (future, next, should, need, remain, validate). */ +function extractNextSteps(sentences) { + return sentences.filter((sentence) => /future|next|should|need|remain|validate/i.test(sentence)).slice(0, 3); +} +/** Returns a document record with defaults for a missing id, title, domain and body, and with references coerced to an array. Throws TypeError when the input is not an object. */ +function normalizeDocument(documentInput) { + if (!documentInput || typeof documentInput !== "object") throw new TypeError("document must be an object"); + return { + id: documentInput.id || "document-unknown", + title: documentInput.title || "Untitled research document", + domain: documentInput.domain || "general", + body: documentInput.body || "", + references: asArray(documentInput.references), + }; +} +/** Scores each corpus entry by the share of distinct tokens it has in common with the document (shared tokens divided by the size of the combined token set), reading the entry body or, when absent, its abstract. Entries scoring at least 0.12 are returned by descending similarity, then title. */ +function detectSimilarity(documentInput, corpus = []) { + const document = normalizeDocument(documentInput); + const sourceTokens =
new Set(tokenize(document.body)); + return asArray(corpus) + .map((candidate) => { + const candidateTokens = new Set(tokenize(candidate.body || candidate.abstract || "")); + const overlap = Array.from(sourceTokens).filter((token) => candidateTokens.has(token)); + const denominator = new Set([...sourceTokens, ...candidateTokens]).size || 1; + return { + sourceId: candidate.id, + title: candidate.title, + similarity: Number((overlap.length / denominator).toFixed(4)), + overlappingTerms: overlap.slice(0, 10), + }; + }) + .filter((result) => result.similarity >= 0.12) + .sort((left, right) => right.similarity - left.similarity || left.title.localeCompare(right.title)); +} + +function reviewManuscript(documentInput, options = {}) { + const document = normalizeDocument(documentInput); + const template = REVIEW_TEMPLATES[options.domain || document.domain] || REVIEW_TEMPLATES.general; + const bodyLower = document.body.toLowerCase(); + const missingSections = template.requiredSections.filter((section) => !bodyLower.includes(section)); + const missingStats = template.statisticalChecks.filter((check) => !hasStatisticalSignal(document.body, check)); + const reportsPValue = /p\s*[<=>]\s*0?\.\d+/i.test(document.body); + const reportsNumericCi = + /confidence interval[^.]{0,80}\d/i.test(document.body) || /\bci\b[^.]{0,80}\d/i.test(document.body); + const hasPValueNoCi = reportsPValue && !reportsNumericCi; + const passiveToneCount = (document.body.match(/\b(was|were|is|are|been) [a-z]+ed\b/gi) || []).length; + const similarityMatches = detectSimilarity(document, options.openAccessCorpus || []); + const findings = [ + ...missingSections.map((section) => ({ + severity: section === "ethics" || section === "data availability" ? 
"warning" : "info", + category: "compliance", + message: `Missing or unclear ${section} section`, + })), + ...missingStats.map((check) => ({ + severity: "info", + category: "statistics", + message: `No explicit ${check} reporting detected`, + })), + ...(hasPValueNoCi + ? [{ severity: "warning", category: "statistics", message: "p-value reported without confidence interval" }] + : []), + ...(passiveToneCount > 4 + ? [{ severity: "info", category: "clarity", message: "High passive-voice signal; consider clearer active phrasing" }] + : []), + ...similarityMatches.slice(0, 2).map((match) => ({ + severity: match.similarity >= 0.25 ? "warning" : "info", + category: "similarity", + message: `Similarity signal with ${match.title}`, + evidence: match, + })), + ]; + + return { + documentId: document.id, + domain: options.domain || document.domain, + template, + findings, + qualityScore: Number(Math.max(0, 100 - findings.filter((finding) => finding.severity === "warning").length * 18 - findings.length * 3).toFixed(2)), + reportHash: hashRecord({ documentId: document.id, findings }), + }; +} + +function hasStatisticalSignal(text, check) { + const body = String(text || "").toLowerCase(); + if (check === "p-value") return /p\s*[<=>]\s*0?\.\d+/i.test(text) || body.includes("p-value"); + if (check === "sample size") { + return ( + /\b(n|samples?|participants?)\s*[=:]?\s*\d+/i.test(text) || + /\d+(?:\s+[a-z-]+){0,3}\s+(samples?|participants?)/i.test(text) + ); + } + if (check === "confidence interval") return /confidence interval|ci\b/i.test(text); + if (check === "error bars") return /error bars?|uncertainty interval/i.test(text); + return body.includes(check); +} + +function recommendCitations(documentInput, citationCorpus = [], options = {}) { + const document = normalizeDocument(documentInput); + const documentKeywords = new Set(topKeywords(document.body, 16)); + const existingDois = new Set(document.references.map((reference) => reference.doi).filter(Boolean)); + + return 
asArray(citationCorpus) + .filter((candidate) => !existingDois.has(candidate.doi)) + .map((candidate) => { + const candidateKeywords = new Set(tokenize(`${candidate.title || ""} ${candidate.abstract || ""}`)); + const overlap = Array.from(documentKeywords).filter((keyword) => candidateKeywords.has(keyword)); + const recencyBonus = candidate.year && candidate.year >= 2023 ? 2 : 0; + const citationBonus = Math.min(8, Number(candidate.citations || 0) / 25); + const score = Number((overlap.length * 4 + recencyBonus + citationBonus).toFixed(4)); + return { + id: candidate.id, + title: candidate.title, + doi: candidate.doi, + year: candidate.year, + score, + matchedTerms: overlap, + formatted: formatReference(candidate, options.style || "apa"), + }; + }) + .filter((candidate) => candidate.score > 0) + .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title)) + .slice(0, options.limit || 5); +} + +function recommendCitationsForSelection(highlightedText, citationCorpus = [], options = {}) { + const selectionKeywords = new Set(topKeywords(highlightedText, 12)); + return asArray(citationCorpus) + .map((candidate) => { + const candidateKeywords = new Set(tokenize(`${candidate.title || ""} ${candidate.abstract || ""}`)); + const overlap = Array.from(selectionKeywords).filter((keyword) => candidateKeywords.has(keyword)); + const score = Number((overlap.length * 5 + Math.min(6, Number(candidate.citations || 0) / 35)).toFixed(4)); + return { + id: candidate.id, + title: candidate.title, + doi: candidate.doi, + score, + matchedTerms: overlap, + formatted: formatReference(candidate, options.style || "apa"), + highlightedTextHash: hashRecord({ highlightedText, doi: candidate.doi }), + }; + }) + .filter((candidate) => candidate.score > 0) + .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title)) + .slice(0, options.limit || 3); +} + +function overlapTerms(left, right) { + const leftTokens = new Set(tokenize(left)); + 
const rightTokens = new Set(tokenize(right)); + return Array.from(leftTokens).filter((token) => rightTokens.has(token)); +} + +function extractClaimSentences(documentInput) { + const document = normalizeDocument(documentInput); + return splitSentences(document.body) + .filter((sentence) => /find|found|result|show|demonstrat|increase|decrease|suggest|associated|significant/i.test(sentence)) + .map((sentence, index) => ({ + id: `claim-${index + 1}`, + text: sentence, + evidenceSpanHash: hashRecord({ documentId: document.id, sentence }), + })); +} + +function buildClaimSupportReport(documentInput, citationCorpus = []) { + const document = normalizeDocument(documentInput); + const existingDois = new Set(document.references.map((reference) => reference.doi).filter(Boolean)); + const claims = extractClaimSentences(document).map((claim) => { + const candidates = asArray(citationCorpus) + .map((candidate) => { + const terms = overlapTerms(claim.text, `${candidate.title || ""} ${candidate.abstract || ""}`); + const score = Number((terms.length * 3 + Math.min(5, Number(candidate.citations || 0) / 40)).toFixed(4)); + return { + id: candidate.id, + doi: candidate.doi, + title: candidate.title, + alreadyCited: existingDois.has(candidate.doi), + score, + matchedTerms: terms.slice(0, 8), + formatted: formatReference(candidate, "apa"), + }; + }) + .filter((candidate) => candidate.score > 0) + .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title)); + const bestCitation = candidates[0] || null; + const supportStatus = bestCitation && bestCitation.alreadyCited + ? "supported-by-existing-citation" + : bestCitation && bestCitation.score >= 8 + ? "citation-recommended" + : "needs-evidence"; + + return { + ...claim, + supportStatus, + bestCitation, + candidateCount: candidates.length, + action: bestCitation && !bestCitation.alreadyCited + ? 
{ + type: "insert-supporting-citation", + doi: bestCitation.doi, + label: bestCitation.formatted, + } + : supportStatus === "needs-evidence" + ? { + type: "revise-or-add-evidence", + message: "Add a result, citation, or qualifying language before submission.", + } + : { + type: "no-action", + message: "Claim is already connected to an existing citation.", + }, + }; + }); + + return { /* Both counts below are derived from supportStatus, not from the action type. */ + documentId: document.id, + claims, + unsupportedCount: claims.filter((claim) => claim.supportStatus === "needs-evidence").length, + recommendedCitationCount: claims.filter((claim) => claim.supportStatus === "citation-recommended").length, + reportHash: hashRecord({ documentId: document.id, claims }), + }; +} +/** Merges open-access similarity matches (scored as similarity times 100) with citation recommendations (scored as returned by recommendCitations) into one list sorted by descending score then title, cut to `limit` entries and numbered with a 1-based rank. NOTE(review): the two sources are compared on different score scales - confirm that this is intended. */ +function buildSimilarPapersWidget(documentInput, openAccessCorpus = [], citationCorpus = [], limit = 5) { + const similarityMatches = detectSimilarity(documentInput, openAccessCorpus).map((match) => ({ + source: "open-access-corpus", + id: match.sourceId, + title: match.title, + score: Number((match.similarity * 100).toFixed(2)), + reasons: match.overlappingTerms.map((term) => `Shared term: ${term}`), + action: { + type: "open-similar-paper", + sourceId: match.sourceId, + }, + })); + const citationMatches = recommendCitations(documentInput, citationCorpus, { style: "apa", limit }).map((citation) => ({ + source: "citation-corpus", + id: citation.id, + title: citation.title, + doi: citation.doi, + score: citation.score, + reasons: [ + ...citation.matchedTerms.map((term) => `Citation context match: ${term}`), + citation.year ?
`Published ${citation.year}` : null, + ].filter(Boolean), + action: { + type: "insert-or-open-citation", + doi: citation.doi, + }, + })); + + return [...similarityMatches, ...citationMatches] + .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title)) + .slice(0, limit) + .map((item, index) => ({ rank: index + 1, ...item })); +} + +function buildCitationInsertionPlan(documentInput, citationRecommendations = [], options = {}) { + const document = normalizeDocument(documentInput); + const targetAnchor = options.targetAnchor || "manuscript:end-of-paragraph"; + const style = options.style || "apa"; + const insertions = asArray(citationRecommendations).map((citation, index) => ({ + id: `insert-${index + 1}-${hashRecord({ documentId: document.id, doi: citation.doi }).slice(0, 8)}`, + doi: citation.doi, + label: citation.formatted || formatReference(citation, style), + targetAnchor, + mode: options.mode || "one-click", + dragPayload: { + mimeType: "application/x-scibase-citation", + data: { + doi: citation.doi, + title: citation.title, + formatted: citation.formatted || formatReference(citation, style), + }, + }, + })); + + return { + documentId: document.id, + targetAnchor, + insertions, + planHash: hashRecord({ documentId: document.id, targetAnchor, insertions }), + }; +} + +function formatReference(reference, style = "apa") { + const authors = asArray(reference.authors).join(", ") || "Unknown authors"; + const year = reference.year || "n.d."; + const title = reference.title || "Untitled work"; + const doi = reference.doi ? `https://doi.org/${reference.doi}` : "DOI pending"; + + if (style === "nature") return `${authors}. ${title}. ${reference.venue || "Preprint"} (${year}). ${doi}`; + if (style === "mla") return `${authors}. "${title}." ${reference.venue || "Preprint"}, ${year}, ${doi}.`; + return `${authors} (${year}). ${title}. ${reference.venue || "Preprint"}. 
${doi}.`; +} +/** Runs every tool over one input object (document, openAccessCorpus, citationCorpus, and optional highlightedText and targetAnchor) and returns the combined packet together with a hash of its parts. When highlightedText is absent the whole document body is used as the selection; the target anchor defaults to "manuscript:references". */ +function buildResearchToolsPacket(input) { + const document = normalizeDocument(input.document); + const openAccessCorpus = asArray(input.openAccessCorpus); + const citationCorpus = asArray(input.citationCorpus); + const summaries = { + abstract: summarizePaper(document, "abstract"), + executive: summarizePaper(document, "executive"), + layperson: summarizePaper(document, "layperson"), + }; + const reviewReport = reviewManuscript(document, { domain: document.domain, openAccessCorpus }); + const citationRecommendations = recommendCitations(document, citationCorpus, { style: "apa", limit: 5 }); + const selectionRecommendations = recommendCitationsForSelection( + input.highlightedText || document.body, + citationCorpus, + { style: "apa", limit: 3 }, + ); + const similarPapersWidget = buildSimilarPapersWidget(document, openAccessCorpus, citationCorpus, 5); + const claimSupportReport = buildClaimSupportReport(document, citationCorpus); + const citationInsertionPlan = buildCitationInsertionPlan(document, citationRecommendations, { + targetAnchor: input.targetAnchor || "manuscript:references", + mode: "one-click", + }); + + return { + document: { + id: document.id, + title: document.title, + domain: document.domain, + }, + summaries, + reviewReport, + citationRecommendations, + selectionRecommendations, + similarPapersWidget, + claimSupportReport, + citationInsertionPlan, + insertActions: citationRecommendations.map((citation) => ({ + action: "insert-citation", + doi: citation.doi, + label: citation.formatted, + })), + packetHash: hashRecord({ /* insertActions is not part of the hashed record. */ + documentId: document.id, + summaries, + reviewReport, + citationRecommendations, + selectionRecommendations, + similarPapersWidget, + claimSupportReport, + citationInsertionPlan, + }), + }; +} +/** Names exported to the demo script and the tests. */ +module.exports = { + REVIEW_TEMPLATES, + buildClaimSupportReport, + buildCitationInsertionPlan, + buildSimilarPapersWidget, + buildResearchToolsPacket, + detectSimilarity, + extractClaimSentences, + formatReference, +
hashRecord, + recommendCitations, + recommendCitationsForSelection, + reviewManuscript, + summarizePaper, + topKeywords, +}; diff --git a/ai-research-mvp-tools/test/ai-research-mvp-tools.test.js b/ai-research-mvp-tools/test/ai-research-mvp-tools.test.js new file mode 100644 index 0000000..2372153 --- /dev/null +++ b/ai-research-mvp-tools/test/ai-research-mvp-tools.test.js @@ -0,0 +1,134 @@ +"use strict"; + +const assert = require("assert"); +const sample = require("../data/sample-research.json"); +const { + buildClaimSupportReport, + buildCitationInsertionPlan, + buildSimilarPapersWidget, + buildResearchToolsPacket, + detectSimilarity, + extractClaimSentences, + formatReference, + recommendCitations, + recommendCitationsForSelection, + reviewManuscript, + summarizePaper, + topKeywords, +} = require("../src/ai-research-mvp-tools"); +/** Checks the summary mode, expected words in the abstract and layperson summaries, the top keywords and non-empty key findings against the sample fixture. */ +function testSummaries() { + const abstract = summarizePaper(sample.document, "abstract"); + const layperson = summarizePaper(sample.document, "layperson"); + const keywords = topKeywords(sample.document.body, 5); + + assert.strictEqual(abstract.mode, "abstract"); + assert.ok(abstract.summary.includes("microbiome")); + assert.ok(layperson.summary.includes("unlikely-to-be-random")); + assert.ok(keywords.includes("flooding")); + assert.ok(abstract.keyFindings.length > 0); +} +/** Checks that the biology review of the sample reports ethics and confidence-interval findings, does not report missing p-value or sample-size reporting, includes a similarity finding and scores below 100. */ +function testReviewDiagnostics() { + const report = reviewManuscript(sample.document, { + domain: "biology", + openAccessCorpus: sample.openAccessCorpus, + }); + + assert.ok(report.findings.some((finding) => finding.message.includes("ethics"))); + assert.ok(report.findings.some((finding) => finding.message.includes("confidence interval"))); + assert.ok(!report.findings.some((finding) => finding.message.includes("No explicit p-value"))); + assert.ok(!report.findings.some((finding) => finding.message.includes("No explicit sample size"))); + assert.ok(report.findings.some((finding) => finding.category === "similarity")); + assert.ok(report.qualityScore < 100); +} +
/**
 * Similarity and citation ranking: the closest open-access match is "oa-1",
 * a limit of 2 yields two Nature-style recommendations, and the top one is
 * not the manuscript's first listed reference and carries a DOI link.
 */
function testSimilarityAndCitations() {
  const { document: manuscript, openAccessCorpus, citationCorpus } = sample;
  const matches = detectSimilarity(manuscript, openAccessCorpus);
  const ranked = recommendCitations(manuscript, citationCorpus, { style: "nature", limit: 2 });

  assert.strictEqual(matches[0].sourceId, "oa-1");
  assert.strictEqual(ranked.length, 2);

  const topPick = ranked[0];
  assert.notStrictEqual(topPick.doi, manuscript.references[0].doi);
  assert.ok(topPick.formatted.includes("https://doi.org/"));
}

/**
 * Highlighted-text recommendations and the insertion plan built from them:
 * checks the leading DOI, the selection hash, the target anchor, and the
 * first insertion's mode and drag payload MIME type.
 */
function testHighlightedCitationRecommendationsAndInsertPlan() {
  const picks = recommendCitationsForSelection(sample.highlightedText, sample.citationCorpus, {
    style: "apa",
    limit: 2,
  });
  const insertionPlan = buildCitationInsertionPlan(sample.document, picks, {
    targetAnchor: sample.targetAnchor,
  });

  assert.strictEqual(picks.length, 2);
  const leadPick = picks[0];
  assert.strictEqual(leadPick.doi, "10.1016/j.watres.2025.120001");
  assert.ok(leadPick.highlightedTextHash.length >= 12);

  assert.strictEqual(insertionPlan.targetAnchor, "manuscript:results:p3");
  const firstInsertion = insertionPlan.insertions[0];
  assert.strictEqual(firstInsertion.mode, "one-click");
  assert.strictEqual(firstInsertion.dragPayload.mimeType, "application/x-scibase-citation");
  assert.ok(insertionPlan.planHash.length >= 12);
}

/**
 * Similar-papers widget: with a limit of 3 it returns three ranked items
 * drawn from both corpora, each carrying an action type.
 */
function testSimilarPapersWidget() {
  const { document: manuscript, openAccessCorpus, citationCorpus } = sample;
  const cards = buildSimilarPapersWidget(manuscript, openAccessCorpus, citationCorpus, 3);
  // True when at least one widget item came from the given source.
  const hasSource = (label) => cards.some(({ source }) => source === label);

  assert.strictEqual(cards.length, 3);
  assert.strictEqual(cards[0].rank, 1);
  assert.ok(hasSource("open-access-corpus"));
  assert.ok(hasSource("citation-corpus"));
  assert.ok(cards.every(({ action }) => action.type));
}

/**
 * Claim support: at least three claim sentences are extracted, every claim
 * in the report has an evidence-span hash, and at least one claim recommends
 * inserting a supporting citation.
 */
function testClaimSupportReport() {
  const sentences = extractClaimSentences(sample.document);
  const support = buildClaimSupportReport(sample.document, sample.citationCorpus);

  assert.ok(sentences.length >= 3);
  assert.strictEqual(support.documentId, sample.document.id);
  assert.ok(support.claims.every(({ evidenceSpanHash }) => evidenceSpanHash.length >= 12));
  assert.ok(support.claims.some(({ supportStatus }) => supportStatus === "citation-recommended"));
  assert.ok(support.claims.some(({ action }) => action.type === "insert-supporting-citation"));
  assert.ok(support.reportHash.length >= 12);
}

/**
 * Reference formatting: the first citation-corpus entry renders with the
 * expected author/year prefix (APA), quoted title (MLA), and journal name
 * (Nature).
 */
function testReferenceFormatting() {
  const entry = sample.citationCorpus[0];
  const render = (style) => formatReference(entry, style);

  assert.ok(render("apa").startsWith("M. Rivera, S. Nair (2025)."));
  assert.ok(render("mla").includes("\"Urban floodwater microbiome dynamics.\""));
  assert.ok(render("nature").includes("Water Research"));
}

/**
 * End-to-end packet: summary modes appear in order, insert actions and
 * planned insertions match the recommendation count, and every section and
 * hash is populated.
 */
function testPacket() {
  const packet = buildResearchToolsPacket(sample);
  const { citationRecommendations, citationInsertionPlan, claimSupportReport } = packet;

  assert.deepStrictEqual(Object.keys(packet.summaries), ["abstract", "executive", "layperson"]);
  assert.strictEqual(citationRecommendations.length, packet.insertActions.length);
  assert.ok(packet.selectionRecommendations.length > 0);
  assert.strictEqual(citationInsertionPlan.targetAnchor, sample.targetAnchor);
  assert.strictEqual(citationInsertionPlan.insertions.length, citationRecommendations.length);
  assert.ok(packet.similarPapersWidget.length > 0);
  assert.ok(claimSupportReport.claims.length > 0);
  assert.ok(claimSupportReport.reportHash.length >= 12);
  assert.ok(packet.reviewReport.reportHash);
  assert.ok(packet.packetHash.length >= 12);
}

// Run every test in its original order; the first failing assertion throws
// and aborts the run before the success line is printed.
const suite = [
  testSummaries,
  testReviewDiagnostics,
  testSimilarityAndCitations,
  testHighlightedCitationRecommendationsAndInsertPlan,
  testSimilarPapersWidget,
  testClaimSupportReport,
  testReferenceFormatting,
  testPacket,
];

for (const runTest of suite) {
  runTest();
}

console.log("ai-research-mvp-tools tests passed");