From 7c555f659c7995dd3b944d05cf5e6e311b2be94d Mon Sep 17 00:00:00 2001
From: "marcin p. joachimiak" <4625870+realmarcin@users.noreply.github.com>
Date: Mon, 25 May 2026 19:27:32 -0700
Subject: [PATCH 1/2] Fix broken literature_enhanced imports in two writer
 scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scripts/add_evidence_source.py and scripts/intelligent_snippet_fixer.py
both import EnhancedLiteratureFetcher from communitymech.literature_enhanced,
a module whose source was never committed to git (locally, only a stale
.pyc stood in for the missing source). Both scripts have raised
ModuleNotFoundError on import for as long as anyone has tried to run
them; the recent writer-conversion PR #87 flagged this as a
pre-existing failure.

Swap to LiteratureFetcher from communitymech.literature, which exposes
the same fetch_pubmed_abstract + fetch_paper surface plus a richer
DOI fallback chain (CrossRef → PubMed via DOI lookup → PMC full-text →
OpenAlex → Semantic Scholar → Europe PMC → publisher meta-tag scrape)
that subsumes what fetch_abstract_for_doi did. API differences:

- fetch_paper returns (abstract, pdf_url) not a dict; tuple-unpack at
  call sites.
- LiteratureFetcher.fetch_paper has no download_pdf kwarg (the older
  version's flag has no role in the LiteratureFetcher pipeline, and
  every call site passed download_pdf=False anyway; the PDF URL is
  simply returned alongside the abstract).
- The title is no longer available as a separate field. In
  add_evidence_source.py's guess_evidence_source classifier the title
  was merged with the snippet and abstract via filter(None, …) anyway,
  so losing it degrades classification only marginally (PubMed
  abstracts include the title in the abstract text, so PMID references
  are unaffected). If richer DOI classification is needed later,
  LiteratureFetcher.fetch_doi_metadata() returns CrossRef metadata with
  a title field.

After-state: both scripts now import and run their initialization paths
cleanly. pytest tests/ still passes (136 passed, 9 skipped).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/add_evidence_source.py       | 37 +++++++++++++++++-----------
 scripts/intelligent_snippet_fixer.py | 20 +++++++++------
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/scripts/add_evidence_source.py b/scripts/add_evidence_source.py
index 0c43bc61..5354caee 100644
--- a/scripts/add_evidence_source.py
+++ b/scripts/add_evidence_source.py
@@ -26,7 +26,7 @@
 
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
-from communitymech.literature_enhanced import EnhancedLiteratureFetcher
+from communitymech.literature import LiteratureFetcher
 
 from communitymech.curate.curation_event import record_curation_event
 from communitymech.validation.write_validated import (
@@ -39,10 +39,13 @@ class EvidenceSourceAdder:
     """Add evidence_source to evidence items"""
 
     def __init__(self):
-        self.fetcher = EnhancedLiteratureFetcher(
-            cache_dir=".literature_cache",
-            use_fallback_pdf=False
-        )
+        # Previously imported a sibling EnhancedLiteratureFetcher class that
+        # was never committed to the repo; the LiteratureFetcher in
+        # communitymech.literature exposes the same fetch_pubmed_abstract +
+        # fetch_paper surface (plus a richer DOI fallback chain through
+        # CrossRef / PMC / OpenAlex / Semantic Scholar / Europe PMC) which
+        # is what these scripts actually need.
+        self.fetcher = LiteratureFetcher(cache_dir=".literature_cache")
         self.stats = {
             'total_evidence': 0,
             'already_has_source': 0,
@@ -148,12 +151,14 @@ def process_yaml(
 
                     # Try to fetch abstract for better classification
                     abstract = None
-                    title = None
+                    title = None  # LiteratureFetcher.fetch_paper returns
+                                  # (abstract, pdf_url); the title is embedded
+                                  # in PubMed abstracts and can be pulled from
+                                  # CrossRef metadata via fetch_doi_metadata()
+                                  # if richer classification is needed later.
                     try:
-                        paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-                        abstract = paper.get('abstract')
-                        title = paper.get('title')
-                    except:
+                        abstract, _ = self.fetcher.fetch_paper(reference)
+                    except Exception:
                         pass
 
                     # Guess evidence source
@@ -221,12 +226,14 @@ def process_yaml(
                     reference = ev.get('reference', '')
 
                     abstract = None
-                    title = None
+                    title = None  # LiteratureFetcher.fetch_paper returns
+                                  # (abstract, pdf_url); the title is embedded
+                                  # in PubMed abstracts and can be pulled from
+                                  # CrossRef metadata via fetch_doi_metadata()
+                                  # if richer classification is needed later.
                     try:
-                        paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-                        abstract = paper.get('abstract')
-                        title = paper.get('title')
-                    except:
+                        abstract, _ = self.fetcher.fetch_paper(reference)
+                    except Exception:
                         pass
 
                     guessed_source = self.guess_evidence_source(
diff --git a/scripts/intelligent_snippet_fixer.py b/scripts/intelligent_snippet_fixer.py
index 2733b2dc..a72d006b 100755
--- a/scripts/intelligent_snippet_fixer.py
+++ b/scripts/intelligent_snippet_fixer.py
@@ -25,7 +25,7 @@
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
 from communitymech.curate.curation_event import record_curation_event
-from communitymech.literature_enhanced import EnhancedLiteratureFetcher
+from communitymech.literature import LiteratureFetcher
 from communitymech.validation.write_validated import (
     ValidationFailedError,
     write_validated_community,
@@ -59,7 +59,12 @@ class IntelligentSnippetFixer:
     """Intelligent snippet fixer with context-aware abstract analysis."""
 
     def __init__(self, verbose: bool = False):
-        self.fetcher = EnhancedLiteratureFetcher()
+        # Previously imported a sibling EnhancedLiteratureFetcher class
+        # that was never committed; LiteratureFetcher exposes the same
+        # fetch_pubmed_abstract + fetch_paper surface plus a richer DOI
+        # fallback chain (CrossRef / PMC / OpenAlex / Semantic Scholar /
+        # Europe PMC) which subsumes what fetch_abstract_for_doi did.
+        self.fetcher = LiteratureFetcher()
         self.verbose = verbose
 
     def extract_relevant_sentences(
@@ -210,12 +215,13 @@ def suggest_snippets_for_evidence(
         if reference.upper().startswith("PMID:"):
             pmid = reference.replace("PMID:", "").replace("pmid:", "").strip()
             abstract = self.fetcher.fetch_pubmed_abstract(pmid)
-        elif "doi" in reference.lower() or reference.startswith("10."):
-            doi = reference.replace("doi:", "").replace("https://doi.org/", "").strip()
-            abstract = self.fetcher.fetch_abstract_for_doi(doi)
         else:
-            paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-            abstract = paper.get("abstract")
+            # fetch_paper auto-detects PMID vs DOI and runs the full
+            # DOI fallback chain (CrossRef → PMID via DOI lookup → PMC
+            # full-text → OpenAlex → Semantic Scholar → Europe PMC →
+            # publisher meta-tag scrape). Returns (abstract, pdf_url);
+            # we don't need the pdf here.
+            abstract, _ = self.fetcher.fetch_paper(reference)
 
         if not abstract:
             if self.verbose:

From 9d599d53de16e34f6c970d8aa315b67d6c579b04 Mon Sep 17 00:00:00 2001
From: "marcin p. joachimiak" <4625870+realmarcin@users.noreply.github.com>
Date: Mon, 25 May 2026 19:32:24 -0700
Subject: [PATCH 2/2] Address Copilot review: drop dead title param from
 guess_evidence_source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot flagged that title was assigned None and then passed to
guess_evidence_source, where the classifier merged it into its
keyword-matching text via filter(None, ...). With title always None,
the parameter was dead code that only cluttered the call sites.

Remove the title parameter from guess_evidence_source and from both
caller blocks. PubMed abstracts already embed the title in the
abstract text (so PMID-driven classification is unchanged), and
CrossRef titles for DOI references are available via
LiteratureFetcher.fetch_doi_metadata() if richer classification is
wanted later — that's now a clear future-work hook rather than a
hard-coded-None pretense.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/add_evidence_source.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/scripts/add_evidence_source.py b/scripts/add_evidence_source.py
index 5354caee..664bf250 100644
--- a/scripts/add_evidence_source.py
+++ b/scripts/add_evidence_source.py
@@ -81,13 +81,12 @@ def guess_evidence_source(
         self,
         snippet: str,
         abstract: str = None,
-        title: str = None,
         community_origin: str = None
     ) -> Optional[str]:
         """Guess evidence source using heuristics"""
 
         # Combine text for keyword matching
-        text = ' '.join(filter(None, [snippet, abstract, title])).lower()
+        text = ' '.join(filter(None, [snippet, abstract])).lower()
 
         # Check for review first (highest specificity)
         if any(kw in text for kw in self.review_keywords):
@@ -150,12 +149,11 @@ def process_yaml(
                     reference = ev.get('reference', '')
 
                     # Try to fetch abstract for better classification
+                    # Title is not threaded into the classifier — PubMed
+                    # abstracts already embed the title, and CrossRef
+                    # titles for DOIs are available via fetch_doi_metadata()
+                    # if richer classification is wanted later.
                     abstract = None
-                    title = None  # LiteratureFetcher.fetch_paper returns
-                                  # (abstract, pdf_url); the title is embedded
-                                  # in PubMed abstracts and can be pulled from
-                                  # CrossRef metadata via fetch_doi_metadata()
-                                  # if richer classification is needed later.
                     try:
                         abstract, _ = self.fetcher.fetch_paper(reference)
                     except Exception:
@@ -163,7 +161,7 @@ def process_yaml(
 
                     # Guess evidence source
                     guessed_source = self.guess_evidence_source(
-                        snippet, abstract, title, community_origin
+                        snippet, abstract, community_origin
                     )
 
                     if auto_mode and guessed_source:
@@ -225,19 +223,18 @@ def process_yaml(
                     snippet = ev.get('snippet', '')
                     reference = ev.get('reference', '')
 
+                    # Title is not threaded into the classifier — PubMed
+                    # abstracts already embed the title, and CrossRef
+                    # titles for DOIs are available via fetch_doi_metadata()
+                    # if richer classification is wanted later.
                     abstract = None
-                    title = None  # LiteratureFetcher.fetch_paper returns
-                                  # (abstract, pdf_url); the title is embedded
-                                  # in PubMed abstracts and can be pulled from
-                                  # CrossRef metadata via fetch_doi_metadata()
-                                  # if richer classification is needed later.
                     try:
                         abstract, _ = self.fetcher.fetch_paper(reference)
                     except Exception:
                         pass
 
                     guessed_source = self.guess_evidence_source(
-                        snippet, abstract, title, community_origin
+                        snippet, abstract, community_origin
                     )
 
                     if auto_mode and guessed_source: