In [1]:
!pip install -U langchain langchain-openai python-dotenv

Collecting langchain
  Downloading langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.31-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.72 (from langchain)
  Downloading langchain_core-0.3.74-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.9 (from langchain)
  Downloading langchain_text_splitters-0.3.9-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith>=0.1.17 (from langchain)
  Downloading langsmith-0.4.16-py3-none-any.whl.metadata (14 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting tenacity!=8.4.0,<10.0.0,>=8.1.0 (from langchain-core<1.0.0,>=0.3.72->langchain)
  Using cached tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<1.0.0,>=0.3.72->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata

In [3]:
# bio_term_explainer.py  — structured output version
import os
from typing import List, Optional
from dotenv import load_dotenv

load_dotenv()

# LangChain + OpenAI
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

# --- morpheme hints (yours) ---
# Lookup table of common biomedical word parts -> short gloss, with the origin in
# parentheses. Keys are lower-case and carry no hyphens; candidate_morphemes()
# matches them as plain substrings of the lower-cased term. Insertion order only
# matters as the tie-break between keys of equal length when hits are sorted
# longest-first.
MORPHEMES = {
    # prefixes
    "anti": "against/opposite (Greek)",
    "auto": "self (Greek)",
    "bio": "life (Greek)",
    "brady": "slow (Greek)",
    "cardi": "heart (Greek)",
    "cephal": "head (Greek)",
    "cyan": "blue (Greek)",
    "cyto": "cell (Greek)",
    "derm": "skin (Greek)",
    "entero": "intestine (Greek)",
    "erythr": "red (Greek)",
    "gastro": "stomach (Greek)",
    "hemi": "half (Greek)",
    "hemo": "blood (Greek)",
    "hepato": "liver (Greek)",
    "hyper": "over/excessive (Greek)",
    "hypo": "under/below (Greek)",
    "leuko": "white (Greek)",
    "myo": "muscle (Greek)",
    "nephro": "kidney (Greek)",
    "neuro": "nerve (Greek)",
    "osteo": "bone (Greek)",
    "peri": "around (Greek)",
    "poly": "many (Greek)",
    "pseudo": "false (Greek)",
    "tachy": "fast (Greek)",
    "therm": "heat (Greek)",
    # suffixes
    "algia": "pain (Greek)",
    # The previous gloss read "from -ase", which is circular; the suffix was
    # abstracted from the enzyme name "diastase".
    "ase": "enzyme (modern suffix, from diastase)",
    "cyte": "cell (Greek)",
    "emia": "blood condition (Greek)",
    "genic": "producing/causing (Greek)",
    "genesis": "origin/formation (Greek)",
    "itis": "inflammation (Greek)",
    "logy": "study of (Greek)",
    "lysis": "breaking down (Greek)",
    "oma": "tumour/mass (Greek)",
    "osis": "condition/state (Greek)",
    "pathy": "disease/feeling (Greek)",
    "phage": "eater (Greek)",
    "philia": "attraction/affinity (Greek)",
    "phobia": "fear (Greek)",
    "plasty": "moulding/surgical repair (Greek)",
    "scope": "instrument for viewing (Greek)",
    "tomy": "cutting/incision (Greek)",
}

def candidate_morphemes(term: str, morphemes: Optional[dict] = None) -> List[tuple]:
    """Return the known morphemes that occur anywhere inside ``term``.

    Matching is a case-insensitive substring test, so the result is a list of
    hints rather than a segmentation: a short key can also match inside an
    unrelated stretch of the word.

    Args:
        term: The word to scan.
        morphemes: Mapping of lower-case morpheme -> gloss to search. When
            omitted, the module-level ``MORPHEMES`` table is used.

    Returns:
        ``(morpheme, gloss)`` pairs, longest morpheme first. Keys of equal
        length keep the mapping's iteration order.
    """
    table = MORPHEMES if morphemes is None else morphemes
    lowered = term.lower()
    # sorted() is stable (also with reverse=True), so equal-length keys stay in table order.
    longest_first = sorted(table, key=len, reverse=True)
    return [(m, table[m]) for m in longest_first if m in lowered]

# --- strict schema ---
# One entry of the morpheme breakdown: a word part plus its gloss.
# `morpheme` and `meaning` are required (default `...`); `origin` and `note`
# default to None, and render_fixed() leaves them out when they are empty.
# Documented with comments rather than a class docstring on purpose —
# NOTE(review): pydantic is understood to copy class docstrings into the JSON
# schema, which would change the format instructions sent to the model; confirm.
class Part(BaseModel):
    # The word part itself, as it appears in the analysed term.
    morpheme: str = Field(..., description="Substring/root/prefix/suffix")
    # Short gloss of that part.
    meaning: str = Field(..., description="Gloss of the morpheme")
    # Source language, when the model can name one.
    origin: Optional[str] = Field(None, description="Greek/Latin/other if known")
    # Free-text caveat; the prompt asks for it when the analysis is uncertain.
    note: Optional[str] = Field(None, description="Ambiguity/nuance if any")

# Top-level structured answer for a single term. This is the model handed to
# PydanticOutputParser, so its fields define the JSON the LLM is asked to return.
# The length/style limits noted below are requested in the prompt only; no
# validator in this class enforces them.
class BioEtymology(BaseModel):
    term: str                 # the term as analysed; rendered under "Terminology"
    english_translation: str  # <= 12 words (prompt guideline, not validated)
    breakdown: List[Part]     # morpheme-by-morpheme analysis, in the order returned
    explanation: str          # one short paragraph, British English (prompt guideline)
    # Optional list of related terms; defaults to a fresh empty list per instance.
    related_words: List[str] = Field(default_factory=list)

# Output parser bound to the BioEtymology schema. It is used twice: as the last
# stage of `chain`, and in explain_term_structured() to obtain the format
# instructions that fill the {format_instructions} prompt slot.
parser = PydanticOutputParser(pydantic_object=BioEtymology)

# --- LLM + prompt ---
# NOTE(review): confirm that the selected model honours a non-default temperature;
# some OpenAI models restrict this parameter, in which case 0.2 may have no effect.
llm = ChatOpenAI(model="gpt-5", temperature=0.2)

# Two-message prompt. The placeholders {term}, {candidates} and
# {format_instructions} are supplied by explain_term_structured() at invoke time.
# The string literals below are the prompt text itself — editing them changes
# what the model is asked to do.
prompt = ChatPromptTemplate.from_messages([
    # System message: role and ground rules for the analysis.
    ("system",
     "You are a precise biology etymology tutor. "
     "Break terms into authentic Greek/Latin morphemes. "
     "Translate non-English terms to English first, then analyse. "
     "Be explicit when uncertain; avoid folk etymologies."),
    # User message: the term, the lookup-table hints, the schema, and the
    # per-field guidelines that mirror the BioEtymology fields.
    ("user",
     "Term: {term}\n\n"
     "Candidate morphemes (hints, may be incomplete):\n{candidates}\n\n"
     "Return ONLY JSON that matches this schema:\n{format_instructions}\n\n"
     "Guidelines:\n"
     "- Segment the term into morphemes.\n"
     "- For each part: meaning + origin; add 'note' if uncertain.\n"
     "- 'english_translation' ≤ 12 words, concise.\n"
     "- 'explanation' is one short paragraph in British English.\n"
     "- 'related_words' 2–6 items if sensible."
    )
])

# Pipeline: fill the prompt -> call the model -> parse the reply with `parser`.
# It is invoked with a dict holding the three prompt variables.
chain = prompt | llm | parser

def explain_term_structured(term: str) -> BioEtymology:
    """Run the LLM chain for ``term`` and return its parsed result.

    The morpheme table is consulted first; any matches are sent to the prompt
    as a bulleted hint list, and when nothing matches a single placeholder
    bullet is sent instead. The format instructions are taken from ``parser``
    on every call.
    """
    matches = candidate_morphemes(term)
    if matches:
        hint_block = "\n".join(f"- {m}: {desc}" for m, desc in matches)
    else:
        hint_block = "- (no obvious matches; proceed by best morphological judgement)"

    prompt_vars = {
        "term": term,
        "candidates": hint_block,
        "format_instructions": parser.get_format_instructions(),
    }
    return chain.invoke(prompt_vars)

# --- renderer with your fixed headings ---
def render_fixed(result: BioEtymology) -> str:
    """Format a parsed result as plain text under the fixed section headings.

    Layout: "Terminology" and "English Translation" lines, a blank line, a
    "Breakdown" list with one bullet per part, then "Explanation". A
    "Related words" section is appended only when the list is non-empty.
    Origin and note are omitted from a bullet when they are empty.
    """
    def _bullet(part) -> str:
        # Optional pieces collapse to an empty string so the bullet stays tidy.
        origin_txt = f" ({part.origin})" if part.origin else ""
        note_txt = f" — {part.note}" if part.note else ""
        return f"- {part.morpheme} – {part.meaning}{origin_txt}{note_txt}"

    out = [
        f"Terminology: {result.term}",
        # Trailing newline yields the blank line before the next heading.
        f"English Translation: {result.english_translation}\n",
        "Breakdown",
    ]
    out.extend(_bullet(part) for part in result.breakdown)
    out.extend(["\nExplanation", result.explanation.strip()])

    if result.related_words:
        out.extend(["\nRelated words", ", ".join(result.related_words)])

    return "\n".join(out)

if __name__ == "__main__":
    # Interactive loop: read a term, ask the LLM, print the rendered answer.
    # "quit" or "exit" (any case) ends the session.
    print("Bio Etymology Explainer (structured). Type 'quit' to exit.")
    while True:
        try:
            t = input("Term: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt ends the session
            # without a traceback.
            print()
            break
        if t.lower() in {"quit", "exit"}:
            break
        if not t:
            # A blank line would otherwise be sent to the model as an empty term.
            continue
        try:
            res = explain_term_structured(t)
        except Exception as exc:
            # One failed request or unparsable reply should not end the whole
            # session; report it and prompt again. KeyboardInterrupt is not an
            # Exception subclass, so interrupting a slow call still propagates.
            print(f"\nCould not explain {t!r}: {type(exc).__name__}: {exc}\n")
            continue
        print()
        print(render_fixed(res))
        print()


Bio Etymology Explainer (structured). Type 'quit' to exit.

Terminology: 低血糖
English Translation: hypoglycaemia; low blood sugar

Breakdown
- hypo- – under, below normal (Greek) — From Greek hypo-, ‘under’. Indicates deficiency or subnormal level.
- glyc- – sweet, sugar (Greek) — From Greek glykys ‘sweet’. Often seen as glyco-; in ‘hypoglycaemia’ the -o- before glyc- comes from hypo-.
- -aemia – blood condition (Greek) — From Greek haima ‘blood’ + -ia. British -aemia vs US -emia.

Explanation
The Chinese term 低血糖 literally means ‘low blood sugar’. Its standard Greco‑Latin medical equivalent is hypoglycaemia: hypo- ‘below normal’ + glyc- ‘sugar’ + -aemia ‘blood condition’, denoting an abnormally low concentration of glucose in the blood.

Related words
hyperglycaemia, euglycaemia, glycaemia, hypoglycaemic


Terminology: 低血钾
English Translation: low blood potassium; hypokalaemia

Breakdown
- hypo- – under, below, deficient (Greek)
- kali- – potassium (kalium) (Neo-Latin (ultimately from 