In [1]:
!pip install -U langchain langchain-openai python-dotenv

Collecting langchain
  Downloading langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.31-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.72 (from langchain)
  Downloading langchain_core-0.3.74-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.9 (from langchain)
  Downloading langchain_text_splitters-0.3.9-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith>=0.1.17 (from langchain)
  Downloading langsmith-0.4.16-py3-none-any.whl.metadata (14 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting tenacity!=8.4.0,<10.0.0,>=8.1.0 (from langchain-core<1.0.0,>=0.3.72->langchain)
  Using cached tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<1.0.0,>=0.3.72->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata

In [14]:
# bio_term_explainer.py
import os
from dotenv import load_dotenv

# 1) load OPENAI_API_KEY from .env
load_dotenv()

# 2) langchain + openai chat model
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

# --- a small lookup table of common biological morphemes ---
# Keys are lower-case fragments; values are short glosses (meaning + origin).
# Insertion order is significant: ties between equal-length keys are broken by
# the order in which they appear here when the keys are sorted by length.

# Word-initial elements and combining forms.
_PREFIXES = dict(
    anti="against/opposite (Greek)",
    auto="self (Greek)",
    bio="life (Greek)",
    brady="slow (Greek)",
    cardi="heart (Greek)",
    cephal="head (Greek)",
    cyan="blue (Greek)",
    cyto="cell (Greek)",
    derm="skin (Greek)",
    entero="intestine (Greek)",
    erythr="red (Greek)",
    gastro="stomach (Greek)",
    hemi="half (Greek)",
    hemo="blood (Greek)",
    hepato="liver (Greek)",
    hyper="over/excessive (Greek)",
    hypo="under/below (Greek)",
    leuko="white (Greek)",
    myo="muscle (Greek)",
    nephro="kidney (Greek)",
    neuro="nerve (Greek)",
    osteo="bone (Greek)",
    peri="around (Greek)",
    poly="many (Greek)",
    pseudo="false (Greek)",
    tachy="fast (Greek)",
    therm="heat (Greek)",
)

# Word-final elements.
_SUFFIXES = dict(
    algia="pain (Greek)",
    # NOTE(review): this gloss is circular ("-ase ... from -ase"); the suffix was
    # abstracted from "diastase" -- consider rewording the description.
    ase="enzyme (modern suffix, from -ase)",
    cyte="cell (Greek)",
    emia="blood condition (Greek)",
    genic="producing/causing (Greek)",
    genesis="origin/formation (Greek)",
    itis="inflammation (Greek)",
    logy="study of (Greek)",
    lysis="breaking down (Greek)",
    oma="tumour/mass (Greek)",
    osis="condition/state (Greek)",
    pathy="disease/feeling (Greek)",
    phage="eater (Greek)",
    philia="attraction/affinity (Greek)",
    phobia="fear (Greek)",
    plasty="moulding/surgical repair (Greek)",
    scope="instrument for viewing (Greek)",
    tomy="cutting/incision (Greek)",
)

# Single combined table: prefixes first, then suffixes (no key appears in both).
MORPHEMES = {**_PREFIXES, **_SUFFIXES}

def candidate_morphemes(term: str, morphemes=None) -> list:
    """Return the known morphemes that occur anywhere inside ``term``.

    Matching is a case-insensitive substring test, so a fragment is reported
    wherever it appears in the term, and overlapping or incidental matches
    are all returned. Callers should treat the result as hints, not as a
    segmentation.

    Args:
        term: The word or phrase to scan.
        morphemes: Optional mapping of lower-case fragment -> gloss to search
            with. Defaults to the module-level ``MORPHEMES`` table.

    Returns:
        A list of ``(fragment, gloss)`` tuples ordered by fragment length,
        longest first; equal-length fragments keep the table's own order.
        Empty when nothing matches.
    """
    table = MORPHEMES if morphemes is None else morphemes
    lowered = term.lower()
    # Longest keys first so the most specific fragments head the list.
    # This only orders the hits; it does not suppress overlapping matches.
    by_length = sorted(table, key=len, reverse=True)
    return [(fragment, table[fragment]) for fragment in by_length if fragment in lowered]

# set up LLM
# Chat model shared by every request.
# NOTE(review): confirm the configured model honours a custom temperature.
llm = ChatOpenAI(model="gpt-5", temperature=0.2)
# Final pipeline stage; with the -> str annotation on explain_term this is
# expected to reduce the model reply to plain text.
parser = StrOutputParser()

# Two-message prompt: fixed tutor instructions plus a per-term request.
# Template variables: {term} and {candidates}.
# Adjacent string literals are concatenated with nothing in between, so every
# fragment that is followed by another sentence must end with its own space.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a precise biology etymology tutor. "
     "Break terms into morphemes (Greek/Latin roots/prefixes/suffixes), explain each piece, "
     "note language of origin when known, and synthesise a plain-English meaning. "
     "If uncertain, say so. Don’t invent folk etymologies. "
     "Translate terms in other languages to English before breakdown"),
    ("user",
     "Term: {term}\n\n"
     "Candidate morphemes (may be incomplete):\n{candidates}\n\n"
     "Step 1) Segment the term into morphemes.\n"
     "Step 2) For each part, give meaning + origin.\n"
     "Step 3) Combine into a concise definition in plain British English.\n"
     "Step 4) Add 1–2 related examples or contrasting terms when helpful.\n"
     "Output format:\n"
     "• Breakdown: <morpheme> – <meaning> (<origin>)\n"
     "• Synthesis: <one‑sentence definition>\n"
     "• Notes: <any ambiguity/caveats>\n")
])

# Compose the stages left to right: fill the prompt, call the model, parse the reply.
chain = prompt | llm | parser

def explain_term(term: str) -> str:
    """Ask the chain for an etymological breakdown of ``term``.

    Dictionary hits from ``candidate_morphemes`` are rendered as a bulleted
    list, one ``- fragment: gloss`` line per hit, and passed to the prompt as
    ``candidates``. When there are no hits, a fixed placeholder line is sent
    instead.

    Args:
        term: The term to explain, passed to the prompt unchanged.

    Returns:
        Whatever ``chain.invoke`` returns for the filled-in prompt.
    """
    matches = candidate_morphemes(term)
    if not matches:
        candidates = "- (no obvious matches; proceed by best morphological judgement)"
    else:
        candidates = "\n".join(f"- {fragment}: {gloss}" for fragment, gloss in matches)
    return chain.invoke({"term": term, "candidates": candidates})

if __name__ == "__main__":
    # Interactive read-explain-print loop; every explained term costs one chain call.
    print("Bio Etymology Explainer (type 'quit' to exit)")
    while True:
        try:
            t = input("Term: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt is treated like 'quit'
            # rather than ending the session with a traceback.
            print()
            break
        if t.lower() in {"quit", "exit"}:
            break
        if not t:
            # Blank line: re-prompt instead of sending an empty term to the model.
            continue
        print()
        print(explain_term(t))
        print()

Bio Etymology Explainer (type 'quit' to exit)

English: high blood pressure (hypertension)

• Breakdown:
- 高 – high; elevated (Chinese)
- 血 – blood (Chinese)
- 压 – pressure; to press (Chinese)

• Synthesis: A condition characterised by persistently high arterial blood pressure.

• Notes: 
- 血压 alone means “blood pressure.” Traditional form: 高血壓; pinyin: gāo-xuèyā.
- Related/contrast: 低血压 – low blood pressure (hypotension).
- English synonym “hypertension” is from Greek hyper- “over” + Latin tensio “stretching/strain.”


• Breakdown: 肺 – lung (Chinese)
• Breakdown: 结核 – tubercle; by extension, tuberculosis (Chinese; literally “knot/lump + kernel/hard core”; used in medical Chinese/Japanese to calque “tubercle” from Latin tuberculum)

• Synthesis: Pulmonary tuberculosis — tuberculosis affecting the lungs.

• Notes: 
- Examples: 结核杆菌 “tubercle bacillus” (Mycobacterium tuberculosis); 肺外结核 “extrapulmonary tuberculosis.” 
- Contrast: 肺炎 “pneumonia” (non-tuberculous lung inflammation). 
- 结核 