In [1]:
# Retrieve Guidelines.

In [2]:
# Goal:
# Retrieve authoritative guideline passages relevant to a proposed medication
# and patient condition, without any reasoning or interpretation.

# Assumptions:
# - Patient context is already structured (from Step 1)
# - No generative LLM is used in this step (an embedding model is used only for similarity search)
# - Output is raw guideline text with source metadata


In [3]:
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

# Folder that holds the guideline PDFs, relative to the notebook's working directory.
GUIDELINE_DIR = Path("data/guidelines")

# Path.glob yields entries in a filesystem-dependent order. Sorting keeps the
# document order (and therefore documents[0], chunks[0] and the index ids built
# from them) the same on every machine and every re-run.
pdf_files = sorted(GUIDELINE_DIR.glob("*.pdf"))
pdf_files


[WindowsPath('data/guidelines/CANCER_PAIN_RELIEF.pdf'),
 WindowsPath('data/guidelines/Guidelines_on_Pain_Management.pdf'),
 WindowsPath('data/guidelines/NICE_NG203_Official_PDF.pdf'),
 WindowsPath('data/guidelines/NSAIDs_and_Kidney_Health.pdf')]

In [5]:
# Load every guideline PDF and flatten the per-file results into a single list,
# preserving the order of pdf_files.
documents = [
    loaded_doc
    for pdf_path in pdf_files
    for loaded_doc in PyPDFLoader(str(pdf_path)).load()
]

len(documents)


248

In [8]:
# Sanity check: show the first 500 characters of text extracted from the first loaded document.
documents[0].page_content[:500]

'CANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN C CER PAIN RELIEF PAIN RELIEF \nCANCER PAIN PAIN RELIEF PAIN RELIEF \nCANCER P ELIEF PAIN RELi \nCANCER F PAIN \nCAN I \nc \n,ANCEH \n, ... CANCER Ph11" . \n.JAIN CANCER PAIN REL1.... . ,c'

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters. Sizes are in characters under the splitter's default
# length function; consecutive chunks share up to CHUNK_OVERLAP characters.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 150

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split the loaded documents into retrieval-sized chunks.
chunks = text_splitter.split_documents(documents)

len(chunks)


980

In [9]:
# Inspect the full text of the first chunk produced by the splitter.
chunks[0].page_content


'CANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN C CER PAIN RELIEF PAIN RELIEF \nCANCER PAIN PAIN RELIEF PAIN RELIEF \nCANCER P ELIEF PAIN RELi \nCANCER F PAIN \nCAN I \nc \n,ANCEH \n, ... CANCER Ph11" . \n.JAIN CANCER PAIN REL1.... . ,c:LIEF \n...,t::R PAIN CANCER PAIN RELIEF r-r 1~ RELIEF \n"\'r\\JCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF \nCANCER PAIN CANCER PAIN RELIEF PAIN RELIEF'

In [None]:
# Inspect the metadata carried by the first chunk; the result-printing loop
# later in the notebook reads its `source` and `page` keys.
chunks[0].metadata


{'producer': 'Adobe Acrobat Pro DC 15 Paper Capture Plug-in',
 'creator': 'Canon',
 'creationdate': '2021-08-16T10:57:01+01:00',
 'moddate': '2021-08-16T11:22:22+02:00',
 'source': 'data\\guidelines\\CANCER_PAIN_RELIEF.pdf',
 'total_pages': 79,
 'page': 0,
 'page_label': '1'}

In [12]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-embedding model used to vectorise both the chunks and, later, the query.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed every chunk and build the FAISS vector store from the results.
vectorstore = FAISS.from_documents(chunks, embedding=embedding_model)


In [13]:
# Sanity check, enforced rather than eyeballed: the vector store must hold
# exactly one entry per chunk that was embedded.
indexed_count = len(vectorstore.index_to_docstore_id)
assert indexed_count == len(chunks), (
    f"vector store holds {indexed_count} entries but {len(chunks)} chunks were embedded"
)
indexed_count


980

In [14]:
# Number of chunks, for comparison with the vector store's entry count.
len(chunks)


980

In [15]:
# Persist the vector store to the local folder below.
VECTORSTORE_DIR = "data/vectorstore/guidelines_v1"
vectorstore.save_local(VECTORSTORE_DIR)


In [16]:
# Structured patient context (hard-coded here as the example input for retrieval).
patient_profile = {"conditions": ["Chronic Kidney Disease Stage 3"]}

# Medication being considered for this patient.
proposed_medication = {"drug_name": "Ibuprofen"}


In [17]:
# Pull the two free-text inputs out of the structured records.
drug = proposed_medication["drug_name"]
condition = ", ".join(patient_profile["conditions"])

# Retrieval query: one line per facet, with a leading and trailing newline.
# NOTE(review): the last line's keywords are specific to the renal example
# above; revisit them if the patient profile changes.
query = (
    f"\nPrescribing guidance for {drug}\n"
    f"in patients with {condition}\n"
    "renal impairment contraindications dosing\n"
)


In [18]:
# Number of passages to request from the vector store for the query.
TOP_K = 5
results = vectorstore.similarity_search(query, k=TOP_K)

len(results)


5

In [19]:
# Print each retrieved passage with its provenance, followed by at most the
# first 500 characters of its text.
for rank, hit in enumerate(results, start=1):
    meta = hit.metadata
    print(f"\n--- Result {rank} ---")
    print("SOURCE:", meta.get("source"))
    print("PAGE:", meta.get("page"))
    print(hit.page_content[:500])



--- Result 1 ---
SOURCE: data\guidelines\NSAIDs_and_Kidney_Health.pdf
PAGE: 3
kidney disease, hypertension, and disturbances in blood pressure regulation. These issues are particularly pronounced with 
COX-2-selective inhibitors, which are often used to m inimize gastrointestinal side effects. Additionally, NSAIDs can 
exacerbate renal problems in conditions of salt depletion or dehydration, but similar risks are observed with traditional 
NSAIDs in the general population [2 7]. Prolonged or high -dose use of N SAIDs can lead to acute kidney injury, 
characterized by a re

--- Result 2 ---
SOURCE: data\guidelines\NSAIDs_and_Kidney_Health.pdf
PAGE: 0
in large dosages, or who have pre-existing kidney diseases, are more at risk [2]. 
 
The widespread use of nonsteroidal anti-inflammatory drugs (NSAIDs) in modern medicine is largely due to their 
effectiveness in managing pain and treating various inflammatory and rheumatological conditions [3]. However, alongside 
their therapeutic benef