**1. Load Documents**

In [2]:
from langchain.document_loaders import PyPDFLoader, WebBaseLoader

# # --- 1️⃣ Load Multiple PDFs ---
# pdf_files = [
#     "WHO_BP_guidelines.pdf",
#     "Diabetes_guidelines.pdf",
#     "Cholesterol_guidelines.pdf",
#     "BMI_guidelines.pdf",
#     "Smoking_guidelines.pdf"
# ]

# pdf_docs = []
# for file in pdf_files:
#     loader = PyPDFLoader(file)
#     pdf_docs.extend(loader.load())

# --- 2️⃣ Load Multiple Websites ---
web_pages = [
    "https://www.who.int/news-room/fact-sheets/detail/hypertension",
    "https://www.who.int/news-room/fact-sheets/detail/diabetes",
    "https://www.who.int/news-room/fact-sheets/detail/obesity",
    "https://www.cdc.gov/cholesterol/facts.html",
    "https://www.cdc.gov/tobacco/data_statistics/fact_sheets/index.htm"
]

web_docs = []
for url in web_pages:
    loader = WebBaseLoader(url)
    web_docs.extend(loader.load())

# --- 3️⃣ Combine all docs and split into chunks ---
all_docs = web_docs


**2. Split Text into Chunks**

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
web_chunks = text_splitter.split_documents(all_docs)


**3. Create Vector DB**

In [7]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Use 'embedding' instead of 'embedding_function'
vectordb = Chroma.from_documents(
    web_chunks,
    embedding=embeddings,        # Correct parameter name
    persist_directory="rag_db"
)

vectordb.persist()
print("✅ Vector DB created successfully")


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 609eb158-e434-4044-9543-9856dfee3bfe)')' thrown while requesting HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/./config_sentence_transformers.json
Retrying in 1s [Retry 1/5].


✅ Vector DB created successfully


  vectordb.persist()


**4. Query RAG with Input Vitals + Prediction**

In [8]:
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
query = "BP=150/95, Chol=220, BMI=32, Prediction: High risk"
relevant_docs = retriever.get_relevant_documents(query)

for doc in relevant_docs:
    print(doc.page_content)


in the vessels when the heart rests between beats.Hypertension is diagnosed if, when it is measured on two different days, the systolic blood pressure readings on both days is ≥140 mmHg and/or the diastolic blood pressure readings on both days is ≥90 mmHg.Risk factorsModifiable risk factors include unhealthy diets (excessive salt consumption, a diet high in saturated fat and trans fats, low intake of fruits and vegetables), physical inactivity, consumption of tobacco and alcohol, and being
professional.Keep appointments with your health care professional.Don’t:eat too much salty food (try to stay under 2 grams per day)eat foods high in saturated or trans fatssmoke or use tobaccodrink too much alcohol (1 drink daily max for women, 2 for men)miss or share medication.Reducing hypertension prevents heart attack, stroke and kidney damage, as well as other health problems. Reduce the risks of hypertension by: reducing and managing stressregularly checking blood pressuretreating high
uncontro

  relevant_docs = retriever.get_relevant_documents(query)


In [None]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
import os

groq = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="llama-3.1-8b-instant", temperature=0.3)

# RetrievalQA chain: LLM + RAG
qa_chain = RetrievalQA.from_chain_type(
    llm=groq,
    chain_type="stuff",  # combines all retrieved chunks
    retriever=retriever, # your vectordb retriever
    return_source_documents=True
)

# Query includes vitals + prediction
query = """
Patient vitals: BP=150/95, Cholesterol=220, BMI=32, Smoker=yes, Diabetic=no.
Predicted risk: High.
Please provide:
1. Explanation of the risk
2. Possible diagnosis
3. Suggested actions or next steps
"""
result = qa_chain(query)

print("=== Diagnosis Explanation ===")
print(result)


  result = qa_chain(query)


=== Diagnosis Explanation ===
{'query': '\nPatient vitals: BP=150/95, Cholesterol=220, BMI=32, Smoker=yes, Diabetic=no.\nPredicted risk: High.\nPlease provide:\n1. Explanation of the risk\n2. Possible diagnosis\n3. Suggested actions or next steps\n', 'result': "**1. Explanation of the risk:**\nBased on the provided patient vitals, the patient has a high predicted risk of cardiovascular disease. The risk factors contributing to this high risk are:\n\n- Elevated systolic blood pressure (150 mmHg) and diastolic blood pressure (95 mmHg), indicating hypertension.\n- High cholesterol level (220), which is a significant risk factor for cardiovascular disease.\n- Elevated BMI (32), indicating obesity, which is a risk factor for various health conditions, including cardiovascular disease and type 2 diabetes.\n- The patient is a smoker, which is a major risk factor for cardiovascular disease.\n\n**2. Possible diagnosis:**\nGiven the patient's high blood pressure, high cholesterol, and obesity, t