In [3]:
!pip install -qU langchain langchain-community langchain-core langchainhub
!pip install -qU faiss-cpu huggingface-hub sentence-transformers
!pip install langchain-groq

Collecting langchain-groq
  Downloading langchain_groq-0.3.1-py3-none-any.whl.metadata (2.6 kB)
Collecting groq<1,>=0.4.1 (from langchain-groq)
  Downloading groq-0.20.0-py3-none-any.whl.metadata (15 kB)
Downloading langchain_groq-0.3.1-py3-none-any.whl (15 kB)
Downloading groq-0.20.0-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.9/124.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-groq
Successfully installed groq-0.20.0 langchain-groq-0.3.1


In [6]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

import time

# ✅ GROQ API key
os.environ['GROQ_API_KEY'] = 'your API key'
groq_api_key = os.getenv("GROQ_API_KEY")

# ✅ List of 20 URLs as test set data
urls = [
    "https://www.frontiersin.org/articles/10.3389/fmed.2024.1474043/full",
    "https://apnews.com/article/0e824790486e06bcfbd5711df4abf0ba",
    "https://nypost.com/2025/03/25/health/new-smell-test-could-detect-alzheimers-before-symptoms-start/",
    "https://www.eatingwell.com/link-between-dementia-and-type-2-diabetes-11704056",
    "https://www.msjonline.org/index.php/ijrms/article/view/13401",
    "https://www.sciencedirect.com/science/article/pii/S0168365924000622",
    "https://www.nature.com/articles/s41598-025-92826-8",
    "https://www.cell.com/cell-reports-medicine/fulltext/S2666-3791(24)00383-5?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2666379124003835%3Fshowall%3Dtrue",
    "https://alz-journals.onlinelibrary.wiley.com/doi/full/10.1002/trc2.12385",
    "https://www.ncbi.nlm.nih.gov/books/NBK499922/",
    "https://pmc.ncbi.nlm.nih.gov/articles/PMC11682909/",
    "https://pmc.ncbi.nlm.nih.gov/articles/PMC11338875/y",
    "https://link.springer.com/chapter/10.1007/978-94-007-5416-4_14",
    "https://journals.sagepub.com/doi/10.3233/JAD-240479?url_ver=Z39.88-2003&rfr_id=ori:rid:crossref.org&rfr_dat=cr_pub%20%200pubmed",
    "https://pmc.ncbi.nlm.nih.gov/articles/PMC9818878/",
    "https://pubmed.ncbi.nlm.nih.gov/40145251/",
    "https://pubmed.ncbi.nlm.nih.gov/40144618/",
    "https://pmc.ncbi.nlm.nih.gov/articles/PMC10679628/",
    "https://www.reuters.com/business/healthcare-pharmaceuticals/cassava-discontinue-development-alzheimers-disease-drug-2025-03-25/?utm_source=chatgpt.com",
    "https://www.reuters.com/business/healthcare-pharmaceuticals/gsk-studying-if-best-selling-shingles-vaccine-lowers-dementia-risk-2025-03-25/?utm_source=chatgpt.com"
]

# ✅ Combining all documents
all_docs = []
for url in urls:
    loader = WebBaseLoader(url)
    docs = loader.load()
    all_docs.extend(docs)

# ✅ Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(all_docs)

# ✅ Using HuggingFace embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# ✅ Creating FAISS vectorstore
vectorstore = FAISS.from_documents(final_documents, embeddings)
retriever = vectorstore.as_retriever()

# ✅ Use Groq's LLM
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")

# ✅ Creating prompt template
prompt = ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question.

<context>
{context}
</context>

Question: {input}
"""
)

# ✅ Creating retrieval chain
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# ✅ Ask questions here
while True:
    user_input = input("Ask a question (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        break
    start = time.time()
    response = retrieval_chain.invoke({"input": user_input})
    end = time.time()

    print("\n🧠 Answer:", response["answer"])
    print(f"⏱️ Time taken: {end - start:.2f} seconds")

    print("\n🔎 Top Relevant Chunks:")
    for i, doc in enumerate(response["context"]):
        print(f"\n--- Chunk {i+1} ---")
        print(doc.page_content[:500])
        print("--------------------")


Ask a question (or type 'exit' to quit): latest on alzheimer's disease

🧠 Answer: Based on the provided context, the latest information on Alzheimer's disease is:

* Recent progress in treatment strategies for Alzheimer's disease (Mohamed N, Alsamieh A, 2023)
* Advances on therapeutic strategies for Alzheimer's disease: from medicinal plant to nanotechnology (Hassan NA, Alshamari AK, Hassan AA, Elharrif MG, Alhajri AM, Sattam M, et al., 2022)
* Research advances aimed at prognosis and treatment of Alzheimer's disease (Okoye G, Vincent R, 2022)

These are the latest publications mentioned in the context, which suggest that researchers are actively working on understanding and addressing Alzheimer's disease.
⏱️ Time taken: 0.84 seconds

🔎 Top Relevant Chunks:

--- Chunk 1 ---
Frontiers | Alzheimer's disease: a comprehensive review of epidemiology, risk factors, symptoms diagnosis, management, caregiving, advanced treatments and associated challenges





Skip to main content
------------