<a href="https://colab.research.google.com/github/Kalyanianikkath/NM-Genrative-AI/blob/main/healthcare_Q%26A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ✅ Step 1: Install dependencies
!pip install -q langchain faiss-cpu langchain-google-genai biopython google-generativeai
!pip install -U langchain-community

# ✅ Step 2: Import necessary libraries
import os
from langchain.vectorstores import FAISS
from langchain.embeddings import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from Bio import Entrez

# ✅ Step 3: API keys
# Do not embed a real API key in the notebook: anything saved here is published
# with the file. Use GOOGLE_API_KEY from the environment when it is already set,
# otherwise prompt for it without echoing the value to the cell output.
# NOTE(review): a key was previously committed on this line — revoke and rotate it.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")
Entrez.email = "kalyaniaanikkath@gmail.com"  # Replace this with your email for PubMed API

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and return each hit's abstract as plain text.

    Args:
        query: Search term handed to ``Entrez.esearch``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of strings, one per ID in the search result's ``IdList``, in
        the order the IDs were returned. Empty when the search has no hits.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Close the handle even if parsing fails so it is not left open.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch_handle.read()
        finally:
            fetch_handle.close()
        abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store from articles
def build_vectorstore_from_articles(articles):
    """Split the article texts into chunks, embed them, and index them in FAISS.

    Args:
        articles: Sequence of article texts (strings).

    Returns:
        The FAISS vector store built from the chunked documents.

    Raises:
        ValueError: If ``articles`` is empty, or if splitting yields no chunks.
    """
    # Fail fast with a clear message instead of letting empty input reach the
    # embedding and indexing calls.
    if not articles:
        raise ValueError("No articles to index: cannot build a vector store from empty input.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.create_documents(articles)
    if not texts:
        raise ValueError("Articles contained no text to index.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(texts, embeddings)

# ✅ Step 6: Create QA chain
def create_qa_chain(vectorstore, model="gemini-1.5-pro", temperature=0.2):
    """Build a RetrievalQA chain that answers from ``vectorstore`` using Gemini.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies the context.
        model: Gemini model id passed to ``ChatGoogleGenerativeAI``. The default
            matches the id used by the later cells of this notebook; this cell
            previously used ``"gemini-pro"``.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A RetrievalQA chain configured to also return its source documents.
    """
    llm = ChatGoogleGenerativeAI(model=model, temperature=temperature)
    retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# ✅ Step 7: Ask a health question
def ask_health_question(query, qa_chain):
    """Run ``query`` through ``qa_chain`` and print the answer and its sources.

    Args:
        query: The question to ask.
        qa_chain: Chain exposing ``invoke``; its result must contain the keys
            ``"result"`` and ``"source_documents"``.

    Prints the answer followed by the first 500 characters of each source
    document's ``page_content``. Returns nothing.
    """
    # Use invoke() rather than calling the chain object directly, which
    # LangChain has deprecated. "query" is the input key of a RetrievalQA chain.
    result = qa_chain.invoke({"query": query})
    print("🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for number, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {number}:\n{doc.page_content[:500]}...")  # show a snippet

# ✅ Step 8: Run the chatbot
if __name__ == "__main__":
    # End-to-end demo: search PubMed, index the abstracts, then ask Gemini.
    user_query = "What are the latest treatments for type 2 diabetes?"  # ← You can change the question
    print("🔍 Searching PubMed...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if not articles:
        # No hits means there is nothing to index or answer from, so stop
        # before the vector-store step.
        print("❌ No articles found on this topic. Try a different question.")
    else:
        print("📚 Building vector store...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Initializing Q&A chain with Gemini...")
        qa_chain = create_qa_chain(vectorstore)

        print("\n💬 Asking your question...\n")
        ask_health_question(user_query, qa_chain)


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

ImportError: cannot import name 'GoogleGenerativeAIEmbeddings' from 'langchain.embeddings' (/usr/local/lib/python3.11/dist-packages/langchain/embeddings/__init__.py)

In [5]:
# ✅ Step 1: Install dependencies
!pip install -q langchain faiss-cpu langchain-google-genai biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# Do not embed a real API key in the notebook: anything saved here is published
# with the file. Use GOOGLE_API_KEY from the environment when it is already set,
# otherwise prompt for it without echoing the value to the cell output.
# NOTE(review): a key was previously committed on this line — revoke and rotate it.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")
Entrez.email = "kalyaniaanikkath@gmail.com"  # Replace this with your email for PubMed API

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and return each hit's abstract as plain text.

    Args:
        query: Search term handed to ``Entrez.esearch``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of strings, one per ID in the search result's ``IdList``, in
        the order the IDs were returned. Empty when the search has no hits.
    """
    # Function-scope import so this cell's import list stays untouched.
    from contextlib import closing

    # closing() guarantees each Entrez handle is closed, even when reading or
    # parsing raises.
    with closing(Entrez.esearch(db="pubmed", term=query, retmax=max_results)) as handle:
        record = Entrez.read(handle)

    abstracts = []
    for pmid in record["IdList"]:
        with closing(Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")) as fetch:
            abstracts.append(fetch.read())
    return abstracts

# ✅ Step 5: Build vector store from articles
def build_vectorstore_from_articles(articles):
    """Split the article texts into chunks, embed them, and index them in FAISS.

    Args:
        articles: Sequence of article texts (strings).

    Returns:
        The FAISS vector store built from the chunked documents.

    Raises:
        ValueError: If ``articles`` is empty, or if splitting yields no chunks.
    """
    # Fail fast with a clear message instead of letting empty input reach the
    # embedding and indexing calls.
    if not articles:
        raise ValueError("No articles to index: cannot build a vector store from empty input.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.create_documents(articles)
    if not texts:
        raise ValueError("Articles contained no text to index.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(texts, embeddings)

# ✅ Step 6: Create QA chain
def create_qa_chain(vectorstore):
    """Return a RetrievalQA chain backed by Gemini and ``vectorstore``.

    The chat model is ``gemini-1.5-pro`` at temperature 0.2, context comes from
    ``vectorstore.as_retriever()``, and the chain is configured to return the
    source documents alongside the answer.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.2),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

# ✅ Step 7: Ask a health question
def ask_health_question(query, qa_chain):
    """Run ``query`` through ``qa_chain`` and print the answer and its sources.

    Args:
        query: The question to ask.
        qa_chain: Chain exposing ``invoke``; its result must contain the keys
            ``"result"`` and ``"source_documents"``.

    Prints the answer followed by the first 500 characters of each source
    document's ``page_content``. Returns nothing.
    """
    # Use invoke() rather than calling the chain object directly, which
    # LangChain has deprecated. "query" is the input key of a RetrievalQA chain.
    result = qa_chain.invoke({"query": query})
    print("🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for number, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {number}:\n{doc.page_content[:500]}...")  # show a snippet

# ✅ Step 8: Run the chatbot
if __name__ == "__main__":
    # End-to-end demo: search PubMed, index the abstracts, then ask Gemini.
    user_query = "What are the latest treatments for type 2 diabetes?"  # ← You can change the question
    print("🔍 Searching PubMed...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if not articles:
        # No hits means there is nothing to index or answer from, so stop
        # before the vector-store step.
        print("❌ No articles found on this topic. Try a different question.")
    else:
        print("📚 Building vector store...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Initializing Q&A chain with Gemini...")
        qa_chain = create_qa_chain(vectorstore)

        print("\n💬 Asking your question...\n")
        ask_health_question(user_query, qa_chain)


🔍 Searching PubMed...
📚 Building vector store...
🤖 Initializing Q&A chain with Gemini...

💬 Asking your question...

🩺 Answer:

Based on the provided texts, flash glucose monitoring and adherence to the 2023 ADA Standards of Care, including a diabetes care bundle, are discussed as treatments and management strategies for type 2 diabetes.  Specific medications within these guidelines are not detailed in this text.

📚 Sources:

Source 1:
FSL for 24 months' follow-up. HCRU was measured for 12 months before the index 
date and the last 12 months of the 24-month follow-up period. HbA1c data were 
taken from the latest tests in each period.
RESULTS: Mean HbA1c was statistically significantly reduced after FSL 
acquisition among people aged ≤65 or >65 years in all four treatment groups 
(range, 0.3-0.8% reduction). After FSL acquisition, ED visits and 
hospitalization were statistically significantly reduced in the oral therapy 
only...

Source 2:
Promoting provider adherence to American Diab

In [10]:
# ✅ Step 1: Install dependencies
!pip install -q langchain faiss-cpu langchain-google-genai biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# Both values below are placeholders; substitute your own before running.
os.environ["GOOGLE_API_KEY"] = "your_gemini_api_key_here"  # Replace with your Gemini API key
Entrez.email = "your_email@example.com"  # Replace with your email for PubMed

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    record = Entrez.read(handle)
    ids = record["IdList"]
    abstracts = []
    for pmid in ids:
        fetch = Entrez.efetch(db


SyntaxError: incomplete input (<ipython-input-10-94ae1912d123>, line 23)

In [11]:
# ✅ Step 1: Install dependencies
!pip install -q langchain faiss-cpu langchain-google-genai biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# Do not embed a real API key in the notebook: anything saved here is published
# with the file. Use GOOGLE_API_KEY from the environment when it is already set,
# otherwise prompt for it without echoing the value to the cell output.
# NOTE(review): a key was previously committed on this line — revoke and rotate it.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")
Entrez.email = "kalyaniaanikkath@gmail.com"  # Replace with your email for PubMed

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and return each hit's abstract as plain text.

    Args:
        query: Search term handed to ``Entrez.esearch``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of strings, one per ID in the search result's ``IdList``, in
        the order the IDs were returned. Empty when the search has no hits.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Close the handle even if parsing fails so it is not left open.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch_handle.read()
        finally:
            fetch_handle.close()
        abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    """Split the article texts into chunks, embed them, and index them in FAISS.

    Args:
        articles: Sequence of article texts (strings).

    Returns:
        The FAISS vector store built from the chunked documents.

    Raises:
        ValueError: If ``articles`` is empty, or if splitting yields no chunks.
    """
    # Fail fast with a clear message instead of letting empty input reach the
    # embedding and indexing calls.
    if not articles:
        raise ValueError("No articles to index: cannot build a vector store from empty input.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.create_documents(articles)
    if not texts:
        raise ValueError("Articles contained no text to index.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(texts, embeddings)

# ✅ Step 6: Create Gemini-based QA system
def create_qa_chain(vectorstore):
    """Assemble the Gemini-backed RetrievalQA chain for ``vectorstore``.

    Uses ``gemini-1.5-pro`` at temperature 0.2 as the LLM and the retriever
    from ``vectorstore.as_retriever()``; the chain is asked to return its
    source documents together with the answer.
    """
    gemini = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.2)
    chain_options = {
        "llm": gemini,
        "retriever": vectorstore.as_retriever(),
        "return_source_documents": True,
    }
    return RetrievalQA.from_chain_type(**chain_options)

# ✅ Step 7: Ask your question
def ask_health_question(query, qa_chain):
    """Run ``query`` through ``qa_chain`` and print the answer and its sources.

    Args:
        query: The question to ask.
        qa_chain: Chain exposing ``invoke``; its result must contain the keys
            ``"result"`` and ``"source_documents"``.

    Prints the answer followed by the first 500 characters of each source
    document's ``page_content``. Returns nothing.
    """
    # Use invoke() rather than calling the chain object directly, which
    # LangChain has deprecated. "query" is the input key of a RetrievalQA chain.
    result = qa_chain.invoke({"query": query})
    print("\n🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for number, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {number}:\n{doc.page_content[:500]}...")

# ✅ Step 8: Run everything interactively
if __name__ == "__main__":
    # Interactive run: prompt for a question, search PubMed, index the
    # abstracts, then ask Gemini.
    # Strip surrounding whitespace so a blank entry is caught below instead of
    # being sent to PubMed as the search term.
    user_query = input("💬 Enter your medical/healthcare question: ").strip()

    if not user_query:
        print("❌ No question entered. Please type a question and try again.")
    else:
        print("\n🔍 Searching PubMed for related research...")
        articles = fetch_pubmed_articles(user_query, max_results=5)

        if not articles:
            print("❌ No articles found on this topic. Try a different question.")
        else:
            print("📚 Building knowledge base from PubMed articles...")
            vectorstore = build_vectorstore_from_articles(articles)

            print("🤖 Connecting to Gemini for answer generation...")
            qa_chain = create_qa_chain(vectorstore)

            ask_health_question(user_query, qa_chain)


💬 Enter your medical/healthcare question: Coronary artery disease mechanisms

🔍 Searching PubMed for related research...
📚 Building knowledge base from PubMed articles...
🤖 Connecting to Gemini for answer generation...

🩺 Answer:

Excessive production of reactive oxygen species (ROS) contributes to endothelial dysfunction, inflammation, and vascular remodeling.  These pathological effects drive atherosclerosis, plaque formation, and thrombosis.

📚 Sources:

Source 1:
Oxidative stress plays a pivotal role in the pathogenesis of atherosclerosis and 
coronary artery disease (CAD), with both beneficial and detrimental effects on 
cardiovascular health. On one hand, the excessive production of reactive oxygen 
species (ROS) contributes to endothelial dysfunction, inflammation, and vascular 
remodeling, which are central to the development and progression of CAD. These 
pathological effects drive key processes such as atherosclerosis, plaque 
formation, and thromb...

Source 2:
targeted ther