<a href="https://colab.research.google.com/github/Nikhitaa2329/genAI1/blob/main/Medical_Q_A_RAG_Pubmed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install necessary libraries
!pip install langchain langchain-community google-generativeai faiss-cpu requests biopython langchain-google-genai

# Import libraries
import os
import requests
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from Bio import Entrez
import google.generativeai as genai

# Set your Google Gemini API Key
# SECURITY: never hardcode a real key in a notebook -- anything committed here is public
# and any key that was previously committed should be revoked.
# The key is taken from the GOOGLE_API_KEY environment variable when it is set; otherwise
# it is requested through a hidden prompt so it never lands in the saved notebook.
from getpass import getpass

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Google Gemini API key: ")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# Fetch PubMed Data
def fetch_pubmed_articles(query, max_results=10):
    """Search PubMed and return one formatted text snippet per article.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: Upper bound on the number of article IDs requested (``retmax``).

    Returns:
        A list of strings, each holding a "Title: ..." line followed by an
        "Abstract: ..." line. The list is empty when the search matches nothing.
    """
    # NCBI asks every client to identify itself; ENTREZ_EMAIL overrides the placeholder.
    Entrez.email = os.environ.get("ENTREZ_EMAIL", "your-email@example.com")

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        search_results = Entrez.read(search_handle)
    finally:
        # Close the handle even if parsing the response fails.
        search_handle.close()

    ids = list(search_results["IdList"])
    if not ids:
        # Nothing matched; skip the fetch request entirely.
        return []

    # Fetch every record in a single request instead of one round-trip per ID.
    fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), retmode="xml")
    try:
        article_data = Entrez.read(fetch_handle)
    finally:
        fetch_handle.close()

    articles = []
    for record in article_data["PubmedArticle"]:
        article = record["MedlineCitation"]["Article"]
        title = article["ArticleTitle"]
        # AbstractText is a list of sections; keep all of them rather than only the
        # first one, and fall back to the placeholder when the list is missing or empty.
        sections = article.get("Abstract", {}).get("AbstractText", [])
        abstract = " ".join(str(section) for section in sections) or "No abstract available"
        articles.append(f"Title: {title}\nAbstract: {abstract}")

    return articles

# Split Text into Chunks
def split_text_into_docs(texts, chunk_size=1000, chunk_overlap=100):
    """Split raw article texts into chunked documents for embedding.

    Args:
        texts: List of strings to split.
        chunk_size: Chunk size handed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The documents produced by ``splitter.create_documents(texts)``; an empty
        list when ``texts`` is empty.
    """
    if not texts:
        # Nothing to split, so there is no need to build a splitter.
        return []

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.create_documents(texts)

# Build the RAG Chain
def build_rag_chain(
    docs,
    llm_model="models/gemini-1.5-pro-latest",
    embedding_model="models/embedding-001",
    temperature=0.2,
):
    """Index ``docs`` in FAISS and wrap them in a RetrievalQA chain.

    Args:
        docs: Documents to embed and index; must not be empty.
        llm_model: Model name passed to ``ChatGoogleGenerativeAI``.
        embedding_model: Model name passed to ``GoogleGenerativeAIEmbeddings``.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A ``RetrievalQA`` chain of type "stuff" that does not return source documents.

    Raises:
        ValueError: If ``docs`` is empty, since there would be nothing to index.
    """
    if not docs:
        # Fail early with a clear message rather than deep inside the vector store.
        raise ValueError("build_rag_chain requires at least one document to index")

    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)
    vectorstore = FAISS.from_documents(docs, embeddings)
    retriever = vectorstore.as_retriever()

    llm = ChatGoogleGenerativeAI(model=llm_model, temperature=temperature)

    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
    )

# Main Function to Answer Medical Questions
# Built chains are kept per (search_term, max_results) so repeated questions do not
# re-download the articles and re-embed them on every call.
_RAG_CHAIN_CACHE = {}


def medical_qna(query, search_term="latest advancements in cancer treatment", max_results=10):
    """Answer ``query`` using PubMed articles retrieved for ``search_term``.

    Args:
        query: The user's question, passed to the RAG chain.
        search_term: PubMed search term used to collect the context articles.
        max_results: Maximum number of articles to retrieve for the context.

    Returns:
        The chain's answer as a string, or an explanatory message when the
        PubMed search returns no articles.
    """
    cache_key = (search_term, max_results)
    rag_chain = _RAG_CHAIN_CACHE.get(cache_key)

    if rag_chain is None:
        # Fetch medical articles for the configured search term
        articles = fetch_pubmed_articles(search_term, max_results=max_results)
        if not articles:
            # An empty corpus cannot be indexed; report it instead of crashing.
            return f"No PubMed articles were found for '{search_term}', so the question cannot be answered."

        # Split the articles into chunks and build the RAG chain once
        docs = split_text_into_docs(articles)
        rag_chain = build_rag_chain(docs)
        _RAG_CHAIN_CACHE[cache_key] = rag_chain

    # invoke() is the replacement for the deprecated Chain.run();
    # RetrievalQA reads its input from "query" and writes its answer to "result".
    return rag_chain.invoke({"query": query})["result"]

# Ask for a question on stdin and answer it through the RAG pipeline
query = input("Ask a medical question: ")
answer = medical_qna(query)

# Header and answer in a single call; sep="\n" keeps the blank line after the header
print("\n🩺 Medical Q&A Answer:\n", answer, sep="\n")


Ask a medical question:  What are the latest advancements in cancer treatment?

🩺 Medical Q&A Answer:

This text focuses on advancements in therapies for hepatocellular carcinoma (HCC), a type of liver cancer.  It mentions the approval of multiple first- and second-line agents, particularly combination therapies based on immune checkpoint inhibitors (ICIs).  It also highlights targeted treatments, adoptive cell therapies, and bispecific antibodies as recent advancements in biomarker-driven therapies.  Another area of advancement discussed is in targeted immunotherapies, particularly ICIs, which have shown significant improvements in patient response and survival rates for various cancers.


In [3]:
# Install necessary libraries
!pip install langchain langchain-community google-generativeai faiss-cpu requests biopython langchain-google-genai gradio

# Import libraries
import os
import requests
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from Bio import Entrez
import google.generativeai as genai

# Set your Google Gemini API Key
# SECURITY: never hardcode a real key in a notebook -- anything committed here is public
# and any key that was previously committed should be revoked.
# The key is taken from the GOOGLE_API_KEY environment variable when it is set; otherwise
# it is requested through a hidden prompt so it never lands in the saved notebook.
from getpass import getpass

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Google Gemini API key: ")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# Fetch PubMed Data
def fetch_pubmed_articles(query, max_results=10):
    """Search PubMed and return one formatted text snippet per article.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: Upper bound on the number of article IDs requested (``retmax``).

    Returns:
        A list of strings, each holding a "Title: ..." line followed by an
        "Abstract: ..." line. The list is empty when the search matches nothing.
    """
    # NCBI asks every client to identify itself; ENTREZ_EMAIL overrides the placeholder.
    Entrez.email = os.environ.get("ENTREZ_EMAIL", "your-email@example.com")

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        search_results = Entrez.read(search_handle)
    finally:
        # Close the handle even if parsing the response fails.
        search_handle.close()

    ids = list(search_results["IdList"])
    if not ids:
        # Nothing matched; skip the fetch request entirely.
        return []

    # Fetch every record in a single request instead of one round-trip per ID.
    fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), retmode="xml")
    try:
        article_data = Entrez.read(fetch_handle)
    finally:
        fetch_handle.close()

    articles = []
    for record in article_data["PubmedArticle"]:
        article = record["MedlineCitation"]["Article"]
        title = article["ArticleTitle"]
        # AbstractText is a list of sections; keep all of them rather than only the
        # first one, and fall back to the placeholder when the list is missing or empty.
        sections = article.get("Abstract", {}).get("AbstractText", [])
        abstract = " ".join(str(section) for section in sections) or "No abstract available"
        articles.append(f"Title: {title}\nAbstract: {abstract}")

    return articles

# Split Text into Chunks
def split_text_into_docs(texts, chunk_size=1000, chunk_overlap=100):
    """Split raw article texts into chunked documents for embedding.

    Args:
        texts: List of strings to split.
        chunk_size: Chunk size handed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The documents produced by ``splitter.create_documents(texts)``; an empty
        list when ``texts`` is empty.
    """
    if not texts:
        # Nothing to split, so there is no need to build a splitter.
        return []

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.create_documents(texts)

# Build the RAG Chain
def build_rag_chain(
    docs,
    llm_model="models/gemini-1.5-pro-latest",
    embedding_model="models/embedding-001",
    temperature=0.2,
):
    """Index ``docs`` in FAISS and wrap them in a RetrievalQA chain.

    Args:
        docs: Documents to embed and index; must not be empty.
        llm_model: Model name passed to ``ChatGoogleGenerativeAI``.
        embedding_model: Model name passed to ``GoogleGenerativeAIEmbeddings``.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A ``RetrievalQA`` chain of type "stuff" that does not return source documents.

    Raises:
        ValueError: If ``docs`` is empty, since there would be nothing to index.
    """
    if not docs:
        # Fail early with a clear message rather than deep inside the vector store.
        raise ValueError("build_rag_chain requires at least one document to index")

    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)
    vectorstore = FAISS.from_documents(docs, embeddings)
    retriever = vectorstore.as_retriever()

    llm = ChatGoogleGenerativeAI(model=llm_model, temperature=temperature)

    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
    )

# Main Function to Answer Medical Questions
# Built chains are kept per (search_term, max_results) so repeated questions from the
# UI do not re-download the articles and re-embed them on every call.
_RAG_CHAIN_CACHE = {}


def medical_qna(query, search_term="latest advancements in cancer treatment", max_results=10):
    """Answer ``query`` using PubMed articles retrieved for ``search_term``.

    Args:
        query: The user's question, passed to the RAG chain.
        search_term: PubMed search term used to collect the context articles.
        max_results: Maximum number of articles to retrieve for the context.

    Returns:
        The chain's answer as a string, or an explanatory message when the
        PubMed search returns no articles.
    """
    cache_key = (search_term, max_results)
    rag_chain = _RAG_CHAIN_CACHE.get(cache_key)

    if rag_chain is None:
        # Fetch medical articles for the configured search term
        articles = fetch_pubmed_articles(search_term, max_results=max_results)
        if not articles:
            # An empty corpus cannot be indexed; report it instead of crashing.
            return f"No PubMed articles were found for '{search_term}', so the question cannot be answered."

        # Split the articles into chunks and build the RAG chain once
        docs = split_text_into_docs(articles)
        rag_chain = build_rag_chain(docs)
        _RAG_CHAIN_CACHE[cache_key] = rag_chain

    # invoke() is the replacement for the deprecated Chain.run();
    # RetrievalQA reads its input from "query" and writes its answer to "result".
    return rag_chain.invoke({"query": query})["result"]

# Create Gradio interface
def gradio_interface(query):
    """Gradio callback: pass the submitted question to ``medical_qna`` and return its answer."""
    response = medical_qna(query)
    return response

# Describe the web UI: one text input wired through gradio_interface to one text output
iface = gr.Interface(
    title="Medical Q&A Chatbot",
    description="This chatbot answers medical questions based on the latest advancements in cancer treatment, retrieved from PubMed articles using Retrieval-Augmented Generation (RAG) with LangChain.",
    fn=gradio_interface,
    inputs=gr.Textbox(label="Ask a medical question:"),
    outputs=gr.Textbox(label="Medical Q&A Answer"),
)

# Start serving the interface
iface.launch()


Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

