<a href="https://colab.research.google.com/github/Sundar0207/SDC/blob/main/MedicalChatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests
import requests
import xml.etree.ElementTree as ET

# Function to search PubMed for medical articles using E-utilities API
def search_pubmed(query, max_results=3, timeout=10):
    """Search PubMed through the E-utilities ESearch endpoint.

    Args:
        query: Search text sent as the ``term`` parameter.
        max_results: Maximum number of article IDs to request (``retmax``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of article ID strings (possibly empty) on success, or ``None``
        when the request fails, the server answers with a non-200 status, or
        the response body cannot be parsed as XML.
    """
    # A blank query cannot match anything; skip the network round trip.
    if query is None or not str(query).strip():
        return []

    # PubMed E-utilities search URL
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    # Parameters for search query
    params = {
        'db': 'pubmed',
        'term': query,
        'retmax': max_results,  # Maximum number of results
        'retmode': 'xml',       # Response format as XML
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return None

    # Print status and response text for debugging
    print(f"Status Code: {response.status_code}")
    print(f"Response Text (Snippet): {response.text[:500]}")  # Print first 500 characters of response

    if response.status_code != 200:
        print("Error: Unable to fetch data from PubMed.")
        return None

    try:
        # Parse the XML response to extract article IDs
        root = ET.fromstring(response.content)
    except ET.ParseError as e:
        # A 200 response can still carry a malformed body; report it the same
        # way as the other failure paths instead of raising.
        print(f"Error: Unable to parse PubMed response: {e}")
        return None

    # Drop empty <Id/> elements so callers can safely join the IDs.
    return [id_elem.text for id_elem in root.findall(".//Id") if id_elem.text]


# Function to retrieve PubMed article details using eSummary API
def retrieve_pubmed_article_details(id_list):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

    # Prepare the article IDs for the eSummary query
    id_str = ",".join(id_list)

    # Parameters for eSummary query
    params = {
        'db': 'pubmed',
        'id': id_str,      # List of article IDs
        'retmode': 'xml',  # Response format as XML
    }

    try:
        # Send GET request to eSummary API
        response = requests.get(base_url, params=params)

        # Print status and response text for debugging
        print(f"Status Code: {response.status_code}")
        print(f"Response Text (Snippet): {response.text[:500]}")  # Print first 500 characters of response

        if response.status_code == 200:
            # Parse the XML response to extract article titles and abstracts
            root = ET.fromstring(response.content)
            articles = []
            for docsum in root.findall(".//DocSum"):
                title = docsum.find("Item[@Name='Title']").text
                abstract = docsum.find("Item[@Name='Source']").text  # Placeholder, replace with abstract if available
                articles.append({"title": title, "abstract": abstract})
            return articles
        else:
            print("Error: Unable to fetch article details.")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return None


# Combine search and retrieval to create a working pipeline
def generate_medical_answer(query, max_results=3):
    """Run the search-then-summarize pipeline for *query*.

    Looks up article IDs with ``search_pubmed``, fetches their details with
    ``retrieve_pubmed_article_details``, and formats them with
    ``generate_answer_from_articles``.

    Returns:
        The formatted answer string, ``"No articles found."`` when the search
        yields nothing, or ``"No detailed articles found."`` when the detail
        lookup yields nothing.
    """
    # Stage 1: find candidate article IDs.
    article_ids = search_pubmed(query, max_results)
    if not article_ids:
        return "No articles found."

    # Stage 2: fetch the summary records for those IDs.
    summaries = retrieve_pubmed_article_details(article_ids)
    if not summaries:
        return "No detailed articles found."

    # Stage 3: turn the records into the final answer text.
    return generate_answer_from_articles(query, summaries)


# Function to generate an answer from PubMed articles (you can modify this part to use RAG or LLMs)
def generate_answer_from_articles(query, pubmed_articles):
    """Format the retrieved articles into a plain-text answer.

    Args:
        query: The original question, echoed in the header line.
        pubmed_articles: Iterable of dicts with ``'title'`` and ``'abstract'``
            keys.

    Returns:
        A header line followed by one "Title / Abstract" section per article.
    """
    header = f"Answer to query '{query}':\n"

    # One section per article: a blank line, then the title and abstract lines.
    sections = [
        f"\nTitle: {entry['title']}\nAbstract: {entry['abstract']}\n"
        for entry in pubmed_articles
    ]

    return header + "".join(sections)


# Example query to test
# Sample question pushed through the pipeline once when the cell runs.
query = "What are the common symptoms of COVID-19?"
answer = generate_medical_answer(query=query)

# Show whatever the pipeline returned (an answer or a "not found" message).
print(f"Answer: {answer}")


Status Code: 200
Response Text (Snippet): <?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">
<eSearchResult><Count>14286</Count><RetMax>3</RetMax><RetStart>0</RetStart><IdList>
<Id>40234835</Id>
<Id>40234823</Id>
<Id>40233026</Id>
</IdList><TranslationSet><Translation>     <From>common</From>     <To>"common"[All Fields] OR "commonalities"[All Fields] OR "commonality"[All Fields] OR "commoner"[All Fields] OR "commone
Status Code: 200
Response Text (Snippet): <?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD esummary v1 20041029//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20041029/esummary-v1.dtd">
<eSummaryResult>
<DocSum>
	<Id>40234835</Id>
	<Item Name="PubDate" Type="Date">2025 Apr 15</Item>
	<Item Name="EPubDate" Type="Date">2025 Apr 15</Item>
	<Item Name="Source" Type="String">BMC Public Health</Item>
	<Item Name="AuthorLi