<a href="https://colab.research.google.com/github/Suhasri28/My-Calm-Space-Version1/blob/main/Medical_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install biopython langchain faiss-cpu


Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu, biopython
Successfully installed biopython-1.85 faiss-cpu-1.10.0


In [3]:
!pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [6]:
!pip install biopython



In [7]:
import time
import warnings
from collections import Counter
from typing import Dict, List, Optional

from Bio import Entrez

# Suppress Biopython warnings
warnings.filterwarnings("ignore")

class PubMedChatbot:
    """Small interactive helper that answers a query with PubMed search results.

    The bot searches PubMed through ``Bio.Entrez``, extracts a few fields from
    each returned article, and renders a plain-text summary (no external NLP).
    """

    # Title words excluded from the "common terms" line of the summary.
    _GENERIC_TITLE_WORDS = frozenset({"study", "patients", "clinical"})

    def __init__(self, email: str):
        """
        Initialize the PubMed chatbot.

        Args:
            email: Your email address (required for PubMed API)
        """
        Entrez.email = email  # Module-level Entrez setting, shared by all instances
        self.max_results = 5  # Default number of results to return
        self.max_abstract_length = 500  # Characters to show from abstract

    def search_pubmed(self, query: str, max_results: Optional[int] = None) -> List[Dict]:
        """
        Search PubMed for articles related to the query.

        Args:
            query: Medical search query
            max_results: Maximum number of results to return (overrides the
                instance default ``self.max_results`` if provided)

        Returns:
            List of dictionaries with the keys ``title``, ``authors``,
            ``journal``, ``year``, ``abstract`` and ``pmid``. An empty list is
            returned when nothing matches or when any error occurs (the error
            is printed, not raised).
        """
        if max_results is None:
            max_results = self.max_results

        try:
            # Step 1: look up matching PubMed IDs.
            handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
            try:
                record = Entrez.read(handle)
            finally:
                # Close the handle even when parsing the response fails.
                handle.close()

            if not record["IdList"]:
                return []

            # Step 2: fetch the full records for those IDs in a single request.
            id_list = ",".join(record["IdList"])
            handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
            try:
                articles = Entrez.read(handle)["PubmedArticle"]
            finally:
                handle.close()

            return [self._parse_article(article) for article in articles]

        except Exception as e:
            # Best effort by design: the chat loop keeps running after a failed search.
            print(f"Error searching PubMed: {e}")
            return []

    @staticmethod
    def _format_author(author) -> str:
        """Return "LastName Initials", or the collective (group) name when no personal name is present."""
        personal = f"{author.get('LastName', '')} {author.get('Initials', '')}".strip()
        if personal:
            return personal
        return str(author.get("CollectiveName", "")).strip()

    @staticmethod
    def _parse_article(article) -> Dict:
        """Extract the fields used by the summary from one parsed PubmedArticle record."""
        article_info = article["MedlineCitation"]["Article"]
        journal = article_info.get("Journal", {})
        pub_date = journal.get("JournalIssue", {}).get("PubDate", {})

        # Some records carry a free-text MedlineDate (e.g. "2019 Jan-Feb")
        # instead of a Year element; use its leading four digits when present.
        year = str(pub_date.get("Year", ""))
        if not year:
            medline_date = str(pub_date.get("MedlineDate", ""))
            year = medline_date[:4] if medline_date[:4].isdigit() else "Unknown"

        # The abstract may be a list of sections; flatten it to one string.
        abstract = article_info.get("Abstract", {}).get("AbstractText", ["No abstract available"])
        if isinstance(abstract, list):
            abstract = " ".join(str(section) for section in abstract)

        # Drop entries that yield no usable name so the author line has no blanks.
        authors = [
            name
            for name in map(PubMedChatbot._format_author, article_info.get("AuthorList", []))
            if name
        ]

        return {
            "title": article_info.get("ArticleTitle", "No title available"),
            "authors": authors,
            "journal": journal.get("Title", "Unknown journal"),
            "year": year,
            "abstract": abstract,
            "pmid": article["MedlineCitation"]["PMID"],
        }

    @staticmethod
    def _most_frequent(counts: Counter, limit: int) -> List[str]:
        """Return up to ``limit`` keys by descending count; ties keep first-seen order."""
        # sorted() is stable (also with reverse=True) and Counter preserves
        # insertion order, so the result is deterministic across runs.
        return sorted(counts, key=counts.get, reverse=True)[:limit]

    def generate_summary(self, articles: List[Dict], query: str) -> str:
        """
        Generate a simple summary of the PubMed results without using external NLP.

        Args:
            articles: List of articles as returned by ``search_pubmed``
            query: Original user query

        Returns:
            Summary string: overall statistics followed by the first three
            articles. Abstracts longer than ``self.max_abstract_length`` are
            cut and marked with "..."; shorter ones are shown unchanged.
        """
        if not articles:
            return "No relevant articles found in PubMed for your query."

        # Basic statistics
        num_articles = len(articles)
        years = [
            int(year)
            for year in (str(article["year"]) for article in articles)
            if year.isdigit()
        ]
        min_year = min(years) if years else "unknown"
        max_year = max(years) if years else "unknown"

        # Most frequent journals
        journal_counts = Counter(article["journal"] for article in articles)
        common_journals = ", ".join(self._most_frequent(journal_counts, 3))

        # Most frequent informative title words (very simple approach)
        term_counts = Counter(
            word
            for article in articles
            for word in str(article["title"]).lower().split()
            if len(word) > 4 and word.isalpha() and word not in self._GENERIC_TITLE_WORDS
        )
        common_terms = ", ".join(self._most_frequent(term_counts, 5))

        summary = (
            f"Found {num_articles} articles related to '{query}' in PubMed.\n"
            f"Publication years range from {min_year} to {max_year}.\n"
            f"Most common journals: {common_journals}\n"
            f"Common terms in titles: {common_terms}\n\n"
            "Top relevant articles:\n"
        )

        for i, article in enumerate(articles[:3], 1):  # Show top 3
            authors = article["authors"]
            author_line = ", ".join(authors[:3]) + (" et al." if len(authors) > 3 else "")

            # Only add an ellipsis when text was actually cut off.
            abstract = article["abstract"]
            if len(abstract) > self.max_abstract_length:
                abstract = abstract[:self.max_abstract_length] + "..."

            summary += (
                f"\n{i}. {article['title']}\n"
                f"   Authors: {author_line}\n"
                f"   Journal: {article['journal']} ({article['year']})\n"
                f"   Abstract: {abstract}\n"
                f"   PMID: {article['pmid']}\n"
            )

        return summary

    def interactive_chat(self):
        """Run an interactive chat session with the PubMed bot.

        Reads queries from standard input until the user types 'quit', 'exit'
        or 'q' (surrounding whitespace and case are ignored), or until input
        ends or is interrupted.
        """
        print("PubMed Medical Chatbot (type 'quit' to exit)")
        print("Enter your medical question or topic of interest:")

        while True:
            try:
                query = input("\nYou: ").strip()
            except (EOFError, KeyboardInterrupt):
                # End of input or an interrupt ends the session cleanly
                # instead of surfacing a traceback.
                print("\nGoodbye!")
                break

            if query.lower() in ("quit", "exit", "q"):
                print("Goodbye!")
                break

            if not query:
                print("Please enter a valid medical question.")
                continue

            print("\nSearching PubMed...")
            articles = self.search_pubmed(query)

            if not articles:
                print("No relevant articles found. Please try a different query.")
                continue

            summary = self.generate_summary(articles, query)
            print(f"\nBot: {summary}")

# Example usage: start a chat session when this cell/script is run directly.
if __name__ == "__main__":
    # The address below is a placeholder; substitute your own, since the
    # PubMed API requires a contact email.
    bot = PubMedChatbot(email="your.email@example.com")

    # Blocks on user input until the user quits.
    bot.interactive_chat()

PubMed Medical Chatbot (type 'quit' to exit)
Enter your medical question or topic of interest:

You: what are the latest treatment for cancer

Searching PubMed...

Bot: Found 5 articles related to 'what are the latest treatment for cancer' in PubMed.
Publication years range from 2025 to 2025.
Most common journals: Leukemia, Health technology assessment (Winchester, England), Journal of hepato-biliary-pancreatic sciences
Common terms in titles: curative, lymphoma, landscape, diseases, antibacterial

Top relevant articles:

1. Molecular landscape of distinct follicular lymphoma histologic grades: insights from genomic and transcriptome analyses.
   Authors: Sun C, Li W, Yu J et al.
   Journal: Leukemia (2025)
   Abstract: The 2022 World Health Organization Classification of Haematolymphoid tumours classifies follicular lymphoma grades 1-2 (FL1-2) and grade 3A (FL3A) as classic follicular lymphoma (cFL) and reclassifies grade 3B (FL3B) as follicular large B-cell lymphoma (FLBL), without a