<a href="https://colab.research.google.com/github/Manikandans1610/AIML/blob/main/Medical_digonser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
pip install biopython transformers langchain langchain-community faiss-cpu sentence-transformers




In [4]:
# medical_diagnoser_no_key.py

from Bio import Entrez
from transformers import pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Contact address attached to every Entrez request made by this module.
# The value below is a placeholder: replace it with a real address before use.
Entrez.email = "your_email@example.com"  # Put any valid email (NCBI requires this)

def search_pubmed(query, max_results=10):
    """Search PubMed for *query* and return the matching article IDs.

    Args:
        query: Search term passed to Entrez ``esearch``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        The ``"IdList"`` entry of the parsed esearch response.
    """
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the underlying HTTP response even if parsing fails.
        handle.close()
    return record["IdList"]

def fetch_abstracts(id_list):
    """Download the plain-text abstract records for the given PubMed IDs.

    Args:
        id_list: Sequence of PubMed ID strings.

    Returns:
        A list of the non-empty, whitespace-stripped sections obtained by
        splitting the response text on blank lines. An empty ``id_list``
        yields an empty list without contacting the service.
    """
    if not id_list:
        # Nothing to fetch; avoid sending a request with an empty ``id``.
        return []

    handle = Entrez.efetch(
        db="pubmed", id=",".join(id_list), rettype="abstract", retmode="text"
    )
    try:
        raw = handle.read()
    finally:
        # Release the underlying HTTP response even if reading fails.
        handle.close()

    # NOTE: the split is on blank lines, so one article may contribute
    # several sections rather than a single entry.
    return [section.strip() for section in raw.split("\n\n") if section.strip()]

def split_and_embed(texts):
    """Chunk *texts* and index the chunks in a FAISS vector store.

    Each text is split into pieces of up to 1000 characters with a
    200-character overlap; the pieces are embedded with the
    ``all-MiniLM-L6-v2`` model and handed to ``FAISS.from_texts``.

    Args:
        texts: Iterable of strings to index.

    Returns:
        The vector store produced by ``FAISS.from_texts``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    # Flatten the per-text chunk lists into one list, preserving order.
    pieces = [
        piece
        for document in texts
        for piece in text_splitter.split_text(document)
    ]
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(pieces, embedding=embedder)

def get_relevant_chunks(vectorstore, query, k=4):
    """Return the text of the *k* chunks most relevant to *query*.

    Args:
        vectorstore: Object exposing ``as_retriever(search_kwargs=...)``.
        query: Text to retrieve against.
        k: Number of documents requested from the retriever.

    Returns:
        The ``page_content`` of the retrieved documents joined by single
        spaces (an empty string when nothing is retrieved).
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    # ``invoke`` replaces the deprecated ``get_relevant_documents`` call.
    docs = retriever.invoke(query)
    return " ".join(doc.page_content for doc in docs)

def summarize(text):
    """Summarize *text* with the ``facebook/bart-large-cnn`` model.

    Args:
        text: The passage to condense.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # truncation=True clips input that exceeds the model's maximum input
    # length instead of letting an over-long context fail during generation.
    output = summarizer(
        text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return output[0]['summary_text']

def diagnose(query):
    """Run the search -> fetch -> index -> retrieve -> summarize pipeline.

    Progress is printed to stdout before each stage.

    Args:
        query: Symptoms or a medical question entered by the user.

    Returns:
        The summary string produced by ``summarize``, or a short explanatory
        message when the query is blank, no articles match, or no abstract
        text could be retrieved.
    """
    if not query or not query.strip():
        # A blank term cannot be searched meaningfully; stop before any
        # network call is made.
        return "Please enter symptoms or a medical question."

    print("[1] Searching PubMed...")
    ids = search_pubmed(query)
    if not ids:
        return "No PubMed articles found for this query."

    print("[2] Fetching abstracts...")
    abstracts = fetch_abstracts(ids)
    if not abstracts:
        # Building a vector store from no text is expected to fail, so
        # report the situation instead of continuing.
        return "No abstracts could be retrieved for the matching PubMed articles."

    print("[3] Building vector store...")
    vectorstore = split_and_embed(abstracts)

    print("[4] Retrieving most relevant content...")
    context = get_relevant_chunks(vectorstore, query)

    print("[5] Summarizing...")
    return summarize(context)

if __name__ == "__main__":
    # Script entry point: prompt once, run the pipeline, print the outcome.
    question = input("🔍 Enter symptoms or a medical question: ").strip()
    insight = diagnose(question)
    print("\n📌 Diagnosis Insight:\n")
    print(insight)


🔍 Enter symptoms or a medical question: head pain
[1] Searching PubMed...
[2] Fetching abstracts...
[3] Building vector store...


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[4] Retrieving most relevant content...


  docs = retriever.get_relevant_documents(query)


[5] Summarizing...


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



📌 Diagnosis Insight:

 status migrainosus is similar to episodic migraine. Treatment to terminate the headache often requires the combined use of multiple drugs. Pineal cysts that are not associated with ventricular enlargement can lead to manifestations of intracranial hypertension, usually paroxysmal.
