In [None]:
pip install langchain langchain-community faiss-cpu pypdf gradio sentence-transformers


In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_and_chunk_pdf(pdf_path, chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split its contents into overlapping text chunks.

    Args:
        pdf_path: Path of the PDF file, handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to the text splitter.
        chunk_overlap: ``chunk_overlap`` forwarded to the text splitter.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` applied
        to the documents loaded from ``pdf_path``.
    """
    pages = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(pages)


In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Shared embedding model for the notebook, created once at module level.
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


In [None]:
from langchain_community.vectorstores import FAISS

def build_faiss_index(chunks, embeddings_model):
    """Build a FAISS vector store from document chunks.

    Args:
        chunks: Documents to index (as produced by ``load_and_chunk_pdf``).
        embeddings_model: Embedding model passed through to
            ``FAISS.from_documents``.

    Returns:
        The vector store returned by ``FAISS.from_documents``.
    """
    return FAISS.from_documents(chunks, embeddings_model)


In [None]:
def retrieve_context(query, vectorstore, k=4):
    """Fetch the chunks most similar to ``query`` and merge them into one string.

    Args:
        query: Text passed to ``vectorstore.similarity_search``.
        vectorstore: Object exposing ``similarity_search(query, k=...)`` that
            returns items with a ``page_content`` attribute.
        k: Number of results requested from the vector store.

    Returns:
        The ``page_content`` of every hit, joined by a blank line.
    """
    matches = vectorstore.similarity_search(query, k=k)
    return "\n\n".join(match.page_content for match in matches)


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# System turn: sets the assistant's role as a plain-language medical explainer.
_SYSTEM_MESSAGE = (
    "You are a medical communication expert. Your job is to translate complex medical documents into simple, easy-to-understand summaries for regular people, such as patients and their families."
)

# Human turn: `{context}` is the only template variable; it is filled with the
# text retrieved from the uploaded document when the chain is invoked.
_HUMAN_MESSAGE = (
    "The user has uploaded a medical document:\n\n{context}\n\n"
    "Please read the document and generate a short, clear summary in layman’s terms. Follow these rules:\n"
    "1. 🩺 If it’s a **medical report for a patient**, explain:\n"
    "   - What the diagnosis or health issue is (in simple words).\n"
    "   - What symptoms were mentioned.\n"
    "   - What treatments or medicines are prescribed.\n"
    "   - Any advice, precautions, or next steps.\n\n"
    "2. 📚 If it’s a **research paper or technical document**, explain:\n"
    "   - What the report is about (its purpose).\n"
    "   - Key findings and conclusions.\n"
    "   - Why it matters (in simple terms).\n\n"
    "✅ Avoid medical jargon. Write like you are explaining to a friend or family member.\n"
    "✅ Keep it friendly, helpful, and around 150–200 words.\n"
    "✅ Use bullets or short paragraphs if needed.\n"
    "✅ Don’t say 'this is a research paper' or 'this is a medical report'. Just start summarizing naturally."
)

# Two-message chat prompt consumed by the summarization chain.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", _SYSTEM_MESSAGE),
    ("human", _HUMAN_MESSAGE),
])


In [None]:
!pip install langchain_groq



In [None]:
from langchain_groq import ChatGroq

import os
from getpass import getpass

# SECURITY: the API key must never be written into the notebook. It is read
# from the GROQ_API_KEY environment variable, falling back to a hidden
# interactive prompt so the secret is not echoed or saved in cell output.
# Any key that was previously committed in this file should be revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY") or getpass("Enter your Groq API key: ")
if not _groq_api_key:
    raise ValueError(
        "A Groq API key is required: set the GROQ_API_KEY environment variable "
        "or enter the key when prompted."
    )

# Chat model used for summarization; a low temperature keeps output focused.
# NOTE(review): confirm that this model id is still served by Groq.
llm = ChatGroq(
    groq_api_key=_groq_api_key,
    model_name="llama3-70b-8192",
    temperature=0.3
)

# prompt -> LLM -> plain string: invoking `chain` with {"context": ...}
# returns the model's reply as text.
chain = prompt_template | llm | StrOutputParser()


In [None]:
import gradio as gr
import tempfile
import os

def summarize_pdf(pdf_file):
    """Produce a plain-language summary of an uploaded PDF.

    Args:
        pdf_file: The upload received from the Gradio file component. May be
            a filesystem path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path. ``None`` means nothing was
            uploaded.

    Returns:
        The summary text produced by ``chain``, or a short user-facing message
        when no file was supplied or no text could be extracted from it.
    """
    if pdf_file is None:
        return "Please upload a PDF."

    # Gradio has already saved the upload to disk; we only need its path.
    # Path-like values are checked first because `pathlib.Path.name` is just
    # the basename, not the full path.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    # Process PDF
    chunks = load_and_chunk_pdf(pdf_path)
    if not chunks:
        # Nothing to index or summarize (e.g. a PDF with no extractable text),
        # so report that instead of building an index from zero chunks.
        return (
            "No readable text was found in this PDF. "
            "Please upload a PDF that contains selectable text."
        )
    vectorstore = build_faiss_index(chunks, embeddings_model)

    # Retrieval query: used only to pick the most relevant chunks; the actual
    # instructions given to the model live in the prompt template.
    query = (
        "Summarize this medical report for a patient in simple and easy language, "
        "including the disease name, symptoms, prescribed medicines, and any precautions. "
        "Avoid complex medical jargon. Use short sentences., make it short and relevant, dont just state the full report in the output"
    )

    # Retrieve relevant context
    context = retrieve_context(query, vectorstore, k=4)

    # Run RAG chain
    return chain.invoke({"context": context})



# demo = gr.Interface(
#     fn=summarize_pdf,
#     inputs=gr.File(label="Upload your PDF"),
#     outputs=gr.Textbox(label="Layman's Summary", lines=10),
#     title="Medical PDF Summarizer (RAG: Groq + Hugging Face + FAISS)",
#     description="Upload a medical report PDF, and I'll summarize it in simple terms using retrieval-augmented generation."
# )

# demo.launch(share=True, debug=True)
# Dark colour overrides applied on top of the Soft theme.
# NOTE(review): the `.gr-button` / `.gr-textbox` selectors may not match the
# class names emitted by every Gradio release — confirm against the installed version.
_CUSTOM_CSS = """
.gradio-container { background-color: #0f0f0f !important; color: #e0e0e0; }
.gr-button { background-color: #00cc66 !important; color: white !important; border-radius: 8px !important; }
.gr-button:hover { background-color: #00b359 !important; }
.gr-textbox textarea { background-color: #1a1a1a !important; color: #e0e0e0; border-radius: 6px; }
"""

# Two-column layout: upload controls on the left, generated summary on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=_CUSTOM_CSS) as demo:

    with gr.Row():
        # Left column: file upload and the trigger button.
        with gr.Column():
            gr.Markdown("### 📝 Upload Your Medical Report")
            file_input = gr.File(label="Upload your PDF", file_types=[".pdf"])
            submit_btn = gr.Button("🔍 Generate Summary")
            gr.Markdown("<br><center>❤ Powered for Patients</center>")

        # Right column: where the summary text is shown.
        with gr.Column():
            gr.Markdown("### ✅ Patient-Friendly Medical Summary")
            output_text = gr.Textbox(
                label="Layman's Summary",
                lines=10,
                placeholder="Your patient-friendly medical summary will appear here...",
            )

    # Clicking the button sends the uploaded file to `summarize_pdf` and writes
    # the returned text into the output box.
    submit_btn.click(fn=summarize_pdf, inputs=file_input, outputs=output_text)

# share=True requests a public gradio.live URL; debug=True keeps the cell
# running so errors and logs stay visible.
# NOTE(review): uploaded documents are medical reports — confirm that exposing
# the app on a public share link is acceptable.
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://1f5f394893d68fc3af.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://1f5f394893d68fc3af.gradio.live


