Gradio interface for Docling + LangChain + Milvus + Mistral + all-MiniLM-L6-v2

**RAG**

**Supports multiple file uploads** (PDF, Images, HTML, PPTx)


Developed by: Partha Prati Ray, https://github.com/ParthaPRay

In [1]:
# requirements for this example:
%pip install -qq docling docling-core python-dotenv langchain-text-splitters langchain-huggingface langchain-milvus gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m343.7 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m113.1/113.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.7/90.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.9/65.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.4/22.4 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# For local machine only
# import os

# from dotenv import load_dotenv

# load_dotenv()


In [9]:
# For colab only

# from google.colab import userdata
# HF_API_KEY = userdata.get('HF_TOKEN')

# HF_API_KEY

'<REDACTED: Hugging Face access token removed from the saved output; revoke and rotate this credential>'

In [12]:
#  Supports only PDF Documents


import gradio as gr
import os
import time
from tempfile import NamedTemporaryFile, TemporaryDirectory

# ---------------------------
# Imports from your RAG code
# ---------------------------

from typing import Iterator
from typing import Iterable

# docling & docling_core
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document as LCDocument
from docling.document_converter import DocumentConverter

# text splitters
from langchain_text_splitters import RecursiveCharacterTextSplitter

# embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# vector store (Milvus)
from langchain_milvus import Milvus

# LLM
from langchain_huggingface import HuggingFaceEndpoint

# RAG
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# ---------------------------
# Custom Docling PDF Loader
# ---------------------------
class DoclingPDFLoader(BaseLoader):
    """LangChain loader that runs each given path through a Docling converter.

    One LangChain document is produced per input path; its content is the
    Docling document exported with ``export_to_markdown(strict_text=True)``.
    """

    def __init__(self, file_path: str | list[str]) -> None:
        """Store the path(s) to convert and create the Docling converter.

        Args:
            file_path: A single path or a list of paths.
        """
        if isinstance(file_path, list):
            paths = file_path
        else:
            # Normalise a single path to a one-element list.
            paths = [file_path]
        self._file_paths = paths
        self._converter = DocumentConverter()

    def lazy_load(self) -> Iterator[LCDocument]:
        """Convert the stored paths one at a time, yielding a document for each."""
        for src in self._file_paths:
            conversion = self._converter.convert(src)
            plain_text = conversion.document.export_to_markdown(strict_text=True)
            yield LCDocument(page_content=plain_text)

# ---------------------------
# Global states
# ---------------------------
# Chunks produced by the most recent text_splitting() run; None until then.
splitted_docs = None
# RAG chain built by text_splitting() via build_rag_chain(); None until then.
rag_chain = None


# ---------------------------
# RAG Pipeline (to be built after splitting)
# ---------------------------
def build_rag_chain(docs):
    """
    Build a RAG pipeline from splitted docs:
    1. Resolve the Hugging Face API token from the environment
    2. Create embeddings
    3. Create vector store (temporary local Milvus database)
    4. Create retriever
    5. Build chain (retriever -> prompt -> LLM -> string output parser)

    Args:
        docs: Split LangChain documents to embed and index.

    Returns:
        The composed chain; it is invoked with the question string.

    Raises:
        ValueError: If neither ``HF_TOKEN`` nor ``HUGGINGFACEHUB_API_TOKEN``
            is set in the environment.
    """
    # Credentials must never be hard-coded in the notebook. Read the token from
    # the environment and fail before any expensive embedding/indexing work.
    # On Colab: os.environ["HF_TOKEN"] = google.colab.userdata.get("HF_TOKEN")
    hf_api_key = os.environ.get("HF_TOKEN") or os.environ.get(
        "HUGGINGFACEHUB_API_TOKEN"
    )
    if not hf_api_key:
        raise ValueError(
            "Hugging Face API token not found. Set the HF_TOKEN environment "
            "variable before building the RAG chain."
        )

    # Example embeddings
    hf_embed_model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=hf_embed_model_id)

    # Build Milvus vector store
    tmp_dir = TemporaryDirectory()
    milvus_uri = f"{tmp_dir.name}/milvus_demo.db"
    vectorstore = Milvus.from_documents(
        docs,
        embeddings,
        connection_args={"uri": milvus_uri},
        drop_old=True,
    )
    # A TemporaryDirectory removes its folder as soon as the object is
    # garbage-collected. Keeping it only in a local would delete the database
    # folder on return while the vector store still points at it, so pin it on
    # the function. Rebinding here (after a successful build) releases the
    # directory of the chain being superseded.
    build_rag_chain._tmp_dir = tmp_dir

    # Build a retriever
    retriever = vectorstore.as_retriever()

    # Build prompt
    prompt = PromptTemplate.from_template(
        "Context information is below.\n---------------------\n{context}\n---------------------\n"
        "Given the context information and not prior knowledge, answer the query.\n"
        "Query: {question}\nAnswer:\n"
    )

    # Example LLM
    hf_llm_model_id = "mistralai/Mistral-7B-Instruct-v0.3"
    llm = HuggingFaceEndpoint(
        repo_id=hf_llm_model_id,
        huggingfacehub_api_token=hf_api_key,
    )

    # build the chain
    def format_docs(selected_docs: Iterable[LCDocument]):
        # Join the retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in selected_docs)

    rag_chain_temp = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain_temp


# ---------------------------
# Function: Text Splitting
# ---------------------------
def text_splitting(files):
    """
    Gradio callback: convert the uploaded PDFs, split them and build the RAG chain.

    This function:
    1. Saves uploaded PDFs to temporary paths
    2. Loads them via DoclingPDFLoader
    3. Removes the temporary files again
    4. Splits the documents using RecursiveCharacterTextSplitter
    5. Builds the RAG chain and publishes chunks + chain in the module globals
    6. Returns status updates as a generator for Gradio

    Args:
        files: Iterable of raw file contents (bytes), one per uploaded file;
            ``None`` or empty when nothing was uploaded.

    Yields:
        Status strings to display in the UI.
    """
    global splitted_docs, rag_chain

    # With no upload there is nothing to iterate; report it instead of
    # failing with a TypeError.
    if not files:
        yield "No files uploaded. Please upload at least one file first."
        return

    # Start progress
    yield "Splitting in progress..."

    # Save uploaded files to local temporary paths
    temp_file_paths = []
    try:
        for payload in files:
            with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                # Record the path before writing so a failed write is still cleaned up.
                temp_file_paths.append(tmp.name)
                tmp.write(payload)  # <-- Directly write bytes

        # Load the docs; load() materialises the text, so the files are not
        # needed afterwards.
        docs = DoclingPDFLoader(file_path=temp_file_paths).load()
    finally:
        # delete=False means nobody else removes these files.
        for path in temp_file_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: must not mask the conversion result/error.
                pass

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(docs)

    # Build RAG chain
    chain = build_rag_chain(chunks)

    # Publish both together so a failed build never leaves new chunks paired
    # with a stale chain.
    splitted_docs, rag_chain = chunks, chain

    # End progress
    yield "Split complete!"


# ---------------------------
# Function: RAG Q&A
# ---------------------------
def ask_question(question):
    """
    Answer ``question`` with the module-level RAG chain.

    Returns a fixed hint string when the split documents or the chain are
    missing/empty; otherwise returns whatever ``rag_chain.invoke`` returns.
    """
    if splitted_docs and rag_chain:
        # Both pieces are in place: run the chain on the question.
        return rag_chain.invoke(question)
    return "No splitted docs available. Please upload and split first!"


# ---------------------------
# Build Gradio UI
# ---------------------------
def build_app():
    """Assemble the two-tab Gradio Blocks UI and return it without launching."""
    with gr.Blocks() as demo, gr.Tabs():
        # Tab 1: upload files and trigger the split / chain build.
        with gr.Tab("Upload & Split"):
            gr.Markdown("## Upload your PDFs and Split")
            uploader = gr.File(
                type="binary",
                file_count="multiple",
                label="Upload Files",
            )
            status = gr.Textbox(interactive=False, label="Status")
            run_split = gr.Button("Split Documents")

            # text_splitting is a generator, so each yielded string is
            # written to the status box in turn.
            run_split.click(text_splitting, [uploader], status)

        # Tab 2: ask questions against the built chain.
        with gr.Tab("RAG Q&A"):
            gr.Markdown("## Ask a question on your splitted documents")
            question_input = gr.Textbox(
                value="Does Docling implement a linear pipeline of operations?",
                placeholder="Enter your question...",
                label="Question",
            )
            answer_output = gr.Textbox(interactive=False, label="Answer")
            run_ask = gr.Button("Ask")

            run_ask.click(ask_question, question_input, answer_output)

    return demo


# Entry point: build the UI and serve it. Per the Gradio notice captured in
# this cell's output, debug=True keeps the cell running so errors and logs
# stay visible; set debug=False to turn that off.
if __name__ == "__main__":
    app = build_app()
    app.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://aa3c8ef78824920dc2.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:docling.datamodel.document:Input document tmp_0sro_e9.pdf does not match any allowed format.
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gradio/queueing.py", line 714, in process_events
    response = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 2047, in process_api
    result = await self.call_function(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1606, in call_function
    prediction = await utils.async_iteration(iterator)
  File "/usr/local/lib/python3.10/dist-packages/gradio/utils.py", line 714, in async_iteration
    return await anext(iterator)
  File "/usr/local/lib/python3.10/dist-packages/gradio/utils.py", line 708, in __anext__
    return await anyio.to_thread.run_sync(
  File "/usr/local/lib/py

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7864 <> https://aa3c8ef78824920dc2.gradio.live


In [None]:
### Supports PDF, image, PPTX, and HTML files.

# .txt, .md, and .asciidoc are still to be tested.

## Word (.docx) and Excel (.xlsx) are not supported.


import gradio as gr
import os
from tempfile import NamedTemporaryFile, TemporaryDirectory

# Docling & LangChain imports
from docling.document_converter import (
    DocumentConverter,
    PdfFormatOption,
    WordFormatOption,
)
from docling.datamodel.base_models import InputFormat
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
#from docling.backend.msword_backend import MsWordDocumentBackend

from docling.pipeline.simple_pipeline import SimplePipeline
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline

from langchain_core.documents import Document as LCDocument
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# ------------------------------------------------------------------
# 1) Configure Docling's DocumentConverter for multi-format
# ------------------------------------------------------------------
# Shared Docling converter used by DoclingLoader below.
doc_converter = DocumentConverter(
    # Formats this converter is configured to accept.
    allowed_formats=[
        InputFormat.PDF,
        InputFormat.IMAGE,  # images (requires OCR libs if actual text extraction is desired)
        InputFormat.HTML,
        InputFormat.PPTX,  # PowerPoint
        # Currently left out: InputFormat.DOCX (Word), InputFormat.ASCIIDOC,
        # InputFormat.MD (Markdown).
    ],
    format_options={
        InputFormat.PDF: PdfFormatOption(
            backend=PyPdfiumDocumentBackend,
            pipeline_cls=StandardPdfPipeline,
        ),
        # Word is not supported yet (DOCX is not in allowed_formats above);
        # the explicit MsWordDocumentBackend is intentionally not wired in.
        InputFormat.DOCX: WordFormatOption(pipeline_cls=SimplePipeline),
        # Further per-format options (e.g. PPTX -> PptxFormatOption) can be
        # added here if needed.
    },
)

# ------------------------------------------------------------------
# 2) A simple "loader" that uses this doc_converter
# ------------------------------------------------------------------
class DoclingLoader:
    """Convert files with the module-level ``doc_converter`` into LangChain documents."""

    def __init__(self, file_paths: list[str]):
        """Remember the paths to convert; a bare string is treated as one path."""
        self.file_paths = [file_paths] if isinstance(file_paths, str) else file_paths

    def load_docs(self):
        """Return one LangChain document per stored path, in order."""

        def to_lc_document(path):
            # doc_converter.convert returns a ConversionResult whose
            # .document is the Docling document to export.
            docling_document = doc_converter.convert(path).document
            return LCDocument(
                page_content=docling_document.export_to_markdown(strict_text=True)
            )

        return [to_lc_document(path) for path in self.file_paths]

# ------------------------------------------------------------------
# 3) Global state for splitted docs and RAG chain
# ------------------------------------------------------------------
# Chunks produced by the most recent text_splitting() run; None until then.
splitted_docs = None
# RAG chain built by text_splitting() via build_rag_chain(); None until then.
rag_chain = None

# ------------------------------------------------------------------
# 4) Build RAG chain
# ------------------------------------------------------------------
def build_rag_chain(docs):
    """
    Build the RAG chain over already-split documents.

    The chain is: retriever over a temporary local Milvus store -> prompt ->
    Hugging Face endpoint LLM -> string output parser.

    Args:
        docs: Split LangChain documents to embed and index.

    Returns:
        The composed chain; it is invoked with the question string.

    Raises:
        ValueError: If neither ``HF_TOKEN`` nor ``HUGGINGFACEHUB_API_TOKEN``
            is set in the environment.
    """
    # Setup HuggingFace credentials.
    # SECURITY: the access token must not live in source. Read it from the
    # environment and fail before any expensive embedding/indexing work.
    # On Colab: os.environ["HF_TOKEN"] = google.colab.userdata.get("HF_TOKEN")
    hf_api_key = os.environ.get("HF_TOKEN") or os.environ.get(
        "HUGGINGFACEHUB_API_TOKEN"
    )
    if not hf_api_key:
        raise ValueError(
            "Hugging Face API token not found. Set the HF_TOKEN environment "
            "variable before building the RAG chain."
        )

    # Create embeddings
    hf_embed_model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=hf_embed_model_id)

    # Temporary local Milvus DB
    tmp_dir = TemporaryDirectory()
    milvus_uri = f"{tmp_dir.name}/milvus_demo.db"

    # Build vector store
    vectorstore = Milvus.from_documents(
        docs,
        embeddings,
        connection_args={"uri": milvus_uri},
        drop_old=True,
    )
    # A TemporaryDirectory removes its folder as soon as the object is
    # garbage-collected. Keeping it only in a local would delete the database
    # folder on return while the vector store still points at it, so pin it on
    # the function. Rebinding here (after a successful build) releases the
    # directory of the chain being superseded.
    build_rag_chain._tmp_dir = tmp_dir

    # Create retriever
    retriever = vectorstore.as_retriever()

    # Prompt
    prompt = PromptTemplate.from_template(
        "Context information is below.\n---------------------\n{context}\n"
        "---------------------\nGiven the context information and not prior knowledge, "
        "answer the query.\nQuery: {question}\nAnswer:\n"
    )

    # Setup HuggingFace LLM
    hf_llm_model_id = "mistralai/Mistral-7B-Instruct-v0.3"
    llm = HuggingFaceEndpoint(
        repo_id=hf_llm_model_id,
        huggingfacehub_api_token=hf_api_key,
    )

    def format_docs(selected_docs):
        # Join the retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in selected_docs)

    # Build pipeline (RAG chain)
    rag_chain_temp = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain_temp

# ------------------------------------------------------------------
# 5) Gradio callback: Text Splitting
# ------------------------------------------------------------------
def text_splitting(files):
    """
    Gradio callback: convert the uploaded files, split them and build the RAG chain.

    1. Save the uploaded bytes to temporary disk paths.
    2. Convert them via DoclingLoader (doc_converter).
    3. Remove the temporary files again.
    4. Split the documents with LangChain.
    5. Build the RAG chain and publish chunks + chain in the module globals.
    6. Yield intermediate status to Gradio.

    Args:
        files: Iterable of raw file contents (bytes), one per uploaded file;
            ``None`` or empty when nothing was uploaded.

    Yields:
        Status strings to display in the UI.
    """
    global splitted_docs, rag_chain

    # With no upload there is nothing to iterate; report it instead of
    # failing with a TypeError.
    if not files:
        yield "No files uploaded. Please upload at least one file first."
        return

    yield "Splitting in progress..."

    temp_file_paths = []
    try:
        for payload in files:
            # If type="binary" in gr.File, each item is raw bytes.
            # NOTE(review): the temp file carries no extension, so Docling has
            # to identify the format from content alone; confirm this works
            # for HTML/PPTX/image uploads.
            with NamedTemporaryFile(delete=False) as tmp:
                # Record the path before writing so a failed write is still cleaned up.
                temp_file_paths.append(tmp.name)
                tmp.write(payload)

        # Load docs using Docling; the text is fully in memory afterwards, so
        # the files are not needed any more.
        docs = DoclingLoader(temp_file_paths).load_docs()
    finally:
        # delete=False means nobody else removes these files.
        for path in temp_file_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: must not mask the conversion result/error.
                pass

    # Split
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # Build RAG chain
    chain = build_rag_chain(chunks)

    # Publish both together so a failed build never leaves new chunks paired
    # with a stale chain.
    splitted_docs, rag_chain = chunks, chain

    yield "Split complete!"

# ------------------------------------------------------------------
# 6) Gradio callback: RAG Q&A
# ------------------------------------------------------------------
def ask_question(question):
    """Run ``question`` through the module-level RAG chain, or return a hint
    string when the split documents or the chain are missing/empty."""
    if splitted_docs and rag_chain:
        answer = rag_chain.invoke(question)
        return answer
    return "No splitted docs available. Please upload and split first!"

# ------------------------------------------------------------------
# 7) Build Gradio UI
# ------------------------------------------------------------------
def build_app():
    """Assemble the two-tab Gradio Blocks UI and return it without launching."""
    with gr.Blocks() as demo, gr.Tabs():
        # Tab 1: upload files and trigger the split / chain build.
        with gr.Tab("Upload & Split"):
            gr.Markdown("### Upload your files (PDF, HTML, PPTX, Images)")
            uploader = gr.File(
                type="binary",
                file_count="multiple",
                label="Upload Files",
            )
            status = gr.Textbox(interactive=False, label="Status")
            run_split = gr.Button("Split Documents")

            # text_splitting is a generator, so its status messages
            # ("Splitting in progress...", "Split complete!") are streamed
            # into the status box.
            run_split.click(text_splitting, [uploader], status)

        # Tab 2: ask questions against the built chain.
        with gr.Tab("RAG Q&A"):
            gr.Markdown("### Ask a question about your splitted documents")
            question_input = gr.Textbox(
                value="Does Docling implement a linear pipeline of operations?",
                label="Question",
            )
            answer_output = gr.Textbox(interactive=False, label="Answer")
            run_ask = gr.Button("Ask")

            run_ask.click(ask_question, question_input, answer_output)
    return demo

# ------------------------------------------------------------------
# 8) Launch the Gradio app
# ------------------------------------------------------------------
# Entry point: build the UI and serve it. Per the Gradio notice captured in
# this cell's output, debug=True keeps the cell running so errors and logs
# stay visible; set debug=False to turn that off.
if __name__ == "__main__":
    app = build_app()
    app.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d387d6d20da41e5235.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


