In [None]:
#  INSTALL LIBRARIES

!pip install gradio langchain langchain-community langchain-text-splitters faiss-cpu pypdf sentence-transformers pytesseract pdf2image transformers accelerate bitsandbytes llama-cpp-python



In [None]:
#  IMPORT LIBRARIES

import gradio as gr
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_classic.chains import RetrievalQA
from langchain_classic.document_loaders import PyPDFLoader
from langchain_community.llms import LlamaCpp
import pytesseract
from pdf2image import convert_from_path
import os

In [None]:
#  DOWNLOAD AND LOAD LLM (MISTRAL-7B INSTRUCT)

model_path = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

if not os.path.exists(model_path):
    !wget -O {model_path} https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf

llm = LlamaCpp(
    model_path=model_path,
    n_ctx=4096,
    temperature=0.2,
    max_tokens=512
)

#  LOAD EMBEDDING MODEL

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


#  GLOBAL VARIABLE FOR QA CHAIN

qa_chain = None


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.atte

In [None]:
#  FUNCTION TO EXTRACT TEXT (OCR for scanned PDFs)

def extract_text(pdf_file):
    """Load a PDF as a list of LangChain documents, falling back to OCR.

    The text layer is read with ``PyPDFLoader`` first. If that raises, or if
    every page it returns is blank (as is expected for a scanned PDF), each
    page is rendered to an image and run through Tesseract instead.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as its ``.name`` attribute, such as the
            value Gradio passes to an upload handler.

    Returns:
        list: One document per page. OCR-produced documents carry
        ``source`` and zero-based ``page`` metadata.
    """
    # A str subclass carrying the path is used directly; anything else is
    # assumed to be a file-like wrapper whose ``.name`` is the path.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    # Try normal PDF extraction first
    try:
        docs = PyPDFLoader(pdf_path).load()
    except Exception as exc:
        # Best effort: a PDF the text extractor cannot parse may still be
        # readable via OCR, so report the problem and carry on.
        print(f"Text-layer extraction failed ({exc!r}); falling back to OCR.")
        docs = []

    # Check for actual text rather than list length: a scanned PDF is expected
    # to yield one document per page, each with empty page_content.
    if any(doc.page_content.strip() for doc in docs):
        return docs

    # OCR fallback
    # Imported lazily: only needed on this path. ``langchain.schema`` does not
    # resolve under the LangChain 1.x layout; langchain_core is the canonical home.
    from langchain_core.documents import Document

    page_images = convert_from_path(pdf_path)
    return [
        Document(
            page_content=pytesseract.image_to_string(image),
            metadata={"source": pdf_path, "page": page_number},
        )
        for page_number, image in enumerate(page_images)
    ]


In [None]:
#  PROCESS PDF AND BUILD VECTOR STORE

def process_pdf(pdf_file):
    """Index an uploaded PDF and (re)build the global retrieval QA chain.

    The PDF text is split into overlapping chunks, embedded into a FAISS
    index, and wrapped in a "stuff" ``RetrievalQA`` chain that retrieves the
    three most similar chunks per question.

    Args:
        pdf_file: The uploaded file as passed by the Gradio upload event, or
            ``None`` when no file is present.

    Returns:
        str: A status message for the UI. On success the module-level
        ``qa_chain`` holds the new chain; on failure it is ``None``.
    """
    global qa_chain

    if pdf_file is None:
        return " Please upload a PDF first."

    # Drop any chain built for a previous upload so that, if this upload
    # fails, questions are not silently answered from the old document.
    qa_chain = None

    try:
        docs = extract_text(pdf_file)
    except Exception as exc:
        # Surface the reason in the status box instead of a generic UI error.
        return f" Could not read the PDF: {exc}"

    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    chunks = splitter.split_documents(docs)

    # An index cannot be built from zero chunks (e.g. blank or image-only
    # pages where OCR found nothing), so stop with a clear message.
    if not chunks:
        return " No readable text could be extracted from this PDF."

    vectordb = FAISS.from_documents(chunks, embeddings)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_kwargs={"k": 3})
    )

    return " PDF uploaded Successfully!"


#  ASK QUESTIONS FUNCTION

def ask_question(question):
    """Answer a question about the most recently indexed PDF.

    Args:
        question: The user's question text from the UI.

    Returns:
        str: The chain's answer, or a prompt to upload a PDF / enter a
        question when there is nothing to run.
    """
    if qa_chain is None:
        return " Please upload a PDF first."

    # Don't spend an LLM call on an empty or whitespace-only question.
    if not question or not question.strip():
        return " Please enter a question."

    # Chain.run() is deprecated in favour of invoke(). RetrievalQA takes the
    # question under "query" and returns the answer under "result".
    return qa_chain.invoke({"query": question})["result"]

In [None]:
#  GRADIO INTERFACE

with gr.Blocks() as app:
    # Header text shown above the controls.
    gr.Markdown(value=" RAG Chatbot : Upload a PDF and ask questions about it!")

    # Upload section: a finished upload is passed to process_pdf and the
    # returned status message is shown in the status box.
    pdf_input = gr.File(label="Upload PDF")
    status_output = gr.Textbox(label="Status")
    pdf_input.upload(
        fn=process_pdf,
        inputs=pdf_input,
        outputs=status_output,
    )

    # Question section: the question box feeds ask_question when the button
    # is clicked, and the answer is written to the answer box.
    question_input = gr.Textbox(label="Enter your question")
    answer_output = gr.Textbox(label="Answer", lines=3, max_lines=5)

    ask_button = gr.Button(value="Ask")
    ask_button.click(
        fn=ask_question,
        inputs=question_input,
        outputs=answer_output,
    )

# Start the Gradio server for the interface defined above.
app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://056be60b1825a7a3fe.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


