In [1]:
# Prepare the environment (shell commands; run them in a terminal)

# conda create -n rag python=3.10
# conda activate rag
# conda install jupyter 
# pip install ipykernel
# python -m ipykernel install --user --name rag --display-name rag

# pip install -U langchain langchain-community langchain-ollama langchain-chroma pymupdf gradio

In [2]:
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Location of the source PDF, kept in one place so it is easy to swap.
PDF_PATH = "./docs/2005.11401.pdf"

# Parse the PDF with PyMuPDF; `documents` feeds the splitting step below.
loader = PyMuPDFLoader(PDF_PATH)
documents = loader.load()

In [4]:
# Chunking parameters: tune these to trade retrieval quality against
# indexing/query cost. The overlap keeps text that straddles a chunk
# boundary available in both neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Break the loaded documents into smaller, overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(documents)

In [5]:
# Embedding model served by the local Ollama instance.
embeddings = OllamaEmbeddings(model="bge-m3")

# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no persist directory or collection name is given, so
# re-running this cell in the same kernel may index the same chunks
# again — confirm and restart the kernel before re-indexing if so.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

# Retriever over the store; queries are embedded with the same model.
retriever = vectorstore.as_retriever()

In [6]:
def format_context(question):
    """Build the context string for a question.

    Looks up the chunks that the module-level ``retriever`` returns for
    ``question`` and concatenates their text.

    Args:
        question: The user's question, passed to the retriever as-is.

    Returns:
        The ``page_content`` of every retrieved document, joined by a
        blank line. Empty string when nothing is retrieved.
    """
    retrieved = retriever.invoke(question)
    passages = [doc.page_content for doc in retrieved]
    return "\n\n".join(passages)

In [7]:
def query_deepseek(question, context):
    """Ask the local DeepSeek-R1 model a question grounded in ``context``.

    Args:
        question: The user's question.
        context: Retrieved text the model must base its answer on.

    Returns:
        The chat model's response message, as returned by the
        ``prompt | llm`` chain (the answer text is in its ``content``).
    """
    # Keep {context} and {question} as real template placeholders.
    # Pre-rendering the text with an f-string and then handing it to
    # ChatPromptTemplate makes the template parser re-scan the retrieved
    # text, so any "{" or "}" in the PDF content or in the question is
    # treated as a template variable and the call fails or is mangled.
    # Values substituted at invoke() time are inserted verbatim.
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "user",
                "Answer the question based only on the following context:\n"
                "{context}\n\nQuestion: {question}\n\n",
            ),
        ],
    )
    # Query local DeepSeek-R1 model
    llm = ChatOllama(
        model="deepseek-r1:14b",
        temperature=0.5,
    )
    chain = prompt | llm
    return chain.invoke({"context": context, "question": question})

In [8]:
# Retrieve context and generate an answer using RAG
def ask_question(question):
    """Answer a question with RAG and return the answer as plain text.

    Retrieves the context for ``question``, sends both to the model, and
    returns only the text of the reply so it can be shown in a text
    output component.

    Args:
        question: The user's question.

    Returns:
        The answer text.
    """
    answer = query_deepseek(question, format_context(question))
    # The chain returns a chat message object; a "text" output would show
    # its string form (including metadata fields) rather than the answer.
    # Fall back to the value itself if it is already a plain string.
    return getattr(answer, "content", answer)

# Build the Gradio UI: one text box in, one text box out, backed by
# ask_question.
# NOTE(review): the title/description mention a "Foundations of LLMs"
# book, while the notebook loads ./docs/2005.11401.pdf — confirm that the
# wording matches the indexed document.
interface = gr.Interface(
    fn=ask_question,
    inputs="text",
    outputs="text",
    title="RAG Chatbot: Foundations of LLMs",
    description="Ask any question about the Foundations of LLMs book. Powered by DeepSeek-R1.",
)

# Start the web app.
interface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


