In [None]:
"""
DeepSeek R1 RAG Chatbot With Chroma, Ollama, and Gradio
"""
# Cost and privacy benefits: You can run DeepSeek-R1 locally to avoid API fees and keep sensitive data secure.
# Easy integration: It integrates easily with vector databases like Chroma.
# Offline capabilities: With DeepSeek-R1 you can build retrieval systems that work even without internet access once the model is downloaded.

In [None]:
# install things we need
!pip install langchain chromadb gradio ollama pymypdf
!pip install -U langchain-community

In [None]:
!pip install gradio

In [None]:
!pip install chromadb

In [None]:
!pip install ollama

In [None]:
#importing things
import ollama
import re
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from chromadb.config import Settings
from chromadb import Client
from langchain.vectorstores import Chroma


In [None]:
pip install pymupdf

In [None]:
"""
We'll split the extracted text into smaller, overlapping chunks for better context retrieval.
You can adjust the chunk size and chunk overlap to suit your system via the arguments of RecursiveCharacterTextSplitter().
"""


# Step 2: Load the PDF using PyMuPDFLoader
# Path of the PDF to index; change this to point at your own document.
PDF_PATH = "mp.pdf"
loader = PyMuPDFLoader(PDF_PATH)

# The loaded documents are what the text splitter consumes in the next step.
documents = loader.load()

In [None]:
# Split the document into smaller, overlapping chunks.
# Tune these two values for your system instead of editing the call below.
CHUNK_SIZE = 1000     # value passed as chunk_size to the splitter
CHUNK_OVERLAP = 200   # value passed as chunk_overlap to the splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(documents)

In [None]:
# Embedding client backed by the DeepSeek-R1 model served through Ollama
embedding_function = OllamaEmbeddings(model="deepseek-r1:1.5b")


def generate_embedding(chunk):
    """Return the embedding vector for a single chunk's ``page_content``."""
    text = chunk.page_content
    return embedding_function.embed_query(text)


# Embed all chunks on a thread pool; Executor.map yields results in input order,
# so embeddings[i] corresponds to chunks[i].
with ThreadPoolExecutor() as pool:
    embeddings = [vector for vector in pool.map(generate_embedding, chunks)]

In [None]:
# Chroma client with default settings, plus the collection that will hold the chunks.
client = Client(Settings())
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# when a collection with this name already exists.
collection = client.get_or_create_collection(name="yassine_first_try")

In [None]:

# Store each chunk in the Chroma collection together with its precomputed embedding
for position, piece in enumerate(chunks):
    record_id = str(position)  # IDs are passed to Chroma as strings
    collection.add(
        ids=[record_id],
        embeddings=[embeddings[position]],
        metadatas=[{'id': position}],
        documents=[piece.page_content],
    )

In [None]:
# Wrap the populated collection in LangChain's Chroma vector store, passing the
# same Ollama embedding function, and expose it as a retriever.
vector_store = Chroma(
    client=client,
    collection_name="yassine_first_try",
    embedding_function=embedding_function,
)
retriever = vector_store.as_retriever()

In [None]:
def retrieve_context(question):
    """Return the text of the documents retrieved for ``question`` as one string.

    The retriever's results are concatenated in the order returned, with a
    blank line between consecutive documents.
    """
    hits = retriever.invoke(question)
    passages = (hit.page_content for hit in hits)
    return "\n\n".join(passages)

In [None]:
def query_deepseek(question, context, model="deepseek-r1:1.5b"):
    """Ask a DeepSeek-R1 model served by Ollama to answer ``question`` using ``context``.

    Args:
        question: The user's question.
        context: Retrieved text placed in the prompt alongside the question.
        model: Name of the Ollama chat model to query.

    Returns:
        The model's reply with any ``<think>...</think>`` block removed and
        surrounding whitespace stripped.
    """
    # Format the input prompt
    formatted_prompt = f"Question: {question}\n\nContext: {context}"
    # Text generation has to go through Ollama's chat API. The embeddings
    # client's embed_query() only turns text into a vector and accepts neither
    # `model` nor `messages`.
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': formatted_prompt}],
    )
    response_content = response['message']['content']
    # Drop the <think>...</think> section; DOTALL lets the pattern span newlines.
    final_answer = re.sub(r'<think>.*?</think>', '', response_content, flags=re.DOTALL).strip()
    return final_answer

In [None]:
def ask_question(question):
    """Answer ``question`` with RAG: retrieve context, then query the model with it."""
    return query_deepseek(question, retrieve_context(question))
# Build the Gradio UI: one text input wired to ask_question, one text output
APP_TITLE = "RAG Chatbot: Foundations of LLMs"
APP_DESCRIPTION = "Ask any question about the Foundations of LLMs book. Powered by DeepSeek-R1."
interface = gr.Interface(
    ask_question,
    "text",
    "text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)
# Launch the app
interface.launch()
