In [None]:
!pip install ollama
!pip install langchain chromadb gradio 
!pip install -U langchain-community
!pip install pymupdf

In [None]:
# Import necessary packages
import hashlib
import os
import re

import gradio as gr
import ollama

# Document processing and retrieval
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyMuPDFLoader

# Embedding generation
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings

In [2]:
# Smoke-test the local Ollama chat model with a single user message
demo_messages = [
    {"role": "user", "content": "Explain Embeddings and Vectors in AI."},
]
response = ollama.chat(model="deepseek-r1:8b", messages=demo_messages)

# Show only the text of the model's reply
reply_text = response["message"]["content"]
print(reply_text)

<think>
Okay, so I need to explain embeddings and vectors in AI. Hmm, where do I start? Well, I remember from my basic AI courses that data is often represented in vector form. But what exactly are embeddings? They sound a bit like vectors, but maybe they're something more specific.

I think embeddings have to do with converting text into numbers. Oh right, word embeddings! Like how each word is turned into a vector where the dimensions correspond to some concept. So, maybe it's about capturing semantic meanings or something? I'm not entirely sure. Let me try to break this down.

Vectors in AI are like arrays of numbers, right? They represent data points in a multi-dimensional space. So, if I have a picture, its pixels form a vector. In text, words might be represented as vectors too. But how do embeddings fit into this?

Wait, maybe embeddings are a type of vector representation specifically for text. So, each word is mapped to a high-dimensional vector. These vectors capture the word

In [3]:
# Define the function that processes the PDF
def process_pdf(pdf_bytes):
    """Load a PDF, split it into chunks, embed them and build a retriever.

    Args:
        pdf_bytes: The uploaded file as handed over by the Gradio ``File``
            input. Despite the name this is not raw bytes: it is either a
            filesystem path or a temp-file style object whose ``.name`` holds
            the path. ``None`` means nothing was uploaded.

    Returns:
        A ``(text_splitter, vectorstore, retriever)`` tuple:
            text_splitter: the splitter that was used to chunk the document
            vectorstore: the Chroma store holding the embedded chunks
            retriever: fetches relevant chunks when answering questions
        ``(None, None, None)`` when ``pdf_bytes`` is ``None``.
    """
    if pdf_bytes is None:
        return None, None, None

    # Normalise the upload to a path: path-like values pass through unchanged,
    # anything else is expected to expose its on-disk location as `.name`.
    try:
        pdf_path = os.fspath(pdf_bytes)
    except TypeError:
        pdf_path = pdf_bytes.name

    loader = PyMuPDFLoader(pdf_path)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")

    # The store is persisted on disk, so writing every upload into the same
    # default collection with random ids would pile up duplicate chunks on each
    # question and mix unrelated PDFs at retrieval time. Key the collection by
    # a hash of the file content and give chunks deterministic ids so that
    # re-processing the same file rewrites the same entries.
    with open(pdf_path, "rb") as pdf_file:
        content_digest = hashlib.sha256(pdf_file.read()).hexdigest()[:32]
    collection_name = f"pdf_{content_digest}"
    chunk_ids = [f"{collection_name}_{index}" for index in range(len(chunks))]

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=chunk_ids,
        collection_name=collection_name,
        persist_directory="./chroma_db",  # Example directory
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever

In [4]:
def combine_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [5]:
def ollama_llm(question, context, model="deepseek-r1:8b"):
    """Ask an Ollama chat model a question together with supporting context.

    Args:
        question: The user's question.
        context: Text placed after the question in the prompt (the retrieved
            document chunks when called from the RAG chain).
        model: Name of the Ollama model to query. Defaults to the model used
            throughout this notebook.

    Returns:
        The model's reply with any ``<think>...</think>`` sections removed and
        surrounding whitespace stripped.
    """
    formatted_prompt = f"Question: {question}\n\nContext: {context}"

    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': formatted_prompt}],  # Single user turn
    )
    response_content = response['message']['content']

    # Remove the think sections entirely (replaced by an empty string).
    # re.DOTALL lets `.` match newlines, since the section spans several lines.
    final_answer = re.sub(
        r'<think>.*?</think>',
        '',
        response_content,
        flags=re.DOTALL,
    ).strip()

    # Return the final cleaned response
    return final_answer

In [6]:
# Retrieval Augmented Generation: retrieve context first, then ask the model
def rag_chain(question, text_splitter, vectorstore, retriever):
    """Answer ``question`` using chunks fetched by ``retriever`` as context.

    Args:
        question: The question to put to the model.
        text_splitter: Accepted as part of the pipeline tuple; not used here.
        vectorstore: Accepted as part of the pipeline tuple; not used here.
        retriever: Object whose ``invoke(question)`` returns the documents
            to use as context.

    Returns:
        Whatever ``ollama_llm`` returns for the question and combined context.
    """
    context_text = combine_docs(retriever.invoke(question))
    answer = ollama_llm(question, context_text)
    return answer

In [7]:
def ask_question(pdf_bytes, question):
    """Gradio callback: process the uploaded PDF and answer ``question`` from it.

    Returns ``None`` when no PDF was uploaded; otherwise the answer produced
    by ``rag_chain``.
    """
    splitter, store, doc_retriever = process_pdf(pdf_bytes)

    # A missing splitter is how process_pdf signals that nothing was uploaded
    if splitter is not None:
        return rag_chain(question, splitter, store, doc_retriever)
    return None

In [8]:
# Define a Gradio interface
interface = gr.Interface(
    fn=ask_question,
    inputs=[
        # ask_question returns nothing when no file is given, so the upload is
        # required in practice. Only PDFs are accepted because the loader used
        # downstream is PDF-specific.
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Textbox(label="Ask a question"),
    ],
    # Markdown output so formatting in the model's answer is rendered
    outputs=gr.Markdown(),
    title="Ask questions about your PDF",
    description="Use Ollama model to answer your questions about the uploaded PDF document.",
)

# Launch the Gradio interface to start the web-based app.
# share=True also publishes a temporary public URL in addition to the local one.
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://67b58e32f2e6edc67a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


