```bash
# Data will persist inside the container unless you map a local volume

docker run -d \
    --name qdrant \
    -p 6333:6333 \
    -p 6334:6334 \
    -v $(pwd)/qdrant_data:/qdrant/storage \
    qdrant/qdrant
```
# --------------------------------------------

# Inside command cell (Windows example with full path volume mapping)

docker run -d --name qdrant -p 6333:6333 -p 6334:6334 \
    -v "C:/Users/ganes_3ck5/DataScience/Gen_AI/Course_GenAI/Gen_AI_In-Depth/GenAI_Course_Work/LangGraph/Langgraph_Tutorial/DocQuery_&_Interview_Booker/qdrant_data:/qdrant/storage" \
    qdrant/qdrant

# Check running containers
docker ps

# View logs of qdrant container
docker logs qdrant

In [67]:
# ========== SETUP ==========
import re
import uuid
from unstructured.partition.auto import partition
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer  # Changed import
import ollama
from langchain_ollama import ChatOllama
from datetime import datetime, timezone

# Configuration
CHUNK_METHODS = ["sliding", "sentence"]
SLIDING_SIZE = 200
SLIDING_OVERLAP = 50
SENTENCE_MAX = 300
MIN_CHUNK = 25

EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"  # Official model name
LLM_MODEL = "llama3"

# Initialize clients
qdrant = QdrantClient(host="localhost", port=6333)
embed_model = SentenceTransformer(EMBEDDING_MODEL)
llm = ChatOllama(model=LLM_MODEL)


In [68]:
# print("\nCollections ready:")
# print([coll.name for coll in qdrant.get_collections().collections])

# # Cleanup old collections
# for coll in qdrant.get_collections().collections:
#     if coll.name.startswith(("rag_data_", "raw_data_")):
#         qdrant.delete_collection(coll.name)

# print("\nCollections ready:")
# print([coll.name for coll in qdrant.get_collections().collections])

In [69]:
# ========== TEXT PROCESSING ==========
def clean_text(text):
    if not text: return ""
    text = re.sub(r'[^\w\s.,;:!?\'-]', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

def chunk_sentences(text):
    text = clean_text(text)
    if len(text) < MIN_CHUNK or not any(c.isalpha() for c in text):
        return []
    
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks, current = [], ""
    
    for s in sentences:
        if len(current) + len(s) <= SENTENCE_MAX:
            current += s + " "
        else:
            if current.strip(): chunks.append(current.strip())
            current = s + " "
    
    if current.strip(): chunks.append(current.strip())
    return chunks

def chunk_sliding(text):
    text = clean_text(text)
    if len(text) < MIN_CHUNK or not any(c.isalpha() for c in text):
        return []
    
    chunks = []
    start = 0
    while start < len(text):
        end = start + SLIDING_SIZE
        chunk = text[start:end].strip()
        if chunk: chunks.append(chunk)
        start += SLIDING_SIZE - SLIDING_OVERLAP
    return chunks

In [70]:
# ========== DOCUMENT PROCESSING ==========
def create_rag_collection(method):
    """Create fresh Qdrant collection for a chunking method"""
    coll_name = f"rag_data_{method}"
    
    # Delete if collection exists
    if qdrant.collection_exists(coll_name):
        qdrant.delete_collection(coll_name)
    
    # Create collection
    qdrant.create_collection(
        collection_name=coll_name,
        vectors_config=models.VectorParams(
            size=embed_model.get_sentence_embedding_dimension(),
            distance=models.Distance.COSINE
        )
    )
    return coll_name

def process_pdf_to_chunks(filepath, method):
    elements = partition(filename=filepath, languages=["eng"])
    for el in elements:
        text = el.text.strip()
        
        # Metadata extraction — update with real attributes if available in elements
        page_num = getattr(el, "page_number", "unknown")
        title = getattr(el, "heading", "unknown")
        section = getattr(el, "section", "unknown")
        author = getattr(el, "author", "unknown")
        processed_at = datetime.now(timezone.utc).isoformat()
        
        if text:
            for chunk in (chunk_sliding(text) if method == "sliding" else chunk_sentences(text)):
                yield {
                    "chunk": chunk,
                    "page": page_num,
                    "title": title,
                    "section": section,
                    "author": author,
                    "processed_at": processed_at
                }

def store_chunks(chunks, filepath, method):
    """Store processed chunks in Qdrant"""
    coll_name = create_rag_collection(method)

    # Extract chunk texts to embed
    chunk_texts = [c["chunk"] for c in chunks]
    embeddings = embed_model.encode(chunk_texts, normalize_embeddings=True)
    
    points = []
    for chunk_data, embedding in zip(chunks, embeddings):
        payload = {
            "text": chunk_data["chunk"],
            "source": filepath,
            "method": method,
            "page": chunk_data.get("page", "unknown"),
            "title": chunk_data.get("title", "unknown"),
            "section": chunk_data.get("section", "unknown"),
            "author": chunk_data.get("author", "unknown"),
            "processed_at": chunk_data.get("processed_at", datetime.now(timezone.utc).isoformat())
        }
        points.append(
            models.PointStruct(
                id=str(uuid.uuid4()),
                vector=embedding.tolist(),
                payload=payload
            )
        )
    
    qdrant.upsert(coll_name, points)
    return len(points)


In [71]:
# # ========== PROCESS DOCUMENT ==========
# file_path = "../../../data/files/rag.pdf"

# for method in CHUNK_METHODS:
#     chunks = list(process_pdf_to_chunks(file_path, method))  # Use process_pdf_to_chunks
#     stored = store_chunks(chunks, file_path, method)  # Use store_chunks
#     print(f"Stored {stored} {method} chunks")

# print("\nCollections ready:")
# print([coll.name for coll in qdrant.get_collections().collections])

In [81]:
# ========== QUERY FUNCTIONS ==========
def search(query, method="sentence", top_k=3):
    embedding = embed_model.encode(query, normalize_embeddings=True).tolist()
    
    results = qdrant.search(
        collection_name=f"rag_data_{method}",
        query_vector=embedding,
        limit=top_k,
        with_payload=True
    )
    # Return payload and similarity score together
    return [(hit.payload, hit.score) for hit in results]


def ask(query, method="sentence"):
    results = search(query, method)
    if not results:
        return "No relevant information found."

    context = "\n".join(f"- {payload['text']}" for payload, _ in results)

    # prompt = (
    #     f"Based ONLY on the following context, answer the question:\n{context}\n\n"
    #     f"Question: {query}\nAnswer:"
    # )
    prompt = (
        f"Based ONLY on the following retrieved context, provide the exact information without any modification or added explanation:\n{context}\n\n"
        f"Question: {query}\nAnswer:\n"
        "(Do not generate or infer answers, only present the retrieved text exactly as it appears.)"
    )
    response = llm.invoke(prompt)

    print("\n--- Retrieved Chunks Metadata ---")
    for payload, score in results:
        print(
            f"Page: {payload.get('page', 'unknown')}, Title: {payload.get('title', 'unknown')}, "
            f"Section: {payload.get('section', 'unknown')}, Author: {payload.get('author', 'unknown')}, "
            f"Source: {payload.get('source', 'unknown')}, Similarity: {score:.3f}, "
            f"Processed At: {payload.get('processed_at', 'unknown')}"
        )

    return response.content

In [89]:
def retrieve_only(query, method="sentence"):
    results = search(query, method)
    if not results:
        return "No relevant information found."

    # Unpack payload and score
    context = "\n".join(f"- {payload['text']}" for payload, _ in results)

    print("\n--- Retrieved Chunks Metadata ---")
    for payload, score in results:
        print(
            f"Page: {payload.get('page', 'unknown')}, Title: {payload.get('title', 'unknown')}, "
            f"Section: {payload.get('section', 'unknown')}, Author: {payload.get('author', 'unknown')}, "
            f"Source: {payload.get('source', 'unknown')}, Similarity: {score:.3f}, "
            f"Processed At: {payload.get('processed_at', 'unknown')}"
        )

    return context


In [90]:
# ========== ASK QUESTIONS ==========
question = "What is Retrieval-Augmented Generation (RAG)?"
print("Q:", question)
print("A:", retrieve_only(question, method="sliding"))

Q: What is Retrieval-Augmented Generation (RAG)?

--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.892, Processed At: 2025-08-12T12:54:06.649301+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.887, Processed At: 2025-08-12T12:54:06.656278+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.886, Processed At: 2025-08-12T12:54:06.649399+00:00
A: - arent, untraceable reasoning processes. Retrieval-Augmented Generation RAG has emerged as a promising solution by incorporating knowledge from external databases. This enhances the accuracy and credib
- 164 S. Es, J. James, L. Espinosa-Anke, and S. Schockaert, Ragas: Au- tomated evaluation of retrieval augmented generation, arXiv preprint arXiv:2309.15217, 2023.
- nations 2 when handling queries beyon

  results = qdrant.search(


In [91]:
# ========== ASK QUESTIONS ==========
question = "What is Retrieval-Augmented Generation (RAG)?"
print("Q:", question)
print("A:", ask(question, method="sliding"))

Q: What is Retrieval-Augmented Generation (RAG)?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.892, Processed At: 2025-08-12T12:54:06.649301+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.887, Processed At: 2025-08-12T12:54:06.656278+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.886, Processed At: 2025-08-12T12:54:06.649399+00:00
A: Retrieval-Augmented Generation (RAG) has emerged as a promising solution by incorporating knowledge from external databases.


In [79]:
# ========== ASK QUESTIONS ==========
question = "What is the main idea of this document?"
print("Q:", question)
print("A:", ask(question, method="sentence"))

Q: What is the main idea of this document?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.633, Processed At: 2025-08-12T12:54:55.865401+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.615, Processed At: 2025-08-12T12:54:55.867567+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.608, Processed At: 2025-08-12T12:54:55.863812+00:00
A: Based on the given context, the main idea of this document appears to be a discussion about enhancing information retrieval precision and relevance by using atomic expressions, or propositions, in natural language format.


In [80]:
# ========== ASK QUESTIONS ==========
question = "What is Retrieval-Augmented Generation (RAG)?"
print("Q:", question)
print("A:", ask(question, method="sliding"))

Q: What is Retrieval-Augmented Generation (RAG)?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.892, Processed At: 2025-08-12T12:54:06.649301+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.887, Processed At: 2025-08-12T12:54:06.656278+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.886, Processed At: 2025-08-12T12:54:06.649399+00:00
A: Based on the given context, Retrieval-Augmented Generation (RAG) is a solution that incorporates knowledge from external databases to enhance the accuracy and credibility of language models.


In [83]:
# ========== ASK QUESTIONS ==========
question = "What is Retrieval-Augmented Generation (RAG)?"
print("Q:", question)
print("A:", ask(question, method="sliding"))

Q: What is Retrieval-Augmented Generation (RAG)?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.892, Processed At: 2025-08-12T12:54:06.649301+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.887, Processed At: 2025-08-12T12:54:06.656278+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.886, Processed At: 2025-08-12T12:54:06.649399+00:00
A: Retrieval-Augmented Generation RAG has emerged as a promising solution by incorporating knowledge from external databases.


In [82]:
# ========== ASK QUESTIONS ==========
question = "What are the key limitations of Large Language Models (LLMs) that RAG aims to address?"
print("Q:", question)
print("A:", ask(question, method="sentence"))

Q: What are the key limitations of Large Language Models (LLMs) that RAG aims to address?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.802, Processed At: 2025-08-12T12:54:55.867815+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.800, Processed At: 2025-08-12T12:54:55.868229+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.790, Processed At: 2025-08-12T12:54:55.871262+00:00
A: There is no information in the provided context about the key limitations of Large Language Models (LLMs) that RAG aims to address. The text only mentions the benefits of RAG and its applicability to LLMs, but does not discuss the limitations. Therefore, I cannot provide an answer based on this context.


In [73]:
# ========== ASK QUESTIONS ==========
question = "What are the three core components of a RAG framework?"
print("\nQ:", question)
print("A:", ask(question, method="sentence"))


Q: What are the three core components of a RAG framework?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.827, Processed At: 2025-08-12T12:54:55.863988+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.815, Processed At: 2025-08-12T12:54:55.868575+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.809, Processed At: 2025-08-12T12:54:55.864958+00:00
A: Based on the provided context, the three core components of a RAG (Reasoning-Augmentation-Generation) framework are:

1. Retrieval
2. Generation
3. Augmentation


In [74]:
# ========== ASK QUESTIONS ==========
question = "What are the three core components of a RAG framework?"
print("\nQ:", question)
print("A:", ask(question, method="sliding"))


Q: What are the three core components of a RAG framework?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.829, Processed At: 2025-08-12T12:54:06.649731+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.809, Processed At: 2025-08-12T12:54:06.650648+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.805, Processed At: 2025-08-12T12:54:06.653831+00:00
A: According to the context, the three core components of a RAG (Retrieval-Augmentation-Generation) framework are:

1. Retrieval
2. Generation
3. Augmentation


In [75]:
# ========== ASK QUESTIONS ==========
question = "Explain the indexing process in Naive RAG."
print("\nQ:", question)
print("A:", ask(question, method="sentence"))


Q: Explain the indexing process in Naive RAG.


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.813, Processed At: 2025-08-12T12:54:55.871102+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.811, Processed At: 2025-08-12T12:54:55.864070+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.805, Processed At: 2025-08-12T12:54:55.865663+00:00
A: According to the given context, the indexing process in Naive RAG involves:

"Documents are split into chunks, encoded into vectors, and stored in a vector database."

This is step 1 of the RAG process applied to question answering, as shown in Fig. 2.


In [76]:
# ========== ASK QUESTIONS ==========
question = "Explain the indexing process in Naive RAG."
print("\nQ:", question)
print("A:", ask(question, method="sliding"))


Q: Explain the indexing process in Naive RAG.


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.820, Processed At: 2025-08-12T12:54:06.650008+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.814, Processed At: 2025-08-12T12:54:06.649809+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.813, Processed At: 2025-08-12T12:54:06.656252+00:00
A: The context does not mention "Naive RAG", it only talks about "Advanced RAG". Therefore, I cannot provide an answer based on this question as it is asking about a concept that is not present in the given text.


In [77]:
# ========== ASK QUESTIONS ==========
question = "What are the key challenges in the retrieval phase of Naive RAG?"
print("\nQ:", question)
print("A:", ask(question, method="sentence"))


Q: What are the key challenges in the retrieval phase of Naive RAG?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.870, Processed At: 2025-08-12T12:54:55.864831+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.868, Processed At: 2025-08-12T12:54:55.865663+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.836, Processed At: 2025-08-12T12:54:55.871102+00:00
A: Based on the context, the key challenges in the retrieval phase of Naive RAG are:

1. Formulating a precise and clear question (i.e., the original query)
2. Retrieving relevant documents from the data source


In [78]:
# ========== ASK QUESTIONS ==========
question = "What are the key challenges in the retrieval phase of Naive RAG?"
print("\nQ:", question)
print("A:", ask(question, method="sliding"))


Q: What are the key challenges in the retrieval phase of Naive RAG?


  results = qdrant.search(



--- Retrieved Chunks Metadata ---
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.875, Processed At: 2025-08-12T12:54:06.651299+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.875, Processed At: 2025-08-12T12:54:06.650521+00:00
Page: unknown, Title: unknown, Section: unknown, Author: unknown, Source: ../../../data/files/rag.pdf, Similarity: 0.840, Processed At: 2025-08-12T12:54:06.650008+00:00
A: Based only on the given context, the key challenge in the retrieval phase of Naive RAG is:

* Formulating a precise and clear question is difficult.

In other words, the main issue with Naive RAG's retrieval phase is that it relies directly on the user's original query, which may be imprecise or unclear.
