Activate the virtual environment and install the required packages
1. !source reasongraph/bin/activate <br>
2. !pip install "langchain>=0.3" "langgraph>=0.2" qdrant-client sentence-transformers torch pydantic python-dotenv
3. !pip install pymupdf tiktoken # Count number of tokens to check model compatibility

In [1]:
import fitz
import tiktoken

# Quick size check of one PDF: page count, character count and token count.
pdf_path = "docs/Profile.pdf"

# Open the PDF in a context manager so the file handle is released even if
# text extraction raises. The page count is captured inside the block because
# it is needed for the report after the document has been closed.
with fitz.open(pdf_path) as doc:
    num_pages = len(doc)
    # Join the per-page text once instead of growing a string page by page.
    text = "".join(page.get_text() for page in doc)

# NOTE(review): the GPT-4 encoding is presumably only a proxy for document
# size; the embedding models loaded later in this notebook may tokenize
# differently - confirm before relying on this number as a hard limit.
enc = tiktoken.encoding_for_model("gpt-4")
num_tokens = len(enc.encode(text))

print(f"Number of pages: {num_pages}")
print(f"Context Length: {len(text):,}")
print(f"Number of tokens: {num_tokens:,}")


Number of pages: 21
Context Length: 42,248
Number of tokens: 8,686


In [2]:
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field
from typing import List, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http import models as qmodels
import fitz, torch, uuid, os, sys
from pathlib import Path
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# State container shared by the RAG pipeline functions
class RAGState(BaseModel):
    """Mutable state handed from one pipeline step to the next.

    Each step function in this notebook takes a ``RAGState``, fills in the
    fields it is responsible for, and returns the same object.
    """

    # Full extracted text of each loaded PDF (written by load_and_chunk)
    docs: List[str] = Field(default_factory=list)
    # Text chunks produced by the splitter (written by load_and_chunk)
    chunks: List[str] = Field(default_factory=list)
    # User question; retrieve_from_qdrant does nothing while this is unset
    query: Optional[str] = Field(default=None)
    # Chunk texts returned by the vector search (written by retrieve_from_qdrant)
    results: List[str] = Field(default_factory=list)
    # NOTE(review): presumably the final generated answer; no visible code sets it
    answer: Optional[str] = Field(default=None)

In [4]:
# Load PDF files and split their text into chunks
def load_and_chunk(
    state: RAGState,
    folder: str = "docs",
    chunk_size: int = 800,
    chunk_overlap: int = 100,
) -> RAGState:
    """Read every PDF under ``folder`` and split the combined text into chunks.

    Args:
        state: Pipeline state to fill in; it is mutated and returned.
        folder: Directory searched recursively for ``*.pdf`` files.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.

    Returns:
        The same ``state`` object. On success ``state.docs`` holds one text per
        loaded PDF and ``state.chunks`` the resulting chunks. When no PDF yields
        any text, ``state`` is returned unchanged.
    """
    texts = []

    # Sort the paths: rglob makes no ordering guarantee, and a stable order
    # keeps chunk indices reproducible between runs.
    for file in sorted(Path(folder).rglob("*.pdf")):
        try:
            with fitz.open(file) as pdf:
                text = "\n".join(page.get_text("text") for page in pdf)
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole load.
            print(f"Failed to load PDF files: {file.name} ({e})")
            continue

        # A PDF without a text layer (e.g. scanned images) yields no text.
        # Skip it so it is not reported as a loaded document.
        if not text.strip():
            print(f"No extractable text in PDF: {file.name}")
            continue
        texts.append(text)

    if not texts:
        print(f'PDF files are unavailable from "{folder}" directory.')
        return state

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    # All documents are joined before splitting, so a chunk may span the
    # boundary between two PDFs.
    chunks = splitter.split_text("\n".join(texts))

    print(f"Loaded {len(texts)} PDFs and divided into {len(chunks)} chunks")
    state.docs = texts
    state.chunks = chunks
    return state

In [5]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)

Using device: cuda


In [6]:
# Embedding model identifiers; both models are loaded onto DEVICE up front
QWEN_MODEL = "Qwen/Qwen3-Embedding-0.6B"
BGE_MODEL = "BAAI/bge-m3"
# Loaded in order (Qwen first, then BGE) and unpacked into the two handles
qwen, bge = (
    SentenceTransformer(model_name, device=DEVICE)
    for model_name in (QWEN_MODEL, BGE_MODEL)
)

In [7]:
# Embedding width reported by each model; used as the vector sizes when the
# Qdrant collection is created
QWEN_DIM, BGE_DIM = (
    encoder.get_sentence_embedding_dimension() for encoder in (qwen, bge)
)

In [8]:
# Qdrant connection settings; environment variables override the local
# defaults (localhost:6333)
QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))  # env values are strings
COLLECTION = "pkyoo_personal_docs_dualvec"  # name of the vector collection

In [9]:
# Create the Qdrant client and verify that the server is reachable
try:
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=60)
    # Listing collections is used purely as a connectivity probe
    _ = client.get_collections()
except Exception:
    # Print a troubleshooting hint, then re-raise so the cell still fails
    print(
        "Failed to connect Qdrant. Check Qdrant Docker is running.",
        "e.g., docker run -p 6333:6333 -v $(pwd)/qdrant_storage:/qdrant/storage qdrant/qdrant",
        sep="\n",
    )
    raise

In [10]:
# Names of all collections currently present on the Qdrant server
existing = [
    collection.name for collection in client.get_collections().collections
]

In [11]:
# Create the dual-vector collection on first run; leave an existing one untouched
if COLLECTION in existing:
    print(f"ℹCollection exists: {COLLECTION}")
else:
    client.create_collection(
        collection_name=COLLECTION,
        # One named vector per embedding model, both compared by cosine distance
        vectors_config={
            vector_name: qmodels.VectorParams(size=dim, distance=qmodels.Distance.COSINE)
            for vector_name, dim in (("qwen", QWEN_DIM), ("bge", BGE_DIM))
        },
    )
    print(f"Created collection: {COLLECTION}")

ℹCollection exists: pkyoo_personal_docs_dualvec


Open http://localhost:6333/dashboard to view the local Qdrant collections dashboard.

In [13]:
def embed_and_store(state: RAGState, batch_size=128) -> RAGState:
    """Embed every chunk with both models and upsert the vectors into Qdrant.

    Point IDs are derived from the chunk text, so re-running this step on the
    same chunks overwrites the existing points instead of adding duplicates.

    Args:
        state: Pipeline state; ``state.chunks`` must already be populated.
        batch_size: Batch size used for encoding and for each upsert request.

    Returns:
        The same ``state`` object (it is not modified here).
    """
    # Nothing to embed: stop early.
    if not state.chunks:
        print("No chunks found. Run load_and_chunk() first.")
        return state

    # Step 1: Encode the chunks with Qwen3-Embedding-0.6B
    print("Encoding with Qwen3-Embedding-0.6B ...")
    qwen_vecs = qwen.encode(
        state.chunks,
        batch_size=batch_size,
        show_progress_bar=True,
        # Normalization (unit length) keeps the vector direction intact, which
        # is what cosine similarity compares. Standardization would change the
        # direction of the vectors, so it is not used here.
        normalize_embeddings=True
    )

    # Step 2: Encode the same chunks with bge-m3
    print("Encoding with bge-m3 ...")
    bge_vecs = bge.encode(
        state.chunks,
        batch_size=batch_size,
        show_progress_bar=True,
        normalize_embeddings=True
    )

    # Step 3: Build one point per distinct chunk.
    # A random uuid4 would give every run brand-new IDs, so the upsert could
    # never update and each re-run would duplicate the whole collection.
    # uuid5 over the chunk text is deterministic: same text -> same point ID.
    id_namespace = uuid.uuid5(uuid.NAMESPACE_DNS, COLLECTION)
    points_by_id = {}
    for i, (chunk, qv, bv) in enumerate(zip(state.chunks, qwen_vecs, bge_vecs)):
        point_id = str(uuid.uuid5(id_namespace, chunk))
        if point_id in points_by_id:
            # Identical chunk text seen earlier; keep the first occurrence.
            continue
        points_by_id[point_id] = qmodels.PointStruct(
            id=point_id,
            vector={
                "qwen": qv.tolist(),   # Vector from Qwen3 model
                "bge": bv.tolist()     # Vector from bge-m3 model
            },
            payload={
                "text": chunk,         # Original chunk text
                "chunk_index": i       # Index of the chunk for traceability
            }
        )
    points = list(points_by_id.values())

    # Step 4: Upsert in batches so one request never carries every vector.
    # 'upsert' inserts new IDs and overwrites existing ones;
    # 'wait=True' blocks until each batch has been applied.
    step = max(1, int(batch_size))
    for start in range(0, len(points), step):
        client.upsert(
            collection_name=COLLECTION,
            points=points[start:start + step],
            wait=True
        )

    # Step 5: Log how many vectors were written
    print(f"Upserted {len(points)} vectors to '{COLLECTION}'")
    return state

In [None]:
def retrieve_from_qdrant(state: RAGState, top_k: int = 5) -> RAGState:
    """Fetch the ``top_k`` chunks most similar to ``state.query`` from Qdrant.

    The query is embedded with bge-m3 and matched against the ``"bge"`` named
    vector of the collection.

    Args:
        state: Pipeline state; ``state.query`` holds the user question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The same ``state`` object with ``state.results`` set to the retrieved
        chunk texts. When no query is set, ``state`` is returned unchanged.
    """
    if not state.query:
        print("No query provided in state.query")
        return state

    print("Generating query embedding using bge-m3")

    # Step 1: Encode the query with the same model and normalization that
    # produced the stored "bge" vectors.
    query_vec = bge.encode(
        [state.query],
        normalize_embeddings=True
    )[0]

    # Step 2: Search the 'bge' named vector.
    # query_points replaces the deprecated client.search; 'using' must match
    # the vector name the points were stored under.
    print(f"Searching Qdrant collection '{COLLECTION}' ...")

    response = client.query_points(
        collection_name=COLLECTION,
        query=query_vec.tolist(),
        using="bge",
        limit=top_k,
        with_payload=True
    )

    # Step 3: Extract the chunk texts, skipping any hit stored without a
    # "text" payload instead of raising KeyError.
    results = [
        hit.payload["text"]
        for hit in response.points
        if hit.payload and "text" in hit.payload
    ]

    # Step 4: Store retrieved chunks in RAG state
    state.results = results

    print(f"Retrieved {len(results)} relevant chunks from Qdrant.")
    return state
