In [1]:
import torch

# Sanity-check the GPU setup before loading any models.
# CUDA toolkit version this PyTorch build targets (e.g. '12.8'; None on CPU-only builds).
print(torch.version.cuda)
# Whether PyTorch can actually see a usable CUDA device.
print(torch.cuda.is_available())

# get_device_name() raises when no CUDA device is usable, so only query it
# after the availability check instead of letting the cell crash.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # should show your RTX 50xx
else:
    print("No CUDA device available")

12.8
True
NVIDIA GeForce RTX 5060 Ti


In [2]:
import chromadb
from sentence_transformers import SentenceTransformer
from chromadb.utils.embedding_functions import EmbeddingFunction


# Ephemeral client for local development: `client` is the handle every later
# cell uses to reach the collection. Printing it just confirms it was created.
client = chromadb.Client()
print("Created ChromaDB client:", client)

  from .autonotebook import tqdm as notebook_tqdm


Created ChromaDB client: <chromadb.api.client.Client object at 0x000001C48133D0F0>


# Finished Setup

With the CUDA check passed and the ChromaDB client created, we can define the embedding function and start testing.

In [3]:
import gc

try:
    import torch
except ImportError:
    torch = None


class EF(EmbeddingFunction):
    """Chroma embedding function backed by a SentenceTransformer model.

    The model can be dropped from memory with ``offload()`` and brought back
    with ``reload()``, so its memory can be released between uses.

    Args:
        model_name: Model identifier handed to ``SentenceTransformer``.
            Defaults to ``"BAAI/bge-large-en-v1.5"``.
    """

    # Single source of truth for the default model id (used by __init__ and reload).
    DEFAULT_MODEL_NAME = "BAAI/bge-large-en-v1.5"

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        # Remember which model to load so reload() restores the same one.
        self.model_name = model_name
        self.m = SentenceTransformer(self.model_name)

    def offload(self):
        """Drop the model and encourage Python/CUDA to release its memory.

        Safe to call when the model is already offloaded.
        """
        import warnings

        # Optional: move to CPU first if the model was on the GPU.
        if self.m is not None and hasattr(self.m, "to"):
            try:
                self.m.to("cpu")
            except Exception as exc:
                # Best effort only: the reference is dropped below either way,
                # but surface the problem instead of hiding it completely.
                warnings.warn(
                    f"Could not move embedding model to CPU before offloading: {exc!r}",
                    RuntimeWarning,
                )

        # Drop reference
        self.m = None

        # Encourage memory to be freed
        gc.collect()
        if torch is not None and torch.cuda.is_available():
            torch.cuda.empty_cache()

    def reload(self):
        """Load the model again if it is not currently loaded (no-op otherwise)."""
        if self.is_loaded():
            return  # already loaded
        self.m = SentenceTransformer(self.model_name)

    def is_loaded(self):
        """Return True while a model instance is held in ``self.m``."""
        return self.m is not None

    def __call__(self, texts):
        """Embed ``texts`` and return one vector (list of floats) per text.

        Vectors are produced with ``normalize_embeddings=True``.

        Raises:
            ValueError: If the model has been offloaded and not reloaded.
        """
        if not self.is_loaded():
            raise ValueError("Model is not loaded. Call reload() before using.")
        return self.m.encode(texts, normalize_embeddings=True).tolist()

In [4]:
# Instantiate the embedding function (EF.__init__ loads the model) and bind it
# to the "meeting_chunks" collection, creating the collection if it is missing.
# `ef` and `coll` are the handles used by every later cell.
ef = EF()
coll = client.get_or_create_collection("meeting_chunks", embedding_function=ef)

In [5]:
def iter_collection(coll, include_embeddings=False, batch_size=1000, where=None, ids=None):
    """
    Lazily walk a Chroma collection page by page.

    Each yielded record is a dict with the keys "id", "document" and
    "metadata", plus "embedding" when ``include_embeddings`` is set and the
    collection returned embeddings. ``where`` and ``ids`` are forwarded
    unchanged to ``coll.get`` on every page request.
    """
    # "ids" always come back from get(), so they are not requested explicitly.
    fields = ["metadatas", "documents"] + (["embeddings"] if include_embeddings else [])

    cursor = 0
    while True:
        page = coll.get(limit=batch_size, offset=cursor, include=fields, where=where, ids=ids)
        page_ids = page.get("ids", [])
        count = len(page_ids)
        if not count:
            break

        # Missing/empty columns are padded with None so indexing stays aligned.
        documents = page.get("documents") or [None] * count
        metadatas = page.get("metadatas") or [None] * count
        vectors = page.get("embeddings") if include_embeddings else None

        for pos, record_id in enumerate(page_ids):
            record = {
                "id": record_id,
                "document": documents[pos],
                "metadata": metadatas[pos],
            }
            if vectors is not None:
                record["embedding"] = vectors[pos]
            yield record

        cursor += count
        # A short page means the collection is exhausted; skip the extra round trip.
        if count < batch_size:
            break


def list_collection(coll, **kwargs):
    """Collect every record yielded by ``iter_collection(coll, **kwargs)`` into a list."""
    records = []
    records.extend(iter_collection(coll, **kwargs))
    return records


# Example: fetch every record (without embeddings) in pages of 500, report how
# many there are, and pretty-print the first one when the collection is non-empty.
items = list_collection(coll, include_embeddings=False, batch_size=500)
print(f"{len(items)} records")
if items:
    # pprint is only needed (and only imported) when there is something to show.
    from pprint import pprint
    pprint(items[0])

0 records


In [6]:

import uuid

# Test corpus: ten longer passages on unrelated topics followed by four short
# personal statements. Each string becomes one document in the collection.
target_strings = [
"Stargazing on crisp winter nights reveals Orion's belt, glittering nebulae, and planets drifting steadily, reminding us space is vast, indifferent, yet beautifully ordered by gravity and light.",
"Sear chicken thighs in butter until golden, deglaze with lemon and capers, then simmer gently; the sauce thickens, bright and briny, perfect over rice or roasted potatoes.",
"Design services as small, cohesive modules with clear contracts; add observability, idempotent handlers, and backpressure. Favor simplicity over cleverness, because production failures punish ambiguity more than missing features.",
"Arriving before sunrise, the market hums: bakers dust loaves, fishermen sort silver catch, and baristas steam milk, releasing that hopeful hiss promising warmth, breakfast, and easy conversation.",
"A patient midfield triangle controls tempo with short passes, drawing defenders out, then slicing a diagonal ball behind the line, creating space for a calm, clinical finish.",
"That chord change feels like sunlight breaking through blinds; the bass breathes, drums lean forward, and the singer lifts vowels carefully, letting time stretch without ever losing momentum.",
"Compost transforms kitchen scraps into dark, sweet soil; worms churn patiently, fungi thread networks, and seedlings respond with thicker stems, brighter leaves, and roots that explore confidently through loosened beds.",
"The archive smelled of dust and glue; letters revealed ordinary worries during extraordinary times, reminding me history survives not in speeches, but in grocery lists, receipts, and quiet promises.",
"Compound interest is patient arithmetic: reinvest modest gains, avoid catastrophic losses, and time multiplies discipline; markets punish urgency, reward consistency, and quietly move wealth toward those who plan.",
"Sleep is the cheapest performance enhancer; dim lights, keep a cool room, respect a steady schedule, and your mood, memory, cravings, and training numbers will all noticeably improve.",
"Winter is my favorite season",
"April has the softest rain",
"My birthday is in April",
"I met my best friend in grade 5",
]

# The testing cell further down ends with ef.offload(); reload here so this
# cell can be re-run at any point without ef() raising "Model is not loaded".
ef.reload()

# Clear the collection first so re-running the cell does not accumulate
# duplicate documents under fresh random ids.
print("Clearing collection...")
# Get all items and delete them by ID
all_items = list_collection(coll, include_embeddings=False)
if all_items:
    all_ids = [item["id"] for item in all_items]
    coll.delete(ids=all_ids)
print("Collection cleared.")

# Embed all strings in one batched call instead of one model call per string;
# skip the call entirely when there is nothing to embed.
target_vectors = ef(target_strings) if target_strings else []

for target_string, target_vector in zip(target_strings, target_vectors):
    # A fresh uuid4 cannot already exist in the collection that was just
    # cleared, so no per-entry existence check / delete is needed.
    custom_entry_id = str(uuid.uuid4())
    print(f"Processing entry ID: {custom_entry_id}")
    print("Target vector generated: dimensions: {}".format(len(target_vector)))

    # Add the document together with its precomputed vector.
    coll.add(
        documents=[target_string],
        metadatas=[{"source": "test_case"}],
        ids=[custom_entry_id],
        embeddings=[target_vector]
    )
    print(f"Added entry with ID {custom_entry_id}")

Clearing collection...
Collection cleared.
Processing entry ID: 663e8a9a-b50e-452c-b2d9-93dbfca76fe2
Target vector generated: dimensions: 1024
Added entry with ID 663e8a9a-b50e-452c-b2d9-93dbfca76fe2
Processing entry ID: 5c538cb6-9960-4f08-b130-f88ba973304b
Target vector generated: dimensions: 1024
Added entry with ID 5c538cb6-9960-4f08-b130-f88ba973304b
Processing entry ID: a028e04e-f4ef-4415-9e25-fb47f044afa3
Target vector generated: dimensions: 1024
Added entry with ID a028e04e-f4ef-4415-9e25-fb47f044afa3
Processing entry ID: e5d8264e-4823-4043-ac78-c8a08a5a6a19
Target vector generated: dimensions: 1024
Added entry with ID e5d8264e-4823-4043-ac78-c8a08a5a6a19
Processing entry ID: 1dc55f02-0f0e-4214-9f00-ea353d1cbaf4
Target vector generated: dimensions: 1024
Added entry with ID 1dc55f02-0f0e-4214-9f00-ea353d1cbaf4
Processing entry ID: 279d2cc9-e410-4d11-a5f2-9f82a41e61cf
Target vector generated: dimensions: 1024
Added entry with ID 279d2cc9-e410-4d11-a5f2-9f82a41e61cf
Processing entr

In [7]:
def query_collection(coll, query_string, top_k=10, embedding_function=None):
    """Run a similarity search for ``query_string`` against ``coll``.

    Args:
        coll: Collection object exposing
            ``query(query_embeddings=..., n_results=..., include=...)``.
        query_string: Text to embed and search for.
        top_k: Number of results to request from the collection.
        embedding_function: Callable that maps a list of texts to a list of
            vectors. Defaults to the notebook-level ``ef`` so existing calls
            behave as before.

    Returns:
        The result of ``coll.query`` with metadatas and documents included.
    """
    # Fall back to the notebook-level embedder only when none is supplied, so
    # the function can also be used (and tested) without that global.
    embed = ef if embedding_function is None else embedding_function

    # create query vector
    query_vector = embed([query_string])[0]

    # perform similarity search
    return coll.query(
        query_embeddings=[query_vector],
        n_results=top_k,
        include=["metadatas", "documents"]
    )


## Reranker using a sentence-transformers CrossEncoder

In [None]:
from sentence_transformers import CrossEncoder

# Cross-encoder instance that rerank() uses to score (query, document) pairs.
# TODO: create an offload/reload handler for this model too, as EF does for
# the embedding model.
reranker = CrossEncoder('BAAI/bge-reranker-v2-m3')

def rerank(query, candidates, top_k=10, model=None):
    """Order ``candidates`` by relevance to ``query`` and keep the best ``top_k``.

    Args:
        query: The query text.
        candidates: Documents to score, either a flat list or a list whose
            first element is the list of documents (the nested shape of a
            ChromaDB query result). ``None`` or empty input yields ``[]``.
        top_k: Maximum number of documents to return.
        model: Object with a ``predict(pairs)`` method returning one score per
            ``(query, document)`` pair. Defaults to the notebook-level
            ``reranker``.

    Returns:
        The candidates sorted by descending score, truncated to ``top_k``.
    """
    # Handle both flat lists and nested lists from ChromaDB
    if candidates and isinstance(candidates[0], list):
        candidates = candidates[0]

    # Nothing to score (None, [], or [[]]): return early rather than handing
    # an empty batch to predict(), which may fail on empty input.
    if not candidates:
        return []

    scorer = reranker if model is None else model
    pairs = [(query, c) for c in candidates]
    scores = scorer.predict(pairs)  # one score per pair
    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)
    return [doc for doc, _ in ranked[:top_k]]


### Testing

In [10]:

ef.reload()  # ensure model is loaded (this cell offloads it again when it finishes)

try:
    # query selection
    query_1 = "what are some important dates?"
    qdb_1 = query_collection(coll, query_1, top_k=5)
    print(qdb_1)
    # qdb_1['documents'] is nested (one inner list per query); rerank() unwraps it.
    rerank_1 = rerank(query_1, qdb_1['documents'], top_k=5)
    print("Reranked results:")
    for doc in rerank_1:
        print(doc)
finally:
    # Release the embedding model even if the query or rerank step raised,
    # so a failed run does not leave it loaded.
    ef.offload()

    


{'ids': [['bbfda681-bb14-47a5-ad7a-3b896e197faf', 'ee860e1f-a45b-4b73-99b9-69f0842fb5bd', 'a5e85f1b-bea2-4c4b-82ba-6e86f47636a6', 'cf2efd31-f9dd-4942-87eb-ea0fce666b46', 'e4f0489e-03db-4665-94ce-6e70e25b66b8']], 'embeddings': None, 'documents': [['My birthday is in April', 'April has the softest rain', 'Sleep is the cheapest performance enhancer; dim lights, keep a cool room, respect a steady schedule, and your mood, memory, cravings, and training numbers will all noticeably improve.', 'Winter is my favorite season', 'Compound interest is patient arithmetic: reinvest modest gains, avoid catastrophic losses, and time multiplies discipline; markets punish urgency, reward consistency, and quietly move wealth toward those who plan.']], 'uris': None, 'included': ['metadatas', 'documents'], 'data': None, 'metadatas': [[{'source': 'test_case'}, {'source': 'test_case'}, {'source': 'test_case'}, {'source': 'test_case'}, {'source': 'test_case'}]], 'distances': None}
Reranked results:
My birthday