In [1]:
from qdrant_client import QdrantClient
from qdrant_client import models as qmodels
from sentence_transformers import SentenceTransformer

from collections import Counter

In [2]:
# Name of the sentence-transformers model used to embed text (loaded below).
# NOTE(review): presumably this must be the same model that produced the
# vectors stored in the collection — confirm against the indexing pipeline.
MODEL_NAME = "BAAI/bge-large-en-v1.5"
# Address of the Qdrant server the client connects to.
QDRANT_URL = "http://localhost:6333"
# Name of the Qdrant collection targeted by the query and scroll helpers.
COLLECTION_NAME = "chunks"

# Module-level handles shared by the helper functions defined in later cells.
model = SentenceTransformer(MODEL_NAME)
client = QdrantClient(url=QDRANT_URL)

In [3]:
def search_top_n_chunks(query: str, limit: int = 3, product: str | None = None):
    query_vec = model.encode([query], normalize_embeddings=True)[0].tolist()

    query_filter = None
    if product is not None:
        query_filter = qmodels.Filter(
            must=[
                qmodels.FieldCondition(
                    key="product",
                    match=qmodels.MatchValue(value=product),
                )
            ]
        )

    res = client.query_points(
        collection_name=COLLECTION_NAME,
        query=query_vec,
        limit=limit,
        query_filter=query_filter,
        with_payload=True,
        with_vectors=False,
    )

    return [
        {
            "score": p.score,
            "text": (p.payload or {}).get("text"),
            "url": (p.payload or {}).get("url"),
            "title": (p.payload or {}).get("title"),
            "product": (p.payload or {}).get("product"),
        }
        for p in res.points
    ]

In [4]:
# Example query: a loosely worded user complaint, with the search restricted
# to points whose `product` payload is "dell-c5765dn". The returned list is
# displayed as the cell output.
search_top_n_chunks(
    "How it fix, it is turning off constantly",
    limit=10,
    product="dell-c5765dn",
)

[{'score': 0.6303446,
  'text': '......................... 34 switching the power on................................................................................................................... 34 switching the power off.................................................................................................................. 35 switching the main power off........................................................................................................ 36 ground fault interrupter......................................................................................................................... 37 power saver mode.......',
  'url': "https://downloads.dell.com/manuals/all-products/esuprt_printers_main/esuprt_printers_color_laser/dell-c5765dn_user's%20guide_en-us.pdf",
  'title': '',
  'product': 'dell-c5765dn'},
 {'score': 0.6075412,
  'text': 'that the touch screen is blank, and then switch on the machine power. if the error still is not resolved, contact custom

In [5]:
def list_products():
    """Tally how many points each product has in the collection.

    Pages through ``COLLECTION_NAME`` with ``client.scroll`` (5000 points per
    request, fetching only the ``product`` payload field and no vectors)
    until the returned offset is ``None``.

    Returns:
        A ``(products, counts)`` tuple: ``products`` is the sorted list of
        distinct product values, ``counts`` is a ``Counter`` mapping each
        product to its number of points. Points with a missing or falsy
        ``product`` value are ignored.
    """
    tally = Counter()
    cursor = None
    finished = False

    while not finished:
        batch, cursor = client.scroll(
            collection_name=COLLECTION_NAME,
            limit=5000,
            with_payload=["product"],
            with_vectors=False,
            offset=cursor,
        )

        # A point may have no payload at all; treat that as an empty mapping.
        names = ((point.payload or {}).get("product") for point in batch)
        tally.update(name for name in names if name)

        # The offset returned by scroll is None once there are no more pages.
        finished = cursor is None

    return sorted(tally), tally

# Scan the collection once and keep both the sorted product list and the
# per-product point counts.
products, product_counts = list_products()
# Display the 100 products with the most points (cell output).
product_counts.most_common(100)

[('networking-n2128px-on', 4875),
 ('powerconnect-j-srx100', 3612),
 ('powerconnect-8100', 3191),
 ('powerconnect-w-3600', 2603),
 ('brocade-6520', 2049),
 ('brocade-m6505', 1751),
 ('brocade-6510', 1715),
 ('powerconnect-w-6000', 1655),
 ('force10-s4810', 1545),
 ('powerconnect-j-ex4200', 1418),
 ('powerconnect', 1345),
 ('force10-e1200i', 1339),
 ('networking-n2000-series', 1115),
 ('powerconnect-7048', 1108),
 ('powerconnect-7048r', 1108),
 ('powerconnect-w-620', 1063),
 ('powerconnect-w-3400', 1054),
 ('powerconnect-w-3200', 1054),
 ('powerconnect-w-iap-224-225', 1038),
 ('powerconnect-w-iap-105', 993),
 ('powerconnect-w-iap-204-205', 915),
 ('powerconnect-b-dcx4s', 857),
 ('brocade-5100', 833),
 ('dell-c5765dn', 807),
 ('poweredge-m1000e', 755),
 ('powerconnect-6224', 751),
 ('powerconnect-7024', 672),
 ('powerconnect-w-iap-108-109', 625),
 ('poweredge-m805', 622),
 ('powerconnect-b-8000', 601),
 ('powerconnect-w-iap-134-135', 572),
 ('precision-t3400', 560),
 ('powerconnect-b-mlx

In [6]:
def pdf_docs_per_product():
    """Count the distinct PDF URLs seen for each product in the collection.

    Pages through ``COLLECTION_NAME`` with ``client.scroll`` (5000 points per
    request, fetching only the ``product`` and ``url`` payload fields and no
    vectors) until the returned offset is ``None``. A URL is treated as a PDF
    when it contains ``.pdf`` (case-insensitive) anywhere in the string.

    Points are skipped when they have no payload, a missing/falsy ``product``,
    or a ``url`` that is missing, null or not a string.

    Returns:
        A ``Counter`` mapping each product that has at least one PDF URL to
        the number of distinct PDF URLs recorded for it.
    """
    urls_by_product = {}
    next_offset = None

    while True:
        points, next_offset = client.scroll(
            collection_name=COLLECTION_NAME,
            limit=5000,
            with_payload=["product", "url"],
            with_vectors=False,
            offset=next_offset,
        )

        for point in points:
            payload = point.payload or {}
            product = payload.get("product")
            url = payload.get("url")
            # `.get(key, "")` only covers a missing key: a key stored with a
            # null value comes back as None and would make `.lower()` raise,
            # aborting the whole scan. Skip anything that is not a string.
            if not product or not isinstance(url, str):
                continue
            if ".pdf" in url.lower():
                urls_by_product.setdefault(product, set()).add(url)

        # The offset returned by scroll is None once there are no more pages.
        if next_offset is None:
            break

    return Counter(
        {product: len(urls) for product, urls in urls_by_product.items()}
    )

# Per-product number of distinct PDF URLs, as a Counter.
pdf_counts = pdf_docs_per_product()
print(f"Products with at least one PDF: {len(pdf_counts)}")
# NOTE(review): this figure is the sum of the per-product distinct-URL counts,
# so a URL stored under several products is counted once per product rather
# than once overall.
print(f"Total unique PDFs across all products: {sum(pdf_counts.values())}\n")
# Display the 50 products with the most distinct PDF URLs (cell output).
pdf_counts.most_common(50)

Products with at least one PDF: 618
Total unique PDFs across all products: 752



[('high-computing-solution-resources', 6),
 ('poweredge-m1000e', 5),
 ('brocade-6510', 4),
 ('powerconnect-w-airwave', 4),
 ('s-solution-resources', 4),
 ('servers-solution-resources', 4),
 ('poweredge-t710', 3),
 ('networking-n2128px-on', 3),
 ('brocade-m6505', 3),
 ('powerconnect-w-iap-134-135', 3),
 ('brocade-6520', 3),
 ('powervault-nx3500', 3),
 ('powerconnect-w-iap-105', 3),
 ('poweredge-2900', 3),
 ('powerconnect-w-iap-224-225', 3),
 ('dell-hybrid-cloud-system-microsoft', 3),
 ('poweredge-c6220', 3),
 ('inspiron-7000', 3),
 ('latitude-e6430', 3),
 ('powerconnect-w-clearpass-100-software', 3),
 ('drseries', 3),
 ('powervault-dl2100', 3),
 ('poweredge-4210', 3),
 ('powervault-dl2000-commvault', 3),
 ('inspiron-14-3458-laptop', 3),
 ('dell-c5519q-monitor', 2),
 ('dell-se2218hv-monitor', 2),
 ('dell-p2416d', 2),
 ('optiplex-xe', 2),
 ('dell-e2015hv', 2),
 ('dell-p2317h-monitor', 2),
 ('dell-dualarm-mda20', 2),
 ('powerconnect-w-ap277', 2),
 ('powerconnect-w-clearpass-hw-appln', 2),
