In [1]:
from qdrant_client import QdrantClient, models

# ":memory:" is passed as the client's `location`, so no external Qdrant
# server is needed to run the cells below.
client = QdrantClient(location=":memory:")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
collection_name = "sparse_vector_collection"

if client.collection_exists(collection_name=collection_name):
    client.delete_collection(collection_name=collection_name)

client.create_collection(
    collection_name="{collection_name}",
    sparse_vectors_config={
        "bm25_sparse_vector": models.SparseVectorParams(
            modifier=models.Modifier.IDF #Inverse Document Frequency
        ),
    },
)

True

In [3]:
grocery_items_descriptions = [
    "Grated hard cheese",
    "White crusty bread roll",
    "Mac and cheese"
]

#Estimating the average length of documents in the corpus
avg_document_length = sum(len(description.split()) for description in grocery_items_descriptions) / len(grocery_items_descriptions)

client.upsert(
    collection_name="{collection_name}",
    points=[
        models.PointStruct(
            id=i,
            payload={"text": description},
            vector={
                "bm25_sparse_vector": models.Document(
                    text=description,
                    model="Qdrant/bm25",
                    options={"avg_len": avg_document_length} #To pass BM25 parameters, here we're using default k & b for the BM25 formula
                )
           },
        ) for i, description in enumerate(grocery_items_descriptions)
    ],
)

Fetching 18 files: 100%|██████████| 18/18 [00:00<00:00, 23.64it/s]


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [13]:
client.query_points(
    collection_name="{collection_name}",
    using="bm25_sparse_vector",
    limit=3,
    query=models.Document(
        text="cheese",
        model="Qdrant/bm25"
    ),
    with_vectors=True,
)

QueryResponse(points=[ScoredPoint(id=2, version=0, score=0.5619608759880066, payload={'text': 'Mac and cheese'}, vector={'bm25_sparse_vector': SparseVector(indices=[1303191493, 1496964506], values=[1.1956521739130437, 1.1956521739130437])}, shard_key=None, order_value=None), ScoredPoint(id=0, version=0, score=0.49005118012428284, payload={'text': 'Grated hard cheese'}, vector={'bm25_sparse_vector': SparseVector(indices=[862853134, 1277694805, 1496964506], values=[1.042654028436019, 1.042654028436019, 1.042654028436019])}, shard_key=None, order_value=None)])

### SPLADE++ in Qdrant ###

In [8]:
splade_collection_name = "sparse_vector_collection"

if client.collection_exists(collection_name=splade_collection_name):
    client.delete_collection(collection_name=splade_collection_name)

client.create_collection(
    collection_name="{splade_collection_name}",
    sparse_vectors_config={
        "splade_sparse_vector": models.SparseVectorParams(),
    },
)

True

In [9]:
grocery_items_descriptions = [
    "Grated hard cheese",
    "White crusty bread roll",
    "Mac and cheese"
]

client.upsert(
    collection_name="{splade_collection_name}",
    points=[
        models.PointStruct(
            id=i,
            payload={"text": description}, 
            vector={
                "splade_sparse_vector": models.Document( #to run FastEmbed under the hood
                    text=description,
                    model="prithivida/Splade_PP_en_v1"
                )
           },
        ) for i, description in enumerate(grocery_items_descriptions)
    ],
)

Fetching 5 files: 100%|██████████| 5/5 [00:19<00:00,  3.89s/it]


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [11]:
client.query_points(
    collection_name="{splade_collection_name}",
    using="splade_sparse_vector",
    limit=3,
    query=models.Document(
        text="parmesan",
        model="prithivida/Splade_PP_en_v1"
    ),
    with_vectors=True,
)

QueryResponse(points=[ScoredPoint(id=0, version=0, score=0.762168288230896, payload={'text': 'Grated hard cheese'}, vector={'splade_sparse_vector': SparseVector(indices=[1010, 2081, 2524, 2828, 3067, 3528, 4383, 4435, 6211, 8808, 9841, 11825, 21774, 24665], values=[0.5133377313613892, 0.26326102018356323, 2.4472122192382812, 0.3000403344631195, 0.12786251306533813, 0.790509045124054, 2.211527109146118, 0.31894564628601074, 1.150804877281189, 2.51134991645813, 0.5997427105903625, 0.9969340562820435, 0.128986656665802, 2.474409818649292])}, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=0.5772799253463745, payload={'text': 'Mac and cheese'}, vector={'splade_sparse_vector': SparseVector(indices=[1004, 1010, 1998, 2030, 2081, 2833, 3528, 4435, 4489, 4521, 4825, 6097, 6207, 8808, 9440, 9841, 11825], values=[0.7900756001472473, 0.301586389541626, 1.7403925657272339, 0.010728065855801105, 0.3339483439922333, 0.40943023562431335, 0.48172736167907715, 0.46193820238113403,