In [50]:
from qdrant_client import QdrantClient, models
# from fastembed import SparseTextEmbedding, SparseEmbedding


# Shared client for the cells below; targets the Qdrant HTTP endpoint at localhost:6333.
client = QdrantClient(url="http://localhost:6333")
# SparseTextEmbedding.list_supported_models()

## Get Collections

In [51]:
# Ask the server for its collections and show them as a list of plain dicts.
collections = client.get_collections()
collections.model_dump()["collections"]

[{'name': 'document'}]


## Create Collection

In [46]:
from qdrant_client.models import Distance, VectorParams

# (Re)create the small demo collection so this cell can be re-run from a clean state.
collection_name = "test_collection"
# Must equal the length of the vectors used with this collection:
# the points upserted and the query vectors below are all 4-dimensional.
vector_size = 4

try:
    # Drop any previous copy first; creating a collection that already exists fails.
    if client.collection_exists(collection_name):
        client.delete_collection(collection_name=collection_name)
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE),
    )
except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"❌ Failed to create collection: {e}")

## Delete Collection

In [12]:
# client.delete_collection(collection_name="test_collection")

## Add Vectors

In [20]:
from qdrant_client.models import PointStruct

# Insert six 4-dimensional demo points, each carrying a "city" payload.
# The target must be the collection that the query cells below read from
# ("test_collection"); collection names are case-sensitive.
operation_info = client.upsert(
    collection_name="test_collection",
    wait=True,  # presumably blocks until the write is applied — see the Qdrant upsert docs
    points=[
        PointStruct(id=1, vector=[0.05, 0.61, 0.76, 0.74], payload={"city": "Berlin"}),
        PointStruct(id=2, vector=[0.19, 0.81, 0.75, 0.11], payload={"city": "London"}),
        PointStruct(id=3, vector=[0.36, 0.55, 0.47, 0.94], payload={"city": "Moscow"}),
        PointStruct(id=4, vector=[0.18, 0.01, 0.85, 0.80], payload={"city": "New York"}),
        PointStruct(id=5, vector=[0.24, 0.18, 0.22, 0.44], payload={"city": "Beijing"}),
        PointStruct(id=6, vector=[0.35, 0.08, 0.11, 0.44], payload={"city": "Mumbai"}),
    ],
)

print(operation_info)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


## Run a Query

In [21]:
# Similarity search: request the 3 best matches (with payloads) for one query vector,
# then keep only the list of scored points from the response.
response = client.query_points(
    collection_name="test_collection",
    query=[0.2, 0.1, 0.9, 0.7],
    limit=3,
    with_payload=True,
)
search_result = response.points

search_result

[ScoredPoint(id=4, version=0, score=0.99248314, payload={'city': 'New York'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=1, version=0, score=0.89463294, payload={'city': 'Berlin'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=5, version=0, score=0.8543979, payload={'city': 'Beijing'}, vector=None, shard_key=None, order_value=None)]

## Add Filters

In [22]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Same similarity search as before, restricted to points whose payload has city == "London".
london_only = Filter(
    must=[FieldCondition(key="city", match=MatchValue(value="London"))]
)
response = client.query_points(
    collection_name="test_collection",
    query=[0.2, 0.1, 0.9, 0.7],
    query_filter=london_only,
    limit=5,
    with_payload=True,
)
search_result = response.points

search_result

[ScoredPoint(id=2, version=0, score=0.66603535, payload={'city': 'London'}, vector=None, shard_key=None, order_value=None)]

## Step-by-Step: ColBERT + Qdrant Setup


In [14]:
from qdrant_client import QdrantClient, models

# 1. Connect to the Qdrant server (rebinds the notebook-wide `client`)
client = QdrantClient(url="http://localhost:6333")

### 1. Encode Documents

In [23]:
from fastembed import TextEmbedding, LateInteractionTextEmbedding
# Example documents and query
# Model identifiers used for the two vector kinds built in this cell.
DENSE_MODEL = "BAAI/bge-small-en"
COLBERT_MODEL = "colbert-ir/colbertv2.0"

# Example corpus and the question asked against it.
documents = [
    "Artificial intelligence is used in hospitals for cancer diagnosis and treatment.",
    "Self-driving cars use AI to detect obstacles and make driving decisions.",
    "AI is transforming customer service through chatbots and automation.",
    # ...
]
query_text = "How does AI help in medicine?"

# Each models.Document pairs a text with a model name (see the printed repr below).
dense_documents = [models.Document(text=text, model=DENSE_MODEL) for text in documents]
dense_query = models.Document(text=query_text, model=DENSE_MODEL)
print(dense_query)


# Same texts again, this time tagged with the ColBERT model.
colbert_documents = [models.Document(text=text, model=COLBERT_MODEL) for text in documents]
colbert_query = models.Document(text=query_text, model=COLBERT_MODEL)
colbert_query

text='How does AI help in medicine?' model='BAAI/bge-small-en' options=None


Document(text='How does AI help in medicine?', model='colbert-ir/colbertv2.0', options=None)

### 2. Create a Qdrant collection


In [47]:
# Create a collection with two named vectors per point:
#   "dense"   - a single 384-dim vector, size set to match the dense model's output
#   "colbert" - a multivector of 128-dim token vectors compared with MAX_SIM
collection_name = "dense_multivector_demo"

# Make the cell re-runnable: creating a collection that already exists fails,
# so drop any previous copy first.
if client.collection_exists(collection_name):
    client.delete_collection(collection_name=collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config={
        "dense": models.VectorParams(
            size=384,
            distance=models.Distance.COSINE
            # Leave HNSW indexing ON for dense
        ),
        "colbert": models.VectorParams(
            size=128,
            distance=models.Distance.COSINE,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM
            ),
            hnsw_config=models.HnswConfigDiff(m=0)  # Disable HNSW for reranking
        )
    }
)

True

### 3. Upload Documents (Dense + Multivector)


In [53]:
# One point per document: the list position becomes the id, both vector inputs are
# attached under their named slots, and the raw text is kept as payload.
points = [
    models.PointStruct(
        id=idx,
        vector={"dense": dense_doc, "colbert": colbert_doc},
        payload={"text": text},
    )
    for idx, (text, dense_doc, colbert_doc) in enumerate(
        zip(documents, dense_documents, colbert_documents)
    )
]
client.upsert(collection_name="dense_multivector_demo", points=points)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

#### Query with Retrieval + Reranking in One Call


In [26]:
# Stage 1: candidate retrieval against the "dense" vector.
dense_prefetch = models.Prefetch(query=dense_query, using="dense")

# Stage 2: score the prefetched candidates against the "colbert" multivector
# and return the top 3 with their payloads.
results = client.query_points(
    collection_name="dense_multivector_demo",
    prefetch=dense_prefetch,
    query=colbert_query,
    using="colbert",
    with_payload=True,
    limit=3,
)

In [27]:
# Display the QueryResponse produced by the prefetch + rerank query above.
results

QueryResponse(points=[ScoredPoint(id=1, version=0, score=18.812857, payload={'text': 'Self-driving cars use AI to detect obstacles and make driving decisions.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=18.604212, payload={'text': 'AI is transforming customer service through chatbots and automation.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=0, version=0, score=14.950949, payload={'text': 'Artificial intelligence is used in hospitals for cancer diagnosis and treatment.'}, vector=None, shard_key=None, order_value=None)])