In [4]:
from qdrant_client import QdrantClient

# Address of the Qdrant HTTP endpoint this notebook talks to.
QDRANT_URL = "http://localhost:6333"

# Shared client handle used by the cells below.
client = QdrantClient(url=QDRANT_URL)

In [5]:
from qdrant_client.models import Distance, VectorParams

# Create the demo collection: 4-dimensional vectors compared by dot product.
# create_collection fails if the collection already exists, so guard the call
# to keep this cell safe under "Restart Kernel -> Run All" against a server
# that still holds data from a previous run.
if not client.collection_exists(collection_name="test_collection"):
    client.create_collection(
        collection_name="test_collection",
        vectors_config=VectorParams(size=4, distance=Distance.DOT),
    )

# Cell output: confirm that the collection is available.
client.collection_exists(collection_name="test_collection")

True

In [6]:
from qdrant_client.models import PointStruct

# Demo data as (point id, 4-d vector, city) rows; the city is stored as payload.
city_rows = [
    (1, [0.05, 0.61, 0.76, 0.74], "Berlin"),
    (2, [0.19, 0.81, 0.75, 0.11], "London"),
    (3, [0.36, 0.55, 0.47, 0.94], "Moscow"),
    (4, [0.18, 0.01, 0.85, 0.80], "New York"),
    (5, [0.24, 0.18, 0.22, 0.44], "Beijing"),
    (6, [0.35, 0.08, 0.11, 0.44], "Mumbai"),
]

# One PointStruct per row, in the same order as the table above.
city_points = [
    PointStruct(id=point_id, vector=vector, payload={"city": city})
    for point_id, vector, city in city_rows
]

# Insert (or overwrite) the points in the demo collection.
operation_info = client.upsert(
    collection_name="test_collection",
    wait=True,
    points=city_points,
)

print(operation_info)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


In [21]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Restrict the search to points whose "city" payload equals "London".
london_only = Filter(
    must=[FieldCondition(key="city", match=MatchValue(value="London"))]
)

# Ask for at most 3 matches, returning both payloads and stored vectors.
query_response = client.query_points(
    collection_name="test_collection",
    query=[0.2, 0.1, 0.9, 0.7],
    query_filter=london_only,
    limit=3,
    with_payload=True,
    with_vectors=True,
)

# Keep only the list of scored points from the response object.
search_result = query_response.points

print(search_result)

[ScoredPoint(id=2, version=0, score=0.871, payload={'city': 'London'}, vector=[0.19, 0.81, 0.75, 0.11], shard_key=None, order_value=None), ScoredPoint(id=5, version=1, score=0.572, payload={'city': 'London'}, vector=[0.24, 0.18, 0.22, 0.44], shard_key=None, order_value=None)]


In [5]:
from qdrant_client import QdrantClient # type: ignore
from qdrant_client.http import models # type: ignore
from fastembed import TextEmbedding, SparseTextEmbedding # type: ignore


# Identifier of the sparse embedding model to load.
SPARSE_MODEL_NAME = "Qdrant/bm42-all-minilm-l6-v2-attentions"
sparse_embedding_model = SparseTextEmbedding(SPARSE_MODEL_NAME)

# Sample data
documents = ["Xin chào, Tôi là Chiến"]

# Collect every embedding produced for the documents into a list.
sparse_vectors = [embedding for embedding in sparse_embedding_model.embed(documents)]

# Cell output: the sparse embedding of the first (and only) document.
sparse_vectors[0]

SparseEmbedding(values=array([0.34575414, 0.37521228, 0.24505894, 0.19813086, 0.36655544]), indices=array([  55504253, 2015064403,  665770872,  823349694,  719869266]))

In [1]:
# Example retrieval results: each record carries the text and its metadata.
retrieved_info = [
    {
        "content": "Xin chào, Tôi là Chiến",
        "metadata": {"city": "Hanoi", "country": "Vietnam"},
    },
    {
        "content": "Tôi thích ăn phở",
        "metadata": {"city": "Hanoi", "country": "Vietnam"},
    },
]


def _format_record(record):
    """Render one record as two lines; missing keys are shown as 'N/A'."""
    content = record.get('content', 'N/A')
    metadata = record.get('metadata', 'N/A')
    return f"Content: {content}\nMetadata: {metadata}"


# Join all rendered records with newlines, or use a fixed message when empty.
if retrieved_info:
    retrieved_info_str = "\n".join(map(_format_record, retrieved_info))
else:
    retrieved_info_str = "No relevant information found."
print(retrieved_info_str)

Content: Xin chào, Tôi là Chiến
Metadata: {'city': 'Hanoi', 'country': 'Vietnam'}
Content: Tôi thích ăn phở
Metadata: {'city': 'Hanoi', 'country': 'Vietnam'}
