In [1]:
from dotenv import load_dotenv
load_dotenv()

from openai import Client

client = Client()  # OPENAI_API_KEY

def embed(query: str):
    """Return the embedding vector for ``query``.

    Sends ``query`` to the embeddings endpoint of the module-level ``client``
    using the "text-embedding-3-small" model and returns the embedding of the
    first item in the response data.
    """
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=query,
    )
    first_item = response.data[0]
    return first_item.embedding

In [2]:
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct, VectorParams, SparseVector, NamedVector

# Name of the collection that the later cells create, fill and query.
collection_name = "filter_testing"

# Client bound to the Qdrant instance at the local URL below.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [10]:
# Create the collection only when it is missing, so that re-running this cell
# (or the whole notebook against a server that already holds the collection)
# does not fail because the collection already exists.
if not qdrant_client.collection_exists(collection_name):
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config={
            # Single named vector "dense"; its size must equal the length of
            # the vectors returned by embed().
            "dense": VectorParams(size=1536, distance=models.Distance.DOT),
        },
    )

True

In [11]:
# Sample texts that differ only in the character after the colon.
data = [
    "something, with:a",
    "something, with:b",
    "something, with:c",
    "something, with:d",
    "something, with:e",
]

# One point per text: the list position is the point id, the embedding is
# stored under the "dense" vector name, and the raw text goes in the payload.
points = [
    PointStruct(id=point_id, vector={"dense": embed(text)}, payload={"text": text})
    for point_id, text in enumerate(data)
]

In [12]:
# Write all prepared points into the collection in a single request.
qdrant_client.upsert(collection_name=collection_name, points=points)

In [None]:

# (payload field name, index schema) pairs to index in the collection.
index_fields = [("text", models.PayloadSchemaType.TEXT)]

for field_name, field_schema in index_fields:
    qdrant_client.create_payload_index(
        collection_name=collection_name,
        field_name=field_name,
        field_schema=field_schema,
    )

In [5]:
# Embed the query text once; the search cell passes this vector to the "dense" prefetch.
dense_vector = embed("something with")

In [13]:
# Build the text filter once so the prefetch stage and the final query share it.
text_filter = models.Filter(
    must=[
        models.FieldCondition(key="text", match=models.MatchText(text="h:a")),
    ],
)

search_result = qdrant_client.query_points(
    collection_name=collection_name,
    prefetch=[
        models.Prefetch(
            query=dense_vector,
            using="dense",
            # The filter must be given to the prefetch itself: a filter passed
            # only at the root query is not propagated into prefetch stages, so
            # the prefetch would otherwise return the 2 nearest points
            # unfiltered and matching points outside that top 2 would be lost.
            filter=text_filter,
            limit=2,
        ),
    ],
    # Fuse the prefetch results with Reciprocal Rank Fusion.
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    # Kept at the root level as well so the final result set is restricted too.
    query_filter=text_filter,
)

# Print the stored text of every returned point.
for p in search_result.points:
    print(f"Result: {p.payload['text']}")