In [1]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so
# modules that live one level above this notebook can be imported.
project_root = Path().absolute().parent
# Guard the append: re-running this cell must not stack duplicate entries
# onto sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [41]:
from qdrant_client import QdrantClient, models
from rich import print

# Embedding model identifiers: one dense model, one sparse (BM25) model.
DENSE_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
SPARSE_MODEL = "Qdrant/bm25"

# Collection that the search cells below query.
collection_name = "episodes_enhanced"

# Client for the Qdrant server listening on localhost:6333.
qdrant_client = QdrantClient(url="http://localhost:6333")

# Disabled: the client-side model registration is not used; the notebook
# builds its embeddings explicitly with fastembed in the following cells.
# qdrant_client.set_model(DENSE_MODEL)
# qdrant_client.set_sparse_model(SPARSE_MODEL)

In [42]:
from fastembed.embedding import TextEmbedding
from fastembed.sparse.bm25 import Bm25

# Instantiate the two fastembed encoders used to embed the search query.
# NOTE(review): Bm25 is created with its default settings; presumably these
# must match the settings used when the collection was indexed (e.g. the
# stemming/stop-word language) — TODO confirm.
dense_embedding_model = TextEmbedding(model_name=DENSE_MODEL)
sparse_embedding_model = Bm25(model_name=SPARSE_MODEL)

In [126]:
# Free-text search query that the next cell embeds. The leading space is part
# of the literal.
# NOTE(review): the text renders as mis-encoded characters in this view —
# confirm the notebook file is stored as UTF-8 and the literal is intact.
query_text = " ุฌุงุฏู ุนู ุงุตู ุงููุงู"

In [127]:
# Embed the query with both encoders. `query_embed` is consumed with next(),
# so only the first item it produces (the embedding of `query_text`) is kept.
query_dense_vector = next(dense_embedding_model.query_embed(query_text))
query_sparse_vector = next(sparse_embedding_model.query_embed(query_text))

In [128]:
# Candidate-retrieval stages handed to `query_points` below.
# NOTE: the variable name is misspelled ("prefecth"), but it is kept as-is
# because the query cell refers to it by this exact name.
prefecth = [
    # Dense stage, currently switched off — only the sparse stage is active.
    # models.Prefetch(
    #     query=query_dense_vector,
    #     using="fast-paraphrase-multilingual-minilm-l12-v2",
    #     limit=40,
    # ),
    # Sparse (BM25) stage: up to 40 candidates from the named sparse vector.
    models.Prefetch(
        using="fast-sparse-bm25",
        limit=40,
        query=models.SparseVector(**query_sparse_vector.as_object()),
    ),
]

In [129]:
# Run the search: gather candidates through the prefetch stages, fuse them
# with Reciprocal Rank Fusion, and keep the top 10 hits with their payloads.
# NOTE(review): with a single active prefetch stage the fusion only has one
# ranked list to work with — presumably scores then reflect rank only;
# TODO confirm.
result = qdrant_client.query_points(
    collection_name=collection_name,
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    prefetch=prefecth,
    with_payload=True,
    limit=10,
)

In [130]:
# Dump the raw response object; `print` here is rich's print imported earlier.
print(result)

In [131]:
# Show only the hits whose "podcast_author" payload field contains the target
# substring.
for r in result.points:
    # A point's payload may be None, and the field may be absent or null;
    # treat every such case as "no author" instead of raising mid-loop.
    if "ุซูุงููุฉ" in ((r.payload or {}).get("podcast_author") or ""):
        print(r.payload)

In [132]:
# Print every returned point: its payload, followed by its score.
for r in result.points:
    print(r.payload)
    # ".3" has no type letter, so this formats to 3 significant digits
    # (general format), not 3 decimal places.
    print(f"{r.score:.3}")