In [None]:
import sys

# Put the parent directory on the import path so the `core.*` imports in the
# following cells can be resolved (assumes the notebook is run from its own folder).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate ".." entry to sys.path each time.
if ".." not in sys.path:
    sys.path.append("..")


In [None]:
from core.vectordb import client
from core.embedding import dense_embedding_model, sparse_embedding_model

In [None]:
# Sample search result used as the document to embed and store below.
source = dict(
    url="https://www.weather.gov/forecast",
    title="Weather Forecast",
    snippet="Get the latest weather forecast for your area.",
    from_cache=False,
)

In [None]:
# Embed the snippet with both models. Each embed() result is materialised
# into a list and only its first element is kept.
snippet_text = source["snippet"]
dense_embeddings = [*dense_embedding_model.embed(snippet_text)][0]
sparse_embeddings = [*sparse_embedding_model.embed(snippet_text)][0]

# Show the sparse embedding as the cell output (swap in dense_embeddings to inspect that one).
sparse_embeddings


In [None]:
# upsert to Qdrant
import uuid

# Copy the text fields from `source`, defaulting to "" for any missing key.
payload = {field: source.get(field, "") for field in ("url", "title", "snippet", "query")}
# Always stored as True here, independent of source["from_cache"].
payload["from_cache"] = True

# One point carrying both named vectors, identified by a fresh random UUID string.
point = {
    "id": str(uuid.uuid4()),
    "vector": {
        "bge_dense_vector": dense_embeddings,
        "bm25_sparse_vector": sparse_embeddings.as_object(),
    },
    "payload": payload,
}
points = [point]

client.upsert(collection_name="cache-2", points=points)

In [None]:
# Search text reused by the dense and hybrid queries in the following cells.
query = "Weather"

In [None]:
# Dense-only search: embed the query text, then search the named dense vector.
dense_query_vector = next(dense_embedding_model.query_embed(query))

client.query_points(
    "cache-2",
    query=dense_query_vector,
    using="bge_dense_vector",
    limit=10,
    with_payload=True,
)


In [None]:
from qdrant_client import models

# Hybrid search: fetch candidates separately from the dense and the sparse
# vector (20 each), then fuse both candidate lists with RRF and keep 10.
dense_query_vector = next(dense_embedding_model.query_embed(query))
sparse_query_vector = next(sparse_embedding_model.query_embed(query)).as_object()

prefetch = [
    models.Prefetch(query=dense_query_vector, using="bge_dense_vector", limit=20),
    models.Prefetch(query=sparse_query_vector, using="bm25_sparse_vector", limit=20),
]

results = client.query_points(
    "cache-2",
    prefetch=prefetch,
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    with_payload=True,
    limit=10,
)
results

In [1]:
import sys

# Put the parent directory on the import path before importing from `core`
# (assumes the notebook is run from its own folder). The membership check
# keeps the cell idempotent: re-running it does not add a duplicate ".." entry.
if ".." not in sys.path:
    sys.path.append("..")

from core.semantic_search_cache import semantic_cache

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Minimal source record to store through the semantic cache wrapper.
source = dict(
    title="new york",
    snippet="new york is a city",
    url="https://www.nyc.gov",
    query="new york",
)

# add() is given a list of sources, so the single record is wrapped in a list.
semantic_cache.add([source])

Successfully added sources to cache.


In [7]:
# Look up a query that does not match the "new york" entry added above;
# the recorded run returned an empty list for it.
query = "who are you"
cached_hits = semantic_cache.get(query)
cached_hits

[]