In [1]:
from qdrant_client import QdrantClient
from qdrant_client import models
import json
import uuid

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Client handle for the Qdrant server expected at http://localhost:6333.
# Every later cell that creates, fills or queries the collection uses it.
qdClient = QdrantClient("http://localhost:6333")

In [3]:
# Name of the Qdrant collection that the later cells create, fill and query.
collection_name="stardew-sparse-and-dense"
# Model handle passed as `model=` for the dense vector named "jina-small".
vector_model_handle = "jinaai/jina-embeddings-v2-small-en"
# Size declared for the dense vector when the collection is created.
EMBEDDING_DIMENSIONALITY = 512
# Model handle passed as `model=` for the sparse vector named "bm25".
# NOTE(review): the identifier is misspelled ("spasrse"); other cells refer to
# it under this exact spelling, so any rename has to be done everywhere at once.
spasrse_model_handle="Qdrant/bm25"

In [None]:
# Start from empty lists so both names exist before anything is read.
tables, texts = [], []

# Table summaries: parse the JSON file, then tag every record as a table.
with open('../data/summarized_tables.json', 'r', encoding="utf-8") as tables_file:
    tables = json.load(tables_file)
for table_record in tables:
    table_record["content_type"] = "table"

# Text summaries: same procedure, tagged as plain text.
with open('../data/summarized_texts.json', 'r', encoding="utf-8") as texts_file:
    texts = json.load(texts_file)
for text_record in texts:
    text_record["content_type"] = "text"

In [None]:
# Dense side: one named vector, "jina-small", sized for
# jinaai/jina-embeddings-v2-small-en and compared by cosine distance.
dense_vectors_config = {
    "jina-small": models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE,
    ),
}

# Sparse side: one named vector, "bm25", configured with the IDF modifier.
sparse_vectors_config = {
    "bm25": models.SparseVectorParams(modifier=models.Modifier.IDF),
}

# Create the collection holding both vector kinds on every point.
qdClient.create_collection(
    collection_name=collection_name,
    vectors_config=dense_vectors_config,
    sparse_vectors_config=sparse_vectors_config,
)

In [None]:
# Build one point per text record. The list is (re)created here, so this cell
# resets any points collected earlier.
points = []
for text_record in texts:
    # Single string combining page title, section title and body text.
    embed_input = f"Page title (2X importance): {text_record['page_title']}. Section title: {text_record['section_title']}. text: {text_record['text']}"

    # The same string is submitted for both named vectors of the collection.
    named_vectors = {
        "jina-small": models.Document(text=embed_input, model=vector_model_handle),
        "bm25": models.Document(text=embed_input, model=spasrse_model_handle),
    }

    # Random id per point; the whole source record is stored as payload.
    points.append(
        models.PointStruct(
            id=uuid.uuid4().hex,
            vector=named_vectors,
            payload=text_record,
        )
    )

In [None]:
# Add one point per table record to the existing `points` list.
# This cell only appends, so running it twice adds every table point twice.
for table_record in tables:
    # Single string combining page title, section title and the table summary.
    embed_input = f"Page title (2X importance): {table_record['page_title']}. Section title: {table_record['section_title']}. Table summary: {table_record['summary']}"

    # The same string is submitted for both named vectors of the collection.
    named_vectors = {
        "jina-small": models.Document(text=embed_input, model=vector_model_handle),
        "bm25": models.Document(text=embed_input, model=spasrse_model_handle),
    }

    # Random id per point; the whole source record is stored as payload.
    points.append(
        models.PointStruct(
            id=uuid.uuid4().hex,
            vector=named_vectors,
            payload=table_record,
        )
    )


In [None]:
def batch_upsert(qdClient, collection_name, points, batch_size: int = 500) -> None:
    """Upsert ``points`` into a collection in consecutive slices.

    Args:
        qdClient: Object exposing ``upsert(collection_name=..., points=...)``.
        collection_name: Name of the target collection.
        points: Sequence of points; it must support ``len()`` and slicing.
        batch_size: Maximum number of points sent per ``upsert`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would make range() yield nothing, so the call would
    # "succeed" without uploading a single point. Reject it up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    total = len(points)
    for start in range(0, total, batch_size):
        # Clamp the end so the last (possibly shorter) batch reports correctly.
        end = min(start + batch_size, total)
        qdClient.upsert(collection_name=collection_name, points=points[start:end])
        print(f"✅ Upserted {end}/{total}")

In [None]:
# Upload every prepared point, at most 1000 points per upsert call.
batch_upsert(
    qdClient=qdClient,
    collection_name=collection_name,
    points=points,
    batch_size=1000,
)

In [5]:
def multi_stage_search(client, collection_name, query: str, limit: int = 5) -> list[models.ScoredPoint]:
    """Two-stage retrieval: dense prefetch, then a sparse query over it.

    Args:
        client: Object exposing ``query_points`` (the Qdrant client).
        collection_name: Collection to search.
        query: Free-text question; sent to both models unchanged.
        limit: Number of points requested from the final stage.

    Returns:
        The ``points`` attribute of the ``query_points`` response.
    """
    # Stage 1: pull three times as many dense candidates as will finally be
    # returned, so the second stage has room to reorder them.
    dense_candidates = models.Prefetch(
        query=models.Document(text=query, model=vector_model_handle),
        using="jina-small",
        limit=3 * limit,
    )

    # Stage 2: the same text, queried against the sparse "bm25" vector.
    sparse_query = models.Document(text=query, model=spasrse_model_handle)

    response = client.query_points(
        collection_name=collection_name,
        prefetch=[dense_candidates],
        query=sparse_query,
        using="bm25",
        limit=limit,
        with_payload=True,
    )
    return response.points

In [6]:
# Sample question, reused by the search cells below.
question = "How to become friends with Abigail?"

# Run the dense-prefetch / sparse-query search and display the hits.
results = multi_stage_search(
    client=qdClient,
    collection_name=collection_name,
    query=question,
)
results

[ScoredPoint(id='cd4e3a54-4fe0-4e61-951d-f04920d40072', version=0, score=15.683756, payload={'page_title': 'Abigail', 'section_title': 'Relationships', 'text': "Abigail is friends with Sam and Sebastian, and will dance with Sebastian at the Flower Dance if the player doesn't ask her or Sebastian to dance.", 'content_type': 'text'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='abded959-5b46-4df0-bf82-f4e9d73f694e', version=19, score=15.176384, payload={'page_title': 'Abigail', 'section_title': 'Ten Hearts', 'table_html': '<table><tr><td>Details</td></tr><tr><td>Abigail is preparing to go down the ladder into the mine when she&#x27;s startled by a bat flying up into her face. She laughs it off and looks back down the hole. Hundreds more bats fly up and she&#x27;s terrified, running to cower in the corner of the cave. You rush in to comfort her. &quot;What happened?&quot; (+20 friendship .) &quot;Are you okay?&quot; (+40 friendship .) She tells you that maybe she isn&#

In [7]:
def rrf_search(client, collection_name, query: str, limit: int = 5) -> list[models.ScoredPoint]:
    """Hybrid search that fuses dense and sparse candidates with RRF.

    Two prefetches are issued for the same query text, one against the dense
    "jina-small" vector and one against the sparse "bm25" vector, each asking
    for ``5 * limit`` candidates. A fusion query using ``models.Fusion.RRF``
    then combines them.

    Args:
        client: Object exposing ``query_points`` (the Qdrant client).
        collection_name: Collection to search.
        query: Free-text question; sent to both models unchanged.
        limit: Maximum number of fused points to return.

    Returns:
        The ``points`` attribute of the ``query_points`` response.
    """
    candidates_per_branch = 5 * limit

    results = client.query_points(
        collection_name=collection_name,
        prefetch=[
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model=vector_model_handle,
                ),
                using="jina-small",
                limit=candidates_per_branch,
            ),
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model=spasrse_model_handle,
                ),
                using="bm25",
                limit=candidates_per_branch,
            ),
        ],
        # Fusion query enables fusion on the prefetched results
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        # Pass the limit explicitly: without it the client default of 10
        # applies, so asking for more than 10 hits silently returned only 10.
        limit=limit,
        with_payload=True,
    )

    return results.points

In [8]:
# Run the RRF hybrid search for the same question and display the hits.
results = rrf_search(
    client=qdClient,
    collection_name=collection_name,
    query=question,
)
results

[ScoredPoint(id='abded959-5b46-4df0-bf82-f4e9d73f694e', version=19, score=0.7, payload={'page_title': 'Abigail', 'section_title': 'Ten Hearts', 'table_html': '<table><tr><td>Details</td></tr><tr><td>Abigail is preparing to go down the ladder into the mine when she&#x27;s startled by a bat flying up into her face. She laughs it off and looks back down the hole. Hundreds more bats fly up and she&#x27;s terrified, running to cower in the corner of the cave. You rush in to comfort her. &quot;What happened?&quot; (+20 friendship .) &quot;Are you okay?&quot; (+40 friendship .) She tells you that maybe she isn&#x27;t as tough as she thought. &quot;You&#x27;re safe with me.&quot; (+20 friendship .) &quot;I get scared too.&quot; (+40 friendship .) &quot;You&#x27;re crying like a little baby. Stop.&quot; (-50 friendship .) She confesses that she likes you as more than a friend, and asks you to stay with her there. She hugs you. If your character is a girl, she will also state that she didn&#x27;

In [9]:
# Show the stored payload of the first hit in `results`.
results[0].payload

{'page_title': 'Abigail',
 'section_title': 'Ten Hearts',
 'table_html': '<table><tr><td>Details</td></tr><tr><td>Abigail is preparing to go down the ladder into the mine when she&#x27;s startled by a bat flying up into her face. She laughs it off and looks back down the hole. Hundreds more bats fly up and she&#x27;s terrified, running to cower in the corner of the cave. You rush in to comfort her. &quot;What happened?&quot; (+20 friendship .) &quot;Are you okay?&quot; (+40 friendship .) She tells you that maybe she isn&#x27;t as tough as she thought. &quot;You&#x27;re safe with me.&quot; (+20 friendship .) &quot;I get scared too.&quot; (+40 friendship .) &quot;You&#x27;re crying like a little baby. Stop.&quot; (-50 friendship .) She confesses that she likes you as more than a friend, and asks you to stay with her there. She hugs you. If your character is a girl, she will also state that she didn&#x27;t know she liked other girls until she met you.</td></tr></table>',
 'summary': 'Abig