# Query time weights

The weighting of particular attributes can be set and tweaked at query time - there is no need to re-embed the whole dataset if you wish to alter some weights.

In [1]:
%pip install superlinked==3.14.0

In [2]:
import pandas as pd

from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.common.schema.schema import schema
from superlinked.framework.common.schema.schema_object import String, Integer
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.space.text_similarity_space import TextSimilaritySpace

from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
)
from superlinked.framework.dsl.space.number_space import NumberSpace, Mode
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.dsl.query.query import Query

# Let result tables show up to 100 characters per cell so paragraph bodies are not truncated early.
pd.options.display.max_colwidth = 100

In [3]:
# Schema describing one paragraph record that gets ingested and queried in this notebook.
@schema
class Paragraph:
    # Identifier of the record.
    id: IdField
    # Free-text content; this is the field the text similarity space is built on.
    body: String
    # Like counter; this is the field the number space is built on.
    like_count: Integer


paragraph = Paragraph()

# Sentence-transformers model used to embed the paragraph bodies.
TEXT_EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
# Bounds handed to the number space for the like counter.
MIN_LIKE_COUNT, MAX_LIKE_COUNT = 0, 100

# Space built on the text of the paragraph body.
body_space = TextSimilaritySpace(text=paragraph.body, model=TEXT_EMBEDDING_MODEL)

# Space built on the like counter, configured with Mode.MAXIMUM.
like_space = NumberSpace(
    number=paragraph.like_count,
    min_value=MIN_LIKE_COUNT,
    max_value=MAX_LIKE_COUNT,
    mode=Mode.MAXIMUM,
)

# A single index can combine several spaces - simply list them all.
paragraph_index = Index([body_space, like_space])

In [4]:
# The in-memory source is the entry point through which Paragraph records are ingested.
source = InMemorySource(paragraph)

# Wire the source and the index into an in-memory executor, then start it to obtain
# the app object that queries are run against.
executor = InMemoryExecutor(
    sources=[source],
    indices=[paragraph_index],
)
app = executor.run()

In [5]:
# Two sample paragraphs: one popular but unrelated to AI, one AI-related with few likes.
sample_paragraphs = [
    {
        "id": "paragraph-1",
        "body": "Glorious animals live in the wilderness.",
        "like_count": 75,
    },
    {
        "id": "paragraph-2",
        "body": "Growing computation power enables advancements in AI.",
        "like_count": 10,
    },
]

# Ingest the records through the source.
source.put(sample_paragraphs)

## Defining queries

Query weights are set when the query is defined, so there is no need to re-embed the dataset when you want to query it with different weights.

The `body_query` here gives double weight to the text provided in the `.similar` query compared to the like count, while the `like_query` searches the other way around: likes are twice as important, hence the differing results.

In [6]:
# Both queries search for the same text; only the weights of the two spaces differ.
QUERY_TEXT = "What makes the AI industry go forward?"


def build_weighted_query(body_weight: float, like_weight: float):
    """Build a similarity query on ``paragraph_index`` with the given space weights.

    The query looks for paragraphs similar to ``QUERY_TEXT`` in ``body_space``;
    the weights decide how much each space contributes relative to the other.

    Args:
        body_weight: Weight assigned to ``body_space`` (text similarity).
        like_weight: Weight assigned to ``like_space`` (like count).

    Returns:
        The query object produced by the ``Query(...).find(...).similar(...)``
        chain, ready to be passed to ``app.query``.
    """
    return (
        Query(
            paragraph_index,
            weights={
                body_space: body_weight,
                like_space: like_weight,
            },
        )
        .find(paragraph)
        .similar(body_space.text, QUERY_TEXT)
    )


# Text similarity is weighted twice as heavily as the like count.
body_query = build_weighted_query(body_weight=1.0, like_weight=0.5)

# The other way around: the like count is weighted twice as heavily as the text.
like_query = build_weighted_query(body_weight=0.5, like_weight=1.0)

In [7]:
# Run the body-weighted query and tabulate the stored objects of the hits,
# in the order the result returns them.
body_result = app.query(body_query)

body_rows = [hit.stored_object for hit in body_result.entries]
pd.DataFrame(body_rows)

Unnamed: 0,body,like_count,id
0,Growing computation power enables advancements in AI.,10,paragraph-2
1,Glorious animals live in the wilderness.,75,paragraph-1


In [8]:
# Run the like-weighted query and tabulate the stored objects of the hits,
# in the order the result returns them.
like_result = app.query(like_query)

like_rows = [hit.stored_object for hit in like_result.entries]
pd.DataFrame(like_rows)

Unnamed: 0,body,like_count,id
0,Glorious animals live in the wilderness.,75,paragraph-1
1,Growing computation power enables advancements in AI.,10,paragraph-2
