In [4]:
!pip install FlagEmbedding -qqq

In [3]:
from FlagEmbedding import BGEM3FlagModel

# Load the BAAI/bge-m3 checkpoint with half precision switched off.
model = BGEM3FlagModel(
    "BAAI/bge-m3",
    use_fp16=False,
)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 22 files: 100%|██████████| 22/22 [00:00<00:00, 43.86it/s]


loading existing colbert_linear and sparse_linear---------


In [5]:
# One-element corpus: the single passage that gets embedded and fed to Vespa.
passage = [
    "BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.",
]


In [28]:
# Encode the passage once, asking for all three outputs: dense vector,
# sparse (lexical) weights and per-token ColBERT vectors.
passage_embeddings = model.encode(
    passage,
    return_dense=True,
    return_sparse=True,
    return_colbert_vecs=True,
)


In [8]:
from vespa.package import Schema, Document, Field, FieldSet
# Schema "m": one document type holding the raw text plus the three
# representations (lexical, dense, ColBERT) declared as bfloat16 tensors.
m_schema = Schema(
    name="m",
    document=Document(
        fields=[
            Field(name="id", type="string", indexing=["summary"]),
            # Raw text, indexed with BM25 enabled.
            Field(
                name="text",
                type="string",
                indexing=["summary", "index"],
                index="enable-bm25",
            ),
            # Sparse weights: mapped dimension `t`.
            Field(
                name="lexical_rep",
                type="tensor<bfloat16>(t{})",
                indexing=["summary", "attribute"],
            ),
            # Dense vector: 1024 indexed cells, angular distance metric.
            Field(
                name="dense_rep",
                type="tensor<bfloat16>(x[1024])",
                indexing=["summary", "attribute"],
                attribute=["distance-metric: angular"],
            ),
            # Multi-vector representation: mapped `t`, 1024 indexed cells per entry.
            Field(
                name="colbert_rep",
                type="tensor<bfloat16>(t{}, x[1024])",
                indexing=["summary", "attribute"],
            ),
        ],
    ),
    fieldsets=[
        FieldSet(name="default", fields=["text"]),
    ],
)

In [9]:
from vespa.package import ApplicationPackage

# Wrap the schema in an application package named "mtest".
vespa_app_name = "mtest"
vespa_application_package = ApplicationPackage(name=vespa_app_name, schema=[m_schema])

In [10]:
from vespa.package import RankProfile, Function,  FirstPhaseRanking


# Rank profile "m3hybrid": a weighted blend of three signals computed from the
# query tensors and the document attributes.
semantic = RankProfile(
    name="m3hybrid",
    # Query-side inputs; tensor types mirror the document fields, except that
    # the ColBERT query tensor uses `qt` for its mapped dimension.
    inputs=[
        ("query(q_dense)", "tensor<bfloat16>(x[1024])"),
        ("query(q_lexical)", "tensor<bfloat16>(t{})"),
        ("query(q_colbert)", "tensor<bfloat16>(qt{}, x[1024])"),
        ("query(q_len_colbert)", "float"),
    ],
    functions=[
        # Cosine similarity between the dense query and document vectors.
        Function(
            name="dense",
            expression="cosine_similarity(query(q_dense), attribute(dense_rep),x)",
        ),
        # Sum of the products of the query and document lexical weights.
        Function(
            name="lexical",
            expression="sum(query(q_lexical) * attribute(lexical_rep))",
        ),
        # For each `qt` entry: dot product over `x`, max over `t`; the maxima
        # are summed over `qt` and divided by query(q_len_colbert).
        Function(
            name="max_sim",
            expression="sum(reduce(sum(query(q_colbert) * attribute(colbert_rep) , x),max, t),qt)/query(q_len_colbert)",
        ),
    ],
    first_phase=FirstPhaseRanking(
        expression="0.4*dense + 0.2*lexical +  0.4*max_sim",
        # NOTE(review): presumably drops hits scoring at or below 0.0 - confirm in Vespa docs.
        rank_score_drop_limit=0.0,
    ),
    # Return the individual signals (plus BM25) with every hit.
    match_features=["dense", "lexical", "max_sim", "bm25(text)"],
)
m_schema.add_rank_profile(semantic)

In [13]:
from vespa.deployment import VespaDocker

# Deploy the application package through VespaDocker; `app` is the handle the
# later cells use for feeding and querying.
vespa_docker = VespaDocker()
app = vespa_docker.deploy(
    application_package=vespa_application_package,
)

Waiting for configuration server, 0/300 seconds...
Waiting for configuration server, 5/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 0/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 5/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 10/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 15/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 20/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 25/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Application is up!
Finished deployment.


In [14]:
# Document fields for the single passage, in the layout declared by schema "m".
vespa_fields = {
    "text": passage[0],
    # Lexical weights, with every weight coerced to a plain float.
    "lexical_rep": {
        token: float(weight)
        for token, weight in passage_embeddings["lexical_weights"][0].items()
    },
    "dense_rep": passage_embeddings["dense_vecs"][0].tolist(),
    # One entry per row of the ColBERT output, keyed by row position.
    "colbert_rep": {
        position: vector.tolist()
        for position, vector in enumerate(passage_embeddings["colbert_vecs"][0])
    },
}

In [15]:
from vespa.io import VespaResponse
# Feed the passage as document 0 of schema "m"; stop here if the feed failed.
response: VespaResponse = app.feed_data_point(
    schema="m",
    data_id=0,
    fields=vespa_fields,
)
assert response.is_successful()

In [16]:
# Encode the query text with the same three outputs requested for the passage.
query = ["Why is the sky blue?"]
query_embeddings = model.encode(
    query,
    return_dense=True,
    return_sparse=True,
    return_colbert_vecs=True,
)

In [17]:
# Number of rows in the query's ColBERT output; sent as query(q_len_colbert).
query_length = len(query_embeddings["colbert_vecs"][0])

In [18]:
# Query-time ranking inputs, keyed by the names the "m3hybrid" profile declares.
query_fields = {
    "input.query(q_lexical)": {
        token: float(weight)
        for token, weight in query_embeddings["lexical_weights"][0].items()
    },
    "input.query(q_dense)": query_embeddings["dense_vecs"][0].tolist(),
    # The ColBERT tensor is sent as the string form of a {row position: vector} dict.
    "input.query(q_colbert)": str(
        {
            position: vector.tolist()
            for position, vector in enumerate(query_embeddings["colbert_vecs"][0])
        }
    ),
    "input.query(q_len_colbert)": query_length,
}

In [None]:
from vespa.io import VespaQueryResponse
import json

# Hybrid retrieval: match on the user query text OR the nearest neighbours of
# the dense query vector, ranked with "m3hybrid". Prints the top hit as JSON.
response: VespaQueryResponse = app.query(
    yql="select id, text from m where userQuery() or ({targetHits:10}nearestNeighbor(dense_rep,q_dense))",
    ranking="m3hybrid",
    query=query[0],
    # Pass a shallow copy of the ranking inputs as the request body.
    body=dict(query_fields),
)
assert response.is_successful()
print(json.dumps(response.hits[0], indent=2))

{
  "id": "index:mtest_content/0/cfcd2084234135f700f08abf",
  "relevance": 0.24681896577832974,
  "source": "mtest_content",
  "fields": {
    "matchfeatures": {
      "bm25(text)": 0.28768207245178085,
      "dense": 0.2560008149555224,
      "lexical": 0.017232894897460938,
      "max_sim": 0.35243015204157147
    },
    "text": "BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction."
  }
}


In [None]:
# Reference value for the "lexical" match feature, computed by the model itself.
model.compute_lexical_matching_score(
    passage_embeddings["lexical_weights"][0],
    query_embeddings["lexical_weights"][0],
)


0.0173909030854702

In [None]:
# Reference value for the "dense" match feature: query/passage dot product.
# NOTE(review): comparable to Vespa's cosine similarity only if the dense
# vectors are L2-normalised - confirm.
(
    query_embeddings["dense_vecs"][0]
    @ passage_embeddings["dense_vecs"][0].T
)


0.25596598

In [None]:
# Reference value for the "max_sim" match feature, computed by the model itself.
model.colbert_score(
    query_embeddings["colbert_vecs"][0],
    passage_embeddings["colbert_vecs"][0],
)


tensor(0.3544)