# FLAX Sentence Embeddings - Inference API

SPDX-License-Identifier: 0BSD

In [1]:
from huggingface_hub import InferenceApi
import numpy as np

In [2]:
# Read the Hugging Face API token from a local file rather than hard-coding it
# in the notebook; surrounding whitespace (e.g. a trailing newline) is dropped.
with open('.hf_token', encoding='utf-8') as token_file:
    api_token = token_file.read().strip()

In [3]:
# Client bound to the model's sentence-similarity task: it is called with a
# source sentence plus candidate sentences and returns one score per candidate.
compare = InferenceApi(
    token=api_token,
    task='sentence-similarity',
    repo_id='flax-sentence-embeddings/all_datasets_v3_roberta-large',
)

In [4]:
# One reference sentence and the candidate sentences to score against it.
inputs = {
    "source_sentence": "That is a happy person",
    "sentences": [
        "That is a happy dog",
        "That is a very happy person",
        "Today is a sunny day",
    ],
}

# Cast each returned score to float32 for display.
list(map(np.float32, compare(inputs)))

[0.7271146, 0.97182184, 0.31183147]

In [5]:
# Second client for the same model, this time requesting the feature-extraction
# task so the embeddings themselves are returned. The library prints a warning
# because this is not the task declared in the repository.
embed = InferenceApi(
    token=api_token,
    task='feature-extraction',
    repo_id='flax-sentence-embeddings/all_datasets_v3_roberta-large',
)

You're using a different task than the one specified in the repository. Be sure to know what you're doing :)


In [6]:
# The same four sentences used in the similarity request: the source sentence
# first, followed by the three candidates.
texts = [
    "That is a happy person",
    "That is a happy dog",
    "That is a very happy person",
    "Today is a sunny day",
]

# A single request embeds all four sentences; each returned embedding is
# converted to a float32 array and bound to its own name.
v1, v2, v3, v4 = [
    np.array(vector, dtype=np.float32) for vector in embed(inputs=texts)
]

In [7]:
# Euclidean length of each sentence embedding. Lengths of ~1 mean the plain
# dot products computed below can be read as cosine similarities.
list(map(np.linalg.norm, (v1, v2, v3, v4)))

[1.0, 0.99999994, 1.0, 0.99999994]

In [8]:
# "happy person" vs. "happy dog": should agree with the first similarity score
# returned by the sentence-similarity call.
v1.dot(v2)

0.72711456

In [9]:
# "happy person" vs. "very happy person": should agree with the second
# similarity score returned by the sentence-similarity call.
v1.dot(v3)

0.9718217

In [10]:
# "happy person" vs. "sunny day": should agree with the third similarity score
# returned by the sentence-similarity call.
v1.dot(v4)

0.3118313

In [11]:
def _read_as_single_line(path):
    """Return the UTF-8 text at `path`, stripped, with newlines turned into spaces."""
    with open(path, encoding='utf-8') as handle:
        return handle.read().strip().replace('\n', ' ')


tow = _read_as_single_line('the_open_window.txt')
tow_modified = _read_as_single_line('the_open_window_modified.txt')

# The comparison that follows is only meaningful if the two texts really differ.
assert tow != tow_modified

# Each text is sent as a single string in its own request, original first.
w1, w2 = (
    np.array(embed(inputs=text), dtype=np.float32)
    for text in (tow, tow_modified)
)

In [12]:
# Euclidean length of the embedding of the original text (expected to be ~1).
np.linalg.norm(w1)

0.9999999

In [13]:
# Euclidean length of the embedding of the modified text (expected to be ~1).
np.linalg.norm(w2)

0.9999999

In [14]:
# Dot product of the two text embeddings; with lengths of ~1 this is their
# cosine similarity.
w1.dot(w2)

0.99999976

In [15]:
# Exact element-wise comparison: True means the two embeddings are identical
# even though the input texts differ (asserted when they were loaded), i.e. the
# modification didn't affect the embedding.
# NOTE(review): presumably the edit lies in a part of the text the model does
# not use (e.g. beyond an input-length truncation limit) — confirm.
(w1 == w2).all()

True

In [16]:
# Shape of the embedding returned for a single input string.
w1.shape

(1024,)