In [2]:
# Hugging Face embeddings https://python.langchain.com/docs/integrations/platforms/huggingface#embedding-models
# https://python.langchain.com/docs/integrations/text_embedding/huggingfacehub

Use the Hugging Face Inference API to embed text into a vector.
Compare the similarity between two embedded texts.

In [3]:
import os
from dotenv import load_dotenv

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.evaluation import load_evaluator


In [4]:
def prompt_for_api_key():
    """Interactively ask the user for their Hugging Face API key.

    The key is read with ``getpass.getpass`` so that it is not echoed
    while being typed.

    Returns:
        str: The key exactly as entered by the user (may be an empty string).
    """
    import getpass

    # Hand the entered value back to the caller. Binding it to a local name
    # only would make the function return None and lose the key.
    return getpass.getpass("Enter your api key")

In [5]:
# Load variables from a local .env file (if one exists) into the environment,
# then look up the Hugging Face key. The lookup yields None when it is unset.
load_dotenv()

HF_API_KEY = os.environ.get("HF_API_KEY")

In [6]:
# No key came from the environment, so fall back to asking the user.
# Keep whatever the prompt helper returns instead of discarding it; if the
# helper returns nothing, HF_API_KEY simply keeps its current value.
if not HF_API_KEY:
    HF_API_KEY = prompt_for_api_key() or HF_API_KEY

In [7]:
# Identifier of the embedding model to query.
MODEL_NAME = "sentence-transformers/all-MiniLM-l6-v2"

# Embedding client for MODEL_NAME, authenticated with HF_API_KEY.
embedder = HuggingFaceInferenceAPIEmbeddings(model_name=MODEL_NAME, api_key=HF_API_KEY)

In [8]:
# Ask the user for a piece of text and request its embedding from the embedder.
user_text = input("Enter some text to embed: ")
embedding = embedder.embed_query(user_text)

# Print the raw embedding so its values can be inspected.
print(embedding)

[-0.006138473283499479, 0.031011775135993958, 0.06479360163211823, 0.010941474698483944, 0.0052671595476567745, -0.04747648164629936, 0.08120307326316833, 0.028980961069464684, 0.06676197797060013, 0.030300434678792953, 0.057464972138404846, -0.008623574860394001, 0.0013227601302787662, 0.0003991640987806022, -0.01884303241968155, -0.025793777778744698, -0.013042047619819641, -0.052624959498643875, -0.05829250440001488, -0.025899222120642662, -0.0333736315369606, 0.024567853659391403, -0.005226586479693651, 0.02300594188272953, 0.03286076709628105, 0.07502181828022003, 0.005801758263260126, -0.014958555810153484, -0.02875291183590889, -0.11855192482471466, -0.03932179883122444, -0.05138753354549408, 0.0766182541847229, 0.04840371012687683, -0.03025643341243267, -0.09143415838479996, 0.05118214339017868, -0.009649625979363918, -0.02151087485253811, -0.07177411764860153, -0.06322427093982697, -0.01766885630786419, 0.028081467375159264, 0.09004710614681244, 0.019417941570281982, 0.0054544

In [16]:
# Build LangChain's "embedding_distance" evaluator on top of the same embedder
# used above, so compared texts are embedded with the same model.
evaluator = load_evaluator(
    "embedding_distance",
    embeddings=embedder,
)

In [17]:
user_text_compare = input("Enter some text to compare: ")
# This vector is kept for inspection / use by other cells. The evaluation
# below is given the raw strings, not this pre-computed embedding.
embedding_compare = embedder.embed_query(user_text_compare)

# The "embedding_distance" evaluator reports how far apart the two texts are,
# so a LOWER score means the texts are MORE alike. The variable name
# `similarity` is retained so any other cell reading it keeps working.
similarity = evaluator.evaluate_strings(
    prediction=user_text,
    reference=user_text_compare,
)

# Label the value as a distance. A "Similarity:" label would invite reading a
# higher score as a closer match, which is backwards for this evaluator.
print(f"Embedding distance (lower = more similar): {similarity}")

Similarity: {'score': 0.6273886339419787}


In [21]:
# Fixed example pair to compare without user input.
string_first = "Shark"
string_second = "Fish"

# The "embedding_distance" evaluator returns a distance: lower scores mean the
# two strings are closer in embedding space. The name `similarity` is kept for
# compatibility with any other cell that reads it.
similarity = evaluator.evaluate_strings(
    prediction=string_first,
    reference=string_second,
)
# Label the value as a distance rather than a similarity to avoid reading it
# the wrong way round.
print(f"Embedding distance (lower = more similar): {similarity}")

Similarity: {'score': 0.37752130315967614}
