In [23]:
# https://python.langchain.com/docs/guides/evaluation/string/embedding_distance

from langchain.evaluation import EmbeddingDistance, load_evaluator
from langchain.embeddings import HuggingFaceEmbeddings, SpacyEmbeddings
from langchain.embeddings.base import Embeddings
from helper import make_embeddings

# Embedding backends compared in the cells below.
# `make_embeddings` comes from the local `helper` module; judging by the variable
# name it presumably builds an OpenAI "ada" embedding model -- TODO confirm.
ada_embeddings = make_embeddings()
# Both of these are constructed with no arguments, so the library defaults apply.
hugging_face_embeddings = HuggingFaceEmbeddings()
spacy_embeddings = SpacyEmbeddings()

# Probe sentences: a baseline, its negation, and a paraphrase of the baseline.
text_original = "I shall go"
text_opposite = "I shan't go"
# NOTE: the name is misspelled ("simliar"); it is kept as-is because
# run_embedding_distance_case reads this exact global.
text_simliar = "I will go"


In [24]:
def run_embedding_distance_case(embeddings: Embeddings, metric: EmbeddingDistance):
    """Check whether an embedding places the opposite sentence farther than the similar one.

    Loads an "embedding_distance" evaluator for the given embeddings and metric,
    then scores the notebook-level `text_original` against `text_opposite` and
    against `text_simliar`. Prints the metric name, followed by "Incorrect" when
    the opposite sentence scored strictly closer than the similar one and
    "Correct" otherwise (a tie is reported as "Correct"), together with the
    absolute gap between the two scores.

    Args:
        embeddings: Embedding model handed to the evaluator.
        metric: Distance metric handed to the evaluator; its `.name` is printed.

    Returns:
        None. The result is only printed.
    """
    print("metric", metric.name)
    scorer = load_evaluator("embedding_distance", embeddings=embeddings, distance_metric=metric)

    def score_against(reference_text):
        # Only the "score" entry of the evaluator's result is used.
        return scorer.evaluate_strings(prediction=text_original, reference=reference_text)["score"]

    opposite_score = score_against(text_opposite)
    similar_score = score_against(text_simliar)
    gap = abs(opposite_score - similar_score)

    verdict = "Incorrect" if opposite_score < similar_score else "Correct"
    print(verdict, gap)



In [25]:
# Run the four distance metrics below, in this order, against the ada embeddings.
for distance_metric in (
    EmbeddingDistance.COSINE,
    EmbeddingDistance.EUCLIDEAN,
    EmbeddingDistance.MANHATTAN,
    EmbeddingDistance.CHEBYSHEV,
):
    run_embedding_distance_case(ada_embeddings, distance_metric)


metric COSINE
Correct 0.059122047002837186
metric EUCLIDEAN
Correct 0.165510957931359
metric MANHATTAN
Correct 5.131426016576139
metric CHEBYSHEV
Correct 0.021672650595010383


In [26]:
# Run the four distance metrics below, in this order, against the HuggingFace embeddings.
for distance_metric in (
    EmbeddingDistance.COSINE,
    EmbeddingDistance.EUCLIDEAN,
    EmbeddingDistance.MANHATTAN,
    EmbeddingDistance.CHEBYSHEV,
):
    run_embedding_distance_case(hugging_face_embeddings, distance_metric)

metric COSINE
Correct 0.33845588211537436
metric EUCLIDEAN
Correct 0.3991504823511227
metric MANHATTAN
Correct 8.62121908978844
metric CHEBYSHEV
Correct 0.06994136609137058


In [27]:
# Run the four distance metrics below, in this order, against the spaCy embeddings.
for distance_metric in (
    EmbeddingDistance.COSINE,
    EmbeddingDistance.EUCLIDEAN,
    EmbeddingDistance.MANHATTAN,
    EmbeddingDistance.CHEBYSHEV,
):
    run_embedding_distance_case(spacy_embeddings, distance_metric)

metric COSINE
Correct 0.18769721829879338
metric EUCLIDEAN
Correct 1.7894340400280806
metric MANHATTAN
Correct 16.047470546793193
metric CHEBYSHEV
Correct 0.5207570840138942
