In [1]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install tabulate
# %pip install sentence_transformers

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer
from tabulate import tabulate

In [None]:
# Load the sentence-embedding model ("all-mpnet-base-v2" checkpoint).
# Its encode() method is what make_scale_list uses to turn each word into a vector.
model = SentenceTransformer("all-mpnet-base-v2")

In [None]:
# Word lists to be ranked along a two-word scale are defined here.
# `animals` is the list handed to make_scale_list at the bottom of the notebook;
# add further lists in the same way for other datasets.
animals = [
    "kitten", "puppy", "rabbit", "panda", "hedgehog",
    "hamster", "dolphin", "horse", "penguin",
    "fox", "deer", "owl", "cow", "chicken", "goat",
    "pig", "parrot", "squirrel", "rat", "snake", "spider",
    "bat", "vulture", "shark", "cockroach", "maggot", "worm", "hyena",
]

In [None]:
# This function does the projection
def proj_meas(v1, v2, v3):
    """Project ``v3`` onto the straight line through ``v1`` and ``v2``.

    Parameters
    ----------
    v1, v2 : array-like
        Endpoints that define the scale; ``v1`` sits at t = 0.0 and ``v2`` at
        t = 1.0.
    v3 : array-like
        The point to place on the scale.

    Returns
    -------
    d : float
        Euclidean distance from ``v3`` to the line (how far off the scale the
        point lies).
    proj_point : numpy.ndarray
        The point on the line closest to ``v3``.
    t : float
        Position along the scale. Values below 0 or above 1 fall beyond the
        ``v1`` / ``v2`` endpoints respectively.

    Raises
    ------
    ValueError
        If ``v1`` and ``v2`` coincide, because no direction is defined and the
        projection would otherwise silently evaluate to NaN.
    """
    # Accept plain lists/tuples as well as arrays; the dtype is left untouched
    # so float32 embeddings keep their precision.
    v1, v2, v3 = np.asarray(v1), np.asarray(v2), np.asarray(v3)

    v = v2 - v1  # direction of the scale
    w = v3 - v1  # point expressed relative to the scale origin

    axis_sq_len = np.dot(v, v)
    if axis_sq_len == 0:
        raise ValueError("v1 and v2 are identical; the scale has no direction")

    # t is computed once and reused for both the distance and the projected point.
    t = np.dot(w, v) / axis_sq_len
    along_axis = t * v

    d = np.linalg.norm(w - along_axis)
    proj_point = v1 + along_axis
    return d, proj_point, t

In [None]:
# Here I make the list and sort them based on the scale
def make_scale_list(word1, word2, word_list):
    """Print the words of ``word_list`` ranked on the scale ``word1`` -> ``word2``.

    Every word is embedded with the notebook-level ``model`` and projected onto
    the line joining the embeddings of ``word1`` and ``word2`` via ``proj_meas``.
    The printed table is sorted by ``t`` (ascending) and contains:

    * ``t (scale)``: position along the scale (0.0 at ``word1``, 1.0 at ``word2``),
    * ``Distance``: how far the word's embedding lies from the scale line,
    * ``Normalized Distance``: the distance min-max rescaled and inverted, so the
      word closest to the line gets 1.0 and the farthest gets 0.0.

    Parameters
    ----------
    word1, word2 : str
        The two words that define the ends of the scale.
    word_list : iterable of str
        The words to rank. An empty iterable prints a table with no rows.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    words_to_rank = list(word_list)

    # The scale endpoints do not change per word, so encode them only once.
    vec1 = model.encode(word1)
    vec2 = model.encode(word2)

    # (t, word, distance) triples; sorting them orders primarily by t.
    ranked = []
    for word in words_to_rank:
        d, _proj, t = proj_meas(vec1, vec2, model.encode(word))
        ranked.append((t, word, d))
    ranked.sort()

    if ranked:
        dists = np.asarray([d for _t, _word, d in ranked])
        lowest = dists.min()
        spread = dists.max() - lowest
        if spread > 0:
            # Normalized makes a bit more sense here: 1.0 = closest to the scale line.
            normed_dists = 1 - (dists - lowest) / spread
        else:
            # All distances equal (e.g. a single word): every word is "the closest".
            normed_dists = np.ones_like(dists)
    else:
        normed_dists = []

    # Build table
    table = [
        [word, f"{t:.3f}", f"{d:.3f}", f"{nor_dist:.3f}"]
        for (t, word, d), nor_dist in zip(ranked, normed_dists)
    ]

    headers = ["Word", "t (scale)", "Distance", "Normalized Distance"]
    print('From ', word1, ' to ', word2, ':')
    print(tabulate(table, headers=headers, tablefmt="fancy_grid"))
        
# Rank the animals between 'revolting' (t = 0) and 'adorable' (t = 1) and print the table.
make_scale_list('revolting', 'adorable', animals)