# Similar Embeddings


In [None]:
import numpy as np
import sentence_transformers
from scipy.special import softmax
from sentence_transformers import SentenceTransformer
# __version__ is defined on the sentence_transformers package, not on the
# SentenceTransformer class, so it has to be read from the package itself.
print(f"sentence_transformers version: {sentence_transformers.__version__}")

In [None]:
# Instantiate the pre-trained sentence-embedding model, selected by name
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [None]:
# Vocabulary to embed. The first three entries are the operands of the
# analogy; the remaining entries are the candidates scored against it.
words = [
    "king", "man", "woman",
    "queen", "princess", "monarch", "empress", "duchess", "lady",
    "baseball", "floor", "monitor",
]

In [None]:
# Encode the whole word list with a single call to the model
embeddings = model.encode(words)

# Pair each word with its embedding so vectors can be looked up by word
word_to_embedding = {word: vector for word, vector in zip(words, embeddings)}

In [None]:
# Look up the embeddings of the three analogy operands
embedding_king, embedding_man, embedding_woman = (
    word_to_embedding[name] for name in ("king", "man", "woman")
)

# Analogy vector: king - man + woman
result_vector = (embedding_king - embedding_man) + embedding_woman

In [None]:
# Score every non-operand word against the analogy vector and turn the
# scores into a probability distribution.

# Exclude the analogy operands by name rather than by list position, and
# build the candidate list once so both loops below iterate the same words.
operand_words = {"king", "man", "woman"}
candidate_words = [w for w in words if w not in operand_words]

# The norm of the analogy vector does not change between iterations,
# so compute it once instead of once per candidate.
result_norm = np.linalg.norm(result_vector)

# Calculate cosine similarities with the candidate words
similarities = []
print("Cosine Similarities:")
for word in candidate_words:
    embedding_word = word_to_embedding[word]
    similarity = np.dot(result_vector, embedding_word) / (
        result_norm * np.linalg.norm(embedding_word)
    )
    similarities.append(similarity)
    print(f"Similarity with '{word}': {similarity:.4f}")

# Shift the similarities so the smallest one is 0. Softmax is invariant to
# adding a constant to every input, so this does not change the resulting
# probabilities; it only makes the intermediate values non-negative.
min_similarity = min(similarities)
shifted_similarities = [sim - min_similarity for sim in similarities]

# Convert the scores to probabilities. Dividing by a small temperature
# magnifies the gaps between scores, which sharpens the softmax output
# towards the highest-scoring word.
temperature = 0.01
scaled_similarities = [sim / temperature for sim in shifted_similarities]
probabilities = softmax(scaled_similarities)
print("\nSoftmax Probabilities:")
for word, probability in zip(candidate_words, probabilities):
    print(f"Probability for '{word}': {probability:.4f}")