In [None]:
# Globals

import os

import numpy as np
import openai
from sklearn.metrics.pairwise import cosine_similarity

# Example: replace with your list of questions
questions = [
    "Hey there! How are you doing today?",
    "You look really good today!",
    "Can you help me with something?",
    "Do you want to go out today?",
]

# Adapt to your client initialization.
# The OpenAI client refuses to construct when it has no API key (neither the
# `api_key` argument nor the OPENAI_API_KEY environment variable), which makes
# this cell fail on a fresh kernel. Use the environment value when present and
# fall back to a placeholder otherwise.
# NOTE(review): assumes the local server at base_url does not validate the
# key - confirm for your server.
client = openai.Client(
    base_url="http://127.0.0.1:1234/v1",
    api_key=os.environ.get("OPENAI_API_KEY", "not-needed"),
)

In [None]:
# Embedding similarity by dot product

# Request one embedding per question and keep each as a float32 vector.
embs = []
for q in questions:
    r = client.embeddings.create(input=q, model="text-embedding-nomic-embed-text-v1.5")
    embs.append(np.array(r.data[0].embedding, dtype=np.float32))

# One row per question; entry (i, j) of sim_matrix is the raw dot product of
# the embeddings of question i and question j.
X = np.vstack(embs)
sim_matrix = np.dot(X, X.T)

# Ignore self-similarity. Raw dot products have no lower bound, so masking the
# diagonal with 0 would let the mask itself win the row-wise max whenever every
# other similarity in that row is negative. -inf can never be selected over a
# real value. (With fewer than two questions there is no "other item", and the
# result is -inf.)
np.fill_diagonal(sim_matrix, -np.inf)
nn_max = sim_matrix.max(axis=1)

print("Per-item max similarity to any other item:", nn_max)
print("Mean max similarity:", nn_max.mean())

Per-item max similarity to any other item: [0.73575836 0.73575836 0.55402076 0.6671491 ]
Mean max similarity: 0.67317164


In [None]:
# Embedding similarity by cosine similarity

# Request one float32 embedding vector per question (adapt to your client)
embs = [
    np.array(
        client.embeddings.create(
            input=text, model="text-embedding-nomic-embed-text-v1.5"
        ).data[0].embedding,
        dtype=np.float32,
    )
    for text in questions
]

# Stack the vectors as rows, then scale every row to unit length so the
# cosine interpretation is explicit; the small epsilon guards against a
# division by zero for an all-zero row.
X = np.vstack(embs)
row_norms = np.linalg.norm(X, axis=1, keepdims=True)
X = X / (row_norms + 1e-12)

# Pairwise cosine similarity: NxN matrix in [-1, 1]
S = cosine_similarity(X)

# Redundancy signal: each item's highest similarity to any *other* item.
# The diagonal (self-similarity) is overwritten with the lower bound -1.0 so
# it cannot be picked by the row-wise max.
np.fill_diagonal(S, -1.0)
nn_max = np.max(S, axis=1)

print("Per-item max similarity to any other item:", nn_max)
print("Mean max similarity:", nn_max.mean())

Per-item max similarity to any other item: [0.73575836 0.73575836 0.5540208  0.6671492 ]
Mean max similarity: 0.67317164
