# All-pairs similarities

SPDX-License-Identifier: 0BSD

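Multiplying a matrix whose rows are embeddings by its transpose gives a matrix whose $(i, j)$ entry is the similarity between the $i$-th and $j$-th embeddings. This works because the embeddings are normalized to unit length, so each dot product is a cosine similarity.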

In [1]:
import numpy as np
import openai.embeddings_utils
import tabulate

In [2]:
# Allow up to 120 characters per printed line so that wide array rows
# (such as the rows of the similarity matrix shown at the end) are less
# likely to wrap.
np.set_printoptions(linewidth=120)

In [3]:
# Tell the openai module which file holds the API key, so the key itself
# never appears in the notebook. The path is relative, so it is presumably
# resolved against the kernel's working directory.
openai.api_key_path = '.api_key'

In [4]:
# Sample texts to embed. The order matters: row/column i of the similarity
# matrix computed later corresponds to texts[i].
texts = [
    # Variations on the same few words, arranged differently.
    'French kissing experts',
    'kissing French experts',
    'experts in French kissing',
    'kissing French people who are experts',
    # Sentences about dogs; the last one is in French.
    'The canine shall never perish from the earth.',
    'Somewhere in the world, there will always be dogs.',
    'Quelque part au monde, il y aura toujours des chiens.',
    # A sentence on an unrelated topic.
    'The tabular data have been sequestered from the public.',
]

In [5]:
# Request the embeddings for all of the texts in a single call.
embeddings = openai.embeddings_utils.get_embeddings(texts, engine='text-embedding-ada-002')

# Collect the embeddings into a NumPy array; per the notebook's
# introduction, each row of this matrix is one embedding.
matrix = np.array(embeddings)

In [6]:
# Matrix times its own transpose: entry (i, j) is the dot product of
# row i with row j, i.e. the similarity between embeddings i and j.
similarities = np.matmul(matrix, matrix.T)

In [7]:
# Format the similarity matrix as an HTML table. Because this is the last
# expression in the cell, its value is shown as the cell's output.
tabulate.tabulate(similarities, tablefmt='html')

0,1,2,3,4,5,6,7
1.0,0.96145,0.945449,0.941631,0.719258,0.722507,0.727874,0.715571
0.96145,1.0,0.962644,0.975816,0.721417,0.725247,0.736398,0.710534
0.945449,0.962644,1.0,0.949081,0.71438,0.716647,0.720001,0.687278
0.941631,0.975816,0.949081,1.0,0.717578,0.731879,0.740077,0.703308
0.719258,0.721417,0.71438,0.717578,1.0,0.868313,0.824918,0.709783
0.722507,0.725247,0.716647,0.731879,0.868313,1.0,0.938079,0.688226
0.727874,0.736398,0.720001,0.740077,0.824918,0.938079,1.0,0.66264
0.715571,0.710534,0.687278,0.703308,0.709783,0.688226,0.66264,1.0


In [8]:
# Also show the plain NumPy repr of the same matrix, which displays more
# digits than the table and can be pasted back into Python code.
similarities  # For easier copying and pasting.

array([[0.99999991, 0.96144969, 0.9454494 , 0.94163079, 0.71925837, 0.72250736, 0.72787397, 0.71557135],
       [0.96144969, 1.        , 0.96264444, 0.97581551, 0.72141712, 0.72524659, 0.73639836, 0.71053379],
       [0.9454494 , 0.96264444, 1.00000007, 0.94908145, 0.71438011, 0.71664666, 0.72000093, 0.68727793],
       [0.94163079, 0.97581551, 0.94908145, 1.00000004, 0.71757824, 0.73187853, 0.74007661, 0.7033079 ],
       [0.71925837, 0.72141712, 0.71438011, 0.71757824, 1.00000006, 0.86831257, 0.82491844, 0.70978253],
       [0.72250736, 0.72524659, 0.71664666, 0.73187853, 0.86831257, 1.00000012, 0.9380792 , 0.68822555],
       [0.72787397, 0.73639836, 0.72000093, 0.74007661, 0.82491844, 0.9380792 , 0.99999992, 0.66263953],
       [0.71557135, 0.71053379, 0.68727793, 0.7033079 , 0.70978253, 0.68822555, 0.66263953, 0.99999999]])