In [1]:
import logging

import numpy as np
import openai
import openai.embeddings_utils
import safetensors.numpy

In [2]:
# Surface INFO-level log records so the openai client's request/response
# messages are printed under the cells that call the API.
logging.basicConfig(level=logging.INFO)
# Have the openai client read the API key from a file rather than storing
# the secret in the notebook.
# NOTE(review): the path is relative, so presumably it is resolved against
# the kernel's working directory — confirm.
openai.api_key_path = '.api_key'

In [3]:
def save(embeddings, basename):
    """Write ``embeddings`` to the file ``<basename>.safetensors``.

    The array is stored as the only entry of the tensor dict, under the
    key ``'embeddings'``. Returns ``None``.
    """
    payload = {'embeddings': embeddings}
    target = f'{basename}.safetensors'
    safetensors.numpy.save_file(tensor_dict=payload, filename=target)

In [4]:
def load(basename):
    """Return the array stored in the file ``<basename>.safetensors``.

    Reads the file's tensor dict and returns the entry under the key
    ``'embeddings'``.
    """
    source = f'{basename}.safetensors'
    return safetensors.numpy.load_file(source)['embeddings']

In [5]:
def embed(texts, engine='text-embedding-ada-002'):
    """Embed a batch of texts and return the vectors as a float32 array.

    Parameters
    ----------
    texts : list of str
        Texts to embed; the whole list is passed to the API helper in a
        single call.
    engine : str, optional
        Name of the embedding model to request. Defaults to
        ``'text-embedding-ada-002'``, so existing ``embed(texts)`` calls
        behave exactly as before.

    Returns
    -------
    numpy.ndarray
        ``float32`` array built from the embeddings the helper returns
        (one row per input text).
    """
    embeddings = openai.embeddings_utils.get_embeddings(
        texts,
        engine=engine,
    )
    # Cast explicitly so the array dtype does not depend on how the
    # embeddings come back from the client library.
    return np.array(embeddings, dtype=np.float32)

In [6]:
# Sample inputs: three short exclamations plus one ordinary sentence about
# a meeting. The whole list is passed to embed() as one batch.
TEXTS = [
    'Gee whiz!',
    'Golly wow!',
    'Well, shucks!',
    'The meeting is this afternoon!',
]

In [7]:
# First request: embed every text in TEXTS.
a = embed(TEXTS)

INFO:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/engines/text-embedding-ada-002/embeddings
INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/engines/text-embedding-ada-002/embeddings processing_ms=445 request_id=9c66e9fed31207b93f385e41c9a3a196 response_code=200


In [8]:
# Dot product between every pair of rows of `a`.
a @ a.T

array([[0.99999964, 0.9231242 , 0.88108635, 0.78033847],
       [0.9231242 , 1.        , 0.86986357, 0.76908714],
       [0.88108635, 0.86986357, 1.0000002 , 0.75694346],
       [0.78033847, 0.76908714, 0.75694346, 1.0000001 ]], dtype=float32)

In [9]:
# Second request with the identical input, for comparison against `a`.
b = embed(TEXTS)

INFO:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/engines/text-embedding-ada-002/embeddings
INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/engines/text-embedding-ada-002/embeddings processing_ms=21 request_id=82eaa919d33bc235172fc411eb425642 response_code=200


In [10]:
# True only if the two runs agree exactly, element for element.
np.all(a == b)

False

In [11]:
# Element-wise difference between the two embedding runs.
a - b

array([[-1.69524923e-04,  1.26272556e-03,  1.05181709e-04, ...,
        -1.70242041e-04, -6.79388177e-04,  7.45965168e-04],
       [ 4.41698357e-05,  6.22558873e-05,  4.19653952e-06, ...,
         9.22381878e-06, -1.76671892e-05, -5.49666584e-06],
       [ 1.74460001e-05,  8.22935253e-05,  3.36444937e-05, ...,
        -1.95819885e-05,  3.06218863e-06,  2.90069729e-05],
       [-2.45291740e-05, -2.71040481e-05, -6.73998147e-05, ...,
         4.19355929e-05, -1.15180155e-05, -9.52929258e-06]], dtype=float32)

In [12]:
# Most negative and most positive element-wise difference between the runs.
np.min(a - b), np.max(a - b)

(-0.0026211143, 0.0031402768)

In [13]:
# Mean absolute element-wise difference between the two runs.
np.mean(np.abs(a - b))

0.00018736879

In [14]:
# Dot product of each row of `a` with itself (its squared L2 norm).
list(map(np.dot, a, a))

[1.0000002, 1.0000002, 1.0000001, 1.0]

In [15]:
# Dot product of each row of `b` with itself (its squared L2 norm).
list(map(np.dot, b, b))

[1.0000001, 1.0, 0.99999976, 1.0000001]

In [16]:
# Dot product of each row of `a` with the row of `b` at the same index.
list(map(np.dot, a, b))

[0.9995055, 0.99999917, 0.9999976, 0.9999979]

In [17]:
# Persist both runs, to a.safetensors and b.safetensors respectively.
save(a, 'a')
save(b, 'b')