In [18]:
import itertools

import numpy as np
import openai

In [2]:
# Point the openai module at a key file instead of embedding the secret in
# the notebook. The path is relative, so it is presumably resolved against
# the kernel's working directory -- TODO confirm.
openai.api_key_path = '.openai_key'

In [3]:
# Sample sentences to embed. The first three differ from one another by a
# single word; the fourth is on an unrelated subject.
texts = [
    'That is a happy person',
    'That is a happy dog',
    'That is a very happy person',
    'Today is a sunny day',
]

In [4]:
# Request embeddings for all four sentences in a single call; the fields of
# `results.data` are inspected in the cells that follow.
results = openai.Embedding.create(
    input=texts,
    model='text-embedding-ada-002',
)

In [13]:
# Show which fields the first returned item exposes.
results.data[0].keys()

dict_keys(['object', 'index', 'embedding'])

In [14]:
# List the `object` tag of every returned item.
[datum.object for datum in results.data]

['embedding', 'embedding', 'embedding', 'embedding']

In [27]:
# List the `index` field of every returned item, in response order.
[datum.index for datum in results.data]

[0, 1, 2, 3]

In [16]:
# Sanity check: walking the response in order, each item's index must be
# strictly greater than the one before it. The embeddings are later unpacked
# positionally from `results.data`, so the order matters.
pairs = itertools.pairwise(item.index for item in results.data)
all(before < after for before, after in pairs)

True

In [19]:
# Turn each returned embedding into a float32 vector, keeping response order.
v1, v2, v3, v4 = [
    np.array(item.embedding, dtype=np.float32) for item in results.data
]

In [20]:
# Show the shape of each of the four vectors.
[embedding.shape for embedding in (v1, v2, v3, v4)]

[(1536,), (1536,), (1536,), (1536,)]

In [21]:
# Euclidean length of each vector. The values displayed are all ~1, which is
# what lets the plain dot products in the following cells be read as cosine
# similarities.
[np.linalg.norm(embedding) for embedding in (v1, v2, v3, v4)]

[0.99999994, 1.0, 0.9999999, 0.9999999]

In [22]:
# Similarity between the first and second returned embeddings; with the
# response order verified earlier, that should be "That is a happy person"
# vs "That is a happy dog".
np.dot(v1, v2)

0.9301601

In [23]:
# Similarity between the first and third returned embeddings; with the
# response order verified earlier, that should be "That is a happy person"
# vs "That is a very happy person".
np.dot(v1, v3)

0.98347116

In [24]:
# Similarity between the first and fourth returned embeddings; with the
# response order verified earlier, that should be "That is a happy person"
# vs "Today is a sunny day".
np.dot(v1, v4)

0.8226278

In [25]:
def _read_as_single_line(path):
    """Return the UTF-8 text stored at `path`, stripped of surrounding
    whitespace and with every newline replaced by a space."""
    with open(path, encoding='utf-8') as file:
        return file.read().strip().replace('\n', ' ')


# The original text and its modified counterpart (going by the file names).
tow = _read_as_single_line('the_open_window.txt')
tow_modified = _read_as_single_line('the_open_window_modified.txt')

# Guard against comparing two identical texts by mistake.
assert tow != tow_modified

# Embed both texts in one request.
tow_results = openai.Embedding.create(
    input=[tow, tow_modified],
    model='text-embedding-ada-002',
)

In [26]:
# List the `object` tag of every item returned for the two long texts.
[datum.object for datum in tow_results.data]

['embedding', 'embedding']

In [28]:
# List the `index` field of every item returned for the two long texts,
# in response order.
[datum.index for datum in tow_results.data]

[0, 1]

In [29]:
# Same ordering check as for the short sentences: each item's index must be
# strictly greater than the one before it, because the embeddings are later
# unpacked positionally from `tow_results.data`.
pairs = itertools.pairwise(item.index for item in tow_results.data)
all(before < after for before, after in pairs)

True

In [30]:
# Turn both returned embeddings into float32 vectors, keeping response order.
w1, w2 = [
    np.array(item.embedding, dtype=np.float32) for item in tow_results.data
]

In [31]:
# Element-wise comparison reduced with .all(): True only if the two
# embeddings agree in every component.
(w1 == w2).all()

False

In [32]:
# Euclidean distance between the two embeddings.
np.linalg.norm(w1 - w2)

0.08089018

In [33]:
# Dot product of the two embeddings.
# NOTE(review): this is a cosine similarity only if both vectors are
# unit-length; norms were displayed for v1..v4 but not for w1/w2 -- confirm.
np.dot(w1, w2)

0.9967284

In [38]:
# Token usage reported for the four-sentence request.
results.usage

<OpenAIObject at 0x22ae654dc10> JSON: {
  "prompt_tokens": 21,
  "total_tokens": 21
}

In [36]:
# Token usage reported for the request that embedded the two long texts.
tow_results.usage

<OpenAIObject at 0x22ae6ad2c90> JSON: {
  "prompt_tokens": 3114,
  "total_tokens": 3114
}