# Basic demo of text-embedding-ada-002

SPDX-License-Identifier: 0BSD

In [1]:
import numpy as np
import openai
import openai.embeddings_utils

In [2]:
# Tell the openai library to read the API key from a local file named
# '.api_key', so the key itself never appears in this notebook.
openai.api_key_path = '.api_key'

In [3]:
# Request an embedding for a short phrase. The next request uses the same
# three words in a different order, so the two can be compared.
r1 = openai.Embedding.create(
    input='French kissing experts',
    model='text-embedding-ada-002',
)

In [4]:
# Same words as the phrase embedded for r1, reordered (which changes the
# likely reading of the phrase).
r2 = openai.Embedding.create(
    input='kissing French experts',
    model='text-embedding-ada-002',
)

In [5]:
# Pull the embedding out of the first entry of the response's 'data' list
# and convert it to a NumPy array for vector arithmetic.
v1 = np.array(r1['data'][0]['embedding'])

In [6]:
# Embedding for the reordered phrase, as a NumPy array.
v2 = np.array(r2['data'][0]['embedding'])

In [7]:
# Euclidean length of the embedding. The recorded output is 1 up to
# floating-point error, i.e. this vector is unit-length.
np.linalg.norm(v1)

1.0000000397506872

In [8]:
# Same length check for the second embedding; it is also (nearly) 1.
np.linalg.norm(v2)

1.0000000293103923

In [9]:
# For unit-length vectors the dot product equals the cosine similarity.
# Compares the two orderings of the same three words.
np.dot(v1, v2)

0.9614921521499794

In [10]:
# First of two sentences that share a meaning but almost no wording.
r3 = openai.Embedding.create(
    input='The canine shall never perish from the earth.',
    model='text-embedding-ada-002',
)

In [11]:
# Second sentence: a paraphrase of the text embedded for r3, using
# different vocabulary.
r4 = openai.Embedding.create(
    input='Somewhere in the world, there will always be dogs.',
    model='text-embedding-ada-002',
)

In [12]:
# Embedding of the first dog sentence, as a NumPy array.
v3 = np.array(r3['data'][0]['embedding'])

In [13]:
# Embedding of the paraphrased dog sentence, as a NumPy array.
v4 = np.array(r4['data'][0]['embedding'])

In [14]:
# Similarity between the two paraphrases (same meaning, different words).
np.dot(v3, v4)

0.8689153923880475

In [15]:
# Baseline: similarity between texts on unrelated topics
# (first phrase vs. first dog sentence).
np.dot(v1, v3)

0.7186503713790333

In [16]:
# Second unrelated-topic baseline
# (reordered phrase vs. paraphrased dog sentence).
np.dot(v2, v4)

0.7252955353117507

In [17]:
def embed_one(text, engine='text-embedding-ada-002'):
    """
    Embed a single text and return the embedding as a NumPy array.

    This is a thin wrapper around ``openai.embeddings_utils.get_embedding``
    that converts the result to ``numpy.ndarray`` so it can be used directly
    in vector arithmetic.

    Parameters
    ----------
    text : str
        The text to embed.
    engine : str, optional
        Name of the embedding model to request. Defaults to
        ``'text-embedding-ada-002'``, the model this notebook demonstrates.

    Returns
    -------
    numpy.ndarray
        The embedding vector for ``text``.
    """
    embedding = openai.embeddings_utils.get_embedding(
        text=text,
        engine=engine,
    )
    return np.array(embedding)

In [18]:
def embed_many(texts, engine='text-embedding-ada-002'):
    """
    Embed several texts in one request and return them as a NumPy array.

    This is a thin wrapper around ``openai.embeddings_utils.get_embeddings``
    that converts the result to ``numpy.ndarray``.

    Parameters
    ----------
    texts : list of str
        The texts to embed.
    engine : str, optional
        Name of the embedding model to request. Defaults to
        ``'text-embedding-ada-002'``, the model this notebook demonstrates.

    Returns
    -------
    numpy.ndarray
        Array built from the list of embeddings returned for ``texts``
        (one row per embedding).
    """
    embeddings = openai.embeddings_utils.get_embeddings(
        list_of_text=texts,
        engine=engine,
    )
    return np.array(embeddings)

In [19]:
# Embed the first phrase again, this time through the embed_one helper,
# to compare against the vector obtained from the direct API call.
w1 = embed_one('French kissing experts')

In [20]:
# Are the helper's result and the direct API result identical in every
# component? The recorded output says yes for this run.
# NOTE(review): this is exact float equality; np.allclose(v1, w1) would be
# the more robust check if results ever differ in the last digits.
(v1 == w1).all()

True

In [21]:
# Embed all four example texts in a single batched call, in the same order
# as v1..v4.
ws = embed_many([
    'French kissing experts',
    'kissing French experts',
    'The canine shall never perish from the earth.',
    'Somewhere in the world, there will always be dogs.',
])

In [22]:
# One row per input text; the second dimension is the embedding length.
ws.shape

(4, 1536)

In [23]:
# Compare the batched embeddings with the ones fetched one at a time.
# The recorded output is small but nonzero, so the two do not match exactly.
# NOTE(review): .max() here is the largest *signed* difference; a larger
# negative deviation would go unnoticed. np.abs(...).max() would report the
# largest deviation in either direction.
(ws - np.array([v1, v2, v3, v4])).max()

0.002188093261793256

Example of embedding a paragraph. This text is drawn from [Embeddings -
Limitations &
Risks](https://beta.openai.com/docs/guides/embeddings/limitations-risks?lang=python)
by OpenAI:

In [24]:
# split() followed by ' '.join(...) collapses the line breaks and any runs
# of whitespace in the triple-quoted text into single spaces, giving one
# single-line paragraph.
nontrivial = ' '.join("""
We found evidence of bias in our models via running the SEAT (May et al, 2019)
and the Winogender (Rudinger et al, 2018) benchmarks. Together, these
benchmarks consist of 7 tests that measure whether models contain implicit
biases when applied to gendered names, regional names, and some stereotypes.
""".split())

# Embedding of the paragraph via the embed_one helper.
t = embed_one(nontrivial)

# Embedding of the same paragraph via a direct API call, for comparison.
u = np.array(
    openai.Embedding.create(
        input=nontrivial,
        model='text-embedding-ada-002',
    )['data'][0]['embedding']
)

In [25]:
# Check that the helper and the direct API call agree exactly on the
# paragraph's embedding (component-wise equality).
(t == u).all()

True