# OpenAI API: Embeddings

### Install the necessary libraries. 

In [None]:
pip install openai

In [None]:
pip install openai[datalib]

In [None]:
pip install urllib3==1.26.6 

In [None]:
pip install python-dotenv

### Import the libraries and load the environment file to gain access to the OpenAI API key
#### The key can be generated here: https://platform.openai.com/account/api-keys

In [27]:
import os

import openai
from dotenv import load_dotenv, find_dotenv
from openai import OpenAI
# Load variables from the .env file located by find_dotenv(); the return
# value is assigned to `_` because it is not used.
_ = load_dotenv(find_dotenv()) # read local .env file

### Authenticate to the API using the API Key
#### Pull from environment variables or use api_key = ("your_key_here") to hardcode the key

In [28]:
# Create the API client used by the cells below. The key is read from the
# OPENAI_API_KEY environment variable; a missing variable raises KeyError.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

#### Helper functions

In [29]:
def get_embeddings(word, model):
    """Request an embedding for ``word`` from the OpenAI embeddings endpoint.

    Args:
        word: Text to embed; passed as ``input`` to ``client.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The response object returned by ``client.embeddings.create`` on
        success. If the call raises ``openai.APIError``, the error details
        are printed and returned instead of a response, so callers should
        check the result before reading ``.data`` from it.
    """
    try:
        return client.embeddings.create(input=word, model=model)
    except openai.APIError as e:
        # The v1 client exceptions carry `status_code` (only on HTTP status
        # errors) and `body`; the older `http_status` / `error` attributes
        # are not available, so reading them would raise AttributeError.
        status = getattr(e, "status_code", None)
        body = getattr(e, "body", None)
        # Fall back to the exception text when there is no response body
        # (for example, a connection failure).
        details = body if body is not None else str(e)
        print(status)
        print(details)
        return details

In [30]:
import numpy as np
from numpy.linalg import norm

# compute cosine similarity
def compute_cosine_similarity(embeddings1, embeddings2):
    """Return the cosine similarity of two embedding vectors.

    The result is the dot product of the inputs divided by the product of
    their norms, as computed by ``numpy.linalg.norm``.
    """
    dot_product = np.dot(embeddings1, embeddings2)
    magnitude_product = norm(embeddings1) * norm(embeddings2)
    return dot_product / magnitude_product

### Generate embeddings for single words

In [31]:
# Request the embedding for the word "sun" with the text-embedding-ada-002 model.
response = get_embeddings("sun","text-embedding-ada-002")

In [32]:
# Take the embedding vector from the first entry of the response's data list.
sun_embeddings = response.data[0].embedding

In [33]:
# Prints the entire "sun" embedding vector (every component).
print(sun_embeddings)

[0.02472955547273159, -0.0024875381495803595, -0.0014574227388948202, -0.009020929224789143, -0.023183109238743782, 0.025353558361530304, -0.014962533488869667, -0.028568536043167114, -0.0063790855929255486, -0.022938935086131096, 0.007271004840731621, 0.017756987363100052, 0.008532578125596046, 0.0014752271817997098, -0.014962533488869667, -0.005151425488293171, 0.04359889775514603, -0.025475647300481796, 0.026533741503953934, -0.021975796669721603, -0.009502497501671314, 0.0072370911948382854, -0.004812292754650116, -0.0021416228264570236, -0.0027130614034831524, 0.0042357672937214375, -0.00030394765781238675, -0.019764652475714684, 0.004083157517015934, -0.020239438861608505, 0.022097885608673096, -0.015857843682169914, -0.01764846406877041, -0.005636385176330805, -0.00014784064842388034, -0.011340596713125706, -0.005917865317314863, -0.0005900908727198839, -0.0011979861883446574, -0.007216743193566799, 0.0011784860398620367, 0.017322897911071777, -0.0014921837719157338, -0.00624682

In [34]:
# Request the embedding for the word "moon" with the same model; this
# rebinds `response`.
response = get_embeddings("moon","text-embedding-ada-002")

In [35]:
# Take the embedding vector from the first entry of the response's data list.
moon_embeddings = response.data[0].embedding

In [36]:
# Prints the entire "moon" embedding vector (every component).
print(moon_embeddings)

[0.01746830902993679, -0.00917647872120142, 0.0020518943201750517, -0.016429197043180466, -0.027831340208649635, 0.008516502566635609, -0.020543517544865608, -0.012651885859668255, 0.005097263492643833, -0.034066010266542435, 0.006761246360838413, 0.017833402380347252, 0.01541816908866167, -0.003591253887861967, -0.013740144670009613, 0.0021449229680001736, 0.040132176131010056, -0.020641811192035675, 0.00933094136416912, -0.016429197043180466, 0.015039034187793732, -0.002373106312006712, 0.016358986496925354, -0.0034964701626449823, -0.007070171646773815, 0.0035596592351794243, 0.009183499962091446, -0.0037843321915715933, -0.0008118059486150742, -0.013192504644393921, 0.016597701236605644, -0.017004920169711113, -0.0282245185226202, -0.0016174684278666973, -0.007954820990562439, -0.0021765176206827164, 0.004840996116399765, -0.008172472007572651, 0.0008828938007354736, -0.00461632339283824, 0.013592703267931938, 0.012806348502635956, 0.0005046361475251615, 0.00506566883996129, -0.008

### Compare the vectors 

#### Vectors need to be the same length for the comparison

In [37]:
# Number of components in the "sun" embedding vector.
len(sun_embeddings)

1536

In [38]:
# Number of components in the "moon" embedding vector.
len(moon_embeddings)

1536

#### Cosine similarity is a measure of similarity between two non-zero vectors. The value ranges from -1 to 1; the closer the value is to 1, the more similar the vectors are.

In [39]:
# Compare the "sun" and "moon" embedding vectors and print the score.
cosine = compute_cosine_similarity(sun_embeddings, moon_embeddings)
print("Cosine Similarity:", cosine)

Cosine Similarity: 0.8806713172655337


### Generate embeddings for dissimilar words

In [56]:
# Request the embedding for the word "cloud".
# NOTE(review): the embedded word is "cloud" but the vector is stored as
# `computer_embeddings` — confirm which word was intended.
response = get_embeddings("cloud","text-embedding-ada-002")
computer_embeddings = response.data[0].embedding
# Prints the whole response object, including the full embedding vector.
print(response)

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.004630567505955696, -0.012147098779678345, -0.00044086360139772296, -0.011982578784227371, 0.014409256167709827, 0.03213634341955185, -0.008863543160259724, -0.032492805272340775, 0.006834456697106361, -0.03682517632842064, 0.014793137088418007, 0.02524019032716751, 0.002080499194562435, 0.0030899010598659515, -0.01720610447227955, 0.005422322545200586, 0.02768057957291603, 0.0006833599763922393, -0.006440293043851852, -0.031258899718523026, -0.0003453217213973403, 0.011064005084335804, -0.0034703549463301897, -0.009884941391646862, -0.016040751710534096, -0.001787447021342814, 0.034933190792798996, -0.01543750986456871, 0.007382858544588089, -0.010597864165902138, 0.00704696262255311, 0.0006456573610194027, -0.009473640471696854, -0.005957013927400112, -0.010330517776310444, -0.015163308940827847, 0.005981006659567356, -0.01078980416059494, -0.0035440463107079268, 0.0057410807348787785, 0.010885775089263916, -0.0032921242527663708,

In [63]:
# Request the embedding for the word "hero" and keep its vector.
response = get_embeddings("hero","text-embedding-ada-002")
hero_embeddings = response.data[0].embedding
# Prints the whole response object, including the full embedding vector.
print(response)

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.011414039880037308, -0.01675194874405861, -0.010824882425367832, -0.00930584967136383, -0.0070911855436861515, 0.014033307321369648, -0.017859281972050667, -0.019378313794732094, -0.005036232527345419, -0.0022040170151740313, 0.028648672625422478, 0.027470357716083527, 0.01550265122205019, -0.0028765795286744833, -0.009234867058694363, -0.007368018385022879, 0.04542901739478111, 6.283089896896854e-05, 0.038387518376111984, -0.015332292765378952, -0.010860374197363853, 0.009064508602023125, -0.0025802261661738157, 0.0019697737880051136, -0.008716692216694355, 0.010235724970698357, 0.005391146522015333, -0.044321682304143906, 0.02049984410405159, -0.015701403841376305, 0.0035313961561769247, -0.014267549850046635, -0.026504991576075554, -0.01653900183737278, -0.020372074097394943, -0.013153119944036007, -9.449590288568288e-05, -0.004546450916677713, 0.010363494046032429, 0.009497503750026226, 0.02366567775607109, -0.005298868753015995

In [64]:
# Compare the vector stored in `computer_embeddings` with the "hero" vector.
print("Cosine Similarity:", compute_cosine_similarity(computer_embeddings, hero_embeddings))

Cosine Similarity: 0.7842241218080854
