# OpenAI API: Embeddings

### Install the necessary libraries. 

In [None]:
pip install openai

In [None]:
pip install openai[datalib]

In [None]:
pip install urllib3==1.26.6 

In [None]:
pip install python-dotenv

### Import the libraries and load the environment file to access the OpenAI API key
#### The key can be generated here: https://platform.openai.com/account/api-keys

In [27]:
import os
from openai import OpenAI

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables so that os.environ lookups
# in later cells (e.g. OPENAI_API_KEY) can find them.
# The return value is bound to `_` so it is not echoed as cell output.
_ = load_dotenv(find_dotenv()) # read local .env file

### Authenticate to the API using the API Key
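#### Pull the key from environment variables (recommended). Using api_key = ("your_key_here") to hardcode the key also works, but risks leaking the key if the notebook is shared or committed.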

In [28]:
# Shared API client used by the helper functions below.
# Raises KeyError if OPENAI_API_KEY is not present in the environment.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

#### Helper functions

In [29]:
def get_embeddings(sentences, model):
    """Request embeddings for ``sentences`` from the OpenAI embeddings endpoint.

    Args:
        sentences: Text (or list of texts) passed unchanged as the ``input``
            argument of ``client.embeddings.create``.
        model: Name of the embedding model, passed unchanged as ``model``.

    Returns:
        The response object from ``client.embeddings.create`` on success.
        If the call raises ``openai.APIError``, the status code and error
        payload are printed and the payload is returned instead (the decoded
        error body, or the error message when no body is available). Callers
        should therefore check the result before reading ``.data``.
    """
    # Local import: the notebook only does `from openai import OpenAI`, so the
    # bare name `openai` in the except clause would otherwise be undefined and
    # every API failure would surface as a NameError.
    import openai

    try:
        return client.embeddings.create(input=sentences, model=model)
    except openai.APIError as e:
        # openai>=1.0 no longer exposes `http_status` / `error`. Only HTTP
        # status errors carry `status_code`; connection-level failures do not,
        # so fall back to None for those.
        status_code = getattr(e, "status_code", None)
        body = getattr(e, "body", None)
        error = body if body is not None else str(e)
        print(status_code)
        print(error)
        return error

In [30]:
import numpy as np
from numpy.linalg import norm

# compute cosine similarity
def compute_cosine_similarity(embeddings1, embeddings2):
    """Return the cosine of the angle between two vectors.

    Args:
        embeddings1: First vector (any sequence accepted by ``np.dot``).
        embeddings2: Second vector, same length as ``embeddings1``.

    Returns:
        The dot product of the two vectors divided by the product of
        their Euclidean norms.
    """
    dot_product = np.dot(embeddings1, embeddings2)
    magnitude_product = norm(embeddings1) * norm(embeddings2)
    return dot_product / magnitude_product

### Determine textual entailment for similar sentences

In [31]:
# Premise/hypothesis pair describing the same event in different words.
# Note: the triple-quoted literal keeps its newlines and leading indentation,
# so that whitespace is part of the text sent for embedding.
text_premise = '''
                The astronaut completed her spacewalk outside 
                the International Space Station.
               '''
text_hypothesis = "The spacewalk occurred in space."

# Both texts are sent in a single request; later cells read
# response.data[0] and response.data[1], presumably in this list's order.
sentences = [text_premise, text_hypothesis]

response = get_embeddings(sentences,"text-embedding-ada-002")

### Compare the vectors 

#### Vectors need to be the same length for the comparison

In [32]:
# Number of dimensions in the first returned embedding.
len(response.data[0].embedding)

1536

In [33]:
# Number of dimensions in the second returned embedding.
len(response.data[1].embedding)

1536

#### Cosine similarity is a measure of similarity between two non-zero vectors. The value ranges from -1 to 1; the closer the value is to 1, the more similar the vectors are.

In [34]:
# Compare the two embeddings returned for the sentence pair above.
cosine = compute_cosine_similarity(response.data[0].embedding, response.data[1].embedding)
print("Cosine Similarity:", cosine)

Cosine Similarity: 0.9002355446204217


### Determine textual entailment for dissimilar sentences

In [35]:
# Unrelated premise/hypothesis pair, used as a contrast to the previous example.
# NOTE: this rebinds text_premise, text_hypothesis, sentences and response,
# so the earlier pair's values are no longer available after this cell runs.
text_premise = "A group of students is studying in the library."
text_hypothesis = "It is raining outside."

sentences = [text_premise, text_hypothesis]

response = get_embeddings(sentences,"text-embedding-ada-002")

In [36]:
# Number of dimensions in the first returned embedding.
len(response.data[0].embedding)

1536

In [37]:
# Number of dimensions in the second returned embedding.
len(response.data[1].embedding)

1536

In [38]:
# Score the two embeddings from the most recent get_embeddings response.
print(
    "Cosine Similarity:",
    compute_cosine_similarity(response.data[0].embedding, response.data[1].embedding),
)

Cosine Similarity: 0.7909503880133563
