# OpenAI API: Embeddings

### Install the necessary libraries. 

In [None]:
pip install openai

In [None]:
pip install openai[datalib]

In [None]:
pip install urllib3==1.26.6 

In [None]:
pip install python-dotenv

In [None]:
pip install scikit-learn

### Import the libraries and load the environment file to gain access to the OpenAI API key
#### The key can be generated here: https://platform.openai.com/account/api-keys

In [41]:
import os
from openai import OpenAI

from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and load its variables so the API key can be read
# from the environment below. The result is assigned to `_`, presumably so the
# notebook does not echo the call's return value.
_ = load_dotenv(find_dotenv())

### Authenticate to the API using the API Key
#### Pull the key from the environment variables, or pass api_key="your_key_here" to hardcode it (avoid hardcoding keys in shared notebooks)

In [42]:
# Create the API client with the secret key taken from the OPENAI_API_KEY
# environment variable; a missing variable raises KeyError at this point.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

### Cluster similar words

In [43]:
from sklearn.cluster import KMeans

# Vocabulary to cluster: a mix of fruits and vehicles.
words = [
    'apple',
    'banana',
    'car',
    'bike',
    'grape',
    'truck',
]

# Send the whole word list to the embeddings endpoint in a single request.
response = client.embeddings.create(
    model="text-embedding-ada-002",
    input=words,
)

In [44]:
# Pull the embedding vector out of every item in the API response.
# response.data is assumed to hold one entry per input word, in the same
# order as `words`, so embeddings[i] corresponds to words[i].
embeddings = [item.embedding for item in response.data]

In [45]:
# Group the embedding vectors with k-means clustering.

# Two clusters, one per expected category: fruit and vehicle.
k = 2

# random_state=0 makes the results reproducible.
# n_init=10 is the number of times the k-means algorithm is run.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
kmeans.fit(embeddings)

# Cluster index assigned to each embedding, in input order.
labels = kmeans.labels_

In [46]:
# Report every word next to the cluster it was assigned to.
for word, label in zip(words, labels):
    print(f"{word}: Cluster {label}")

apple: Cluster 0
banana: Cluster 0
car: Cluster 1
bike: Cluster 1
grape: Cluster 0
truck: Cluster 1
