# Embeddings (1)

This notebook runs the OpenAI embeddings API on the **first quarter** of the original DataFrame.

- Read the DataFrame from the CSV file (words only).

In [1]:
import pandas as pd

# Load the word list and drop the 'Unnamed: 0' column in a single chained
# expression, then show the resulting frame as the cell output.
embeddings1 = pd.read_csv('emb1.csv').drop(columns='Unnamed: 0')
embeddings1

Unnamed: 0,word,vec
0,existence,"[-0.012068594805896282, 0.0012747612781822681,..."
1,being,"[0.004383814055472612, 0.011128472164273262, 0..."
2,entity,"[-0.01941847987473011, -0.011283711530268192, ..."
3,ens,"[-0.0191356148570776, 0.01700943522155285, -0...."
4,esse,"[-0.013645762577652931, 0.02081446535885334, -..."
...,...,...
24995,slowly,
24996,leisurely,
24997,piano,
24998,adagio,


- Use a personal API key to access the API, and choose the large embedding model (`text-embedding-3-large`), which provides vectors of up to **3072 elements**.  
- Then read each word and create the corresponding embedding vector.  
- Store the vector in our DataFrame.

In [3]:
import logging
import os
import traceback

from openai import OpenAI

# Read the key from the environment instead of hardcoding it in the notebook,
# so the secret is never saved or shared together with the file.
# Raises KeyError right away if OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY']) # Establish connection

# Make sure the vec column is filled with None before running
# (a list of None gives an object column, so each cell can later hold a list).
embeddings1['vec'] = [None] * len(embeddings1)

# Row labels whose request failed; their vec stays None so they can be retried.
failed_rows = []

# Tackle errors during execution with try-except schema
for i, word in embeddings1['word'].items():
    try:
        response = client.embeddings.create(
            input=word, # Give input
            model="text-embedding-3-large" # Define model
        )
        # Write with .at: the chained form embeddings1['vec'][i] = ... may
        # assign into a temporary copy, leaving the DataFrame unchanged.
        embeddings1.at[i, 'vec'] = response.data[0].embedding # Extract vector output
    except Exception as e:
        # Best effort: move on to the next word, but report the failure
        # instead of silently discarding it.
        logging.warning("Embedding failed for row %s (%r): %s", i, word, e)
        failed_rows.append(i)

In [4]:
# Display the frame as the cell output to inspect the `vec` column.
embeddings1

Unnamed: 0,word,vec
0,existence,"[-0.012068594805896282, 0.0012747612781822681,..."
1,being,"[0.004383814055472612, 0.011128472164273262, 0..."
2,entity,"[-0.01941847987473011, -0.011283711530268192, ..."
3,ens,"[-0.0191356148570776, 0.01700943522155285, -0...."
4,esse,"[-0.013645762577652931, 0.02081446535885334, -..."
...,...,...
24995,slowly,"[0.005791004281491041, -0.0055467807687819, -0..."
24996,leisurely,"[-0.007631841581314802, 0.045921508222818375, ..."
24997,piano,"[-0.019256502389907837, -0.03540017455816269, ..."
24998,adagio,"[-0.014269070699810982, -0.029934115707874298,..."


- Store the vectors in a CSV file.

In [5]:
# Persist the frame with its vectors. The row index is written as well
# (pandas default), so it comes back as an 'Unnamed: 0' column when re-read.
embeddings1.to_csv(path_or_buf='emb1L.csv', index=True)