# Using OpenAI embedding models

## Installation and loading of libraries

In [None]:
!pip install openai
!pip install tiktoken

In [None]:
import openai
from getpass import getpass
import os
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Ask for the API key interactively (getpass does not echo the input) so the
# secret is not hard-coded in the notebook, then hand it to the openai module.
OPENAI_API_KEY = openai.api_key = getpass('Enter the secret value: ')

## Generating embeddings

In [None]:
def get_embedding(text: str, model: str = 'text-embedding-3-small'):
    """Return the embedding of ``text`` computed by an OpenAI embedding model.

    Args:
        text: The string to embed. Newline characters are replaced by spaces
            before the request is sent.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` field of the first item in the API response
        (presumably a list of floats -- confirm against the client docs).
    """
    # Newlines are turned into spaces instead of being deleted: deleting them
    # would glue the last word of one line to the first word of the next and
    # silently change the text that gets embedded.
    text = text.replace('\n', ' ')
    # The input is sent as a one-element batch, so the result is item 0.
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding

In [None]:
# Smoke test: embed one short Spanish sentence using get_embedding's default model.
embedding_prueba = get_embedding('esto es una prueba de embeddings para openAI')

In [None]:
# Length of the test embedding, i.e. the number of dimensions the model returned.
len(embedding_prueba)

### Test 1

In [None]:
# Word-analogy check: is (word1 - word3 + word4) close to word2?
# Try different words here
word1, word2, word3, word4 = "brother", "sister", "man", "woman"

# One embedding request per word, issued in the order word1..word4.
emb1, emb2, emb3, emb4 = [
    get_embedding(w) for w in (word1, word2, word3, word4)
]

# Analogy vector built by element-wise arithmetic on the embeddings.
result_vector = np.asarray(emb1) - np.asarray(emb3) + np.asarray(emb4)

# cosine_similarity works on 2-D inputs, so each vector is passed as a
# single-row matrix; the only entry of the 1x1 result is the score.
similarity = cosine_similarity(
    result_vector[np.newaxis, :],
    np.asarray(emb2)[np.newaxis, :],
)[0][0]

print(f"Cosine similarity between ({word1} - {word3} + {word4}) and {word2}: {similarity}")

### Test 2

In [None]:
# Pairs of words whose embeddings are compared with cosine similarity.
word_pairs = [
    ("cat", "dog"),
    ("dog", "airplane"),
    ("cat", "airplane"),
    ("airplane", "helicopter"),
]

# Several words appear in more than one pair. Embed each distinct word once
# (in order of first appearance) so no API request is repeated.
unique_words = dict.fromkeys(w for pair in word_pairs for w in pair)
embedding_cache = {w: get_embedding(w) for w in unique_words}

for word1, word2 in word_pairs:
    emb1 = embedding_cache[word1]
    emb2 = embedding_cache[word2]
    # cosine_similarity works on 2-D inputs: reshape each vector to one row
    # and read the single entry of the resulting 1x1 matrix.
    similarity = cosine_similarity(
        np.array(emb1).reshape(1, -1),
        np.array(emb2).reshape(1, -1),
    )[0][0]
    print(f"Cosine similarity between '{word1}' and '{word2}': {similarity}")