## Generate embeddings with the OpenAI SDK

### Set up the OpenAI client with GitHub Models

In [None]:
#%pip install python-dotenv
#%pip install openai

import os
import dotenv
import openai

# Load variables from a local .env file (if one exists) into the process
# environment so GITHUB_TOKEN can be read below.
dotenv.load_dotenv()

# OpenAI SDK client pointed at the GitHub Models inference endpoint instead of
# the default OpenAI API. GITHUB_TOKEN must be defined (in the environment or
# the .env file); if it is missing, the os.environ lookup raises KeyError.
openai_client = openai.OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"]
)
# Embedding model name passed to every embeddings.create call in this notebook.
MODEL_NAME = "text-embedding-3-small"

### Generate embeddings using the OpenAI client

In [None]:
# Sample document text (in Spanish) that is embedded and later used as the
# reference vector in the similarity comparisons.
content_input = "Hoja de vida: Lionel Messi. Futbolista argentino, considerado uno de los mejores jugadores de fútbol de todos los tiempos."

In [None]:
# Request an embedding for content_input from the configured model.
embeddings_response = openai_client.embeddings.create(
    model=MODEL_NAME,
    input=content_input,
)
# Only one input was sent, so the vector is read from the first entry of `data`.
embedding = embeddings_response.data[0].embedding

# Show the number of dimensions first, then the raw vector values.
print(len(embedding))
print(embedding)

### Vector similarity

In [None]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two numeric vectors.

    The result is the dot product of the vectors divided by the product of
    their Euclidean norms, so it lies in [-1, 1] (up to floating-point
    rounding) for non-zero vectors.

    Args:
        v1: Sequence of numbers (e.g. an embedding as a list of floats).
        v2: Sequence of numbers with the same length as ``v1``.

    Returns:
        The cosine similarity as a float. ``0.0`` is returned when either
        vector has zero magnitude (including empty vectors), because the
        angle between the vectors is undefined in that case.

    Raises:
        ValueError: If the vectors have different lengths.
    """
    # zip() would silently truncate to the shorter vector while the norms
    # below still use every component, producing a meaningless score.
    if len(v1) != len(v2):
        raise ValueError(
            f"Vectors must have the same length, got {len(v1)} and {len(v2)}"
        )

    dot_product = sum(a * b for a, b in zip(v1, v2))

    # Product of the two Euclidean norms: sqrt(|v1|^2 * |v2|^2).
    magnitude = (sum(a**2 for a in v1) * sum(b**2 for b in v2)) ** 0.5

    # Guard against division by zero for all-zero or empty vectors.
    if magnitude == 0:
        return 0.0

    return dot_product / magnitude

In [None]:
# Embed the reference document text; this vector is what both queries are
# compared against in the similarity cells.
embeddings_response = openai_client.embeddings.create(
    model=MODEL_NAME,
    input=content_input,
)
# Single input, so the vector is the first entry of `data`.
content_embedding = embeddings_response.data[0].embedding
print(content_embedding)

In [None]:
# Embed the first query: a bare person name with no surrounding context.
embeddings_response = openai_client.embeddings.create(
    model=MODEL_NAME,
    input="Diego Zumárraga Mera",
)
# Single input, so the vector is the first entry of `data`.
query_embedding1 = embeddings_response.data[0].embedding
print(query_embedding1)

In [None]:
# Compare the first query vector with the document vector and print the
# cosine similarity rounded to four decimal places.
similarity = cosine_similarity(query_embedding1, content_embedding)
print(f"Similarity: {similarity:.4f}")

In [None]:
# Embed the second query: the same name phrased as a full instruction
# (in Spanish), to compare against the bare-name query above.
embeddings_response = openai_client.embeddings.create(
    model=MODEL_NAME,
    input="Resume la hoja de vida de Diego Zumárraga Mera",
)
# Single input, so the vector is the first entry of `data`.
query_embedding2 = embeddings_response.data[0].embedding
print(query_embedding2)

In [None]:
# Compare the second query vector with the document vector and print the
# cosine similarity rounded to four decimal places.
similarity = cosine_similarity(query_embedding2, content_embedding)
print(f"Similarity: {similarity:.4f}")