In [29]:
import os
from openai import AzureOpenAI

# Connection settings come from environment variables so that no secret is
# stored in the notebook itself. os.getenv returns None for an unset variable.
endpoint = os.getenv("AZURE_OPENAI_TARGET_ENDPOINT")
# model_name and deployment hold the same value: the embedding deployment name,
# which the embedding helpers below pass as `model=` on every request.
model_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
deployment = model_name
api_key = os.getenv("AZURE_OPENAI_API_KEY")
# Single source of truth for the API version. The client was previously built
# with a hard-coded "2024-12-01-preview" while this variable said "2024-02-01"
# and was never used; the value the client actually ran with is kept here.
api_version = "2024-12-01-preview"

# Initialize the Azure OpenAI client

client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=api_key,
)

# get_embedding: returns the embedding for a given text;
#   raises ValueError if no embeddings are returned.
# print_embedding: prints an embedding in a formatted way.

def get_embedding(text):
    """Request an embedding for ``text`` and return the first vector in the response.

    Uses the module-level ``client`` and ``deployment``.

    Raises:
        ValueError: If the response contains no embeddings.
    """
    response = client.embeddings.create(input=text, model=deployment)
    items = response.data
    if len(items) > 0:
        return items[0].embedding
    raise ValueError("No embeddings returned for the input text.")

def print_embedding(embedding, values_per_row=10, show_raw=True):
    """Print an embedding vector as index-labelled rows.

    Args:
        embedding: Sequence of numbers that supports ``len()`` and slicing.
        values_per_row: Number of values printed on each labelled row
            (default 10, the previously hard-coded chunk size).
        show_raw: When true (the default), first print the whole vector on a
            single ``Embedding: ...`` line, as this helper did before. Pass
            ``False`` to skip that dump for long vectors.

    Raises:
        ValueError: If ``values_per_row`` is less than 1.
    """
    if values_per_row < 1:
        # range() rejects a zero step and silently yields nothing for a
        # negative one; fail loudly in both cases instead.
        raise ValueError("values_per_row must be at least 1.")
    if show_raw:
        print(f"Embedding: {embedding}")
    total = len(embedding)
    # Pad labels to the widest row index (never fewer than 3 characters) so
    # that rows stay aligned once the index passes 999.
    last_start = ((total - 1) // values_per_row) * values_per_row if total else 0
    label_width = max(3, len(str(last_start)))
    for start in range(0, total, values_per_row):
        print(f"  [{start:{label_width}d}]: {embedding[start:start + values_per_row]}")

# Example usage: embed the word "apple", then print the resulting vector
# (print_embedding writes the raw list followed by index-labelled rows).
vector = get_embedding("apple")
print("Vector value is:")
print_embedding(vector)



Vector value is:
Embedding: [-0.020793559029698372, 0.014009363017976284, -0.0008607447962276638, 0.01870741881430149, -0.008157994598150253, -0.0020034576300531626, 0.0038118697702884674, 0.021624622866511345, 0.016061581671237946, 0.03186875581741333, 0.017689788714051247, -0.022489607334136963, 0.014077205210924149, 0.01721489615738392, -0.0173420999199152, 0.022336963564157486, 0.02237088419497013, -0.012313314713537693, -0.020759638398885727, -0.009735320694744587, 0.0160276610404253, -0.053357698023319244, 0.013534469529986382, 0.02859538421034813, 0.016782402992248535, 0.0048125386238098145, -0.003625304438173771, -0.0004327574570197612, -0.00375462812371552, 0.02571210078895092, 0.03548134118318558, 0.031037693843245506, -0.009141703136265278, 0.005596961360424757, -0.055121585726737976, 0.03677034005522728, 0.0072718095034360886, -0.00198649731464684, 0.023439394310116768, -0.027136782184243202, 0.010956475511193275, -0.010447660461068153, -0.01615486480295658, 0.0449452959001

In [36]:

import numpy as np

# Anchor term, and the candidate terms it is compared against further down.
sentence1 = "Apple"
sentences2 = [
    "iPhone",
    "Banana",
    "Microsoft",
]

def get_embeddings(texts):
    """Embed ``texts`` with a single request and return the vectors as a list.

    Uses the module-level ``client`` and ``deployment``. The returned list
    holds one embedding per item of the response, in response order.

    Raises:
        ValueError: If the response contains no embeddings.
    """
    response = client.embeddings.create(input=texts, model=deployment)
    items = response.data
    if len(items) == 0:
        raise ValueError("No embeddings returned for the input texts.")
    vectors = []
    for item in items:
        vectors.append(item.embedding)
    return vectors

# Embed the anchor term on its own, then all candidate terms in one batched
# request (get_embeddings passes the whole list as `input`).
embedding1 = get_embedding(sentence1)
embeddings2 = get_embeddings(sentences2)



def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Args:
        a: First vector (any sequence or array accepted by ``np.dot``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector
        has zero norm.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # A zero vector has no direction; dividing here would produce nan and
        # a RuntimeWarning, so report "no similarity" instead.
        return 0.0
    return dot_product / norm_product

# Report how similar each candidate term is to the anchor term.
for idx, candidate in enumerate(sentences2):
    score = cosine_similarity(embedding1, embeddings2[idx])
    print(f"{sentence1} \t\t {candidate} \t\t Score: {score:.4f}")

Apple 		 iPhone 		 Score: 0.6738
Apple 		 Banana 		 Score: 0.3920
Apple 		 Microsoft 		 Score: 0.5439


In [37]:
import numpy as np

# Anchor term, and a second set of candidate terms to compare it against.
sentence1 = "Apple"
sentences2 = [
    "Fruit",
    "MacBook",
    "Orange",
]

# NOTE: this redefines the get_embeddings function already defined in the
# previous cell; keep the two copies in sync or drop one of them.
def get_embeddings(texts):
    """Embed ``texts`` with a single request and return the vectors as a list.

    Uses the module-level ``client`` and ``deployment``. The returned list
    holds one embedding per item of the response, in response order.

    Raises:
        ValueError: If the response contains no embeddings.
    """
    response = client.embeddings.create(input=texts, model=deployment)
    items = response.data
    if len(items) == 0:
        raise ValueError("No embeddings returned for the input texts.")
    vectors = []
    for item in items:
        vectors.append(item.embedding)
    return vectors

# Embed the anchor term on its own, then all candidate terms in one batched
# request (get_embeddings passes the whole list as `input`).
embedding1 = get_embedding(sentence1)
embeddings2 = get_embeddings(sentences2)



# NOTE: this redefines the cosine_similarity function already defined in the
# previous cell; keep the two copies in sync or drop one of them.
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Args:
        a: First vector (any sequence or array accepted by ``np.dot``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector
        has zero norm.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # A zero vector has no direction; dividing here would produce nan and
        # a RuntimeWarning, so report "no similarity" instead.
        return 0.0
    return dot_product / norm_product

# Report how similar each candidate term is to the anchor term.
for idx, candidate in enumerate(sentences2):
    score = cosine_similarity(embedding1, embeddings2[idx])
    print(f"{sentence1} \t\t {candidate} \t\t Score: {score:.4f}")

Apple 		 Fruit 		 Score: 0.4307
Apple 		 MacBook 		 Score: 0.5499
Apple 		 Orange 		 Score: 0.4369
