# Submit formula functions to Azure AI Search

In [None]:
import os
import json
import uuid
from typing import List, Dict, Any
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
from openai import AzureOpenAI
from internal_shared.models.documents import DevExpressFunction
from internal_shared.models.ai import EMBEDDING_3_LARGE

# Azure OpenAI client used for embedding requests; every connection setting
# is taken from the shared EMBEDDING_3_LARGE model configuration.
embed_client = AzureOpenAI(
    azure_endpoint=EMBEDDING_3_LARGE.endpoint,
    api_version=EMBEDDING_3_LARGE.api_version,
    api_key=EMBEDDING_3_LARGE.api_key,
)

# Azure AI Search client bound to the target index. The endpoint and API key
# are read from the environment.
search_endpoint = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
search_credential = AzureKeyCredential(os.getenv("AZURE_AI_SEARCH_API_KEY"))
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name="mergedfunctionindex_v2",
    credential=search_credential,
)

def load_json(file_path: str) -> List[DevExpressFunction]:
    """Load function definitions from a JSON file.

    The file must contain a JSON array of objects; each object is unpacked as
    keyword arguments into a ``DevExpressFunction``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        One ``DevExpressFunction`` per array item, in file order.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return [DevExpressFunction(**item) for item in json.load(file)]

def convert_to_search_document(embedding: List[float], data: DevExpressFunction) -> Dict[str, Any]:
    """Build the search-index document for a single function.

    Args:
        embedding: Embedding vector stored under the ``embedding`` key.
        data: Function whose description, name, example, category, source and
            keywords are copied into the document.

    Returns:
        A dict with a freshly generated random UUID as ``id``, the function's
        description as ``content``, and the remaining fields copied over.
        Missing or empty keywords are stored as an empty list.
    """
    document_id = str(uuid.uuid4())
    keywords = data.keywords if data.keywords else []

    search_document: Dict[str, Any] = dict(
        id=document_id,
        content=data.description,
        embedding=embedding,
        name=data.name,
        example=data.example,
        category=data.category,
        source=data.source,
        keywords=keywords,
    )
    return search_document

def embed_all_text(text: List[str]) -> List[List[float]]:
    """Embed a list of strings with the configured embedding model.

    Args:
        text: Strings to embed in a single request.

    Returns:
        One embedding vector per input string, ordered like ``text``. An
        empty input returns an empty list without calling the service.
    """
    if not text:
        # Nothing to embed, so skip the round trip to the service.
        return []

    response = embed_client.embeddings.create(
        input=text, model=EMBEDDING_3_LARGE.model_name
    )
    # Each result carries the index of the input it belongs to; sort on it so
    # the output lines up with ``text`` regardless of response order.
    ordered_results = sorted(response.data, key=lambda result: result.index)
    return [result.embedding for result in ordered_results]

Read the custom and native functions and collect their descriptions so they can be converted to search documents

In [None]:
# Custom functions are loaded first; the native functions are then appended
# in place so both sets end up in the single ``documents`` list.
native_documents = None
documents = load_json("/workspace/data/functions/data.custom.json")
native_documents = load_json("/workspace/data/functions/data.json")
documents += native_documents

# The description of each function is the text that gets embedded.
descriptions = [function.description for function in documents]

len(descriptions)

208

Embed the function descriptions and convert the functions to search documents in batches

In [None]:
import time

# Embed the descriptions in batches and pair every embedding with the
# function it was computed from to build the search documents.
search_documents = []
embedding_batch_size = 1000
# Ceiling division: number of batches needed to cover all descriptions.
total_embedding_batches = (len(descriptions) + embedding_batch_size - 1) // embedding_batch_size

for batch_number, i in enumerate(
    range(0, len(descriptions), embedding_batch_size), start=1
):
    # ``descriptions`` was built from ``documents`` in the same order, so the
    # same slice selects the matching functions.
    batch_documents = documents[i : i + embedding_batch_size]
    batch_descriptions = descriptions[i : i + embedding_batch_size]
    batch_embeddings = embed_all_text(batch_descriptions)

    # zip() would silently drop unmatched items, so fail loudly instead.
    if len(batch_embeddings) != len(batch_descriptions):
        raise ValueError(
            f"Expected {len(batch_descriptions)} embeddings for batch "
            f"{batch_number}, got {len(batch_embeddings)}."
        )

    search_documents.extend(
        convert_to_search_document(embedding, function)
        for embedding, function in zip(batch_embeddings, batch_documents)
    )

    # before next iteration, wait time to avoid rate limiting
    if batch_number < total_embedding_batches:
        time.sleep(30)

len(search_documents)

208

Upload the created search documents in batches

In [None]:
import time

# Upload the search documents in batches and keep running totals so the
# final summary covers every batch, not just the last one.
upload_batch_size = 1000
# Ceiling division: number of batches needed to cover all search documents.
total_upload_batches = (
    len(search_documents) + upload_batch_size - 1
) // upload_batch_size

uploaded_count = 0
failed_count = 0

for i in range(0, len(search_documents), upload_batch_size):
    batch = search_documents[i : i + upload_batch_size]
    results = search_client.upload_documents(documents=batch)
    failures = [result for result in results if not result.succeeded]
    uploaded_count += len(results) - len(failures)
    failed_count += len(failures)

    if not failures:
        print(f"Uploaded batch {i // upload_batch_size + 1} successfully.")
    else:
        for result in failures:
            print(
                f"Failed to upload document with ID {result.key}. Error: {result.error_message}"
            )

    # ensure we don't hit the rate limit
    if (i // upload_batch_size + 1) < total_upload_batches:
        time.sleep(30)

# Only claim full success when no document failed in any batch.
if failed_count:
    print(
        f"Uploaded {uploaded_count} of {len(search_documents)} documents; "
        f"{failed_count} failed."
    )
else:
    print(f"All {uploaded_count} documents uploaded.")

Uploaded batch 1 successfully.
All 208 documents uploaded.
