A quick way to generate embeddings using the text-embedding-ada-002 model. I use this to manually generate embeddings so I can test searches directly from the AI Search Portal.

For example, the following is a search you could run within the portal. To use it, you first need the embedding.

~~~
   &search=@search.vector(vectorVenderName, $VENDOR_EMBEDDING)
   &searchFields=vectorVenderName
   &$select=contractTitle
~~~

In [4]:
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from langchain.text_splitter import MarkdownHeaderTextSplitter
from azure.storage.blob import BlobSasPermissions
from azure.storage.blob import generate_blob_sas
from datetime import datetime, timedelta, UTC  # Added UTC
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from openai import AzureOpenAI 

import os
from dotenv import load_dotenv
from pathlib import Path

# The repository root is taken to be the parent of the current working
# directory; the .env file is looked up there.
root_dir = Path.cwd().parent
env_path = root_dir.joinpath('.env')

# Pull the settings from that .env file into the process environment
load_dotenv(dotenv_path=env_path)
print(f"Loaded .env from {env_path}")

# Azure OpenAI client used for the embedding calls; endpoint and key are read
# from the environment variables loaded above.
aoai_client = AzureOpenAI(
    api_version="2023-05-15",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)


def generate_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector produced for ``text``.

    ``text`` is sent as a single-item batch, so only the first entry of the
    response is read. ``model`` is forwarded unchanged as the ``model``
    argument of the embeddings call (i.e. the deployment name).
    """
    response = aoai_client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Sample vendor name to embed
query = "Fabrikam"

query_vector = generate_embeddings(query)

# Print the vector so it can be copied into a query in the AI Search portal.
# (Prints query_vector: no variable named `embeddings` is defined anywhere
# in this notebook, so printing that name fails on a fresh kernel.)
print(query_vector)

Loaded .env from c:\Users\rickcau\source\repos\vendor-contracts-gen-ai\.env
[-0.028883837163448334, -0.007260853890329599, -0.02141021005809307, -0.00328633445315063, -0.019614944234490395, 0.016436658799648285, -0.03633086755871773, -0.003023693570867181, -0.0028242196422070265, -0.01924259215593338, 0.01704837940633297, 0.006193668115884066, 0.01401637401431799, -0.0005331774009391665, -0.01118384301662445, -0.0005377486813813448, 0.011469755321741104, -0.011150597594678402, 0.01011998113244772, -0.013364758342504501, -0.010618666186928749, 0.025027340278029442, 0.013058898970484734, -0.006502852775156498, -0.03603830561041832, -0.010332753881812096, 0.013477793894708157, -0.03090517409145832, 0.004833920393139124, 0.002112762304022908, 0.0032929833978414536, -0.018418099731206894, -0.022513967007398605, -0.010572122409939766, 0.002626407891511917, -0.003653699066489935, -0.0033727730624377728, 0.01071840338408947, 0.005010122433304787, 0.004235498141497374, 0.02633056975901127, 0.01

Now, let's try performing a search using this embedding to see what kind of search results we get.

In [7]:
# Azure AI Search connection settings, read from the environment
search_endpoint = os.getenv('AZURE_SEARCH_ENDPOINT')
search_key = os.getenv('AZURE_SEARCH_KEY')
index_name = os.getenv('AZURE_SEARCH_INDEX')

# Client bound to the single index named by AZURE_SEARCH_INDEX
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(search_key),
)

# Vector part of the request: the query embedding is compared against the
# "vendorNameVector" field.
vector_query = VectorizedQuery(vector=query_vector, fields="vendorNameVector", kind="vector")
print(f"\nSearching using: {query}")

# Both search_text and vector_queries are supplied, and no `top` is passed.
# NOTE(review): sending text and vector together presumably makes this a
# hybrid (keyword + vector) query rather than a pure vector one - confirm.
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["id", "contractId", "vendorName"],
)

# Materialize the result iterator so the hits can be both printed and counted
all_results = list(results)

for result in all_results:
    print(result)

print(f"Total results: {len(all_results)}")


Searching using: Fabrikam
{'id': '1fb91887f558ee99f577956e7f6701df', 'vendorName': 'Fabrikam Services', 'contractId': '5004432', '@search.score': 0.03306011110544205, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}
{'id': 'f310326ed56592ebcef7c87b142efa32', 'vendorName': 'Fabrikam Services', 'contractId': '5004432-A1', '@search.score': 0.03306011110544205, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}
Total results: 2
