# Code to query documents from Azure AI Search

In [87]:
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from azure.search.documents.indexes.models import *
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import VectorizedQuery, VectorQuery

from typing import List

## Set environment variables

In [5]:
import os
from dotenv import load_dotenv

# Populate the process environment from the project-level .env file.
load_dotenv('../.env')

# Azure AI Search settings: service name, index name and API key.
# Any variable missing from the environment comes back as None.
searchservice, index, searchkey = (
    os.getenv(name) for name in ('searchservice', 'index', 'searchkey')
)

# OpenAI settings: key used for the embedding model.
openai_key = os.getenv('openai_key')

# Data settings: location of the source documents.
filepath = os.getenv('filepath')

# Wrap the raw search key in the credential object the Azure SDK expects.
search_creds = AzureKeyCredential(searchkey)

In [89]:
class Embedder:
    """Small wrapper around LangChain's ``OpenAIEmbeddings`` client.

    Attributes:
        embedder: the underlying ``OpenAIEmbeddings`` instance; every method
            delegates to its ``embed_documents`` call.
    """

    def __init__(
        self,
        key=None,
        model="text-embedding-ada-002"
    ):
        """Create the embedding client.

        Args:
            key: OpenAI API key, forwarded as ``openai_api_key``.
            model: name of the OpenAI embedding model to use.
        """
        # Imported here so the class can be defined without langchain_openai
        # being importable; the dependency is only needed on instantiation.
        from langchain_openai import OpenAIEmbeddings
        self.embedder = OpenAIEmbeddings(openai_api_key=key, model=model)

    def embed_in_batches(self, chunks, batch_size=16):
        """Embed ``chunks`` in consecutive slices of at most ``batch_size``.

        Args:
            chunks: sliceable sequence of texts to embed.
            batch_size: maximum number of texts sent per ``embed_documents``
                call; must be at least 1.

        Returns:
            A flat list with one embedding per chunk, in input order. An
            empty ``chunks`` yields an empty list without calling the client.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        embeddings = []
        # Stepping the start offset avoids computing a batch count up front
        # (the previous math.ceil call relied on a module that was never
        # imported and failed with NameError).
        for start in range(0, len(chunks), batch_size):
            batch = chunks[start:start + batch_size]
            embeddings.extend(self.embedder.embed_documents(batch))
        return embeddings

    def embed_single_document(self, text):
        """Embed one text and return its embedding.

        The client is called with a one-element list and the first (only)
        embedding of the response is returned.
        """
        embedding = self.embedder.embed_documents([text])
        return embedding[0]
    
class CustomAzureSearch:
    """Vector-search helper bound to a single Azure AI Search index.

    A text query is embedded with OpenAI (through ``Embedder``), wrapped in a
    ``VectorizedQuery`` and sent to the index via ``SearchClient.search``.
    """

    def __init__(self,
                 searchservice:str,
                 search_creds:"AzureKeyCredential",
                 index_name:str,
                 number_results_to_return:int,
                 number_near_neighbors:int,
                 embedding_field_name:str,
                 openai_key:str,
                 embedding_model:str
                 ):
        """Store the search settings and create the ``SearchClient``.

        Args:
            searchservice: service name; the endpoint is built as
                ``https://<searchservice>.search.windows.net/``.
            search_creds: credential object handed to ``SearchClient``
                (the notebook passes an ``AzureKeyCredential``).
            index_name: name of the index to query.
            number_results_to_return: value passed as ``top`` to the search.
            number_near_neighbors: value passed as ``k_nearest_neighbors``
                of the vector query.
            embedding_field_name: index field(s) the vector query targets.
            openai_key: OpenAI API key used to embed queries.
            embedding_model: OpenAI embedding model used to embed queries.
        """
        self.number_results_to_return=number_results_to_return
        self.number_near_neighbors=number_near_neighbors
        self.embedding_field_name=embedding_field_name
        self.openai_key=openai_key
        self.embedding_model=embedding_model

        # Embedding client is created on first use and then reused; see
        # get_embedding_query_vector.
        self._embedder = None
        self._embedder_config = None

        # initialize search client
        self.search_client = SearchClient(
            endpoint="https://{}.search.windows.net/".format(searchservice),
            index_name=index_name,
            credential=search_creds
        )

    def get_results_vector_search(self,
                                  query:str,
                                  fields_to_return:List[str]=None):
        """Run a pure vector search for ``query``.

        Args:
            query: natural-language query text.
            fields_to_return: index fields passed as ``select``; ``None``
                leaves the selection to the service default.

        Returns:
            Whatever ``SearchClient.search`` returns (an iterable of result
            documents), limited to ``number_results_to_return`` hits.
        """
        vector_query = self.get_vectorized_query(query)

        # search_text=None: no keyword component, only the vector query.
        results = self.search_client.search(
            search_text=None,
            vector_queries=[vector_query],
            select=fields_to_return,
            top=self.number_results_to_return
        )
        return results

    def get_vectorized_query(self, query:str):
        """Embed ``query`` and wrap the vector in a ``VectorizedQuery``."""
        query_vector = self.get_embedding_query_vector(query)
        vector_query = VectorizedQuery(
            vector=query_vector,
            k_nearest_neighbors=self.number_near_neighbors,
            fields=self.embedding_field_name
        )
        return vector_query

    def get_embedding_query_vector(self, query:str):
        """Return the embedding vector for ``query``.

        The ``Embedder`` is built lazily and cached so repeated queries reuse
        one client instead of constructing a new one per call. The cache is
        keyed on the current ``openai_key`` / ``embedding_model`` so changing
        either attribute after construction still takes effect.
        """
        config = (self.openai_key, self.embedding_model)
        if self._embedder is None or self._embedder_config != config:
            self._embedder = Embedder(key=self.openai_key, model=self.embedding_model)
            self._embedder_config = config
        return self._embedder.embed_single_document(query)
    

In [109]:
if __name__ == "__main__":
    # Build the search helper against the configured service and index,
    # using the ada-002 embedding model and the "embedding" vector field.
    custom_search = CustomAzureSearch(
        searchservice=searchservice,
        search_creds=search_creds,
        index_name=index,
        embedding_field_name="embedding",
        number_near_neighbors=3,
        number_results_to_return=3,
        openai_key=openai_key,
        embedding_model="text-embedding-ada-002",
    )

    # Run one sample vector query.
    query = "tell me about mlops best practices"
    results = custom_search.get_results_vector_search(query)

    # Render each hit as a four-line record; records are separated by two
    # blank lines.
    content = "\n\n\n".join(
        f"Document: {doc['sourcefile']}\n"
        f"Content: {doc['content']}\n"
        f"Source page:{doc['sourcepage']}\n"
        f"Similarity Score: {doc['@search.score']}"
        for doc in results
    )

    print(content)

Document: mlops_for_dummies_databricks_special_edition.pdf
Content: collaborate.
As this chapter highlights, MLOps helps you build and train 
 sustainable models, improve how they are implemented, ensure 
that they are reproducible and secure, and generate audit trails 
proving that your models are compliant with internal and  external 
requirements and expectations.18      MLOps For Dummies, Databricks Special Edition
These materials are © 2023 John Wiley & Sons, Inc. Any dissemination, distribution, or unauthorized use is strictly prohibited.Scaling Effectively
Each and every ML model is just as complex as the last one. It 
never really gets any easier. Your data scientists certainly build 
experience, but that doesn’t mitigate the sheer volume of work 
and levels of attention that each model requires. You need to be 
able­to­scale­this­work­effectively­ in­order­to­quench­ the­organiza-
tion’s thirst for ML.
That’s­one­of­the­most­obvious­ benefits­ of­MLOps.­ It­takes­the­
magic of