## Vector Search 

###  Libraries


In [52]:
import json
import datetime
import time

from azure.core.exceptions import AzureError
from azure.core.credentials import AzureKeyCredential
from azure.cosmos import exceptions, CosmosClient, PartitionKey
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryLanguage,
    QueryType,
    Vector  
)
from azure.search.documents.indexes.models import (
    IndexingSchedule,
    SearchIndex,
    SearchIndexer,
    SearchIndexerDataContainer,
    SearchField,
    SearchFieldDataType,
    SearchableField,
    SemanticConfiguration,
    SimpleField,
    SemanticField,
    VectorSearch,
   SearchIndexerDataSourceConnection,
    
)


from tenacity import retry, wait_random_exponential, stop_after_attempt


import os
from dotenv import load_dotenv

import openai
from openai.embeddings_utils import  cosine_similarity


### Environment variables

In [None]:

def _require_env(name):
    """Return the value of environment variable `name`, failing fast if it is unset or empty.

    Args:
        name: Exact name of the environment variable to read.

    Returns:
        The non-empty string value of the variable.

    Raises:
        RuntimeError: If the variable is missing or set to an empty string.
    """
    value = os.getenv(name)
    if not value:
        # Raise instead of print + exit(): an exception stops the cell (and "Run All")
        # at this point. NOTE(review): exit() inside a notebook kernel requests a kernel
        # shutdown rather than raising, so later lines could still run with None values.
        raise RuntimeError(f"{name} environment variable not set.")
    return value


# Load settings from a local .env file (if present) into the process environment.
load_dotenv()

# Cosmos DB connection settings
cosmos_db_api_endpoint = _require_env("cosmos_db_api_endpoint")
cosmos_db_api_key = _require_env("cosmos_db_api_key")
cosmos_db_connection_string = _require_env("cosmos_db_connection_string")

# Azure Cognitive Search connection settings
cog_search_endpoint = _require_env("cog_search_endpoint")
cog_search_key = _require_env("cog_search_key")

# OpenAI settings (note: these two variable names are upper-case in the environment)
aoai_embedding_deployed_model = _require_env("aoai_embedding_deployed_model")
azure_openai_key = _require_env("AZURE_OPENAI_KEY")
aoai_api_version = _require_env("AOAI_API_VERSION")

# Container names used by this notebook
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

database_name = "Vector_DB"

# Key credential shared by every SearchClient created below.
credential = AzureKeyCredential(cog_search_key)


#### Open AI

In [None]:
import openai
from openai.embeddings_utils import  cosine_similarity


# Configure the openai module with the key and API version read from the environment.
# NOTE(review): no api_type or endpoint/base URL is set in this cell; if the target is an
# Azure OpenAI resource those presumably need to be configured as well — confirm.
openai.api_key = azure_openai_key
openai.api_version = aoai_api_version


#### Initialize the search

In [22]:
# Connect to the Cosmos DB account and get a handle to the database used by this notebook
cosmos_client = CosmosClient(
    url=cosmos_db_api_endpoint,
    credential=cosmos_db_api_key,
)
database = cosmos_client.get_database_client(database_name)


##### Embedding Function

In [39]:
def get_embedding(text, model=aoai_embedding_deployed_model):
    """Return the embedding vector for `text` generated by `model`.

    Args:
        text: Input string; newline characters are replaced with spaces before sending.
        model: Embedding model/deployment name passed to the embeddings endpoint.

    Returns:
        The `embedding` attribute of the first item in the response data.

    NOTE(review): `openai.embeddings.create` is the openai>=1.0 call style, while this
    notebook also imports `openai.embeddings_utils` — confirm which SDK version is targeted.
    """
    single_line_text = text.replace("\n", " ")
    response = openai.embeddings.create(input=[single_line_text], model=model)
    first_item = response.data[0]
    return first_item.embedding

### Simple Vector Search
##### Use the index names that you created previously, during the ingestion steps, for the respective containers


#### Using Simple Search with Vector

In [None]:

# Vector query against the "content_vector" field of the text_sample index.

container_name = 'text_sample'
index_name = "text_sample_index"

container = database.get_container_client(container_name)
search_client = SearchClient(cog_search_endpoint, index_name, credential)

# Embed the query text, then wrap the embedding in a Vector aimed at "content_vector"
query = 'tools for software development'
query_embedding = get_embedding(query, model=aoai_embedding_deployed_model)
content_vector_query = Vector(value=query_embedding, fields="content_vector")

search_results = search_client.search(
    search_text="",
    vector=content_vector_query,
    select=["title", "content", "category", "title_vector", "content_vector"],
)

# (label, result key) pairs, printed in this order for every hit
printed_fields = (
    ("Title", "title"),
    ("Score", "@search.score"),
    ("Content", "content"),
    ("Category", "category"),
)

for result in search_results:
    for label, key in printed_fields:
        print(f"{label}: {result[key]}")


#### Cross Search (Two columns)

In [None]:
container_name = "text_sample"
index_name = "text_sample_index"

# Minimum cosine similarity (on either field) for a hit to be printed; adjust as needed.
SIMILARITY_THRESHOLD = 0.7

container = database.get_container_client(container_name)
search_client = SearchClient(cog_search_endpoint, index_name, credential)

query = 'tools for software development'
query_vector = get_embedding(query, model=aoai_embedding_deployed_model)

# Text query; the stored vectors are retrieved so they can be compared client-side.
search_results = search_client.search(
    search_text=query,
    select=["title", "content", "category", "title_vector", "content_vector"]
)

# Keep only hits whose title OR content vector is close enough to the query vector.
for result in search_results:
    title_vector = result.get("title_vector")
    content_vector = result.get("content_vector")

    # Truthiness (rather than `is not None`) skips both missing and empty vectors;
    # an empty list cannot be compared with the query embedding.
    if not title_vector or not content_vector:
        print("Skipping result with empty or missing vector.\n")
        continue

    title_similarity = cosine_similarity(query_vector, title_vector)
    content_similarity = cosine_similarity(query_vector, content_vector)

    if title_similarity > SIMILARITY_THRESHOLD or content_similarity > SIMILARITY_THRESHOLD:
        print(f"Title: {result['title']}")
        print(f"Score: {result['@search.score']}")
        print(f"Content: {result['content']}")
        print(f"Category: {result['category']}")
        print(f"Title Cosine Similarity: {title_similarity}")
        print(f"Content Cosine Similarity: {content_similarity}\n")


### Hybrid search + Semantic Rank

In [None]:
# Semantic ranking must be enabled on the search service first:
# https://learn.microsoft.com/en-us/azure/search/semantic-how-to-enable-disable?tabs=enable-portal
# The semantic configuration name below must match the one defined during ingestion.
# NOTE(review): this query sends text only (no `vector=` argument); pass a vector as
# well if a true hybrid (text + vector) query is intended.

container_name = "text_sample"
index_name = "text_sample_index"
semantic_configuration = 'ConfigSemantictext'

container = database.get_container_client(container_name)
search_client = SearchClient(cog_search_endpoint, index_name, credential)

query = 'Azure DevOps is a suite of services that help you plan'

# Semantic query requesting extractive captions and answers, limited to the top 5 hits.
search_results = search_client.search(
    search_text=query,
    select=["title", "content", "category", "title_vector", "content_vector"],
    query_type=QueryType.SEMANTIC,
    query_language=QueryLanguage.EN_US,
    semantic_configuration_name=semantic_configuration,
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=5
)

# get_answers() may return None when the service produced no answers; fall back to an
# empty list so the loop below is simply skipped instead of raising TypeError.
_answers = search_results.get_answers() or []
for answer in _answers:
    print(f"Semantic Answer: {answer}")
    if answer.highlights:
        print(f"Semantic Answer highlight: {answer.highlights}")
    else:
        print(f"Semantic Answer Text : {answer.text}")
    print(f"Semantic Answer Score: {answer.score}\n")


# Print every hit together with its first caption (highlighted form preferred).
for row in search_results:
    print(f"Title: {row['title']}")
    print(f"Score: {row['@search.score']}")
    print(f"Content: {row['content']}")
    print(f"Category: {row['category']}")

    # .get() tolerates hits that carry no caption entry at all.
    captions = row.get("@search.captions")
    if captions:
        caption = captions[0]
        if caption.highlights:
            print(f"Caption: {caption.highlights}\n")
        else:
            print(f"Caption: {caption.text}\n")


#### Document search example

In [None]:
container_name = 'doc_sample'
index_name = "doc_sample_index"

container = database.get_container_client(container_name)
search_client = SearchClient(cog_search_endpoint, index_name, credential)

# Vector query against the "chunk_content_vector" field of the doc_sample index.

query = 'web hosting services'

# get_embedding() takes the deployment via `model=`; it has no `engine` parameter,
# so the previous `engine=` keyword raised TypeError before any search was sent.
query_embedding = get_embedding(query, model=aoai_embedding_deployed_model)

search_results = search_client.search(
        search_text="",
        vector=Vector(value=query_embedding, fields="chunk_content_vector"),
        select=["chunk_content", "chunk_content_vector"]
    )


for result in search_results:
    print(f"chunk_content: {result['chunk_content']}")
    print(f"Score: {result['@search.score']}")
    print(f"chunk_content_vector: {result['chunk_content_vector']}")

### Hybrid search + Semantic Rank

In [None]:
# Semantic ranking must be enabled on the search service first:
# https://learn.microsoft.com/en-us/azure/search/semantic-how-to-enable-disable?tabs=enable-portal
# The semantic configuration name below must match the one defined during ingestion.
# NOTE(review): this query sends text only (no `vector=` argument); pass a vector as
# well if a true hybrid (text + vector) query is intended.

container_name = "doc_sample"
index_name = "doc_sample_index"
semantic_configuration = 'ConfigSemanticdoc'

container = database.get_container_client(container_name)
search_client = SearchClient(cog_search_endpoint, index_name, credential)

query = 'This policy applies to all Contoso Electronics employees'

# Semantic query requesting extractive captions and answers, limited to the top 5 hits.
search_results = search_client.search(
    search_text=query,
    select=["chunk_content", "chunk_content_vector"],
    query_type=QueryType.SEMANTIC,
    query_language=QueryLanguage.EN_US,
    semantic_configuration_name=semantic_configuration,
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=5
)


# get_answers() may return None when the service produced no answers; fall back to an
# empty list so the loop below is simply skipped instead of raising TypeError.
_answers = search_results.get_answers() or []
for answer in _answers:
    print(f"Semantic Answer: {answer}")
    if answer.highlights:
        print(f"Semantic Answer highlight: {answer.highlights}")
    else:
        print(f"Semantic Answer Text : {answer.text}")
    print(f"Semantic Answer Score: {answer.score}\n")

# Print every hit together with its first caption (highlighted form preferred).
for row in search_results:
    print(f"chunk_content: {row['chunk_content']}")
    print(f"Score: {row['@search.score']}")
    print(f"chunk_content_vector: {row['chunk_content_vector']}")

    # .get() tolerates hits that carry no caption entry at all.
    captions = row.get("@search.captions")
    if captions:
        caption = captions[0]
        if caption.highlights:
            print(f"Caption: {caption.highlights}\n")
        else:
            print(f"Caption: {caption.text}\n")


