## Vector Search 

### Set environment variables

#### Libraries


In [3]:
import json
import datetime
import time

from azure.core.exceptions import AzureError
from azure.core.credentials import AzureKeyCredential
from azure.cosmos import exceptions, CosmosClient, PartitionKey
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.models import Vector
from azure.search.documents.indexes.models import (
    IndexingSchedule,
    SearchIndex,
    SearchIndexer,
    SearchIndexerDataContainer,
    SearchField,
    SearchFieldDataType,
    SearchableField,
    SemanticConfiguration,
    SimpleField,
    PrioritizedFields,
    SemanticField,
    SemanticSettings,
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
    SearchIndexerDataSourceConnection
)

import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt

import os
from dotenv import load_dotenv


In [None]:

# Load key/value pairs from a local .env file into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of the environment variable `name`.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's non-empty string value.

    Raises:
        RuntimeError: If the variable is unset or set to an empty string.
    """
    value = os.getenv(name)
    if not value:
        # Raise instead of calling exit(): inside a notebook, exit() asks the
        # kernel to shut down, which hides the message and discards all state.
        raise RuntimeError(f"{name} environment variable not set.")
    return value


# Azure Cosmos DB account endpoint and key.
cosmos_db_api_endpoint = _require_env("cosmos_db_api_endpoint")
cosmos_db_api_key = _require_env("cosmos_db_api_key")

# Azure Cognitive Search service endpoint and admin/query key.
cog_search_endpoint = _require_env("cog_search_endpoint")
cog_search_key = _require_env("cog_search_key")

# Azure OpenAI embedding deployment name and API key.
# NOTE(review): nothing in the visible cells assigns aoai_key (or an endpoint /
# API version) to the `openai` module — confirm that the client is configured
# elsewhere before get_embedding() is called.
aoai_embedding_deployed_model = _require_env("aoai_embedding_deployed_model")
aoai_key = _require_env("aoai_key")

# Names of the sample tables/containers used by the search examples below.
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'





#### Initialize the search

In [5]:
# CosmosClient, SearchClient and AzureKeyCredential are already imported in the
# libraries cell at the top of the notebook, so they are not re-imported here.

# Assign the container/index names BEFORE the clients that consume them are
# built; otherwise this cell raises NameError on a fresh kernel (Run All).
container_name = 'text_sample'
index_name = "text_sample_index"

# NOTE(review): database_name is not assigned anywhere in the visible notebook.
# Fail with an explicit message rather than an opaque NameError further down.
if "database_name" not in globals():
    raise NameError(
        "database_name is not defined; set it to the Cosmos DB database that "
        "holds the sample containers before running this cell."
    )

# Initialize Cosmos DB client
cosmos_client = CosmosClient(cosmos_db_api_endpoint, cosmos_db_api_key)
database = cosmos_client.get_database_client(database_name)
container = database.get_container_client(container_name)

credential = AzureKeyCredential(cog_search_key)

# Connect to Azure Cognitive Search. The client stays bound to the index name
# passed here; reassigning `index_name` later does not retarget it.
search_client = SearchClient(
    endpoint=cog_search_endpoint,
    index_name=index_name,
    credential=credential,
)



#### Simple Vector Search
##### Use the index name that you created earlier, during ingestion


In [76]:
from openai.embeddings_utils import get_embedding, cosine_similarity
# Query the text sample index with both a keyword search and a vector search,
# then print the two result sets side by side.

container_name = 'text_sample'
index_name = "text_sample_index"

# Bind the client to this cell's index explicitly: an existing SearchClient
# keeps the index name it was constructed with, so reassigning `index_name`
# on its own would not change which index is queried.
search_client = SearchClient(
    endpoint=cog_search_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(cog_search_key),
)

query = 'tools for software development'

# Keyword (full-text) search over the index.
query_columns = search_client.search(search_text=query, select=["title", "content", "category"])

# get_embedding() returns the embedding of `query` as a flat list of floats.
# `k` and `fields` are vector-search parameters, so they belong on the Vector
# query object rather than on the embedding call.
query_vector = get_embedding(query, engine=aoai_embedding_deployed_model)

# Pure vector search: the 3 nearest neighbours of the query embedding in the
# "contentvector" field.
# NOTE(review): `vectors=[Vector(...)]` is the form used by the
# azure-search-documents 11.4.0 beta that exports `Vector`; confirm against the
# installed SDK version.
vector_results = search_client.search(
    search_text=None,
    vectors=[Vector(value=query_vector, k=3, fields="contentvector")],
    select=["title", "content", "category"],
)

# zip() stops at the shorter result set, so at most k rows are printed.
for rank, (search_result, row_vector) in enumerate(zip(query_columns, vector_results), start=1):
    print(f"Result {rank} from Azure Cognitive Search:")
    print(f"Title: {search_result['title']}")
    print(f"Content: {search_result['content']}")
    print(f"Category: {search_result['category']}\n")

    print(f"Result {rank} from Embeddings:")
    print(f"Title: {row_vector['title']}")
    print(f"Content: {row_vector['content']}")
    print(f"Category: {row_vector['category']}\n")

#### Cross Search (Two columns)

In [58]:

# Query parameters
query = 'tools for software development'

# Keyword (full-text) search over the index.
query_columns = search_client.search(search_text=query, select=["title", "content", "category"])

# get_embedding() returns a flat list of floats for the query text; the search
# parameters (`k`, `fields`) go on the Vector query object instead.
query_vector = get_embedding(query, engine=aoai_embedding_deployed_model)

# Cross-field vector search: the same query vector is evaluated against both
# vector fields, given as a comma-separated field list.
# NOTE(review): `vectors=[Vector(...)]` is the form used by the
# azure-search-documents 11.4.0 beta that exports `Vector`; confirm against the
# installed SDK version.
vector_results = search_client.search(
    search_text=None,
    vectors=[Vector(value=query_vector, k=3, fields="contentvector, titlevector")],
    select=["title", "content", "category"],
)

# The earlier per-row vector equality check is dropped: the vector fields were
# never selected (KeyError) and the right-hand side was a float, not a document.
# zip() stops at the shorter result set, so at most k rows are printed.
for rank, (search_result, row_vector) in enumerate(zip(query_columns, vector_results), start=1):
    print(f"Result {rank} from Azure Cognitive Search:")
    print(f"Title: {search_result['title']}")
    print(f"Content: {search_result['content']}")
    print(f"Category: {search_result['category']}\n")

    print(f"Result {rank} from Embeddings:")
    print(f"Title: {row_vector['title']}")
    print(f"Content: {row_vector['content']}")
    print(f"Category: {row_vector['category']}\n")


#### Document search example

In [83]:
from openai.embeddings_utils import get_embedding, cosine_similarity
# Query the document sample index with both a keyword search and a vector
# search over the chunk embeddings.

container_name = 'doc_sample'
index_name = "doc_sample_index"

# Re-create the client for the document index: the client built earlier stays
# bound to the index name it was constructed with, and "chunk_content" is only
# selectable on the index that defines it.
search_client = SearchClient(
    endpoint=cog_search_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(cog_search_key),
)

query = 'tools for software development'

# Keyword (full-text) search over the document chunks.
query_columns = search_client.search(search_text=query, select=["chunk_content"])

# get_embedding() returns a flat list of floats for the query text; the search
# parameters (`k`, `fields`) go on the Vector query object instead.
query_vector = get_embedding(query, engine=aoai_embedding_deployed_model)

# Vector search: the 3 nearest chunks by the "chunk_content_vector" field.
# NOTE(review): `vectors=[Vector(...)]` is the form used by the
# azure-search-documents 11.4.0 beta that exports `Vector`; confirm against the
# installed SDK version.
vector_results = search_client.search(
    search_text=None,
    vectors=[Vector(value=query_vector, k=3, fields="chunk_content_vector")],
    select=["chunk_content"],
)

# zip() stops at the shorter result set, so at most k rows are printed.
for rank, (search_result, row_vector) in enumerate(zip(query_columns, vector_results), start=1):
    print(f"Result {rank} from Azure Cognitive Search:")
    print(f"Chunk Content: {search_result['chunk_content']}\n")

    print(f"Result {rank} from Embeddings:")
    print(f"Chunk Content: {row_vector['chunk_content']}\n")

#### Image search example

In [None]:
## TODO