## Vector Search - Azure AI Search using Python SDK

### Prerequisites
  
- Generate embeddings - [generate_embeddings.ipynb](../../common/generate_embeddings.ipynb) 
- Create AI Search Index and ingest embeddings - [ai_search_sdk_data_pipeline.ipynb](./ai_search_sdk_data_pipeline.ipynb)

#### Set environment variables

In [None]:
import os
from dotenv import load_dotenv
import openai

load_dotenv()


def _require_env(name):
    """Return environment variable `name`; print a notice and call exit() when it is unset or empty."""
    value = os.getenv(name)
    if not value:
        print(f"{name} environment variable not set.")
        exit()
    return value


# Azure AI Search settings.
ais_endpoint = _require_env("AIS_ENDPOINT")
ais_api_version = _require_env("AIS_API_VERSION")
ais_key = _require_env("AIS_KEY")

# Azure OpenAI settings.
aoai_endpoint = _require_env("AOAI_ENDPOINT")
aoai_api_version = _require_env("AOAI_API_VERSION")
aoai_embedding_deployed_model = _require_env("AOAI_EMBEDDING_DEPLOYED_MODEL")
azure_openai_key = _require_env("AZURE_OPENAI_KEY")

# Computer Vision settings (read by vectorize_text_com_vision).
com_vision_endpoint = _require_env("COM_VISION_ENDPOINT")
com_vision_api_version = _require_env("COM_VISION_API_VERSION")
com_vision_key = _require_env("COM_VISION_KEY")

# Names of the search indexes queried by the demos below.
text_index_name = 'text-sample'
doc_index_name = 'doc-sample'
image_index_name = 'image-sample'

# Module-level openai settings; the demo cells additionally build their own
# AzureOpenAI client from the same values.
openai.api_type = "azure"
openai.api_key = azure_openai_key
openai.api_base = aoai_endpoint
openai.api_version = aoai_api_version

#### Helper methods

In [None]:
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchField,
    SearchableField,
    SearchFieldDataType,
    VectorSearch,  
    HnswVectorSearchAlgorithmConfiguration,  
    SemanticSettings,
    SemanticConfiguration,
    PrioritizedFields,
    SemanticField,
    SearchIndex
)
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryLanguage,
    QueryType,
    RawVectorQuery,
    VectorizableTextQuery,
    VectorFilterMode,    
)

def vectorize_text_com_vision(com_vision_endpoint,com_vision_key,query):
    """Return the embedding vector for `query` from the Computer Vision vectorizeText endpoint.

    Args:
        com_vision_endpoint: Base URL of the Computer Vision resource; a
            trailing slash is tolerated.
        com_vision_key: Subscription key sent in the
            ``Ocp-Apim-Subscription-Key`` header.
        query: Text to vectorize.

    Returns:
        The value of the ``"vector"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not respond in time.

    Note:
        The API version is read from the module-level
        ``com_vision_api_version`` variable.
    """
    # Imported locally so the helper does not depend on a later notebook cell
    # having imported requests first.
    import requests

    # Strip a trailing slash so the joined URL never contains "//".
    vectorize_text_url = f"{com_vision_endpoint.rstrip('/')}/computervision/retrieval:vectorizeText"
    params = {
        "api-version": com_vision_api_version
    }
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": com_vision_key
    }
    data = {
        'text': query
    }

    # A timeout keeps the cell from hanging forever on an unresponsive service.
    response = requests.post(
        vectorize_text_url, params=params, headers=headers, json=data, timeout=30
    )
    # Fail with the HTTP status instead of an opaque KeyError on "vector"
    # when the service returns an error payload.
    response.raise_for_status()

    return response.json()["vector"]

def show_image(image_folder, image):
    """Display the image file `image` from `image_folder` with matplotlib, axes hidden.

    Args:
        image_folder: Directory containing the image file.
        image: File name of the image, joined onto `image_folder`.
    """
    # Imported locally so the helper does not depend on a later notebook cell
    # having imported matplotlib / PIL first.
    import matplotlib.pyplot as plt
    from PIL import Image

    image_path = os.path.join(image_folder, image)
    # Open via a context manager so the underlying file handle is closed
    # once the image has been handed to matplotlib.
    with Image.open(image_path) as img:
        plt.imshow(img)
    plt.axis('off')
    plt.show()

#### Simple vector search

This demo shows how to apply vector search on a single field.

In [None]:
from openai import AzureOpenAI
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  

# Azure OpenAI client used to embed the query text.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for software development'
embedding_response = azure_oai_client.embeddings.create(
    input=[query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

# Vector query with k=3 against the single `title_vector` field of the text index.
search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=text_index_name,
    credential=AzureKeyCredential(ais_key),
)
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=3,
    fields="title_vector",
)

# search_text=None: no text query is sent, only the vector query.
results = search_client.search(
    search_text=None,
    vector_queries=[raw_vector_query],
    select=["title", "category"],
)

for result in results:
    print(f"{result['title']} - {result['category']}")

#### Metadata filtering with vector search

This demo shows how to apply metadata filtering (SQL - where, order by etc.) on top of vector search.

In [None]:
from openai import AzureOpenAI
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  

# Azure OpenAI client used to embed the query text.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for software development'
embedding_response = azure_oai_client.embeddings.create(
    input=[query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=text_index_name,
    credential=AzureKeyCredential(ais_key),
)
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=3,
    fields="title_vector",
)

# Same vector query as the simple demo, combined with an OData filter on
# `category`; the filter mode is set explicitly to PRE_FILTER.
results = search_client.search(
    search_text=None,
    vector_queries=[raw_vector_query],
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    filter="category eq 'Web'",
    select=["title", "category"],
)

for result in results:
    print(f"{result['title']} - {result['category']}")

#### Cross column vector search

This demo shows how to apply vector search on multiple columns.

In [None]:
from openai import AzureOpenAI
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  

# Azure OpenAI client used to embed the query text.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for software development'
embedding_response = azure_oai_client.embeddings.create(
    input=[query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=text_index_name,
    credential=AzureKeyCredential(ais_key),
)
# One query vector targeting two vector fields, passed as a comma-separated list.
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=5,
    fields="title_vector, content_vector",
)

results = search_client.search(
    search_text=None,
    vector_queries=[raw_vector_query],
    select=["title", "category"],
    top=5,
)

for result in results:
    print(f"{result['title']} - {result['category']}")

#### Hybrid search

This demo shows how to apply vector search in conjunction with additional search methods, such as lexical search.

In [None]:
from openai import AzureOpenAI
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  

# Azure OpenAI client used to embed the query text.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for software development'
embedding_response = azure_oai_client.embeddings.create(
    input=[query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=text_index_name,
    credential=AzureKeyCredential(ais_key),
)
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=5,
    fields="title_vector",
)

# Hybrid request: the raw query string is sent as search_text alongside the
# vector query built from its embedding.
results = search_client.search(
    search_text=query,
    vector_queries=[raw_vector_query],
    select=["title", "category"],
    top=5,
)

for result in results:
    print(f"{result['title']} - {result['category']}")

#### Document search example

This demo shows how to apply vector search for searching within documents.

In [None]:
from openai import AzureOpenAI
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  

# Azure OpenAI client used to embed the question.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'when are performance review announced?'
embedding_response = azure_oai_client.embeddings.create(
    input=[query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

# This demo queries the document index and its `chunk_content_vector` field.
search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=doc_index_name,
    credential=AzureKeyCredential(ais_key),
)
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=3,
    fields="chunk_content_vector",
)

results = search_client.search(
    search_text=None,
    vector_queries=[raw_vector_query],
    select=["chunk_content"],
)

# Print the text of each returned chunk.
for result in results:
    print(result['chunk_content'])

#### Image search example

This demo shows how to apply vector search for searching images.

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import requests

query = 'flower'
image_folder = "../../data/images"

# The text query is embedded through the Computer Vision helper defined above
# rather than through the Azure OpenAI client used by the text demos.
query_vector = vectorize_text_com_vision(com_vision_endpoint, com_vision_key, query)

search_client = SearchClient(
    endpoint=ais_endpoint,
    index_name=image_index_name,
    credential=AzureKeyCredential(ais_key),
)
raw_vector_query = RawVectorQuery(
    vector=query_vector,
    k=3,
    fields="image_vector",
)

results = search_client.search(
    search_text=None,
    vector_queries=[raw_vector_query],
    select=["image"],
)

# Each hit's `image` value is used as a file name inside image_folder.
for result in results:
    show_image(image_folder, result['image'])
    print("\n")