## Vector Search - Azure AI Search using REST Endpoint

This sample uses Azure AI Search API version _2023-11-01_.

### Prerequisites
  
- Generate embeddings - [generate_embeddings.ipynb](../../common/generate_embeddings.ipynb) 
- Create AI Search Index and ingest embeddings - [ai_search_rest_data_pipeline.ipynb](./ai_search_rest_data_pipeline.ipynb)

#### Set environment variables

In [None]:
import os
from dotenv import load_dotenv
import openai

def _require_env(name):
    """Return the value of environment variable *name*.

    Prints a message and calls ``exit(1)`` when the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        print(f"{name} environment variable not set.")
        exit(1)
    return value


# Populate the environment via python-dotenv before any setting is read.
load_dotenv()

# Azure AI Search settings.
ais_endpoint = _require_env("AIS_ENDPOINT")
ais_api_version = _require_env("AIS_API_VERSION")
ais_key = _require_env("AIS_KEY")

# Azure OpenAI settings.
aoai_endpoint = _require_env("AOAI_ENDPOINT")
aoai_api_version = _require_env("AOAI_API_VERSION")
aoai_embedding_deployed_model = _require_env("AOAI_EMBEDDING_DEPLOYED_MODEL")
azure_openai_key = _require_env("AZURE_OPENAI_KEY")

# Computer Vision settings.
com_vision_endpoint = _require_env("COM_VISION_ENDPOINT")
com_vision_api_version = _require_env("COM_VISION_API_VERSION")
com_vision_key = _require_env("COM_VISION_KEY")

# Paths of the index definition files, one per sample index.
ais_index_definition_text = 'index_definition/index_definition_text.json'
ais_index_definition_doc = 'index_definition/index_definition_doc.json'
ais_index_definition_image = 'index_definition/index_definition_image.json'

# Index names used to build the search URLs in the cells below.
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

# Configure the openai module for Azure with the values read above.
openai.api_type = "azure"
openai.api_key = azure_openai_key
openai.api_base = aoai_endpoint
openai.api_version = aoai_api_version

### Helper methods

In [None]:
import requests
import json

def vectorize_text_com_vision(com_vision_endpoint, com_vision_key, query):
    """Embed *query* using the Computer Vision ``retrieval:vectorizeText`` endpoint.

    Args:
        com_vision_endpoint: Base URL of the Computer Vision resource. A
            trailing "/" is tolerated.
        com_vision_key: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        query: Text to vectorize.

    Returns:
        The ``"vector"`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.

    Note:
        The API version is read from the module-level
        ``com_vision_api_version`` variable, not from a parameter.
    """
    # Strip a trailing slash so the URL never contains "//computervision".
    base_url = com_vision_endpoint.rstrip("/")
    vectorize_text_url = f"{base_url}/computervision/retrieval:vectorizeText"
    params = {
        "api-version": com_vision_api_version
    }
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": com_vision_key
    }
    data = {
        'text': query
    }

    response = requests.post(vectorize_text_url, params=params, headers=headers, json=data)
    # Surface HTTP failures directly instead of as a KeyError on "vector" below.
    response.raise_for_status()
    query_vector = response.json()["vector"]

    return query_vector

def show_image(image_folder, image):
    """Display the image file *image* located in *image_folder*, with axes hidden.

    Args:
        image_folder: Directory that contains the image file.
        image: File name of the image, joined onto *image_folder*.
    """
    # Imported locally so this helper does not depend on imports made in a later cell.
    import matplotlib.pyplot as plt
    from PIL import Image

    image_path = os.path.join(image_folder, image)
    # The context manager releases the file handle opened by Pillow after display.
    with Image.open(image_path) as img:
        plt.imshow(img)
        plt.axis('off')
        plt.show()

#### Simple vector search

This demo shows how to apply vector search on a single field.

In [None]:
from openai import AzureOpenAI

# Client used to embed the query text with the deployed embedding model.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for software development'
query_vector = azure_oai_client.embeddings.create(
    input=[query], model=aoai_embedding_deployed_model
).data[0].embedding

url = f"{ais_endpoint}/indexes/{text_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# Single vector query against the content_vector field.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "content_vector",
            "k": 5,
        }
    ],
    "select": "title, category",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

search_results = [(item['title'], item['category']) for item in response.json()['value']]

print(search_results)

#### Metadata filtering with vector search

This demo shows how to apply metadata filtering (similar to SQL `WHERE`, `ORDER BY`, etc.) on top of vector search.

In [None]:
from openai import AzureOpenAI

# Client used to embed the query text with the deployed embedding model.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools that works without server'
# NOTE(review): index_name is not referenced in this cell; the URL below uses text_table_name.
index_name = "text_sample"
query_vector = azure_oai_client.embeddings.create(
    input=[query], model=aoai_embedding_deployed_model
).data[0].embedding

url = f"{ais_endpoint}/indexes/{text_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# Vector query on content_vector, restricted by a filter on the category field.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "content_vector",
            "k": 5,
        }
    ],
    "select": "title, category",
    "filter": "category eq 'Web'",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

search_results = [(item['title'], item['category']) for item in response.json()['value']]

print(search_results)

#### Cross column vector search

This demo shows how to apply vector search on multiple columns.

In [None]:
from openai import AzureOpenAI

# Client used to embed the query text with the deployed embedding model.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools for web hosting'
top_doc_count = 3
query_vector = azure_oai_client.embeddings.create(
    input=[query], model=aoai_embedding_deployed_model
).data[0].embedding

url = f"{ais_endpoint}/indexes/{text_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# One query vector applied to both the title_vector and content_vector fields.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "title_vector, content_vector",
            "k": top_doc_count,
        }
    ],
    "select": "title, category",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

docs = [(item['title'], item['category']) for item in response.json()['value']]

print(docs)

#### Hybrid search

This demo shows how to apply vector search in conjunction with additional search methods, such as lexical search.

In [None]:
from openai import AzureOpenAI

# Client used to embed the query text with the deployed embedding model.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'tools that works without server'
top_doc_count = 5
query_vector = azure_oai_client.embeddings.create(
    input=[query], model=aoai_embedding_deployed_model
).data[0].embedding

url = f"{ais_endpoint}/indexes/{text_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# Hybrid request: the same query is sent as text ("search") and as a vector.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "content_vector",
            "k": top_doc_count,
        }
    ],
    "search": query,
    # Sent as an integer (previously the string "5") and kept in sync with "k".
    "top": top_doc_count,
    "select": "title, category",
    "filter": "category eq 'Web'",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

docs = [(item['title'], item['category']) for item in response.json()['value']]

print(docs)

#### Document search example

This demo shows how to apply vector search for searching within documents.

In [None]:
from openai import AzureOpenAI

# Client used to embed the query text with the deployed embedding model.
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)

query = 'when are performance review announced?'
# Defined here so the cell does not rely on a value left behind by an earlier cell.
top_doc_count = 5
query_vector = azure_oai_client.embeddings.create(
    input=[query], model=aoai_embedding_deployed_model
).data[0].embedding

url = f"{ais_endpoint}/indexes/{doc_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# Vector query against the chunk_content_vector field; only chunk_content is returned.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "chunk_content_vector",
            "k": top_doc_count,
        }
    ],
    "select": "chunk_content",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

search_results = [item['chunk_content'] for item in response.json()['value']]

for chunk in search_results:
    print(chunk)

#### Image search example

This demo shows how to apply vector search for searching images.

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

query = 'flower'
image_folder = "../../data/images"
# Defined here so the cell does not rely on a value left behind by an earlier cell.
top_doc_count = 5

# The query text is embedded with the Computer Vision helper rather than Azure OpenAI.
query_vector = vectorize_text_com_vision(com_vision_endpoint, com_vision_key, query)

url = f"{ais_endpoint}/indexes/{image_table_name}/docs/search?api-version={ais_api_version}"

headers = {
    "Content-Type": "application/json",
    "api-key": ais_key,
}

# Vector query against the image_vector field; only the image field is returned.
request_body = {
    "vectorQueries": [
        {
            "kind": "vector",
            "vector": query_vector,
            "fields": "image_vector",
            "k": top_doc_count,
        }
    ],
    "select": "image",
}
response = requests.post(url, headers=headers, json=request_body)
# Fail with a clear HTTP error instead of a KeyError on 'value' below.
response.raise_for_status()

search_results = [item['image'] for item in response.json()['value']]

# Each returned value is passed to show_image as a file name inside image_folder.
for image in search_results:
    show_image(image_folder, image)