## Vector Search - Azure Cache for Redis

### Prerequisites
  
- Generate embeddings - [generate_embeddings.ipynb](../common/generate_embeddings.ipynb)
- Create table and ingest embeddings - [redis_data_pipeline.ipynb](./redis_data_pipeline.ipynb)

#### Set environment variables

In [None]:
from dotenv import load_dotenv
import os
import openai
import warnings

# Keep the notebook output free of unclosed-resource and deprecation warnings.
warnings.filterwarnings("ignore", message="unclosed", category=ResourceWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)


def _require_env(name):
    """Return the value of environment variable *name*.

    When the variable is unset or empty, a notice is printed and ``exit()``
    is called, exactly as each inline check would do.
    """
    value = os.getenv(name)
    if not value:
        print(f"{name} environment variable not set.")
        exit()
    return value


# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Redis connection settings
redis_host = _require_env("REDIS_HOST")
redis_port = _require_env("REDIS_PORT")
redis_password = _require_env("REDIS_PASSWORD")

# Azure OpenAI settings (used to embed text queries)
aoai_endpoint = _require_env("AOAI_ENDPOINT")
aoai_api_version = _require_env("AOAI_API_VERSION")
aoai_embedding_deployed_model = _require_env("AOAI_EMBEDDING_DEPLOYED_MODEL")
azure_openai_key = _require_env("AZURE_OPENAI_KEY")

# Computer Vision settings (used to embed image-search queries)
com_vision_endpoint = _require_env("COM_VISION_ENDPOINT")
com_vision_api_version = _require_env("COM_VISION_API_VERSION")
com_vision_key = _require_env("COM_VISION_KEY")

# Names of the Redis search indexes queried in the cells below
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

# Module-level openai configuration for the Azure endpoint
openai.api_type = "azure"
openai.api_key = azure_openai_key
openai.api_base = aoai_endpoint
openai.api_version = aoai_api_version

#### Setup Redis connection

In [None]:
import redis
from redis.commands.search.query import Query
import os

# Connection settings: SSL enabled, authenticated with the configured password
redis_connection_settings = {
    "host": redis_host,
    "port": redis_port,
    "ssl": True,
    "password": redis_password,
}
redis_client = redis.Redis(**redis_connection_settings)

# Connectivity check - should return True
redis_client.ping()

#### Helper method

In [None]:
from typing import List, Optional

import matplotlib.pyplot as plt
import numpy as np
import requests
from openai import AzureOpenAI
from PIL import Image

def search_redis(
    redis_client: redis.Redis,
    user_query: str,
    index_name: str,
    vector_field: str,
    return_fields: Optional[List[str]] = None,
    hybrid_fields: str = "*",
    k: int = 20,
):
    """Run a KNN vector search against a Redis search index.

    The query text is embedded with the Azure OpenAI deployment named by
    ``aoai_embedding_deployed_model`` and the embedding is sent to Redis as
    float32 bytes in the ``$vector`` query parameter.

    Args:
        redis_client: Connected Redis client.
        user_query: Natural-language text to search for.
        index_name: Name of the search index to query.
        vector_field: Name of the vector field the KNN clause runs against.
        return_fields: Fields to return for every hit. Defaults to
            ``["title", "category", "content", "vector_score"]``.
        hybrid_fields: Filter expression placed in front of the KNN clause;
            ``"*"`` applies no filter.
        k: Number of nearest neighbours to request; also used as page size.

    Returns:
        The object returned by ``redis_client.ft(index_name).search(...)``.
        Callers read the matching documents from its ``docs`` attribute.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if return_fields is None:
        return_fields = ["title", "category", "content", "vector_score"]

    azure_oai_client = AzureOpenAI(
        api_key=azure_openai_key,
        api_version=aoai_api_version,
        azure_endpoint=aoai_endpoint,
    )
    embedding_response = azure_oai_client.embeddings.create(
        input=[user_query], model=aoai_embedding_deployed_model
    )
    query_vector = embedding_response.data[0].embedding

    # Prepare the query: optional filter, then KNN over the vector field
    base_query = f'{hybrid_fields}=>[KNN {k} @{vector_field} $vector AS vector_score]'

    query = (
        Query(base_query)
        .return_fields(*return_fields)
        .sort_by("vector_score")
        # Page size must cover k, otherwise the server's default limit applies
        .paging(0, k)
        .dialect(2)
    )
    params_dict = {"vector": np.array(query_vector).astype(dtype=np.float32).tobytes()}

    # Perform the vector search
    results = redis_client.ft(index_name).search(query, params_dict)
    return results

def vectorize_text_com_vision(
    com_vision_endpoint,
    com_vision_key,
    query,
    api_version="2023-02-01-preview",
):
    """Embed a text query with the Computer Vision ``vectorizeText`` endpoint.

    Args:
        com_vision_endpoint: Base URL of the Computer Vision resource.
        com_vision_key: Subscription key sent in the
            ``Ocp-Apim-Subscription-Key`` header.
        query: Text to vectorize.
        api_version: Value of the ``api-version`` query parameter. Defaults
            to ``"2023-02-01-preview"``.

    Returns:
        The ``"vector"`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    # Tolerate a trailing slash on the endpoint so the URL has no "//"
    base_url = com_vision_endpoint.rstrip("/")
    vectorize_text_url = f"{base_url}/computervision/retrieval:vectorizeText"
    params = {
        "api-version": api_version
    }
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": com_vision_key
    }
    data = {
        'text': query
    }

    response = requests.post(vectorize_text_url, params=params, headers=headers, json=data)
    # Fail with the HTTP error itself rather than a KeyError on "vector" below
    response.raise_for_status()
    query_vector = response.json()["vector"]

    return query_vector

def show_image(image_folder, image):
    """Display one image file with matplotlib, without axes.

    Args:
        image_folder: Directory that contains the image.
        image: File name of the image inside ``image_folder``.
    """
    image_path = os.path.join(image_folder, image)
    # The context manager closes the underlying file once the image has been
    # handed to matplotlib, instead of leaving the handle open.
    with Image.open(image_path) as picture:
        plt.imshow(picture)
    plt.axis('off')
    plt.show()

#### Simple vector search

This demo shows how to apply vector search on a single field.

In [None]:
text_search_field = "title_vector"
text_return_fields = ["title", "category", "content", "vector_score"]

# Fetch the 10 nearest neighbours of the question on the title vector field
results = search_redis(
    redis_client,
    'products to store unstructured documents',
    text_table_name,
    text_search_field,
    text_return_fields,
    k=10,
)

for rank, doc in enumerate(results.docs):
    # Reported score is 1 - vector_score
    # (presumably vector_score is a cosine distance; verify against the index definition)
    similarity = round(1 - float(doc.vector_score), 3)
    print(f"{rank}. {doc.title} (Score: {similarity})")

#### Metadata filtering with vector search

This demo shows how to apply metadata filtering (similar to SQL `WHERE`, `ORDER BY`, etc.) on top of vector search.

In [None]:
text_search_field = "title_vector"
text_return_fields = ["title", "category", "content", "vector_score"]
# Filter expression on the category field, placed in front of the KNN clause
hybrid_query = "(@category:Databases)"

results = search_redis(
    redis_client,
    'products to store unstructured documents',
    text_table_name,
    text_search_field,
    text_return_fields,
    hybrid_query,
    k=10,
)

for rank, doc in enumerate(results.docs):
    # Reported score is 1 - vector_score
    similarity = round(1 - float(doc.vector_score), 3)
    print(f"{rank}. {doc.title} (Score: {similarity})")

#### Cross column vector search

This demo shows how to apply vector search on multiple columns.

In [None]:
# TODO: add an example of vector search across multiple columns.

#### Hybrid search

This demo shows how to apply vector search in conjunction with additional search methods, such as lexical search.


In [None]:
# TODO: add an example combining vector search with lexical search.

#### Range query

It is possible to define a range query on a vector index. The range query is defined by a center vector and a radius. Given the search query (center vector), the result will be all vectors that are within the radius from the center vector.

In [None]:
import numpy as np

text_search_field = "title_vector"
text_return_fields = ["title", "category", "content", "vector_score"]
range_radius = 0.2
search_query = 'store a blob'

# Range clause: vectors within `range_radius` of $vector; the distance is
# yielded under the name vector_score
base_vector_query = f'@{text_search_field}:[VECTOR_RANGE {range_radius} $vector]=>{{$yield_distance_as: vector_score}}'

# Embed the search text; the embedding is the centre vector of the range
azure_oai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version=aoai_api_version,
    azure_endpoint=aoai_endpoint,
)
embedding_response = azure_oai_client.embeddings.create(
    input=[search_query],
    model=aoai_embedding_deployed_model,
)
query_vector = embedding_response.data[0].embedding

query = Query(base_vector_query)
query = query.return_fields(*text_return_fields)
query = query.sort_by("vector_score")
query = query.dialect(2)

# The vector parameter is passed as raw float32 bytes
center_vector = np.array(query_vector)
params_dict = {"vector": center_vector.astype(dtype=np.float32).tobytes()}

# Perform the vector range query search
results = redis_client.ft(text_table_name).search(query, params_dict)

for position, doc in enumerate(results.docs):
    print(f"{position}. {doc.title} (Close by: {doc.vector_score})")

#### Document search example

In [None]:
# Employee search: return the single closest document chunk for the question
doc_search_field = "chunk_content_vector"
doc_search_return_fields = ["chunk_content"]
results = search_redis(
    redis_client,
    'what are the company values',
    doc_table_name,
    doc_search_field,
    doc_search_return_fields,
    k=1,
)

for position, chunk in enumerate(results.docs):
    print(f"{position}. {chunk.chunk_content}")

#### Image search
Using the Azure Cognitive Services Vision API, we convert the user query (string) to an embedded vector. We then use the vector search to find the most similar images in the database.

In [None]:
# Text description of the images to look for
query = 'flowers in a hand'
query_vector = vectorize_text_com_vision(com_vision_endpoint,com_vision_key,query)
image_folder = "../data/images"
num_of_images = 2

base_query = f'*=>[KNN {num_of_images} @image_vector $vector AS vector_score]'

# From here on `query` holds the Query object rather than the search text
query = (
    Query(base_query)
    .return_fields("image", "vector_score")
    .sort_by("vector_score")
    # Request a page as large as the KNN size so that raising num_of_images
    # is not silently capped by the server's default result limit
    .paging(0, num_of_images)
    .dialect(2)
)
params_dict = {"vector": np.array(query_vector).astype(dtype=np.float32).tobytes()}

# Perform the vector search
results = redis_client.ft(image_table_name).search(query, params_dict)

for i, image in enumerate(results.docs):
    show_image(image_folder, image.image)
    print("\n")