# Azure AI Search Vector Search Code Sample with Azure AI Vision Image Retrieval API for Images
This code demonstrates how to use Azure AI Search with the Azure AI Vision Image Retrieval API and the Azure Python SDK.
## Prerequisites
To run the code, install the following packages. Please use the latest stable version by running `pip install azure-search-documents`. This sample currently uses version `11.4.0`.

In [None]:
! pip install azure-search-documents 
! pip install openai python-dotenv ipython 

## Import required libraries and environment variables

In [17]:
# Import required libraries  
import os  
import requests  
from dotenv import load_dotenv  
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient  
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryType,
    VectorizedQuery,
    VectorFilterMode,    
)
from azure.search.documents.indexes.models import (  
 
    ExhaustiveKnnParameters,  
    ExhaustiveKnnAlgorithmConfiguration,
    FieldMapping,  
    HnswParameters,  
    HnswAlgorithmConfiguration,  
    InputFieldMappingEntry,  
    OutputFieldMappingEntry,  
    SimpleField,
    SearchField,  
    SearchFieldDataType,  
    SearchIndex,  
    SearchIndexer,  
    SearchIndexerDataContainer,  
    SearchIndexerDataSourceConnection,  
    SearchIndexerSkillset,  
    VectorSearch,  
    VectorSearchAlgorithmKind,  
    VectorSearchAlgorithmMetric,
    VectorSearchProfile,  
    WebApiSkill
)  
from IPython.display import Image, display 
from azure.storage.blob import BlobServiceClient  

  
# Load settings through python-dotenv before reading them from the environment.
load_dotenv()

# Azure AI Search service settings
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# Azure AI Vision settings (used to vectorize queries)
aiVisionEndpoint = os.environ.get("AZURE_AI_VISION_ENDPOINT")
aiVisionApiKey = os.environ.get("AZURE_AI_VISION_API_KEY")

# Endpoint of the custom skill function called by the skillset
customSkill_endpoint = os.environ.get("FUNCTION_CUSTOM_SKILL_ENDPOINT")

# Blob Storage settings for the source images
blob_connection_string = os.environ.get("BLOB_CONNECTION_STRING")
container_name = os.environ.get("BLOB_CONTAINER_NAME")

# Key credential built from the admin key
credential = AzureKeyCredential(key)

# Connect to Blob Storage

Retrieve your images from Blob Storage.

In [18]:
# Connect to Blob Storage and list the blobs in the configured container
blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string)
container_client = blob_service_client.get_container_client(container_name)
blobs = container_client.list_blobs()

# Peek at the first blob as a quick connectivity check. Passing a default to
# next() keeps an empty container from surfacing as a bare StopIteration.
first_blob = next(blobs, None)
if first_blob is None:
    blob_url = None
    print(f"Container '{container_name}' contains no blobs; upload images before indexing.")
else:
    blob_url = container_client.get_blob_client(first_blob).url
    print(f"URL of the first blob: {blob_url}")

URL of the first blob: https://fsunavalastorage.blob.core.windows.net/vector-demo/Benefit_Options.pdf


# Connect your Blob Storage to a data source in Azure AI Search

In [19]:
# Register the blob container as a data source for the indexer
ds_client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)

# The data source is named after the index and points at the blob container
container = SearchIndexerDataContainer(name=container_name)
data_source_connection = SearchIndexerDataSourceConnection(
    name=f"{index_name}-blob",
    type="azureblob",
    connection_string=blob_connection_string,
    container=container,
)

# Create the data source, or update it if it already exists
data_source = ds_client.create_or_update_data_source_connection(data_source_connection)
print(f"Data source '{data_source.name}' created or updated")

Data source 'image-vector-demo-2-blob' created or updated


# Create a skillset

Create a custom skill Azure Function which calls the Florence Vision API to generate image embeddings. See GetImageEmbeddings for details on the custom skill.

In [20]:
# Define a skillset containing a single custom Web API skill
skillset_name = f"{index_name}-skillset"
skill_uri = customSkill_endpoint

# Values passed to the custom skill for each document
skill_inputs = [
    InputFieldMappingEntry(name="imageUrl", source="/document/metadata_storage_path"),
    InputFieldMappingEntry(name="recordId", source="/document/metadata_storage_name"),
]

# The skill's "vector" output is stored under the target name "imageVector"
skill_outputs = [OutputFieldMappingEntry(name="vector", target_name="imageVector")]

skill = WebApiSkill(uri=skill_uri, inputs=skill_inputs, outputs=skill_outputs)

skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="Skillset to extract image vector",
    skills=[skill],
)

# Create the skillset, or update it if it already exists
client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)
client.create_or_update_skillset(skillset)
print(f' {skillset.name} created')


 image-vector-demo-2-skillset created


## Create an index
Create your search index schema and vector search configuration:

In [21]:
# Create a search index
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Index schema: a key field, two string fields, and the image vector field
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchField(name="imageUrl", type=SearchFieldDataType.String),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(
        name="imageVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=1024,
        vector_search_profile_name="myHnswProfile",
    ),
]

# HNSW algorithm configuration using cosine similarity
hnsw_algorithm = HnswAlgorithmConfiguration(
    name="myHnsw",
    kind=VectorSearchAlgorithmKind.HNSW,
    parameters=HnswParameters(
        m=4,
        ef_construction=400,
        ef_search=1000,
        metric=VectorSearchAlgorithmMetric.COSINE,
    ),
)

# Exhaustive KNN algorithm configuration using cosine similarity
exhaustive_knn_algorithm = ExhaustiveKnnAlgorithmConfiguration(
    name="myExhaustiveKnn",
    kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
    parameters=ExhaustiveKnnParameters(
        metric=VectorSearchAlgorithmMetric.COSINE,
    ),
)

# One profile per algorithm; the imageVector field references "myHnswProfile"
vector_search = VectorSearch(
    algorithms=[hnsw_algorithm, exhaustive_knn_algorithm],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
        ),
        VectorSearchProfile(
            name="myExhaustiveKnnProfile",
            algorithm_configuration_name="myExhaustiveKnn",
        ),
    ],
)

# Create the index (or update it) with the schema and vector search configuration
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")


image-vector-demo-2 created


## Create an indexer

Create or update an indexer to process images and populate the search index

In [22]:
# Define the indexer that ties together the data source, skillset and index
indexer_name = f"{index_name}-indexer"

# Blob metadata copied directly into index fields
blob_field_mappings = [
    FieldMapping(source_field_name="metadata_storage_path", target_field_name="imageUrl"),
    FieldMapping(source_field_name="metadata_storage_name", target_field_name="title"),
]

# Skillset output copied into the vector field of the index
skill_output_mappings = [
    FieldMapping(source_field_name="/document/imageVector", target_field_name="imageVector"),
]

indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to process images",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    field_mappings=blob_field_mappings,
    output_field_mappings=skill_output_mappings,
)

# Create the indexer, or update it if it already exists
indexer_client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)
indexer_result = indexer_client.create_or_update_indexer(indexer)

# Run the indexer
indexer_client.run_indexer(indexer_name)
print(f' {indexer_name} created')

 image-vector-demo-2-indexer created


## Perform a vector search by vectorizing your text query

Perform a vector search to find the most relevant images based on the text query

In [24]:
def generate_embeddings(text, aiVisionEndpoint, aiVisionApiKey, timeout=30):
    """Vectorize a text query with the Azure AI Vision ``vectorizeText`` endpoint.

    Args:
        text: Query text to embed.
        aiVisionEndpoint: Base URL of the Azure AI Vision resource. A trailing
            slash is tolerated.
        aiVisionApiKey: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        timeout: Seconds to wait for the service before ``requests`` gives up.

    Returns:
        The ``"vector"`` value of the JSON response when the service answers
        with HTTP 200; otherwise ``None`` after printing the status code and
        the response body.

    Raises:
        ValueError: If ``aiVisionEndpoint`` is empty or ``None``.
    """
    if not aiVisionEndpoint:
        raise ValueError("aiVisionEndpoint is not set")

    # Strip a trailing "/" so the final URL never contains "//computervision".
    base_url = aiVisionEndpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeText"

    params = {"api-version": "2023-02-01-preview"}
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": aiVisionApiKey,
    }
    data = {"text": text}

    # Without an explicit timeout, requests can wait indefinitely on a stalled connection.
    response = requests.post(url, params=params, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()["vector"]

  
# Text query to vectorize and search with
query = "winter clothes"

# Initialize the SearchClient
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

# Vectorize the query. generate_embeddings returns None when the Vision API
# call fails, so stop here with a clear message instead of sending an empty
# vector to the search service.
query_vector = generate_embeddings(query, aiVisionEndpoint, aiVisionApiKey)
if query_vector is None:
    raise RuntimeError("Could not vectorize the query; see the error printed above.")

vector_query = VectorizedQuery(vector=query_vector, k_nearest_neighbors=3, fields="imageVector")

# Perform vector search
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["title", "imageUrl"],
)

# Print the search results
for result in results:
    print(f"Title: {result['title']}")
    print(f"Image URL: {result['imageUrl']}")
    display(Image(url=result['imageUrl']))
    print("\n")


Title: Picture505.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture505.jpg




Title: Picture621.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture621.jpg




Title: Picture623.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture623.jpg






Perform a vector search to find the most relevant images based on the image query

In [26]:
def generate_embeddings(text, aiVisionEndpoint, aiVisionApiKey, timeout=30):
    """Vectorize an image with the Azure AI Vision ``vectorizeImage`` endpoint.

    Note: this definition reuses the name of the text-query helper defined
    earlier in the notebook, so it replaces that helper once this cell runs.

    Args:
        text: URL of the image to embed. The parameter name is kept for call
            compatibility even though the value is an image URL, not text.
        aiVisionEndpoint: Base URL of the Azure AI Vision resource. A trailing
            slash is tolerated.
        aiVisionApiKey: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        timeout: Seconds to wait for the service before ``requests`` gives up.

    Returns:
        The ``"vector"`` value of the JSON response when the service answers
        with HTTP 200; otherwise ``None`` after printing the status code and
        the response body.

    Raises:
        ValueError: If ``aiVisionEndpoint`` is empty or ``None``.
    """
    if not aiVisionEndpoint:
        raise ValueError("aiVisionEndpoint is not set")

    # Strip a trailing "/" so the final URL never contains "//computervision".
    base_url = aiVisionEndpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeImage"

    params = {"api-version": "2023-02-01-preview"}
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": aiVisionApiKey,
    }
    # The request body carries the image location under the "url" key.
    data = {"url": text}

    # Without an explicit timeout, requests can wait indefinitely on a stalled connection.
    response = requests.post(url, params=params, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()["vector"]

  
# URL of the query image to vectorize (for context, this is a photo of a lady in a red hat)
query = "https://images.unsplash.com/photo-1593476087123-36d1de271f08?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=387&q=80"

# Initialize the SearchClient
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

# Vectorize the query image. generate_embeddings returns None when the Vision
# API call fails, so stop here with a clear message instead of sending an
# empty vector to the search service.
query_vector = generate_embeddings(query, aiVisionEndpoint, aiVisionApiKey)
if query_vector is None:
    raise RuntimeError("Could not vectorize the query image; see the error printed above.")

vector_query = VectorizedQuery(vector=query_vector, k_nearest_neighbors=3, fields="imageVector")

# Perform vector search
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["title", "imageUrl"],
)

# Print the search results
for result in results:
    print(f"Title: {result['title']}")
    print(f"Image URL: {result['imageUrl']}")
    display(Image(url=result['imageUrl']))
    print("\n")


Title: Picture290.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture290.jpg




Title: Picture285.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture285.jpg




Title: Picture296.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture296.jpg




