## Image Retrieval with Azure AI Search

Requirements:
1. Azure AI Search
2. Azure Computer Vision
3. Azure Storage

Steps:
1. Store images in Azure Storage 
2. Get access to images in Azure Storage
3. Create Search index in Azure AI Search
4. Generate embeddings for images
5. Store embeddings and other metadata in Azure AI Search
6. Perform a search in Azure AI Search

## Load Azure Configuration

In [44]:
import os

# Azure Computer Vision settings -- looked up with os.environ[...], so a
# missing variable raises KeyError right here.
azure_computer_vision_endpoint = os.environ["AZURE_COMPUTER_VISION_ENDPOINT"]
azure_computer_vision_key = os.environ["AZURE_COMPUTER_VISION_KEY"]

# Azure Storage settings -- evaluates to None when the variable is not set.
azure_storage_connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")

# Azure AI Search settings -- endpoint and admin key are None when unset.
azure_search_service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
azure_search_service_admin_key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")
azure_search_service_index_name = "az-image-index-001"
# Number of dimensions of the image/text embedding vectors stored in the index.
azure_search_service_embedding_size = 1024


## List Images in the Azure Storage Container and Store Them in a Collection

In [None]:
from azure.storage.blob import BlobServiceClient

# Initialize the BlobServiceClient with the connection string
blob_service_client = BlobServiceClient.from_connection_string(azure_storage_connection_string)

# Get the container client
container_client = blob_service_client.get_container_client("images")

# One dict per blob: {"id": <1-based position as str>, "imageName": ..., "imageUrl": ...}
blobs_collection = []

try:
    # enumerate(start=1) yields the same 1-based ids as a manual counter would.
    for idx, blob in enumerate(container_client.list_blobs(), start=1):
        # Let the SDK build the blob URL instead of concatenating strings onto
        # container_client.url: BlobClient.url is the full endpoint URL of the
        # blob, so the blob name ends up in the path even when the container
        # URL carries a query string (e.g. a SAS token from the connection
        # string), and the SDK takes care of escaping the blob name.
        # NOTE(review): when a SAS is in use it becomes part of this URL and is
        # therefore persisted with the record -- confirm that is acceptable.
        blob_url = container_client.get_blob_client(blob.name).url
        blobs_collection.append({"id": str(idx), "imageName": blob.name, "imageUrl": blob_url})
    print("Access to the blob storage was granted.")
except Exception as e:
    # Notebook-level boundary: report the failure and keep whatever was
    # collected before the error so the cell output shows how far it got.
    print(f"Failed to access the blob storage: {e}")

# Print the collection
print("Blobs collection:")
for blob in blobs_collection:
    print(blob)

Access to the blob storage was granted.
Blobs collection:
{'id': 1, 'imageName': 'analyze_image.jpg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/analyze_image.jpg'}
{'id': 2, 'imageName': 'bicycle1.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle1.jpeg'}
{'id': 3, 'imageName': 'bicycle10.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle10.jpeg'}
{'id': 4, 'imageName': 'bicycle2.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle2.jpeg'}
{'id': 5, 'imageName': 'bicycle3.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle3.jpeg'}
{'id': 6, 'imageName': 'bicycle4.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle4.jpeg'}
{'id': 7, 'imageName': 'bicycle5.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core.windows.net/images/bicycle5.jpeg'}
{'id': 8, 'imageName': 'bicycle6.jpeg', 'imageUrl': 'https://ziggystorage01.blob.core

## Vectorize Image API

In [50]:
import requests

def vectorize_image(image_url, timeout=30):
    """
    Get image embeddings from Azure Computer Vision API using an image URL.

    Parameters:
    - image_url: str, the URL of the image to be vectorized
    - timeout: int or float, seconds to wait for the service before the
      request is abandoned (forwarded to requests.post)

    Returns:
    - dict, the JSON response from the API, or None when the request fails
      (the error is printed)
    """
    # API URL. Strip a trailing slash from the configured endpoint so that
    # both "https://host" and "https://host/" produce a single-slash path.
    base_url = azure_computer_vision_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeImage?api-version=2024-02-01&model-version=2023-04-15"

    # Set headers
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": azure_computer_vision_key
    }

    # Set the data payload
    data = {
        "url": image_url
    }

    try:
        # Without an explicit timeout requests waits indefinitely, so one
        # stalled connection would hang the whole loop over the images.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Return the JSON response
        return response.json()

    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Error: {e}")
        return None



## Get Embeddings for each Image and save to a file

In [53]:

import json

# Attach an embedding to every record in the collection; records whose
# vectorization request failed get None instead of a vector.
for blob in blobs_collection:
    response = vectorize_image(blob["imageUrl"])
    blob["imageVector"] = response["vector"] if response else None

# Persist the enriched collection as indented JSON
output_file = "blobs_collection.json"
with open(output_file, "w") as file:
    json.dump(blobs_collection, file, indent=4)

print(f"Blobs collection saved to {output_file}")


Blobs collection saved to blobs_collection.json


## Create an Azure AI Search Index

In [55]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SimpleField,
    SearchableField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SearchIndex
)

# Authenticate against Azure AI Search with the admin key
credential = AzureKeyCredential(azure_search_service_admin_key)

# Client used to create or update indexes on the search service
index_client = SearchIndexClient(endpoint=azure_search_service_endpoint, credential=credential)

# Index schema: a string key, two searchable string fields and one vector field
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="imageName", type=SearchFieldDataType.String),
    SearchableField(name="imageUrl", type=SearchFieldDataType.String),
    SearchField(
        name="imageVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=azure_search_service_embedding_size,
        # Must match the profile name declared in vector_search below
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search configuration: one HNSW algorithm and one profile that refers to it
vector_search = VectorSearch(
    algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
    profiles=[
        VectorSearchProfile(name="myHnswProfile", algorithm_configuration_name="myHnsw"),
    ],
)

# Create (or update) the search index with the vector search configuration
index = SearchIndex(
    name=azure_search_service_index_name,
    fields=fields,
    vector_search=vector_search,
)
result = index_client.create_or_update_index(index)
print(f" {result.name} created")

 az-image-index-001 created


## Upload data into Search Index

In [66]:
import json
from azure.search.documents import SearchIndexingBufferedSender

# Read the previously saved collection back from disk
file_path = "blobs_collection.json"
with open(file_path, "r") as file:
    documents = json.loads(file.read())

# Queue an upload action for every document through the buffered sender;
# the sender is used as a context manager around the upload call.
with SearchIndexingBufferedSender(
    endpoint=azure_search_service_endpoint,
    index_name=azure_search_service_index_name,
    credential=credential,
) as batch_client:
    batch_client.upload_documents(documents)

# Reports the number of documents read from the file
print(f"Uploaded {len(documents)} documents in total")

Uploaded 62 documents in total


## Vectorize Text API

In [67]:
import requests

def vectorize_text(text, timeout=30):
    """
    Get text embeddings from Azure Computer Vision API for a query string.

    Parameters:
    - text: str, the text to be vectorized
    - timeout: int or float, seconds to wait for the service before the
      request is abandoned (forwarded to requests.post)

    Returns:
    - dict, the JSON response from the API, or None when the request fails
      (the error is printed)
    """
    # API URL. Strip a trailing slash from the configured endpoint so that
    # both "https://host" and "https://host/" produce a single-slash path.
    base_url = azure_computer_vision_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeText?api-version=2024-02-01&model-version=2023-04-15"

    # Set headers
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": azure_computer_vision_key
    }

    # Set the data payload
    data = {
        "text": text
    }

    try:
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Return the JSON response
        return response.json()

    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Error: {e}")
        return None


## Perform a Search in Azure AI Search

In [75]:
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from azure.core.credentials import AzureKeyCredential

# Get credential from Azure AI Search Admin key
credential = AzureKeyCredential(azure_search_service_admin_key)
search_client = SearchClient(endpoint=azure_search_service_endpoint, 
                             credential=credential, 
                             index_name=azure_search_service_index_name)


# Your query and vectorization function
query = "people working"
vectorized_text = vectorize_text(query)

# vectorize_text returns None when the request fails; stop with a clear
# message instead of an opaque "'NoneType' object is not subscriptable".
if not vectorized_text or "vector" not in vectorized_text:
    raise RuntimeError(f"Could not obtain an embedding for query {query!r}")
vector = vectorized_text["vector"]

# Create the VectorizedQuery instance
# NOTE(review): k_nearest_neighbors=3 is smaller than top=5 below, so this
# presumably yields at most 3 hits -- confirm which limit is intended.
vector_query = VectorizedQuery(
    vector=vector, 
    k_nearest_neighbors=3, 
    fields="imageVector"
)

# Perform the search (pure vector query: no full-text search_text)
results = search_client.search(
    search_text=None,  
    vector_queries=[vector_query],  # Include the vector query here
    select=["imageName", "imageUrl"],
    top=5
)

# Process and print the results
for result in results:
    print(f"Image Name: {result['imageName']}, Image URL: {result['imageUrl']}")

Image Name: analyze_image.jpg, Image URL: https://ziggystorage01.blob.core.windows.net/images/analyze_image.jpg
Image Name: group_2.jpeg, Image URL: https://ziggystorage01.blob.core.windows.net/images/group_2.jpeg
Image Name: group_1.jpeg, Image URL: https://ziggystorage01.blob.core.windows.net/images/group_1.jpeg
