## Creating a Search Index using Azure AI Search SDK

## Prerequisites

Before starting, ensure you have:<br>

An Azure account with an Azure AI Search service created, along with its service name, admin API key, and endpoint URL.<br>

You can find information on setting up your Azure resources and all other requirements [here](../../SETUP_RETAIL.md).

#### Install SDK

In [None]:
%pip install azure-identity --quiet
%pip install azure-storage-blob --quiet
%pip install azure-search-documents --quiet
%pip install openai --quiet

#### Install libraries

In [None]:
%pip install requests --quiet
%pip install json --quiet
%pip install typing --quiet

#### Import Libraries

In [2]:
import requests
import json
import os
from typing import Dict

from openai import AzureOpenAI

from azure.identity import DefaultAzureCredential, get_bearer_token_provider

from azure.storage.blob import BlobServiceClient

from azure.search.documents import SearchClient, SearchItemPaged
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import VectorizedQuery, QueryType
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch
)

#### Define Search Index Schema

Put the name of your search index in the placeholder below. If you have already created a search index, just update the name in the placeholder below and jump to [Data Ingestion](#data-ingestion). Otherwise, follow the steps below to create your index.

In [None]:
# Name of the Azure AI Search index. It is reused further down when creating
# the index, building the ingestion payload, and constructing the search client.
index_name = '<your_index_name>'

In [3]:
# Index definition for the retail catalog: a key field, catalog attributes,
# and three text descriptions that are each followed by a 1536-dimension
# vector field bound to the 'myHnswProfile' vector search profile.
retail_index = SearchIndex(
    name=index_name,
    fields=[
        # Document key.
        SearchField(name="id", type="Edm.String", key=True),
        # Catalog attributes.
        SearchField(
            name="articleId",
            type="Edm.String",
            stored=True,
            facetable=True,
        ),
        SearchField(
            name="productName",
            type="Edm.String",
            stored=True,
            facetable=True,
        ),
        SearchField(
            name="productType",
            type="Edm.String",
            stored=True,
            filterable=True,
            facetable=True,
        ),
        SearchField(
            name="indexGroupName",
            type="Edm.String",
            stored=True,
            filterable=True,
            facetable=True,
        ),
        SearchField(
            name="gender",
            type="Edm.String",
            stored=True,
            filterable=True,
            facetable=True,
        ),
        # Detail description: analyzed text plus its hidden vector field.
        SearchField(
            name="detailDescription",
            type="Edm.String",
            stored=True,
            searchable=True,
            analyzer_name="en.microsoft",
        ),
        SearchField(
            name="detailDescriptionVector",
            type="Collection(Edm.Single)",
            hidden=True,
            stored=True,
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_profile_name="myHnswProfile",
        ),
        # Summarized description: analyzed text plus its hidden vector field.
        SearchField(
            name="summarizedDescription",
            type="Edm.String",
            stored=True,
            searchable=True,
            analyzer_name="en.microsoft",
        ),
        SearchField(
            name="summarizedDescriptionVector",
            type="Collection(Edm.Single)",
            hidden=True,
            stored=True,
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_profile_name="myHnswProfile",
        ),
        # Generated description: analyzed text plus its hidden vector field.
        SearchField(
            name="generatedDescription",
            type="Edm.String",
            stored=True,
            searchable=True,
            analyzer_name="en.microsoft",
        ),
        SearchField(
            name="generatedDescriptionVector",
            type="Collection(Edm.Single)",
            hidden=True,
            stored=True,
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_profile_name="myHnswProfile",
        ),
        # Image location for the catalog item.
        SearchField(
            name="imageUrl",
            type="Edm.String",
            stored=True,
            facetable=True,
        ),
    ],
)


#### Introduce Vector Search: Define Vector Search Parameters

In [4]:
# Vector search settings: a single HNSW algorithm configuration (only its name
# is set) and a single profile that refers to it. The profile name is the one
# used as `vector_search_profile_name` by the vector fields of the index.
vector_search = VectorSearch(
    profiles=[
        VectorSearchProfile(
            algorithm_configuration_name="myHnsw",
            name="myHnswProfile",
        ),
    ],
    algorithms=[
        HnswAlgorithmConfiguration(name="myHnsw"),
    ],
)

# Attach the vector search settings to the index definition.
retail_index.vector_search = vector_search

#### Introduce Semantic Search: Define Semantic configuration based on Index schema

In [5]:
# Semantic configuration for the catalog index: the description fields are
# registered as content fields and the catalog attributes as keyword fields.
semantic_config = SemanticConfiguration(
    name="catalog-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        content_fields=[
            SemanticField(field_name=content_name)
            for content_name in (
                "detailDescription",
                "summarizedDescription",
                "generatedDescription",
            )
        ],
        keywords_fields=[
            SemanticField(field_name=keyword_name)
            for keyword_name in ("indexGroupName", "gender", "productType")
        ],
    ),
)

# Register the configuration on the index definition.
retail_index.semantic_search = SemanticSearch(configurations=[semantic_config])

#### Create Index in Azure AI Search

In [None]:
# Replace the placeholders with your Azure AI Search service details.
AZURE_AI_SEARCH_RESOURCE_NAME = '<your_search_service_name>'

# Create a SearchIndexClient for index management
index_client = SearchIndexClient(
    endpoint=f"https://{AZURE_AI_SEARCH_RESOURCE_NAME}.search.windows.net",
    credential=DefaultAzureCredential(),
)

# Use create_or_update_index so this cell can be re-run safely: create_index
# fails once an index with this name already exists in the service.
result = index_client.create_or_update_index(retail_index)
print(f"Successfully created or updated index '{index_name}'.")

# Check that the service now lists the index, and report the outcome either way
# instead of staying silent when it is missing.
index_list = index_client.list_index_names()
if index_name in index_list:
    print(f"Index '{index_name}' exists.")
else:
    print(f"Index '{index_name}' was not found in the search service.")

## Data Ingestion

For ingesting data you will need two components:<br>
1. Images of catalog items: These will be stored in blob storage. It is important that the name of each image is exactly the articleId of the corresponding catalog item. These images are crucial for enhancements during ingestion. Images are provided [here](../../data/retail/product_images). <br>
2. Product listing: This listing will be ingested into Azure AI Search. The listing is provided [here](../../data/retail/product_listings.json).

In [None]:
# Setup Storage Account details
AZURE_STORAGE_ACCOUNT_NAME = ""
AZURE_STORAGE_BLOB_CONTAINER_NAME = ""

blob_client = BlobServiceClient(
    account_url=f"https://{AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net",
    credential=DefaultAzureCredential()
)

# Create the target container on first use; later runs reuse it.
blob_container_client = blob_client.get_container_client(AZURE_STORAGE_BLOB_CONTAINER_NAME)
if not blob_container_client.exists():
    blob_container_client.create_container()
    print(f"Created container {AZURE_STORAGE_BLOB_CONTAINER_NAME} in {AZURE_STORAGE_ACCOUNT_NAME}.")

# Upload images to the blob storage
# NOTE: the blob name is the bare file name, so files with the same name in
# different sub-directories would overwrite each other (overwrite=True).
directory_path = "../../data/retail/product_images"
for root, _dirs, files in os.walk(directory_path):
    for filename in files:
        with open(os.path.join(root, filename), "rb") as image_file:
            # Hand the open file to the SDK so it is streamed rather than read
            # fully into memory first. The return value is deliberately not
            # assigned to `blob_client`, which must keep referring to the
            # BlobServiceClient created above.
            blob_container_client.upload_blob(name=filename, data=image_file, overwrite=True)

#### Ingest data into Azure AI Search

In [None]:
# Replace the placeholders with Ingestion service URL (local or remote).
INGESTION_SERVICE_URL = ""

# Setup HTTP request (URL and headers)
CONVERSATION_ID = "" # Required
USER_ID = "" # Required
DIALOG_ID = "" # Required

url = f'{INGESTION_SERVICE_URL}/indexer/index'
headers = {'conversation_id': CONVERSATION_ID, 'user_id': USER_ID, 'dialog_id': DIALOG_ID}

# Create payload
# Read the listing as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open("../../data/retail/product_listings.json", encoding="utf-8") as product_file:
    product_items = json.load(product_file)

payload = {
    "storage_container_name": AZURE_STORAGE_BLOB_CONTAINER_NAME,
    "index_name": index_name,
    "enrichment": "IMAGE_DESCRIPTION",
    "payload": product_items,
}

# Submit the catalog items and report the outcome; a non-201 response is
# surfaced with its status code and body instead of being silently ignored.
response = requests.post(url, json=payload, headers=headers)
if response.status_code == 201:
    print("Catalog items submitted for indexing successfully.")
else:
    print(f"Indexing request failed with status {response.status_code}: {response.text}")

## Search

This part is for testing your ingested data and is not needed for ingesting the data itself.

#### Setup Search Client

In [None]:
# Fill in the name of your Azure AI Search service; the endpoint is derived from it.
AZURE_SEARCH_SERVICE = ""
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

# A single credential object is shared by both clients below.
credential = DefaultAzureCredential()

# Client for index management and client for querying `index_name`.
index_client = SearchIndexClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    credential=credential,
)
search_client = SearchClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    index_name=index_name,
    credential=credential,
)

#### Setup Utility Functions

In [None]:
# Replace the placeholders with your Azure OpenAI Service and Model Deployment.
AZURE_OPENAI_SERVICE = ""
AZURE_OPENAI_ADA_DEPLOYMENT = ""

token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default"
)

openai_client = AzureOpenAI(
    api_version="2024-08-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    """Return the embedding produced for ``text`` by the Azure OpenAI client.

    Calls ``openai_client.embeddings.create`` with the
    ``AZURE_OPENAI_ADA_DEPLOYMENT`` model and returns the ``embedding`` of the
    first item in the response's ``data``.
    """
    response = openai_client.embeddings.create(
        input=text,
        model=AZURE_OPENAI_ADA_DEPLOYMENT,
    )
    first_item = response.data[0]
    return first_item.embedding

def search_results_cleanup(result_dict: dict) -> dict:
    """Return a copy of a search result without empty entries or the raw score.

    Entries whose value is ``None`` are dropped, and so is the
    ``"@search.score"`` key. Every other entry is kept in its original order;
    the input dict is not modified.
    """
    cleaned = {}
    for key, value in result_dict.items():
        if value is None or key == "@search.score":
            continue
        cleaned[key] = value
    return cleaned

def unpack_search_results(results: SearchItemPaged[Dict]):
    """Collect every item of a search result iterator into a list.

    Each item is passed through ``search_results_cleanup`` before being added,
    so the returned list holds the cleaned-up dicts in iteration order.
    """
    # Search results arrive as an iterator; the comprehension drains it fully.
    return [search_results_cleanup(item) for item in results]


#### Search Using Text and Vector Similarity with Semantic Ranking

In [None]:
search_query = "Show me green shirts."

# Embed the query text so it can be compared against the vector fields.
search_query_vector = get_embedding(search_query)

# The query is sent both as text and as a vector over the three description
# vector fields, using the semantic query type and the configuration above.
results = search_client.search(
    search_text=search_query,
    vector_queries=[
        VectorizedQuery(
            fields="detailDescriptionVector, summarizedDescriptionVector, generatedDescriptionVector",
            vector=search_query_vector,
            k_nearest_neighbors=5,
        ),
    ],
    semantic_configuration_name=semantic_config.name,
    query_type=QueryType.SEMANTIC,
    top=5,
)

# Drain the result iterator into a list of cleaned-up dicts.
final_result_set = unpack_search_results(results)
print(f"Successfully retrieved {len(final_result_set)} results from Search.")
