# 1 Create Index
# 2 Upload hotels
# 3 Sample Semantic Ranker Query
# 4 Semantic Config Explained
# 5 Semantic Captions
# 6 Semantic Answers
# 7 Reranking individual query / docs
# 8 Debugging Semantic Ranker

In [None]:
%pip install -r requirements.txt --quiet

In [9]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

load_dotenv(override=True) # take environment variables from .env.

# Variables not used here do not need to be updated in your .env file
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
credential = AzureKeyCredential(os.getenv("AZURE_SEARCH_ADMIN_KEY")) if len(os.getenv("AZURE_SEARCH_ADMIN_KEY", "")) > 0 else DefaultAzureCredential()
index_name = os.environ["AZURE_SEARCH_INDEX"]
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
# Data is pre-vectorized using text-embedding-3-large
model_name = "text-embedding-3-large"
aoai_embedding_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-large")
aoai_key = os.getenv("AZURE_OPENAI_KEY", None)

In [None]:
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    ComplexField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters
)
from azure.search.documents.indexes import SearchIndexClient


index = SearchIndex(
    name=index_name,
    fields=[
        SearchField(name="HotelId", type="Edm.String", key=True, hidden=False, filterable=True, sortable=False, facetable=False, searchable=True),
        SearchField(name="HotelName", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),
        SearchField(name="Description", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),
        SearchField(name="DescriptionEmbedding", type="Collection(Edm.Single)", hidden=False, searchable=True, vector_search_dimensions=3072, vector_search_profile_name="hnsw"),
        SearchField(name="Description_fr", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True, analyzer_name="fr.microsoft"),
        SearchField(name="Description_fr_Embedding", type="Collection(Edm.Single)", hidden=False, searchable=True, vector_search_dimensions=3072, vector_search_profile_name="hnsw"),
        SearchField(name="Category", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
        SearchField(name="Tags", type="Collection(Edm.String)", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
        SearchField(name="ParkingIncluded", type="Edm.Boolean", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),
        SearchField(name="LastRenovationDate", type="Edm.DateTimeOffset", hidden=False, filterable=False, sortable=True, facetable=False, searchable=False),
        SearchField(name="Rating", type="Edm.Double", hidden=False, filterable=True, sortable=True, facetable=True, searchable=False),
        ComplexField(name="Address", collection=False, fields=[
            SearchField(name="StreetAddress", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),
            SearchField(name="City", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
            SearchField(name="StateProvince", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
            SearchField(name="PostalCode", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
            SearchField(name="Country", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True)
        ]),
        SearchField(name="Location", type="Edm.GeographyPoint", hidden=False, filterable=True, sortable=True, facetable=False, searchable=False),
        ComplexField(name="Rooms", collection=True, fields=[
            SearchField(name="Description", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),
            SearchField(name="Description_fr", type="Edm.String", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True, analyzer_name="fr.microsoft"),
            SearchField(name="Type", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
            SearchField(name="BaseRate", type="Edm.Double", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),
            SearchField(name="BedOptions", type="Edm.String", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
            SearchField(name="SleepsCount", type="Edm.Int64", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),
            SearchField(name="SmokingAllowed", type="Edm.Boolean", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),
            SearchField(name="Tags", type="Collection(Edm.String)", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),
        ])
    ],
    vector_search=VectorSearch(
        profiles=[VectorSearchProfile(name="hnsw", vectorizer_name="openai", algorithm_configuration_name="hnsw")],
        algorithms=[HnswAlgorithmConfiguration(name="hnsw")],
        vectorizers=[
            AzureOpenAIVectorizer(
                vectorizer_name="openai",
                parameters=AzureOpenAIVectorizerParameters(
                    resource_url=aoai_endpoint,
                    deployment_name=aoai_embedding_deployment,
                    model_name=model_name,
                    api_key=aoai_key,
                )
            )
        ]
    )
)

index_client = SearchIndexClient(endpoint, credential)
result = index_client.create_or_update_index(index)
print("Created sample index")

Created hotels index
Uploaded search documents


In [16]:
from azure.search.documents import SearchClient
import json

client = SearchClient(endpoint, index_name, credential)
with open("../../../data/hotels.json", encoding="utf-8", mode="r") as f:
    documents = json.load(f)
    client.upload_documents(documents)
    print("Uploaded sample documents")

Uploaded sample documents


In [None]:
from azure.search.documents.indexes.models import SemanticSearch, SemanticConfiguration, SemanticField, SemanticPrioritizedFields

index.semantic_search = SemanticSearch(
    default_configuration_name="semantic-config",
    configurations=[
        SemanticConfiguration(
            name="semantic-config",
            prioritized_fields=SemanticPrioritizedFields(
                content_fields=[SemanticField(field_name="Description")]
            )
        ),
        SemanticConfiguration(
            name="semantic-config-with-title",
            prioritized_fields=SemanticPrioritizedFields(
                title_field=SemanticField(field_name="HotelName"),
                content_fields=[SemanticField(field_name="Description")]
            )
        ),
        SemanticConfiguration(
            name="semantic-config-with-title-and-keywords",
            prioritized_fields=SemanticPrioritizedFields(
                title_field=SemanticField(field_name="HotelName"),
                content_fields=[SemanticField(field_name="Description")],
                keywords_fields=[SemanticField(field_name="Tags")]
            )
        ),
    ]
)

result = index_client.create_or_update_index(index)

print("Updated index with semantic configuration")

Updated index with semantic configuration


In [None]:
from azure.search.documents.models import VectorizableTextQuery
import pandas as pd

def run_query(query: str, semantic_config: str = None, include_debug: bool = False) -> pd.DataFrame:
    results = client.search(
        search_text=query,
        vector_queries=[VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="DescriptionEmbedding")],
        select="HotelName,Description",
        top=5,
        query_type="semantic",
        semantic_configuration_name=semantic_config,
        debug="semantic"
    )
    df = pd.DataFrame(list(results), columns=["@search.score", "@search.reranker_score", "HotelName", "Description"])
    df_no_reranker = df.sort_values(by="@search.score")
    df = pd.DataFrame({"OriginalIndex": df.index.map({original_idx: reranked_idx for reranked_idx, original_idx in enumerate(df_no_reranker.index)}), **df.to_dict('list')})

    def highlight_row_changes(row: pd.Series):
        reranked_index = df.index.get_loc(row.name)
        original_index = df_no_reranker.index.get_loc(row.name)
        first_column_style = ''
        if original_index > reranked_index:
            first_column_style = 'background-color: green'
        elif original_index < reranked_index:
            first_column_style =  'background-color: red'

        return [first_column_style] + ['' for i in range(len(row) - 1)]

    df = df.style.apply(highlight_row_changes, axis=1).set_properties(**{
        'max-width': '500px',
        'text-align': 'left',
        'white-space': 'normal',
        'word-wrap': 'break-word'
    })
    return df


display(run_query("new york hotel with pool or gym"))


Unnamed: 0,OriginalIndex,@search.score,@search.reranker_score,HotelName,Description
0,4,0.032796,2.979044,Hotel on the Harbor,"Stunning Downtown Hotel with indoor Pool. Ideally located close to theatres, museums and the convention center. Indoor Pool and Sauna and fitness centre. Popular Bar & Restaurant"
1,1,0.027106,2.757146,City Center Summer Wind Resort,"Eco-friendly from our gardens to table, with a rooftop serenity pool and outdoor seating to take in the sunset. Just steps away from the Convention Center. Located in the heart of downtown with modern rooms with stunning city views, 24-7 dining options, free WiFi and easy valet parking."
2,2,0.030077,2.722054,Winter Panorama Resort,"Plenty of great skiing, outdoor ice skating, sleigh rides, tubing and snow biking. Yoga, group exercise classes and outdoor hockey are available year-round, plus numerous options for shopping as well as great spa services. Newly-renovated with large rooms, free 24-hr airport shuttle & a new restaurant. Rooms/suites offer mini-fridges & 49-inch HDTVs."
3,0,0.026133,2.692278,Grand Gaming Resort,"The Best Gaming Resort in the area. With elegant rooms & suites, pool, cabanas, spa, brewery & world-class gaming. This is the best place to play, stay & dine."
4,3,0.032002,2.672074,King's Cellar Hotel,"Newest kid on the downtown block. Steps away from the most popular destinations in downtown, enjoy free WiFi, an indoor rooftop pool & fitness center, 24 Grab'n'Go & drinks at the bar"


In [43]:
display(run_query("new york hotel with pool or gym", semantic_config="semantic-config-with-title"))


Unnamed: 0,OriginalIndex,@search.score,@search.reranker_score,HotelName,Description
0,3,0.032002,3.229299,King's Cellar Hotel,"Newest kid on the downtown block. Steps away from the most popular destinations in downtown, enjoy free WiFi, an indoor rooftop pool & fitness center, 24 Grab'n'Go & drinks at the bar"
1,4,0.032796,2.955649,Hotel on the Harbor,"Stunning Downtown Hotel with indoor Pool. Ideally located close to theatres, museums and the convention center. Indoor Pool and Sauna and fitness centre. Popular Bar & Restaurant"
2,2,0.031498,2.803227,Double Sanctuary Resort,"5 star Luxury Hotel - Biggest Rooms in the city. #1 Hotel in the area listed by Traveler magazine. Free WiFi, Flexible check in/out, Fitness Center & espresso in room."
3,1,0.027106,2.775224,City Center Summer Wind Resort,"Eco-friendly from our gardens to table, with a rooftop serenity pool and outdoor seating to take in the sunset. Just steps away from the Convention Center. Located in the heart of downtown with modern rooms with stunning city views, 24-7 dining options, free WiFi and easy valet parking."
4,0,0.024266,2.66782,Countryside Hotel,"Save up to 50% off traditional hotels. Free WiFi, great location near downtown, full kitchen, washer & dryer, 24/7 support, bowling alley, fitness center and more."


In [44]:
display(run_query("new york hotel with pool or gym", semantic_config="semantic-config-with-title-and-keywords"))


Unnamed: 0,OriginalIndex,@search.score,@search.reranker_score,HotelName,Description
0,3,0.032002,3.015908,King's Cellar Hotel,"Newest kid on the downtown block. Steps away from the most popular destinations in downtown, enjoy free WiFi, an indoor rooftop pool & fitness center, 24 Grab'n'Go & drinks at the bar"
1,4,0.032796,2.82166,Hotel on the Harbor,"Stunning Downtown Hotel with indoor Pool. Ideally located close to theatres, museums and the convention center. Indoor Pool and Sauna and fitness centre. Popular Bar & Restaurant"
2,2,0.031498,2.82166,Double Sanctuary Resort,"5 star Luxury Hotel - Biggest Rooms in the city. #1 Hotel in the area listed by Traveler magazine. Free WiFi, Flexible check in/out, Fitness Center & espresso in room."
3,1,0.030077,2.65116,Winter Panorama Resort,"Plenty of great skiing, outdoor ice skating, sleigh rides, tubing and snow biking. Yoga, group exercise classes and outdoor hockey are available year-round, plus numerous options for shopping as well as great spa services. Newly-renovated with large rooms, free 24-hr airport shuttle & a new restaurant. Rooms/suites offer mini-fridges & 49-inch HDTVs."
4,0,0.027106,2.622093,City Center Summer Wind Resort,"Eco-friendly from our gardens to table, with a rooftop serenity pool and outdoor seating to take in the sunset. Just steps away from the Convention Center. Located in the heart of downtown with modern rooms with stunning city views, 24-7 dining options, free WiFi and easy valet parking."


In [49]:
query = "new york hotel with pool or gym"
results = client.search(
    search_text=query,
    vector_queries=[VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="DescriptionEmbedding")],
    select="HotelName,Description",
    top=5,
    query_type="semantic",
    debug="semantic"
)
results = list(results)
results[0]["@search.document_debug_info"].as_dict()

{'semantic': {'content_fields': [{'name': 'Description', 'state': 'used'}],
  'keyword_fields': [],
  'reranker_input': {'content': 'Stunning Downtown Hotel with indoor Pool. Ideally located close to theatres, museums and the convention center. Indoor Pool and Sauna and fitness centre. Popular Bar & Restaurant'}}}