# Azure AI Search: Semantic Ranking 

## Step 1: Install Required Packages
Install the Azure AI Search SDK, the Azure Identity library (for authentication), and the Azure Blob Storage client used to load the hotel data.

In [None]:
# Install the notebook's dependencies with pip
import subprocess
import sys

# Azure AI Search SDK, Azure Identity, and the Blob Storage client.
required_packages = ["azure-search-documents", "azure-identity", "azure-storage-blob"]

# Invoking pip through sys.executable runs it with the same interpreter as this code.
pip_command = [sys.executable, "-m", "pip", "install", *required_packages, "--quiet"]
subprocess.check_call(pip_command)  # raises CalledProcessError if pip exits non-zero
print("‚úì All packages installed")

In [None]:
# Import libraries
# SearchClient uploads documents and runs queries; SearchIndexClient creates/updates the index.
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
# Model classes used to describe the index schema and its semantic configuration.
from azure.search.documents.indexes.models import (
    SearchIndex,
    SimpleField,
    SearchableField,
    ComplexField,
    SearchFieldDataType,
    SemanticConfiguration,
    SemanticField,
    SemanticPrioritizedFields,
    SemanticSearch
)
# NOTE(review): AzureKeyCredential is not referenced by any cell visible in this notebook
# (all clients are built with DefaultAzureCredential) — confirm whether it is still needed.
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
# json parses the hotel data downloaded from blob storage.
import json

print("‚úì Libraries imported")

## Step 2: Azure Service Configuration (Managed Identity)
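Set up connections to Azure AI Search and Azure Blob Storage using `DefaultAzureCredential`, which tries a chain of credential sources (environment variables, managed identity, and developer sign-ins such as the Azure CLI), so no keys are stored in the notebook.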

In [None]:
# Service settings — replace the placeholder values with your own resource names
search_endpoint = "https://xxxxxxxxxxxxxxx.search.windows.net"
index_name = "hotels-semantic-index"

# Location of the hotel data file in Blob Storage
blob_account = "xxxxxxxxxxxxxxx"
container_name = "files"
blob_name = "HotelsData_toAzureBlobs.json"

# A single credential object is reused by the search and blob clients created later
credential = DefaultAzureCredential()

# Echo the effective settings, one per line
print(
    f"‚úì Azure Search: {search_endpoint}",
    f"‚úì Index: {index_name}",
    f"‚úì Blob: {blob_account}/{container_name}/{blob_name}",
    "‚úì Authentication: Managed Identity",
    sep="\n",
)

## Step 3: Create Index with Semantic Configuration
Define search index schema and configure semantic ranking with title, keywords, and content fields for intelligent reranking.

In [None]:
# Build the index schema, attach the semantic configuration, and send it to the service
index_client = SearchIndexClient(credential=credential, endpoint=search_endpoint)

# Sub-fields of the complex "Address" field (both share the same attributes)
address_subfields = [
    SearchableField(
        name=subfield_name,
        type=SearchFieldDataType.String,
        filterable=True,
        sortable=True,
        facetable=True,
    )
    for subfield_name in ("City", "StateProvince")
]

# Top-level schema; HotelId is the document key
fields = [
    SimpleField(name="HotelId", type=SearchFieldDataType.String, key=True, filterable=True, sortable=True),
    SearchableField(name="HotelName", type=SearchFieldDataType.String, sortable=True),
    SearchableField(name="Description", type=SearchFieldDataType.String),
    SearchableField(name="Category", type=SearchFieldDataType.String, filterable=True, facetable=True),
    SearchableField(name="Tags", collection=True, type=SearchFieldDataType.String, filterable=True, facetable=True),
    SimpleField(name="ParkingIncluded", type=SearchFieldDataType.Boolean, filterable=True, facetable=True),
    SimpleField(name="Rating", type=SearchFieldDataType.Double, filterable=True, sortable=True, facetable=True),
    ComplexField(name="Address", fields=address_subfields),
]

# Fields handed to the semantic ranker: title, keywords, and content
prioritized_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="HotelName"),
    keywords_fields=[SemanticField(field_name="Tags")],
    content_fields=[SemanticField(field_name="Description")],
)
semantic_config = SemanticConfiguration(name="semantic-config", prioritized_fields=prioritized_fields)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and create it (or update it if it already exists)
index = SearchIndex(name=index_name, fields=fields, semantic_search=semantic_search)
result = index_client.create_or_update_index(index)

print(f"‚úì Index '{index_name}' created with semantic configuration")
print(
    "  - Title field: HotelName",
    "  - Keywords field: Tags",
    "  - Content field: Description",
    sep="\n",
)

## Step 4: Load Hotels Data from Azure Blob Storage
Download and parse HotelsData_toAzureBlobs.json containing 50 hotels with descriptions, ratings, and locations.

In [None]:
# Load hotels from blob storage (same file as keyword and vector notebooks)
blob_service_client = BlobServiceClient(
    account_url=f"https://{blob_account}.blob.core.windows.net",
    credential=credential
)

blob_client = blob_service_client.get_blob_client(
    container=container_name,
    blob=blob_name
)

# Download the blob. 'utf-8-sig' decodes plain UTF-8 and also drops a leading
# byte-order mark, which json.loads would otherwise reject.
blob_data = blob_client.download_blob()
raw_text = blob_data.readall().decode('utf-8-sig')

# The file is expected to hold comma-separated JSON objects, which need to be
# wrapped in [...] to form a valid array. Tolerate two variations instead of
# failing or producing a nested list:
#   * a trailing comma after the last object
#   * content that is already a JSON array
json_text = raw_text.strip().rstrip(',').rstrip()
if not json_text.startswith('['):
    json_text = f'[{json_text}]'
documents_data = json.loads(json_text)

print(f"‚úì Loaded {len(documents_data)} hotels")
if documents_data:
    sample = documents_data[0]
    print(f"Sample: {sample.get('HotelName', 'N/A')} ({sample.get('Category', 'N/A')})")

## Step 5: Prepare and Upload Documents to Index
Filter required fields from hotel data and batch upload all documents to the semantic-enabled search index.

In [None]:
# Prepare documents for semantic index
documents = []
for doc in documents_data:
    # Derive tags from the category, parking flag, and rating
    tags = []
    if doc.get('Category'):
        tags.append(doc['Category'])
    if doc.get('ParkingIncluded'):
        tags.append('Parking')
    # `or 0` also covers an explicit null Rating, which .get()'s default does not
    if (doc.get('Rating') or 0) >= 4.0:
        tags.append('Highly Rated')

    # An explicit null Address is treated the same as a missing one
    address = doc.get("Address") or {}

    # Keep only the fields that exist in the index schema
    filtered_doc = {
        "HotelId": doc["HotelId"],
        "HotelName": doc["HotelName"],
        "Description": doc["Description"],
        "Category": doc.get("Category", ""),
        "Tags": tags,
        "ParkingIncluded": doc.get("ParkingIncluded", False),
        "Rating": doc.get("Rating", 0.0),
        "Address": {
            "City": address.get("City", ""),
            "StateProvince": address.get("StateProvince", "")
        }
    }
    documents.append(filtered_doc)

# Upload documents
search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)
result = search_client.upload_documents(documents=documents)

# upload_documents returns one result per document; individual documents can be
# rejected, so report what the service actually accepted.
failed_uploads = [item for item in result if not item.succeeded]
uploaded_count = len(result) - len(failed_uploads)

print(f"‚úì {uploaded_count} documents uploaded to semantic index")
if failed_uploads:
    print(f"WARNING: {len(failed_uploads)} of {len(documents)} documents were rejected:")
    for item in failed_uploads:
        print(f"  - HotelId {item.key}: {item.error_message}")
else:
    print("‚úì Ready for semantic ranking queries")

## Step 6: Verify Index and Document Count
Confirm the index was created successfully and contains all 50 hotel documents ready for semantic search.

In [None]:
# Match-all query with the total count requested, to report how many documents are indexed
# NOTE(review): the count may lag briefly right after an upload — re-run the cell if it looks low.
result = search_client.search(search_text="*", include_total_count=True)
total_documents = result.get_count()
print(f"‚úì Total documents in index: {total_documents}")

# Pull three hotels as a spot check, retrieving only the fields that are printed
sample_results = list(
    search_client.search(
        search_text="*",
        select=["HotelName", "Category", "Rating"],
        top=3,
    )
)
print("\nSample hotels indexed:")
for position, hotel in enumerate(sample_results, start=1):
    hotel_name = hotel['HotelName']
    hotel_rating = hotel.get('Rating', 0)
    hotel_category = hotel.get('Category', 'N/A')
    print(f"{position}. {hotel_name} ({hotel_rating}‚òÖ, {hotel_category})")

---
# Semantic Ranking Queries with Captions & Answers

Now we'll run queries that demonstrate what semantic ranking adds beyond keyword and vector search.

## Query 1: Semantic Search with Captions ("walking distance to live music")

In [None]:
# Semantic query with captions
query_text = "walking distance to live music"

# query_type='semantic' applies the 'semantic-config' configuration defined on the index;
# query_caption='extractive' requests a caption per hit.
results = search_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=query_text,
    select=['HotelName', 'Description', 'Category', 'Rating'],
    query_caption='extractive',
    top=5
)

print(f"Query: '{query_text}'")
print(f"\n{'='*80}")
print("SEMANTIC RANKING RESULTS (with captions)\n")

for i, result in enumerate(results, 1):
    # The key can be present with a None value (e.g. when a hit was not reranked);
    # .get()'s default does not apply then and ':.4f' would raise TypeError.
    reranker_score = result.get("@search.reranker_score")
    score_text = f"{reranker_score:.4f}" if reranker_score is not None else "n/a"
    print(f"{i}. {result['HotelName']} (Reranker Score: {score_text})")
    print(f"   Category: {result.get('Category', 'N/A')} | Rating: {result.get('Rating', 0)}‚òÖ")
    
    # Show the first caption, preferring the highlighted form over plain text
    captions = result.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   üìù Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   üìù Caption: {caption.text}")
    else:
        print(f"   üìù Caption: (no caption extracted)")
    print()

## Query 2: Semantic Answers ("what's a good hotel for people who like to read")

In [None]:
# Semantic query with answers
query_text = "what's a good hotel for people who like to read"

# Same semantic query shape as before, additionally requesting extractive answers
results = search_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=query_text,
    select=['HotelName', 'Description', 'Category', 'Rating'],
    query_caption='extractive',
    query_answer='extractive',
    top=5
)

print(f"Query: '{query_text}'")
print(f"\n{'='*80}")

# Show semantic answers first
semantic_answers = results.get_answers()
if semantic_answers:
    print("üéØ SEMANTIC ANSWER (extracted from content):\n")
    for answer in semantic_answers:
        if hasattr(answer, 'highlights') and answer.highlights:
            print(f"   {answer.highlights}")
        elif hasattr(answer, 'text'):
            print(f"   {answer.text}")
        # Only format the score when one is actually present
        answer_score = getattr(answer, 'score', None)
        if answer_score is not None:
            print(f"   Answer Score: {answer_score:.4f}")
    print(f"\n{'='*80}\n")
else:
    print("(No direct answer extracted)\n")

# Show top results
print("TOP RESULTS:\n")
for i, result in enumerate(results, 1):
    # The key can be present with a None value (e.g. when a hit was not reranked);
    # .get()'s default does not apply then and ':.4f' would raise TypeError.
    reranker_score = result.get("@search.reranker_score")
    score_text = f"{reranker_score:.4f}" if reranker_score is not None else "n/a"
    print(f"{i}. {result['HotelName']} (Reranker Score: {score_text})")
    print(f"   Category: {result.get('Category', 'N/A')} | Rating: {result.get('Rating', 0)}‚òÖ")
    
    # First caption only, highlighted form preferred
    captions = result.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   üìù Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   üìù Caption: {caption.text}")
    print()

## Query 3: Semantic Search for Family Vacation

In [None]:
# Family vacation query
query_text = "family friendly hotel with activities for children"

# Semantic query requesting both extractive captions and extractive answers
results = search_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=query_text,
    select=['HotelName', 'Description', 'Category', 'Rating'],
    query_caption='extractive',
    query_answer='extractive',
    top=5
)

print(f"Query: '{query_text}'")
print(f"\n{'='*80}")

# Check for answers (nothing is printed when none were extracted)
semantic_answers = results.get_answers()
if semantic_answers:
    print("üéØ SEMANTIC ANSWER:\n")
    for answer in semantic_answers:
        if hasattr(answer, 'highlights') and answer.highlights:
            print(f"   {answer.highlights}")
        elif hasattr(answer, 'text'):
            print(f"   {answer.text}")
    print(f"\n{'='*80}\n")

print("TOP RESULTS:\n")
for i, result in enumerate(results, 1):
    # The key can be present with a None value (e.g. when a hit was not reranked);
    # .get()'s default does not apply then and ':.4f' would raise TypeError.
    reranker_score = result.get("@search.reranker_score")
    score_text = f"{reranker_score:.4f}" if reranker_score is not None else "n/a"
    print(f"{i}. {result['HotelName']} (Reranker Score: {score_text})")
    print(f"   Category: {result.get('Category', 'N/A')} | Rating: {result.get('Rating', 0)}‚òÖ")
    
    # First caption only, highlighted form preferred
    captions = result.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   üìù Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   üìù Caption: {caption.text}")
    print()

## Query 4: Business Travel Query with Semantic Context

In [None]:
# Business travel query
query_text = "hotel suitable for business meetings and conferences"

# Semantic query with extractive captions (no answers requested)
results = search_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=query_text,
    select=['HotelName', 'Description', 'Category', 'Rating'],
    query_caption='extractive',
    top=5
)

print(f"Query: '{query_text}'")
print(f"\n{'='*80}\n")

for i, result in enumerate(results, 1):
    # The key can be present with a None value (e.g. when a hit was not reranked);
    # .get()'s default does not apply then and ':.4f' would raise TypeError.
    reranker_score = result.get("@search.reranker_score")
    score_text = f"{reranker_score:.4f}" if reranker_score is not None else "n/a"
    print(f"{i}. {result['HotelName']} (Reranker Score: {score_text})")
    print(f"   Category: {result.get('Category', 'N/A')} | Rating: {result.get('Rating', 0)}‚òÖ")
    
    # First caption only, highlighted form preferred
    captions = result.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   üìù Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   üìù Caption: {caption.text}")
    print()

---
# Comparison: Keyword vs Semantic Ranking

Let's run the same query with both approaches to see the key differences.

**Note:** This index uses semantic ranking (no vector fields). For vector search comparison, see the Vector Search notebook.

## Side-by-Side Comparison Query: "romantic hotel with scenic views"

In [None]:
# Run one query twice — plain keyword search, then semantic ranking — and print both result lists
query_text = "romantic hotel with scenic views"

print(f"Query: '{query_text}'")
print(f"\n{'='*80}")
print("COMPARISON: KEYWORD vs SEMANTIC RANKING\n")

# 1. Keyword Search (BM25): no query_type given, so no semantic options are sent
print("1Ô∏è‚É£ KEYWORD SEARCH (BM25 - exact word matching):")
print("-" * 80)
keyword_results = list(search_client.search(
    search_text=query_text,
    select=['HotelName', 'Category', 'Rating'],
    top=3
))

for i, result in enumerate(keyword_results, 1):
    score = result.get("@search.score", 0)
    print(f"{i}. {result['HotelName']} (BM25 Score: {score:.4f})")
    print(f"   {result.get('Category', 'N/A')} | {result.get('Rating', 0)}‚òÖ")
    print(f"   ‚ö†Ô∏è No captions - can't see WHY it matched")
    print()

print(f"\n{'='*80}\n")

# 2. Semantic Ranking: same text, reranked with 'semantic-config', captions requested
print("2Ô∏è‚É£ SEMANTIC RANKING (ML reranking + captions + answers):")
print("-" * 80)
semantic_results = search_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=query_text,
    select=['HotelName', 'Description', 'Category', 'Rating'],
    query_caption='extractive',
    top=3
)

for i, result in enumerate(semantic_results, 1):
    # The key can be present with a None value (e.g. when a hit was not reranked);
    # .get()'s default does not apply then and ':.4f' would raise TypeError.
    reranker_score = result.get("@search.reranker_score")
    score_text = f"{reranker_score:.4f}" if reranker_score is not None else "n/a"
    print(f"{i}. {result['HotelName']} (Reranker Score: {score_text})")
    print(f"   {result.get('Category', 'N/A')} | {result.get('Rating', 0)}‚òÖ")
    
    # First caption only, highlighted form preferred
    captions = result.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   ‚úÖ Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   ‚úÖ Caption: {caption.text}")
    print()

# Fixed summary text; it is not derived from the results printed above
print(f"\n{'='*80}")
print("\nüìä KEY DIFFERENCES:")
print("  ‚Ä¢ Keyword: Fast, exact word matching, no context understanding")
print("  ‚Ä¢ Semantic: ML-powered reranking + captions showing relevance + extractive answers")
print("\nüí° Why Semantic is Better:")
print("  - Understands intent ('romantic' ‚Üí 'views', 'old town hospitality')")
print("  - Captions show WHY each result matches")
print("  - Reranker scores more accurate than BM25")