# Azure Search - Practical Implementation


## Step 1: Install and Import Required Libraries
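Install the Azure Search SDK (`azure-search-documents`) and the Azure authentication package (`azure-identity`), then import the standard-library and SDK modules used in the rest of the notebook.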

In [None]:
import subprocess
import sys
import json
from pathlib import Path
from typing import List, Dict
from datetime import datetime
import time

# Install Azure packages
# Each package is installed with its own pip call so that one failure does
# not prevent the remaining installs from being attempted.
packages = ['azure-search-documents', 'azure-identity']
for package in packages:
    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', package, '-q'])
    except (subprocess.CalledProcessError, OSError):
        # Best-effort install: warn and move on. Only failures of the pip
        # subprocess are caught, so Ctrl-C (KeyboardInterrupt) and SystemExit
        # still stop the cell instead of being reported as a warning.
        print(f'‚ö† Warning: {package}')

from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchableField, ComplexField, SearchFieldDataType
)
from azure.identity import DefaultAzureCredential

print('‚úì All packages imported')

## Step 2: Azure Service Configuration
Set up connections to Azure Search service and define paths for the Hotels data.

In [None]:
# Configuration: service endpoint, target index name, and local data file
search_endpoint = 'https://xxxxxxxxxxxxxxx.search.windows.net'
index_name = 'hotels-sample-index'
json_file_path = Path('HotelsData_toAzureBlobs.json')

# Authentication: a single credential object is shared by both clients
credential = DefaultAzureCredential()

# Clients: index_client is used for index management calls, search_client
# for document upload and queries against `index_name`
index_client = SearchIndexClient(
    endpoint=search_endpoint,
    credential=credential,
)
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=credential,
)

# Echo the active settings, one per line
print(
    f'‚úì Connected to {search_endpoint}',
    f'‚úì Index: {index_name}',
    f'‚úì Data file: {json_file_path.name}',
    sep='\n',
)

## Step 3: Load and Validate Hotels Data
Read HotelsData_toAzureBlobs.json (50+ hotel records, each with a nested Address object and a Rooms array).

In [None]:
print(f'üìÇ Loading {json_file_path.name}...\n')

try:
    with open(json_file_path, 'r', encoding='utf-8') as f:
        raw_text = f.read().strip()

    # Source file is comma-separated JSON objects (not wrapped in an array)
    # Convert to valid JSON array and parse
    hotels_data = json.loads(f'[{raw_text}]')

    print(f'‚úì Loaded {len(hotels_data)} hotels')

    if hotels_data:
        sample = hotels_data[0]
        print(f'\nüìã Sample Hotel:')
        print(f'  ID: {sample.get("HotelId")}')
        print(f'  Name: {sample.get("HotelName")}')
        print(f'  Category: {sample.get("Category")}')
        print(f'  City: {sample.get("Address", {}).get("City")}')
        print(f'  Rooms: {len(sample.get("Rooms", []))} types')

except FileNotFoundError:
    print(f'‚úó File not found: {json_file_path}')
    hotels_data = []
except Exception as e:
    print(f'‚úó Error: {str(e)}')
    hotels_data = []

## Step 4: Define Index Schema (Portal-Aligned)
Create index matching the portal wizard configuration:
- Key field: HotelId
- Searchable: HotelName, Description, Tags, Address
- Filterable: Category, Rating, ParkingIncluded, Address fields  
- Facetable: Category, City for navigation

In [None]:
print('üî® Creating index schema...\n')

# Delete existing
try:
    index_client.delete_index(index_name)
    print(f'‚úì Deleted existing index')
except:
    print('‚Ñπ Creating fresh index')

# Sub-fields of the single Address complex object
address_subfields = [
    SearchableField(name='StreetAddress', type=SearchFieldDataType.String),
    SearchableField(
        name='City',
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    SearchableField(
        name='StateProvince',
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    SearchableField(
        name='PostalCode',
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
    ),
    SearchableField(name='Country', type=SearchFieldDataType.String),
]

# Sub-fields of each entry in the Rooms collection
room_subfields = [
    SearchableField(name='Description', type=SearchFieldDataType.String),
    SearchableField(
        name='Type',
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
    ),
    SimpleField(name='BaseRate', type=SearchFieldDataType.Double, filterable=True),
]

# Top-level fields; HotelId is the document key
fields = [
    # NOTE(review): the SimpleField helper may ignore searchable=True —
    # confirm against the SDK if HotelId is meant to be full-text searchable.
    SimpleField(
        name='HotelId',
        type=SearchFieldDataType.String,
        key=True,
        searchable=True,
        filterable=True,
        sortable=True,
    ),
    SearchableField(name='HotelName', type=SearchFieldDataType.String, sortable=True),
    # Language-specific analyzers for the English and French descriptions
    SearchableField(
        name='Description',
        type=SearchFieldDataType.String,
        analyzer_name='en.lucene',
    ),
    SearchableField(
        name='Description_fr',
        type=SearchFieldDataType.String,
        analyzer_name='fr.lucene',
    ),
    SearchableField(
        name='Category',
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    SearchableField(
        name='Tags',
        type=SearchFieldDataType.String,
        collection=True,
        facetable=True,
        filterable=True,
    ),
    SimpleField(
        name='ParkingIncluded',
        type=SearchFieldDataType.Boolean,
        facetable=True,
        filterable=True,
    ),
    SimpleField(
        name='Rating',
        type=SearchFieldDataType.Double,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    SimpleField(
        name='LastRenovationDate',
        type=SearchFieldDataType.DateTimeOffset,
        facetable=True,
        filterable=True,
    ),
    ComplexField(name='Address', fields=address_subfields),
    ComplexField(name='Rooms', fields=room_subfields, collection=True),
]

# Suggester 'sg' draws its suggestions from Tags and Address/City
suggesters = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City']}]
index = SearchIndex(name=index_name, fields=fields, suggesters=suggesters)

# Send the definition to the service and report the outcome
try:
    result = index_client.create_or_update_index(index)
    print(f'‚úì Index created: {result.name}')
    print(f'  Fields: {len(result.fields)}')
except Exception as e:
    print(f'‚úó Error: {str(e)}')

## Step 5: Upload Documents to Index
Upload all hotel documents from JSON file to Azure Search (using SDK, not indexer).

In [None]:
print('üì§ Uploading documents...\n')

if hotels_data:
    # Keep only fields defined in this notebook's index schema
    documents = []
    for hotel in hotels_data:
        doc = {
            '@search.action': 'upload',
            'HotelId': hotel.get('HotelId'),
            'HotelName': hotel.get('HotelName'),
            'Description': hotel.get('Description'),
            'Description_fr': hotel.get('Description_fr'),
            'Category': hotel.get('Category'),
            'Tags': hotel.get('Tags', []),
            'ParkingIncluded': hotel.get('ParkingIncluded'),
            'Rating': hotel.get('Rating'),
            'LastRenovationDate': hotel.get('LastRenovationDate'),
            'Address': hotel.get('Address', {}),
            'Rooms': [
                {
                    'Description': room.get('Description'),
                    'Type': room.get('Type'),
                    'BaseRate': room.get('BaseRate')
                }
                for room in hotel.get('Rooms', [])
            ]
        }
        documents.append(doc)

    try:
        result = search_client.upload_documents(documents=documents)
        successful = sum(1 for r in result if r.succeeded)
        print(f'‚úì Uploaded {successful}/{len(documents)} documents')
        time.sleep(2)
    except Exception as e:
        print(f'‚úó Error: {str(e)}')
else:
    print('‚ö† No data loaded')

## Step 6: Verify Index
Query the index to confirm documents were indexed successfully.

In [None]:
# Confirm the upload: show the index's document count and a five-row sample.
try:
    results = list(search_client.search(search_text='*', select=['HotelId', 'HotelName', 'Category', 'Rating'], top=5))
    # Ask the service for the count directly. Counting the hits of an
    # unbounded '*' search downloads every returned document and may be
    # capped by the service's default result size, under-reporting the total.
    total = search_client.get_document_count()
    print(f'‚úì Index contains {total} documents\n')
    print('Sample documents:')
    for i, r in enumerate(results, 1):
        print(f'  {i}. {r["HotelName"]} ({r["Category"]}) - {r["Rating"]}‚òÖ')
except Exception as e:
    print(f'‚úó Error: {str(e)}')

## Query 1: Simple Keyword Search
Search for keywords across all searchable fields.

In [None]:
print('\n=== QUERY 1: Keyword Search (beach) ===')
# Free-text query for "beach"; only three fields and the first five hits are requested
results = list(
    search_client.search(
        search_text='beach',
        select=['HotelName', 'Category', 'Rating'],
        top=5,
    )
)
print(f'Found {len(results)} results:\n')
for rank, hit in enumerate(results, start=1):
    name, category, rating = hit["HotelName"], hit["Category"], hit["Rating"]
    print(f'{rank}. {name} | {category} | Rating: {rating}')

## Query 2: Filtered Search
Search with filter constraints (e.g., Rating > 4).

In [None]:
print('\n=== QUERY 2: Filtered (Rating > 4) ===')
# Match every document, keep those rated above 4.0, highest rating first
results = list(search_client.search(
    search_text='*',
    filter='Rating gt 4.0',
    order_by=['Rating desc'],
    select=['HotelName', 'Rating', 'Address/City'],
    top=5
))
print(f'Found {len(results)} results:\n')
for i, r in enumerate(results, 1):
    # `or` fallbacks handle a null Address (where .get on None would raise)
    # and a null City (which would otherwise print as "None").
    address = r.get('Address') or {}
    city = address.get('City') or 'N/A'
    print(f'{i}. {r["HotelName"]} | Rating: {r["Rating"]}‚≠ê | {city}')

## Query 3: Complex Filtering
Combine multiple filter conditions with AND/OR logic.

In [None]:
print('\n=== QUERY 3: Complex Filter (Rating > 3.5 AND Parking) ===')
# Both filter conditions must hold: rating above 3.5 and parking included
results = list(search_client.search(
    search_text='*',
    filter='Rating gt 3.5 and ParkingIncluded eq true',
    select=['HotelName', 'Rating', 'ParkingIncluded', 'Address/City'],
    top=5
))
print(f'Found {len(results)} results:\n')
for i, r in enumerate(results, 1):
    # `or` fallbacks handle a null Address (where .get on None would raise)
    # and a null City (which would otherwise print as "None").
    address = r.get('Address') or {}
    city = address.get('City') or 'N/A'
    parking = 'Yes' if r.get('ParkingIncluded') else 'No'
    print(f'{i}. {r["HotelName"]} | Rating: {r["Rating"]} | Parking: {parking} | {city}')

## Query 4: Sorted Results
Sort search results by specific fields (e.g., Rating descending).

In [None]:
print('\n=== QUERY 4: Sorted by Rating DESC ===')
# Match everything and let the service order the hits by Rating, best first
sorted_query = {
    'search_text': '*',
    'order_by': ['Rating desc'],
    'select': ['HotelName', 'Rating', 'Category'],
    'top': 10,
}
results = list(search_client.search(**sorted_query))
print(f'Top {len(results)} hotels by rating:\n')
for position, hotel in enumerate(results, start=1):
    rating, name, category = hotel["Rating"], hotel["HotelName"], hotel["Category"]
    print(f'{position}. {rating}‚≠ê {name} ({category})')

## Query 5: Faceted Search
Group results by categories for navigation (faceted navigation).

In [None]:
print('\n=== QUERY 5: Faceted Search ===')
# Request facet buckets for Category and Address/City alongside the hits
results = search_client.search(
    search_text='*',
    facets=['Category', 'Address/City'],
    select=['HotelName', 'Category'],
    top=10
)
results_list = list(results)
print(f'Results with facets:\n')
for i, r in enumerate(results_list[:5], 1):
    print(f'{i}. {r["HotelName"]} | {r["Category"]}')

# The facets were requested above but never read; show the buckets so the
# query actually demonstrates faceted navigation. get_facets() may return
# None when the response carries no facets, hence the `or {}`.
facets = results.get_facets() or {}
for facet_name, buckets in facets.items():
    print(f'\n{facet_name}:')
    for bucket in buckets:
        print(f'  {bucket.get("value")}: {bucket.get("count")}')

## Query 6: Lucene Syntax (Advanced)
Use full Lucene syntax for fuzzy search and advanced queries.

In [None]:
print('\n=== QUERY 6: Lucene Fuzzy Search (seatle~) ===')
# query_type='full' enables full Lucene syntax; the trailing ~ requests a fuzzy match
results = list(search_client.search(
    search_text='seatle~',
    query_type='full',
    select=['HotelName', 'Address/City'],
    top=5
))
print(f'Fuzzy match results: {len(results)}\n')
for i, r in enumerate(results, 1):
    # `or` fallbacks handle a null Address (where .get on None would raise)
    # and a null City (which would otherwise print as "None").
    address = r.get('Address') or {}
    city = address.get('City') or 'N/A'
    print(f'{i}. {r["HotelName"]} | {city}')

## Query 7: Autocomplete
Get suggestions for user autocomplete (type-ahead functionality).

In [None]:
print('\n=== QUERY 7: Autocomplete ("ca") ===')
# Ask suggester 'sg' to complete the partial term "ca"; any failure is
# reported as a note rather than raised.
try:
    results = list(
        search_client.autocomplete(
            search_text='ca',
            suggester_name='sg',
            top=5,
        )
    )
    print(f'Suggestions: {len(results)}\n')
    for suggestion in results:
        completed_text = suggestion["text"]
        print(f'  ‚Ä¢ {completed_text}')
except Exception as err:
    print(f'Note: {err}')

## Query 8: One Query That Fails in Keyword Search
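The cell below prints a semantic-intent query string and then sends a vector query that targets a `DescriptionVector` field. The request fails here because this keyword-only index defines no vector field; it would work against an index that includes one.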

**Query:** "outdoor adventure and nature activities"

In [None]:
from azure.search.documents.models import VectorizedQuery

# Semantic-intent text shown to the reader; the request below sends only a vector
query_text = "outdoor adventure and nature activities"
print(f"Query: {query_text}")

try:
    # Placeholder vector of 1536 constant values aimed at DescriptionVector,
    # a field the index schema in this notebook does not define.
    vector_query = VectorizedQuery(
        fields="DescriptionVector",
        k_nearest_neighbors=5,
        vector=[0.1] * 1536,
    )
    # Materialize the results so the request is actually sent
    list(search_client.search(vector_queries=[vector_query]))
    print("Unexpected: query succeeded")
except Exception as err:
    print("Expected failure on keyword index:", str(err), sep="\n")

## Summary
This notebook ends with one semantic-intent query, sent as a vector query, that intentionally fails: it targets `DescriptionVector`, a field that does not exist in this keyword-only index.