# Azure AI Search: Agentic Retrieval 

## Step 1: Install Required Packages
Install Azure Search Documents SDK (preview version) with agentic retrieval support, authentication, and HTTP client libraries.

In [None]:
# Install required packages (preview SDK for agentic retrieval)
import sys
import subprocess

# pip applies `--pre` and `--force-reinstall` to every requirement named on the
# command line, so the preview SDK gets its own invocation. azure-identity and
# requests are then installed separately, which keeps them on stable releases
# and avoids force-reinstalling them for no reason.
subprocess.check_call([sys.executable, "-m", "pip", "install", "azure-search-documents", "--pre", "--force-reinstall", "--quiet"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "azure-identity", "requests", "--quiet"])
print("‚úì All packages installed (preview SDK with agentic retrieval support)")

In [None]:
# Import libraries (SDK approach from official quickstart)
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

# Agentic retrieval SDK classes
from azure.search.documents.indexes.models import (
    SearchIndexKnowledgeSource, 
    SearchIndexKnowledgeSourceParameters,
    SearchIndexFieldReference,
    KnowledgeBase,
    KnowledgeBaseAzureOpenAIModel,
    KnowledgeSourceReference,
    AzureOpenAIVectorizerParameters,
    KnowledgeRetrievalOutputMode,
    KnowledgeRetrievalLowReasoningEffort
)
from azure.search.documents.knowledgebases import KnowledgeBaseRetrievalClient
from azure.search.documents.knowledgebases.models import (
    KnowledgeBaseRetrievalRequest,
    KnowledgeBaseMessage,
    KnowledgeBaseMessageTextContent,
    SearchIndexKnowledgeSourceParams
)
import requests
import json

# Reached only when every import in this cell succeeded; a missing package
# raises ImportError before this line runs.
print("‚úì All libraries imported (SDK with agentic retrieval support)")

## Step 2: Configuration and Authentication (Managed Identity)
Set up connections to Azure AI Search and Microsoft Foundry (where the Azure OpenAI models are deployed), and define the names used by the agentic retrieval components.

In [None]:
# Configuration - Microsoft Foundry + Azure AI Search
# The host names below are placeholders; replace them with real resource
# endpoints before running the remaining cells.
search_endpoint = "https://xxxxxxxxxxxxxxx.search.windows.net"

# Microsoft Foundry endpoint (where models are deployed)
aoai_endpoint = "https://xxxxxxxxxxxxxxx.cognitiveservices.azure.com"

# Use existing semantic index (agentic retrieval requires semantic configuration)
index_name = "hotels-semantic-index"

# Agentic retrieval objects: the names given to the knowledge source and the
# knowledge base that later cells create and query.
knowledge_source_name = "hotels-knowledge-source"
knowledge_base_name = "hotels-knowledge-base"

# Azure OpenAI models deployed in Foundry
# (model = model name, deployment = deployment name; here they are identical)
# NOTE(review): aoai_embedding_model is not referenced by any cell visible in
# this notebook — confirm whether it is still needed.
aoai_embedding_model = "text-embedding-ada-002"
aoai_embedding_deployment = "text-embedding-ada-002"
aoai_gpt_model = "gpt-5-mini"
aoai_gpt_deployment = "gpt-5-mini"

# Managed Identity
# NOTE(review): DefaultAzureCredential tries several credential sources, of
# which managed identity is only one — confirm which source is expected in the
# target environment.
credential = DefaultAzureCredential()
# Bearer-token callable for the Azure AI Search scope.
# NOTE(review): token_provider is not referenced by any cell visible in this
# notebook; the clients below are given `credential` directly.
token_provider = get_bearer_token_provider(credential, "https://search.azure.com/.default")

# Echo the effective settings so a misconfigured value is easy to spot.
print(f"‚úì Azure Search: {search_endpoint}")
print(f"‚úì Foundry: {aoai_endpoint}")
print(f"‚úì Source Index: {index_name}")
print(f"‚úì Knowledge Source: {knowledge_source_name}")
print(f"‚úì Knowledge Base: {knowledge_base_name}")
print(f"‚úì LLM: {aoai_gpt_deployment}")
print(f"‚úì Embeddings: {aoai_embedding_deployment}")
print(f"‚úì Authentication: Managed Identity")

## Step 3: Verify Existing Hotels Index
Confirm the hotels-semantic-index exists and contains 50 hotels ready for agentic retrieval queries.

In [None]:
# Confirm that the index named by `index_name` is reachable and holds documents
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=credential,
)

# Match every document, but fetch only three of them plus the total count
result = search_client.search(
    search_text="*",
    select=["HotelName", "Category", "Rating"],
    top=3,
    include_total_count=True,
)

count = result.get_count()
print(f"‚úì Index '{index_name}' contains {count} hotels\n")

# List the sampled documents, numbered from 1
print("Sample hotels:")
for i, doc in enumerate(result, start=1):
    print(f"{i}. {doc['HotelName']} ({doc.get('Rating', 0)}‚òÖ, {doc.get('Category', 'N/A')})")

## Step 4: Create Knowledge Source
Define a knowledge source that references the hotels search index and specifies which fields to use for retrieval.

In [None]:
# Register (or refresh) the knowledge source that wraps the hotels index
index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)

# Index fields declared as source data for this knowledge source
source_data_fields = [
    SearchIndexFieldReference(name=field_name)
    for field_name in ("HotelId", "HotelName", "Category", "Rating")
]

ks = SearchIndexKnowledgeSource(
    name=knowledge_source_name,
    description="Knowledge source for 50 real hotels with descriptions, categories, and ratings",
    search_index_parameters=SearchIndexKnowledgeSourceParameters(
        search_index_name=index_name,
        source_data_fields=source_data_fields,
    ),
)

# Create the knowledge source, or update it if one with this name exists
index_client.create_or_update_knowledge_source(knowledge_source=ks)

print(f"‚úì Knowledge source '{knowledge_source_name}' created or updated")
print(f"  - References index: {index_name}")
print(f"  - Source data fields: HotelId, HotelName, Category, Rating")

## Step 5: Create Knowledge Base with Answer Synthesis
Define the knowledge base with LLM configuration (gpt-5-mini) for intelligent query orchestration and conversational answer generation.

In [None]:
# Build the knowledge base definition and push it to the search service
index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)

# Connection details for the chat model deployment the knowledge base will use
aoai_params = AzureOpenAIVectorizerParameters(
    model_name=aoai_gpt_model,
    deployment_name=aoai_gpt_deployment,
    resource_url=aoai_endpoint,
)

# One model, one knowledge source, answers synthesized by the LLM
knowledge_base = KnowledgeBase(
    name=knowledge_base_name,
    output_mode=KnowledgeRetrievalOutputMode.ANSWER_SYNTHESIS,
    answer_instructions="You are a knowledgeable hotel concierge. Provide helpful, conversational hotel recommendations based on retrieved data. Always cite sources using [ref_id:X] format. Be specific about amenities, features, and ratings.",
    knowledge_sources=[KnowledgeSourceReference(name=knowledge_source_name)],
    models=[KnowledgeBaseAzureOpenAIModel(azure_open_ai_parameters=aoai_params)],
)

# Create the knowledge base, or update it if one with this name exists
index_client.create_or_update_knowledge_base(knowledge_base)

print(f"‚úì Knowledge base '{knowledge_base_name}' created or updated")
print(f"  - LLM: {aoai_gpt_deployment} (via {aoai_endpoint})")
print(f"  - Mode: Answer Synthesis (conversational responses)")
print(f"  - Sources: {knowledge_source_name}")

---
# Agentic Retrieval in Action

Now we'll demonstrate how the LLM agent intelligently handles complex queries.

## Query 1: Complex Multi-Part Request (Family Vacation)

In [None]:
# Start a new conversation and send the first, multi-part question
query_1 = """
I'm planning a family vacation for summer. We need a hotel that:
- Is suitable for children and families
- Has outdoor activities or is near nature
- Has good amenities (WiFi, breakfast, etc.)
- Has high ratings (at least 3.5 stars)
What would you recommend and why?
"""

# Conversation history: a system entry followed by the user's question
messages = [
    {"role": "system", "content": "You are a knowledgeable hotel concierge helping travelers find the perfect hotel. If you don't have the answer, respond with 'I don't know'."},
    {"role": "user", "content": query_1},
]

# Retrieval client bound to the knowledge base
agent_client = KnowledgeBaseRetrievalClient(
    credential=credential,
    endpoint=search_endpoint,
    knowledge_base_name=knowledge_base_name,
)

# Convert the history into SDK message objects; the system entry is left out
# of the request, so only user/assistant turns are sent
kb_messages = [
    KnowledgeBaseMessage(
        role=turn["role"],
        content=[KnowledgeBaseMessageTextContent(text=turn["content"])],
    )
    for turn in messages
    if turn["role"] != "system"
]

# Ask for references (with their source data) from the hotels knowledge source
source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    include_references=True,
    include_reference_source_data=True,
    always_query_source=True,
)

# NOTE(review): the reasoning-effort class itself is passed, not an instance —
# confirm the SDK accepts that
req = KnowledgeBaseRetrievalRequest(
    messages=kb_messages,
    knowledge_source_params=[source_params],
    include_activity=True,
    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort,
)

result = agent_client.retrieve(retrieval_request=req)
print(f"‚úì Retrieved content from '{knowledge_base_name}' successfully.\n")

# Display response
print(f"Query: {query_1.strip()}\n")
print(f"{'='*80}")
print("AGENTIC RETRIEVAL RESPONSE:\n")

# Flatten every text item of every response message into one string
response_parts = [
    content.text
    for resp in result.response
    for content in resp.content
]
if response_parts:
    response_content = "\n\n".join(response_parts)
else:
    response_content = "No response"
print(response_content)

# Record the answer so a follow-up question can build on it
messages.append({"role": "assistant", "content": response_content})

# Show activity log (query decomposition)
if result.activity:
    print(f"\n{'='*80}")
    print("ACTIVITY LOG (Query Decomposition):\n")
    activity_data = [a.as_dict() for a in result.activity]
    for activity in activity_data:
        activity_type = activity.get('type', 'unknown')
        if activity_type in ('modelQueryPlanning', 'modelAnswerSynthesis'):
            # Both LLM steps report the same three metrics under different titles
            if activity_type == 'modelQueryPlanning':
                print(f"üß† Query Planning by LLM:")
            else:
                print(f"üí¨ Answer Synthesis by LLM:")
            print(f"   Input tokens: {activity.get('input_tokens', 0)}")
            print(f"   Output tokens: {activity.get('output_tokens', 0)}")
            print(f"   Time: {activity.get('elapsed_ms', 0)}ms\n")
        elif activity_type == 'searchIndex':
            args = activity.get('search_index_arguments', {})
            print(f"üîç Subquery {activity.get('id', '?')}: {args.get('search', 'N/A')}")
            print(f"   Results: {activity.get('count', 0)} documents")
            print(f"   Time: {activity.get('elapsed_ms', 0)}ms\n")
        elif activity_type == 'agenticReasoning':
            print(f"ü§î Agentic Reasoning:")
            print(f"   Reasoning tokens: {activity.get('reasoning_tokens', 0)}\n")

# Show references (at most the first five)
if result.references:
    print(f"{'='*80}")
    print("REFERENCED HOTELS:\n")
    for i, ref in enumerate(result.references[:5], start=1):
        ref_dict = ref.as_dict()
        source_data = ref_dict.get('source_data', {})
        print(f"{i}. {source_data.get('HotelName', 'N/A')}")
        print(f"   Category: {source_data.get('Category', 'N/A')} | Rating: {source_data.get('Rating', 0)}‚òÖ")
        print(f"   Reranker Score: {ref_dict.get('reranker_score', 0):.4f}")
        print()

## Query 2: Follow-Up Question (Maintains Context)

In [None]:
# Follow-up turn: `messages` still holds the earlier exchange, so the whole
# conversation is re-sent with the new question appended
query_2 = "Which of those hotels has the best rating and is closest to water or a lake?"
messages.append({"role": "user", "content": query_2})

# Convert the history into SDK message objects, leaving out the system entry
kb_messages = [
    KnowledgeBaseMessage(
        role=turn["role"],
        content=[KnowledgeBaseMessageTextContent(text=turn["content"])],
    )
    for turn in messages
    if turn["role"] != "system"
]

# Same knowledge-source options as the first query
source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    include_references=True,
    include_reference_source_data=True,
    always_query_source=True,
)

# NOTE(review): the reasoning-effort class itself is passed, not an instance —
# confirm the SDK accepts that
req = KnowledgeBaseRetrievalRequest(
    messages=kb_messages,
    knowledge_source_params=[source_params],
    include_activity=True,
    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort,
)

result = agent_client.retrieve(retrieval_request=req)
print(f"‚úì Retrieved content from '{knowledge_base_name}' successfully.\n")

print(f"Follow-Up Query: {query_2}")
print(f"\n{'='*80}")
print("AGENTIC RETRIEVAL RESPONSE:\n")

# Flatten every text item of every response message into one string
response_parts = [
    content.text
    for resp in result.response
    for content in resp.content
]
if response_parts:
    response_content = "\n\n".join(response_parts)
else:
    response_content = "No response"
print(response_content)

# Keep the answer in the history for any further turns
messages.append({"role": "assistant", "content": response_content})

## Query 3: Business + Luxury Requirements

In [None]:
# New conversation - the history is reset, so nothing carries over from the
# previous queries
query_3 = """
I'm looking for a luxury hotel suitable for a business conference. 
It needs to be in a city center with modern facilities. 
I also want a spa for relaxation after meetings. 
What are my best options with ratings above 4 stars?
"""

messages = [
    {"role": "system", "content": "You are a knowledgeable hotel concierge. If you don't have the answer, respond with 'I don't know'."},
    {"role": "user", "content": query_3},
]

# Convert the history into SDK message objects, leaving out the system entry
kb_messages = [
    KnowledgeBaseMessage(
        role=turn["role"],
        content=[KnowledgeBaseMessageTextContent(text=turn["content"])],
    )
    for turn in messages
    if turn["role"] != "system"
]

source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    include_references=True,
    include_reference_source_data=True,
    always_query_source=True,
)

# NOTE(review): the reasoning-effort class itself is passed, not an instance —
# confirm the SDK accepts that
req = KnowledgeBaseRetrievalRequest(
    messages=kb_messages,
    knowledge_source_params=[source_params],
    include_activity=True,
    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort,
)

result = agent_client.retrieve(retrieval_request=req)
print(f"‚úì Retrieved content from '{knowledge_base_name}' successfully.\n")

print(f"Query: {query_3.strip()}")
print(f"\n{'='*80}")
print("AGENTIC RETRIEVAL RESPONSE:\n")

# Flatten every text item of every response message into one string
response_parts = [
    content.text
    for resp in result.response
    for content in resp.content
]
if response_parts:
    response_content = "\n\n".join(response_parts)
else:
    response_content = "No response"
print(response_content)

# Show subqueries generated (only the index-search steps of the activity log)
if result.activity:
    print(f"\n{'='*80}")
    print("SUBQUERIES GENERATED:\n")
    activity_data = [a.as_dict() for a in result.activity]
    search_steps = [step for step in activity_data if step.get('type') == 'searchIndex']
    subquery_count = 0  # stays 0 when no search step was recorded
    for subquery_count, activity in enumerate(search_steps, start=1):
        args = activity.get('search_index_arguments', {})
        print(f"{subquery_count}. {args.get('search', 'N/A')}")
        print(f"   ‚Üí Found {activity.get('count', 0)} results\n")

---
# COMPARISON: All 4 Search Approaches

Let's compare the same query across all 4 methods:
1. **Keyword Search** (BM25)
2. **Vector Search** (Semantic Similarity)
3. **Semantic Ranking** (L2 Reranking + Captions)
4. **Agentic Retrieval** (LLM orchestration + Answer Synthesis)

## Comparison Query: "Romantic luxury hotel with spa near water, parking included"

In [None]:
# Single query string that each of the four approaches below is run against.
comparison_query = "romantic luxury hotel with spa near water, parking included"

# Echo the query, followed by a double rule of 80 '=' characters.
print(f"Query: '{comparison_query}'")
print(f"\n{'='*80}")
print(f"{'='*80}\n")

### Approach 1: Keyword Search (BM25)

In [None]:
# 1. KEYWORD SEARCH
# Plain full-text query: results are ordered by the service's text relevance
# score (`@search.score`), with no semantic reranking.
print("‚ùå KEYWORD SEARCH (BM25 - Word Matching):")
print("-" * 80)

# Query the index configured in `index_name` instead of repeating its name
# here, so this comparison always runs against the same index the knowledge
# source is built on, even if the configuration is changed.
keyword_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=credential
)

# Materialize the top three hits
keyword_results = list(keyword_client.search(
    search_text=comparison_query,
    select=["HotelName", "Category", "Rating"],
    top=3
))

# The loop variable is `hit`, not `result`, so this cell does not overwrite
# the retrieval response other cells store in `result`.
for i, hit in enumerate(keyword_results, 1):
    score = hit.get("@search.score", 0)
    print(f"{i}. {hit['HotelName']} (BM25 Score: {score:.4f})")
    print(f"   {hit.get('Category', 'N/A')} | {hit.get('Rating', 0)}‚òÖ")
    print(f"   ‚ö†Ô∏è Just matches words - no understanding of 'romantic' or intent")
    print()

print(f"\n{'='*80}\n")

### Approach 2: Vector Search (Semantic Similarity)

In [None]:
# 2. VECTOR SEARCH
# Section banner followed by an 80-character dashed rule.
print("‚ö†Ô∏è VECTOR SEARCH (Semantic Similarity):")
print("-" * 80)

from azure.search.documents.models import VectorizedQuery
import requests

# Get embedding for query
def get_embedding(text):
    """Return the embedding vector for *text*, or None if every attempt fails.

    Posts *text* to the embeddings deployment named by
    ``aoai_embedding_deployment`` under ``aoai_endpoint``, trying a short list
    of API versions in order and returning the first successful result.

    Args:
        text: The string to embed.

    Returns:
        The ``embedding`` value of the first item in the response's ``data``
        array, or ``None`` when no API version produced a usable response.
    """
    token = credential.get_token("https://cognitiveservices.azure.com/.default")
    headers = {
        "Authorization": f"Bearer {token.token}",
        "Content-Type": "application/json"
    }
    data = {"input": text}

    last_error = None
    for api_version in ["2024-02-15-preview", "2024-02-01", "2023-05-15"]:
        url = f"{aoai_endpoint}/openai/deployments/{aoai_embedding_deployment}/embeddings?api-version={api_version}"
        try:
            response = requests.post(url, headers=headers, json=data, timeout=30)
            if response.status_code == 200:
                return response.json()["data"][0]["embedding"]
            # Non-200: remember the status and try the next API version
            last_error = f"HTTP {response.status_code} (api-version={api_version})"
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as err:
            # Transport failure or an unexpected payload shape: record why and
            # move on to the next API version. Anything else (for example
            # KeyboardInterrupt) is deliberately allowed to propagate.
            last_error = f"{type(err).__name__}: {err} (api-version={api_version})"

    # Still best-effort (returns None), but say why instead of failing silently
    if last_error is not None:
        print(f"Embedding request failed for all API versions; last error: {last_error}")
    return None

# Embed the comparison query; a falsy result means no embedding was obtained
query_vector = get_embedding(comparison_query)

if not query_vector:
    print("Could not generate embedding")
else:
    vector_client = SearchClient(
        endpoint=search_endpoint,
        index_name="hotels-vector-index",
        credential=credential,
    )

    # Pure vector query: three nearest neighbours on the DescriptionVector field
    nearest_neighbors_query = VectorizedQuery(
        vector=query_vector,
        k_nearest_neighbors=3,
        fields="DescriptionVector",
    )
    vector_results = list(
        vector_client.search(
            vector_queries=[nearest_neighbors_query],
            select=["HotelName", "Category", "Rating"],
            top=3,
        )
    )

    # Print each hit with its similarity score
    for i, result in enumerate(vector_results, start=1):
        score = result.get("@search.score", 0)
        print(f"{i}. {result['HotelName']} (Similarity: {score:.4f})")
        print(f"   {result.get('Category', 'N/A')} | {result.get('Rating', 0)}‚òÖ")
        print(f"   ‚ö†Ô∏è Semantic match, but no explanation WHY")
        print()

print(f"\n{'='*80}\n")

### Approach 3: Semantic Ranking (L2 Reranking + Captions)

In [None]:
# 3. SEMANTIC RANKING
# Same text query as the keyword search, but with the semantic ranker and
# extractive captions switched on.
print("‚úÖ SEMANTIC RANKING (Reranking + Captions):")
print("-" * 80)

# Query the index configured in `index_name` instead of repeating its name
# here, so this comparison always runs against the same index the knowledge
# source is built on, even if the configuration is changed.
semantic_client = SearchClient(
    endpoint=search_endpoint,
    index_name=index_name,
    credential=credential
)

semantic_results = semantic_client.search(
    query_type='semantic',
    semantic_configuration_name='semantic-config',
    search_text=comparison_query,
    select=["HotelName", "Description", "Category", "Rating"],
    query_caption='extractive',
    top=3
)

# The loop variable is `hit`, not `result`, so this cell does not overwrite
# the retrieval response other cells store in `result`.
for i, hit in enumerate(semantic_results, 1):
    # The key can be present with a None value (for example when the service
    # returns results without semantic reranking); `or 0` keeps the `:.4f`
    # format below from raising TypeError in that case.
    reranker_score = hit.get("@search.reranker_score") or 0
    print(f"{i}. {hit['HotelName']} (Reranker Score: {reranker_score:.4f})")
    print(f"   {hit.get('Category', 'N/A')} | {hit.get('Rating', 0)}‚òÖ")

    # Show the first caption: highlighted form if available, plain text otherwise
    captions = hit.get("@search.captions")
    if captions:
        caption = captions[0]
        if hasattr(caption, 'highlights') and caption.highlights:
            print(f"   ‚úÖ Caption: {caption.highlights}")
        elif hasattr(caption, 'text'):
            print(f"   ‚úÖ Caption: {caption.text}")
    print()

print(f"\n{'='*80}\n")

### Approach 4: Agentic Retrieval (LLM Orchestration + Answer Synthesis)

In [None]:
# 4. AGENTIC RETRIEVAL (using SDK)
print("üöÄ AGENTIC RETRIEVAL (LLM Orchestration + Conversational Answer):")
print("-" * 80)

# Single-turn conversation holding only the comparison query
messages = [{"role": "user", "content": comparison_query}]

# Retrieval client bound to the knowledge base
agent_client = KnowledgeBaseRetrievalClient(
    credential=credential,
    endpoint=search_endpoint,
    knowledge_base_name=knowledge_base_name,
)

# Convert every history entry into an SDK message object
kb_messages = [
    KnowledgeBaseMessage(
        role=turn["role"],
        content=[KnowledgeBaseMessageTextContent(text=turn["content"])],
    )
    for turn in messages
]

source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    include_references=True,
    include_reference_source_data=True,
    always_query_source=True,
)

# NOTE(review): the reasoning-effort class itself is passed, not an instance —
# confirm the SDK accepts that
req = KnowledgeBaseRetrievalRequest(
    messages=kb_messages,
    knowledge_source_params=[source_params],
    include_activity=True,
    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort,
)

result = agent_client.retrieve(retrieval_request=req)

# Flatten every text item of every response message into one string
response_parts = [
    content.text
    for resp in result.response
    for content in resp.content
]
if response_parts:
    response_content = "\n\n".join(response_parts)
else:
    response_content = "No response"
print(response_content)
print()

# Show how query was decomposed (only the index-search steps of the activity log)
if result.activity:
    print("\nüß† Query Decomposition:")
    activity_data = [a.as_dict() for a in result.activity]
    search_steps = [step for step in activity_data if step.get('type') == 'searchIndex']
    subquery_num = 0  # stays 0 when no search step was recorded
    for subquery_num, activity in enumerate(search_steps, start=1):
        args = activity.get('search_index_arguments', {})
        print(f"   {subquery_num}. {args.get('search', 'N/A')}")

print(f"\n{'='*80}\n")

---
# Summary: Key Differences

| Feature | Keyword | Vector | Semantic | Agentic |
|---------|---------|--------|----------|---------|
| **How it works** | Word matching (BM25) | Embedding similarity | ML reranking | LLM orchestration |
| **Query understanding** | ❌ None | ⚠️ Similarity only | ✅ Context aware | ✅ Intent analysis |
| **Multi-part queries** | ❌ No decomposition | ❌ Single vector | ❌ Single query | ✅ Auto-decomposed |
| **Explanations** | ❌ Just scores | ❌ Just scores | ✅ Captions | ✅ Conversational |
| **Answer format** | List of docs | List of docs | List + captions | Natural language |
| **Citations** | ❌ No | ❌ No | ⚠️ Implicit | ✅ [ref_id:X] |
| **Conversational** | ❌ No context | ❌ No context | ❌ No context | ✅ Multi-turn |
| **Best for** | Exact terms | Concept match | Context + captions | Complex questions |

## When to Use Each:

**🔍 Keyword Search:**
- Fast, precise lookups
- Known exact terms
- Filters and facets

**🎯 Vector Search:**
- Semantic similarity
- Multilingual search
- Concept matching

**✨ Semantic Ranking:**
- Better relevance
- Captions showing WHY
- Question-answering

**🚀 Agentic Retrieval:**
- Complex multi-part questions
- Conversational AI
- Natural language answers
- Multi-turn dialogues

## 💡 Hybrid Approach (Best Practice):
Combine all 4:
- Vector search for semantic retrieval
- Semantic ranking for better relevance
- Agentic retrieval for conversational interface
- Keyword filters for precise constraints