In [2]:
''' 
Agentic Retrieval Quickstart

Description:    Demonstrates the Agentic Retrieval feature in Azure AI Search (formerly Azure Cognitive Search), which executes parallel queries.
Feature Status: Preview
Code Status:    This sample was working as of Oct 2, 2025
Language:       Python (used a .venv environment)
Documentation:  https://learn.microsoft.com/en-us/azure/search/search-get-started-agentic-retrieval?tabs=foundry-perms%2Csearch-endpoint&pivots=programming-language-python

Notes:
    Python Version
    - If you are using an ARM processor, I suggest using Python 3.12 rather than 3.13. I tried 3.13, and the openai and aiohttp packages failed to install.

    Azure Roles
    - The instructions say to add the 'Cognitive Services User' role to your ID and the managed identity of the Foundry instance. This is required.
      !! You must also add the 'Cognitive Services OpenAI User' role to both your ID and the managed identity of the Foundry instance.

    Query Phrasing
    - The first query in the instructions does not return results. After analysis I determined that this is an issue with the phrasing of the query.
      The query asks 'why' versus 'what' or 'how'. I modified the query and it returns results. Example:
      
        Original Query (no results)
        Why is the Phoenix nighttime street grid is so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?
    
        Modified Query (results)
        What makes Phoenix's urban street grid so visible from space at night? How does Phoenix's nighttime appearance compare to other cities?

    Performance
    - The agent:
       - breaks a user question into multiple queries (the framework controls this),
       - executes the queries in parallel (possibly multiple times),
       - semantically ranks and combines the results,
       - returns the resulting text to an LLM, which generates the final response to the user.

      For the first query in this notebook, these steps took 9.2 seconds to complete.

C. Scott - Oct 2, 2025
'''

# Install packages (see notes above about Python version if openai or aiohttp fail to install)
! pip install azure-search-documents==11.7.0b1 --quiet
! pip install azure-identity --quiet
! pip install openai --quiet
! pip install aiohttp --quiet
! pip install ipykernel --quiet
! pip install requests --quiet

# successful output should look something like:
# [notice] A new release of pip is available: 25.0.1 -> 25.2
# [notice] To update, run: python.exe -m pip install --upgrade pip

# or there may be no output. Any error messages should be addressed.

In [None]:
# Notebook configuration
#  - search_endpoint:               Azure AI Search (formerly Azure Cognitive Search) endpoint.
#  - aoai_endpoint:                 Azure OpenAI endpoint.
#  - Models (aoai_xxx variables):   If you use the names recommended in the QuickStart documentation, there is no need to change these values.

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import os

# --- Endpoints: replace both placeholder strings before running the remaining cells ---
search_endpoint = "YOUR-SEARCH-ENDPOINT.search.windows.net HERE"  # TODO! add your search endpoint.
aoai_endpoint = "YOUR-AOAI-ENDPOINT.openai.azure.com HERE"  # TODO! add your Azure OpenAI endpoint.

# --- Authentication: one credential object is shared by every Azure client created in later cells ---
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(credential, "https://search.azure.com/.default")  # Do not change this URL.

# --- Azure OpenAI models: a model and a deployment with each of these names must exist in your AOAI resource ---
aoai_embedding_model = "text-embedding-3-large"
aoai_embedding_deployment = "text-embedding-3-large"
aoai_gpt_model = "gpt-4.1-mini"
aoai_gpt_deployment = "gpt-4.1-mini"

# --- Search resources: names of the objects this notebook creates or updates ---
index_name = "earth-at-night"  # created (or updated) by the "Create the index" cell below.
knowledge_source_name = "earth-knowledge-source"
knowledge_agent_name = "earth-knowledge-agent"

# REST API version passed to the knowledge source, knowledge agent, and retrieval calls.
search_api_version = "2025-08-01-preview"

In [None]:
# Create the index
#
# Builds the index definition piece by piece (fields, vector search, semantic search),
# then creates or updates it on the search service.

from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SemanticSearch,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
)
from azure.search.documents.indexes import SearchIndexClient
from openai import AzureOpenAI
from azure.identity import get_bearer_token_provider

# Bearer-token provider for the Cognitive Services scope. Do not change this URL.
azure_openai_token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default",
)

# Schema: document key, text chunk, embedding vector of the chunk, and page number.
index_fields = [
    SearchField(name="id", type="Edm.String", key=True, filterable=True, sortable=True, facetable=True),
    SearchField(name="page_chunk", type="Edm.String", filterable=False, sortable=False, facetable=False),
    SearchField(
        name="page_embedding_text_3_large",
        type="Collection(Edm.Single)",
        stored=False,
        vector_search_dimensions=3072,
        vector_search_profile_name="hnsw_text_3_large",
    ),
    SearchField(name="page_number", type="Edm.Int32", filterable=True, sortable=True, facetable=True),
]

# Vectorizer that points at the embedding deployment configured in the settings cell.
embedding_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="azure_openai_text_3_large",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=aoai_endpoint,
        deployment_name=aoai_embedding_deployment,
        model_name=aoai_embedding_model,
    ),
)

# The profile name must match vector_search_profile_name on the embedding field above;
# it links that field to the HNSW algorithm configuration and to the vectorizer.
vector_search_config = VectorSearch(
    profiles=[
        VectorSearchProfile(
            name="hnsw_text_3_large",
            algorithm_configuration_name="alg",
            vectorizer_name="azure_openai_text_3_large",
        )
    ],
    algorithms=[HnswAlgorithmConfiguration(name="alg")],
    vectorizers=[embedding_vectorizer],
)

# Semantic configuration (also set as the default) that uses the text chunk as its content field.
semantic_search_config = SemanticSearch(
    default_configuration_name="semantic_config",
    configurations=[
        SemanticConfiguration(
            name="semantic_config",
            prioritized_fields=SemanticPrioritizedFields(
                content_fields=[SemanticField(field_name="page_chunk")]
            ),
        )
    ],
)

index = SearchIndex(
    name=index_name,
    fields=index_fields,
    vector_search=vector_search_config,
    semantic_search=semantic_search_config,
)

index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)
index_client.create_or_update_index(index)
print(f"Index '{index_name}' created or updated successfully.")

In [None]:
# Upload documents to the index (documents are from NASA's Earth at Night dataset)

import requests
from azure.search.documents import SearchIndexingBufferedSender

# this URL points to a JSON file with sample documents from NASA's Earth at Night dataset.
url = "https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/nasa-e-book/earth-at-night-json/documents.json"

# Download the sample documents. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() surfaces an HTTP error (e.g. 404) as a clear exception
# instead of letting an error page reach .json() or the index.
response = requests.get(url, timeout=30)
response.raise_for_status()
documents = response.json()

# The sender is used as a context manager so it is closed when the block exits.
with SearchIndexingBufferedSender(endpoint=search_endpoint, index_name=index_name, credential=credential) as client:
    client.upload_documents(documents=documents)

print(f"Documents uploaded to index '{index_name}' successfully.")

In [None]:
# Create the knowledge source
#
# Wraps the search index in a knowledge source definition and creates or updates it on the service.

from azure.search.documents.indexes.models import (
    SearchIndexKnowledgeSource,
    SearchIndexKnowledgeSourceParameters,
)
from azure.search.documents.indexes import SearchIndexClient

# Which index backs the knowledge source, and which of its fields are selected as source data.
ks_index_parameters = SearchIndexKnowledgeSourceParameters(
    search_index_name=index_name,
    source_data_select="id,page_chunk,page_number",
)

ks = SearchIndexKnowledgeSource(
    name=knowledge_source_name,
    description="Knowledge source for Earth at night data",
    search_index_parameters=ks_index_parameters,
)

index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)
index_client.create_or_update_knowledge_source(
    knowledge_source=ks,
    api_version=search_api_version,
)
print(f"Knowledge source '{knowledge_source_name}' created or updated successfully.")

In [None]:
# Create the knowledge agent
#
# Combines the GPT deployment, the knowledge source, and the output settings into an agent
# definition, then creates or updates it on the service.

from azure.search.documents.indexes.models import (
    KnowledgeAgent,
    KnowledgeAgentAzureOpenAIModel,
    KnowledgeSourceReference,
    AzureOpenAIVectorizerParameters,
    KnowledgeAgentOutputConfiguration,
    KnowledgeAgentOutputConfigurationModality,
)
from azure.search.documents.indexes import SearchIndexClient

# Connection details for the GPT deployment; the same parameters type is used for the
# embedding vectorizer in the index definition.
aoai_params = AzureOpenAIVectorizerParameters(
    resource_url=aoai_endpoint,
    deployment_name=aoai_gpt_deployment,
    model_name=aoai_gpt_model,
)

# Request answer synthesis and include the agent's activity in each response.
output_cfg = KnowledgeAgentOutputConfiguration(
    modality=KnowledgeAgentOutputConfigurationModality.ANSWER_SYNTHESIS,
    include_activity=True,
)

agent_model = KnowledgeAgentAzureOpenAIModel(azure_open_ai_parameters=aoai_params)

# NOTE(review): reranker_threshold presumably sets the minimum reranker score a result needs
# to be kept - confirm against the SDK documentation before tuning.
agent_knowledge_source = KnowledgeSourceReference(
    name=knowledge_source_name,
    reranker_threshold=2.5,
)

agent = KnowledgeAgent(
    name=knowledge_agent_name,
    models=[agent_model],
    knowledge_sources=[agent_knowledge_source],
    output_configuration=output_cfg,
)

index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)
index_client.create_or_update_agent(agent, api_version=search_api_version)
print(f"Knowledge agent '{knowledge_agent_name}' created or updated successfully.")

In [None]:
# Define the system message and start the conversation history with it.

instructions = """
A Q&A agent that can answer questions about the Earth at night.
If you don't have the answer, respond with "I don't know".
"""

# `messages` is the running conversation; the question cells append user turns to it.
messages = [dict(role="system", content=instructions)]

In [None]:
# Ask the agent the first question.

from azure.search.documents.agent import KnowledgeAgentRetrievalClient
from azure.search.documents.agent.models import (
    KnowledgeAgentRetrievalRequest,
    KnowledgeAgentMessage,
    KnowledgeAgentMessageTextContent,
    SearchIndexKnowledgeSourceParams,
)

agent_client = KnowledgeAgentRetrievalClient(
    endpoint=search_endpoint,
    agent_name=knowledge_agent_name,
    credential=credential,
)

query_1 = """
    What can you tell me about holiday lighting patterns in December? How do suburban and urban areas differ in their nighttime lighting?
    """

# Record the question as a user turn in the running conversation.
messages.append({"role": "user", "content": query_1})

# Convert the conversation into agent messages; the system message is left out of the request.
agent_messages = [
    KnowledgeAgentMessage(
        role=turn["role"],
        content=[KnowledgeAgentMessageTextContent(text=turn["content"])],
    )
    for turn in messages
    if turn["role"] != "system"
]

# Direct the retrieval at the knowledge source created earlier.
source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    kind="searchIndex",
)

req = KnowledgeAgentRetrievalRequest(
    messages=agent_messages,
    knowledge_source_params=[source_params],
)

result = agent_client.retrieve(retrieval_request=req, api_version=search_api_version)
print(f"Retrieved content from '{knowledge_source_name}' successfully.")

In [None]:
# Print Results

# This is for demonstration purposes, and prints the following:
#  - Response:        Response returned to the user
#  - Activity:        Steps taken by the agent
#  - Results:         References to the documents the agent selected
#  - Source content:  Page number and first 300 characters of each referenced document, read from the index
# Note: Notebooks may not have the ability to stream responses. The full response may be sent when the function completes.

import textwrap
import json

from azure.search.documents import SearchClient

# Client for the index itself, used below to look up each referenced document by its key.
search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)

# response
print("=" * 80)
print("RESPONSE")
print("=" * 80)
print(textwrap.fill(result.response[0].content[0].text, width=120))

# process steps
print(f"\n{'=' * 80}")
print("ACTIVITY (Process Steps)")
print("=" * 80)
print(json.dumps([a.as_dict() for a in result.activity], indent=2))

# documents selected
print(f"\n{'=' * 80}")
print("RESULTS (Documents Selected)")
print("=" * 80)
print(json.dumps([r.as_dict() for r in result.references], indent=2))

# content of the referenced documents
print(f"\n{'=' * 80}")
print("SOURCE CONTENT (What the LLM Received)")
print("=" * 80)
for position, ref in enumerate(result.references, start=1):
    # A key lookup returns exactly the referenced document, with no dependence on query parsing.
    doc = search_client.get_document(key=ref.doc_key, selected_fields=["page_number", "page_chunk"])
    # The score can be absent; format it only when there is a number to format.
    score_text = f"{ref.reranker_score:.2f}" if ref.reranker_score is not None else "N/A"
    print(f"\nDocument {position} (Score: {score_text}):")
    print(f"Page: {doc.get('page_number', 'N/A')}")
    print(f"Content: {doc['page_chunk'][:300]}...")
    print("-" * 40)

In [None]:
# Additional query to demonstrate multiple queries in a single session.
# Relies on the client, the request model classes, and the `messages` history from the earlier cells.

query_2 = "How do I find lava at night?"

# Add the follow-up question to the same running conversation.
messages.append({"role": "user", "content": query_2})

# Convert the conversation into agent messages; the system message is left out of the request.
followup_messages = [
    KnowledgeAgentMessage(
        role=turn["role"],
        content=[KnowledgeAgentMessageTextContent(text=turn["content"])],
    )
    for turn in messages
    if turn["role"] != "system"
]

followup_source_params = SearchIndexKnowledgeSourceParams(
    knowledge_source_name=knowledge_source_name,
    kind="searchIndex",
)

req = KnowledgeAgentRetrievalRequest(
    messages=followup_messages,
    knowledge_source_params=[followup_source_params],
)

result = agent_client.retrieve(retrieval_request=req, api_version=search_api_version)
print(f"Retrieved content from '{knowledge_source_name}' successfully.")

In [None]:
# Print Results

# This is for demonstration purposes, and prints the following:
#  - Response:        Response returned to the user
#  - Activity:        Steps taken by the agent
#  - Results:         References to the documents the agent selected
#  - Source content:  Page number and first 300 characters of each referenced document, read from the index
# Note: Notebooks may not have the ability to stream responses. The full response may be sent when the function completes.

import textwrap
import json

from azure.search.documents import SearchClient

# Client for the index itself, used below to look up each referenced document by its key.
search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)

# response
print("=" * 80)
print("RESPONSE")
print("=" * 80)
print(textwrap.fill(result.response[0].content[0].text, width=120))

# process steps
print(f"\n{'=' * 80}")
print("ACTIVITY (Process Steps)")
print("=" * 80)
print(json.dumps([a.as_dict() for a in result.activity], indent=2))

# documents selected
print(f"\n{'=' * 80}")
print("RESULTS (Documents Selected)")
print("=" * 80)
print(json.dumps([r.as_dict() for r in result.references], indent=2))

# content of the referenced documents
print(f"\n{'=' * 80}")
print("SOURCE CONTENT (What the LLM Received)")
print("=" * 80)
for position, ref in enumerate(result.references, start=1):
    # A key lookup returns exactly the referenced document, with no dependence on query parsing.
    doc = search_client.get_document(key=ref.doc_key, selected_fields=["page_number", "page_chunk"])
    # The score can be absent; format it only when there is a number to format.
    score_text = f"{ref.reranker_score:.2f}" if ref.reranker_score is not None else "N/A"
    print(f"\nDocument {position} (Score: {score_text}):")
    print(f"Page: {doc.get('page_number', 'N/A')}")
    print(f"Content: {doc['page_chunk'][:300]}...")
    print("-" * 40)