# Agentic Retrieval Quickstart for Azure AI Search

### 1. Load Connections

In [20]:
import os
from typing import Any

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from dotenv import load_dotenv

# Load settings from the local .env file before any of them are read below.
load_dotenv(override=True)

# Required settings: a missing variable raises KeyError right here.
endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]

# Authentication: one credential for the notebook, plus a bearer-token
# provider scoped to Azure AI Search for the raw REST calls.
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(credential, "https://search.azure.com/.default")

# Azure AI Search settings (optional, with defaults).
index_name = os.getenv("AZURE_SEARCH_INDEX", "earth_at_night")
agent_name = os.getenv("AZURE_SEARCH_AGENT_NAME", "earth-search-agent")
# API version appended to the agent REST URLs built later in the notebook.
api_version = "2025-05-01-Preview"

# Azure OpenAI settings (optional, with defaults).
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-03-01-preview")
azure_openai_gpt_deployment = os.getenv("AZURE_OPENAI_GPT_DEPLOYMENT", "gpt-4o")
azure_openai_gpt_model = os.getenv("AZURE_OPENAI_GPT_MODEL", "gpt-4o")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_embedding_model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")

# Model name passed to the answer-generation calls at the end of the notebook.
answer_model = os.getenv("ANSWER_MODEL", "gpt-4o")

### 2. Create Index

In [2]:
from azure.search.documents.indexes.models import SearchIndex, SearchField, VectorSearch, VectorSearchProfile, HnswAlgorithmConfiguration, AzureOpenAIVectorizer, AzureOpenAIVectorizerParameters, SemanticSearch, SemanticConfiguration, SemanticPrioritizedFields, SemanticField
from azure.search.documents.indexes import SearchIndexClient

# Field schema: a string key, the chunk text, its embedding vector, and two
# integer metadata fields.
index_fields = [
    SearchField(name="id", type="Edm.String", key=True, filterable=True, sortable=True, facetable=True),
    SearchField(name="page_chunk", type="Edm.String", filterable=False, sortable=False, facetable=False),
    SearchField(name="page_embedding_text_3_large", type="Collection(Edm.Single)", stored=False, vector_search_dimensions=3072, vector_search_profile_name="hnsw_text_3_large"),
    SearchField(name="page_number", type="Edm.Int32", filterable=True, sortable=True, facetable=True),
    SearchField(name="chapter_number", type="Edm.Int32", filterable=True, sortable=True, facetable=True),
]

# Vectorizer pointing at the Azure OpenAI embedding deployment from the config cell.
embedding_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="azure_openai_text_3_large",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=azure_openai_endpoint,
        deployment_name=azure_openai_embedding_deployment,
        model_name=azure_openai_embedding_model,
    ),
)

# The profile name must match `vector_search_profile_name` on the embedding
# field above; it ties the HNSW algorithm entry to the vectorizer.
vector_search_config = VectorSearch(
    profiles=[
        VectorSearchProfile(
            name="hnsw_text_3_large",
            algorithm_configuration_name="alg",
            vectorizer_name="azure_openai_text_3_large",
        )
    ],
    algorithms=[HnswAlgorithmConfiguration(name="alg")],
    vectorizers=[embedding_vectorizer],
)

# Semantic configuration using the chunk text as its only content field.
semantic_search_config = SemanticSearch(
    default_configuration_name="semantic_config",
    configurations=[
        SemanticConfiguration(
            name="semantic_config",
            prioritized_fields=SemanticPrioritizedFields(
                content_fields=[SemanticField(field_name="page_chunk")]
            ),
        )
    ],
)

index = SearchIndex(
    name=index_name,
    fields=index_fields,
    vector_search=vector_search_config,
    semantic_search=semantic_search_config,
)

# Send the definition to the search service.
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
index_client.create_or_update_index(index)
print(f"Index '{index_name}' created or updated successfully")


Index 'earth_at_night' created or updated successfully


### 3. Upload Sample Documents

In [3]:
import requests
from azure.search.documents import SearchIndexingBufferedSender
from azure.search.documents import SearchClient

# Sample documents (JSON) hosted in the Azure-Samples/azure-search-sample-data repository.
url = "https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/nasa-e-book/earth-at-night-json/documents.json"

# A timeout keeps the cell from hanging forever on a stalled connection.
download = requests.get(url, timeout=60)
# Fail fast on a 4xx/5xx response instead of trying to parse an error page as JSON.
download.raise_for_status()
documents = download.json()

# Named `sender` rather than `client`, so re-running this cell can never
# overwrite the AzureOpenAI `client` that a later cell creates.
with SearchIndexingBufferedSender(endpoint=endpoint, index_name=index_name, credential=credential) as sender:
    sender.upload_documents(documents=documents)

print(f"Documents uploaded to index '{index_name}'")


Documents uploaded to index 'earth_at_night'


### 4. Create search agent

In [4]:
import requests

# Agent definition: which index the agent may query and which Azure OpenAI
# model it uses.
create_agent_request = {
    "name": agent_name,
    "targetIndexes": [ { "indexName": index_name } ],
    "models": [
          {
            "kind": "azureOpenAI",
            "azureOpenAIParameters": {
                "resourceUri": azure_openai_endpoint,
                # No API key is sent; this notebook authenticates with bearer tokens.
                "apiKey": None,
                # deploymentId must be the *deployment* name, which can differ
                # from the model name (previously the model name was sent here).
                "deploymentId": azure_openai_gpt_deployment,
                "modelName": azure_openai_gpt_model
            }
        }
    ]
}

# PUT to the agent's URL with the definition above.
response = requests.put(
    url=f"{endpoint}/agents/{agent_name}?api-version={api_version}",
    headers={ "Authorization": f"Bearer {token_provider()}" },
    json=create_agent_request
)
# Surface any 4xx/5xx from the service as an exception.
response.raise_for_status()


### 5. Set up messages

In [5]:
# System prompt: sets the agent's role, requires ref_id citations, and gives
# the fallback reply for unanswerable questions.
instructions = """
An Q&A agent that can answer questions about the Earth at night.
Sources have a JSON format with a ref_id that must be cited in the answer.
If you do not have the answer, respond with "I don't know".
"""

# Conversation history; it starts with only the system prompt and later cells
# append to it.
messages = [dict(role="system", content=instructions)]

### 6. Use Agentic Retrieval to fetch results

In [6]:
def query_agent(messages: list[dict[str, Any]]) -> dict[str, Any]:
    """Send the conversation to the agent's retrieve endpoint and return the parsed reply.

    Args:
        messages: Conversation history. Each entry must have a "role" and a
            "content" key; the content is sent as a single text part.

    Returns:
        The decoded JSON body of the retrieve response.

    Raises:
        requests.HTTPError: If the service responds with a 4xx/5xx status.
    """
    # The retrieve endpoint takes content as a list of typed parts, so each
    # message's content is wrapped in one {"text", "type"} part.
    formatted_messages = [
        {"role": msg["role"], "content": [{"text": msg["content"], "type": "text"}]}
        for msg in messages
    ]
    retrieval_request = {
        "messages": formatted_messages,
        "targetIndexParams": [
            {
                "indexName": index_name,
                # NOTE(review): presumably the minimum reranker score a result
                # needs to be kept - confirm against the service documentation.
                "rerankerThreshold": 2.5,
                "includeReferenceSourceData": True,
            }
        ],
    }
    response = requests.post(
        url=f"{endpoint}/agents/{agent_name}/retrieve?api-version={api_version}",
        # A token is requested from the provider on every call.
        headers={"Authorization": f"Bearer {token_provider()}"},
        json=retrieval_request,
    )
    response.raise_for_status()
    return response.json()

In [7]:
# First user turn: two questions sent together in a single message.
messages.append(dict(
    role="user",
    content="""
    Why do suburban belts display larger December brightening than urban cores even though absolute light levels are higher downtown?
    Why is the Phoenix nighttime street grid is so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?
    """,
))

# Run retrieval, then store the text of the first response part in the history
# as the assistant turn.
retrieval_result = query_agent(messages)
messages.append(dict(
    role="assistant",
    content=retrieval_result["response"][0]["content"][0]["text"],
))

### 6.1. Review retrieval activity and results

In [8]:
import textwrap

# Print the text of the first response part, wrapped to 120 columns.
print(
    "Response",
    textwrap.fill(retrieval_result["response"][0]["content"][0]["text"], width=120),
    sep="\n",
)

Response
[{"ref_id":1,"content":"# Urban Structure\n\n## March 16, 2013\n\n### Phoenix Metropolitan Area at Night\n\nThis figure
presents a nighttime satellite view of the Phoenix metropolitan area, highlighting urban structure and transport
corridors. City lights illuminate the layout of several cities and major thoroughfares.\n\n**Labeled Urban
Features:**\n\n- **Phoenix:** Central and brightest area in the right-center of the image.\n- **Glendale:** Located to
the west of Phoenix, this city is also brightly lit.\n- **Peoria:** Further northwest, this area is labeled and its
illuminated grid is seen.\n- **Grand Avenue:** Clearly visible as a diagonal, brightly lit thoroughfare running from
Phoenix through Glendale and Peoria.\n- **Salt River Channel:** Identified in the southeast portion, running through
illuminated sections.\n- **Phoenix Mountains:** Dark, undeveloped region to the northeast of Phoenix.\n- **Agricultural
Fields:** Southwestern corner of the image, grid patterns are 

In [9]:
import json
# Pretty-print the "activity" and "references" sections of the retrieval
# result, each under its own heading.
for heading, key in (("Activity", "activity"), ("Results", "references")):
    print(heading)
    print(json.dumps(retrieval_result[key], indent=2))

Activity
[
  {
    "type": "ModelQueryPlanning",
    "id": 0,
    "inputTokens": 1407,
    "outputTokens": 513
  },
  {
    "type": "AzureSearchQuery",
    "id": 1,
    "targetIndex": "earth_at_night",
    "query": {
      "search": "suburban belts December brightening compared to urban cores",
      "filter": null
    },
    "queryTime": "2025-05-01T23:15:19.003Z",
    "elapsedMs": 1005
  },
  {
    "type": "AzureSearchQuery",
    "id": 2,
    "targetIndex": "earth_at_night",
    "query": {
      "search": "Phoenix nighttime street grid visibility from space",
      "filter": null
    },
    "queryTime": "2025-05-01T23:15:19.493Z",
    "count": 2,
    "elapsedMs": 486
  },
  {
    "type": "AzureSearchQuery",
    "id": 3,
    "targetIndex": "earth_at_night",
    "query": {
      "search": "interstate visibility between midwestern cities at night",
      "filter": null
    },
    "queryTime": "2025-05-01T23:15:19.961Z",
    "count": 2,
    "elapsedMs": 466
  }
]
Results
[
  {
    "type"

### 7. Create Azure OpenAI Client

In [10]:
from openai import AzureOpenAI
from azure.identity import get_bearer_token_provider

# Azure OpenAI uses a different token scope from Azure AI Search, so it gets
# its own provider built on the shared credential.
azure_openai_token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default",
)

# Client used by the answer-generation cells; it authenticates through the
# token provider, not an API key.
client = AzureOpenAI(
    api_version=azure_openai_api_version,
    azure_ad_token_provider=azure_openai_token_provider,
    azure_endpoint=azure_openai_endpoint,
)

### 7.1 Use Responses API to generate an answer

In [11]:
# Send the whole conversation (system prompt, user question, retrieved
# grounding text) through the Responses API.
response = client.responses.create(input=messages, model=answer_model)

# Wrap the answer to 100 columns for display.
wrapped = textwrap.fill(text=response.output_text, width=100)
print(wrapped)

Suburban belts display larger December brightening compared to urban cores despite higher absolute
light levels downtown due to differences in holiday lighting patterns. Suburban areas often have
more residential properties that participate in holiday lighting, leading to a noticeable increase
in brightness during December. Urban cores, with their higher baseline light levels and more
commercial properties, might not experience the same relative increase during the holiday season
[ref_id:3].  The Phoenix nighttime street grid is sharply visible from space because of its regular
grid of city blocks and streets, which are illuminated extensively. This structured pattern of
development, common in many western U.S. cities, reveals the continuous spread of urban areas
connected by well-lit surface streets and major corridors such as Grand Avenue. In contrast, large
stretches of interstate highways between midwestern cities remain dim because they pass through
areas with lower population den

### 7.2 Use Chat Completions API to generate an answer

In [12]:
# Same conversation, sent through the Chat Completions API this time.
response = client.chat.completions.create(messages=messages, model=answer_model)

# The answer text is on the first choice; wrap it to 100 columns for display.
wrapped = textwrap.fill(text=response.choices[0].message.content, width=100)
print(wrapped)

Suburban belts often display larger December brightening than urban cores despite the absolute light
levels being higher downtown because suburban areas are more expansive and can experience more
decorative and holiday lighting during the festive season. These areas generally have less ambient
light pollution compared to urban cores, making any additional lighting more noticeable and
impactful on satellite imagery (ref_id: 3).   The Phoenix nighttime street grid is sharply visible
from space due to its structured, grid-like urban development, which is characteristic of many
western U.S. cities. The widespread and regular pattern of street lighting creates a distinct grid
pattern that can be easily seen from space. In contrast, interstates between midwestern cities tend
to have fewer lights, as they traverse sparsely populated or rural areas, making them appear
comparatively dim on nighttime satellite imagery (ref_id: 0, ref_id: 1, ref_id: 2).


### 8. Continue the conversation

In [13]:
# Follow-up user turn, appended to the same history so earlier turns are sent
# along with it.
messages.append(dict(role="user", content="How do I find lava at night?"))

# Run retrieval again and store the returned text as the next assistant turn.
retrieval_result = query_agent(messages)
messages.append(dict(
    role="assistant",
    content=retrieval_result["response"][0]["content"][0]["text"],
))

### 8.1. Review activity and results

In [14]:
# Print the text of the first response part for the follow-up turn, wrapped to
# 120 columns.
print(
    "Response",
    textwrap.fill(retrieval_result["response"][0]["content"][0]["text"], width=120),
    sep="\n",
)

Response
[{"ref_id":5,"content":"For the first time in perhaps a decade, Mount Etna experienced a \"flank eruption\"—erupting
from its side instead of its summit—on December 24, 2018. The activity was accompanied by 130 earthquakes occurring over
three hours that morning. Mount Etna, Europe’s most active volcano, has seen periodic activity on this part of the
mountain since 2013. The Operational Land Imager (OLI) on the Landsat 8 satellite acquired the main image of Mount Etna
on December 28, 2018.\n\nThe inset image highlights the active vent and thermal infrared signature from lava flows,
which can be seen near the newly formed fissure on the southeastern side of the volcano. The inset was created with data
from OLI and the Thermal Infrared Sensor (TIRS) on Landsat 8. Ash spewing from the fissure cloaked adjacent villages and
delayed aircraft from landing at the nearby Catania airport. Earthquakes occurred in the subsequent days after the
initial eruption and displaced hundreds of pe

In [15]:
# Pretty-print the "activity" and "references" sections of the follow-up
# retrieval result, each under its own heading.
for heading, key in (("Activity", "activity"), ("Results", "references")):
    print(heading)
    print(json.dumps(retrieval_result[key], indent=2))

Activity
[
  {
    "type": "ModelQueryPlanning",
    "id": 0,
    "inputTokens": 3540,
    "outputTokens": 99
  },
  {
    "type": "AzureSearchQuery",
    "id": 1,
    "targetIndex": "earth_at_night",
    "query": {
      "search": "find lava at night",
      "filter": null
    },
    "queryTime": "2025-05-01T23:15:31.710Z",
    "count": 6,
    "elapsedMs": 489
  }
]
Results
[
  {
    "type": "AzureSearchDoc",
    "id": "0",
    "activitySource": 1,
    "docKey": "earth_at_night_508_page_65_verbalized",
    "sourceData": {
      "id": "earth_at_night_508_page_65_verbalized",
      "page_chunk": "# Volcanoes\n\n## Figure: Satellite Image of Sicily and Mount Etna Lava, March 16, 2017\n\nThe annotated satellite image below shows the island of Sicily and the surrounding region at night, highlighting city lights and volcanic activity.\n\n**Description:**\n\n- **Date of image:** March 16, 2017\n- **Geographical locations labeled:**\n    - Major cities: Palermo (northwest Sicily), Marsala (we

### 8.2. Generate answer

In [21]:
# Generate the follow-up answer from the full conversation, which now includes
# the second round of retrieved text.
response = client.responses.create(input=messages, model=answer_model)

# Wrap the answer to 100 columns for display.
wrapped = textwrap.fill(text=response.output_text, width=100)
print(wrapped)

To find lava at night, you can use thermal infrared imaging, which detects the heat emitted by lava
flows. Satellites equipped with sensors like the VIIRS Day/Night Band on polar-orbiting satellites
or the Thermal Infrared Sensor on Landsat can detect the thermal signature of lava. These
instruments utilize faint light sources such as moonlight and airglow to enhance visibility at
night, revealing the glow of hot lava against the cooler surroundings. This method is commonly used
to monitor active volcanoes like Mount Etna and Kilauea [ref_id: 5, ref_id: 4].
