# Azure AI Search: Multimodal RAG Query Examples



## Step 1: Configuration & Authentication
Set up credentials and index references for querying.


In [None]:
# Step 1: Configuration & Authentication
import json
import os

import requests
from azure.identity import DefaultAzureCredential

# ============================
# Azure AI Search Configuration
# ============================
# Service endpoint, REST API version and target index; the request URLs in
# this notebook are built from these three values.
search_endpoint = "https://xxxxxxxxxxxxxxx.search.windows.net"
api_version = "2025-11-01-preview"
index_name = "multimodal-rag-1771147228395"

# ============================
# Azure OpenAI Configuration
# ============================
# NOTE(review): these values are not read by the query code visible in this
# part of the notebook - presumably kept for reference or later steps.
aoai_endpoint = "https://xxxxxxxxxxxxxxx.cognitiveservices.azure.com"
embedding_deployment = "text-embedding-ada-002"
embedding_model = "text-embedding-ada-002"
embedding_dimensions = 1536
chat_deployment = "gpt-5-mini"

# ============================
# Authentication
# ============================
# Using admin key temporarily while role assignment propagates
# TODO: Switch back to managed identity after ~10 minutes
# The key is taken from the AZURE_SEARCH_ADMIN_KEY environment variable when
# it is set (and non-empty), so the secret does not have to be saved in the
# notebook; the literal below is only the fallback placeholder.
admin_key = os.environ.get("AZURE_SEARCH_ADMIN_KEY") or "xxxxxxxxxxxxxxx"

# Headers sent with the Azure AI Search REST requests (API-key authentication).
headers = {
    "Content-Type": "application/json",
    "api-key": admin_key,
}

print(f"Azure AI Search: {search_endpoint}")
print(f"Index: {index_name}")
print(f"Azure OpenAI: {aoai_endpoint}")
print("Authentication: Admin Key (temporary)")
print("\nConfiguration loaded successfully ✓")


In [None]:
# Verify the index exists
# Fetch the index definition; a 200 response means the index is present.
verify_url = f"{search_endpoint}/indexes/{index_name}?api-version={api_version}"
# timeout: requests waits indefinitely by default, which would hang the cell
# if the endpoint is unreachable.
verify_response = requests.get(verify_url, headers=headers, timeout=30)

if verify_response.status_code == 200:
    print(f"✓ Index '{index_name}' found")
    index_info = verify_response.json()
    print(f"  Fields: {len(index_info.get('fields', []))}")

    # Get document count
    count_url = f"{search_endpoint}/indexes/{index_name}/docs/$count?api-version={api_version}"
    count_response = requests.get(count_url, headers=headers, timeout=30)
    if count_response.status_code == 200:
        print(f"  Documents: {count_response.text}")
    else:
        print(f"  Cannot get document count: {count_response.status_code}")
else:
    # Show the status and the start of the body to help diagnose the failure.
    print(f"✗ Index '{index_name}' not found!")
    print(f"  Status: {verify_response.status_code}")
    print(f"  Response: {verify_response.text[:200]}")


In [None]:
# Temporary: Check index using admin key (role permissions are propagating)
# Prefer the AZURE_SEARCH_ADMIN_KEY environment variable when it is set (and
# non-empty) so the secret does not have to be saved in the notebook; the
# literal is only the fallback placeholder.
admin_key = os.environ.get("AZURE_SEARCH_ADMIN_KEY") or "xxxxxxxxxxxxxxx"
admin_headers = {
    "api-key": admin_key,
    "Content-Type": "application/json",
}

# Check index exists
index_url = f"{search_endpoint}/indexes/{index_name}?api-version={api_version}"
# timeout: requests waits indefinitely by default, which would hang the cell
# if the endpoint is unreachable.
r = requests.get(index_url, headers=admin_headers, timeout=30)
print(f"Index status: {r.status_code}")

if r.status_code == 200:
    idx = r.json()
    print(f"✓ Index '{index_name}' exists")
    print(f"  Fields: {len(idx.get('fields', []))}")

    # Get doc count
    count_url = f"{search_endpoint}/indexes/{index_name}/docs/$count?api-version={api_version}"
    count_r = requests.get(count_url, headers=admin_headers, timeout=30)
    if count_r.status_code == 200:
        print(f"  Documents: {count_r.text}")
    else:
        print(f"  Doc count error: {count_r.status_code} - {count_r.text}")
else:
    print(f"Error: {r.text}")


## Step 2: Query the Index
Run five multimodal search queries: three that demonstrate different search modes (full-text, hybrid, and semantic hybrid), followed by two data-extraction tests.


### Define Search Helper Function


In [None]:
# Helper function for querying the multimodal index
_SEARCH_TYPES = ("full_text", "hybrid", "semantic", "vector_only")


def _vector_query(query_text, top):
    """Return the `kind: "text"` vector query clause for query_text on content_embedding."""
    return {
        "kind": "text",
        "text": query_text,
        "fields": "content_embedding",
        "k": top,
    }


def _build_search_body(query_text, search_type, top, filter_expr):
    """Build the JSON request body for one of the supported search modes."""
    body = {
        "count": True,
        "select": "content_id, document_title, content_text, content_path, image_document_id, text_document_id",
        "top": top,
    }

    if filter_expr:
        body["filter"] = filter_expr

    # Every mode except vector-only sends the keyword query.
    if search_type in ("full_text", "hybrid", "semantic"):
        body["search"] = query_text

    if search_type == "semantic":
        body["queryType"] = "semantic"
        # Derived from index_name; for the index configured in this notebook
        # it equals the previously hard-coded name.
        # NOTE(review): assumes the "<index>-semantic-configuration" naming
        # also holds if index_name is changed - confirm for other indexes.
        body["semanticConfiguration"] = f"{index_name}-semantic-configuration"
        body["answers"] = "extractive|count-3"
        body["captions"] = "extractive|highlight-true"

    # Every mode except plain full-text adds the vector query.
    if search_type in ("hybrid", "semantic", "vector_only"):
        body["vectorQueries"] = [_vector_query(query_text, top)]

    return body


def search_multimodal(query_text, search_type="full_text", top=5, filter_expr=None):
    """
    Search the multimodal index with different search modes.

    Args:
        query_text: Query string; used as the keyword query and/or as the
            text of the vector query, depending on search_type.
        search_type: 'full_text', 'hybrid', 'semantic' or 'vector_only'.
        top: Number of results requested; also used as the vector query's k.
        filter_expr: Optional filter expression, sent as the "filter" field
            of the request when truthy.

    Returns:
        The parsed JSON response on HTTP 200. For any other status the error
        is printed and a dict with "error", "@odata.count" (0) and "value"
        (empty list) is returned, so callers can read results uniformly.

    Raises:
        ValueError: If search_type is not one of the supported modes.
            Previously such a value produced a request with neither a
            keyword nor a vector query.
    """
    # Validate first so a typo in search_type fails loudly before any request.
    if search_type not in _SEARCH_TYPES:
        raise ValueError(
            f"Unknown search_type {search_type!r}; expected one of {', '.join(_SEARCH_TYPES)}"
        )

    url = f"{search_endpoint}/indexes/{index_name}/docs/search?api-version={api_version}"
    body = _build_search_body(query_text, search_type, top, filter_expr)

    # timeout: requests waits indefinitely by default, which would hang the
    # notebook cell if the endpoint is unreachable.
    response = requests.post(url, headers=headers, json=body, timeout=30)

    # Handle errors
    if response.status_code != 200:
        print(f"Error: HTTP {response.status_code}")
        print(f"Response: {response.text}")
        return {"error": response.text, "@odata.count": 0, "value": []}

    return response.json()


print("Search helper function defined.")

## Query 1: Full-Text Search
Keyword-based BM25 search across text and verbalized image descriptions (fast but requires exact word matches).

In [None]:
# Query 1: Full-text search across text AND image content
results = search_multimodal("renewable energy solutions", search_type="full_text", top=3)

print("Query: 'renewable energy solutions' (Full-Text Search)")
print("=" * 80)
print(f"Found {results.get('@odata.count', 0)} documents\n")

# One line per hit: type (image vs. text chunk), score, and a 150-char preview.
for i, doc in enumerate(results.get("value", []), 1):
    doc_type = "IMAGE" if doc.get("image_document_id") else "TEXT"
    score = doc.get("@search.score", 0)
    # `or ""`: if the field comes back as null the key is present with value
    # None, so .get()'s default is not used and None cannot be sliced.
    content = (doc.get("content_text") or "")[:150]

    print(f"[{i}] {doc_type} (Score: {score:.2f})")
    print(f"    {content}...\n")
print("=" * 80)


## Query 2: Hybrid Search (Recommended)
Combines BM25 keyword matching with vector semantic similarity - best balance of speed and accuracy for multimodal content.

In [None]:
# Query 2: Hybrid search (text + vector) about sustainability topics
results = search_multimodal("carbon emissions reduction and climate action", search_type="hybrid", top=3)

print("Query: 'carbon emissions reduction and climate action' (Hybrid Search)")
print("=" * 80)
print(f"Found {results.get('@odata.count', 0)} documents\n")

# One entry per hit: type, score, image file name (if any), 150-char preview.
for i, doc in enumerate(results.get("value", []), 1):
    doc_type = "IMAGE" if doc.get("image_document_id") else "TEXT"
    score = doc.get("@search.score", 0)
    # `or ""`: if the field comes back as null the key is present with value
    # None, so .get()'s default is not used and None cannot be sliced.
    content = (doc.get("content_text") or "")[:150]
    image_path = doc.get("content_path", "")

    print(f"[{i}] {doc_type} (Score: {score:.4f})")
    if image_path:
        # Show only the last path segment (the file name).
        print(f"    Image: {image_path.split('/')[-1]}")
    print(f"    {content}...\n")
print("=" * 80)


## Query 3: Semantic Hybrid Search (Most Powerful)
Combines keyword search, vector similarity, AND semantic reranking. Returns answers and highlighted captions.


In [None]:
# Query 3: Semantic hybrid search - the most powerful multimodal query
results = search_multimodal(
    "How can organizations reduce their water consumption?",
    search_type="semantic",
    top=3,
)

print("Question: How can organizations reduce their water consumption?")
print("=" * 80)

# Semantic answers are optional in the response; only the first one is shown,
# truncated to 300 characters.
answers = results.get("@search.answers", [])
if answers:
    best_answer = answers[0]
    confidence, answer_text = (
        best_answer.get("score", 0),
        best_answer.get("text", "")[:300],
    )

    print(f"\nAnswer (confidence: {confidence:.1%}):")
    print(f"{answer_text}...\n")

# The first search result is reported as the source, whether or not an
# answer was returned.
hits = results.get("value")
top_result = hits[0] if hits else None
if top_result:
    if top_result.get("image_document_id"):
        source_type = "IMAGE"
    else:
        source_type = "TEXT"
    reranker = top_result.get("@search.rerankerScore", 0)
    print(f"Source: {source_type} (Reranker Score: {reranker:.2f})")

    image_path = top_result.get("content_path")
    if image_path:
        # Only the last path segment (the file name) is printed.
        print(f"Image: {image_path.split('/')[-1]}")

print("=" * 80)


## Query 4: Chart Data Extraction Test
Asking a specific question about DGX H100 energy intensity to test if multimodal indexing captured data from charts/infographics in the PDF.

In [None]:
# Query 4: Testing chart data extraction - DGX H100 energy intensity
results = search_multimodal(
    "how much is energy intensity of compute for DGX H100",
    search_type="semantic",
    top=5,
)

print("Question: How much is energy intensity of compute for DGX H100?")
print("=" * 80)

# Show the first semantic answer together with the first search result as its
# source; without an answer, nothing but the fallback message is printed.
answers = results.get("@search.answers", [])
if not answers:
    print("No answer found.")
else:
    best_answer = answers[0]
    confidence, answer_text = best_answer.get("score", 0), best_answer.get("text", "")

    print(f"\nAnswer (confidence: {confidence:.1%}):")
    print(answer_text)

    # Source of the answer: the first entry of the result list, if any.
    hits = results.get("value")
    top_result = hits[0] if hits else None
    if top_result:
        if top_result.get("image_document_id"):
            source_type = "IMAGE"
        else:
            source_type = "TEXT"
        print(f"\nSource: {source_type}")

        image_path = top_result.get("content_path")
        if image_path:
            # Only the last path segment (the file name) is printed.
            print(f"Image: {image_path.split('/')[-1]}")

print("=" * 80)


## Query 5: Trend Data Extraction Test
Testing if the index can extract numerical trend data about electricity consumption changes over a specific time period (2023-2030).

In [None]:
# Query 5: Electricity consumption increase for space heating 2023-2030
# NOTE(review): the query text says "from 2023 and 2030" while the printed
# question says "from 2023 to 2030" - confirm which wording is intended.
results = search_multimodal(
    "how much is the electricity consumption increase for space heating from 2023 and 2030",
    search_type="semantic",
    top=5,
)

print("Question: How much is the electricity consumption increase for space heating from 2023 to 2030?")
print("=" * 80)

# Show the first semantic answer together with the first search result as its
# source; without an answer, nothing but the fallback message is printed.
answers = results.get("@search.answers", [])
if not answers:
    print("No answer found.")
else:
    best_answer = answers[0]
    confidence, answer_text = best_answer.get("score", 0), best_answer.get("text", "")

    print(f"\nAnswer (confidence: {confidence:.1%}):")
    print(answer_text)

    # Source of the answer: the first entry of the result list, if any.
    hits = results.get("value")
    top_result = hits[0] if hits else None
    if top_result:
        if top_result.get("image_document_id"):
            source_type = "IMAGE"
        else:
            source_type = "TEXT"
        print(f"\nSource: {source_type}")

        image_path = top_result.get("content_path")
        if image_path:
            # Only the last path segment (the file name) is printed.
            print(f"Image: {image_path.split('/')[-1]}")

print("=" * 80)
