# Use an existing AI Search index with the Azure AI Search tool

# Creating an Azure AI Search Index

In [23]:
import os

# --- Azure OpenAI Service settings (read from the environment; None when unset) ---
azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_openai_deployment = os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
azure_openai_embeddings_deployment = os.environ.get("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
azure_openai_api_version = "2024-10-01-preview"
# Vector length shared by the index's vector field and the embedding skill
azure_openai_embedding_size = 1536

# --- Azure AI Search settings ---
azure_search_service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
azure_search_service_admin_key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")
azure_search_service_index_name = "travel-product-index"

# --- Azure Storage settings ---
azure_storage_connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")


In [None]:
## This code creates an Azure Search index with vector search and semantic search capabilities.

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch
)

# Authenticate against Azure AI Search with the admin key
credential = AzureKeyCredential(azure_search_service_admin_key)

# Name of the index to create (or update)
index_name = azure_search_service_index_name

# Client used to manage indexes on the search service
index_client = SearchIndexClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)

# Index schema: chunk_id is the document key, text_vector holds the embedding
fields = [
    SearchField(name="parent_id", type=SearchFieldDataType.String),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
        analyzer_name="keyword",
    ),
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        sortable=False,
        filterable=False,
        facetable=False,
    ),
    SearchField(
        name="text_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=azure_openai_embedding_size,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search setup: one HNSW algorithm, one profile tying it to the vectorizer
vector_search = VectorSearch(
    algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
            vectorizer_name="myOpenAI",
        ),
    ],
    # Vectorizer backed by the Azure OpenAI embeddings deployment
    vectorizers=[
        AzureOpenAIVectorizer(
            vectorizer_name="myOpenAI",
            kind="azureOpenAI",
            parameters=AzureOpenAIVectorizerParameters(
                resource_url=azure_openai_endpoint,
                deployment_name=azure_openai_embeddings_deployment,
                # NOTE(review): the deployment name is also passed as the model name —
                # presumably the deployment is named after the model; confirm.
                model_name=azure_openai_embeddings_deployment,
            ),
        ),
    ],
)

# Semantic configuration: "title" is the title field, "chunk" is the content field
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        content_fields=[SemanticField(field_name="chunk")],
    ),
)

# Semantic settings wrapping the single configuration above
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and create or update it on the service
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

travel-product-index created


In [27]:
# Create a data source connection to Azure Blob Storage

from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection
)

# Client for managing data sources (it is also the client type used for skillsets and indexers)
indexer_client = SearchIndexerClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)

# Blob container that holds the source documents
container = SearchIndexerDataContainer(name="travelproducts")

# Data source definition pointing the search service at that container
data_source_connection = SearchIndexerDataSourceConnection(
    name="travel-products-ds",
    type="azureblob",
    connection_string=azure_storage_connection_string,
    container=container,
)

# Register (or overwrite) the data source on the search service
data_source = indexer_client.create_or_update_data_source_connection(data_source_connection)

print(f"Data source '{data_source.name}' created or updated")

Data source 'travel-products-ds' created or updated


In [29]:
# Test the connection to Azure Blob Storage

from azure.storage.blob import BlobServiceClient

# Build a blob service client from the storage connection string
blob_service_client = BlobServiceClient.from_connection_string(azure_storage_connection_string)

# Client scoped to the same container name the data source uses
container_client = blob_service_client.get_container_client("travelproducts")

# Enumerate the container's blobs; a failure is reported rather than raised
try:
    blobs_list = container_client.list_blobs()
    print("Blobs in the container:")
    for blob in blobs_list:
        print(blob.name)
except Exception as e:
    print(f"Failed to access the blob storage: {e}")
else:
    # Only reached when the listing completed without an exception
    print("Access to the blob storage was granted.")

Blobs in the container:
Adventure Seeker Sling.docx
AdventurePro Hiking Backpack.docx
Executive Pro Laptop Briefcase.docx
Globetrotter Elite Suitcase.docx
JetSet Travel Pillow.docx
Metro Commuter Crossbody Bag.docx
Nomad Traveler Suitcase.docx
SafeJourney Travel Insurance.docx
TechGuard Laptop Bag.docx
TravelBuddy Digital Luggage Scale.docx
TravelConnect WiFi Plan.docx
TravelEase Packing Cubes Set.docx
TravelSmart Universal Adapter.docx
Urban Explorer Daypack.docx
UrbanTech Laptop Messenger Bag.docx
UrbanTraveler Laptop Backpack.docx
Voyager Pro Luggage.docx
WanderSafe Travel Lock.docx
Access to the blob storage was granted.


In [None]:
# Create a skillset to chunk documents and generate embeddings

from azure.search.documents.indexes.models import (
    SplitSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    AzureOpenAIEmbeddingSkill,
    SearchIndexerIndexProjection,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    IndexProjectionMode,
    SearchIndexerSkillset,
)

# Name under which the skillset is registered
skillset_name = "travel-products-ss"

# Skill 1: split each document's content into overlapping pages
split_skill = SplitSkill(
    description="Split skill to chunk documents",
    text_split_mode="pages",
    context="/document",
    maximum_page_length=2000,
    page_overlap_length=500,
    inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
    outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
)

# Skill 2: embed every page produced by the split skill via Azure OpenAI
embedding_skill = AzureOpenAIEmbeddingSkill(
    description="Skill to generate embeddings via Azure OpenAI",
    context="/document/pages/*",
    resource_url=azure_openai_endpoint,
    deployment_name=azure_openai_embeddings_deployment,
    model_name=azure_openai_embeddings_deployment,
    dimensions=azure_openai_embedding_size,
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=[OutputFieldMappingEntry(name="embedding", target_name="text_vector")],
)

# Field mappings from each enriched page to the target index's fields
projection_mappings = [
    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
    InputFieldMappingEntry(name="text_vector", source="/document/pages/*/text_vector"),
    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
]

# Index projection: project pages into the index, keyed back to the parent via parent_id
index_projections = SearchIndexerIndexProjection(
    selectors=[
        SearchIndexerIndexProjectionSelector(
            target_index_name=azure_search_service_index_name,
            parent_key_field_name="parent_id",
            source_context="/document/pages/*",
            mappings=projection_mappings,
        ),
    ],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)

# Skills are listed in the order split -> embed
skills = [split_skill, embedding_skill]

skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="Skillset to chunk documents, generate embeddings",
    skills=skills,
    index_projection=index_projections,
)

# Register (or overwrite) the skillset on the search service
client = SearchIndexerClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)
client.create_or_update_skillset(skillset)
print(f"{skillset.name} created")

travel-products-ss created


In [31]:
# Create an indexer to run the skillset and populate the index

from azure.search.documents.indexes.models import (
    SearchIndexer
)

# Name under which the indexer is registered
indexer_name = "travel-products-idxr"

# No extra indexer parameters are passed
indexer_parameters = None

# Indexer definition: connects the data source, the skillset and the target index
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents, generate embeddings",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    parameters=indexer_parameters,
)

# Register (or overwrite) the indexer on the search service
indexer_client = SearchIndexerClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)
indexer_result = indexer_client.create_or_update_indexer(indexer)

print(f' {indexer_name} is created and running. Give the indexer a few minutes before running a query.')

 travel-products-idxr is created and running. Give the indexer a few minutes before running a query.


## Step 1: Create an Azure AI Client

In [43]:
import os
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.projects.models import AzureAISearchTool, AzureAISearchQueryType

# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery" 
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: eastus.api.azureml.ms;my-subscription-id;my-resource-group;my-hub-name

# Project connection string; a missing PROJECT_CONNECTION_STRING raises KeyError here
connection_string = os.environ["PROJECT_CONNECTION_STRING"]

# Client for the Azure AI project, authenticated with DefaultAzureCredential
project_client = AIProjectClient.from_connection_string(
    conn_str=connection_string,
    credential=DefaultAzureCredential(),
)

## Step 2: Get the connection ID for the Azure AI Search resource

You must be signed in to Azure before running this step: run `az login`.

In [44]:
# AI Search resource connection ID
# Looks up the project's Azure AI Search connection and stores its ID in `conn_id`.
#
# This uses the public `connections.list()` API filtered by connection type instead of the
# private `_list_connections()` helper, which is not part of the SDK's supported surface.
#
# If the project has more than one AI Search connection, the first one returned is used;
# to pick a specific one, set its ID in your .env file instead.
from azure.ai.projects.models import ConnectionType

conn_list = project_client.connections.list(connection_type=ConnectionType.AZURE_AI_SEARCH)

# Take the first matching connection, or fall back to an empty ID when none exists
first_search_conn = next(iter(conn_list), None)
conn_id = first_search_conn.id if first_search_conn is not None else ""

if not conn_id:
    print("Warning: no Azure AI Search connection was found in this project.")

print(f"Connection ID: {conn_id}")

Connection ID: /subscriptions/f08cda90-375b-4b3e-a105-4656379a94ab/resourceGroups/rg-Ziggy-ai-agent-001/providers/Microsoft.MachineLearningServices/workspaces/ziggy-ai-agent-project-001/connections/ziggyazureaisearch


## Step 3: Configure the Azure AI Search tool

Query type values can be found here: https://learn.microsoft.com/en-us/python/api/azure-ai-projects/azure.ai.projects.models.azureaisearchquerytype

In [45]:
# Initialize the agent's AI Search tool with the search connection ID and index name.
#
# An explicit connection ID from the environment wins (useful when the project has several
# AI Search connections). When it is not set, fall back to the `conn_id` discovered in Step 2
# instead of failing with a KeyError.
connection_id = os.environ.get("PROJECT_CONNECTION_ID_AZURE_AI_SEARCH") or conn_id
if not connection_id:
    raise ValueError(
        "No Azure AI Search connection ID available: set PROJECT_CONNECTION_ID_AZURE_AI_SEARCH "
        "or add an Azure AI Search connection to the project."
    )

# Same index name as the one created in the first part of this notebook
index_name = "travel-product-index"

ai_search = AzureAISearchTool(
    index_connection_id=connection_id,
    index_name=index_name,
    query_type=AzureAISearchQueryType.VECTOR_SEMANTIC_HYBRID,
    top_k=5,  # number of search results handed to the agent
)

## Step 4: Create an agent with the Azure AI Search tool enabled

In [46]:
# Register an agent wired to the tool definitions and resources held by `ai_search`
agent = project_client.agents.create_agent(
    name="my-python-azure-ai-search-agent",
    model="gpt-4o",
    tools=ai_search.definitions,
    tool_resources=ai_search.resources,
    instructions="You are a helpful assistant that can answer questions about travel products. You have a search tool that can find relevant information.",
)
print(f"Created agent, ID: {agent.id}")

Created agent, ID: asst_qE6tQNqlYcnH02r7UefhFTff


## Step 5: Create a Thread

In [47]:
# Start a new conversation thread; run_agent() below posts every message to this thread.
thread = project_client.agents.create_thread()
print(f"Created thread, ID: {thread.id}")

Created thread, ID: thread_q9zfpjnH5WSnop9xe2kvYAX0


## Helper Functions
- Reformat citations to show the proper document title
- Add a message to the thread
- Run the Agent
- Check the Run Status
- Display the Agent's Response


In [48]:
def reformat_citations(content_block):
    """Strip citation placeholders from a text block and append the cited sources.

    Args:
        content_block: Mapping with a ``"value"`` entry (the response text) and an
            optional ``"annotations"`` entry (a list of annotation mappings). Only
            annotations whose ``"type"`` is ``"url_citation"`` are processed.

    Returns:
        The text with every ``url_citation`` placeholder removed. When at least one
        citation has a usable label, the text is stripped and `` Source: a, b`` is
        appended, listing each distinct label once in first-seen order. Otherwise
        the text is returned without the suffix.

    Raises:
        KeyError: if ``content_block`` has no ``"value"`` entry.
    """
    # `annotations` may be missing or explicitly None; treat both as "no citations".
    annotations = content_block.get("annotations") or []
    paragraph = content_block["value"]

    placeholders = set()
    sources = {}  # dict used as an insertion-ordered set of source labels

    for annotation in annotations:
        if annotation.get("type") != "url_citation":
            continue

        placeholder_text = annotation.get("text")
        if placeholder_text:
            placeholders.add(placeholder_text)

        # The title can be absent or None; fall back to the URL so the source is
        # still attributed instead of raising KeyError/TypeError.
        details = annotation.get("url_citation") or {}
        label = details.get("title") or details.get("url")
        if label:
            sources[label] = None

    # Remove all placeholders from the paragraph
    for placeholder_text in placeholders:
        paragraph = paragraph.replace(placeholder_text, "")

    # If there's at least one citation, append "Source: ..." at the end
    if sources:
        paragraph = paragraph.strip() + f" Source: {', '.join(sources)}"

    return paragraph

In [49]:
def run_agent(user_input):
    """Send ``user_input`` to the agent on the shared thread and print its reply.

    Uses the notebook-level ``project_client``, ``thread`` and ``agent`` objects
    created in earlier cells, so every call continues the same conversation.

    Args:
        user_input: The user's message text.

    Returns:
        None. The message ID, any run failure or other non-completed status, and
        the agent's response (with citations reformatted) are printed.
    """
    # Step 6: Add a message to the thread
    message = project_client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content=user_input,
    )
    print(f"Created message, ID: {message.id}")

    # Step 7 & 8: Create and process agent run in thread with tools
    run = project_client.agents.create_and_process_run(thread_id=thread.id, agent_id=agent.id)

    if run.status == "failed":
        print(f"Run failed: {run.last_error}")
        return

    # Any other non-completed status used to end silently; report it.
    if run.status != "completed":
        print(f"Run ended with status: {run.status}")
        return

    # Step 9: Display the Agent's Response
    messages = project_client.agents.list_messages(thread_id=thread.id)
    if not messages.data:
        print("No messages found.")
        return

    # The original code took data[0] as the latest assistant message; check the role
    # explicitly so a user message is never printed as the agent's response.
    agent_message = next((msg for msg in messages.data if msg.role == "assistant"), None)
    if agent_message is None:
        print("No assistant message found.")
        return

    # A message may contain non-text parts; use the first part that carries text.
    content_block = next(
        (part.text for part in agent_message.content if getattr(part, "text", None) is not None),
        None,
    )
    if content_block is None:
        print("Agent returned no text content.")
        return

    # Strip the citation placeholders and append the cited sources
    response_text = reformat_citations(content_block)
    print(f"Agent Response: {response_text}")

## Running the agent using the Helper Function

In [50]:
# First turn of the conversation on the shared thread.
user_input = "Do you have any bags available?"
run_agent(user_input)

Created message, ID: msg_08rT9q7OrjgVgRFh5tWZ9ikn
Agent Response: Here are the travel bags currently available:

1. **Metro Commuter Crossbody Bag** - Ideal for daily commuting and travel, priced at $60. Durable canvas material, RFID-blocking pocket, and USB charging port included. Available in black, grey, olive green, and navy blue.

2. **Adventure Seeker Sling Bag** - Compact design perfect for everyday use or outdoor activities, priced at $45. Made of water-resistant polyester with a built-in earphone port. Available in black, grey, blue, and green.

3. **Voyager Pro Luggage** - High-quality travel suitcase with expandable capacity, TSA-approved lock, and spinner wheels for smooth maneuverability. Priced at $250, it comes in black, blue, red, and silver.

4. **Nomad Traveler Suitcase** - Robust and lightweight suitcase featuring organized compartments, water-resistance, and a TSA-approved lock for $275. Available in black, green, purple, and silver.

5. **Globetrotter Elite Suitcas

In [51]:
# Second turn, posted to the same thread as the previous question.
user_input = "I need a large suitcase for travel. Which one do you recommend?"
run_agent(user_input)

Created message, ID: msg_rUR4vvp0bC0nxsjBZ2ziPn5B
Agent Response: For a large suitcase suitable for travel, here are two highly recommended options:

1. **Voyager Pro Luggage** priced at $250:
   - Features expandable capacity for extra space when needed.
   - Comes with TSA-approved lock for security.
   - Equipped with spinner wheels for smooth maneuverability.
   - Available in black, blue, red, and silver.

2. **Globetrotter Elite Suitcase** priced at $299:
   - Made from durable polycarbonate for premium quality.
   - Offers expandable space for versatile packing needs.
   - Includes spinner wheels and TSA lock for convenience and safety.
   - Available in black, navy blue, burgundy, and silver.

Both are excellent choices for large travel needs, with expandable options and sturdy designs. If you'd like help selecting based on specific preferences like durability or aesthetic, let me know! Source: Voyager Pro Luggage.docx, Globetrotter Elite Suitcase.docx


In [52]:
# Follow-up turn: "these two products" refers to the previous answer, so this relies on
# run_agent() posting to the same thread as the earlier questions.
user_input = "Can you give me a table comparison of these two products?"
run_agent(user_input)

Created message, ID: msg_aurBhWmRcu3RJ1MZYX0rTMz3
Agent Response: Here’s a comparison table for the **Voyager Pro Luggage** and **Globetrotter Elite Suitcase**:

| Feature                   | Voyager Pro Luggage                   | Globetrotter Elite Suitcase           |
|---------------------------|---------------------------------------|---------------------------------------|
| **Price**                 | $250                                 | $299                                 |
| **Material**              | Not specified                        | Durable polycarbonate                |
| **Lock Type**             | TSA-approved lock                    | TSA-approved lock                    |
| **Wheel Type**            | Spinner wheels (smooth maneuverability) | Spinner wheels (smooth maneuverability) |
| **Expandable Capacity**   | Yes                                  | Yes                                  |
| **Color Options**         | Black, Blue, Red, Silver             | Bla

In [37]:
# Fetch and log all messages
# Prints the raw list response for the whole thread, which can be very long.
# NOTE(review): the saved output under this cell shows a different thread ID and search index
# than the cells above, so it looks stale — re-run this cell after the conversation cells.
messages = project_client.agents.list_messages(thread_id=thread.id)
print(f"Messages: {messages}")

Messages: {'object': 'list', 'data': [{'id': 'msg_EpT5bhhndssS0r2hxnILDk06', 'object': 'thread.message', 'created_at': 1743929210, 'assistant_id': 'asst_C7LNJoTe7M86nrTmVTD2RdCr', 'thread_id': 'thread_y5pf2Ff3GYxPPzRAsYqRnErU', 'run_id': 'run_qEJOZXdazl4YlaFhd0jnoOun', 'role': 'assistant', 'content': [{'type': 'text', 'text': {'value': 'The price of the TrailMaster X4 Tent is $250【3:0†source】.', 'annotations': [{'type': 'url_citation', 'text': '【3:0†source】', 'start_index': 44, 'end_index': 56, 'url_citation': {'url': 'https://ziggyazureaisearch.search.windows.net//indexes/product-index-challenge/docs/03545acbabcc_aHR0cHM6Ly96aWdneXN0b3JhZ2UwMS5ibG9iLmNvcmUud2luZG93cy5uZXQvcHJvZHVjdHMvcHJvZHVjdF9pbmZvXzEucGRm0_pages_0?api-version=2024-07-01&$select=chunk_id,parent_id,title,chunk', 'title': 'product_info_1.pdf'}}]}}], 'attachments': [], 'metadata': {}}, {'id': 'msg_FYgibwjHrmYH2Cuv1C4KYFCb', 'object': 'thread.message', 'created_at': 1743929205, 'assistant_id': None, 'thread_id': 'thread

## Delete Agent to free up resources

In [53]:
# Delete the agent when done
# Only the agent is removed here; this cell does not delete the thread created earlier.
project_client.agents.delete_agent(agent.id)
print("Deleted agent")

Deleted agent
