### Set up a Python virtual environment in Visual Studio Code

1. Open the Command Palette (Ctrl+Shift+P).
1. Search for **Python: Create Environment**.
1. Select **Venv**.
1. Select a Python interpreter. Choose 3.10 or later.

It can take a minute to set up. If you run into problems, see [Python environments in VS Code](https://code.visualstudio.com/docs/python/environments).

### Install packages

In [None]:
# Shell escape: install the packages listed in requirements.txt (--quiet trims pip's output).
! pip install -r requirements.txt --quiet

### Load .env file (Copy .env-sample to .env and update accordingly)

In [None]:
from dotenv import load_dotenv
from azure.identity.aio import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

# Read settings from the local .env file into the process environment.
load_dotenv(override=True)

# Settings this notebook does not read can be left out of your .env file.
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

# Authenticate with the admin key when one is configured; otherwise use DefaultAzureCredential.
if os.environ.get("AZURE_SEARCH_ADMIN_KEY"):
    credential = AzureKeyCredential(os.environ.get("AZURE_SEARCH_ADMIN_KEY"))
else:
    credential = DefaultAzureCredential()

# Names of the search resources created below; each has a default.
knowledge_source_name = os.environ.get("AZURE_SEARCH_KNOWLEDGE_SOURCE", "json-knowledge-source")
knowledge_agent_name = os.environ.get("AZURE_SEARCH_KNOWLEDGE_AGENT", "json-knowledge-agent")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME", "json-knowledge-index")

# Blob Storage settings.
blob_connection_string = os.environ["BLOB_CONNECTION_STRING"]
# The search data source connection string is optional and falls back to the blob connection string.
# Set it only if you are using MI to connect to the data source:
# https://learn.microsoft.com/azure/search/search-howto-indexing-azure-blob-storage#supported-credentials-and-connection-strings
search_blob_connection_string = os.environ.get(
    "SEARCH_BLOB_DATASOURCE_CONNECTION_STRING",
    blob_connection_string,
)
blob_container_name = os.environ.get("BLOB_CONTAINER_NAME", "json-documents")

# Azure OpenAI settings (the key stays None when AZURE_OPENAI_KEY is not set).
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.environ.get("AZURE_OPENAI_KEY")
azure_openai_embedding_deployment = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_embedding_model_name = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-large")
azure_openai_embedding_model_dimensions = int(
    os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_DIMENSIONS", "3072")
)
azure_openai_chatgpt_deployment = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "gpt-5-mini")
azure_openai_chatgpt_model_name = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL_NAME", "gpt-5-mini")


## Connect to Blob Storage and load documents

Upload documents to Blob Storage. You can use the sample documents in the data/jsondocuments folder.

In [None]:
from azure.storage.blob.aio import BlobServiceClient  
import glob

# Relative path to the sample JSON documents: three directory levels up, then data/jsondocuments.
sample_docs_directory = os.path.join(*([".."] * 3), "data", "jsondocuments")

async def upload_sample_documents(
        blob_connection_string: str,
        blob_container_name: str,
        documents_directory: str,
        # Set to false if you want to use credentials included in the blob connection string
        # Otherwise your identity will be used as credentials
        use_user_identity: bool = True
    ) -> None:
    """Upload the files in a local folder to a blob container.

    The container is created when it does not exist yet. A file is uploaded
    under its base name, and only when no blob with that name is already
    present in the container.

    Args:
        blob_connection_string: Connection string of the storage account.
        blob_container_name: Name of the target container.
        documents_directory: Local folder whose direct children are uploaded.
        use_user_identity: When True, a DefaultAzureCredential is passed to the
            blob client; when False, no explicit credential is passed.
    """
    # glob('*') also matches sub-directories, which cannot be opened as files,
    # so keep regular files only.
    file_paths = [
        path
        for path in glob.glob(os.path.join(documents_directory, '*'))
        if os.path.isfile(path)
    ]

    # Connect to Blob Storage
    async with DefaultAzureCredential() as user_credential, BlobServiceClient.from_connection_string(conn_str=blob_connection_string, credential=user_credential if use_user_identity else None) as blob_service_client:
        async with blob_service_client.get_container_client(blob_container_name) as container_client:
            if not await container_client.exists():
                await container_client.create_container()

            for file_path in file_paths:
                blob_name = os.path.basename(file_path)
                async with container_client.get_blob_client(blob_name) as blob_client:
                    if await blob_client.exists():
                        continue
                    # Open the local file only when it actually has to be uploaded.
                    with open(file_path, "rb") as data:
                        await blob_client.upload_blob(data)

docs_directory = sample_docs_directory

await upload_sample_documents(
    blob_connection_string = blob_connection_string,
    blob_container_name = blob_container_name,
    documents_directory = docs_directory)

print(f"Setup sample data in {blob_container_name}")

## Create a blob data source connector on Azure AI Search

In [None]:
from azure.search.documents.indexes.aio import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection
)
from azure.search.documents.indexes.models import SoftDeleteColumnDeletionDetectionPolicy

# Create a data source 
async with SearchIndexerClient(endpoint, credential) as indexer_client:
    container = SearchIndexerDataContainer(name=blob_container_name)
    data_source_connection = SearchIndexerDataSourceConnection(
        name=f"{index_name}-blob",
        type="azureblob",
        connection_string=search_blob_connection_string,
        container=container,
        data_deletion_detection_policy=SoftDeleteColumnDeletionDetectionPolicy(soft_delete_column_name="is_deleted", soft_delete_marker_value="true")
    )
    data_source = await indexer_client.create_or_update_data_source_connection(data_source_connection)

    print(f"Data source '{data_source.name}' created or updated")

## Create a search index

Vector and nonvector content is stored in a search index.

In [None]:
from azure.search.documents.indexes.aio import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SemanticConfiguration,
    SemanticSearch,
    SemanticPrioritizedFields,
    SemanticField,
    SearchIndex,
    BinaryQuantizationCompression
)

# Field definitions for the search index, listed in schema order.
fields = [
    # parent_id is the parent key used by the index projection; chunk_id is the document key.
    SearchField(name="parent_id", type=SearchFieldDataType.String, filterable=True, sortable=True, facetable=True),
    SearchField(name="chunk_id", type=SearchFieldDataType.String, key=True, filterable=True, sortable=True, facetable=True, analyzer_name="keyword"),

    # Event / playlist / video / session metadata. The *_id fields use the keyword analyzer.
    SearchField(name="event_id", type=SearchFieldDataType.String, searchable=True, filterable=True, facetable=True, analyzer_name="keyword"),
    SearchField(name="event_name", type=SearchFieldDataType.String, searchable=True),
    SearchField(name="playlist_id", type=SearchFieldDataType.String, searchable=True, filterable=True, facetable=True, analyzer_name="keyword"),
    SearchField(name="playlist_name", type=SearchFieldDataType.String, searchable=True),
    SearchField(name="video_id", type=SearchFieldDataType.String, searchable=True, filterable=True, facetable=True, analyzer_name="keyword"),
    SearchField(name="session_title", type=SearchFieldDataType.String, searchable=True, sortable=True),
    SearchField(name="speaker", type=SearchFieldDataType.String, searchable=True, facetable=True),

    # Chunk text.
    SearchField(name="content", type=SearchFieldDataType.String, searchable=True),

    # Per-chunk and per-video attributes used for filtering and sorting.
    SearchField(name="timestamp_start", type=SearchFieldDataType.String, searchable=False, filterable=True, sortable=True),
    SearchField(name="timestamp_end", type=SearchFieldDataType.String, searchable=False, filterable=True, sortable=True),
    SearchField(name="chunk_index", type=SearchFieldDataType.Int32, filterable=True, sortable=True),
    SearchField(name="duration", type=SearchFieldDataType.Int32, filterable=True, sortable=True),
    SearchField(name="upload_date", type=SearchFieldDataType.String, searchable=False, filterable=True, sortable=True),
    SearchField(name="view_count", type=SearchFieldDataType.Int64, searchable=False, filterable=True, sortable=True, facetable=True),
    SearchField(name="processed_at", type=SearchFieldDataType.DateTimeOffset, filterable=True, sortable=True),
    SearchField(name="content_length", type=SearchFieldDataType.Int64, filterable=True, sortable=True),

    # Embedding vector; its dimension count comes from the environment settings.
    SearchField(
        name="vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_profile_name="myHnswProfile",
        vector_search_dimensions=azure_openai_embedding_model_dimensions,
        # See https://learn.microsoft.com/azure/search/vector-search-how-to-storage-options
        stored=False,
    ),
]

# Vector search setup: the "myHnswProfile" profile used by the vector field links
# the HNSW algorithm, the Azure OpenAI vectorizer, and binary quantization by name.
vector_search = VectorSearch(
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            compression_name="myBinaryCompression",
            vectorizer_name="myOpenAI",
            algorithm_configuration_name="myHnsw",
        )
    ],
    algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
    compressions=[
        # See https://learn.microsoft.com/azure/search/vector-search-how-to-quantization
        BinaryQuantizationCompression(compression_name="myBinaryCompression")
    ],
    vectorizers=[
        AzureOpenAIVectorizer(
            kind="azureOpenAI",
            vectorizer_name="myOpenAI",
            parameters=AzureOpenAIVectorizerParameters(
                api_key=azure_openai_key,
                model_name=azure_openai_embedding_model_name,
                deployment_name=azure_openai_embedding_deployment,
                resource_url=azure_openai_endpoint,
            ),
        )
    ],
)
  
# Semantic configuration that prioritizes the "content" field.
semantic_config = SemanticConfiguration(
    prioritized_fields=SemanticPrioritizedFields(
        content_fields=[SemanticField(field_name="content")],
    ),
    name="my-semantic-config",
)

# Register the configuration and make it the default for the index.
semantic_search = SemanticSearch(
    default_configuration_name=semantic_config.name,
    configurations=[semantic_config],
)
  
# Create the search index
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search, semantic_search=semantic_search) 
async with SearchIndexClient(endpoint=endpoint, credential=credential) as index_client:
    result = await index_client.create_or_update_index(index)  
    print(f"{result.name} created")  


## Create a skillset

Skills drive integrated vectorization. [Text Split](https://learn.microsoft.com/azure/search/cognitive-search-skill-textsplit) provides data chunking. [AzureOpenAIEmbedding](https://learn.microsoft.com/azure/search/cognitive-search-skill-azure-openai-embedding) handles calls to Azure OpenAI, using the connection information you provide in the environment variables. An [index projection](https://learn.microsoft.com/azure/search/index-projections-concept-intro) specifies secondary indexes used for chunked data.

In [None]:
from azure.search.documents.indexes.models import (
    SplitSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    AzureOpenAIEmbeddingSkill,
    SearchIndexerIndexProjection,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    IndexProjectionMode,
    SearchIndexerSkillset
)

# Derive the skillset name from the index name; the indexer refers to the skillset by this name.
skillset_name = f"{index_name}-skillset"

def create_skillset():
    """Build the skillset definition used by the indexer.

    Reads the module-level Azure OpenAI settings, ``index_name`` and
    ``skillset_name``.

    Returns:
        A SearchIndexerSkillset holding a split skill, an Azure OpenAI
        embedding skill, and an index projection that targets ``index_name``.
    """
    # Split /document/content into pages of up to 2000 with an overlap of 500.
    chunker = SplitSkill(
        description="Split skill to chunk documents",
        context="/document",
        text_split_mode="pages",
        maximum_page_length=2000,
        page_overlap_length=500,
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
    )

    # Embed every page produced by the split skill; the output is stored as "vector".
    embedder = AzureOpenAIEmbeddingSkill(
        description="Skill to generate embeddings via Azure OpenAI",
        context="/document/pages/*",
        inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
        outputs=[OutputFieldMappingEntry(name="embedding", target_name="vector")],
        resource_url=azure_openai_endpoint,
        api_key=azure_openai_key,
        deployment_name=azure_openai_embedding_deployment,
        model_name=azure_openai_embedding_model_name,
        dimensions=azure_openai_embedding_model_dimensions,
    )

    # Index field <- enrichment path, for each page projected into the index.
    projected_fields = [
        # Per-page values.
        InputFieldMappingEntry(name="content", source="/document/pages/*"),
        InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
        # Values copied from the parent document.
        InputFieldMappingEntry(name="timestamp_start", source="/document/timestamp_start"),
        InputFieldMappingEntry(name="timestamp_end", source="/document/timestamp_end"),
        InputFieldMappingEntry(name="chunk_index", source="/document/chunk_index"),
        InputFieldMappingEntry(name="duration", source="/document/duration"),
        InputFieldMappingEntry(name="content_length", source="/document/content_length"),
        InputFieldMappingEntry(name="event_id", source="/document/event_id"),
        InputFieldMappingEntry(name="event_name", source="/document/event_name"),
        InputFieldMappingEntry(name="playlist_id", source="/document/playlist_id"),
        InputFieldMappingEntry(name="playlist_name", source="/document/playlist_name"),
        InputFieldMappingEntry(name="video_id", source="/document/video_id"),
        InputFieldMappingEntry(name="session_title", source="/document/session_title"),
        InputFieldMappingEntry(name="speaker", source="/document/speaker"),
        InputFieldMappingEntry(name="upload_date", source="/document/upload_date"),
        InputFieldMappingEntry(name="view_count", source="/document/view_count"),
        InputFieldMappingEntry(name="processed_at", source="/document/processed_at"),
    ]

    page_selector = SearchIndexerIndexProjectionSelector(
        target_index_name=index_name,
        parent_key_field_name="parent_id",
        source_context="/document/pages/*",
        mappings=projected_fields,
    )

    projection = SearchIndexerIndexProjection(
        selectors=[page_selector],
        parameters=SearchIndexerIndexProjectionsParameters(
            projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
        ),
    )

    return SearchIndexerSkillset(
        name=skillset_name,
        description="Skillset to chunk documents and generating embeddings",
        skills=[chunker, embedder],
        index_projection=projection,
    )

skillset = create_skillset()
async with SearchIndexerClient(endpoint, credential) as client:
    await client.create_or_update_skillset(skillset)
    print(f"{skillset.name} created")


## Create an indexer

Use the JSON array parsing mode to read the included transcript document, which is stored as an array of transcript chunks.

In [None]:
from azure.search.documents.indexes.models import (
    SearchIndexer,
    IndexingParameters,
    IndexingParametersConfiguration,
    BlobIndexerParsingMode,
)

# The indexer is named after the index it populates.
indexer_name = f"{index_name}-indexer"

# Parse each blob as a JSON array.
# NOTE(review): query_timeout is explicitly cleared, presumably because the SDK's
# default value is not accepted for blob data sources; confirm.
indexer_parameters = IndexingParameters(
    configuration=IndexingParametersConfiguration(
        query_timeout=None,
        parsing_mode=BlobIndexerParsingMode.JSON_ARRAY,
    )
)

# Wire the data source, skillset, and target index together.
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    data_source_name=data_source.name,
    skillset_name=skillset_name,
    target_index_name=index_name,
    parameters=indexer_parameters,
)

async with SearchIndexerClient(endpoint, credential) as indexer_client:
    indexer_result = await indexer_client.create_or_update_indexer(indexer)

    # Run the indexer  
    await indexer_client.run_indexer(indexer_name)  
    print(f' {indexer_name} is created and running. If queries return no results, please wait a bit and try again.')  


## Create an index knowledge source and agent on Azure AI Search

This step creates an index knowledge source that wraps the index you created so that a knowledge agent can query it.

In [None]:
from azure.search.documents.indexes.models import SearchIndexKnowledgeSource, SearchIndexKnowledgeSourceParameters
from azure.search.documents.indexes.aio import SearchIndexClient

knowledge_source = SearchIndexKnowledgeSource(
    name=knowledge_source_name,
    search_index_parameters=SearchIndexKnowledgeSourceParameters(
        search_index_name=index_name,
        source_data_select="chunk_id,content,session_title,playlist_name"
    )
)

async with SearchIndexClient(endpoint=endpoint, credential=credential) as client:
    await client.create_or_update_knowledge_source(knowledge_source)
    print(f"Created knowledge source: {knowledge_source.name}")

## Create a knowledge agent on Azure AI Search

This step creates a knowledge agent, which acts as a wrapper for your knowledge source and LLM deployment.

`EXTRACTIVE_DATA` is the default modality and returns content from your knowledge sources without generative alteration. Use the `ANSWER_SYNTHESIS` modality for LLM-generated answers that cite the retrieved content.

In [None]:
from azure.search.documents.indexes.models import KnowledgeAgent, KnowledgeSourceReference, KnowledgeAgentOutputConfiguration, KnowledgeAgentOutputConfigurationModality, KnowledgeAgentAzureOpenAIModel

# Chat model deployment the agent uses, described with the Azure OpenAI connection settings.
chat_model = KnowledgeAgentAzureOpenAIModel(
    azure_open_ai_parameters=AzureOpenAIVectorizerParameters(
        resource_url=azure_openai_endpoint,
        api_key=azure_openai_key,
        deployment_name=azure_openai_chatgpt_deployment,
        model_name=azure_openai_chatgpt_model_name,
    )
)

# Ask for synthesized answers and include the activity log in responses.
output_config = KnowledgeAgentOutputConfiguration(
    include_activity=True,
    modality=KnowledgeAgentOutputConfigurationModality.ANSWER_SYNTHESIS,
)

# The agent ties together the chat model, the knowledge source, and the output settings.
agent = KnowledgeAgent(
    name=knowledge_agent_name,
    output_configuration=output_config,
    models=[chat_model],
    knowledge_sources=[
        KnowledgeSourceReference(
            name=knowledge_source.name,
            always_query_source=True,
            include_reference_source_data=True,
        )
    ],
)

async with SearchIndexClient(endpoint=endpoint, credential=credential) as index_client:
    await index_client.create_or_update_agent(agent)
    print(f"Created knowledge agent: {agent.name}")

## Use agentic retrieval to fetch results

This step runs the agentic retrieval pipeline to produce a grounded, citation-backed answer. Given the conversation history and retrieval parameters, your knowledge agent:

* Analyzes the entire conversation to infer the user's information need.
* Decomposes the compound query into focused subqueries.
* Executes the subqueries concurrently against your knowledge source.
* Uses semantic ranker to rerank and filter the results.
* Synthesizes the top results into a natural-language answer.

In [None]:
from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient
from azure.search.documents.agent.models import KnowledgeAgentRetrievalRequest, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, SearchIndexKnowledgeSourceParams

messages = [
    KnowledgeAgentMessage(
        role="user",
        content=[KnowledgeAgentMessageTextContent(
            text="Name a few announcements"
        )]
    )
]

agent_client = KnowledgeAgentRetrievalClient(endpoint=endpoint, agent_name=knowledge_agent_name, credential=credential)
result = await agent_client.retrieve(KnowledgeAgentRetrievalRequest(messages=messages))
await agent_client.close()

## Review the retrieval response, activity, and results
Because your knowledge agent is configured for answer synthesis, the retrieval response contains the following values:

* `response_content`: An LLM-generated answer to the query that cites the retrieved documents.
* `activity_content`: Detailed planning and execution information, including subqueries, reranking decisions, and intermediate steps.
* `references_content`: Source documents and chunks that contributed to the answer.

*Tip:* Retrieval parameters, such as reranker thresholds and knowledge source parameters, influence how aggressively your agent reranks and which sources it queries. Inspect the activity and references to validate grounding and build traceable citations.


In [None]:
# Print the text of the first content item in the first response message.
first_message = result.response[0]
print(first_message.content[0].text)

In [None]:
import json

# Activity -> JSON string of activity as list of dicts.
# Fall back to an empty list so a response without activity records prints "[]"
# instead of raising TypeError when iterating None.
activity_content = json.dumps([a.as_dict() for a in (result.activity or [])], indent=2)
print("activity_content:\n", activity_content, "\n")

In [None]:
# References -> JSON string of references as list of dicts.
# Fall back to an empty list so a response without references prints "[]"
# instead of raising TypeError when iterating None.
references_content = json.dumps([r.as_dict() for r in (result.references or [])], indent=2)
print("references_content:\n", references_content, "\n")