In [1]:
import os
from dotenv import load_dotenv

# Load settings via python-dotenv, then read each value from the process
# environment. Any variable that is not set comes back as None from os.getenv.
load_dotenv()

AZURE_SEARCH_SERVICE = os.getenv("AISEARCH_API_ENDPOINT_3")
AZURE_OPENAI_ACCOUNT = os.getenv("AZURE_OPENAI_ACCOUNT")
AZURE_DEPLOYMENT_MODEL = os.getenv("DEPLOYMENT_NAME")
AZURE_STORAGE_CONNECTION = os.getenv("AZURE_STORAGE_CONNECTION")
AZURE_AI_MULTISERVICE_ACCOUNT = os.getenv("AZURE_AI_MULTISERVICE_ACCOUNT")
AZURE_AI_MULTISERVICE_KEY = os.getenv("AZURE_AI_MULTISERVICE_KEY")

# Never echo the key itself: cell output is stored inside the notebook file and
# would leak the secret to anyone the notebook is shared with. Report only
# whether a key was found.
print(
    AZURE_AI_MULTISERVICE_ACCOUNT,
    "<key set>" if AZURE_AI_MULTISERVICE_KEY else "<key missing>",
)

https://all-in-one-resource-eastus.cognitiveservices.azure.com/ <REDACTED>


In [2]:
from azure.identity import DefaultAzureCredential
from azure.identity import get_bearer_token_provider
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex
)

# Credential object passed to every Azure client created in this notebook.
credential = DefaultAzureCredential()

# Target index name and the client used to manage index definitions.
index_name = "vs-index"
index_client = SearchIndexClient(credential=credential, endpoint=AZURE_SEARCH_SERVICE)

# Index schema: one document per chunk, keyed by chunk_id, with a vector field
# bound to the "myHnswProfile" vector search profile declared below.
fields = [
    SearchField(name="parent_id", type=SearchFieldDataType.String),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
        analyzer_name="keyword",
    ),
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        sortable=False,
        filterable=False,
        facetable=False,
    ),
    SearchField(
        name="text_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=1024,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search building blocks: an HNSW algorithm configuration, a profile that
# references it, and the Azure OpenAI vectorizer the profile points to.
hnsw_algorithm = HnswAlgorithmConfiguration(name="myHnsw")
hnsw_profile = VectorSearchProfile(
    name="myHnswProfile",
    algorithm_configuration_name="myHnsw",
    vectorizer_name="myOpenAI",
)
openai_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="myOpenAI",
    kind="azureOpenAI",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=AZURE_OPENAI_ACCOUNT,
        deployment_name="text-embedding-3-large",
        model_name="text-embedding-3-large",
    ),
)
vector_search = VectorSearch(
    algorithms=[hnsw_algorithm],
    profiles=[hnsw_profile],
    vectorizers=[openai_vectorizer],
)

# Create the index, or update it if one with this name already exists.
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

vs-index created


In [None]:
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection
)

# Describe the blob container ("contoso") and the data source connection that
# points at it through the storage connection string read from the environment.
container = SearchIndexerDataContainer(name="contoso")
data_source_connection = SearchIndexerDataSourceConnection(
    name="contoso",
    type="azureblob",
    container=container,
    connection_string=AZURE_STORAGE_CONNECTION,
)

# Register (or update) the data source connection on the search service.
indexer_client = SearchIndexerClient(credential=credential, endpoint=AZURE_SEARCH_SERVICE)
data_source = indexer_client.create_or_update_data_source_connection(data_source_connection)

print(f"Data source '{data_source.name}' created or updated")

Data source 'contoso' created or updated


In [4]:
from azure.search.documents.indexes.models import (
    SplitSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    AzureOpenAIEmbeddingSkill,
    EntityRecognitionSkill,
    SearchIndexerIndexProjection,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    IndexProjectionMode,
    SearchIndexerSkillset,
    CognitiveServicesAccountKey
)

# Skillset name; the indexer definition refers to it through `skillset_name`.
skillset_name = "py-rag-tutorial-ss"

# Skill 1: split /document/content into "pages" (maximum_page_length=2000,
# page_overlap_length=500).
split_skill = SplitSkill(
    description="Split skill to chunk documents",
    context="/document",
    text_split_mode="pages",
    maximum_page_length=2000,
    page_overlap_length=500,
    inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
    outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
)

# Skill 2: embed each page with the text-embedding-3-large deployment and
# store the result as "text_vector" (dimensions=1024, matching the index field).
embedding_skill = AzureOpenAIEmbeddingSkill(
    description="Skill to generate embeddings via Azure OpenAI",
    context="/document/pages/*",
    resource_url=AZURE_OPENAI_ACCOUNT,
    deployment_name="text-embedding-3-large",
    model_name="text-embedding-3-large",
    dimensions=1024,
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=[OutputFieldMappingEntry(name="embedding", target_name="text_vector")],
)

# Index projection: map each page, its vector, and the source file name onto
# the chunk, text_vector, and title fields of the target index.
chunk_mappings = [
    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
    InputFieldMappingEntry(name="text_vector", source="/document/pages/*/text_vector"),
    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
]
chunk_selector = SearchIndexerIndexProjectionSelector(
    target_index_name=index_name,
    parent_key_field_name="parent_id",
    source_context="/document/pages/*",
    mappings=chunk_mappings,
)
index_projections = SearchIndexerIndexProjection(
    selectors=[chunk_selector],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)

# Multi-service account key attached to the skillset.
cognitive_services_account = CognitiveServicesAccountKey(key=AZURE_AI_MULTISERVICE_KEY)

# Skills are listed in the order split, then embed.
skills = [split_skill, embedding_skill]

skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="Skillset to chunk documents and generating embeddings",
    skills=skills,
    index_projection=index_projections,
    cognitive_services_account=cognitive_services_account,
)

# Create the skillset, or update it if one with this name already exists.
client = SearchIndexerClient(credential=credential, endpoint=AZURE_SEARCH_SERVICE)
client.create_or_update_skillset(skillset)
print(f"{skillset.name} created")

py-rag-tutorial-ss created


In [None]:
from azure.search.documents.indexes.models import (
    SearchIndexer
)

# Create an indexer that connects the data source, the skillset, and the target index.
indexer_name = "vs-indexer"

# No additional indexer parameters are supplied.
indexer_parameters = None

indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    data_source_name=data_source.name,
    skillset_name=skillset_name,
    target_index_name=index_name,
    parameters=indexer_parameters,
)

# Submit the indexer definition to the search service (create or update).
indexer_client = SearchIndexerClient(credential=credential, endpoint=AZURE_SEARCH_SERVICE)
indexer_result = indexer_client.create_or_update_indexer(indexer)

print(f' {indexer_name} is created and running. Give the indexer a few minutes before running a query.')

 vs-indexer is created and running. Give the indexer a few minutes before running a query.


In [None]:
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery

# Quick check of the index: send the same text as a keyword query and as a
# vectorizable text query against "text_vector", keeping only the top hit.
query = "what are the contoso packages ?"

search_client = SearchClient(
    endpoint=AZURE_SEARCH_SERVICE,
    index_name=index_name,
    credential=credential,
)
vector_query = VectorizableTextQuery(
    text=query,
    fields="text_vector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["chunk"],
    top=1,
)

# Show the score and chunk text of each returned hit.
for result in results:
    print("Score:", result["@search.score"])
    print("Chunk:", result["chunk"])

Score: 0.03333333507180214
Chunk: Contoso Electronics 
Plan and Benefit Packages



This document contains information generated using a language model (Azure OpenAI). The information 
contained in this document is only for demonstration purposes and does not reflect the opinions or 
beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, 
about the completeness, accuracy, reliability, suitability or availability with respect to the information 
contained in this document. 

All rights reserved to Microsoft



Welcome to Contoso Electronics! We are excited to offer our employees two comprehensive health 
insurance plans through Northwind Health. 

Northwind Health Plus 
Northwind Health Plus is a comprehensive plan that provides comprehensive coverage for medical, 
vision, and dental services. This plan also offers prescription drug coverage, mental health and substance 
abuse coverage, and coverage for preventive care services. With Nort

In [None]:
from azure.search.documents import SearchClient
from openai import AzureOpenAI

# Chat deployment used for the completion calls in later cells.
deployment_name = "gpt-4o"

# Azure OpenAI client authenticated through a bearer-token provider built from
# the shared credential (no API key is passed).
token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default",
)
openai_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ACCOUNT,
    azure_ad_token_provider=token_provider,
    api_version="2024-06-01",
)

# Search client bound to the index created earlier in the notebook.
search_client = SearchClient(
    credential=credential,
    endpoint=AZURE_SEARCH_SERVICE,
    index_name=index_name,
)

# Prompt template; {query} and {sources} are filled in with str.format before
# each chat completion call.
GROUNDED_PROMPT="""
You are an AI assistant that helps users learn from the information found in the source material.
Answer the query using only the sources provided below.
Use bullets if the answer has multiple points.
If the answer is longer than 3 sentences, provide a summary.
Answer ONLY with the facts listed in the list of sources below. Cite your source when you answer the question
If there isn't enough information below, say you don't know.
Do not generate answers that don't use the sources below.
Query: {query}
Sources:\n{sources}
"""

In [14]:
# Hybrid retrieval: `query` is sent as keyword search text and also wrapped in a
# VectorizableTextQuery over "text_vector" with k_nearest_neighbors=50.
query = "what are the contoso packages ?"
vector_query = VectorizableTextQuery(
    text=query,
    fields="text_vector",
    k_nearest_neighbors=50,
)

# Fetch only the title and chunk fields, capped at the top 5 matches so the
# prompt built from them stays small.
search_results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk"],
    top=5,
)

# Render each hit as one "TITLE: ..., CONTENT: ..." entry, then join the
# entries with a separator line.
source_entries = []
for document in search_results:
    source_entries.append(f'TITLE: {document["title"]}, CONTENT: {document["chunk"]}')
sources_formatted = "=================\n".join(source_entries)

print(f"Sources: {sources_formatted}")


Sources: TITLE: Benefit_Options.pdf, CONTENT: Contoso Electronics 
Plan and Benefit Packages



This document contains information generated using a language model (Azure OpenAI). The information 
contained in this document is only for demonstration purposes and does not reflect the opinions or 
beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, 
about the completeness, accuracy, reliability, suitability or availability with respect to the information 
contained in this document. 

All rights reserved to Microsoft



Welcome to Contoso Electronics! We are excited to offer our employees two comprehensive health 
insurance plans through Northwind Health. 

Northwind Health Plus 
Northwind Health Plus is a comprehensive plan that provides comprehensive coverage for medical, 
vision, and dental services. This plan also offers prescription drug coverage, mental health and substance 
abuse coverage, and coverage for preventive care service

In [11]:
# Fill the grounded prompt with the current query and retrieved sources, and
# send it as a single user message to the chat deployment.
grounded_message = {
    "role": "user",
    "content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted),
}
response = openai_client.chat.completions.create(
    model=deployment_name,
    messages=[grounded_message],
)

# Show the text of the first returned choice.
print(response.choices[0].message.content)

The Contoso Electronics packages refer specifically to the two health insurance plans offered to employees, as described in the source material:

- **Northwind Health Plus**:
  - Comprehensive coverage for medical, vision, dental, and prescription services.
  - Includes coverage for mental health, substance abuse, preventive care, and emergency services (both in-network and out-of-network).
  - Extensive network options, including primary care physicians, specialists, hospitals, and pharmacies.

- **Northwind Standard**:
  - Basic coverage for medical, vision, dental, and prescription services.
  - Includes preventive care services but excludes emergency services, mental health, substance abuse, and out-of-network services.
  - Coverage for Neurodevelopmental Therapy (Habilitation) focused on in-network providers for necessary services.

(Source: **Benefit_Options.pdf**, **Northwind_Standard_Benefits_Details.pdf**)


In [None]:
# Second question: same hybrid retrieval and grounded completion as before,
# run end to end in one cell.
query = "What are the employees packages ?"
vector_query = VectorizableTextQuery(
    text=query,
    fields="text_vector",
    k_nearest_neighbors=50,
)

# Retrieve the top 5 matches, returning only the title and chunk fields.
search_results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk"],
    top=5,
)

# One "TITLE: ..., CONTENT: ..." entry per hit, joined by a separator line.
source_entries = []
for document in search_results:
    source_entries.append(f'TITLE: {document["title"]}, CONTENT: {document["chunk"]}')
sources_formatted = "=================\n".join(source_entries)

# Ask the chat deployment to answer using only the retrieved sources.
grounded_message = {
    "role": "user",
    "content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted),
}
response = openai_client.chat.completions.create(
    model=deployment_name,
    messages=[grounded_message],
)

print(response.choices[0].message.content)

Contoso Electronics offers its employees the following benefits packages:

### Health Insurance Plans:
1. **Northwind Health Plus**:
   - Comprehensive coverage for medical, vision, and dental services.
   - Includes prescription drug coverage, mental health and substance abuse coverage, preventive care, and emergency services (in-network and out-of-network).
   - Access to a variety of in-network providers such as primary care physicians, specialists, hospitals, and pharmacies.
   - Covers preventive care services like routine physicals, well-child visits, immunizations, mammograms, colonoscopies, and other cancer screenings.  
   *Source: Benefit_Options.pdf*

2. **Northwind Standard**:
   - Basic medical, vision, and dental coverage.
   - Includes preventive care services and prescription drug coverage.
   - Does not cover emergency services, mental health and substance abuse-related care, or out-of-network services.
   - Meets HIPAA and ACA requirements by providing minimum essenti