In [2]:
# Install the packages this notebook relies on into the running kernel's environment.
# Only azure-search-documents is pinned (to the 11.6.0b7 preview build); the others are unpinned.
%pip install python-dotenv
%pip install azure-search-documents==11.6.0b7
%pip install azure-identity
%pip install openai

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/anaconda/envs/azureml_py310_sdkv2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Collecting azure-search-documents==11.6.0b7
  Downloading azure_search_documents-11.6.0b7-py3-none-any.whl.metadata (22 kB)
Downloading azure_search_documents-11.6.0b7-py3-none-any.whl (335 kB)
Installing collected packages: azure-search-documents
Successfully installed azure-search-documents-11.6.0b7

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[

In [3]:
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

# Service settings used throughout this notebook.
# Each value is read from an environment variable (for example, exported from
# your .env file before the kernel starts) and falls back to the previous
# hard-coded default when the variable is unset, so nothing has to be edited
# in the notebook itself and no secret needs to be committed with it.
endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT", "")
_search_admin_key = os.getenv("AZURE_SEARCH_ADMIN_KEY", "")
# An empty key cannot authenticate, so only build a key credential when a key
# was supplied; otherwise fall back to DefaultAzureCredential.
credential = AzureKeyCredential(_search_admin_key) if _search_admin_key else DefaultAzureCredential()
index_name = os.getenv("AZURE_SEARCH_INDEX", "vectest")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "")
# Left as an empty string when unset; the AzureOpenAI client cell switches to
# the bearer-token provider in that case.
azure_openai_key = os.getenv("AZURE_OPENAI_KEY", "")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
# Environment values are strings, so convert the dimension count explicitly.
azure_openai_embedding_dimensions = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", "1024"))
embedding_model_name = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-large")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-10-21")

In [9]:
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import json

# Credential and bearer-token provider for Azure OpenAI. The client cell only
# hands the provider to AzureOpenAI when no API key is configured.
openai_credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(
    openai_credential,
    "https://cognitiveservices.azure.com/.default",
)

In [10]:
# Azure OpenAI client bound to the embedding deployment.
# The API key is always passed through; the bearer-token provider is supplied
# only when that key is empty.
client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    azure_deployment=azure_openai_embedding_deployment,
    api_version=azure_openai_api_version,
    api_key=azure_openai_key,
    azure_ad_token_provider=None if azure_openai_key else token_provider,
)

In [11]:
# Load the documents to embed from data.json (read as UTF-8) in the working directory.
path = 'data.json'
with open(path, encoding='utf-8') as file:
    input_data = json.load(file)

In [12]:
# Collect the text of every document, then embed all titles and all contents
# with one embeddings request each.
titles = [doc['title'] for doc in input_data]
content = [doc['content'] for doc in input_data]

title_response = client.embeddings.create(
    model=embedding_model_name,
    input=titles,
    dimensions=azure_openai_embedding_dimensions,
)
title_embeddings = [entry.embedding for entry in title_response.data]

content_response = client.embeddings.create(
    model=embedding_model_name,
    input=content,
    dimensions=azure_openai_embedding_dimensions,
)
content_embeddings = [entry.embedding for entry in content_response.data]

In [13]:
# Attach each document's title and content embeddings by position.
# The loop deliberately creates no `title` / `content` variables: in a notebook
# they would be globals, and `content` would overwrite the list of content
# strings built in the embedding cell.
for i, item in enumerate(input_data):
    item['titleVector'] = title_embeddings[i]
    item['contentVector'] = content_embeddings[i]

# Output embeddings to docVectors.json file
output_path = os.path.join('.', 'docVectors.json')
output_directory = os.path.dirname(output_path)
# exist_ok=True replaces the separate exists() check and is safe to re-run.
os.makedirs(output_directory, exist_ok=True)
# Write with an explicit encoding so the file does not depend on the platform default.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(input_data, f)

In [14]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters
)


In [15]:
# Index-management client plus the field schema for the index.
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

fields = [
    # Document key.
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    # Text fields.
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
    # Vector fields: both share the same element type, dimensions and profile.
    *[
        SearchField(
            name=vector_field_name,
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=azure_openai_embedding_dimensions,
            vector_search_profile_name="myHnswProfile",
        )
        for vector_field_name in ("titleVector", "contentVector")
    ],
]

In [16]:
# Vector search configuration, assembled from its three parts.

# HNSW algorithm configuration, left at its default parameters.
hnsw_algorithm = HnswAlgorithmConfiguration(name="myHnsw")

# Profile referenced by the vector fields; ties the algorithm to the vectorizer.
hnsw_profile = VectorSearchProfile(
    name="myHnswProfile",
    algorithm_configuration_name="myHnsw",
    vectorizer_name="myVectorizer",
)

# Vectorizer pointing at the same Azure OpenAI deployment used to embed the documents.
openai_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="myVectorizer",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=azure_openai_endpoint,
        deployment_name=azure_openai_embedding_deployment,
        model_name=embedding_model_name,
        api_key=azure_openai_key,
    ),
)

vector_search = VectorSearch(
    algorithms=[hnsw_algorithm],
    profiles=[hnsw_profile],
    vectorizers=[openai_vectorizer],
)

In [17]:
# Semantic configuration: which fields act as title, keywords and content.
prioritized_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="title"),
    keywords_fields=[SemanticField(field_name="category")],
    content_fields=[SemanticField(field_name="content")],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=prioritized_fields,
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and create it (or update it if it already exists).
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

vectest created


In [18]:
from azure.search.documents import SearchIndexingBufferedSender

In [19]:
# Read back the documents (with their vectors) written to output_path.
with open(output_path) as file:
    documents = json.load(file)

# Queue upload actions for every document through a SearchIndexingBufferedSender,
# which sends them to the index in batches.
with SearchIndexingBufferedSender(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
) as batch_client:
    batch_client.upload_documents(documents=documents)

print(f"Uploaded {len(documents)} documents in total")

Uploaded 108 documents in total


In [20]:
from azure.search.documents import SearchItemPaged

In [21]:
def print_results(results: SearchItemPaged[dict]):
    """Print semantic answers (if any) followed by every search result.

    Args:
        results: Paged search results. Must provide ``get_answers()`` and be
            iterable, yielding dict-like documents that contain ``title``,
            ``content``, ``category`` and ``@search.score``.

    For each document the title, score, content and category are printed.
    The reranker score and the first caption are printed only when present.
    For answers and captions the highlighted text is preferred over the plain
    text whenever highlights are non-empty.
    """
    semantic_answers = results.get_answers()
    if semantic_answers:
        for answer in semantic_answers:
            print(f"Semantic Answer: {answer.highlights or answer.text}")
            print(f"Semantic Answer Score: {answer.score}\n")

    for result in results:
        print(f"Title: {result['title']}")
        print(f"Score: {result['@search.score']}")
        # Compare against None rather than truthiness so that a legitimate
        # reranker score of 0.0 is still reported.
        reranker_score = result.get('@search.reranker_score')
        if reranker_score is not None:
            print(f"Reranker Score: {reranker_score}")
        print(f"Content: {result['content']}")
        print(f"Category: {result['category']}\n")

        # .get() tolerates results that carry no captions entry at all.
        captions = result.get("@search.captions")
        if captions:
            caption = captions[0]
            print(f"Caption: {caption.highlights or caption.text}\n")

In [24]:
from azure.search.documents.models import VectorizedQuery

In [25]:
from azure.search.documents import SearchClient
import json

In [29]:
# Load the vectorized documents and upload them through a SearchClient; the
# same client is used by the query cells that follow.
# No directory is created here: this cell only reads docVectors.json, so a
# missing file should surface as an error rather than leave an empty directory.
output_path = os.path.join('.', 'docVectors.json')
with open(output_path, 'r', encoding='utf-8') as file:
    documents = json.load(file)
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
result = search_client.upload_documents(documents)
# upload_documents returns one IndexingResult per document; report the ones
# that were rejected instead of assuming every document was indexed.
failed_keys = [indexing_result.key for indexing_result in result if not indexing_result.succeeded]
if failed_keys:
    print(f"Failed to upload {len(failed_keys)} documents: {failed_keys}")
print(f"Uploaded {len(documents) - len(failed_keys)} documents")

Uploaded 108 documents


In [30]:
# Pure vector search with a client-side embedding: the query text is embedded
# here and the raw vector is sent to the index.
query = "tools for software development"
embedding_response = client.embeddings.create(
    model=embedding_model_name,
    input=query,
    dimensions=azure_openai_embedding_dimensions,
)
embedding = embedding_response.data[0].embedding

# Ask for the 50 nearest neighbours on contentVector.
# For how vector ranking works, see https://learn.microsoft.com/azure/search/vector-search-ranking
vector_query = VectorizedQuery(
    vector=embedding,
    fields="contentVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.6198821
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.60532993
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

In [31]:
from azure.search.documents.models import VectorizableTextQuery


In [32]:
# Pure vector search using a VectorizableTextQuery: the query is passed as
# text rather than as a precomputed embedding.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.6198821
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.60532993
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

In [33]:
# Multi-lingual pure vector search: the query below is
# 'tools for software development' written in Dutch.
query = "tools voor softwareontwikkeling"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.5811603
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.5746114
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such 

In [34]:
# Pure vector search with exhaustive=True set on the vector query; otherwise
# identical to the VectorizableTextQuery search on contentVector.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
    exhaustive=True,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.6198821
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.60532993
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

In [35]:
# Cross-field vector search: a single vector query whose `fields` value names
# both contentVector and titleVector.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector, titleVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.03333333507180214
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.032786883413791656
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports var

In [36]:
# Multi-vector search: two vector queries built from the same text, one per
# vector field, sent in a single request.
query = "tools for software development"

vector_query_1 = VectorizableTextQuery(
    text=query,
    fields="titleVector",
    k_nearest_neighbors=50,
)
vector_query_2 = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query_1, vector_query_2],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.03333333507180214
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.032786883413791656
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports var

In [37]:
# Weighted multi-vector search: the titleVector query carries weight=2 and
# the contentVector query weight=0.5.
query = "tools for software development"

vector_query_1 = VectorizableTextQuery(
    text=query,
    fields="titleVector",
    k_nearest_neighbors=50,
    weight=2,
)
vector_query_2 = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
    weight=0.5,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    vector_queries=[vector_query_1, vector_query_2],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.0416666679084301
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.04098360240459442
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports vario

In [38]:
from azure.search.documents.models import VectorFilterMode

In [39]:
# Filtered vector search: restricts results to the 'Developer Tools' category
# and requests VectorFilterMode.PRE_FILTER.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    filter="category eq 'Developer Tools'",
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    vector_queries=[vector_query],
    search_text=None,
)

print_results(results)

Title: Azure DevOps
Score: 0.6198821
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.60532993
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

In [40]:
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType

In [41]:
# Semantic hybrid search: the same text is used as the keyword query and as
# the vector query, with the semantic query type plus extractive captions and
# answers requested.
# NOTE(review): "sarch" is part of the runtime query string - presumably a
# deliberate misspelling; confirm before changing it.
query = "what is azure sarch?"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
    exhaustive=True,
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    search_text=query,
    vector_queries=[vector_query],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name='my-semantic-config',
    query_answer=QueryAnswerType.EXTRACTIVE,
    query_caption=QueryCaptionType.EXTRACTIVE,
)

print_results(results)

Semantic Answer: Azure Data Share is<em> a fully managed data sharing service </em>that enables you to securely share and receive data from other organizations using Azure. It provides features like data snapshotting, change tracking, and access control. Data Share supports various data sources, such as Azure Blob Storage, Azure Data Lake Storage, and Azure SQL Database..
Semantic Answer Score: 0.9290000200271606

Title: Azure Data Share
Score: 0.01786786876618862
Reranker Score: 2.7110650539398193
Content: Azure Data Share is a fully managed data sharing service that enables you to securely share and receive data from other organizations using Azure. It provides features like data snapshotting, change tracking, and access control. Data Share supports various data sources, such as Azure Blob Storage, Azure Data Lake Storage, and Azure SQL Database. You can use Azure Data Share to collaborate with your partners, improve your data governance, and ensure the security of your data. It also

In [42]:
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType, QueryDebugMode
from typing import Optional

In [43]:
# Workaround required to use debug query rewrites with the preview SDK
# NOTE(review): everything below patches private (underscore-prefixed) modules of
# azure-search-documents, so it is presumably tied to the pinned preview build - confirm
# before upgrading the SDK.
import azure.search.documents._generated.models
# Overwrite the key that the generated SearchDocumentsResult model maps its `debug_info`
# attribute to (presumably so the field is read from the "@search.debug" response property - TODO confirm).
azure.search.documents._generated.models.SearchDocumentsResult._attribute_map["debug_info"]["key"] = "@search\\.debug"
from azure.search.documents._generated.models import DebugInfo
import azure.search.documents._paging
def get_debug_info(self) -> Optional[DebugInfo]:
    """Return the `debug_info` of this iterator's response, after resetting `continuation_token` to None."""
    self.continuation_token = None
    return self._response.debug_info
# Attach the accessor to the page iterator, wrapped in the SDK's own `_ensure_response` helper.
azure.search.documents._paging.SearchPageIterator.get_debug_info = azure.search.documents._paging._ensure_response(get_debug_info)
# Expose the same accessor on the paged result object by delegating to its first page iterator.
azure.search.documents._paging.SearchItemPaged.get_debug_info = lambda self: self._first_iterator_instance().get_debug_info()

# Semantic hybrid search with query rewriting: both the text query and the
# vector query request "generative|count-5" rewrites, and the query-rewrites
# debug mode is enabled so the rewrites can be read back afterwards.
# NOTE(review): "sarch" is part of the runtime query string - presumably a
# deliberate misspelling; confirm before changing it.
query = "what is azure sarch?"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=50,
    query_rewrites="generative|count-5",
)

results = search_client.search(
    top=3,
    select=["title", "content", "category"],
    search_text=query,
    vector_queries=[vector_query],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name='my-semantic-config',
    query_language="en",
    query_rewrites="generative|count-5",
    debug=QueryDebugMode.QUERY_REWRITES,
    query_answer=QueryAnswerType.EXTRACTIVE,
    query_caption=QueryCaptionType.EXTRACTIVE,
)

# Pull the rewrites out of the debug info before printing the results.
text_query_rewrites = results.get_debug_info().query_rewrites.text.rewrites
vector_query_rewrites = results.get_debug_info().query_rewrites.vectors[0].rewrites
print_results(results)

print("Text Query Rewrites:", text_query_rewrites, sep="\n")
print("Vector Query Rewrites:", vector_query_rewrites, sep="\n")

Semantic Answer: Azure Cognitive Search is<em> a fully managed search-as-a-service </em>that enables you to build rich search experiences for your applications. It provides features like full-text search, faceted navigation, and filters. Azure Cognitive Search supports various data sources, such as Azure SQL Database, Azure Blob Storage, and Azure Cosmos DB. You can use ...
Semantic Answer Score: 0.871999979019165

Title: Azure Cognitive Search
Score: 0.03333333507180214
Reranker Score: 2.555807590484619
Content: Azure Cognitive Search is a fully managed search-as-a-service that enables you to build rich search experiences for your applications. It provides features like full-text search, faceted navigation, and filters. Azure Cognitive Search supports various data sources, such as Azure SQL Database, Azure Blob Storage, and Azure Cosmos DB. You can use Azure Cognitive Search to index your data, create custom scoring profiles, and integrate with other Azure services. It also integrates