In [1]:
# set environment variables before importing any other code (in particular the openai module)
import os
from dotenv import load_dotenv

env_path = './.env'
load_dotenv(dotenv_path=env_path)

True

In [2]:
# searchclientIndex (add/remove data) create_or_update_index

from openai import AsyncAzureOpenAI
import jinja2
import pathlib
import logging
import json

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    CorsOptions,
    SearchIndex,
    SearchFieldDataType
)

logger = logging.getLogger("test")

search_index_client = SearchIndexClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"],  #azure_ai_search_enpoint, #
    credential=AzureKeyCredential(os.environ["AZURE_AI_SEARCH_KEY"])) #azure_ai_search_key) #


In [None]:
# example 1: create index , define columns programming

In [4]:
from azure.search.documents.indexes.models import (
    SearchableField,
    ScoringProfile,
    SimpleField,
    ComplexField
)
from typing import List


# define columns programming

name = "hotels"
fields = [
    SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
    SimpleField(name="baseRate", type=SearchFieldDataType.Double),
    SearchableField(name="description", type=SearchFieldDataType.String, collection=True),
#     ComplexField(
#         name="address",
#         fields=[
#             SimpleField(name="streetAddress", type=SearchFieldDataType.String),
#             SimpleField(name="city", type=SearchFieldDataType.String),
#         ],
#         collection=True,
#     ),
]
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles: List[ScoringProfile] = []
index = SearchIndex(name=name, fields=fields, scoring_profiles=scoring_profiles, cors_options=cors_options)

result = search_index_client.create_index(index)
print(result)

{'additional_properties': {}, 'name': 'hotels', 'fields': [<azure.search.documents.indexes.models._index.SearchField object at 0x10ba0fa40>, <azure.search.documents.indexes.models._index.SearchField object at 0x106db59d0>, <azure.search.documents.indexes.models._index.SearchField object at 0x106db4920>], 'scoring_profiles': [], 'default_scoring_profile': None, 'cors_options': <azure.search.documents.indexes._generated.models._models_py3.CorsOptions object at 0x10bb48860>, 'suggesters': [], 'analyzers': None, 'tokenizers': None, 'token_filters': [], 'char_filters': [], 'encryption_key': None, 'similarity': <azure.search.documents.indexes._generated.models._models_py3.BM25SimilarityAlgorithm object at 0x10bb48e30>, 'semantic_search': None, 'vector_search': None, 'e_tag': '"0x8DCE82886D46B01"'}


In [5]:
result2 = search_index_client.get_index(name)
print(result2)

{'additional_properties': {}, 'name': 'hotels', 'fields': [<azure.search.documents.indexes.models._index.SearchField object at 0x10b8143e0>, <azure.search.documents.indexes.models._index.SearchField object at 0x106dc5e20>, <azure.search.documents.indexes.models._index.SearchField object at 0x106df40b0>], 'scoring_profiles': [], 'default_scoring_profile': None, 'cors_options': <azure.search.documents.indexes._generated.models._models_py3.CorsOptions object at 0x10bb4a2d0>, 'suggesters': [], 'analyzers': None, 'tokenizers': None, 'token_filters': [], 'char_filters': [], 'encryption_key': None, 'similarity': <azure.search.documents.indexes._generated.models._models_py3.BM25SimilarityAlgorithm object at 0x10bb4a810>, 'semantic_search': None, 'vector_search': None, 'e_tag': '"0x8DCE82886D46B01"'}


In [6]:

DOCUMENT = {
    "hotelId": "1000",
    "baseRate": 4.0,
    "description": ["Hotel"],
}

search_client = SearchClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_AI_SEARCH_KEY"]),
    index_name="hotels")

result = search_client.upload_documents(documents=[DOCUMENT])
print("Upload of new document succeeded: {}".format(result[0].succeeded))

Upload of new document succeeded: True


In [16]:

# acl_type = 'azureblob' #'content' #Type of acls to manage. Valid values include groups or oids.
#  retrieve documents relevant to the user's question from Cognitive Search
search_client = SearchClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_AI_SEARCH_KEY"]),
    index_name="pdf-index",
    logging_enable=True) #os.environ["AZURE_AI_SEARCH_INDEX_NAME"])


with search_client:
    # doc = search_client.get_document(key='hotelId', selected_fields=None) #['description'])
    # #, verbose=True)
    # print(doc)
    results = search_client.search(search_text="*", logging_enable=True)

    ans = results.get_answers()
    print(type(ans), ans)

    fts = results.get_facets()
    print(fts)
    cvg = results.get_coverage()
    print(cvg)

    cnt = results.get_count()
    print(cnt)

    search_client.close()

    print('close completely')

<class 'NoneType'> None
None
None
None
close completely


In [7]:
from azure.ai.resources.client import AIClient
from azure.identity import DefaultAzureCredential

# build the index using the product catalog docs from data/3-product-info
# same as build index
def build_cogsearch_index(index_name, path_to_data):
    from azure.ai.resources.operations._index_data_source import LocalSource, ACSOutputConfig
    from azure.ai.generative.index import build_index

    # Set up environment variables for cog search SDK
    os.environ["AZURE_COGNITIVE_SEARCH_TARGET"] = os.environ["AZURE_AI_SEARCH_ENDPOINT"]
    os.environ["AZURE_COGNITIVE_SEARCH_KEY"] = os.environ["AZURE_AI_SEARCH_KEY"]

    client = AIClient.from_config(DefaultAzureCredential())

    # Use the same index name when registering the index in AI Studio
    index = build_index(
        output_index_name=index_name,
        vector_store="azure_cognitive_search",
        embeddings_model=f"azure_open_ai://deployment/{os.environ['AZURE_OPENAI_EMBEDDING_DEPLOYMENT']}/model/{os.environ['AZURE_OPENAI_EMBEDDING_MODEL']}",
        data_source_url="https://product_info.com",
        index_input_config=LocalSource(input_data=path_to_data),
        acs_config=ACSOutputConfig(
            acs_index_name=index_name,
        ),
    )

    # register the index so that it shows up in the project
    cloud_index = client.indexes.create_or_update(index)

    print(f"Created index '{cloud_index.name}'")
    print(f"Local Path: {index.path}")
    print(f"Cloud Path: {cloud_index.path}")

In [None]:
print(os.getenv("AZURE_AI_SEARCH_INDEX_NAME"))

# can successfully create index 
# but fail when update data...
# https://github.com/Azure/aistudio-copilot-sample/blob/836436801a48d14c33efca212d6c7b1bfcf3b0c4/src/run.py#L32

# not try yet 
# build_cogsearch_index("customer_info" , "./data/1-customer-info")