In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# `%pip` is used instead of `!pip` so the packages land in the kernel's own interpreter.
# NOTE(review): no versions are pinned, so a fresh install may pull newer releases
# than the ones this notebook was last run against — consider pinning.
%pip install python-dotenv
%pip install azure-core
%pip install azure-search-documents
%pip install azure-storage-blob
%pip install azure-identity
%pip install openai
%pip install aiohttp
%pip install ipywidgets
%pip install ipykernel

In [5]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load variables from a local .env file into the process environment.


def _mask_secret(value, visible=4):
    """Return a display-safe rendering of a secret.

    Args:
        value: The secret string, or None when the variable is not set.
        visible: Number of trailing characters to leave readable so the key
            can still be told apart from another one.

    Returns:
        "<not set>" for a missing/empty value, otherwise asterisks followed by
        at most `visible` trailing characters. Short values are fully masked.
    """
    if not value:
        return "<not set>"
    if len(value) <= visible * 2:
        return "*" * len(value)
    return "*" * 8 + value[-visible:]


# Service endpoints, keys and deployment names all come from the environment.
azure_search_service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
azure_search_service_admin_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
index_name = "vectest"
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
azure_openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
azure_openai_api_version = "2024-06-01"

# Echo the configuration for a quick sanity check. Keys are masked because cell
# output is saved inside the notebook file and would otherwise leak the secrets.
print(f"search service endpoint: {azure_search_service_endpoint}")
print(f"search admin key: {_mask_secret(azure_search_service_admin_key)}")
print(f"index_name: {index_name}")
print(f"azure_openai_endpoint: {azure_openai_endpoint}")
print(f"azure_openai_key: {_mask_secret(azure_openai_key)}")
print(f"azure_openai_deployment: {azure_openai_deployment}")
print(f"azure_openai_embeddings_deployment: {azure_openai_embeddings_deployment}")
print(f"azure_openai_api_version: {azure_openai_api_version}")

search service endpoint: https://linkedinaisearch.search.windows.net
search admin key: ***REDACTED***
index_name: vectest
azure_openai_endpoint: https://linkedinazoai.openai.azure.com/
azure_openai_key: ***REDACTED***
azure_openai_deployment: gpt-4o
azure_openai_embeddings_deployment: text-embedding-3-large
azure_openai_api_version: 2024-06-01


In [12]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
)

# Authenticate to the search service with the admin key and open an index client.
credential = AzureKeyCredential(azure_search_service_admin_key)
index_client = SearchIndexClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)

# Name of the index to create or update. This reassigns `index_name`, replacing
# the value set in the configuration cell.
index_name = "index-001"

# Index schema: one document per chunk, keyed by `chunk_id`.
fields = [
    SearchField(name="parent_id", type=SearchFieldDataType.String),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(
        name="locations",
        type=SearchFieldDataType.Collection(SearchFieldDataType.String),
        filterable=True,
    ),
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
        analyzer_name="keyword",
    ),
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        sortable=False,
        filterable=False,
        facetable=False,
    ),
    # NOTE(review): 1024 presumably has to match the size of the embeddings
    # written into this field — confirm against the embedding step.
    SearchField(
        name="text_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=1024,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search setup: an HNSW algorithm, a profile that ties it to the
# vectorizer, and the Azure OpenAI vectorizer itself.
hnsw_algorithm = HnswAlgorithmConfiguration(name="myHnsw")
hnsw_profile = VectorSearchProfile(
    name="myHnswProfile",
    algorithm_configuration_name="myHnsw",
    vectorizer_name="myOpenAI",
)
# NOTE(review): no api_key is passed here, so the search service presumably
# authenticates to Azure OpenAI by other means — confirm.
openai_vectorizer_parameters = AzureOpenAIVectorizerParameters(
    resource_url=azure_openai_endpoint,
    deployment_name=azure_openai_embeddings_deployment,
    model_name=azure_openai_embeddings_deployment,
)
openai_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="myOpenAI",
    kind="azureOpenAI",
    parameters=openai_vectorizer_parameters,
)
vector_search = VectorSearch(
    algorithms=[hnsw_algorithm],
    profiles=[hnsw_profile],
    vectorizers=[openai_vectorizer],
)

# Semantic configuration: title, keyword and content fields for the ranker.
semantic_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="title"),
    keywords_fields=[SemanticField(field_name="locations")],
    content_fields=[SemanticField(field_name="chunk")],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=semantic_fields,
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and push it to the service.
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

index-001 created


In [13]:
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection
)

# Create a data source 
# Create (or update) the blob data source connection on the search service.
indexer_client = SearchIndexerClient(endpoint=azure_search_service_endpoint, credential=credential)

# The storage connection string embeds the account key, so it is read from the
# environment (for example the .env file loaded by the configuration cell)
# instead of being written into the notebook.
# NOTE: the account key that was previously hard-coded here has been exposed
# and must be rotated.
blob_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
if not blob_connection_string:
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; add it to your .env file or environment."
    )

# Blob container the data source points at.
container = SearchIndexerDataContainer(name="nasa-ebooks-pdfs-all")
data_source_connection = SearchIndexerDataSourceConnection(
    name="ai-search-ds",
    type="azureblob",
    connection_string=blob_connection_string,
    container=container
)
data_source = indexer_client.create_or_update_data_source_connection(data_source_connection)

print(f"Data source '{data_source.name}' created or updated")

Data source 'ai-search-ds' created or updated
