# Create Azure AI Search Index

In [1]:
from azure.search.documents import SearchClient
from azure.search.documents.indexes.models import *
from azure.storage.blob import BlobServiceClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient

## Set environment variables

In [2]:
import os
from dotenv import load_dotenv

# Populate the process environment from the .env file one directory up.
load_dotenv('../.env')

# AZURE AI SEARCH CREDENTIALS
searchservice = os.environ.get('searchservice')  # service name, interpolated into the endpoint URL
index = os.environ.get('index')                  # name of the search index to create
searchkey = os.environ.get('searchkey')          # API key wrapped in AzureKeyCredential below

# OPENAI CONFIGURATION
openai_key = os.environ.get('openai_key')

# DATA CONFIGURATION
filepath = os.environ.get('filepath')

# Fail fast on missing Azure AI Search settings. Without this check a missing
# value silently becomes None and only surfaces later, e.g. as the endpoint
# "https://None.search.windows.net/".
_missing_settings = [
    name for name in ('searchservice', 'index', 'searchkey')
    if not os.environ.get(name)
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in ../.env or in the process environment."
    )

In [3]:
# Wrap the Azure AI Search API key in the credential object handed to the SDK clients.
search_creds = AzureKeyCredential(key=searchkey)

In [4]:
def create_search_index(index: str, embedding_dimensions: int = 1536) -> None:
    """Create the Azure AI Search index named ``index`` unless it already exists.

    The function connects to ``https://{searchservice}.search.windows.net/``
    using the module-level ``searchservice`` and ``search_creds`` values. If
    the service does not list an index called ``index``, one is created with:

    * ``id``         - string key; sortable, filterable and facetable
    * ``content``    - searchable string using the ``en.microsoft`` analyzer
    * ``embedding``  - searchable collection of singles bound to the
      ``mlops-vector-profile`` vector search profile
    * ``sourcepage`` - string; filterable and facetable
    * ``sourcefile`` - string; neither filterable nor facetable

    together with an HNSW algorithm configuration
    (``mlops-vector-search-algo``) and a semantic configuration
    (``mlops-semantic-config``) that prioritizes the ``content`` field.

    Progress is reported with ``print``.

    Args:
        index: Name of the search index to ensure.
        embedding_dimensions: Length of the vectors stored in the
            ``embedding`` field. Defaults to 1536, the value that was
            previously hard-coded.

    Returns:
        None.

    Note:
        Errors raised by the Azure SDK client calls are not caught here.
    """
    # Initialize search index client
    print(f"Ensuring search index {index} exists")
    index_client = SearchIndexClient(
        endpoint=f"https://{searchservice}.search.windows.net/",
        credential=search_creds
    )

    # Nothing to do when the service already lists an index with this name.
    if index in index_client.list_index_names():
        print(f"Search index {index} already exists")
        return

    print(f"Creating index {index}")
    fields = [
        SimpleField(
            name="id",
            type=SearchFieldDataType.String,
            key=True,
            sortable=True,
            filterable=True,
            facetable=True
        ),
        SearchableField(
            name="content",
            type=SearchFieldDataType.String,
            analyzer_name="en.microsoft"
        ),
        SearchField(
            name="embedding",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=embedding_dimensions,
            vector_search_profile_name="mlops-vector-profile"
        ),
        SimpleField(
            name="sourcepage",
            type=SearchFieldDataType.String,
            filterable=True,
            facetable=True
        ),
        SimpleField(
            name="sourcefile",
            type=SearchFieldDataType.String,
            filterable=False,
            facetable=False
        )
    ]

    # configure the vector search; the profile name must match the one the
    # "embedding" field refers to, and the profile points at the algorithm
    vector_search = VectorSearch(
        profiles=[VectorSearchProfile(name="mlops-vector-profile", algorithm_configuration_name="mlops-vector-search-algo")],
        algorithms=[HnswAlgorithmConfiguration(name="mlops-vector-search-algo")]
    )

    # configure semantic search
    semantic_config = SemanticConfiguration(
        name="mlops-semantic-config",
        prioritized_fields=SemanticPrioritizedFields(
            content_fields=[SemanticField(field_name="content")]
        )
    )
    semantic_search = SemanticSearch(
        configurations=[semantic_config]
    )

    # create the search index with vector and semantic settings; a separate
    # name keeps the `index` parameter bound to the index name string
    index_definition = SearchIndex(
        name=index,
        fields=fields,
        vector_search=vector_search,
        semantic_search=semantic_search
    )
    result = index_client.create_or_update_index(index_definition)
    print(f"{result.name} created")

In [5]:
# Entry point: ensure the index named by the `index` setting exists.
if __name__ == "__main__":
    create_search_index(index=index)

Ensuring search index mlops-rag exists
Creating index mlops-rag
mlops-rag created
