In [15]:
from dotenv import dotenv_values
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.models import Vector
from azure.search.documents.indexes.models import (
    IndexingSchedule,
    SearchIndex,
    SearchIndexer,
    SearchIndexerDataContainer,
    SearchField,
    SearchFieldDataType,
    SearchableField,
    SemanticConfiguration,
    SimpleField,
    PrioritizedFields,
    SemanticField,
    SemanticSettings,
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
    SearchIndexerDataSourceConnection
)
import requests
import json
import re
import pandas as pd

In [4]:
# Load the Azure AI Search settings from the local env file.
config = dotenv_values('credential.env')

# Report every missing setting at once instead of failing on the first
# absent key with a bare KeyError (which is also what happens when
# credential.env itself is missing or empty).
required_settings = ('ai_search_location', 'ai_search_key', 'ai_search_url')
missing_settings = [name for name in required_settings if name not in config]
if missing_settings:
    raise KeyError(
        f"credential.env is missing required setting(s): {', '.join(missing_settings)}"
    )

ai_search_location = config['ai_search_location']
ai_search_key = config['ai_search_key']
ai_search_url = config['ai_search_url']

# NOTE(review): 'meeeting' looks like a typo, but the index has already been
# created under this exact name (see the output of the index-creation cell),
# so it is kept as-is to keep pointing at the existing index.
ai_search_index = 'oewg-speech-meeeting-index'
ai_search_name = 'oewg-meeting'

# Must match the `dimensions` of the vector fields in the index schema.
embedding_length = 768

# Key credential shared by the search clients created in later cells.
cog_search_cred = AzureKeyCredential(ai_search_key)

In [13]:
# Define the index schema.
# Client used to create/update the index on the search service.
index_client = SearchIndexClient(endpoint=ai_search_url, credential=cog_search_cred)

# NOTE(review): "Text" is declared filterable/sortable/facetable like the short
# metadata fields. If it holds full speech text, those attributes are probably
# not needed and may run into per-term size limits for long values - confirm
# against the Azure AI Search field-attribute documentation before the next
# index rebuild (attributes of an existing field generally cannot be changed
# in place).
fields = [
    # Document key.
    SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True,
                filterable=True),
    # Text metadata and content fields.
    SearchableField(name="Session", type=SearchFieldDataType.String,
                    searchable=True, retrievable=True, filterable=True, sortable=True, facetable=True),
    SearchableField(name="Meeting", type=SearchFieldDataType.String,
                    searchable=True, retrievable=True, filterable=True, sortable=True, facetable=True),
    SearchableField(name="Speaker", type=SearchFieldDataType.String,
                    searchable=True, retrievable=True, filterable=True, sortable=True, facetable=True),
    SearchableField(name="Text", type=SearchFieldDataType.String,
                    searchable=True, retrievable=True, filterable=True, sortable=True, facetable=True),
    # Vector fields. `dimensions` comes from the shared `embedding_length`
    # setting (768) instead of repeating the literal, and
    # "vector-config" must match the algorithm configuration name declared
    # in the vector-search cell.
    SearchField(name="TextEmbeddings", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, dimensions=embedding_length, vector_search_configuration="vector-config"),
    SearchField(name="SpeakerEmbeddings", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, dimensions=embedding_length, vector_search_configuration="vector-config"),
]

In [9]:
# Vector search setup: a single HNSW algorithm configuration.
hnsw_parameters = {
    "m": 4,
    "efConstruction": 400,
    "efSearch": 1000,
    "metric": "cosine",
}

# The name "vector-config" is what the vector fields in the schema reference
# through `vector_search_configuration`.
hnsw_algorithm = VectorSearchAlgorithmConfiguration(
    name="vector-config",
    kind="hnsw",
    hnsw_parameters=hnsw_parameters,
)

vector_search = VectorSearch(algorithm_configurations=[hnsw_algorithm])

In [10]:
# Semantic search setup: "Speaker" serves as both the title and the keyword
# field, "Text" as the content field.
semantic_fields = PrioritizedFields(
    title_field=SemanticField(field_name="Speaker"),
    prioritized_content_fields=[SemanticField(field_name="Text")],
    prioritized_keywords_fields=[SemanticField(field_name="Speaker")],
)

semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=semantic_fields,
)

In [11]:
# Wrap the single semantic configuration in the settings object that the
# index definition takes.
semantic_settings = SemanticSettings(
    configurations=[semantic_config],
)

In [14]:
# Assemble the index definition from the schema, vector-search and semantic
# settings, then send it to the service with create_or_update_index.
index = SearchIndex(
    name=ai_search_index,
    fields=fields,
    vector_search=vector_search,
    semantic_settings=semantic_settings,
)
result = index_client.create_or_update_index(index)

# The message says "created" unconditionally, whichever path the call took.
print(f' {result.name} created')


 oewg-speech-meeeting-index created
