-
Notifications
You must be signed in to change notification settings - Fork 304
/
AzureSearchIndexer.py
50 lines (45 loc) · 1.96 KB
/
AzureSearchIndexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import logging
from azure.search.documents.indexes.models import SearchIndexer, FieldMapping
from azure.search.documents.indexes import SearchIndexerClient
from ..helpers.EnvHelper import EnvHelper
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
logger = logging.getLogger(__name__)
class AzureSearchIndexer:
    """Create, update and run Azure AI Search indexers.

    Wraps a ``SearchIndexerClient`` that is configured from the supplied
    ``EnvHelper``.
    """

    def __init__(self, env_helper: EnvHelper):
        """Build the indexer client from the environment configuration.

        Args:
            env_helper: Configuration source. This class reads
                ``AZURE_SEARCH_SERVICE``, ``AZURE_SEARCH_KEY``,
                ``AZURE_SEARCH_INDEX`` and ``AZURE_SEARCH_DATASOURCE_NAME``
                from it and calls ``is_auth_type_keys()``.
        """
        self.env_helper = env_helper

        endpoint = self.env_helper.AZURE_SEARCH_SERVICE
        # Key-based auth when configured; otherwise fall back to
        # DefaultAzureCredential.
        if self.env_helper.is_auth_type_keys():
            credential = AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
        else:
            credential = DefaultAzureCredential()

        self.indexer_client = SearchIndexerClient(endpoint, credential)

    def create_or_update_indexer(self, indexer_name: str, skillset_name: str):
        """Create (or update) an indexer and immediately start a run.

        The indexer targets ``AZURE_SEARCH_INDEX``, reads from
        ``AZURE_SEARCH_DATASOURCE_NAME`` and maps the source field
        ``metadata_storage_path`` onto the index field ``source``.

        Args:
            indexer_name: Name of the indexer to create or update.
            skillset_name: Name of the skillset attached to the indexer.

        Returns:
            Whatever ``SearchIndexerClient.create_or_update_indexer`` returns
            for the submitted definition.
        """
        target_index = self.env_helper.AZURE_SEARCH_INDEX
        data_source = self.env_helper.AZURE_SEARCH_DATASOURCE_NAME
        path_to_source = FieldMapping(
            source_field_name="metadata_storage_path",
            target_field_name="source",
        )

        definition = SearchIndexer(
            name=indexer_name,
            description="Indexer to index documents and generate embeddings",
            skillset_name=skillset_name,
            target_index_name=target_index,
            data_source_name=data_source,
            field_mappings=[path_to_source],
        )
        created = self.indexer_client.create_or_update_indexer(definition)

        # Start a run right away instead of waiting for a later trigger.
        self.indexer_client.run_indexer(indexer_name)

        message = f" {indexer_name} is created and running. If queries return no results, please wait a bit and try again."
        logger.info(message)
        return created

    # To be updated for 'Reprocess All'
    def run_indexer(self, indexer_name: str):
        """Start a run of an existing indexer and log that it was started.

        Args:
            indexer_name: Name of the indexer to run.
        """
        self.indexer_client.run_indexer(indexer_name)

        # NOTE(review): the message says "created" although this method only
        # runs the indexer; wording kept verbatim pending the rework above.
        message = f" {indexer_name} is created and running. If queries return no results, please wait a bit and try again."
        logger.info(message)