-
Notifications
You must be signed in to change notification settings - Fork 353
/
integrated_vectorization_embedder.py
48 lines (42 loc) · 2.19 KB
/
integrated_vectorization_embedder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from .embedder_base import EmbedderBase
from ..env_helper import EnvHelper
from ..llm_helper import LLMHelper
from ...integrated_vectorization.azure_search_index import AzureSearchIndex
from ...integrated_vectorization.azure_search_indexer import AzureSearchIndexer
from ...integrated_vectorization.azure_search_datasource import AzureSearchDatasource
from ...integrated_vectorization.azure_search_skillset import AzureSearchSkillset
from ..config.config_helper import ConfigHelper
import logging
logger = logging.getLogger(__name__)
class IntegratedVectorizationEmbedder(EmbedderBase):
    """Embedder that drives the integrated-vectorization Azure Search helpers.

    Rather than embedding a document itself, this class creates or updates
    the search datasource, index, skillset and indexer through the
    ``AzureSearch*`` helper classes imported by this module.
    """

    def __init__(self, env_helper: EnvHelper):
        """Keep the environment helper and create an ``LLMHelper``.

        Args:
            env_helper: Environment helper passed on to every search helper
                and read for ``AZURE_SEARCH_INDEXER_NAME``.
        """
        self.env_helper = env_helper
        self.llm_helper: LLMHelper = LLMHelper()

    def embed_file(self, source_url: str, file_name: str = None):
        """Run the integrated-vectorization setup for ``source_url``.

        Args:
            source_url: Forwarded to
                ``process_using_integrated_vectorization``.
            file_name: Accepted but not used by this method.

        The result of the underlying call is not returned.
        """
        self.process_using_integrated_vectorization(source_url=source_url)

    def process_using_integrated_vectorization(self, source_url: str):
        """Create or update the datasource, index, skillset and indexer.

        Args:
            source_url: Only used in the error log message; none of the
                helper calls below receive it.

        Returns:
            Whatever ``AzureSearchIndexer.create_or_update_indexer`` returns.

        Raises:
            Exception: Any error raised by the search helpers is logged with
                its traceback and re-raised unchanged.
        """
        # Resolved outside the try block, so a configuration failure
        # propagates without going through the error log below.
        config = ConfigHelper.get_active_config_or_default()
        try:
            search_datasource = AzureSearchDatasource(self.env_helper)
            search_datasource.create_or_update_datasource()

            search_index = AzureSearchIndex(self.env_helper, self.llm_helper)
            search_index.create_or_update_index()

            search_skillset = AzureSearchSkillset(
                self.env_helper, config.integrated_vectorization_config
            )
            search_skillset_result = search_skillset.create_skillset()

            # The indexer is created last because it needs the name of the
            # skillset created just above.
            search_indexer = AzureSearchIndexer(self.env_helper)
            indexer_result = search_indexer.create_or_update_indexer(
                self.env_helper.AZURE_SEARCH_INDEXER_NAME,
                skillset_name=search_skillset_result.name,
            )
            return indexer_result
        except Exception as e:
            # logger.exception logs at ERROR level with the same message text
            # and attaches the traceback; the bare raise re-raises the
            # original exception as-is.
            logger.exception("Error processing %s: %s", source_url, e)
            raise

    def reprocess_all(self):
        """Run the existing indexer, or build the whole pipeline if missing.

        If the indexer named by ``AZURE_SEARCH_INDEXER_NAME`` exists it is
        run; otherwise ``process_using_integrated_vectorization`` is called
        with ``source_url="all"``.
        """
        search_indexer = AzureSearchIndexer(self.env_helper)
        if search_indexer.indexer_exists(self.env_helper.AZURE_SEARCH_INDEXER_NAME):
            search_indexer.run_indexer(self.env_helper.AZURE_SEARCH_INDEXER_NAME)
        else:
            self.process_using_integrated_vectorization(source_url="all")