In [0]:
%pip install -r requirements.txt
# Restart the Python process after the install above so that the following
# cells import the freshly installed package versions.
# NOTE(review): restarting presumably clears any Python state defined before
# this point, so keep this as the first cell of the notebook — confirm.
dbutils.library.restartPython()

In [0]:
import yaml

from databricks.vector_search.client import VectorSearchClient
from time import sleep

# Read the resource names used by this notebook from names.yaml.
with open("names.yaml", "r", encoding="utf-8") as file:
    # safe_load returns None for an empty document; treat that as "no entries"
    # so the check below reports the missing keys instead of an AttributeError.
    names = yaml.safe_load(file) or {}

# Fail here with a clear message rather than passing None into the
# Vector Search client calls in the following cells.
_required_names = ("table_name", "vector_search_endpoint_name", "vs_index_fullname")
_missing_names = [key for key in _required_names if not names.get(key)]
if _missing_names:
    raise KeyError(f"names.yaml is missing required entries: {', '.join(_missing_names)}")

TABLE_NAME = names["table_name"]  # used as the index's source table
VECTOR_SEARCH_ENDPOINT_NAME = names["vector_search_endpoint_name"]
VS_INDEX_FULLNAME = names["vs_index_fullname"]

## Creating VectorSearch endpoint and index

In [0]:
vsc = VectorSearchClient(disable_notice=True)

# Reuse the endpoint if it can be fetched; otherwise create it and block until
# it is ready.
try:
  vsc.get_endpoint(VECTOR_SEARCH_ENDPOINT_NAME)
  print('Endpoint already created')
except Exception:
  # `except Exception` instead of a bare `except` so that KeyboardInterrupt and
  # SystemExit still stop the cell rather than triggering endpoint creation.
  # NOTE(review): any lookup failure (not only "endpoint not found") lands here;
  # narrow this if the client exposes a dedicated not-found exception.
  print('Creating new endpoint and waiting to be ready')
  vsc.create_endpoint_and_wait(name=VECTOR_SEARCH_ENDPOINT_NAME, endpoint_type="STANDARD")
  print(f'Created new endpoint {VECTOR_SEARCH_ENDPOINT_NAME}')

In [0]:
# Reuse the index if it can be described; otherwise request creation of a
# delta-sync index over TABLE_NAME on the endpoint.
try:
  vsc.get_index(VECTOR_SEARCH_ENDPOINT_NAME, VS_INDEX_FULLNAME).describe()
  print(f"Index {VS_INDEX_FULLNAME} on table {TABLE_NAME} is already created")
except Exception:
  # `except Exception` instead of a bare `except` so that KeyboardInterrupt and
  # SystemExit still stop the cell rather than triggering index creation.
  # NOTE(review): any lookup failure (not only "index not found") lands here;
  # narrow this if the client exposes a dedicated not-found exception.
  vsc.create_delta_sync_index(
    endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME,
    index_name=VS_INDEX_FULLNAME,
    source_table_name=TABLE_NAME,
    pipeline_type="TRIGGERED",
    primary_key="id",
    embedding_source_column='content',
    embedding_model_endpoint_name='databricks-gte-large-en'
  )
  # This call is not awaited here; readiness is polled in the next cell.
  print(f"Creating index {VS_INDEX_FULLNAME} on endpoint {VECTOR_SEARCH_ENDPOINT_NAME}...")

In [0]:
# Poll the index until it reports ready, then trigger a sync.
MAX_READY_CHECKS = 20              # number of polls before giving up
READY_CHECK_INTERVAL_SECONDS = 30  # pause between polls

for attempt in range(1, MAX_READY_CHECKS + 1):
  # Fetch the handle once per poll and reuse it for both describe() and sync().
  index = vsc.get_index(VECTOR_SEARCH_ENDPOINT_NAME, VS_INDEX_FULLNAME)
  # Treat a missing status/ready field as "not ready" instead of a KeyError.
  index_ready = (index.describe().get('status') or {}).get('ready', False)
  if index_ready:
    index.sync()
    print(f"Index {VS_INDEX_FULLNAME} on table {TABLE_NAME} is ready")
    break
  # Attempts are reported 1-based so the final message reads N/N.
  print(f'Index not ready yet. Attempt {attempt}/{MAX_READY_CHECKS}')
  if attempt < MAX_READY_CHECKS:
    # No point in sleeping after the last poll; the else-branch raises next.
    sleep(READY_CHECK_INTERVAL_SECONDS)
else:
  # Only reached when the loop finished without `break`: surface the timeout
  # instead of letting the notebook continue with an index that is not ready.
  raise TimeoutError(
    f"Index {VS_INDEX_FULLNAME} was not ready after {MAX_READY_CHECKS} checks "
    f"({READY_CHECK_INTERVAL_SECONDS}s apart)"
  )