In [0]:
%pip install -qqqq -U databricks-vectorsearch mlflow mlflow-skinny

In [0]:
dbutils.library.restartPython()

In [0]:
from databricks.vector_search.client import VectorSearchClient
from databricks.sdk.service.vectorsearch import EndpointType
import yaml

# Load configurations from YAML files
def load_config(config_file):
    """Parse a YAML file and return the loaded document.

    Args:
        config_file: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale-specific default encoding of the machine running this code.
    with open(config_file, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Load configurations
rag_chain_config = load_config("rag_chain_config.yaml")
data_pipeline_config = load_config("data_pipeline_config.yaml")
destination_tables_config = load_config("destination_tables_config.yaml")

# Vector Search settings, both taken from the RAG chain configuration:
#   - the index name lives under `retriever_config`
#   - the endpoint name lives under `databricks_resources`
retriever_config = rag_chain_config.get("retriever_config")
VECTOR_SEARCH_INDEX = retriever_config.get("vector_search_index")
VECTOR_SEARCH_ENDPOINT = rag_chain_config.get("databricks_resources").get(
    "vector_search_endpoint_name"
)

def create_vector_search_endpoint():
    """Make sure the endpoint named by ``VECTOR_SEARCH_ENDPOINT`` is present.

    Scans the entries returned by ``list_endpoints()``; when none carries the
    configured name, a STANDARD endpoint is created through
    ``create_endpoint_and_wait``. Progress is reported with ``print``.
    """
    client = VectorSearchClient(disable_notice=True)
    listing = client.list_endpoints()

    # for/else: the else branch runs only when no entry matched (no break).
    for entry in listing.get("endpoints", []):
        if entry["name"] == VECTOR_SEARCH_ENDPOINT:
            print(f"Endpoint {VECTOR_SEARCH_ENDPOINT} already exists.")
            break
    else:
        print(f"Creating Vector Search endpoint: {VECTOR_SEARCH_ENDPOINT}")
        client.create_endpoint_and_wait(
            VECTOR_SEARCH_ENDPOINT,
            endpoint_type=EndpointType.STANDARD.value,
        )
        print("Endpoint created successfully!")

    # Reached on both paths: the endpoint was either found or just created.
    print(f"PASS: Vector Search endpoint `{VECTOR_SEARCH_ENDPOINT}` exists")

def create_vector_search_index():
    """Make sure the index named by ``VECTOR_SEARCH_INDEX`` is present.

    Looks through the indexes listed for ``VECTOR_SEARCH_ENDPOINT``. If the
    configured index name is found, nothing is created. Otherwise a delta sync
    index is created with ``create_delta_sync_index_and_wait``, using the
    source table, pipeline type and embedding endpoint from the loaded
    configuration dictionaries.
    """
    client = VectorSearchClient(disable_notice=True)
    listing = client.list_indexes(name=VECTOR_SEARCH_ENDPOINT)

    # Guard clause: stop as soon as the configured index is found.
    for entry in listing.get("vector_indexes", []):
        if entry["name"] == VECTOR_SEARCH_INDEX:
            print(f"Index {VECTOR_SEARCH_INDEX} already exists.")
            return

    print("Creating Vector Search Index, this can take 15 minutes or longer...")
    print(f'Check status at: {VECTOR_SEARCH_INDEX}')

    # Backticks are stripped from the configured table name before it is
    # passed to the client.
    source_table = destination_tables_config["chunked_docs_table_name"].replace("`", "")
    sync_pipeline_type = data_pipeline_config["vectorsearch_config"]["pipeline_type"]
    embedding_endpoint = data_pipeline_config["embedding_config"]["embedding_endpoint_name"]

    client.create_delta_sync_index_and_wait(
        endpoint_name=VECTOR_SEARCH_ENDPOINT,
        index_name=VECTOR_SEARCH_INDEX,
        primary_key="chunk_id",
        source_table_name=source_table,
        pipeline_type=sync_pipeline_type,
        embedding_source_column="chunked_text",
        embedding_model_endpoint_name=embedding_endpoint,
    )
    print("Index created successfully!")

def main():
    """Run the Vector Search setup: endpoint first, then the index."""
    # Order matters: the index step lists and creates indexes on the endpoint.
    setup_steps = (create_vector_search_endpoint, create_vector_search_index)
    for step in setup_steps:
        step()

# Entry point: run the endpoint/index setup only when this file is executed
# as the main module, not when it is imported.
if __name__ == "__main__":
    main()