In [0]:
# Guide -- https://docs.databricks.com/aws/en/generative-ai/create-query-vector-search
# Installs the Databricks Vector Search Python SDK into this notebook's environment.
# NOTE(review): the package version is not pinned, so the installed SDK can differ
# between runs -- consider pinning a known-good version for reproducibility.

%pip install databricks-vectorsearch
# Restart the Python interpreter so the package installed above can be imported.
# Keep this cell ahead of any cell that defines variables, since a restart drops them.
dbutils.library.restartPython()



In [0]:
%sql
-- Setup DDL for the source table used by the vector search index.
-- Every statement in this cell is currently disabled (commented out); uncomment the
-- parts you need when the catalog, schema, or table does not exist yet.

-- CREATE CATALOG IF NOT EXISTS prod;

-- CREATE SCHEMA IF NOT EXISTS prod.storyspark;

/*
-- Source table: one row per book. `book_id` is used as the index primary key and
-- `relevant_text` as the embedding source column in the index-creation cell below.
USE CATALOG prod;
USE SCHEMA storyspark;
CREATE TABLE IF NOT EXISTS book_inventory(
  book_id STRING,
  owner_id STRING,
  title STRING,
  author STRING,
  last_read DATE,
  relevant_text STRING
)
USING DELTA;
*/

/*
-- Turns on Change Data Feed for the source table. Databricks documents this as a
-- prerequisite for Delta Sync indexes (see the guide linked in the first cell).
ALTER TABLE prod.storyspark.book_inventory
  SET TBLPROPERTIES (delta.enableChangeDataFeed = true);
*/

In [0]:
from databricks.vector_search.client import VectorSearchClient

# Name of the Vector Search endpoint that hosts the book-inventory index.
ENDPOINT_NAME = "storyspark_book_inventory_endpoint"

# The following line automatically generates a PAT Token for authentication
client = VectorSearchClient()

# The following line uses the service principal token for authentication.
# Load the secret from a secret scope instead of pasting it into the notebook.
# client = VectorSearchClient(service_principal_client_id=<CLIENT_ID>,service_principal_client_secret=<CLIENT_SECRET>)

# create_endpoint() raises when an endpoint with the same name already exists, which
# made this cell fail on every run after the first. Only create the endpoint when it
# is not already listed, so the notebook survives "Run All" on an existing workspace.
existing_endpoint_names = {
    endpoint["name"] for endpoint in client.list_endpoints().get("endpoints", [])
}

if ENDPOINT_NAME not in existing_endpoint_names:
    client.create_endpoint(
        name=ENDPOINT_NAME,
        endpoint_type="STANDARD",  # or "STORAGE_OPTIMIZED"
    )

In [0]:
# Re-created here so this cell can be re-run on its own; it still needs the cell that
# imports VectorSearchClient to have been executed in this Python session.
client = VectorSearchClient()

INDEX_ENDPOINT_NAME = "storyspark_book_inventory_endpoint"
INDEX_NAME = "prod.storyspark.book_inventory_index"

# create_delta_sync_index() raises when the index already exists, which made this cell
# fail on every run after the first. Look the index up on the endpoint first and reuse
# it when present; create it only when it is missing.
existing_index_names = {
    entry["name"]
    for entry in client.list_indexes(INDEX_ENDPOINT_NAME).get("vector_indexes", [])
}

if INDEX_NAME in existing_index_names:
    index = client.get_index(
        endpoint_name=INDEX_ENDPOINT_NAME,
        index_name=INDEX_NAME,
    )
else:
    index = client.create_delta_sync_index(
        endpoint_name=INDEX_ENDPOINT_NAME,
        source_table_name="prod.storyspark.book_inventory",
        index_name=INDEX_NAME,
        # TRIGGERED: the index is refreshed on demand rather than continuously.
        pipeline_type="TRIGGERED",
        primary_key="book_id",
        embedding_source_column="relevant_text",
        # This model is used for ingestion, and is also used for querying unless
        # model_endpoint_name_for_query is specified.
        embedding_model_endpoint_name="e5-small-v2",
        # Optional. If specified, used only for querying the index.
        # model_endpoint_name_for_query="e5-mini-v2",
        columns_to_sync=["owner_id", "last_read"],
    )