In [0]:
%pip install databricks-vectorsearch databricks-langchain langchain-openai

In [0]:
# Restart the Python process so the packages installed by the preceding %pip
# cell are the ones that get imported. This wipes all Python state, which is
# why it runs before anything else is defined.
dbutils.library.restartPython()

In [0]:
from databricks.vector_search.client import VectorSearchClient


# Catalog and schema used to fully qualify every table and index name below.
CATALOG, SCHEMA = "manufacturing_dev", "work_agent_barney"


# Shared Vector Search client used by the cells below to create the endpoint
# and the indexes. No credentials are passed, so it relies on the client's
# default authentication (presumably the notebook's ambient Databricks
# context — verify when running outside a workspace).
client = VectorSearchClient()

In [0]:
# Name of the Vector Search endpoint that hosts the indexes created below.
ENDPOINT_NAME = "master_sensory_data_endpoint"

# Creating an endpoint that already exists raises, which would break any re-run
# of this notebook. Only create it when it is missing; otherwise just wait for
# the existing endpoint to be ready before indexes are attached to it.
# (The result was previously bound to `index`, which was misleading and was
# overwritten by the next cell anyway, so it is no longer kept.)
existing_endpoint_names = {
    ep["name"] for ep in client.list_endpoints().get("endpoints", [])
}
if ENDPOINT_NAME in existing_endpoint_names:
    client.wait_for_endpoint(ENDPOINT_NAME)
else:
    client.create_endpoint_and_wait(
        name=ENDPOINT_NAME,
        endpoint_type="STANDARD",
    )

In [0]:
# Delta Sync index over the panel-joined silver table, keyed on `id`.
# Vectors are taken from the table's `data_embedding` column (1536 dimensions),
# so the dimension here has to match whatever model produced that column.
index = client.create_delta_sync_index_and_wait(
    # where the data comes from / where the index lives
    source_table_name=f"{CATALOG}.{SCHEMA}.master_sensory_panel_joined_silver",
    index_name=f"{CATALOG}.{SCHEMA}.master_sensory_panel_joined_index",
    endpoint_name="master_sensory_data_endpoint",
    # row identity and embedding layout
    primary_key="id",
    embedding_vector_column="data_embedding",
    embedding_dimension=1536,
    # sync behaviour and progress logging
    pipeline_type="TRIGGERED",
    verbose=True,
)

In [0]:
# Delta Sync index over the collected-responses silver table, keyed on `id`.
# Same layout as the panel index: vectors come from `data_embedding`
# (1536 dimensions). Note that this rebinds `index`, so the handle returned
# for the panel index is no longer reachable through that name.
index = client.create_delta_sync_index_and_wait(
    # where the data comes from / where the index lives
    source_table_name=f"{CATALOG}.{SCHEMA}.master_sensory_responses_collected_silver",
    index_name=f"{CATALOG}.{SCHEMA}.master_sensory_responses_collected_index",
    endpoint_name="master_sensory_data_endpoint",
    # row identity and embedding layout
    primary_key="id",
    embedding_vector_column="data_embedding",
    embedding_dimension=1536,
    # sync behaviour and progress logging
    pipeline_type="TRIGGERED",
    verbose=True,
)

## Query

In [0]:
import os, getpass

from databricks_langchain import DatabricksVectorSearch
from langchain_openai import OpenAIEmbeddings


# Base URL that OpenAI requests are sent to (an internal proxy, per the host).
OPENAI_API_BASE = "https://api-internal.8451.com/ai/proxy/"

# Same catalog/schema as the index-creation section; redeclared here,
# presumably so the query cells can run on their own after a restart.
SCHEMA = "work_agent_barney"
CATALOG = "manufacturing_dev"


def _set_env(var: str):
    """Ensure environment variable ``var`` has a non-empty value.

    If ``var`` is already set to a non-empty string it is left untouched.
    Otherwise the user is prompted (without echo, via ``getpass``) using the
    prompt ``"<var>: "`` and the entered value is stored in ``os.environ``.
    """
    if os.environ.get(var):
        # Already provided by the environment; nothing to ask for.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")


# Credentials first: prompt for the API key only when the environment does not
# already supply one, then route requests through the configured base URL.
_set_env("OPENAI_API_KEY")
os.environ.update(OPENAI_API_BASE=OPENAI_API_BASE)


# Embedding model used to vectorise queries at search time. Both settings above
# are presumably picked up from the environment by the OpenAI client.
embed_model = OpenAIEmbeddings(model="text-embedding-3-small")

In [0]:
# LangChain vector store over the panel-joined index: queries are embedded with
# `embed_model` and the `data` column is used as the document text.
# NOTE(review): the variable is called "responses" but the index is
# `master_sensory_panel_joined_index` — confirm which of the two is intended.
responses_vector_store = DatabricksVectorSearch(
    index_name=f"{CATALOG}.{SCHEMA}.master_sensory_panel_joined_index",
    endpoint="master_sensory_data_endpoint",
    text_column="data",
    embedding=embed_model,
)

In [0]:
# Smoke-test the store with a sample question, keeping only the closest match.
results = responses_vector_store.similarity_search(
    "Are there any panels involving mushrooms?", k=1
)

In [0]:
# Bare last expression so the notebook renders the search hits.
results