# Default notebook

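This default notebook is executed by a Lakeflow job defined in `resources/sample_job.job.yml`. It makes sure the `appian_vsc_poc` Vector Search endpoint and the `dev_appian_poc.02_gold.ingested_text_index` index exist, then runs a sample similarity search against that index.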

In [None]:
# Install the Vector Search client library into this notebook's Python environment.
# (Comments are kept on their own lines: text after a line magic is passed to the magic.)
%pip install databricks-vectorsearch
# Restart the Python process after the install; this clears all notebook state,
# which is why this cell comes before any imports or variable definitions.
dbutils.library.restartPython()

In [0]:

from databricks.vector_search.client import VectorSearchClient
from databricks.sdk import WorkspaceClient
import databricks.sdk.service.catalog as c
from mlflow.deployments import get_deploy_client

# --- Configuration -----------------------------------------------------------
# Every resource name is defined once so the existence checks, the creation
# calls, and the lookups below cannot drift apart.
VS_ENDPOINT_NAME = "appian_vsc_poc"
VS_INDEX_NAME = "dev_appian_poc.02_gold.ingested_text_index"
VS_SOURCE_TABLE = "dev_appian_poc.02_gold.ingestion_text_embeddings"
EMBEDDING_MODEL_ENDPOINT = "databricks-bge-large-en"
# Presumably the length of the vectors stored in the `embedding` column of the
# source table -- verify against the table schema.
EMBEDDING_DIMENSION = 1024

vsc = VectorSearchClient(disable_notice=True)

# --- Endpoint ----------------------------------------------------------------
# Create the endpoint on first run and block until it has finished
# provisioning; a plain create_endpoint() returns immediately, before the
# endpoint can serve an index.
if not vsc.endpoint_exists(VS_ENDPOINT_NAME):
  vsc.create_endpoint_and_wait(name=VS_ENDPOINT_NAME)

# --- Index -------------------------------------------------------------------
# Create the Delta Sync index on first run, otherwise fetch a handle to the
# existing one. Either way `index` is bound for the cells that follow.
if not vsc.index_exists(endpoint_name=VS_ENDPOINT_NAME, index_name=VS_INDEX_NAME):
  index = vsc.create_delta_sync_index(
    endpoint_name=VS_ENDPOINT_NAME,
    source_table_name=VS_SOURCE_TABLE,
    index_name=VS_INDEX_NAME,
    pipeline_type="TRIGGERED",
    primary_key="id",
    embedding_vector_column="embedding",
    embedding_dimension=EMBEDDING_DIMENSION,
    # NOTE(review): the index is built from a precomputed vector column
    # (`embedding_vector_column`). `embedding_model_endpoint_name` is normally
    # paired with `embedding_source_column`; confirm whether the client uses
    # or ignores it in this configuration.
    embedding_model_endpoint_name=EMBEDDING_MODEL_ENDPOINT
  )
else:
  index = vsc.get_index(endpoint_name=VS_ENDPOINT_NAME, index_name=VS_INDEX_NAME)

# Index creation is asynchronous. Wait here so that the query cell does not
# fail against an index that is still provisioning on a fresh "Run All".
index.wait_until_ready()

# MLflow deployments client, used later to embed query text with a model
# serving endpoint.
deploy_client = get_deploy_client("databricks")

In [0]:


# Sample question used to smoke-test the index.
query_text = "Who is Mr. Dobalina?"

# Embed the question with the `databricks-bge-large-en` serving endpoint.
# NOTE(review): presumably the same model that produced the vectors stored in
# the index -- confirm, since query and index embeddings must be comparable.
embedding_request = {"input": [query_text]}
response = deploy_client.predict(endpoint="databricks-bge-large-en", inputs=embedding_request)

# Exactly one input was sent, so read the embedding of the first returned item.
first_item = response.data[0]
query_embedding = first_item['embedding']

# Ask the index for the single closest row, returning its id and content.
search_kwargs = dict(
    query_vector=query_embedding,
    columns=["id", "content"],
    num_results=1,
)
results = index.similarity_search(**search_kwargs)

print(results)