This notebook shows how to use vector search directly in batch inference. It works with serverless compute.

In [0]:
# Install the Databricks Vector Search client package into the notebook
# environment, then restart the Python process.
# NOTE(review): the version is not pinned; consider pinning for reproducibility.
# NOTE(review): the cells visible in this notebook only call the SQL
# vector_search() function -- confirm the Python client is actually required.
%pip install databricks-vectorsearch
%restart_python

This assumes you already have a vector search index.

In [0]:
%sql
-- Sanity check: fetch the top 3 rows from the campaigns index for a sample query.
-- NOTE(review): this query text uses an underscore while the aggregated query
-- further down searches for 'viking ranges' -- confirm which one is intended.
SELECT *
FROM vector_search(
  index       => 'shm.marketing.campaigns_fixed_index',
  query_text  => 'viking_ranges',
  num_results => 3
)

In [0]:
%sql
-- Vector search: collapse the top 3 matching campaigns into one text value
-- (one labelled paragraph per campaign) that can be pasted into an LLM prompt.
--
-- Each column is wrapped in COALESCE because CONCAT yields NULL when any
-- argument is NULL and COLLECT_LIST skips NULL elements, which would silently
-- drop a whole campaign that has a single missing field.
-- Assumes the four columns are STRING -- TODO confirm against the index schema.
SELECT
  STRING(
    COLLECT_LIST(
      CONCAT(
        'campaign: ', COALESCE(template, ''), '\n',
        'title: ', COALESCE(title, ''), '\n',
        'name: ', COALESCE(campaign_name, ''), '\n',
        'description: ', COALESCE(campaign_description, ''), '\n\n'
      )
    )
  ) AS similarity_search
FROM vector_search(
  index => 'shm.marketing.campaigns_fixed_index',
  query_text => 'viking ranges',
  num_results => 3
)

In [0]:
# Instruction text placed in front of the retrieved campaigns when calling the LLM.
# The example under "Output format" is valid JSON (double-quoted keys and string
# values) so that it agrees with the "Return a json output" instruction.
prompt = """Act as a marketing expert. Summarize the evocativeness of these campaigns on a scale of 1 to 5, with 5 being the most evocative and provide a critique for improving them. Return a json output.

Output format:
{"evocativeness": 3, "critique": "Needs to focus on better structure and emotional content"}
"""

In [0]:
# Publish the prompt as a text widget named "prompt" (label "Prompt") so that
# SQL cells can pick it up through the :prompt parameter marker.
dbutils.widgets.text("prompt", prompt, "Prompt")

In [0]:
%sql
-- Scalar SQL function: runs a hybrid vector search for `query` against the
-- campaigns index and returns the top 3 matches as a single STRING, one
-- labelled paragraph per campaign.
--
-- Each column is wrapped in COALESCE because CONCAT yields NULL when any
-- argument is NULL and COLLECT_LIST skips NULL elements, which would silently
-- drop a whole campaign that has a single missing field.
-- Assumes the four columns are STRING -- TODO confirm against the index schema.
CREATE OR REPLACE FUNCTION shm.marketing.campaigns_vector_search(
  query STRING
  )
  RETURNS STRING
  LANGUAGE SQL
  COMMENT 'This function returns matching campaigns'
  RETURN
    SELECT
      STRING(
        COLLECT_LIST(
          CONCAT(
            'campaign: ', COALESCE(template, ''), '\n',
            'title: ', COALESCE(title, ''), '\n',
            'name: ', COALESCE(campaign_name, ''), '\n',
            'description: ', COALESCE(campaign_description, ''), '\n\n'
          )
        )
      ) AS similarity_search
    FROM vector_search(
      index => 'shm.marketing.campaigns_fixed_index',
      query_text => query,
      num_results => 3,
      query_type => 'hybrid'
    )

In [0]:
%sql
-- Batch inference: for each selected product, look up related campaigns via
-- shm.marketing.campaigns_vector_search(title), append them to the :prompt
-- widget text, and ask the LLM endpoint for a structured JSON answer.
-- LIMIT 1 (with no ORDER BY) restricts the run to a single, arbitrary sample row.
CREATE OR REPLACE TABLE shm.marketing.products_pred_vector_search AS
SELECT
  product_id,
  AI_QUERY(
    'databricks-meta-llama-3-3-70b-instruct',
    -- prompt text, newline, retrieved campaigns ('' if the lookup is NULL), newline
    :prompt
      || '\n'
      || COALESCE(shm.marketing.campaigns_vector_search(title), '')
      || '\n',
    responseFormat => '{
      "type": "json_schema",
      "json_schema": {
        "name": "categorization",
        "schema": {
          "type": "object",
          "properties": {
            "evocativeness": {"type": "number"},
            "critique": {"type": "string"}
          }
        }
      }
    }'
  ) AS llm_output
FROM shm.marketing.products_sample
LIMIT 1