In [0]:
# Notebook parameters
#
# Every key of `params` is surfaced as a Databricks text widget, and the dict is
# then overwritten with whatever value each widget currently holds.

params = {
    "refresh_ai_pred": "",
}

# Register one text widget per parameter (empty default value, empty label).
for param_name in params:
    dbutils.widgets.text(param_name, "", "")

# Read the widget values back into `params` and echo them for the run log.
for param_name in params:
    widget_value = dbutils.widgets.get(param_name)
    params[param_name] = widget_value
    print(param_name, ":", widget_value)

In [0]:
# Stop the notebook early when no refresh was requested (refresh_ai_pred is not
# '1') and the prediction table is already there. When the table is missing the
# notebook carries on and builds it, whatever refresh_ai_pred says.
refresh_requested = params["refresh_ai_pred"] == "1"

# The catalog lookup is only evaluated when no refresh was requested.
if not refresh_requested and spark.catalog.tableExists(
    "4_prod.pacs_dlt.ai_predicted_pacs_examcode"
):
    dbutils.notebook.exit("AI pred not refreshed.")

In [0]:
# Decide whether the vector search index has to be (re)built.
# Calling spark.catalog.tableExists("4_prod.pacs_ai.pacs_examcode_concat_vs_index")
# raised an error, so the existence of the pacs_examcode_dict table is used as
# the signal instead: no dictionary table yet -> build the index.
examcode_dict_exists = spark.catalog.tableExists("4_prod.pacs_dlt.ai_pacs_examcode_dict")
refresh_vs_index = not examcode_dict_exists
print(refresh_vs_index)

In [0]:
%sql
-- Build the exam-code dictionary table from the lookup CSV; this is a no-op
-- when the table already exists.
-- All CSV columns are kept, plus `concat_text`: one text rendering of the row
-- (short code, description, procedure name, laterality, region, system).
-- laterality / region / system fall back to the literal 'Null' when missing.
CREATE TABLE IF NOT EXISTS 4_prod.pacs_dlt.ai_pacs_examcode_dict
AS
SELECT
  *,
  (
    '{\n short code: ' || short_code || ' ;\n '
    || 'description: ' || preferred || ' ;\n '
    || 'procedure name: ' || sct_fsn || ' ;\n '
    || 'laterality: ' || COALESCE(laterality, 'Null') || ' ;\n '
    || 'region: ' || COALESCE(region, 'Null') || ' ;\n '
    || 'system: ' || COALESCE(system, 'Null') || ' ;\n}'
  ) AS concat_text
FROM
  read_files(
    '/Volumes/4_prod/pacs/base/Annex-1-DID_lookup_group.csv',
    format => 'csv',
    header => true,
    mode => 'FAILFAST'
  )

In [0]:
%sql
-- Switch on the Delta change data feed property for the dictionary table.
-- NOTE(review): presumably needed because this table is the source of the
-- Delta Sync vector search index created later in this notebook - confirm.
ALTER TABLE 4_prod.pacs_dlt.ai_pacs_examcode_dict
SET TBLPROPERTIES (
  delta.enableChangeDataFeed = true
)

In [0]:


if refresh_vs_index:
    # Imported here so the vector search SDK is only needed on runs that
    # actually build the index.
    from databricks.vector_search.client import VectorSearchClient

    client = VectorSearchClient()

    # Delta Sync index over the dictionary table: `concat_text` is embedded via
    # the configured embedding endpoint and rows are keyed by `short_code`.
    index = client.create_delta_sync_index(
        endpoint_name="pacs_examcode_vector_search",
        source_table_name="4_prod.pacs_dlt.ai_pacs_examcode_dict",
        index_name="4_prod.pacs_dlt.ai_pacs_examcode_concat_vs_index",
        pipeline_type="TRIGGERED",
        primary_key="short_code",
        embedding_source_column="concat_text",
        embedding_model_endpoint_name="azure_openai_embedding_endpoint",
    )

    # create_delta_sync_index returns before the index has been provisioned.
    # Block until it reports ready so that anything querying the index
    # afterwards does not hit one that is still being built.
    index.wait_until_ready()

In [0]:
%sql

-- Rebuild the AI-predicted exam code table.
--   RawExamCodes : raw exam codes containing a space, capped by LIMIT.
--   ar           : per raw code, the concat_text of the vector search hits is
--                  joined into one context string and sent to the LLM together
--                  with the raw code.
--   ar_val       : first line of the LLM answer is taken as the predicted short
--                  code and checked against the exam-code dictionary.
-- Final output adds ValidatedAIExamCode: the predicted code when it exists in
-- the dictionary, otherwise NULL.
CREATE OR REPLACE TABLE 4_prod.pacs_dlt.ai_predicted_pacs_examcode
AS
WITH RawExamCodes AS (
  SELECT RawExamCode
  FROM `4_prod`.pacs_dlt.intmd_pacs_examcode
  WHERE RawExamCode LIKE '% %'
  LIMIT 1000 -- Adjust the limit as needed
),
ar AS (
  SELECT vs.*,
  -- failOnError => false: the answer text is read from AIResponse['result'] below.
  ai_query(
      "azure_openai_gpt4o",
      "Context:\n" || vs.context || "\nQuestion: Choose the best short code for '" || vs_input || "'. In the first line, output the best short code if found. Otherwise output Unknown. In the second line, output the reason. Please note: +C means contrast. If contrast is not specified then choose a non-contrast short code.",
      failOnError=>false
  ) AS AIResponse
  FROM RawExamCodes rec
  JOIN LATERAL (
    SELECT 
      rec.RawExamCode AS vs_input, 
      array_join(collect_set(concat_text), '\n') AS context
    FROM vector_search(
      -- Query the index this notebook builds from ai_pacs_examcode_dict, so the
      -- candidates offered to the LLM come from the same dictionary that the
      -- answer is validated against in ar_val.
      index => '4_prod.pacs_dlt.ai_pacs_examcode_concat_vs_index',
      query_text => rec.RawExamCode,
      num_results => 20
    )
    GROUP BY vs_input
  ) vs ON vs.vs_input = rec.RawExamCode
),
ar_val AS (
  SELECT
    ar.*,
    -- First line of the answer, surrounding spaces removed.
    TRIM(SPLIT_PART(AIResponse['result'], '\n', 1)) AS ExtractedAIExamCode,
    -- True only when the extracted code matches a dictionary short_code.
    ISNOTNULL(ed.short_code) AS IsExamCodeValid
  FROM ar
  LEFT JOIN 4_prod.pacs_dlt.ai_pacs_examcode_dict AS ed
  ON TRIM(SPLIT_PART(AIResponse['result'], '\n', 1)) = ed.short_code
)
SELECT
  ar_val.*,
  CASE 
    WHEN IsExamCodeValid IS TRUE THEN ar_val.ExtractedAIExamCode
    ELSE NULL
  END AS ValidatedAIExamCode
  FROM ar_val
