In [0]:
-- Builds shm.contracts.extracted: one row per parsed document with non-empty
-- text, carrying the flattened text, two word-limited excerpts, and the
-- structured fields returned by AI_QUERY.
--
-- Output columns (same order as before): path, parsed, text, preamble,
-- truncated, key_information.
CREATE OR REPLACE TABLE shm.contracts.extracted AS
WITH document_text AS (
  -- Join the `content` of every element under parsed:document:elements into a
  -- single string, separated by blank lines. This is computed once here and
  -- reused below instead of being repeated for each derived column.
  SELECT
    path,
    parsed,
    concat_ws(
      '\n\n',
      transform(
        try_cast(parsed:document:elements AS ARRAY<VARIANT>),
        element -> try_cast(element:content AS STRING)
      )
    ) AS text
  FROM shm.contracts.parsed
),
document_words AS (
  -- Split on single spaces only; newlines inside `text` stay attached to the
  -- neighbouring token, so the "word" counts below are space-delimited tokens.
  SELECT
    path,
    parsed,
    text,
    split(text, ' ') AS words
  FROM document_text
),
flattened AS (
  SELECT
    path,
    parsed,
    text,
    -- First 500 space-delimited tokens. Kept in the output table; not used in
    -- the prompt below.
    concat_ws(' ', slice(words, 1, 500)) AS preamble,
    -- First 10000 space-delimited tokens. This is the text sent to the model.
    concat_ws(' ', slice(words, 1, 10000)) AS truncated
  FROM document_words
)
SELECT
  path,
  parsed,
  text,
  preamble,
  truncated,
  -- Prompt = fixed instructions + file name + truncated document text.
  -- responseFormat constrains the model output to the declared struct.
  AI_QUERY(
    'databricks-claude-sonnet-4-5',
    CONCAT(
      'Carefully analyze the attached document and extract the primary contract number, referenced agreements, dates, vendors, and contractual sections.
        
      Return dates in ISO format with a classification of the type of date - whether it was a revision, expiry, publication date, e.g. 2025-12-03 (Expiry).
      
      For referenced agreements, look for the below terms and list them with the agreement type, e.g. Master Agreement 1239-12900.      
      - Master Agreement
      - Framework Agreement
      - Consulting Agreement (CSA)
      - NDA (Non Disclosure Agreement) / Confidentiality
      - Purchase Order Terms and Conditions
      - Mutually Agreed to Terms and Conditions (MTC)
      - Contract
      - Master Work Agreement (MWA)
      - Sales Contract
      - Engineering Procurement Construction (EPC)
      - Engineering Procurement Construction Management (EPCM)
      - Construction Agreement
      - Site Services Agreement
      - Staffing Agreement
      - Sales/Catering Contract
      - Recruitment Agreement
      - Administration Services Agreement
      - Services Agreement
      - License Agreement
      - Supply Agreement
      - Order Form
      - Purchase Agreement
      - General Terms and Conditions
      
      For contract sections, look for the sections in the documents below. List each section with its heading and any references to other documents or contractual numbers, e.g. Ownership Interest Declaration (EDS-7: 3/2015)
      - Amendments
      - Rate sheets
      - Schedules
      - Exhibits
      - Addendums
      - Statement of Work (SOW)
      - Termination
      - Forms of Undertaking (FOU)
      - Commitment Letter
      - Change Order
      
      Each field should be an entry in the main struct, followed by a list of strings. Output in JSON format. 
      ',
      'File Name:', path, '\n Text:',
      truncated
    ),
    responseFormat => 'STRUCT<extraction:STRUCT<file_name:STRING, contract_number:STRING, agreements:ARRAY<STRING>, dates:ARRAY<STRING>, vendors:ARRAY<STRING>, contract_sections:ARRAY<STRING>>>'
  ) AS key_information
FROM flattened
-- Skip documents whose elements produced no text, so no model call is made for them.
WHERE text != ''