In [0]:
-- CONTRACT ANALYSIS
-- -- Classify

-- Result table for the contract classification step.
-- The fully qualified name is resolved at run time from the :catalog and :schema
-- parameter markers. The statement is a no-op when the table already exists.
-- All expiry dates are declared as STRING, exactly as the classifier emits them.
CREATE TABLE IF NOT EXISTS IDENTIFIER(:catalog || '.' || :schema || '.classified')
(
  path                                 STRING,   -- document path; used as the MERGE key below
  is_master_agreement                  BOOLEAN,
  has_amendments                       BOOLEAN,
  initial_master_agreement_expiry_date STRING,
  final_master_agreement_expiry_date   STRING,
  -- One array element per amendment reported for the document.
  amendments                           ARRAY<
                                         STRUCT<
                                           amendment_path:                  STRING,
                                           changes_master_agreement_expiry: BOOLEAN,
                                           new_master_agreement_expiry:     STRING,
                                           agreement_document_type:         STRING
                                         >
                                       >,
  rationale                            STRING,
  -- (sic) misspelling of "confidence"; the name is referenced verbatim by the
  -- MERGE statement and by the model response schema, so it must not be renamed here alone.
  cofidence                            INT
);

-- Classify every row of `<catalog>.<schema>.assembled` with an LLM and upsert the
-- outcome into `<catalog>.<schema>.classified`, keyed on `path`.
--
-- Parameter markers:
--   :catalog, :schema  - locate both the source (`assembled`) and target (`classified`) tables
--   :max_input_char    - the prompt is cut to this many characters before it is sent
--
-- Pipeline:
--   1. prompts   - one prompt per source row: fixed instructions followed by the row's context
--   2. responses - AI_QUERY call on the prompt, reply parsed with from_json into a struct
--   3. MERGE     - update the target row with the same path, otherwise insert a new one
--
-- Notes:
--   * CONCAT returns NULL as soon as any argument is NULL, which would blank the whole
--     prompt. Every interpolated column is therefore cast to STRING and defaulted to ''.
--   * Truncation keeps the start of the prompt, so the instructions survive and the
--     trailing context sections are what gets dropped.
--   * `cofidence` (sic) is the real column / JSON key name and is kept consistent everywhere.
--   * The response schema is spelled out twice (responseFormat and from_json); keep both in sync.
--   * NOTE(review): responseFormat nests the fields under `results` while from_json parses
--     the inner struct directly - presumably AI_QUERY returns the inner object; confirm.
MERGE INTO IDENTIFIER(:catalog || '.' || :schema || '.classified') AS target
USING (
  WITH prompts AS (
    -- Build the (truncated) prompt text for each document.
    SELECT
      path,
      SUBSTRING(
        CONCAT(
        'You are a contractual document classification expert. You will be provided with a document, key information, the preamble (first 100 words), and two sets of related documents. The first is documents within the vendor folder with the key information and preamble from those documents. The second is key information of semantically similar documents.
        
        Use this information to answer the following questions:
        1. Is this document a master agreement?
        2. Are there any amendments to this master agreement?
        3. What is the initial expiry date of this agreement?
        4. What is the final expiry date of this agreement given the amendments?
        5. What are the applicable ammendments? 
        
        List all the amendments, and if this amendment changes the expiry date. Amendments may have other changes than the date that do not affect the master agreement, for example rate changes. Ignore non-agreement documents like confidentiality (CA), non-disclosure (NDA), etc.
        
        # ADDITIONAL INSTRUCTIONS
        Note that there are numerous files within the same folder that are more likely to be related to the contract than others. The folder documents should weigh much more than the other semantically related documents.  

        For amendments, determine their sequence (e.g., Amendment 1, Amendment 2) using the date found inside the document key information to establish chronological order. These amendments should have a path.

        Use document content for classification, not file names.
        Ignore duplicates.
        Provide your rationale and confidence on a scale of 1 to 5, with 5 being perfectly confident based on folder documentation, 4 being confident based on folder documents and/or semantic documents, 3 having some uncertainty, and 2 being quite uncertain with doubt, and 1 having very little information.
        Think carefully and review the work.

        Provide your response in json format with the following fields:
        {
          "is_master_agreement": boolean,
          "has_amendments": boolean,
          "initial_master_agreement_expiry_date": date,
          "final_master_agreement_expiry_date": date,
          "amendments": [
            {
              "amendment_path": string,
              "changes_master_agreement_expiry": boolean,
              "new_master_agreement_expiry": date,
              "agreement_document_type": string
            },
            ...
          ],
          "rationale": string,
          "cofidence": integer
        }
        ',
        '# PATH: ', COALESCE(CAST(path AS STRING), ''),
        '\n \n# KEY INFO:\n', COALESCE(CAST(doc_info AS STRING), ''),
        '\n \n# PREAMBLE:\n ', COALESCE(CAST(preamble AS STRING), ''),
        '\n \n# NUMBER OF FOLDER DOCS:\n', COALESCE(CAST(other_folder_docs AS STRING), ''),
        '\n \n# FOLDER DOC KEY INFO:\n', COALESCE(CAST(other_doc_infos AS STRING), ''),
        '\n \n# FOLDER DOC PREAMBLE:\n ', COALESCE(CAST(other_preambles AS STRING), ''),
        '\n \n# OTHER DOC KEY INFO ', COALESCE(CAST(vs_results AS STRING), ''),
        '\n \n# OTHER DOC PREAMBLE ', COALESCE(CAST(vs_preamble_results AS STRING), '')
        ),
        1,
        :max_input_char
      ) AS prompt
    FROM IDENTIFIER(:catalog || '.' || :schema || '.assembled')
  ),
  responses AS (
    -- Call the model once per prompt and parse its JSON reply into a struct.
    SELECT
      path,
      from_json(
        AI_QUERY(
          'databricks-claude-sonnet-4-5',
          prompt,
          responseFormat => 'STRUCT<results:STRUCT<
          is_master_agreement:BOOLEAN,
          has_amendments:BOOLEAN,
          initial_master_agreement_expiry_date:STRING,
          final_master_agreement_expiry_date:STRING,
          amendments:ARRAY<STRUCT<
            amendment_path:STRING,
            changes_master_agreement_expiry:BOOLEAN,
            new_master_agreement_expiry:STRING,
            agreement_document_type:STRING
          >>,
          rationale:STRING,
          cofidence:INT
        >>'
        ),
        'STRUCT<
          is_master_agreement:BOOLEAN,
          has_amendments:BOOLEAN,
          initial_master_agreement_expiry_date:STRING,
          final_master_agreement_expiry_date:STRING,
          amendments:ARRAY<STRUCT<
            amendment_path:STRING,
            changes_master_agreement_expiry:BOOLEAN,
            new_master_agreement_expiry:STRING,
            agreement_document_type:STRING
          >>,
          rationale:STRING,
          cofidence:INT
        >'
      ) AS classification
    FROM prompts
  )
  -- Flatten the parsed struct into the target table's column layout.
  SELECT
    path,
    classification.is_master_agreement,
    classification.has_amendments,
    classification.initial_master_agreement_expiry_date,
    classification.final_master_agreement_expiry_date,
    classification.amendments,
    classification.rationale,
    classification.cofidence
  FROM responses
) AS source
ON target.path = source.path
WHEN MATCHED THEN UPDATE SET
  is_master_agreement = source.is_master_agreement,
  has_amendments = source.has_amendments,
  initial_master_agreement_expiry_date = source.initial_master_agreement_expiry_date,
  final_master_agreement_expiry_date = source.final_master_agreement_expiry_date,
  amendments = source.amendments,
  rationale = source.rationale,
  cofidence = source.cofidence
WHEN NOT MATCHED THEN INSERT (
  path,
  is_master_agreement,
  has_amendments,
  initial_master_agreement_expiry_date,
  final_master_agreement_expiry_date,
  amendments,
  rationale,
  cofidence
) VALUES (
  source.path,
  source.is_master_agreement,
  source.has_amendments,
  source.initial_master_agreement_expiry_date,
  source.final_master_agreement_expiry_date,
  source.amendments,
  source.rationale,
  source.cofidence
);