In [0]:
-- CONTRACT ANALYSIS
-- -- Read Data

-- Create the raw-file landing table on first run; a no-op when it already exists.
-- The fully qualified name is built at run time from the :catalog and :schema
-- parameters, with the table itself always named `bytes`.
-- NOTE(review): PRIMARY KEY constraints on Databricks are informational rather than
-- enforced, so uniqueness of `path` depends on the writer (the MERGE keyed on path)
-- -- confirm against the target runtime.
CREATE TABLE IF NOT EXISTS IDENTIFIER(:catalog || '.' || :schema || '.bytes') (
  -- Full path of the ingested file; the table's key, hence explicitly NOT NULL.
  path STRING NOT NULL,
  -- modificationTime / length / content are the columns the ingest query selects
  -- from READ_FILES with format => 'binaryFile'.
  modificationTime TIMESTAMP,
  length BIGINT,
  -- File metadata struct carried over from the READ_FILES `_metadata` column.
  _metadata STRUCT<
    file_path: STRING,
    file_name: STRING,
    file_size: BIGINT,
    file_block_start: BIGINT,
    file_block_length: BIGINT,
    file_modification_time: TIMESTAMP
  >,
  content BINARY,
  -- First folder name below :doc_path, extracted from `path` by the ingest query.
  vendor_name STRING,
  -- Last path segment of `path`.
  file_name STRING,
  -- Paths of other files that share this row's vendor_name.
  vendor_folder_paths ARRAY<STRING>,
  CONSTRAINT bytes_path_pk PRIMARY KEY (path)
);


-- Ingest every file found (recursively) under :doc_path into the `bytes` table,
-- one row per file path. Only paths not already present in the target are inserted.
--
-- Derived columns:
--   vendor_name         - first folder name below :doc_path ('' when the file sits
--                         directly in :doc_path, because regexp_extract returns ''
--                         on no match).
--   file_name           - last segment of the path.
--   vendor_folder_paths - paths of all *other* files with the same vendor_name.
--
-- NOTE(review): there is no WHEN MATCHED clause, so rows that already exist are
-- never refreshed; their vendor_folder_paths will not pick up files that are added
-- to the vendor folder on later runs. Confirm that insert-only is intended.
MERGE INTO IDENTIFIER(:catalog || '.' || :schema || '.bytes') AS target
USING (
  -- Single scan of the source folder; both the inserted rows and the sibling
  -- lookup below read from this CTE, so the path parsing is defined only once.
  WITH files AS (
    SELECT
      path,
      modificationTime,
      length,
      _metadata,
      content,
      -- :doc_path is matched literally (\Q...\E quoting) so regex metacharacters in
      -- the folder name cannot alter the pattern, and any trailing '/' is stripped
      -- so '<dir>/' and '<dir>' behave the same. Raw literals (r'...') keep the
      -- backslashes intact regardless of string-escape settings.
      regexp_extract(
        path,
        r'\Q' || regexp_replace(:doc_path, '/+$', '') || r'\E/([^/]+)/',
        1
      ) AS vendor_name,
      regexp_extract(path, '/([^/]+)$', 1) AS file_name
    FROM READ_FILES(:doc_path, format => 'binaryFile', recursiveFileLookup => true)
  )
  SELECT
    f.path,
    f.modificationTime,
    f.length,
    f._metadata,
    f.content,
    f.vendor_name,
    f.file_name,
    (
      -- Exclude the row's own file by its unique path rather than by file_name:
      -- with recursive lookup, two different files in separate sub-folders can
      -- share a file name and would otherwise hide each other.
      SELECT collect_list(sibling.path)
      FROM files AS sibling
      WHERE sibling.vendor_name = f.vendor_name
        AND sibling.path <> f.path
    ) AS vendor_folder_paths
  FROM files AS f
) AS source
ON target.path = source.path
WHEN NOT MATCHED THEN
  -- Explicit column mapping so a schema change on either side fails loudly
  -- instead of being resolved implicitly by INSERT *.
  INSERT (
    path,
    modificationTime,
    length,
    _metadata,
    content,
    vendor_name,
    file_name,
    vendor_folder_paths
  )
  VALUES (
    source.path,
    source.modificationTime,
    source.length,
    source._metadata,
    source.content,
    source.vendor_name,
    source.file_name,
    source.vendor_folder_paths
  );