In [0]:
-- Bronze layer: streaming ingest of the raw customer JSON files through
-- cloud_files, with column-type inference switched on. All source columns are
-- passed through unchanged and two audit columns are appended:
--   input_file_path     - taken from _metadata.file_path
--   Ingestion_timestamp - current_timestamp() when the query is evaluated
CREATE OR REFRESH STREAMING TABLE bronze_customers
COMMENT 'Raw Customer Data Ingested into Bronze'
TBLPROPERTIES ('quality' = 'bronze')
AS
SELECT
    *,
    _metadata.file_path AS input_file_path,
    current_timestamp() AS Ingestion_timestamp
FROM cloud_files(
    's3://dbxtrainingbucket/circuitbox/landing/operational_data/customers',
    'json',
    map('cloudFiles.inferColumnTypes', 'true')
)

In [0]:
-- Silver layer: customer records streamed from bronze_customers, narrowed to
-- the business columns, with date_of_birth and created_date cast to DATE.
--
-- Expectations (evaluated against the columns produced by the SELECT below):
--   valid_customer_id    NULL customer_id   -> the update fails
--   valid_customer_name  NULL customer_name -> the row is dropped
--   valid_telephone      telephone shorter than 10 characters
--   valid_email          NULL email
--   valid_date_of_birth  date_of_birth earlier than 1920-01-01
-- The last three declare no ON VIOLATION action, so violating rows are not
-- dropped and the update is not failed by them.
--
-- valid_date_of_birth uses a typed DATE literal so the check is a date-to-date
-- comparison rather than relying on implicit string-to-date coercion.
CREATE OR REFRESH STREAMING TABLE silver_customers_clean(
  CONSTRAINT valid_customer_id EXPECT (customer_id IS NOT NULL) ON VIOLATION FAIL UPDATE,
  CONSTRAINT valid_customer_name EXPECT (customer_name IS NOT NULL) ON VIOLATION DROP ROW,
  CONSTRAINT valid_telephone EXPECT (LENGTH(telephone) >= 10),
  CONSTRAINT valid_email EXPECT (email IS NOT NULL),
  CONSTRAINT valid_date_of_birth EXPECT (date_of_birth >= DATE '1920-01-01')
)
  COMMENT 'Customer Data Cleaned and Enriched'
  TBLPROPERTIES ('quality' = 'silver') AS
SELECT
  customer_id,
  customer_name,
  CAST(date_of_birth AS DATE) AS date_of_birth,
  telephone,
  email,
  CAST(created_date AS DATE) AS created_date
FROM
  STREAM(LIVE.bronze_customers)

In [0]:
%python
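# NOTE: everything in this cell is commented out. It is a disabled PySpark
# version of the bronze_customers ingestion, kept for reference only. Unlike the
# SQL cell, it loads the parent 'operational_data' directory rather than
# 'operational_data/customers'.
# from pyspark.sql.functions import input_file_name, current_timestamp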

# # Read streaming JSON files from S3 with schema inference
# customer_stream = (
#     spark.readStream
#     .format('cloudFiles')
#     .option('cloudFiles.format', 'json')
#     .option('cloudFiles.inferColumnTypes', 'true')
#     .load('s3://dbxtrainingbucket/circuitbox/landing/operational_data')
# )

# # Add input_file_path and ingestion_timestamp columns
# bronze_customers_stream = customer_stream \
#     .withColumn('input_file_path', input_file_name()) \
#     .withColumn('Ingestion_timestamp', current_timestamp())

# # Write to Delta table with properties and comment
# (
#     bronze_customers_stream.writeStream
#     .format('delta')
#     # .option('checkpointLocation', '/tmp/checkpoints/bronze_customers')
#     .outputMode('append')
#     .toTable('bronze_customers',
#              tableProperty={'quality': 'bronze'},
#              comment='Raw Customer Data Ingested into Bronze')
# )