In [0]:
import dlt
from pyspark.sql.functions import col

# Catalog and schema of the physical Bronze tables read by this pipeline.
SOURCE_CATALOG = "adb_catalog"
SOURCE_SCHEMA = "bronze"

# -- Customers Silver --
# Data-quality rules for the target table; a row failing any rule is dropped.
customer_quality_rules = {
    "valid_customer_id": "customerId IS NOT NULL",
    "valid_email": "email LIKE '%@%.%'",
}

dlt.create_streaming_table(
    name="customers_silver",
    comment="Cleaned and deduplicated customer records. Implements SCD Type 1 logic.",
    expect_all_or_drop=customer_quality_rules,
)

# CDC flow from the Bronze customers table into the target above: changes are
# keyed on customerId, sequenced by ingestion_timestamp, and stored as SCD Type 1.
customers_bronze_table = f"{SOURCE_CATALOG}.{SOURCE_SCHEMA}.customers_bronze"

dlt.create_auto_cdc_flow(
    target="customers_silver",
    source=customers_bronze_table,
    keys=["customerId"],
    sequence_by=col("ingestion_timestamp"),
    stored_as_scd_type="1",
)

# -- Orders Silver --
# Data-quality rules for the target table; a row failing any rule is dropped.
order_quality_rules = {
    "valid_order_id": "orderId IS NOT NULL",
    "positive_amount": "order_amount > 0",
}

dlt.create_streaming_table(
    name="orders_silver",
    comment="Cleaned order transactions. Rows with null IDs or non-positive amounts are dropped.",
    expect_all_or_drop=order_quality_rules,
)

# CDC flow from the Bronze orders table into the target above: changes are
# keyed on orderId, sequenced by ingestion_timestamp, and stored as SCD Type 1.
orders_bronze_table = f"{SOURCE_CATALOG}.{SOURCE_SCHEMA}.orders_bronze"

dlt.create_auto_cdc_flow(
    target="orders_silver",
    source=orders_bronze_table,
    keys=["orderId"],
    sequence_by=col("ingestion_timestamp"),
    stored_as_scd_type="1",
)