In [None]:

# Make sure the Silver target table exists before the incremental load runs.
# IF NOT EXISTS leaves an already existing table untouched.
silver_orders_ddl = """
CREATE TABLE IF NOT EXISTS silver_orders (
    order_id STRING,
    customer_id STRING,
    product_id STRING,
    quantity INT,
    total_amount DOUBLE,
    transaction_date DATE,
    order_status STRING,
    last_updated TIMESTAMP
)
USING DELTA
"""
spark.sql(silver_orders_ddl)


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 4, Finished, Available, Finished)

DataFrame[]

In [None]:
# Watermark for the incremental load: the newest `last_updated` value already
# stored in silver_orders, as a string that can be embedded in a SQL literal.
#
# The timestamp is cast to STRING inside Spark so it is rendered in the Spark
# session time zone. Collecting the raw TIMESTAMP instead yields a naive Python
# datetime in the driver's local time zone; interpolating that into SQL shifts
# the watermark whenever the two time zones differ.
# NOTE(review): `last_updated` is the Silver processing time, while the
# watermark is compared against Bronze `ingestion_timestamp` — confirm that
# this is the intended watermark column.
INITIAL_WATERMARK = "1900-01-01T00:00:00.000+00:00"

last_processed_df = spark.sql(
    "SELECT CAST(MAX(last_updated) AS STRING) AS last_processed FROM silver_orders"
)
# An aggregate without GROUP BY always returns exactly one row.
last_processed_timestamp = last_processed_df.first()["last_processed"]

# MAX() over an empty table is NULL (None in Python): fall back to a sentinel
# far in the past so the first run does not filter on a missing watermark.
if last_processed_timestamp is None:
    last_processed_timestamp = INITIAL_WATERMARK


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 5, Finished, Available, Finished)

In [None]:
# Expose only the Bronze rows whose ingestion_timestamp is newer than the
# watermark as a temporary view. `last_processed_timestamp` must already be
# defined by an earlier cell; it is interpolated as a quoted SQL literal.
bronze_incremental_sql = f"""
CREATE OR REPLACE TEMPORARY VIEW bronze_incremental_orders AS
SELECT *
FROM bronzelayer.orders WHERE ingestion_timestamp > '{last_processed_timestamp}'
"""
spark.sql(bronze_incremental_sql)


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 6, Finished, Available, Finished)

DataFrame[]

In [None]:

# Quick look at the raw Bronze orders table (show() prints only the first rows).
bronze_orders_df = spark.sql("select * from bronzelayer.orders")
bronze_orders_df.show()

StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 9, Finished, Available, Finished)

+--------------+-----------+----------+--------+------------+----------------+--------------+----------+--------------------+
|transaction_id|customer_id|product_id|quantity|total_amount|transaction_date|payment_method|store_type| ingestion_timestamp|
+--------------+-----------+----------+--------+------------+----------------+--------------+----------+--------------------+
|     TRX000063|        234|        67|       2|      550.83|      2021-09-12| Bank Transfer|    Online|2024-09-26 18:41:...|
|     TRX000115|         58|       475|       2|      299.56|      2022-07-31| Bank Transfer|    Online|2024-09-26 18:41:...|
|     TRX000126|         29|       609|       2|      706.21|      2021-12-02| Bank Transfer|    Online|2024-09-26 18:41:...|
|     TRX000144|        122|       202|       2|      446.44|      2022-09-24| Bank Transfer|    Online|2024-09-26 18:41:...|
|     TRX000311|        378|       719|       2|      945.18|      2020-02-19| Bank Transfer|    Online|2024-09-26 18:

In [None]:
# Build the cleaned Silver projection of the incremental Bronze rows.
#
# Row selection:
#   * rows missing transaction_id, transaction_date, customer_id or product_id
#     are dropped. transaction_id becomes the MERGE key (order_id); a NULL key
#     can never satisfy an equality join, so such rows could never be updated.
#   * only the most recently ingested row per transaction_id is kept. Delta
#     MERGE raises an error when several source rows match the same target
#     row, and would insert duplicates for keys not yet present in the target.
#
# Column rules:
#   * negative quantity / total_amount values are clamped to 0.
#   * order_status is evaluated on the raw Bronze quantity / total_amount, not
#     on the clamped values (two negative inputs therefore yield
#     'In Progress'). NOTE(review): confirm this is the intended rule.
#   * last_updated is the time the query runs, not the Bronze ingestion time.
spark.sql("""
CREATE OR REPLACE TEMPORARY VIEW silver_incremental_orders AS
WITH ranked_orders AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY transaction_id
            ORDER BY ingestion_timestamp DESC
        ) AS row_rank
    FROM bronze_incremental_orders
    WHERE transaction_id IS NOT NULL
      AND transaction_date IS NOT NULL
      AND customer_id IS NOT NULL
      AND product_id IS NOT NULL
)
SELECT
    transaction_id AS order_id,
    customer_id,
    product_id,
    CASE
        WHEN quantity < 0 THEN 0
        ELSE quantity
    END AS quantity,
    CASE
        WHEN total_amount < 0 THEN 0
        ELSE total_amount
    END AS total_amount,
    CAST(transaction_date AS DATE) AS transaction_date,
    CASE
        WHEN quantity = 0 AND total_amount = 0 THEN 'Cancelled'
        WHEN quantity > 0 AND total_amount > 0 THEN 'Completed'
        ELSE 'In Progress'
    END AS order_status,
    CURRENT_TIMESTAMP() AS last_updated
FROM ranked_orders
WHERE row_rank = 1
""")


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 10, Finished, Available, Finished)

DataFrame[]

In [None]:
# Upsert the incremental Silver rows into silver_orders, keyed on order_id:
# matching orders are overwritten with the source columns, new orders are
# inserted. The returned DataFrame carries the affected-row counters.
silver_upsert_sql = """
MERGE INTO silver_orders target
USING silver_incremental_orders source
ON target.order_id = source.order_id
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
"""
spark.sql(silver_upsert_sql)


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 11, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [None]:
# Spot-check the merge result: print up to ten rows from the Silver table.
silver_preview_df = spark.sql("SELECT * FROM silver_orders LIMIT 10")
silver_preview_df.show()


StatementMeta(, 1ca2c3e5-96b8-44c8-bf1d-fb370591266a, 12, Finished, Available, Finished)

+---------+-----------+----------+--------+------------+----------------+------------+--------------------+
| order_id|customer_id|product_id|quantity|total_amount|transaction_date|order_status|        last_updated|
+---------+-----------+----------+--------+------------+----------------+------------+--------------------+
|TRX001128|        240|       422|       7|         0.0|      2020-07-24| In Progress|2024-09-28 13:00:...|
|TRX001179|         85|       434|       7|         0.0|      2023-05-28| In Progress|2024-09-28 13:00:...|
|TRX001600|        879|       134|       7|         0.0|      2022-07-02| In Progress|2024-09-28 13:00:...|
|TRX003550|        917|       889|       7|         0.0|      2023-04-10| In Progress|2024-09-28 13:00:...|
|TRX005252|        942|       402|       7|         0.0|      2021-12-10| In Progress|2024-09-28 13:00:...|
|TRX009297|        475|       869|       7|         0.0|      2022-03-24| In Progress|2024-09-28 13:00:...|
|TRX009266|        983|     