In [0]:
# Rebuild stoyan.fact_orders from the silver orders / order_items tables and the
# customer / product dimensions. The grain is one row per (order, line item); orders
# without line items and rows without a matching dimension record are kept (LEFT JOINs).
#
# The natural key (order_nk) is built exactly once in the inner query and the surrogate
# key (order_sk) is derived from it in the outer query, so the two can never drift apart.
# Every missing key component is rendered with the same 'NULL' placeholder.
# NOTE(review): order_id previously used the placeholder 'Null' (mixed case); rows with a
# NULL order_id therefore get a different order_nk / order_sk after this change.
spark.sql("""
CREATE OR REPLACE TABLE stoyan.fact_orders AS
SELECT
    -- Step 1: Surrogate key for the order line = MD5 hash of the natural key built below
    MD5(keyed.order_nk) AS order_sk,

    -- Step 2: Surrogate keys of the customer and product (NULL when no dimension row matched)
    keyed.cust_sk,                          -- From dim_customers
    keyed.prod_sk,                          -- From dim_products

    -- Step 3: Natural key of the order line: order_id, line_item_id, cust_nk and prod_nk joined with '_'
    keyed.order_nk,

    -- Step 4: Natural keys of the customer and product
    keyed.cust_nk,
    keyed.prod_nk,

    -- Step 5: Order details from silver__orders and silver__order_items
    keyed.order_id,                         -- From silver__orders
    keyed.line_item_id,                     -- From silver__order_items (NULL for orders without items)
    keyed.order_date,                       -- From silver__orders
    keyed.order_mode,                       -- From silver__orders
    keyed.order_status,                     -- From silver__orders
    keyed.unit_price,                       -- From silver__order_items
    keyed.quantity,                         -- From silver__order_items

    -- Step 6: Insert timestamp, taken from the order's consume_timestamp
    --         (per the original note: the time the data was ingested into the bronze area)
    keyed.insert_timestamp,

    -- Step 7: Timestamp of this rebuild
    current_timestamp() AS updated_timestamp
FROM (
    SELECT
        -- Single definition of the natural key; COALESCE keeps the key non-NULL
        -- even when a component is missing (e.g. no line item or no dimension match)
        concat(
            COALESCE(o.order_id, 'NULL'), '_',
            COALESCE(oi.line_item_id, 'NULL'), '_',
            COALESCE(c.cust_nk, 'NULL'), '_',
            COALESCE(p.prod_nk, 'NULL')
        ) AS order_nk,
        c.cust_sk,
        p.prod_sk,
        c.cust_nk,
        p.prod_nk,
        o.order_id,
        oi.line_item_id,
        o.order_date,
        o.order_mode,
        o.order_status,
        oi.unit_price,
        oi.quantity,
        o.consume_timestamp AS insert_timestamp
    FROM
        stoyan.silver__orders o                   -- Driving table: every order is kept
    LEFT JOIN
        stoyan.silver__order_items oi ON o.order_id = oi.order_id   -- One row per line item of the order
    LEFT JOIN
        -- row_num = 1 is kept in the ON clause so unmatched orders are not filtered out;
        -- per the original note it selects the most recent customer record
        stoyan.dim_customers c ON o.customer_id = c.cust_nk AND c.row_num = 1
    LEFT JOIN
        -- Same pattern for products: row_num = 1 is (per the original note) the most recent record
        stoyan.dim_products p ON oi.product_id = p.prod_nk AND p.row_num = 1
) keyed
""")


num_affected_rows,num_inserted_rows


In [0]:
# Load the rebuilt fact table and render it in the notebook output for a visual check.
spark.table('stoyan.fact_orders').display()

order_sk,cust_sk,prod_sk,order_nk,cust_nk,prod_nk,order_id,line_item_id,order_date,order_mode,order_status,unit_price,quantity,insert_timestamp,updated_timestamp
eb115479ec841e450e7b029a64e59e94,38b3eff8baf56627478ec76a704e9b52,,2458_NULL_101_NULL,101.0,,2458,,2007-08-16T15:34:12.234359Z,direct,0,,,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
6ad607e84f9f39c95be20cf080ca2d9d,ec8956637a99787bd197eacd77acce5e,,2397_3_102_NULL,102.0,,2397,3.0,2007-11-19T14:41:54.696211Z,direct,1,880.0,16.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
c726cfc8530e2ae8e9b2a3123693c896,,,2454_NULL_NULL_NULL,,,2454,,2007-10-02T17:49:34.67834Z,direct,1,,,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
78154a698b40dbb16a638b7ad76b2fa8,,,2354_7_NULL_NULL,,,2354,7.0,2008-07-14T18:18:23.234567Z,direct,0,17.0,58.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
dec2925b086dc92e9cb732fea4a0a1b3,,,2358_5_NULL_NULL,,,2358,5.0,2008-01-08T17:03:12.654278Z,direct,2,55.0,14.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
476c51ed785aeb5750b259006bb9261e,,,2381_NULL_NULL_NULL,,,2381,,2008-05-14T20:59:08.843679Z,direct,3,,,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
001c2742cca41b2a14bc32c5a35b08c7,,,2440_4_NULL_NULL,,,2440,4.0,2007-08-31T21:53:06.008765Z,direct,3,86.9,7.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
7974992b455c34b6dce26adb68c366e5,,10907813b97e249163587e6246612e21,2357_2_NULL_2245,,2245.0,2357,2.0,2006-01-08T20:19:44.123456Z,direct,5,462.0,26.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
185d93d8e9183805a0adbaa01065ab51,,,2394_7_NULL_NULL,,,2394,7.0,2008-02-10T21:22:35.564789Z,direct,5,19.0,48.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
eb02069403d75864c52bb2783a6ab7a5,,,2394_6_NULL_NULL,,,2394,6.0,2008-02-10T21:22:35.564789Z,direct,5,18.0,45.0,2024-12-15T20:00:52.137Z,2024-12-16T07:40:33.193Z
