In [0]:
-- Item-level fact table stored as Delta.
-- IF NOT EXISTS makes this cell a no-op when the table is already present,
-- so the notebook can be re-run without dropping data.
CREATE TABLE IF NOT EXISTS gold.fct_items
(
  -- Surrogate key; GENERATED ALWAYS means writers must not supply a value.
  item_sk                  BIGINT GENERATED ALWAYS AS IDENTITY,

  -- Surrogate keys of the related rows (presumably from the gold.dim_* tables
  -- joined when the load view is built -- confirm against those tables).
  order_sk                 BIGINT,
  product_sk               BIGINT,
  customer_sk              BIGINT,
  seller_sk                BIGINT,

  -- Measures, fixed-point with two decimal places.
  price                    DECIMAL(10, 2),
  freight_value            DECIMAL(10, 2),

  -- Timestamp carried on every row; used for ordering in the preview query.
  order_purchase_timestamp TIMESTAMP
)
USING DELTA

  

In [0]:
%python
items_df = (
  spark.read.table("silver.order_items_cleaned")
.join(spark.read.table("gold.dim_orders"), "order_id")
.join(spark.read.table("gold.dim_payments"), "order_id")
.join(spark.read.table("gold.dim_reviews"), "order_id", "left")
.join(spark.read.table("gold.dim_sellers"), "seller_id")
.join(spark.read.table("gold.dim_products"), "product_id")
.join(spark.read.table("gold.dim_customer"), "customer_id")
.select("order_sk", "product_sk", "customer_sk", "seller_sk", "price", "freight_value", "order_purchase_timestamp")
)
items_df.display()


In [0]:
%python
# Register the DataFrame as the temp view "items_view" so that SQL cells in
# this notebook session can read it by name. An existing view with the same
# name is replaced, which keeps the cell safe to re-run.
items_df.createOrReplaceTempView("items_view")

In [0]:
-- Upsert the staged item rows from items_view into gold.fct_items.
--
-- The target holds one row per order item, so matching on order_sk alone is
-- not enough: an order with several items yields several source rows that all
-- match the same target rows, and Delta aborts the MERGE (multiple source rows
-- matched the same target row) on every run after the first.
-- The match therefore uses order + product + seller, and the source is
-- de-duplicated so exact repeats cannot hit the same target row twice.
--
-- NOTE(review): the table has no line-level key (e.g. order_item_id), so two
-- identical lines for the same product/seller in one order collapse into one
-- row here. Add such a key to the table and to this condition if repeated
-- lines must be kept separately.
MERGE INTO gold.fct_items AS t
USING (
  SELECT DISTINCT
    order_sk,
    product_sk,
    customer_sk,
    seller_sk,
    price,
    freight_value,
    order_purchase_timestamp
  FROM items_view
) AS s
ON  t.order_sk   = s.order_sk
AND t.product_sk = s.product_sk
AND t.seller_sk  = s.seller_sk

-- Existing item: refresh the measures only.
WHEN MATCHED THEN
  UPDATE SET
    t.price         = s.price,
    t.freight_value = s.freight_value

-- New item: insert it; item_sk is omitted because it is an identity column.
WHEN NOT MATCHED
  THEN INSERT (
    order_sk,
    product_sk,
    customer_sk,
    seller_sk,
    price,
    freight_value,
    order_purchase_timestamp
  ) VALUES (
    s.order_sk,
    s.product_sk,
    s.customer_sk,
    s.seller_sk,
    s.price,
    s.freight_value,
    s.order_purchase_timestamp
  )


In [0]:
-- Sanity check after the load: show the ten rows with the earliest
-- order_purchase_timestamp.
SELECT *
FROM gold.fct_items
ORDER BY order_purchase_timestamp ASC
LIMIT 10;