In [0]:
%sql
-- Make sure the gold schema exists before creating tables in it.
CREATE SCHEMA IF NOT EXISTS workspace.gold;

-- Per-product revenue summary; one row per (product_id, product_name, currency).
-- Populated by the PySpark cell in this notebook that aggregates the silver orders.
CREATE TABLE IF NOT EXISTS workspace.gold.product_revenue_basic (
  product_id     STRING,
  product_name   STRING,
  currency       STRING,
  total_quantity BIGINT,  -- sum of qty across all exploded order lines
  total_revenue  BIGINT   -- sum of qty * price across all exploded order lines
) USING DELTA;


In [0]:
from pyspark.sql.functions import explode, col, sum as _sum

# Source: cleaned orders from the silver layer.
silver_df = spark.read.table("workspace.silver.sales_orders_clean")

# One row per element of `ordered_products`. Only the exploded struct is kept,
# since nothing else from the order is used downstream.
# Note: explode() emits no row for orders whose `ordered_products` is null or empty.
exploded_products = silver_df.select(explode("ordered_products").alias("product"))

# Flatten the struct fields needed for the aggregation.
line_items = exploded_products.select(
    col("product.id").alias("product_id"),
    col("product.name").alias("product_name"),
    col("product.curr").alias("currency"),
    col("product.qty").alias("qty"),
    col("product.price").alias("price"),
)

# Revenue of a single order line.
line_items_with_revenue = line_items.withColumn("revenue", col("qty") * col("price"))

# Totals per product and currency (currency is part of the key so that
# amounts in different currencies are never added together).
grouping_keys = ["product_id", "product_name", "currency"]
gold_df = line_items_with_revenue.groupBy(*grouping_keys).agg(
    _sum("qty").alias("total_quantity"),
    _sum("revenue").alias("total_revenue"),
)

# Replace the contents of the gold table with the freshly computed totals.
gold_df.write.mode("overwrite").saveAsTable("workspace.gold.product_revenue_basic")


In [0]:
%sql
-- Top 10 product/currency rows by total revenue.
-- The grouping-key columns are added as tie-breakers: without them, rows with
-- equal total_revenue at the LIMIT boundary could differ from run to run.
SELECT *
FROM workspace.gold.product_revenue_basic
ORDER BY
  total_revenue DESC,
  product_id,
  product_name,
  currency
LIMIT 10;
