In [0]:
%sql
-- Make sure the target schema for the gold.* tables exists (no-op when it already does).
CREATE SCHEMA IF NOT EXISTS gold;

In [0]:
from pyspark.sql.functions import col, countDistinct, sum, round, to_date, year, month

In [0]:
# Source transactions from the Silver layer; later cells reuse `silver_sales`.
silver_sales = spark.table("silver.sales")

# Bucket every transaction by the date part of its UTC timestamp.
sales_by_day = silver_sales.withColumn(
    "sales_date", to_date(col("transaction_ts_utc"))
).groupBy("sales_date")

# Per sales_date: distinct transaction count and revenue rounded to 2 decimals.
# NOTE: `sum` and `round` are the pyspark.sql.functions versions, not the built-ins.
daily_sales_df = sales_by_day.agg(
    countDistinct("transaction_id").alias("total_orders"),
    round(sum("total_amount_usd"), 2).alias("total_revenue_usd"),
)

# Replace the Gold table with the freshly computed snapshot.
(
    daily_sales_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.daily_sales")
)


In [0]:
# Product attributes from the Silver layer.
silver_products = spark.table("silver.products")

# Columns that identify one output row of the product performance table.
product_keys = ["product_id", "product_name", "category", "brand"]

# Inner join: sales rows without a matching product_id in silver.products are dropped.
sales_with_products = silver_sales.join(silver_products, on="product_id", how="inner")

# Units sold and revenue (rounded to 2 decimals) per product.
product_perf_df = sales_with_products.groupBy(*product_keys).agg(
    sum("quantity").alias("total_quantity_sold"),
    round(sum("total_amount_usd"), 2).alias("total_revenue_usd"),
)

# Replace the Gold table with the freshly computed snapshot.
(
    product_perf_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.product_performance")
)


In [0]:
# Store attributes from the Silver layer.
silver_stores = spark.table("silver.stores")

# Columns that identify one output row of the store revenue table.
store_keys = ["store_id", "store_name", "region", "country"]

# Inner join: sales rows without a matching store_id in silver.stores are dropped.
sales_with_stores = silver_sales.join(silver_stores, on="store_id", how="inner")

# Revenue (rounded to 2 decimals) and distinct transaction count per store.
store_revenue_df = sales_with_stores.groupBy(*store_keys).agg(
    round(sum("total_amount_usd"), 2).alias("total_revenue_usd"),
    countDistinct("transaction_id").alias("total_transactions"),
)

# Replace the Gold table with the freshly computed snapshot.
(
    store_revenue_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.store_revenue")
)


In [0]:
# Derive the calendar year and month from the UTC transaction timestamp.
transaction_ts = col("transaction_ts_utc")
sales_with_period = (
    silver_sales
    .withColumn("year", year(transaction_ts))
    .withColumn("month", month(transaction_ts))
)

# Revenue (rounded to 2 decimals) per (year, month).
monthly_revenue_df = sales_with_period.groupBy("year", "month").agg(
    round(sum("total_amount_usd"), 2).alias("total_revenue_usd"),
)

# Replace the Gold table with the freshly computed snapshot.
(
    monthly_revenue_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.monthly_revenue")
)


In [0]:
# Sanity check: report the row count of every Gold table written above.
gold_table_labels = [
    ("Daily Sales", "gold.daily_sales"),
    ("Product Performance", "gold.product_performance"),
    ("Store Revenue", "gold.store_revenue"),
    ("Monthly Revenue", "gold.monthly_revenue"),
]
for label, gold_table in gold_table_labels:
    print(f"{label}:", spark.table(gold_table).count())


Daily Sales: 7
Product Performance: 189
Store Revenue: 50
Monthly Revenue: 2


In [0]:
# DDL for the audit table written by the logging helper.
# IF NOT EXISTS keeps this cell safe to re-run.
GOLD_LOGS_DDL = """
CREATE TABLE IF NOT EXISTS gold_logs (
    run_id STRING,
    table_name STRING,
    status STRING,
    processing_time TIMESTAMP,
    row_count INT,
    error_message STRING
)
USING delta
"""

spark.sql(GOLD_LOGS_DDL)


DataFrame[]

In [0]:
import uuid
from pyspark.sql.functions import current_timestamp

def log_gold_table(table_name, status="SUCCESS", error_message=None):
    """
    Appends one audit row to gold_logs for a Gold table operation.

    The row is written through a typed DataFrame instead of a string-built
    INSERT statement, so quotes, backslashes or other special characters in
    ``table_name``, ``status`` or ``error_message`` (typical for exception
    text) can neither break the statement nor inject SQL.

    Args:
        table_name: Name of the Gold table the entry refers to; it is also
            read to obtain the row count when ``status`` is "SUCCESS".
        status: Outcome label stored in the log. Only the exact value
            "SUCCESS" triggers a row count; any other value logs 0 rows.
        error_message: Optional failure detail. ``None`` or an empty string
            is stored as NULL.

    Returns:
        None. The only effect is one row appended to ``gold_logs``.
    """
    run_id = str(uuid.uuid4())
    # Count only after a successful load; for any other status the table is not read.
    row_count = spark.table(table_name).count() if status == "SUCCESS" else 0

    # Values travel as data, never as SQL text. Falsy messages become NULL,
    # matching the behavior of the previous INSERT-based implementation.
    log_row = [(run_id, table_name, status, row_count, error_message or None)]
    log_schema = (
        "run_id STRING, table_name STRING, status STRING, "
        "row_count INT, error_message STRING"
    )

    (
        spark.createDataFrame(log_row, schema=log_schema)
        # Timestamp is taken on the Spark side, as the INSERT did with current_timestamp().
        .withColumn("processing_time", current_timestamp())
        # Keep the same column order as the gold_logs table definition.
        .select(
            "run_id",
            "table_name",
            "status",
            "processing_time",
            "row_count",
            "error_message",
        )
        .write.format("delta")
        .mode("append")
        .saveAsTable("gold_logs")
    )


In [0]:
# Record the daily sales load in gold_logs.
log_gold_table(table_name="gold.daily_sales")


In [0]:
# Record the product performance load in gold_logs.
log_gold_table(table_name="gold.product_performance")


In [0]:
# Record the store revenue load in gold_logs.
log_gold_table(table_name="gold.store_revenue")


In [0]:
# Record the monthly revenue load in gold_logs.
log_gold_table(table_name="gold.monthly_revenue")


In [0]:
# Show the audit log, most recent entries first, without truncating long values.
(
    spark.table("gold_logs")
    .sort("processing_time", ascending=False)
    .show(truncate=False)
)


+------------------------------------+------------------------+-------+--------------------------+---------+-------------+
|run_id                              |table_name              |status |processing_time           |row_count|error_message|
+------------------------------------+------------------------+-------+--------------------------+---------+-------------+
|87025b12-614f-4af5-9610-e0a4ad5cc807|gold.monthly_revenue    |SUCCESS|2025-12-23 18:25:24.902405|2        |NULL         |
|fd10c138-c7dd-4840-9570-90cc8983f477|gold.store_revenue      |SUCCESS|2025-12-23 18:25:15.756356|50       |NULL         |
|e9aff1c2-1df7-47f9-878f-1ab5149b66f1|gold.product_performance|SUCCESS|2025-12-23 18:25:09.740741|189      |NULL         |
|48da5697-a412-483f-8466-3192d9abd186|gold.daily_sales        |SUCCESS|2025-12-23 18:24:57.123988|7        |NULL         |
|c54ba4b5-6919-4783-8c8e-d0ab855c8f74|gold.store_revenue      |SUCCESS|2025-12-23 18:16:04.413828|50       |NULL         |
|8271f442-4ffe-4