In [0]:
# 05_Gold_Layer

from pyspark.sql.functions import col, sum, count, desc

# Point the session at the catalog/schema that the unqualified table names
# used in this notebook resolve against.
for stmt in ("USE CATALOG main", "USE SCHEMA ecommerce"):
    spark.sql(stmt)

print("Building Gold Layer Aggregations...")

# Source for every aggregation in this notebook: the fact_sales table.
df_fact = spark.table("fact_sales")

# ====================================================
# AGGREGATION 1: SALES BY STATE
# ====================================================
# One output row per customer_state with summed revenue and a count of
# non-null order_id values, sorted by revenue (highest first).
# NOTE(review): count("order_id") counts rows, not distinct orders; if
# fact_sales can hold several rows per order, confirm this is intended.
print("1. Creating gold_sales_by_state...")
df_state = (
    df_fact.groupBy("customer_state")
    .agg(
        sum("revenue").alias("total_revenue"),
        count("order_id").alias("total_orders"),
    )
    .sort(desc("total_revenue"))
)

# Replace any previous version of the table with the fresh aggregate.
(
    df_state.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_sales_by_state")
)


# ====================================================
# AGGREGATION 2: DAILY SALES TRENDS
# ====================================================
# One output row per order_date with summed revenue and a count of
# non-null order_id values, sorted chronologically (ascending).
# NOTE(review): count("order_id") counts rows, not distinct orders; if
# fact_sales can hold several rows per order, confirm this is intended.
print("2. Creating gold_daily_sales...")
df_daily = (
    df_fact.groupBy("order_date")
    .agg(
        sum("revenue").alias("daily_revenue"),
        count("order_id").alias("daily_orders"),
    )
    .sort("order_date")
)

# Replace any previous version of the table with the fresh aggregate.
(
    df_daily.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_daily_sales")
)


# ====================================================
# AGGREGATION 3: TOP PRODUCTS (By Category)
# ====================================================
# One output row per product_category_name with summed revenue and a count
# of non-null order_id values (exposed as units_sold), sorted by revenue
# (highest first).
print("3. Creating gold_top_products...")
df_products = (
    df_fact.groupBy("product_category_name")
    .agg(
        sum("revenue").alias("category_revenue"),
        count("order_id").alias("units_sold"),
    )
    .sort(desc("category_revenue"))
)

# Replace any previous version of the table with the fresh aggregate.
(
    df_products.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_top_products")
)

print("------------------------------------------------")
print("SUCCESS: Gold Layer Built! Dashboards are ready.")

# Let's peek at the top selling categories.
# Read back the persisted gold table rather than reusing df_products: Spark
# DataFrames are lazy and df_products is not cached, so displaying it would
# re-run the full fact_sales aggregation and sort a second time. Reading the
# table is cheaper and previews what was actually written.
# The explicit sort is required because the row order of a stored table is
# not guaranteed when it is read back.
display(
    spark.read.table("gold_top_products")
    .orderBy(col("category_revenue").desc())
    .limit(5)
)