In [0]:
# 04_Fact_Table

from pyspark.sql.functions import col, to_date

# Set context: every unqualified table name below resolves against
# catalog `main`, schema `ecommerce`.
spark.sql("USE CATALOG main")
spark.sql("USE SCHEMA ecommerce")

print("Building Fact Sales Table...")

# 1. Load Source Tables
# Order items and orders are read from the silver tables; customers and
# products are read from the dimension tables. The names are unpacked in
# the same order as the table list.
df_items, df_orders, df_cust, df_prod = (
    spark.read.table(source_table)
    for source_table in (
        "silver_order_items",
        "silver_orders",
        "dim_customers",
        "dim_products",
    )
)

# 2. Join Logic
# Order items are the deepest grain, so they drive the join:
#   - orders    (inner): supplies the purchase timestamp and the customer id
#   - customers (left):  supplies the customer state
#   - products  (left):  supplies the product category name
# The is_active checks are part of the ON clauses of the left joins, so an
# item without a matching active dimension row is kept with NULL attributes
# rather than being filtered out.
items = df_items.alias("i")
orders = df_orders.alias("o")
customers = df_cust.alias("c")
products = df_prod.alias("p")

# `== True` builds a Spark Column expression; it is not a Python truth test.
active_customer_match = (col("o.customer_id") == col("c.customer_id")) & (
    col("c.is_active") == True  # noqa: E712
)
active_product_match = (col("i.product_id") == col("p.product_id")) & (
    col("p.is_active") == True  # noqa: E712
)

purchase_ts = col("o.order_purchase_timestamp")

df_fact = (
    items.join(orders, on="order_id", how="inner")
    .join(customers, on=active_customer_match, how="left")
    .join(products, on=active_product_match, how="left")
    .select(
        # Keys
        col("i.order_id"),
        col("i.order_item_id"),
        col("o.customer_id"),
        col("i.product_id"),
        # Time: raw timestamp plus its calendar date
        purchase_ts,
        to_date(purchase_ts).alias("order_date"),
        # Measures; revenue is the derived metric price + freight
        col("i.price"),
        col("i.freight_value"),
        (col("i.price") + col("i.freight_value")).alias("revenue"),
        # Descriptive attributes from the dimensions
        col("c.customer_state"),
        col("p.product_category_name"),
    )
)

# 3. Write to Managed Table
# Overwrite replaces the existing contents of the Delta table `fact_sales`
# in the current catalog/schema on every run.
df_fact.write.format("delta").mode("overwrite").saveAsTable("fact_sales")

print("--> Created Table: main.ecommerce.fact_sales")
print("SUCCESS: Fact Table Created!")

# Preview the persisted table instead of df_fact. df_fact is a lazy,
# uncached plan, so displaying it would re-run the whole join against the
# source tables and would show recomputed rows, not what was written.
# Reading the table back is cheap and verifies the actual output.
display(spark.read.table("fact_sales").limit(5))  # Show first 5 rows to verify