# Gold Fact Build - Daily Sales Fact

## Summary
- Purpose: Join Silver-level transaction, customer SCD, and product dimensions to build the gold fact table used for reporting.
- Inputs: `capstone.silver.transactions`, `capstone.silver.customers_scd2`, `capstone.silver.products`
- Outputs: `capstone.gold.daily_sales_fact` (enriched fact table)
- Audit: Calls `audit_log(spark, table_name, log_path)` after table creation or insert operations.

## Key Transformations
- Enrich transactions with customer and product dimensions
- Compute `line_total` as `quantity * price` and derive `order_date` by casting `order_timestamp` to a date

## Usage
- Run after the Silver tables are populated. On first run the gold table is created as a Delta table clustered by `(order_date, product_category, customer_id)`; later runs append to it.


In [None]:
# Notebook parameter: the Unity Catalog name every table path below is built from.
dbutils.widgets.text(
    name="catalog",
    defaultValue="capstone",
    label="Enter the Catalog: ",
)

In [None]:
from pyspark.sql.functions import current_timestamp, col
from capstone_pipeline.main import audit_log

# Resolve the catalog widget once and derive both the target table name and
# the audit-history location from it.
catalog = dbutils.widgets.get("catalog")

# Fully qualified name of the gold fact table built by this notebook.
table_name = f"{catalog}.gold.daily_sales_fact"
# Volume path handed to audit_log() together with table_name.
log_path = f"/Volumes/{catalog}/meta/history"

In [None]:
# Build the enriched transaction DataFrame (transactions + point-in-time
# customer attributes + product attributes) and expose it as the temp view
# `vw_txn_with_customer_product` for the SQL cell that writes the gold table.

# Read the widget once. Reusing double quotes inside a double-quoted f-string
# (f"{dbutils.widgets.get("catalog")}...") is a SyntaxError before Python 3.12.
catalog = dbutils.widgets.get("catalog")

df_silver_trans = spark.table(f"{catalog}.silver.transactions")

# Quick visual sanity check of the input.
display(df_silver_trans.limit(10))

# Point-in-time SCD2 lookup: pick the version of *the transaction's own
# customer* that was active when the order was placed. Without the key
# equality every transaction would match every customer whose validity window
# covers the order timestamp, multiplying fact rows.
# NOTE(review): assumes silver.transactions carries a `customer_id` column
# matching customers_scd2.customer_id - confirm against the Silver schema.
join_condition_cust = (
    (col("transaction.customer_id") == col("customer.customer_id")) &
    (col("transaction.order_timestamp") >= col("customer.start_date")) &
    # A null end_date marks the currently open version.
    ((col("customer.end_date").isNull()) | (col("transaction.order_timestamp") < col("customer.end_date")))
)

# Calendar date of the order, derived from the order timestamp.
df_silver_trans_staged = df_silver_trans.withColumn(
    "order_date",
    col("order_timestamp").cast("date"))

df_customers_scd2 = spark.table(f"{catalog}.silver.customers_scd2")
df_products = spark.table(f"{catalog}.silver.products")

# Left joins keep every transaction even when no customer version or product
# row matches; the corresponding dimension columns are then null.
df_txn_with_customer_product = (
    df_silver_trans_staged.alias("transaction")
    .join(df_customers_scd2.alias("customer"), join_condition_cust, how="left")
    .join(df_products.alias("product"), on="item_id", how="left")
    .select(
        col("transaction.order_id"),
        col("transaction.order_date"),
        col("transaction.item_id"),
        col("product.product_name"),
        col("product.category").alias("product_category"),
        col("customer.customer_key").alias("customer_key"),
        col("customer.customer_id").alias("customer_id"),
        col("customer.name").alias("customer_name"),
        col("customer.email").alias("customer_email"),
        col("customer.region").alias("customer_region"),
        col("transaction.quantity"),
        col("transaction.price"),
        (col("transaction.quantity") * col("transaction.price")).alias("line_total"),
        col("transaction.order_timestamp")))

df_txn_with_customer_product.createOrReplaceTempView("vw_txn_with_customer_product")


In [None]:
# Write the enriched view into the gold fact table: create it (clustered Delta
# table) on the first run, append on later runs.
#
# `table_name` is the same fully qualified name that is later passed to
# audit_log(), so the table written and the table audited cannot diverge.
# It also avoids f"{dbutils.widgets.get("catalog")}..." - reusing double
# quotes inside a double-quoted f-string is a SyntaxError before Python 3.12.

# Single source of truth for the column list so the CREATE and INSERT paths
# cannot drift apart. `customer_region` is stored as `region` in the gold
# table (the alias names the column in the CTAS; INSERT matches by position).
fact_select_sql = """
    SELECT
    order_id,
    order_date,
    item_id,
    product_name,
    product_category,
    customer_key,
    customer_id,
    customer_name,
    customer_email,
    customer_region AS region,
    quantity,
    price,
    line_total,
    order_timestamp
    FROM vw_txn_with_customer_product"""

if not spark.catalog.tableExists(table_name):
    # First run: create the table directly from the view.
    spark.sql(f"""CREATE OR REPLACE TABLE {table_name}
    USING DELTA
    CLUSTER BY (order_date, product_category, customer_id)
    AS{fact_select_sql}""")
else:
    # NOTE(review): this appends every row currently in the view. If
    # silver.transactions is not limited to new rows, re-running the notebook
    # will duplicate facts - confirm upstream is incremental or switch to MERGE.
    spark.sql(f"INSERT INTO {table_name}{fact_select_sql}")

In [None]:
# Record an audit entry for the gold table just created/appended to, passing
# the active Spark session, the fully qualified table name and the history
# path under the catalog's `meta` volume.
audit_log(spark, table_name, log_path)