## fact returns

In [0]:
import dlt
from pyspark.sql.functions import col, to_date
from pyspark.sql.types import IntegerType

@dlt.table(
    name="maven_uc.gold_dlt.fact_returns",
    comment="Fact table for returns"
)
def fact_returns():
    """Build the returns fact table from the returns silver table.

    Reads ``maven_uc.silver_dlt.returns_silver`` and keeps only the
    ``return_date``, ``product_id``, ``store_id`` and ``quantity`` columns,
    in that order. No filtering or type conversion is applied here.
    """
    fact_columns = ["return_date", "product_id", "store_id", "quantity"]
    returns_silver = dlt.read("maven_uc.silver_dlt.returns_silver")
    return returns_silver.select(*fact_columns)

## fact inventory events

In [0]:
@dlt.table(
    name="maven_uc.gold_dlt.fact_inventory_events",
    comment="Fact table for inventory restock events"
)
@dlt.expect_or_drop("valid_product", "product_id IS NOT NULL")
@dlt.expect_or_drop("valid_store", "store_id IS NOT NULL")
@dlt.expect_or_drop("valid_qty", "restock_qty >= 0")
def fact_inventory_events():
    """Build the inventory restock fact table from the inventory silver table.

    Reads ``maven_uc.silver_dlt.inventory_events_silver`` and projects the
    columns listed below. The ``expect_or_drop`` expectations declared on
    this table drop any row that fails one of them: a NULL ``product_id``,
    a NULL ``store_id``, or a ``restock_qty`` that is not ``>= 0``.
    """
    fact_columns = [
        "product_id",
        "store_id",
        "restock_qty",
        "quantity_remaining",
        "restock_date",
        "event_ts",
    ]
    inventory_silver = dlt.read("maven_uc.silver_dlt.inventory_events_silver")
    return inventory_silver.select(*fact_columns)

## fact order events

In [0]:
import dlt
from pyspark.sql.functions import col

@dlt.table(
    name="maven_uc.gold_dlt.fact_order_events",
    comment="Fact table for order events"
)
def fact_order_events():
    """Build the order events fact table as a streaming projection.

    Reads ``maven_uc.silver_dlt.orders_events_silver`` with
    ``dlt.read_stream`` (so the result is a streaming DataFrame) and keeps
    only the columns listed below, unchanged and in this order.
    """
    fact_columns = [
        "order_id",
        "customer_id",
        "product_id",
        "store_id",
        "order_date",
        "quantity",
        "stock_date",
        "payment_type",
        "unit_price",
        "order_ts",
    ]
    orders_stream = dlt.read_stream("maven_uc.silver_dlt.orders_events_silver")
    return orders_stream.select(*fact_columns)

## fact sales

In [0]:
import dlt
from pyspark.sql.functions import col, lit, to_date, regexp_replace,concat
from pyspark.sql.types import FloatType

@dlt.table(
    name="maven_uc.gold_dlt.fact_sales",
    comment="Unified sales records from online orders and in-store transactions."
)
def fact_sales():
    """Combine online orders and in-store transactions into one sales table.

    Both sources are reshaped to the same columns (``order_id``,
    ``customer_id``, ``product_id``, ``store_id``, ``sales_date``,
    ``quantity``, ``unit_price``, ``sales_channel``, ``revenue``), unioned by
    column name, and then left-joined to ``dim_stores`` on ``store_id`` to
    attach ``region_id``.

    * Online rows come from ``orders_events_silver``; ``unit_price`` has any
      ``$`` and ``,`` characters stripped before being cast to double, and
      ``sales_channel`` is ``"ONLINE"``.
    * In-store rows come from ``transactions_silver`` left-joined to
      ``dim_products``; ``unit_price`` is the product's
      ``product_retail_price`` cast to double, ``sales_channel`` is
      ``"POS"``, and ``order_id`` is synthesized as
      ``POS-<customer_id>-<transaction_date>``.
    """
    # Same revenue formula for both channels; it is applied after each
    # select so that it sees the cleaned/cast quantity and unit_price.
    line_revenue = col("quantity") * col("unit_price")

    # --- Online orders -----------------------------------------------------
    online_columns = [
        col("order_id"),
        col("customer_id"),
        col("product_id"),
        col("store_id"),
        to_date(col("order_date")).alias("sales_date"),
        col("quantity").cast("int"),
        # unit_price is handled as a string here: drop '$' and ',' first,
        # then cast the remainder to double.
        regexp_replace(col("unit_price"), "[$,]", "").cast("double").alias("unit_price"),
        lit("ONLINE").alias("sales_channel"),
    ]
    online_sales = (
        dlt.read("maven_uc.silver_dlt.orders_events_silver")
        .select(*online_columns)
        .withColumn("revenue", line_revenue)
    )

    # --- Dimension lookups -------------------------------------------------
    product_prices = dlt.read("maven_uc.gold_dlt.dim_products").select(
        "product_id", "product_retail_price"
    )
    store_regions = dlt.read("maven_uc.gold_dlt.dim_stores").select(
        "store_id", "region_id"
    )

    # --- In-store (POS) transactions ---------------------------------------
    # NOTE(review): concat() yields NULL when any operand is NULL, so a NULL
    # customer_id or transaction_date produces a NULL order_id. The key is
    # also presumably not unique per transaction (same customer, same date)
    # -- confirm both are acceptable downstream.
    synthetic_order_id = concat(
        lit("POS-"), col("customer_id"), lit("-"), col("transaction_date")
    ).alias("order_id")
    pos_columns = [
        synthetic_order_id,
        col("customer_id"),
        col("product_id"),
        col("store_id"),
        to_date(col("transaction_date")).alias("sales_date"),
        col("quantity").cast("int"),
        col("product_retail_price").cast("double").alias("unit_price"),
        lit("POS").alias("sales_channel"),
    ]
    pos_sales = (
        dlt.read("maven_uc.silver_dlt.transactions_silver")
        .join(product_prices, "product_id", "left")
        .select(*pos_columns)
        .withColumn("revenue", line_revenue)
    )

    # --- Unify and enrich with the store's region --------------------------
    all_sales = online_sales.unionByName(pos_sales)
    return all_sales.join(store_regions, "store_id", "left")