In [0]:
import dlt
from pyspark.sql.functions import col, sum, count, date_format, concat_ws

In [0]:
@dlt.view(
    name="mv_high_value_customers",
    comment="Customers with lifetime value > 500"
)
def mv_high_value_customers():
    """Return one row per customer whose summed revenue exceeds 500.

    Reads ``fact_sales``, casts ``revenue`` to double before summing it per
    ``customer_id`` (aliased ``ltv``), and keeps only rows with ``ltv > 500``.
    """
    fact_sales = dlt.read("maven_uc.gold_dlt.fact_sales")

    # Cast before summing so the aggregate is computed on doubles.
    lifetime_value = sum(col("revenue").cast("double")).alias("ltv")
    per_customer = fact_sales.groupBy("customer_id").agg(lifetime_value)

    return per_customer.filter(col("ltv") > 500)

In [0]:
@dlt.view(
    name="mv_monthly_regional_sales",
    comment="Regional sales trends by month."
)
def mv_monthly_regional_sales():
    """Return summed revenue per ``sales_region`` and ``sales_date``.

    Joins ``fact_sales`` to ``dim_stores`` on ``store_id`` and then to
    ``dim_regions`` on ``region_id`` before aggregating.

    NOTE(review): the grouping key is the raw ``sales_date`` column, not a
    month bucket, even though the view name and comment say "month".
    Confirm whether a ``date_format(..., "yyyy-MM")`` key was intended;
    changing it would alter the output columns.
    """
    fact_sales = dlt.read("maven_uc.gold_dlt.fact_sales")
    dim_stores = dlt.read("maven_uc.gold_dlt.dim_stores")
    dim_regions = dlt.read("maven_uc.gold_dlt.dim_regions")

    sales_with_region = (
        fact_sales
        .join(dim_stores, "store_id")
        .join(dim_regions, "region_id")
    )
    regional_revenue = sum("revenue").alias("regional_revenue")

    return sales_with_region.groupBy("sales_region", "sales_date").agg(regional_revenue)

In [0]:
import dlt
from pyspark.sql.functions import col, sum, avg, expr

# KPI: Inventory Health
# NOTE(review): declared with @dlt.view although the "mv_" prefix suggests a
# materialized view -- confirm which dataset type is intended.
@dlt.view(name="mv_inventory_health")
def mv_inventory_health():
    """Return restock totals and average stock level per product.

    Joins ``fact_inventory_events`` to ``dim_products`` on ``product_id`` and
    groups by ``product_id``, ``product_name`` and ``product_brand``. Each
    group yields ``total_restocked`` (sum of ``restock_qty``) and
    ``avg_stock_level`` (average of ``quantity_remaining``).
    """
    inventory_events = dlt.read("maven_uc.gold_dlt.fact_inventory_events")
    dim_products = dlt.read("maven_uc.gold_dlt.dim_products")

    product_keys = ("product_id", "product_name", "product_brand")
    total_restocked = sum("restock_qty").alias("total_restocked")
    avg_stock_level = avg("quantity_remaining").alias("avg_stock_level")

    events_with_product = inventory_events.join(dim_products, "product_id")
    return events_with_product.groupBy(*product_keys).agg(total_restocked, avg_stock_level)

In [0]:
@dlt.view(
    name="mv_sales_store_daily",
    comment="Daily sales by store."
)
def mv_sales_store_daily():
    """Return summed revenue per store name and sales date.

    Joins ``fact_sales`` to ``dim_stores`` on equal ``store_id`` values and
    aggregates ``revenue`` into ``daily_sales``.
    """
    fact_sales = dlt.read("maven_uc.gold_dlt.fact_sales")
    dim_stores = dlt.read("maven_uc.gold_dlt.dim_stores")

    same_store = fact_sales["store_id"] == dim_stores["store_id"]

    # Grouping columns are taken from their source frames explicitly:
    # store_name from dim_stores, sales_date from fact_sales.
    grouping_keys = [dim_stores["store_name"], fact_sales["sales_date"]]
    daily_sales = sum(col("revenue")).alias("daily_sales")

    return (
        fact_sales.join(dim_stores, same_store)
        .groupBy(*grouping_keys)
        .agg(daily_sales)
    )

In [0]:
@dlt.view(
    name="mv_realtime_orders_minute",
    comment="Order counts per minute (realtime)."
)
def mv_realtime_orders_minute():
    """Return the number of order events per minute.

    Formats ``order_ts`` as ``yyyy-MM-dd HH:mm`` into a ``minute`` column,
    groups ``fact_order_events`` by it, and counts rows per group as
    ``orders_per_minute``.
    """
    orders = dlt.read("maven_uc.gold_dlt.fact_order_events")
    return (
        # Formatting to minute precision makes all events within the same
        # minute share one grouping key.
        orders.withColumn("minute", date_format(col("order_ts"), "yyyy-MM-dd HH:mm"))
              .groupBy("minute")
              .agg(count("*").alias("orders_per_minute"))
    )

In [0]:
@dlt.view(
    name="mv_return_product_by_month",
    comment="Monthly product returns."
)
def mv_return_product_by_month():
    """Return the number of returns per product and month.

    Joins ``fact_returns`` to ``dim_products`` on ``product_id``, derives
    ``return_month`` by formatting ``return_date`` as ``yyyy-MM``, and counts
    rows per ``product_id``, ``product_name`` and ``return_month`` as
    ``returns_count``.
    """
    returns = dlt.read("maven_uc.gold_dlt.fact_returns")
    products = dlt.read("maven_uc.gold_dlt.dim_products")

    return (
        returns.join(products, "product_id")
               .withColumn("return_month", date_format(col("return_date"), "yyyy-MM"))
               .groupBy("product_id", "product_name", "return_month")
               .agg(count("*").alias("returns_count"))
    )

In [0]:
@dlt.view(
    name="mv_profit_category_by_month",
    comment="Monthly profit by product category."
)
def mv_profit_category_by_month():
    """Return monthly profit per ``product_brand``.

    Inner-joins ``fact_sales`` to ``dim_products`` on ``product_id``, derives
    ``sales_month`` by formatting ``sales_date`` as ``yyyy-MM``, and computes
    ``monthly_profit`` as sum(revenue) - sum(product_cost * quantity).

    NOTE(review): the view name and comment say "category", but the grouping
    column is ``product_brand`` -- confirm which dimension is intended.
    """
    fact_sales = dlt.read("maven_uc.gold_dlt.fact_sales")
    dim_products = dlt.read("maven_uc.gold_dlt.dim_products")

    # The inner join drops sales rows with no matching product; a left join
    # would retain them even when product metadata is missing.
    sales_with_product = fact_sales.join(dim_products, "product_id", "inner")
    sales_by_month = sales_with_product.withColumn(
        "sales_month", date_format(col("sales_date"), "yyyy-MM")
    )

    # Profit = revenue - (cost * quantity), with each side summed per group.
    total_revenue = sum(col("revenue"))
    total_cost = sum(col("product_cost") * col("quantity"))
    monthly_profit = (total_revenue - total_cost).alias("monthly_profit")

    return sales_by_month.groupBy(col("product_brand"), col("sales_month")).agg(monthly_profit)
