In [0]:
import dlt
from pyspark.sql.functions import *

# Source location is read from the pipeline configuration so the same notebook
# can be pointed at a different catalog/schema without a code change.
# The defaults reproduce the previously hard-coded location
# (data_university.dlt01), so an unconfigured pipeline behaves as before.
source_catalog = spark.conf.get("source_catalog", "data_university")
source_schema = spark.conf.get("source_schema", "dlt01")

# Build fully qualified source table names (catalog.schema.table)
sales_source_table = f"{source_catalog}.{source_schema}.demo_sales_source"
customers_source_table = f"{source_catalog}.{source_schema}.demo_customers_source"

# Echo the resolved names so the effective configuration is visible when the
# notebook is evaluated.
print(f"Reading from: {sales_source_table}")
print(f"Reading from: {customers_source_table}")


In [0]:
# ===================================================================
# BRONZE LAYER - Using Parameterized Source References
# ===================================================================

@dlt.table(
    name="bronze_sales",
    comment=f"Streaming table from {sales_source_table}",
)
def bronze_sales():
    """
    Bronze sales: streaming read of the sales source table.

    Opens a streaming Delta reader over the table named by the module-level
    ``sales_source_table`` and returns it without any transformation.
    The output will be created in the pipeline's target catalog.schema.
    """
    delta_stream = spark.readStream.format("delta")
    return delta_stream.table(sales_source_table)

@dlt.table(
    name="bronze_customers",
    comment=f"Materialized view from {customers_source_table}",
)
def bronze_customers():
    """
    Bronze customers: batch (non-streaming) read of the customers source.

    Returns the table named by the module-level ``customers_source_table``
    unchanged.
    """
    delta_batch = spark.read.format("delta")
    return delta_batch.table(customers_source_table)

In [0]:
# ===================================================================
# SILVER LAYER - Using LIVE References
# ===================================================================

@dlt.table(
    name="silver_sales",
    comment="Enhanced sales data with business calculations",
)
def silver_sales():
    """
    Silver sales: bronze sales enriched with derived columns.

    Reads ``LIVE.bronze_sales`` as a batch DataFrame and adds:
      * ``total_amount``     - ``quantity * unit_price``
      * ``order_value_tier`` - "High" (> 500), "Medium" (> 200), else "Low"
      * ``year_month``       - ``order_date`` formatted as ``yyyy-MM``
    """
    bronze_df = spark.read.table("LIVE.bronze_sales")

    # Line total must be added first: the tier expression refers to it.
    priced_df = bronze_df.withColumn(
        "total_amount", col("quantity") * col("unit_price")
    )

    # Branches are evaluated top-down, so "Medium" only sees amounts <= 500.
    tier_expr = (
        when(col("total_amount") > 500, "High")
        .when(col("total_amount") > 200, "Medium")
        .otherwise("Low")
    )
    month_expr = date_format(col("order_date"), "yyyy-MM")

    return (
        priced_df
        .withColumn("order_value_tier", tier_expr)
        .withColumn("year_month", month_expr)
    )

In [0]:
# ===================================================================
# GOLD LAYER - Analytics and Aggregations
# ===================================================================

@dlt.table(
    name="gold_sales_summary",
    comment="Product sales analytics by category",
)
def gold_sales_summary():
    """
    Gold sales summary: revenue metrics per product.

    Groups ``LIVE.silver_sales`` by product id, name and category, then
    computes total revenue, order count and average order value, sorted by
    total revenue in descending order.
    """
    silver_df = spark.read.table("LIVE.silver_sales")

    product_keys = ["product_id", "product_name", "category"]
    # sum/count/avg resolve to the Spark column functions brought in by the
    # wildcard pyspark.sql.functions import, not the Python builtins.
    revenue_metrics = [
        sum("total_amount").alias("total_revenue"),
        count("order_id").alias("total_orders"),
        avg("total_amount").alias("avg_order_value"),
    ]

    per_product = silver_df.groupBy(*product_keys).agg(*revenue_metrics)
    return per_product.orderBy(desc("total_revenue"))

@dlt.table(
    name="gold_customer_analytics",
    comment="Customer analysis with joined data",
)
def gold_customer_analytics():
    """
    Gold customer analytics: customers joined with their sales metrics.

    Aggregates ``LIVE.silver_sales`` per ``customer_id`` (order count,
    lifetime value, average order value) and left-joins the result onto
    ``LIVE.bronze_customers`` so customers without sales are kept. Missing
    metrics are filled with 0, a ``customer_tier`` label is derived from
    lifetime value ("Premium" > 1000, "Gold" > 500, else "Standard"), and
    rows are sorted by lifetime value in descending order.
    """
    customers_df = spark.read.table("LIVE.bronze_customers")
    sales_df = spark.read.table("LIVE.silver_sales")

    # One row of metrics per customer that has at least one sale.
    per_customer = sales_df.groupBy("customer_id").agg(
        count("order_id").alias("total_orders"),
        sum("total_amount").alias("lifetime_value"),
        avg("total_amount").alias("avg_order_value"),
    )

    # Left join keeps every customer; those without sales get null metrics,
    # which are replaced with 0 immediately afterwards.
    metric_columns = ["total_orders", "lifetime_value", "avg_order_value"]
    enriched = (
        customers_df
        .join(per_customer, "customer_id", "left")
        .fillna(0, metric_columns)
    )

    tier_expr = (
        when(col("lifetime_value") > 1000, "Premium")
        .when(col("lifetime_value") > 500, "Gold")
        .otherwise("Standard")
    )

    return (
        enriched
        .withColumn("customer_tier", tier_expr)
        .orderBy(desc("lifetime_value"))
    )