# Ingest to Gold Layer

Transforms Silver-layer tables into curated Gold-layer tables (`customer_orders`, `customer_summary`, and `product_summary`) for business intelligence and analytics.


In [0]:
# Load the three validated Silver-layer Delta tables that feed the Gold layer.
# The order of the paths must match the order of the names being unpacked.
customers_silver, product_silver, orders_silver = (
    spark.read.format("delta").load(silver_path)
    for silver_path in (
        "/mnt/silver/customers_valid",
        "/mnt/silver/products_valid",
        "/mnt/silver/orders_valid",
    )
)

In [0]:
from pyspark.sql.functions import col

# Build `customer_orders`: every order row enriched with the attributes of its
# customer and its product, plus a derived `order_total`.
#
# Short aliases let the join conditions and the projection say explicitly
# which table each column comes from (customer_id and product_id exist on
# both sides of their respective joins).
a = customers_silver.alias("a")   # customers
b = orders_silver.alias("b")      # orders
c = product_silver.alias("c")     # products

# No join type is passed, so Spark uses its default (inner) join: rows without
# a matching customer or product are dropped.
customers_with_orders = a.join(
    b, on=col("a.customer_id") == col("b.customer_id")
)
orders_enriched = customers_with_orders.join(
    c, on=col("b.product_id") == col("c.product_id")
)

customer_orders = orders_enriched.select(
    # Order identity
    col("b.order_id"),
    col("b.order_date"),
    # Customer attributes
    col("a.customer_id"),
    col("a.first_name").alias("customer_first_name"),
    col("a.last_name").alias("customer_last_name"),
    col("a.email"),
    col("a.city"),
    col("a.state"),
    # Product attributes
    col("c.product_id"),
    col("c.product_name"),
    col("c.brand"),
    col("c.category"),
    col("c.subcategory"),
    # Measures
    col("b.quantity"),
    col("c.unit_price"),
    # Row value: quantity ordered times the product's unit price.
    (col("b.quantity") * col("c.unit_price")).alias("order_total"),
)

In [0]:
from pyspark.sql.functions import count, sum

# Build `customer_summary`: one row per customer with order count and spend.
#
# `count` and `sum` are the Spark aggregate functions imported at the top of
# this cell; that import shadows Python's builtin `sum` from here on.
customer_group_keys = [
    "customer_id",
    "customer_first_name",
    "customer_last_name",
    "email",
    "city",
    "state",
]

# NOTE(review): `total_orders` counts rows of `customer_orders` with a
# non-null order_id. If one order_id can span several rows, this is a row
# count rather than a distinct-order count -- confirm against the orders schema.
customer_summary = customer_orders.groupBy(*customer_group_keys).agg(
    count("order_id").alias("total_orders"),
    sum("order_total").alias("total_spent"),
)

In [0]:
# Build `product_summary`: one row per product with units sold, revenue and
# the number of order rows it appears on.
#
# `sum` and `count` are the Spark aggregates imported in the customer-summary
# cell above (not Python's builtin `sum`), so that cell must run first.
product_group_keys = [
    "product_id",
    "product_name",
    "brand",
    "category",
    "subcategory",
]

product_summary = (
    customer_orders
    .groupBy(*product_group_keys)
    .agg(
        sum("quantity").alias("units_sold"),
        sum("order_total").alias("total_revenue"),
        count("order_id").alias("times_ordered"),
    )
)

In [0]:
# Persist the three Gold-layer DataFrames as Delta tables.
#
# mode("overwrite") replaces whatever is already stored at each path, so every
# run of this notebook rewrites the Gold tables in full from the current
# Silver data.
#
# The paths are plain string literals: they contain no placeholders, so an
# f-string prefix would be misleading (and is flagged by linters).
(
    customer_orders.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/gold/customer_orders")
)

(
    customer_summary.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/gold/customer_summary")
)

(
    product_summary.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/gold/product_summary")
)