# In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import count, sum, max, when, col

# Reuse the active Spark session when there is one; otherwise build a new one.
spark = SparkSession.builder.getOrCreate()

# Silver-layer inputs, read in Delta format from their storage paths.
df_inventory = spark.read.load("/mnt/delta/silver/inventory_enriched", format="delta")
df_shipments = spark.read.load("/mnt/delta/silver/shipments_enriched", format="delta")

# -------------------------
# Inventory summary per vendor
# -------------------------
# One row per (vendor_id, name): the number of inventory rows in the group and
# the summed on-hand quantity across them.
# NOTE: `count` and `sum` here are the pyspark.sql.functions aggregates
# imported at the top of the file, not the Python builtins.
inventory_by_vendor = df_inventory.groupBy("vendor_id", "name")
inventory_summary = inventory_by_vendor.agg(
    count("*").alias("total_inventory_items"),
    sum("quantity_on_hand").alias("total_quantity_on_hand"),
)

# Low-stock items: inventory rows whose on-hand quantity is below the row's
# reorder level, counted per (vendor_id, name). Groups with no such rows do
# not appear in this DataFrame at all.
is_below_reorder = col("quantity_on_hand") < col("reorder_level")
low_stock = (
    df_inventory.where(is_below_reorder)
    .groupBy("vendor_id", "name")
    .agg(count("*").alias("low_stock_items"))
)

# -------------------------
# Shipment summary per vendor
# -------------------------
# Per-row 1/0 flags for the two shipment statuses that get their own counter;
# summing a flag over a group yields the number of rows with that status.
shipment_status = col("status")
delivered_flag = when(shipment_status == "Delivered", 1).otherwise(0)
pending_flag = when(shipment_status == "Pending", 1).otherwise(0)

# One row per (vendor_id, name): total shipments, delivered/pending counts,
# and the greatest shipment_date seen in the group.
# NOTE: `sum` and `max` are the pyspark.sql.functions aggregates, not builtins.
shipments_by_vendor = df_shipments.groupBy("vendor_id", "name")
shipment_summary = shipments_by_vendor.agg(
    count("*").alias("total_shipments"),
    sum(delivered_flag).alias("delivered_shipments"),
    sum(pending_flag).alias("pending_shipments"),
    max("shipment_date").alias("last_shipment_date"),
)

# -------------------------
# Combine all summaries
# -------------------------
# Join keys shared by all three per-vendor summaries.
summary_keys = ["vendor_id", "name"]

# Count/total columns for which "no matching rows" should read as zero.
metric_columns = [
    "total_inventory_items",
    "total_quantity_on_hand",
    "total_shipments",
    "delivered_shipments",
    "pending_shipments",
    "low_stock_items",
]

# The outer join keeps groups that appear in only one of the inventory and
# shipment summaries; the left join then attaches the low-stock count where
# one exists.
#
# Zero-fill ONLY the metric columns. An unrestricted fillna(0) replaces nulls
# in every numeric column, which would also rewrite a null numeric vendor_id
# as vendor 0 and any numerically stored last_shipment_date as 0. Those nulls
# mean "unknown" / "no shipment" and must stay null.
vendor_summary = (
    inventory_summary.join(shipment_summary, on=summary_keys, how="outer")
    .join(low_stock, on=summary_keys, how="left")
    .fillna(0, subset=metric_columns)
)

# -------------------------
# Write to Delta Gold layer (same single write command)
# -------------------------
# Overwrite whatever is stored at the Gold path with the freshly built summary.
vendor_summary.write.save(
    "/mnt/delta/gold/vendor_summary", format="delta", mode="overwrite"
)

# Optional: Register as SQL table.
# The table is dropped (if present) and then created again, so its definition
# always points at the LOCATION given below after each run.
spark.sql("DROP TABLE IF EXISTS vendor_summary")
create_table_sql = """
    CREATE TABLE vendor_summary
    USING DELTA
    LOCATION '/mnt/delta/gold/vendor_summary'
"""
spark.sql(create_table_sql)
