In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import count, max

# Reuse the active Spark session when there is one; otherwise build a new one.
spark = SparkSession.builder.getOrCreate()

# ------------------------------------
# 1. Load Silver Enriched Tables
# ------------------------------------
# Each input is read as a Delta table from a fixed path under /mnt/delta/silver.
df_inventory = spark.read.load(
    "/mnt/delta/silver/inventory_enriched", format="delta"
)
df_shipments = spark.read.load(
    "/mnt/delta/silver/shipments_enriched", format="delta"
)
df_vendors = spark.read.load(
    "/mnt/delta/silver/vendors_clean", format="delta"
)

# ------------------------------------
# 2. Join Inventory and Shipments
# ------------------------------------
# Shipments drive the result: both joins below are left joins, so a shipment
# row is kept even when no inventory or vendor row shares its vendor_id.
# The "s" and "i" aliases let the later column selection qualify columns by
# their source table.
# NOTE(review): vendor_id is the only join key, so every shipment is paired
# with every inventory row of the same vendor -- confirm this fan-out is
# intended (or add an item-level key if one exists in both tables).
df_shipments_inventory = df_shipments.alias("s").join(
    df_inventory.alias("i"), on="vendor_id", how="left"
)

# ------------------------------------
# 3. Add Vendor Info
# ------------------------------------
# Vendors are joined exactly once, under the alias "v".
df_joined = df_shipments_inventory.join(
    df_vendors.alias("v"), on="vendor_id", how="left"
)

# df_final is kept as an alias of df_joined so the name stays defined.
# Joining df_vendors again here would only duplicate the vendor columns that
# df_joined already carries, making unqualified references to them ambiguous.
df_final = df_joined

# ------------------------------------
# 4. Select and Clean Columns
# ------------------------------------
from pyspark.sql.functions import col

# Project the joined frame down to the output columns. Each pair is
# (alias-qualified source column, output column name); the "s", "i" and "v"
# prefixes are the DataFrame aliases assigned when the tables were joined.
df_silver = df_joined.select(
    *(
        col(source_column).alias(output_name)
        for source_column, output_name in (
            ("s.shipment_id", "shipment_id"),
            ("s.vendor_id", "vendor_id"),
            ("v.name", "vendor_name"),
            ("s.shipment_date", "shipment_date"),
            ("s.destination", "shipment_destination"),
            ("s.status", "shipment_status"),
            ("i.item_id", "item_id"),
            ("i.item_name", "item_name"),
            ("i.quantity_on_hand", "inventory_quantity_on_hand"),
            ("i.reorder_level", "inventory_reorder_level"),
            ("i.last_updated", "inventory_last_updated"),
        )
    )
)

# ------------------------------------
# 5. Write to Silver Output (Final Join)
# ------------------------------------
# Replace the contents of the target Delta table with df_silver.
# mergeSchema is passed through as a writer option alongside overwrite mode.
df_silver.write.save(
    "/mnt/delta/silver/inventory_shipments_joined_clean",
    format="delta",
    mode="overwrite",
    mergeSchema="true",
)


In [0]:
# Load a Delta table and render it with the notebook's display() helper.
# NOTE(review): this path is the shipments *input* table, not the
# inventory_shipments_joined_clean output written by the previous cell --
# confirm which table is meant to be previewed.
df_preview = spark.read.load(
    "/mnt/delta/silver/shipments_enriched", format="delta"
)
display(df_preview)