In [0]:
from pyspark.sql.functions import *
from delta.tables import *

In [0]:
# Load the raw WMS pick-movement records from the bronze layer and preview them.
bronze_table_name = "inventory_project.bronze.wms_pick_movement_raw"
df = spark.read.table(bronze_table_name)
display(df)

In [0]:
# Standardize the bronze feed: trim the string identifiers, cast quantity to
# int, parse pick_date as a yyyy-MM-dd date, and collapse cdc_op to one letter.

# Upper-cased, trimmed operation code used as input to the mapping below.
cdc_raw = upper(trim(col("cdc_op")))

# Map the operation to U / I / D by substring match, tested in that order.
# Anything that matches none of the three branches falls back to "U".
# NOTE(review): because of this fallback every row ends up with a cdc_op in
# {I, U, D}; confirm that treating unrecognised operations as updates is intended.
cdc_normalized = (
    when(cdc_raw.contains("U"), "U")
    .when(cdc_raw.contains("I"), "I")
    .when(cdc_raw.contains("D"), "D")
    .otherwise("U")
)

# Identifier columns that only need whitespace trimming, in output order.
leading_id_columns = (
    "pick_id", "order_id", "product_id", "lot_id", "serial_id", "bin_id",
)

df_std = df.select(
    *[trim(col(name)).alias(name) for name in leading_id_columns],
    col("quantity").cast("int").alias("quantity"),
    trim(col("picker_id")).alias("picker_id"),
    cdc_normalized.alias("cdc_op"),
    to_date(col("pick_date"), "yyyy-MM-dd").alias("pick_date"),
)

In [0]:
# Row-level validation: mandatory keys present, a strictly positive quantity,
# and a recognised CDC operation.
#
# The predicate is wrapped in coalesce(..., False) because Spark evaluates
# boolean expressions with three-valued logic: a NULL quantity (for example a
# value missing in the source) makes `quantity > 0` NULL, which makes the whole
# conjunction NULL. filter() drops rows whose predicate is NULL, and the
# negation of NULL is still NULL, so without the coalesce such rows would land
# in neither df_valid nor df_invalid and silently disappear.
valid_condition = coalesce(
    col("pick_id").isNotNull()
    & col("order_id").isNotNull()
    & col("product_id").isNotNull()
    & col("bin_id").isNotNull()
    & col("pick_date").isNotNull()
    & (col("quantity") > 0)
    & col("picker_id").isNotNull()
    & col("cdc_op").isin("I", "U", "D"),
    lit(False),
)

# Rows that pass validation, de-duplicated on (pick_id, cdc_op).
# NOTE(review): dropDuplicates does not define which of several duplicate rows
# survives, and one pick_id can still appear once per operation; confirm
# whether an ordering column should be used to keep only the latest event.
df_valid = df_std.filter(valid_condition).dropDuplicates(["pick_id", "cdc_op"])

# Every row that failed validation, kept for the quarantine output.
df_invalid = df_std.filter(~valid_condition)

# Final silver DataFrame: the validated rows in the silver column order.
df_silver = df_valid.select(
    "pick_id", "order_id", "product_id", "lot_id", "serial_id",
    "bin_id", "pick_date", "quantity", "picker_id", "cdc_op"
)

In [0]:
# Publish the validated rows to the silver Delta table, write the rejected rows
# to the quarantine location, then end the notebook with a SUCCESS value.
silver_table_name = "inventory_project.silver.wms_pick_movement"

# Columns overwritten on an update; each target column takes the value of the
# same-named source column.
update_columns = [
    "order_id", "product_id", "lot_id", "serial_id", "bin_id",
    "pick_date", "quantity", "picker_id", "cdc_op",
]

if spark.catalog.tableExists(silver_table_name):
    # Incremental run: apply each CDC operation to the existing table, matching
    # source rows to target rows on the pick_id business key.
    delta_table = DeltaTable.forName(spark, silver_table_name)
    (
        delta_table.alias("t")
        .merge(
            df_silver.alias("s"),
            "t.pick_id = s.pick_id"   # business key
        )
        # U: overwrite the matched row with the incoming values.
        .whenMatchedUpdate(
            condition="s.cdc_op = 'U'",
            set={name: f"s.{name}" for name in update_columns},
        )
        # D: remove the matched row.
        .whenMatchedDelete(condition="s.cdc_op = 'D'")
        # I: insert rows whose pick_id is not in the target yet.
        .whenNotMatchedInsertAll(condition="s.cdc_op = 'I'")
        .execute()
    )
else:
    # First run: the table does not exist yet, so create it from this batch.
    # Delete events are excluded so they are not stored as live rows; the merge
    # branch above never inserts 'D' rows either.
    (
        df_silver.filter(col("cdc_op") != "D")
        .write.format("delta")
        .mode("overwrite")
        .saveAsTable(silver_table_name)
    )

# Rejected rows are written as CSV; overwrite mode replaces the previous
# contents of the quarantine path on every run.
df_invalid.write.format("csv").mode("overwrite").save("/Volumes/inventory_project/silver/quarantine_layer/wms_pick_movement")
dbutils.notebook.exit("SUCCESS")

In [0]:
# Preview the standardized DataFrame (df_std) for manual inspection.
# NOTE(review): the preceding cell ends with dbutils.notebook.exit("SUCCESS"),
# so this cell presumably only runs when executed by hand — confirm whether it
# is still needed or should move above the write step.
display(df_std)