In [0]:
# Name of the table that stores the pipeline run log.
# Each job run contributes exactly one row to it.
MONITOR_TABLE: str = "workspace.default.pipeline_runs"


In [0]:
# Make sure the run-log table exists before anything tries to append to it.
# IF NOT EXISTS keeps this cell safe to re-run: an existing table is left untouched.
create_run_log_ddl = f"""
CREATE TABLE IF NOT EXISTS {MONITOR_TABLE} (
  run_id STRING,
  run_start_ts TIMESTAMP,
  run_end_ts TIMESTAMP,
  status STRING,
  bronze_rows BIGINT,
  silver_rows BIGINT,
  gold_days BIGINT
)
USING DELTA
"""
spark.sql(create_run_log_ddl)


In [0]:
from datetime import datetime, timezone
import uuid

from pyspark.sql import functions as F

# Record the start BEFORE doing any work. Previously both timestamp columns were
# filled with F.current_timestamp() in the same query, which yields one identical
# value for both, so every logged run had a duration of exactly zero.
# Timezone-aware (UTC) values are used so the stored instant does not depend on
# the local timezone of the Python process.
run_start_ts = datetime.now(timezone.utc)

# Generate a run_id (later Jobs will supply this automatically, but this works now)
run_id = str(uuid.uuid4())

# Pull row counts from tables you already built
bronze_rows = spark.table("workspace.default.bronze_events").count()
silver_rows = spark.table("workspace.default.silver_events").count()
gold_days   = spark.table("workspace.default.gold_daily_funnel").count()

# Record the end once the counts are done; the start/end interval therefore
# covers the work performed in this cell (not the upstream pipeline itself).
run_end_ts = datetime.now(timezone.utc)

# Column order mirrors the monitoring table definition.
run_log_columns = [
  "run_id", "run_start_ts", "run_end_ts", "status",
  "bronze_rows", "silver_rows", "gold_days",
]

# Insert one monitoring row
(
  spark.createDataFrame(
    [(run_id, run_start_ts, run_end_ts, "SUCCESS", bronze_rows, silver_rows, gold_days)],
    run_log_columns
  )
  .write.mode("append")
  .format("delta")
  .saveAsTable(MONITOR_TABLE)
)

# Show the 20 most recent runs, newest first
display(spark.table(MONITOR_TABLE).orderBy(F.col("run_start_ts").desc()).limit(20))
