In [0]:
# 04_surface_patches_v2.py
import json
from pyspark.sql import functions as F
from trimble_geospatial_demo_utils.site_lock import acquire_site_lock, release_site_lock
from trimble_geospatial_demo_utils import send_notification

# ==================================================
# Unity Catalog context
# ==================================================
# Set the session's current catalog, then the current schema, so the
# unqualified table names used later in this script resolve against them.
for _context_stmt in ("USE CATALOG main", "USE SCHEMA demo"):
    spark.sql(_context_stmt)

# ==================================================
# Read Job Parameters
# ==================================================
# Declare every widget with an empty default, then read the values back.
# siteId and patchWaterThreshold are required; the remaining three are only
# used for failure notifications and may stay empty.
dbutils.widgets.text("siteId", "", "Site ID")
dbutils.widgets.text("patchWaterThreshold", "", "Patch Water Threshold")
dbutils.widgets.text("uploadJobId", "", "Upload Job ID")
dbutils.widgets.text("notificationUrl", "", "Notification URL")
dbutils.widgets.text("dbxWebhookSecret", "", "DBX Webhook Secret")

SITE_ID = dbutils.widgets.get("siteId")
_raw_patch_water_threshold = dbutils.widgets.get("patchWaterThreshold")
UPLOAD_JOB_ID = dbutils.widgets.get("uploadJobId")
NOTIFICATION_URL = dbutils.widgets.get("notificationUrl")
DBX_WEBHOOK_SECRET = dbutils.widgets.get("dbxWebhookSecret")

# Validate required parameters
if not SITE_ID:
    raise ValueError("Missing required job parameter: siteId")
if not _raw_patch_water_threshold:
    raise ValueError("Missing required job parameter: patchWaterThreshold")

# Convert to float. A bare float() failure does not say which parameter was
# bad, so re-raise with the parameter name and the rejected value.
try:
    PATCH_WATER_THRESHOLD = float(_raw_patch_water_threshold)
except ValueError as exc:
    raise ValueError(
        "Job parameter patchWaterThreshold must be a number, "
        f"got {_raw_patch_water_threshold!r}"
    ) from exc

print(f"🏗️  Site ID: {SITE_ID}")
print(f"💧 Patch Water Threshold: {PATCH_WATER_THRESHOLD}")

# ==================================================
# CONFIG
# ==================================================
# Source tables: per-cell surface data and per-tile stats.
CELLS_TABLE, TILE_STATS_TBL = "surface_cells_v2", "tile_stats_v2"

# Destination table name and the storage path it is written to.
OUTPUT_TABLE, OUTPUT_PATH = (
    "surface_patches_v2",
    "abfss://processed@trimblegeospatialdemo.dfs.core.windows.net/surface_patches_v2",
)

# Patch granularity (coarse grouping inside a tile): cell coordinates are
# divided by this value, so 8 means 8x8 cells fall into one patch bucket.
PATCH_CELL_SIZE = 8

# ==================================================
# Job identity (for locking)
# ==================================================
# Identifier recorded as the lock owner. Read from the Spark conf key below;
# "manual-notebook" is the fallback when that key is not set (presumably an
# interactive notebook run).
_RUN_ID_CONF_KEY = "spark.databricks.job.runId"
JOB_RUN_ID = spark.conf.get(_RUN_ID_CONF_KEY, "manual-notebook")

# ==================================================
# Acquire site-level lock
# ==================================================
# Taken before the try block on purpose: if acquisition raises, the finally
# clause (which releases the lock) must not run for a lock we never held.
_site_lock_request = {
    "spark": spark,
    "site_id": SITE_ID,
    "locked_by": JOB_RUN_ID,
    "ttl_minutes": 90,
}
acquire_site_lock(**_site_lock_request)

# Main pipeline: read the site's surface cells, bucket them into patches,
# aggregate per patch, classify each patch as water/ground and replace the
# site's rows in the output table. On any failure a FAILED notification is
# sent (when a URL and secret were supplied) and the error is re-raised; the
# site lock is released in every case.
try:
    # SITE_ID is embedded as a quoted literal in the replaceWhere predicate
    # (step 6) and in the verification query (step 7). A quote or backslash
    # would break or alter those statements, so fail early with a clear error
    # instead of after the whole aggregation has run.
    if "'" in SITE_ID or "\\" in SITE_ID:
        raise ValueError(
            f"siteId must not contain quotes or backslashes: {SITE_ID!r}"
        )

    # ==================================================
    # 1) Read tile keys for this site
    # ==================================================
    # Only the (siteId, tileId) keys are needed; they restrict the cells
    # below to tiles that have a row in the tile stats table.
    df_tile_stats = (
        spark.table(TILE_STATS_TBL)
             .filter(F.col("siteId") == SITE_ID)
             .select("siteId", "tileId")
    )

    # ==================================================
    # 2) Read surface cells
    # ==================================================
    # left_semi keeps each cell at most once even if the tile stats table
    # holds several rows for the same (siteId, tileId); an inner join would
    # duplicate cells in that case and inflate cellCount / pointsUsed.
    df_cells = (
        spark.table(CELLS_TABLE)
             .filter(F.col("siteId") == SITE_ID)
             .join(df_tile_stats, ["siteId", "tileId"], "left_semi")
             .select(
                 "siteId", "tileId",
                 "cellX", "cellY",
                 "pointCount",
                 "waterPointCount",
                 "waterPointRatio",
                 "minZ", "meanZ", "maxZ",
             )
    )

    # DataFrame.isEmpty() instead of df.rdd.isEmpty(): the RDD API is not
    # available on every Unity Catalog compute access mode.
    if df_cells.isEmpty():
        raise RuntimeError(f"No surface cells found for siteId={SITE_ID}")

    # ==================================================
    # 3) Assign patch buckets (demo version)
    # ==================================================
    # patchX / patchY are the cell coordinates floor-divided by the patch
    # size; patchId is "<patchX>_<patchY>".
    df_with_patches = (
        df_cells
        .withColumn("patchX", F.floor(F.col("cellX") / F.lit(PATCH_CELL_SIZE)).cast("int"))
        .withColumn("patchY", F.floor(F.col("cellY") / F.lit(PATCH_CELL_SIZE)).cast("int"))
        .withColumn(
            "patchId",
            F.concat_ws("_", F.col("patchX").cast("string"), F.col("patchY").cast("string"))
        )
    )

    # ==================================================
    # 4) Aggregate to patch level
    # ==================================================
    df_surface_patches = (
        df_with_patches
        .groupBy("siteId", "tileId", "patchId")
        .agg(
            F.count("*").alias("cellCount"),
            F.sum("pointCount").alias("pointsUsed"),

            # patch-level water stats
            F.sum("waterPointCount").alias("waterPointCount"),

            F.min("minZ").alias("minZ"),
            F.max("maxZ").alias("maxZ"),
            # NOTE(review): unweighted average of per-cell means, i.e. not
            # weighted by pointCount - confirm this is intended.
            F.avg("meanZ").alias("meanZ")
        )
        # Ratio is recomputed from the summed counts; patches without points
        # get 0.0 rather than a division by zero / null.
        .withColumn(
            "waterPointRatio",
            F.when(
                F.col("pointsUsed") > 0,
                F.col("waterPointCount") / F.col("pointsUsed")
            ).otherwise(F.lit(0.0))
        )
        # surfaceType is derived from the patch itself
        .withColumn(
            "surfaceType",
            F.when(
                F.col("waterPointRatio") >= F.lit(PATCH_WATER_THRESHOLD),
                F.lit("water")
            ).otherwise(F.lit("ground"))
        )
        .withColumn("computedAt", F.current_timestamp())
    )

    # ==================================================
    # 5) Safety check
    # ==================================================
    # The write below replaces every row of this site, so the output must
    # contain exactly one siteId.
    if df_surface_patches.select("siteId").distinct().count() != 1:
        raise RuntimeError("surface_patches output contains multiple siteId values")

    # ==================================================
    # 6) Write latest snapshot (replace entire site)
    # ==================================================
    # replaceWhere limits the overwrite to this site's rows.
    (
        df_surface_patches.write
            .format("delta")
            .mode("overwrite")
            .option("replaceWhere", f"siteId = '{SITE_ID}'")
            .option("path", OUTPUT_PATH)
            .partitionBy("siteId", "tileId")
            .saveAsTable(OUTPUT_TABLE)
    )

    # ==================================================
    # 7) Verification / demo-friendly summary
    # ==================================================
    print("\n=== Verify surface_patches_v2 ===")
    spark.sql(f"""
        SELECT
          surfaceType,
          COUNT(DISTINCT patchId) AS patchCount,
          SUM(pointsUsed)         AS pointsUsed
        FROM {OUTPUT_TABLE}
        WHERE siteId = '{SITE_ID}'
        GROUP BY surfaceType
        ORDER BY surfaceType
    """).show(truncate=False)

    print("✅ surface_patches_v2 written successfully")
except Exception as e:
    # Best-effort failure notification; the original error is always
    # re-raised so the job run is still marked as failed.
    if NOTIFICATION_URL and DBX_WEBHOOK_SECRET:
        payload = {
            # Reuse the run id resolved at startup instead of querying the
            # Spark conf again from inside the error handler.
            "runId": JOB_RUN_ID,
            "jobId": UPLOAD_JOB_ID,
            "status": "FAILED",
            "error": str(e),
            "siteId": SITE_ID,
            "patchWaterThreshold": str(PATCH_WATER_THRESHOLD),
        }
        try:
            send_notification(json.dumps(payload), NOTIFICATION_URL, webhook_secret=DBX_WEBHOOK_SECRET)
        except Exception as notify_ex:
            # A failed notification must not hide the pipeline error.
            print("⚠️ Notification failed:", str(notify_ex)[:200])
    raise
finally:
    # ==================================================
    # Release site-level lock
    # ==================================================
    release_site_lock(
        spark=spark,
        site_id=SITE_ID,
        locked_by=JOB_RUN_ID
    )