###### **Parameters**

In [None]:
# Notebook parameters describing one spec row to upsert into the ops specs table.
# Every value is supplied as a string; the defaults are empty placeholders.
source_layer: str = ""            # lakehouse name of the layer, e.g. 'Bronze', 'Silver', 'Gold'
                                  # NOTE(review): the variable says "source" but the guidance was "use the destination layer" - confirm which is meant
source_system: str = ""           # where the data came from, e.g. 'lcvista', 'intacct' (will be used for the schema)
source_table_name: str = ""       # e.g. 'staff'
target_table_name: str = ""       # usually the same as source_table_name unless splitting into multiple tables
primary_keys_json: str = ""       # single key: 'staff_id' | composite keys: 'staff_id,department_id' (one comma-delimited string; consumers split on `,`)
                                  # NOTE(review): despite the `_json` suffix, the documented format is comma-delimited text, not JSON
watermark_column: str = ""        # e.g. 'modified'
max_watermark_cutoff: str = ""    # format '2023-12-01T00:00:00Z'; converted to a timestamp before the row is built
schema_json: str = ""             # output of df.schema.json() for the table
transform_mode: str = ""          # non-nullable in the specs schema; allowed values are not visible here - TODO document
transform_notebook: str = ""      # nullable in the specs schema; presumably the notebook that performs the transform - TODO confirm

###### **Dynamic Path Resolution**

In [None]:
# Resolve the ABFS path of the `ops.specs` table from the `Ops` lakehouse that
# lives in the same workspace as this notebook, failing fast with a clear
# message at each step that can come back empty.
ctx = notebookutils.runtime.context
wsid = ctx.get("currentWorkspaceId")
if not wsid:
    raise ValueError("Missing `currentWorkspaceId` from notebook runtime context.")

# Any lookup failure is re-raised with the workspace id attached for debugging.
try:
    ops_artifact = notebookutils.lakehouse.get(name="Ops", workspaceId=wsid)
except Exception as e:
    raise RuntimeError(f"Could not return `Ops` lakehouse in this workspace ({wsid=}). Error: {repr(e)}") from e

if ops_artifact is None:
    raise ValueError("Ops artifact is None")

# A missing or empty `properties` entry is treated the same as a missing path.
props = ops_artifact.get("properties") or {}
abfs = props.get("abfsPath")
if not abfs:
    raise ValueError("Ops artifact missing `properties.abfsPath`")

specs_table_path = f"{abfs}/Tables/ops/specs"

In [None]:
# Imports
from delta.tables import DeltaTable
from pyspark.sql import types as T
from datetime import datetime, timezone


def _specs_schema() -> T.StructType:
    """Build the explicit Spark schema for a single row of the specs table.

    Returns:
        A ``StructType`` with ten fields. The three source identifiers,
        ``target_table_name``, ``primary_keys_json`` and ``transform_mode``
        are non-nullable strings; ``watermark_column``, ``schema_json`` and
        ``transform_notebook`` are nullable strings; ``max_watermark_cutoff``
        is a nullable timestamp.
    """
    string, timestamp = T.StringType(), T.TimestampType()

    # (column name, Spark type, nullable) - order defines the column order.
    columns = (
        ("source_layer", string, False),
        ("source_system", string, False),
        ("source_table_name", string, False),
        ("target_table_name", string, False),
        ("primary_keys_json", string, False),
        ("watermark_column", string, True),
        ("max_watermark_cutoff", timestamp, True),
        ("schema_json", string, True),
        ("transform_mode", string, False),
        ("transform_notebook", string, True),
    )

    return T.StructType(
        [T.StructField(name, dtype, nullable=nullable) for name, dtype, nullable in columns]
    )

def _parse_watermark_cutoff(value):
    """Convert the `max_watermark_cutoff` parameter into a UTC-aware datetime.

    Args:
        value: The raw parameter. Expected to be a string formatted like
            '2023-12-01T00:00:00Z'. ``None`` and empty/blank strings mean
            "no cutoff". A ``datetime`` is accepted so that re-running this
            cell after the conversion has already happened does not fail.

    Returns:
        A timezone-aware ``datetime`` in UTC, or ``None`` when no cutoff was
        supplied (the target column is nullable).

    Raises:
        TypeError: If ``value`` is neither a string, a datetime, nor None.
        ValueError: If a non-empty string does not match the expected format.
    """
    if value is None:
        return None

    if isinstance(value, datetime):
        # Already converted on a previous run of this cell; a naive value is
        # taken to be UTC, matching the trailing 'Z' of the input format.
        return value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)

    if not isinstance(value, str):
        raise TypeError(
            f"`max_watermark_cutoff` must be a string or datetime, got {type(value).__name__}"
        )

    text = value.strip()
    if not text:
        # Parameter left at its empty default: store NULL instead of crashing.
        return None

    try:
        parsed = datetime.strptime(text, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError as e:
        raise ValueError(
            f"`max_watermark_cutoff` must be formatted like '2023-12-01T00:00:00Z', got {value!r}"
        ) from e

    # The literal 'Z' denotes UTC; attach it explicitly so the value is not
    # reinterpreted in the driver's local time zone when Spark converts it.
    return parsed.replace(tzinfo=timezone.utc)


# convert string format to timestamp (UTC-aware; empty input becomes None)
max_watermark_cutoff = _parse_watermark_cutoff(max_watermark_cutoff)


# Gather the notebook parameters into the one spec row that will be upserted.
spec_row = dict(
    source_layer=source_layer,
    source_system=source_system,
    source_table_name=source_table_name,
    target_table_name=target_table_name,
    primary_keys_json=primary_keys_json,
    watermark_column=watermark_column,
    max_watermark_cutoff=max_watermark_cutoff,
    schema_json=schema_json,
    transform_mode=transform_mode,
    transform_notebook=transform_notebook,
)

# Single-row source DF (the new/updated spec row), typed with the explicit
# specs schema rather than relying on inference.
src_df = spark.createDataFrame([spec_row], schema=_specs_schema())

display(src_df)  # preview row to be merged

###### **Merge Data**

In [None]:
# Target delta table at ABFSS path
tgt = DeltaTable.forPath(spark, specs_table_path)

# Columns that identify a spec row: an incoming row updates the existing row
# only when all three match, otherwise it is inserted.
# NOTE(review): `target_table_name` is not part of the match key, so one source
# table can hold only a single spec row - confirm this is intended.
key_cols = ["source_layer", "source_system", "source_table_name"]

# Columns copied from the incoming row on both update and insert.
payload_cols = [
    "target_table_name",
    "primary_keys_json",
    "watermark_column",
    "max_watermark_cutoff",
    "schema_json",
    "transform_mode",
    "transform_notebook",
]

merge_cond = " AND ".join(f"t.{col} = s.{col}" for col in key_cols)

# On update the key columns and `_created_ts` are left untouched.
update_set = {col: f"s.{col}" for col in payload_cols}
update_set["_updated_ts"] = "current_timestamp()"

# On insert every column is written and both audit timestamps are stamped.
insert_values = {col: f"s.{col}" for col in key_cols + payload_cols}
insert_values["_created_ts"] = "current_timestamp()"
insert_values["_updated_ts"] = "current_timestamp()"

merge_builder = tgt.alias("t").merge(src_df.alias("s"), merge_cond)
merge_builder = merge_builder.whenMatchedUpdate(set=update_set)
merge_builder = merge_builder.whenNotMatchedInsert(values=insert_values)
merge_builder.execute()

###### **Preview Table**

In [None]:
# Toggle for the read-back below: when False the notebook exits here with an
# empty exit value instead of displaying the table.
preview = True

if not preview:
    notebookutils.notebook.exit("")

# Load the specs Delta table from its path and show its current contents.
df = spark.read.load(specs_table_path, format="delta")
display(df)

In [None]:
# TODO: add guardrails around the inputs (e.g. reject empty values for the non-nullable spec fields before merging)