# Ops Deployment Accelerator

<details><summary>Click to read instructions here...</summary>

1. Click `Run all`
2. Set `Ops` as the `default lakehouse` in the left panel
3. Click `Run all` again

</details>

In [None]:
# ---------------------- SAFE DEPLOYMENT ---------------------- #
# --- DEPLOY OPS OR RETURN OPS ARTIFACT IF EXISTS --- #
def idemp_deploy_lh(name: str, *, description: str = "", workspace_id: str, enable_schemas: bool = True):
    """Return the lakehouse called *name*, creating it when the lookup fails.

    Args:
        name: Name of the lakehouse to fetch or create.
        description: Description handed to the create call.
        workspace_id: Workspace id used for both the lookup and the create call.
        enable_schemas: When true the lakehouse is created with the
            ``{"enableSchemas": True}`` definition; otherwise no definition
            (``None``) is passed.

    Returns:
        Whatever ``notebookutils.lakehouse.get`` returns on a successful
        lookup, otherwise whatever ``notebookutils.lakehouse.create`` returns.

    Raises:
        Any exception raised by ``notebookutils.lakehouse.create``; lookup
        errors are caught and trigger the create path instead.
    """
    try:  # Try to get lh artifact
        return notebookutils.lakehouse.get(name, workspace_id)
    except Exception as lookup_error:  # If it doesn't exist (or lookup fails), create it
        # Name the lakehouse actually requested and surface the lookup error,
        # so a failure other than "not found" is visible.
        print(f"{name} lakehouse not found, attempting to deploy...", flush=True)
        print(f"Lookup error: {lookup_error!r}", flush=True)
        definition = {"enableSchemas": True} if enable_schemas else None
        return notebookutils.lakehouse.create(name, description, definition, workspace_id)

def raise_(value_name:str):
    """Raise ``ValueError`` stating that *value_name* was not found.

    Being a function, it can be called from inside an expression
    (for example ``value or raise_("value")``), where a ``raise``
    statement is not allowed.
    """
    message = f"{value_name} was not found"
    raise ValueError(message)

# --- Read the notebook runtime context --- #
ctx = notebookutils.runtime.context

# context keys looked up below
name_wsid = "currentWorkspaceId"
name_dlhid = "defaultLakehouseId"

# the workspace id is required: a missing/empty value ends in a ValueError
current_ws_id = ctx.get(name_wsid)
if not current_ws_id:
    raise_(name_wsid)

# reuse the Ops lakehouse when the lookup succeeds, otherwise deploy it
ops_lh = idemp_deploy_lh(
    "Ops",
    description="Operations / control lakehouse",
    workspace_id=current_ws_id,
    enable_schemas=True,
)

# The Ops lakehouse must be the notebook's default lakehouse; if the ids
# differ, exit the notebook with the setup instructions as the exit value.
default_lh_id = ctx.get(name_dlhid)
if default_lh_id != ops_lh.id:
    notebookutils.notebook.exit(
        "\nYou should see this message exactly once! If you see it again-- its a dragon!\n"
        "\nStep 1: Set ops as the default lakehouse in the panel to the left! `Add Data Items` -> `From Onelake Catalog`"
        "\nStep 2: Click `Run All` again"
    )


In [None]:
# Create the "ops" and "audit" schemas when they do not exist yet.
# The names are unqualified, so they resolve against the session's current catalog.
for schema in ("ops", "audit"):
    create_schema_stmt = f"CREATE SCHEMA IF NOT EXISTS {schema}"
    spark.sql(create_schema_stmt)

In [None]:
# Fully qualified table names: <lakehouse display name>.<schema>.<table>
_ops_schema = f"{ops_lh.displayName}.ops"
_audit_schema = f"{ops_lh.displayName}.audit"

MANIFEST_FQN = f"{_ops_schema}.ingestion_manifest"
SPECTBL_FQN = f"{_ops_schema}.specs"
RESTCOL_FQN = f"{_ops_schema}.rest_collection"
NBLOG_FQN = f"{_audit_schema}.notebook_run_log"

In [None]:
# Create the ingestion manifest Delta table (MANIFEST_FQN) if it does not exist.
# Per the column comments it tracks ingestion jobs: identity (manifest_id,
# table_name), where the payload was written (sink_path), the job status and
# its ingest/running/done timestamps, plus an error message.
# Table properties: optimized writes and auto-compaction are switched on,
# column defaults are allowed (ingest_ts declares a DEFAULT), column mapping
# is by name, and the protocol is pinned to reader v2 / writer v5.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {MANIFEST_FQN} (
  -- Identity
  manifest_id STRING NOT NULL COMMENT 'UUID for this ingestion job instance',
  table_name  STRING NOT NULL COMMENT 'Source table being ingested (e.g., invoice, staff)',

  -- Ingestion Variables
  sink_path STRING NOT NULL COMMENT 'Sink path where the payload for this job was written',
  ingest_ts   TIMESTAMP NOT NULL DEFAULT current_timestamp() COMMENT 'UTC timestamp when the REST API pull completed and the data was written to sinkpath',
  status      STRING NOT NULL COMMENT 'Execution state of the ingestion job (NEW, RUNNING, DONE, FAILED)',
  running_ts  TIMESTAMP COMMENT 'UTC timestamp when a worker claimed this job for execution',
  done_ts     TIMESTAMP COMMENT 'UTC timestamp when ingestion and downstream processing completed',
  error       STRING COMMENT 'Error message when status = FAILED; NULL when status = DONE'
)
USING DELTA
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite'  = 'true',
  'delta.autoOptimize.autoCompact'    = 'true',
  'delta.feature.allowColumnDefaults' = 'supported',
  'delta.columnMapping.mode'          = 'name',
  'delta.minReaderVersion'            = '2',
  'delta.minWriterVersion'            = '5'
);
""")

In [None]:
# Create the table-spec Delta table (SPECTBL_FQN) if it does not exist.
# Per the column comments each row describes one source table: its layer,
# source system and name, the target table, the merge contract (primary keys
# as a JSON array, optional watermark column and cutoff), an optional Spark
# schema as JSON, the transform mode/notebook, and created/updated audit
# timestamps.
# transform_mode, _created_ts and _updated_ts declare DEFAULT values, which is
# why the allowColumnDefaults table feature is set below.
# NOTE(review): the watermark_column comment refers to ops.watermarks, but no
# such table is created in the visible part of this notebook — confirm.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {SPECTBL_FQN} (
  -- identity
  source_layer  STRING NOT NULL COMMENT '(e.g., bronze, silver, gold)',
  source_system STRING NOT NULL COMMENT 'Source system this table belongs to (e.g., lcvista, intacct) also used for schema',
  source_table_name STRING NOT NULL COMMENT 'Logical name (e.g., staff, accounts)',
  
  -- targets
  target_table_name STRING NOT NULL COMMENT 'Target sink (e.g., lcvista.staff)',

  -- merge contract
  primary_keys_json STRING NOT NULL COMMENT 'JSON array of primary key columns for MERGE (e.g., ["id"] or ["id","line_no"])',
  watermark_column STRING COMMENT 'Name of the source-system column used for incremental ingestion (e.g., modified, updated_at, last_changed); actual watermark values are tracked in ops.watermarks',
  max_watermark_cuttoff TIMESTAMP COMMENT 'Highest successfully processed watermark value; next run resumes strictly after this timestamp',

  -- schema contract (Spark StructType JSON)
  schema_json STRING COMMENT 'Spark StructType serialized as JSON; enforced schema for reading source into sink',

  -- controls / routing
  transform_mode STRING NOT NULL DEFAULT 'PASS' COMMENT 'Transformation mode: PASS, FLATTEN, or CUSTOM',
  transform_notebook STRING COMMENT 'Optional notebook name or path used when transform_mode = CUSTOM',

  -- audit
  _created_ts TIMESTAMP NOT NULL DEFAULT current_timestamp() COMMENT 'UTC timestamp when this table spec was created',
  _updated_ts TIMESTAMP NOT NULL DEFAULT current_timestamp() COMMENT 'UTC timestamp when this table spec was last updated'
)
USING DELTA
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite'  = 'true',
  'delta.autoOptimize.autoCompact'    = 'true',
  'delta.feature.allowColumnDefaults' = 'supported',
  'delta.columnMapping.mode'          = 'name',
  'delta.minReaderVersion'            = '2',
  'delta.minWriterVersion'            = '5'
)
""")

In [None]:
# Create the REST collection Delta table (RESTCOL_FQN) if it does not exist.
# Per the column comments each row describes one API endpoint to pull:
# the source system and logical table it feeds, the endpoint and HTTP method,
# and execution settings (watermark column/cutoff, page size, query template,
# static headers as JSON).
# No column here declares a DEFAULT; the table properties are the same set
# used by the other tables created in this notebook.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {RESTCOL_FQN} (
  -- Identity
  source_system  STRING  NOT NULL COMMENT 'API system identifier (e.g., lcvista, intacct) used for routing API logic',
  table_name     STRING  NOT NULL COMMENT 'Logical target table name this endpoint populates in the Bronze/Silver pipeline',

  -- Endpoint
  endpoint  STRING  NOT NULL COMMENT 'API endpoint to call (e.g., /v1/gl/accounts, /people)',
  http_method    STRING  NOT NULL COMMENT 'HTTP verb to use when calling the endpoint (typically GET or POST)',
  
  -- Execution
  watermark_column STRING COMMENT 'Name of the source-system column used for incremental ingestion (e.g., modified, updated_at, last_changed); actual watermark values are tracked in ops.watermarks',
  max_watermark_cuttoff TIMESTAMP COMMENT 'Highest successfully processed watermark value; next run resumes strictly after this timestamp',
  page_size      INT COMMENT 'Maximum number of records to request per API call; used for pagination and rate control',
  query_template STRING COMMENT 'Parameterized query string template injected at runtime (e.g., updatedAfter=watermark)',
  headers_json   STRING COMMENT 'Optional static HTTP headers serialized as JSON and merged into request headers'
)
USING DELTA
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite'  = 'true',
  'delta.autoOptimize.autoCompact'    = 'true',
  'delta.feature.allowColumnDefaults' = 'supported',
  'delta.columnMapping.mode'          = 'name',
  'delta.minReaderVersion'            = '2',
  'delta.minWriterVersion'            = '5'
);
""")

In [None]:
# Create the notebook run log Delta table (NBLOG_FQN) if it does not exist.
# Per the column comments each row records the outcome of one notebook
# activity: the shared run id, notebook and table names, final status,
# attempt count, elapsed seconds, error message and the log timestamp.
# All columns are nullable and none declares a DEFAULT; the table properties
# are the same set used by the other tables created in this notebook.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {NBLOG_FQN} (
  -- Identity
  run_id          STRING    COMMENT 'Master pipeline id shared across all activities in the same scheduled run',
  notebook_name   STRING    COMMENT 'Name of the activity being logged',
  table_name      STRING    COMMENT 'Logical source table being ingested (one row per table per run)',

  -- Log Details
  status          STRING    COMMENT 'Final execution status returned by run_notebook_with_retries: SUCCESS or FAILED',
  attempts        INT       COMMENT 'Number of notebook execution attempts including retries',
  elapsed_seconds DOUBLE    COMMENT 'Total wall-clock time spent running and retrying the notebook',
  error           STRING    COMMENT 'Final error message if the notebook failed; NULL when status = SUCCESS',
  log_ts          TIMESTAMP COMMENT 'Timestamp when this result row was written to the ops log table'
)
USING DELTA
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite'  = 'true',
  'delta.autoOptimize.autoCompact'    = 'true',
  'delta.feature.allowColumnDefaults' = 'supported',
  'delta.columnMapping.mode'          = 'name',
  'delta.minReaderVersion'            = '2',
  'delta.minWriterVersion'            = '5'
)
""")

In [None]:
# Debug output: printed only when the context's "isForPipeline" value is falsy.
_is_for_pipeline = ctx.get("isForPipeline")
if not _is_for_pipeline:
    # dump the lakehouse artifact, then the runtime context
    print("\nLakehouse Artifact Details:\n", ops_lh.items(), flush=True)
    print("\nContext Details:\n", ctx.items(), flush=True)