# Delta Live Tables governance batch demo

Interactively explore how contract and dataset versions evolve when a Delta Live Tables pipeline publishes governed tables.

## 1. Configure parameters

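Fill in the widgets (Databricks) or rely on environment variables when running locally; each parameter `<name>` is read from the environment variable `DC43_DEMO_<NAME>` (for example, `DC43_DEMO_CATALOG`). The notebook uses these values for Unity Catalog targets, dataset identifiers, and the optional local execution harness.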

In [None]:
import os


def _resolve_param(name: str, *, default: str | None = None, label: str | None = None) -> str | None:
    env_key = f"DC43_DEMO_{name.upper()}"
    if 'dbutils' in globals():
        try:
            if label is None:
                label = name
            dbutils.widgets.text(name, os.environ.get(env_key, default) or '', label)
            value = dbutils.widgets.get(name)
            if value:
                return value
        except Exception as exc:  # pragma: no cover - widgets unavailable
            print(f'Falling back to environment for {name}: {exc}')
    return os.environ.get(env_key, default)


# Strings (compared lower-cased) that switch a boolean parameter on.
_TRUTHY_VALUES = {'1', 'true', 'yes'}


def _as_flag(raw: str | None) -> bool:
    # A missing value is treated like an empty string, i.e. False.
    return (raw or '').lower() in _TRUTHY_VALUES


# Parameters are resolved in this fixed order, one widget/environment lookup each.
CONFIG_PATH = _resolve_param('config_path', label='Service config (optional)')
CATALOG = _resolve_param('catalog', default='main', label='Unity Catalog')
SCHEMA = _resolve_param('schema', default='governed_demo', label='Schema')
TABLE = _resolve_param('table', default='orders_dlt', label='DLT table name')
DATASET_ID = _resolve_param('dataset_id', default='governed.analytics.orders')
DATA_PRODUCT_ID = _resolve_param('data_product_id', default='dp.analytics.orders')
OUTPUT_PORT = _resolve_param('output_port', default='orders')
CONTRACT_ID = _resolve_param('contract_id', default='contracts.analytics.orders')
DATASET_VERSION = _resolve_param('dataset_version', default='1.0.0', label='Dataset version')
ENFORCE = _as_flag(_resolve_param('enforce', default='false'))
RUN_LOCAL = _as_flag(_resolve_param('run_local', default='false', label='Execute locally?'))

# Echo every resolved setting so the effective configuration is visible in the output.
_SUMMARY_KEYS = (
    'CONFIG_PATH',
    'CATALOG',
    'SCHEMA',
    'TABLE',
    'DATASET_ID',
    'DATA_PRODUCT_ID',
    'OUTPUT_PORT',
    'CONTRACT_ID',
    'DATASET_VERSION',
    'ENFORCE',
    'RUN_LOCAL',
)
print('Configuration summary:')
for key in _SUMMARY_KEYS:
    print(f'  {key} = {globals()[key]}')

## 2. Initialise Spark and service clients

In [None]:
from pyspark.sql import SparkSession


# Obtain the session through the builder's getOrCreate() and report which Spark version is in use.
session_builder = SparkSession.builder
spark = session_builder.getOrCreate()
print(f'Using Spark {spark.version}')

In [None]:
from dc43_service_clients.bootstrap import load_service_clients


# Load the service clients described by CONFIG_PATH (which may be None).
suite = load_service_clients(CONFIG_PATH)

# The rest of the notebook uses all three of these clients, so stop early if any is absent.
_services_ready = (
    suite.contract is not None
    and suite.data_product is not None
    and suite.governance is not None
)
if not _services_ready:
    raise RuntimeError('Contract, data product, and governance services are required')

## 3. Prepare Unity Catalog targets

In [None]:
# Build the <catalog>.<schema> prefix once, then the fully qualified table name from it.
schema_name = f"{CATALOG}.{SCHEMA}"
table_name = f"{schema_name}.{TABLE}"

# Create the target schema if it does not exist yet.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
print(f'DLT pipeline writes governed data to {table_name}')

## 4. Build contract revisions

In [None]:
from dc43_integrations.examples.databricks_delta_versioning_support import (
    VersionedWriteSpec,
    build_contract,
    ensure_active_data_product,
    register_contracts,
)

# One entry per contract revision, oldest first:
# (version, allowed_currencies, include_discount).
_CONTRACT_REVISIONS = (
    ('0.1.0', ('EUR', 'USD'), False),
    ('0.2.0', ('EUR', 'USD'), True),
    ('0.3.0', ('EUR', 'USD', 'GBP'), True),
)

# Every revision shares the same contract id and physical target; only the
# version, the currency list and the discount flag differ.
contracts = [
    build_contract(
        version=revision,
        contract_id=CONTRACT_ID,
        table_name=table_name,
        catalog=CATALOG,
        schema=SCHEMA,
        allowed_currencies=list(currencies),
        include_discount=with_discount,
    )
    for revision, currencies, with_discount in _CONTRACT_REVISIONS
]

# Register all revisions, then bind each one to the data product's output port.
register_contracts(suite.contract, contracts)
for contract in contracts:
    ensure_active_data_product(
        data_product_service=suite.data_product,
        data_product_id=DATA_PRODUCT_ID,
        port_name=OUTPUT_PORT,
        contract=contract,
        physical_location=table_name,
    )

revision_count = len(contracts)
print(f'Registered {revision_count} contract revisions under {CONTRACT_ID}')

## 5. Select the dataset payload

In [None]:
def _order_row(order_id, customer_id, order_ts, amount, currency, discount_rate=None):
    # Build one order record; 'discount_rate' is only added when a rate is supplied.
    row = {
        'order_id': order_id,
        'customer_id': customer_id,
        'order_ts': order_ts,
        'amount': amount,
        'currency': currency,
    }
    if discount_rate is not None:
        row['discount_rate'] = discount_rate
    return row


# One write per dataset version, each paired with the contract revision it is written under.
writes = [
    # 1.0.0 -> first contract revision: rows carry no discount_rate.
    VersionedWriteSpec(
        contract=contracts[0],
        dataset_version='1.0.0',
        rows=[
            _order_row(1, 101, '2024-01-01T10:00:00Z', 125.5, 'EUR'),
            _order_row(2, 102, '2024-01-02T11:15:00Z', 75.0, 'USD'),
        ],
    ),
    # 1.1.0 -> second contract revision: rows gain a discount_rate.
    VersionedWriteSpec(
        contract=contracts[1],
        dataset_version='1.1.0',
        rows=[
            _order_row(1, 101, '2024-02-01T09:00:00Z', 135.0, 'EUR', 0.05),
            _order_row(2, 102, '2024-02-02T09:30:00Z', 80.0, 'USD', 0.10),
        ],
    ),
    # 2.0.0 -> third contract revision: adds a GBP order.
    VersionedWriteSpec(
        contract=contracts[2],
        dataset_version='2.0.0',
        rows=[
            _order_row(1, 101, '2024-03-01T08:30:00Z', 140.0, 'EUR', 0.08),
            _order_row(2, 102, '2024-03-02T12:45:00Z', 82.5, 'USD', 0.05),
            _order_row(3, 103, '2024-03-03T14:10:00Z', 210.0, 'GBP', 0.15),
        ],
    ),
]

# Index the write specs by dataset version so the configured version can be looked up directly.
SPEC_BY_VERSION = {write.dataset_version: write for write in writes}
AVAILABLE_VERSIONS = sorted(SPEC_BY_VERSION)

# Pick the spec for DATASET_VERSION; an unknown version aborts with the list of valid choices.
if (active_spec := SPEC_BY_VERSION.get(DATASET_VERSION)) is None:
    raise ValueError(f"Unknown dataset version {DATASET_VERSION!r}; choose one of {AVAILABLE_VERSIONS}")

print(f"Selected dataset version {active_spec.dataset_version} using contract {active_spec.contract.version}")

## 6. Define the governed DLT table

In [None]:
from dc43_integrations.examples.databricks_delta_versioning_support import (
    contract_has_discount,
    make_dataframe,
)
from dc43_integrations.spark.dlt import governed_table
from dc43_integrations.spark.dlt_local import ensure_dlt_module

dlt = ensure_dlt_module(allow_stub=True)

# Context passed to the decorator: the selected contract (id + version), the
# dataset id/version, and the data product output port bound to the table.
_selected_contract = active_spec.contract
_governance_context = {
    'contract': {
        'contract_id': _selected_contract.id,
        'contract_version': _selected_contract.version,
    },
    'dataset_id': DATASET_ID,
    'dataset_version': active_spec.dataset_version,
    'output_binding': {
        'data_product': DATA_PRODUCT_ID,
        'port_name': OUTPUT_PORT,
        'physical_location': table_name,
    },
}


@governed_table(
    dlt,
    context=_governance_context,
    governance_service=suite.governance,
    name=TABLE,
    comment='Governed orders fact table (DLT batch demo)',
)
def orders():
    # Build the DataFrame for the selected write spec; has_discount is derived
    # from the spec's contract via contract_has_discount.
    discount_enabled = contract_has_discount(active_spec.contract)
    return make_dataframe(spark, active_spec, has_discount=discount_enabled)


# Leave the decorated object as the cell's last expression so the notebook displays it.
orders

## 7. (Optional) Execute locally with the DLT harness

In [None]:
if not RUN_LOCAL:
    print('Set RUN_LOCAL to true to execute the asset with LocalDLTHarness outside a DLT pipeline.')
else:
    # Imported here so the harness is only loaded when local execution is requested.
    from dc43_integrations.spark.dlt_local import LocalDLTHarness

    with LocalDLTHarness(spark, module=dlt) as harness:
        result = harness.run_asset(TABLE)

        # Use the notebook's display() when it exists; otherwise, or if it fails,
        # fall back to DataFrame.show().
        has_display = 'display' in globals()
        try:
            if has_display:
                display(result)
            else:
                result.show(truncate=False)
        except Exception:
            result.show(truncate=False)

        # Summarise the expectation reports collected by the harness, if any.
        reports = harness.expectation_reports
        if reports:
            print('Expectation summary:')
            for report in reports:
                summary_line = (
                    f"- {report.asset}::{report.rule} [{report.action}] status={report.status} failures={report.failed_rows}"
                )
                print(summary_line)

## 8. Inspect Delta table history

In [None]:
from dc43_integrations.examples.databricks_delta_versioning_support import describe_delta_history

delta_history = describe_delta_history(spark, table_name)

# Print one line per history record with the three fields of interest.
_HISTORY_FIELDS = ('version', 'timestamp', 'operation')
for entry in delta_history:
    version, ts, op = map(entry.get, _HISTORY_FIELDS)
    print(f'version={version} timestamp={ts} operation={op}')

# Leave the full history as the cell's last expression so the notebook displays it.
delta_history

## 9. Render the compatibility matrix

In [None]:
from dc43_integrations.examples.databricks_delta_versioning_support import (
    collect_status_matrix,
    render_markdown_matrix,
)

import html

# Collect the governance status entries for the dataset and render them as a Markdown table.
entries = collect_status_matrix(suite.governance, dataset_id=DATASET_ID)
markdown = render_markdown_matrix(entries)

if 'displayHTML' in globals():
    try:
        # The table is shown verbatim inside <pre>, so escape it first: a '<' or
        # '&' in service-provided text would otherwise be parsed as markup.
        # quote=False leaves quote characters alone; they need no escaping in
        # element content.
        displayHTML(f'<pre>{html.escape(markdown, quote=False)}</pre>')
    except Exception:
        # Best effort: fall back to plain text if HTML rendering fails.
        print(markdown)
else:
    print(markdown)

# Leave the raw Markdown as the cell's last expression so the notebook displays it.
markdown