# DQX Dashboard

In [0]:
%pip install databricks-labs-dqx==0.8.0

In [0]:
dbutils.library.restartPython()

## DQX Dashboard

In [0]:
%databricks labs dqx open-dashboards

In [0]:
%load_ext databricks_labs_dqx

%databricks labs dqx open-dashboards

In [0]:
# DQX configuration; a local copy is saved at: C:\Users\gagn57943\Documents\dqx_config.yaml
# NOTE(review): this cell holds YAML, not Python — if it is a Python code cell it
# will not execute; confirm it is meant as reference only.
# -------------------------------------------------------------------------
log_level: INFO
version: 1

run_configs:
- name: default
  # Target for quarantined rows: appended to a Delta table.
  quarantine_config:
    location: dq_dev.dqx.generated_checks_log
    format: delta
    mode: append

  # NOTE(review): checks_location is the same table as quarantine_config.location
  # above — confirm that sharing one table for both is intended.
  checks_location: dq_dev.dqx.generated_checks_log

  # Same warehouse id as WAREHOUSE_ID in the Lakeview SDK cell of this notebook.
  warehouse_id: "ca5a45e27debab49"

In [0]:
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.contexts.workspace import WorkspaceContext

# Optional: make the link clickable in the notebook
def _display_html_link(url: str, label: str = None):
    try:
        displayHTML(f'<a href="{url}" target="_blank">{label or url}</a>')
    except Exception:
        print(url)

# Derive the link to the dashboards "folder" from the DQX installation's
# workspace link, then show it both as plain text and as a clickable anchor.
ctx = WorkspaceContext(WorkspaceClient())
workspace_root_link = ctx.installation.workspace_link('')
dashboards_folder_link = f"{workspace_root_link}dashboards/"

print(
    "Open a dashboard from this folder, then hit Refresh inside the dashboard:",
    dashboards_folder_link,
    sep="\n",
)
_display_html_link(dashboards_folder_link, "Open DQX dashboards folder")

# (Optional) If you know a specific dashboard path (from the UI),
# you can deep-link to it like this:
# my_dash_relpath = "dashboards/Shared/DQX/Quality Overview"  # example path after you open the folder and copy link
# my_dash_link = f"{ctx.installation.workspace_link('')}{my_dash_relpath}"
# print("Direct dashboard link:")
# _display_html_link(my_dash_link, "Open: DQX – Quality Overview")

In [0]:
# Minimal variant: print the link to the workspace dashboards "folder".
# WorkspaceClient is imported here as well so this cell runs on its own instead
# of depending on an earlier cell having imported it.
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.contexts.workspace import WorkspaceContext

ctx = WorkspaceContext(WorkspaceClient())
dashboards_folder_link = f"{ctx.installation.workspace_link('')}dashboards/"
print("Open a dashboard from the following folder and refresh it:")
print(dashboards_folder_link)

## Custom DQX Dashboard

In [0]:
%pip install databricks-sdk

In [0]:
dbutils.library.restartPython()

### Authoritative `dashboard_*` Views

In [0]:
%sql
-- DQX dashboard base & tiles (idempotent)
-- Selects catalog dq_dev / schema dqx, then drops every dashboard_* view.
-- IF EXISTS makes each DROP a no-op when the view is absent.
-- NOTE(review): the following cell begins with this same USE/DROP preamble,
-- so this cell looks redundant — confirm whether it is still needed.
USE CATALOG dq_dev;
USE SCHEMA dqx;

-- Clean
DROP VIEW IF EXISTS dashboard_errors;
DROP VIEW IF EXISTS dashboard_warnings;
DROP VIEW IF EXISTS dashboard_events;
DROP VIEW IF EXISTS dashboard_row_kv;
DROP VIEW IF EXISTS dashboard_row_value_stats;
DROP VIEW IF EXISTS dashboard_kpi;
DROP VIEW IF EXISTS dashboard_error_patterns;
DROP VIEW IF EXISTS dashboard_events_by_day;
DROP VIEW IF EXISTS dashboard_events_by_check;
DROP VIEW IF EXISTS dashboard_events_by_table;
DROP VIEW IF EXISTS dashboard_events_by_column;
DROP VIEW IF EXISTS dashboard_events_by_run_config;
DROP VIEW IF EXISTS dashboard_top_error_messages;
DROP VIEW IF EXISTS dashboard_rules_catalog;
DROP VIEW IF EXISTS dashboard_rules_affected;
DROP VIEW IF EXISTS dashboard_rules_coverage;

In [0]:
%sql
-- DQX dashboard base & tiles (idempotent)
-- Sets the working catalog/schema so the unqualified dashboard_* names below
-- resolve to dq_dev.dqx, then drops the existing views before rebuilding them.
USE CATALOG dq_dev;
USE SCHEMA dqx;

-- Clean
-- Every view below is defined with CREATE OR REPLACE VIEW, so these drops mainly
-- guarantee a clean slate for views that are no longer (re)created.
DROP VIEW IF EXISTS dashboard_errors;
DROP VIEW IF EXISTS dashboard_warnings;
DROP VIEW IF EXISTS dashboard_events;
DROP VIEW IF EXISTS dashboard_row_kv;
DROP VIEW IF EXISTS dashboard_row_value_stats;
DROP VIEW IF EXISTS dashboard_kpi;
DROP VIEW IF EXISTS dashboard_error_patterns;
DROP VIEW IF EXISTS dashboard_events_by_day;
DROP VIEW IF EXISTS dashboard_events_by_check;
DROP VIEW IF EXISTS dashboard_events_by_table;
DROP VIEW IF EXISTS dashboard_events_by_column;
DROP VIEW IF EXISTS dashboard_events_by_run_config;
DROP VIEW IF EXISTS dashboard_top_error_messages;
DROP VIEW IF EXISTS dashboard_rules_catalog;
DROP VIEW IF EXISTS dashboard_rules_affected;
DROP VIEW IF EXISTS dashboard_rules_coverage;

-- 1) Events from arrays (stable event_id via posexplode)
-- dashboard_errors: one row per element of checks_log._errors, carrying the
-- parent log row's identifiers and fingerprints. event_id hashes the log_id
-- together with the element's position in the array.
-- Uses a plain LATERAL VIEW (not OUTER): with OUTER, a log row whose _errors
-- array is NULL or empty still produced one row with NULL check fields and a
-- NULL event_id, which downstream views counted as an error event.
CREATE OR REPLACE VIEW dashboard_errors AS
SELECT
  l.log_id, l.table_name, l.run_config_name,
  e.name AS check_name, e.message, e.function, e.filter,
  e.run_time, e.user_metadata, e.columns,
  l.created_at,
  l._errors_fingerprint, l.row_snapshot_fingerprint,
  p.pos AS err_pos,
  sha2(concat(l.log_id, ':E:', cast(p.pos AS STRING)), 256) AS event_id
FROM dq_dev.dqx.checks_log l
LATERAL VIEW posexplode(_errors) p AS pos, e;

-- dashboard_warnings: one row per element of checks_log._warnings, carrying the
-- parent log row's identifiers and fingerprints. event_id hashes the log_id
-- together with the element's position in the array.
-- Uses a plain LATERAL VIEW (not OUTER): with OUTER, a log row whose _warnings
-- array is NULL or empty still produced one row with NULL check fields and a
-- NULL event_id, which downstream views counted as a warning event.
CREATE OR REPLACE VIEW dashboard_warnings AS
SELECT
  l.log_id, l.table_name, l.run_config_name,
  w.name AS check_name, w.message, w.function, w.filter,
  w.run_time, w.user_metadata, w.columns,
  l.created_at,
  l._warnings_fingerprint, l.row_snapshot_fingerprint,
  p.pos AS warn_pos,
  sha2(concat(l.log_id, ':W:', cast(p.pos AS STRING)), 256) AS event_id
FROM dq_dev.dqx.checks_log l
LATERAL VIEW posexplode(_warnings) p AS pos, w;

-- dashboard_events: unified event stream. Every row of dashboard_errors is
-- tagged 'ERROR' and every row of dashboard_warnings is tagged 'WARN'; the
-- positional helper columns (err_pos / warn_pos) are dropped.
-- UNION ALL matches columns by position and takes the names from the first
-- branch, so the column exposed as _errors_fingerprint carries
-- _warnings_fingerprint for 'WARN' rows.
CREATE OR REPLACE VIEW dashboard_events AS
SELECT 'ERROR' AS severity, * EXCEPT(err_pos) FROM dashboard_errors
UNION ALL
SELECT 'WARN'  AS severity, * EXCEPT(warn_pos) FROM dashboard_warnings;

-- 2) Row snapshot → KV + quick value stats
-- dashboard_row_kv: one row per row_snapshot entry of each log row (OUTER keeps
-- log rows without a snapshot), with three boolean flags describing the value.
CREATE OR REPLACE VIEW dashboard_row_kv AS
SELECT
  l.log_id,
  l.table_name,
  l.run_config_name,
  l.created_at,
  kv.column AS column,
  kv.value  AS value,
  -- value is missing
  kv.value IS NULL                                       AS is_null,
  -- value is present but blank after trimming
  kv.value IS NOT NULL AND length(trim(kv.value)) = 0    AS is_empty,
  -- value parses as a number equal to zero (unparseable/NULL → FALSE)
  COALESCE(TRY_CAST(kv.value AS DOUBLE) = 0, FALSE)      AS is_zero
FROM dq_dev.dqx.checks_log l
LATERAL VIEW OUTER explode(row_snapshot) s AS kv;

-- dashboard_row_value_stats: per log row, how many snapshot entries were
-- flagged NULL, empty, or zero by dashboard_row_kv.
CREATE OR REPLACE VIEW dashboard_row_value_stats AS
SELECT
  kv.log_id,
  COUNT_IF(kv.is_null)  AS null_count,
  COUNT_IF(kv.is_empty) AS empty_count,
  COUNT_IF(kv.is_zero)  AS zero_count
FROM dashboard_row_kv AS kv
GROUP BY kv.log_id;

-- 3) KPIs (rows vs events; uniqueness; recency)
-- dashboard_kpi: a single summary row combining row-level counts from
-- dq_dev.dqx.checks_log with event totals from dashboard_events.
CREATE OR REPLACE VIEW dashboard_kpi AS
WITH flagged AS (
  -- size() of a NULL array yields NULL or -1 depending on session settings,
  -- never 0, so "size(x) = 0" misses rows whose array is NULL. Normalise the
  -- presence of errors/warnings to explicit booleans once.
  SELECT
    COALESCE(size(_errors),   0) > 0 AS has_errors,
    COALESCE(size(_warnings), 0) > 0 AS has_warnings,
    row_snapshot_fingerprint,
    _errors_fingerprint,
    _warnings_fingerprint,
    created_at
  FROM dq_dev.dqx.checks_log
),
base AS (
  -- COUNT_IF returns 0 (not NULL) when the log table is empty.
  SELECT
    COUNT(*)                                   AS rows_with_any_issue,
    COUNT_IF(has_errors)                       AS rows_with_errors,
    COUNT_IF(has_warnings)                     AS rows_with_warnings,
    COUNT_IF(NOT has_errors AND has_warnings)  AS rows_warn_only,
    COUNT_IF(has_errors AND has_warnings)      AS rows_both,
    COUNT(DISTINCT row_snapshot_fingerprint)   AS unique_rows_fingerprint,
    COUNT(DISTINCT _errors_fingerprint)        AS unique_error_patterns,
    COUNT(DISTINCT _warnings_fingerprint)      AS unique_warning_patterns,
    MIN(created_at)                            AS first_ingest_at,
    MAX(created_at)                            AS last_ingest_at
  FROM flagged
),
evt AS (
  -- Only rows with an event_id are real events; a NULL event_id marks a
  -- placeholder row for a log entry that has nothing in the exploded array.
  SELECT
    COUNT_IF(severity = 'ERROR' AND event_id IS NOT NULL) AS error_events,
    COUNT_IF(severity = 'WARN'  AND event_id IS NOT NULL) AS warning_events
  FROM dashboard_events
)
SELECT
  b.*,
  e.error_events,
  e.warning_events,
  -- NULLIF avoids division by zero when the log table is empty (result is NULL).
  ROUND(e.error_events / NULLIF(b.rows_with_any_issue,0), 3)  AS avg_errors_per_row,
  ROUND(e.warning_events / NULLIF(b.rows_with_any_issue,0), 3) AS avg_warnings_per_row
FROM base b CROSS JOIN evt e;

-- 4) Patterns & breakdowns
-- dashboard_error_patterns: for each distinct _errors_fingerprint, the number
-- of distinct log rows carrying it plus one arbitrary sample of the table,
-- run config, check name and message.
CREATE OR REPLACE VIEW dashboard_error_patterns AS
SELECT
  err._errors_fingerprint        AS error_pattern,
  COUNT(DISTINCT err.log_id)     AS rows_hit,
  ANY_VALUE(err.table_name)      AS sample_table,
  ANY_VALUE(err.run_config_name) AS sample_run_config,
  ANY_VALUE(err.check_name)      AS sample_check_name,
  ANY_VALUE(err.message)         AS sample_message
FROM dashboard_errors AS err
GROUP BY err._errors_fingerprint
ORDER BY rows_hit DESC;

-- dashboard_events_by_day: error and warning event totals per calendar day of
-- the event's run_time.
CREATE OR REPLACE VIEW dashboard_events_by_day AS
SELECT
  date_trunc('day', ev.run_time)  AS day,
  COUNT_IF(ev.severity = 'ERROR') AS error_events,
  COUNT_IF(ev.severity = 'WARN')  AS warning_events
FROM dashboard_events AS ev
GROUP BY date_trunc('day', ev.run_time)
ORDER BY day;

-- dashboard_events_by_check: error and warning event totals per check name;
-- events without a check name are grouped under 'unknown'.
CREATE OR REPLACE VIEW dashboard_events_by_check AS
SELECT
  COALESCE(ev.check_name,'unknown') AS check_name,
  COUNT_IF(ev.severity = 'ERROR')   AS error_events,
  COUNT_IF(ev.severity = 'WARN')    AS warning_events
FROM dashboard_events AS ev
GROUP BY COALESCE(ev.check_name,'unknown')
ORDER BY error_events DESC, warning_events DESC;

-- dashboard_events_by_table: error and warning event totals per table_name.
CREATE OR REPLACE VIEW dashboard_events_by_table AS
SELECT
  ev.table_name,
  COUNT_IF(ev.severity = 'ERROR') AS error_events,
  COUNT_IF(ev.severity = 'WARN')  AS warning_events
FROM dashboard_events AS ev
GROUP BY ev.table_name
ORDER BY error_events DESC, warning_events DESC;

-- dashboard_events_by_column: error and warning event totals per column named
-- in the event's columns array. OUTER keeps events that list no columns; they
-- are grouped under a NULL column.
CREATE OR REPLACE VIEW dashboard_events_by_column AS
SELECT
  col                          AS column,
  COUNT_IF(severity = 'ERROR') AS error_events,
  COUNT_IF(severity = 'WARN')  AS warning_events
FROM dashboard_events
LATERAL VIEW OUTER explode(columns) c AS col
GROUP BY col
ORDER BY error_events DESC, warning_events DESC;

-- dashboard_events_by_run_config: error and warning event totals per
-- run_config_name.
CREATE OR REPLACE VIEW dashboard_events_by_run_config AS
SELECT
  ev.run_config_name,
  COUNT_IF(ev.severity = 'ERROR') AS error_events,
  COUNT_IF(ev.severity = 'WARN')  AS warning_events
FROM dashboard_events AS ev
GROUP BY ev.run_config_name
ORDER BY error_events DESC, warning_events DESC;

-- dashboard_top_error_messages: number of dashboard_errors rows per message
-- text, most frequent first.
CREATE OR REPLACE VIEW dashboard_top_error_messages AS
SELECT
  err.message,
  COUNT(*) AS error_events
FROM dashboard_errors AS err
GROUP BY err.message
ORDER BY error_events DESC;

-- 5) Rule coverage (adjust FQN if your rules live elsewhere)
-- dashboard_rules_catalog: the rule attributes (check_id, name, criticality)
-- with their table and run config, read as-is from the source table.
-- NOTE(review): the source here is the log table dq_dev.dqx.checks_log, the same
-- table dashboard_rules_affected explodes check_id from (i.e. check_id is an
-- array there). Confirm that this table really has name/criticality columns and
-- that check_id has the element type expected by the USING (check_id) join in
-- dashboard_rules_coverage; otherwise point this view at the rules table.
CREATE OR REPLACE VIEW dashboard_rules_catalog AS
SELECT check_id, name, criticality, table_name, run_config_name
FROM dq_dev.dqx.checks_log;

-- dashboard_rules_affected: number of log rows per exploded check_id element.
-- OUTER keeps log rows whose check_id is NULL/empty; those are counted under a
-- NULL check_id group.
CREATE OR REPLACE VIEW dashboard_rules_affected AS
SELECT cid AS check_id, COUNT(*) AS rows_hit
FROM dq_dev.dqx.checks_log
LATERAL VIEW OUTER explode(check_id) t AS cid
GROUP BY cid
ORDER BY rows_hit DESC;

-- dashboard_rules_coverage: every catalog rule with the number of log rows it
-- hit (0 when it has no match in dashboard_rules_affected) and a flag telling
-- whether it matched at all.
CREATE OR REPLACE VIEW dashboard_rules_coverage AS
SELECT
  cat.check_id,
  cat.name,
  cat.criticality,
  cat.table_name,
  cat.run_config_name,
  COALESCE(hit.rows_hit, 0) AS rows_hit,
  hit.check_id IS NOT NULL  AS ever_fired
FROM dashboard_rules_catalog AS cat
LEFT JOIN dashboard_rules_affected AS hit USING (check_id);

### Lakeview DQX Dashboard via SDK

In [0]:
# Create Lakeview dashboard from repo JSON; publish; schedule; export snapshot
# Run in Databricks (no local tooling required).
#
# Prerequisite: databricks-sdk is installed and the Python process restarted by
# the dedicated cells under "Custom DQX Dashboard". Those steps are deliberately
# not repeated here: restarting Python resets the interpreter state, so code
# placed after a restart in the same cell cannot be relied on to run.

import json
import os

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import (
    CronSchedule,
    Dashboard as LvDashboard,
    Schedule,
    SchedulePauseStatus,
)

# --- config ---
WAREHOUSE_ID = "ca5a45e27debab49"
SCHEDULE_EVERY_MIN = 30          # refresh cadence in minutes (1-59)
SCHEDULE_TIMEZONE = "UTC"        # timezone the cron expression is evaluated in
DISPLAY_NAME = "DQX – Quality Overview"
# Workspace folder that will contain the dashboard (Shared or a user folder).
PARENT_PATH = "/Users/levi.gagne@claconnect.com/DQX"

# Paths of the dashboard definition and its exported snapshot, relative to this
# notebook's folder.
JSON_REL_PATH = "dashboards/DQX_Overview.lvdash.json"
SNAPSHOT_REL_PATH = "dashboards/DQX_Overview.snapshot.lvdash.json"

# --- resolve repo file to /Workspace path ---
nb_path = dbutils.entry_point.getDbutils().notebook().getContext().notebookPath().get()
repo_dir = os.path.dirname(nb_path)
json_ws_path = f"{repo_dir}/{JSON_REL_PATH}".replace("//", "/")
json_local = "/Workspace" + json_ws_path  # filesystem path to open()

print(f"Reading dashboard JSON: {json_local}")
with open(json_local, "r", encoding="utf-8") as f:
    serialized = f.read()

# Optional templating: replace placeholder with actual warehouse id
serialized = serialized.replace("{{WAREHOUSE_ID}}", WAREHOUSE_ID)

# Fail early, with the file name in the message, if the definition is not JSON.
try:
    json.loads(serialized)
except json.JSONDecodeError as e:
    raise ValueError(f"Invalid JSON in {json_local}: {e}") from e

# The schedule below is expressed as a Quartz cron "every N minutes" pattern,
# which is only meaningful for 1..59.
if not 1 <= SCHEDULE_EVERY_MIN <= 59:
    raise ValueError(
        f"SCHEDULE_EVERY_MIN must be between 1 and 59, got {SCHEDULE_EVERY_MIN}"
    )

w = WorkspaceClient()

# Ensure parent folder. mkdirs creates missing parents and succeeds when the
# directory already exists, so no existence probe (and no blanket except that
# could hide permission errors) is needed.
w.workspace.mkdirs(PARENT_PATH)

# Create dashboard from serialized JSON (clean create; no UI).
# NOTE: every run creates a new dashboard under PARENT_PATH.
dash = w.lakeview.create(
    dashboard=LvDashboard(
        display_name=DISPLAY_NAME,
        warehouse_id=WAREHOUSE_ID,           # default warehouse
        parent_path=PARENT_PATH,
        serialized_dashboard=serialized,     # your definition
    )
)
DASHBOARD_ID = dash.dashboard_id
print("dashboard_id:", DASHBOARD_ID)

# Publish (so schedules/subscriptions can attach)
w.lakeview.publish(DASHBOARD_ID)

# Attach a refresh schedule. Lakeview schedules are cron-based: fire at second 0
# of every SCHEDULE_EVERY_MIN-th minute. The cadence restarts each hour, so use a
# divisor of 60 for evenly spaced runs.
# NOTE(review): confirm the workspace accepts sub-hourly dashboard schedules.
schedule = Schedule(
    display_name=f"Every {SCHEDULE_EVERY_MIN} minutes",
    cron_schedule=CronSchedule(
        quartz_cron_expression=f"0 0/{SCHEDULE_EVERY_MIN} * * * ?",
        timezone_id=SCHEDULE_TIMEZONE,
    ),
    pause_status=SchedulePauseStatus.UNPAUSED,
)
sched = w.lakeview.create_schedule(DASHBOARD_ID, schedule=schedule)
print("schedule_id:", sched.schedule_id)

# Print URL
base = w.config.host.rstrip("/")
url = f"{base}/dashboardsv3/{DASHBOARD_ID}"
print("dashboard url:", url)

# Export server-enriched JSON snapshot (what the workspace stores)
server_def = w.lakeview.get(DASHBOARD_ID)
snapshot = server_def.serialized_dashboard or ""
snap_path = "/Workspace" + f"{repo_dir}/{SNAPSHOT_REL_PATH}"
with open(snap_path, "w", encoding="utf-8") as f:
    f.write(snapshot)
print("snapshot written:", snap_path)

# Tip: commit dashboards/DQX_Overview.snapshot.lvdash.json to Git as the source of truth.