# DQX Dashboard

## Reference

In [0]:
{
  "version": "1",
  "displayName": "DQX – Quality Overview",
  "pages": [
    {
      "name": "overview",
      "displayName": "Overview",
      "widgets": [
        { "name": "w_kpi", "displayName": "KPI (rows vs events)", "dataset": "ds_kpi", "visualization": { "type": "table" }, "position": { "x": 0, "y": 0, "w": 8, "h": 6 } },
        { "name": "w_by_day", "displayName": "Events by day", "dataset": "ds_by_day", "visualization": { "type": "table" }, "position": { "x": 8, "y": 0, "w": 8, "h": 6 } },
        { "name": "w_by_check", "displayName": "Events by check", "dataset": "ds_by_check", "visualization": { "type": "table" }, "position": { "x": 0, "y": 6, "w": 8, "h": 8 } },
        { "name": "w_by_table", "displayName": "Events by table", "dataset": "ds_by_table", "visualization": { "type": "table" }, "position": { "x": 8, "y": 6, "w": 8, "h": 8 } },
        { "name": "w_by_column", "displayName": "Events by column", "dataset": "ds_by_column", "visualization": { "type": "table" }, "position": { "x": 0, "y": 14, "w": 8, "h": 8 } },
        { "name": "w_by_run_cfg", "displayName": "Events by run_config", "dataset": "ds_by_run_config", "visualization": { "type": "table" }, "position": { "x": 8, "y": 14, "w": 8, "h": 8 } },
        { "name": "w_patterns", "displayName": "Top error patterns", "dataset": "ds_error_patterns", "visualization": { "type": "table" }, "position": { "x": 0, "y": 22, "w": 8, "h": 8 } },
        { "name": "w_top_msgs", "displayName": "Top error messages", "dataset": "ds_top_error_messages", "visualization": { "type": "table" }, "position": { "x": 8, "y": 22, "w": 8, "h": 8 } },
        { "name": "w_detail", "displayName": "Recent events (detail)", "dataset": "ds_detail", "visualization": { "type": "table" }, "position": { "x": 0, "y": 30, "w": 16, "h": 10 } }
      ]
    }
  ],
  "datasets": [
    {
      "name": "ds_kpi",
      "displayName": "KPI",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT rows_with_any_issue, rows_with_errors, rows_with_warnings, error_events, warning_events, avg_errors_per_row, avg_warnings_per_row, first_ingest_at, last_ingest_at FROM dq_dev.dqx.dashboard_kpi"
    },
    {
      "name": "ds_by_day",
      "displayName": "Events by day",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT day, error_events, warning_events FROM dq_dev.dqx.dashboard_events_by_day ORDER BY day"
    },
    {
      "name": "ds_by_check",
      "displayName": "Events by check",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT check_name, error_events, warning_events FROM dq_dev.dqx.dashboard_events_by_check ORDER BY error_events DESC, warning_events DESC"
    },
    {
      "name": "ds_by_table",
      "displayName": "Events by table",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT table_name, error_events, warning_events FROM dq_dev.dqx.dashboard_events_by_table ORDER BY error_events DESC, warning_events DESC"
    },
    {
      "name": "ds_by_column",
      "displayName": "Events by column",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT column, error_events, warning_events FROM dq_dev.dqx.dashboard_events_by_column ORDER BY error_events DESC, warning_events DESC"
    },
    {
      "name": "ds_by_run_config",
      "displayName": "Events by run_config",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT run_config_name, error_events, warning_events FROM dq_dev.dqx.dashboard_events_by_run_config ORDER BY error_events DESC, warning_events DESC"
    },
    {
      "name": "ds_error_patterns",
      "displayName": "Error patterns",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT error_pattern, rows_hit, sample_table, sample_run_config, sample_check_name, sample_message FROM dq_dev.dqx.dashboard_error_patterns ORDER BY rows_hit DESC LIMIT 200"
    },
    {
      "name": "ds_top_error_messages",
      "displayName": "Top error messages",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT message, error_events FROM dq_dev.dqx.dashboard_top_error_messages ORDER BY error_events DESC LIMIT 100"
    },
    {
      "name": "ds_detail",
      "displayName": "Events detail",
      "dataSource": { "type": "WAREHOUSE", "warehouseId": "ca5a45e27debab49" },
      "query": "SELECT e.table_name, e.check_name, e.message, e.function, e.filter, e.severity, e.run_time, kv.null_count, kv.empty_count, kv.zero_count FROM dq_dev.dqx.dashboard_events e LEFT JOIN dq_dev.dqx.dashboard_row_value_stats kv USING (log_id) ORDER BY e.run_time DESC LIMIT 1000"
    }
  ]
}

## Implementation

In [0]:
%pip install databricks-sdk
%pip install databricks-labs-dqx==0.8.0
%databricks labs dqx open-dashboards

In [0]:
# Restart the Python process after the %pip installs in the previous cell.
# NOTE(review): presumably needed so the freshly installed package versions are the
# ones that get imported below — confirm against the Databricks library docs.
dbutils.library.restartPython()

In [0]:
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.contexts.workspace import WorkspaceContext

# Optional: make the link clickable in the notebook
def _display_html_link(url: str, label: str = None):
    try:
        displayHTML(f'<a href="{url}" target="_blank">{label or url}</a>')
    except Exception:
        print(url)

# Ask the DQX installation for the workspace base link and append the
# "dashboards/" folder to it.
ctx = WorkspaceContext(WorkspaceClient())
dashboards_folder_link = "{}dashboards/".format(ctx.installation.workspace_link(''))

# Emit the folder link as plain text first, then as a clickable anchor.
print(
    "Open a dashboard from this folder, then hit Refresh inside the dashboard:",
    dashboards_folder_link,
    sep="\n",
)
_display_html_link(dashboards_folder_link, "Open DQX dashboards folder")

# (Optional) If you know a specific dashboard path (from the UI),
# you can deep-link to it like this:
# my_dash_relpath = "dashboards/Shared/DQX/Quality Overview"  # example path after you open the folder and copy link
# my_dash_link = f"{ctx.installation.workspace_link('')}{my_dash_relpath}"
# print("Direct dashboard link:")
# _display_html_link(my_dash_link, "Open: DQX – Quality Overview")

In [0]:
# Plain-text variant of the dashboards-folder link (no HTML rendering).
# WorkspaceClient is imported here as well so the cell runs on its own instead of
# relying on the import made by an earlier cell.
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.contexts.workspace import WorkspaceContext

ctx = WorkspaceContext(WorkspaceClient())
dashboards_folder_link = f"{ctx.installation.workspace_link('')}dashboards/"
print("Open a dashboard from the following folder and refresh it:")
print(dashboards_folder_link)

## Custom DQX Dashboard

### Authoritative `dashboard_*` Views

In [0]:
%sql
-- DQX dashboard base & tiles (idempotent)
-- Selects catalog dq_dev / schema dqx and drops the plain (non-materialized)
-- dashboard_* views. Every DROP uses IF EXISTS, so the cell can be re-run when
-- some or all of the views are already absent.
-- NOTE(review): this cell only drops. No CREATE VIEW for these names is visible in
-- this notebook, while the reference dashboard JSON queries several of them
-- (e.g. dashboard_kpi, dashboard_events, dashboard_row_value_stats). Confirm they
-- are recreated elsewhere before running this against a live dashboard.
USE CATALOG dq_dev;
USE SCHEMA dqx;

-- Clean
DROP VIEW IF EXISTS dashboard_errors;
DROP VIEW IF EXISTS dashboard_warnings;
DROP VIEW IF EXISTS dashboard_events;
DROP VIEW IF EXISTS dashboard_row_kv;
DROP VIEW IF EXISTS dashboard_row_value_stats;
DROP VIEW IF EXISTS dashboard_kpi;
DROP VIEW IF EXISTS dashboard_error_patterns;
DROP VIEW IF EXISTS dashboard_events_by_day;
DROP VIEW IF EXISTS dashboard_events_by_check;
DROP VIEW IF EXISTS dashboard_events_by_table;
DROP VIEW IF EXISTS dashboard_events_by_column;
DROP VIEW IF EXISTS dashboard_events_by_run_config;
DROP VIEW IF EXISTS dashboard_top_error_messages;
DROP VIEW IF EXISTS dashboard_rules_catalog;
DROP VIEW IF EXISTS dashboard_rules_affected;
DROP VIEW IF EXISTS dashboard_rules_coverage;

In [0]:
%sql
-- ======================================================================
-- DQX Dashboard Layer (Materialized Views)
-- Source of truth: dq_dev.dqx.checks_log  (arrays with per-issue check_id[])
-- Rules table:      dq_dev.dqx.checks
--
-- The %sql magic on the first line is required: the notebook's other cells
-- are Python, so without it this cell is parsed as Python and fails.
-- ======================================================================

USE CATALOG dq_dev;
USE SCHEMA dqx;

-- ----------------------------------------------------------------------
-- Clean old objects (idempotent)
-- Each DROP uses IF EXISTS so the cell can be re-run from any state; the
-- CREATE statements that follow rebuild every object dropped here.
-- ----------------------------------------------------------------------
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_row_kv_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_event_kv_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_day_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_table_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_check_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_run_config_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_column_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_top_error_messages_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_event_check_ids_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_enriched_mv;

-- ======================================================================
-- 1) Flatten issues (one row per issue element), both ERROR and WARN
--    - Keeps the issue-level check_id ARRAY (no extra fan-out here)
--    - Adds a stable event_id (log_id + side + position)
--    - Uses a plain (non-OUTER) posexplode: with OUTER, every log row whose
--      _errors / _warnings array is NULL or empty produced an all-NULL
--      "event" (NULL pos, hence NULL event_id) that was still labelled
--      ERROR / WARN and therefore counted by every aggregation built on
--      this view.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_events_mv AS
WITH errs AS (
  SELECT
    'ERROR'                       AS severity,
    l.log_id,
    l.table_name,
    l.run_config_name,
    e.name                        AS check_name,
    e.message,
    e.function,
    e.filter,
    e.columns                     AS rule_columns,          -- ARRAY<STRING>
    e.user_metadata,
    e.check_id                    AS check_id,              -- ARRAY<STRING> (from writer)
    e.run_time,
    l.created_at,
    l._errors_fingerprint         AS pattern_fingerprint,
    l.row_snapshot_fingerprint    AS row_fp,
    p.pos                         AS pos
  FROM dq_dev.dqx.checks_log l
  LATERAL VIEW posexplode(l._errors) p AS pos, e
),
warns AS (
  SELECT
    'WARN'                        AS severity,
    l.log_id,
    l.table_name,
    l.run_config_name,
    w.name                        AS check_name,
    w.message,
    w.function,
    w.filter,
    w.columns                     AS rule_columns,          -- ARRAY<STRING>
    w.user_metadata,
    w.check_id                    AS check_id,              -- ARRAY<STRING> (from writer)
    w.run_time,
    l.created_at,
    l._warnings_fingerprint       AS pattern_fingerprint,
    l.row_snapshot_fingerprint    AS row_fp,
    p.pos                         AS pos
  FROM dq_dev.dqx.checks_log l
  LATERAL VIEW posexplode(l._warnings) p AS pos, w
)
-- event_id hashes log_id + side marker + array position; pos is only needed
-- for that hash and is dropped from the output.
SELECT
  sha2(concat(log_id, CASE WHEN severity='ERROR' THEN ':E:' ELSE ':W:' END, cast(pos AS STRING)), 256) AS event_id,
  *
EXCEPT(pos)
FROM (SELECT * FROM errs UNION ALL SELECT * FROM warns);

COMMENT ON TABLE dashboard_events_mv IS
'Issue-level events (1 row per error/warn element). Includes check_id ARRAY inside each issue.';


-- ======================================================================
-- 2) Row KV: one output row per (column, value) entry of row_snapshot
--    - OUTER keeps log rows whose row_snapshot yields no entries; those
--      rows come out with NULL column/value.
--    - is_null / is_empty / is_zero are always TRUE or FALSE, never NULL.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_row_kv_mv AS
SELECT
  src.log_id,
  src.table_name,
  src.run_config_name,
  src.created_at,
  pair.column                                          AS column,
  pair.value                                           AS value,
  pair.value IS NULL                                   AS is_null,
  COALESCE(length(trim(pair.value)) = 0, FALSE)        AS is_empty,   -- NULL value => FALSE
  COALESCE(TRY_CAST(pair.value AS DOUBLE) = 0, FALSE)  AS is_zero     -- non-numeric => FALSE
FROM dq_dev.dqx.checks_log src
LATERAL VIEW OUTER explode(src.row_snapshot) snap AS pair;

COMMENT ON TABLE dashboard_row_kv_mv IS
'Row snapshot exploded into key/value pairs with simple null/empty/zero flags.';


-- ======================================================================
-- 3) Event × KV (scoped): pair each event with the snapshot entries of
--    its log row, keeping only the columns the rule names.
--    Events whose rule_columns is NULL are paired with every column.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_event_kv_mv AS
SELECT
  ev.event_id,
  ev.severity,
  ev.log_id,
  ev.table_name,
  ev.run_config_name,
  ev.check_name,
  ev.function,
  ev.filter,
  ev.rule_columns,     -- ARRAY<STRING>
  ev.check_id,         -- ARRAY<STRING>
  ev.run_time,
  ev.created_at,
  ev.pattern_fingerprint,
  ev.row_fp,
  rk.column,
  rk.value,
  rk.is_null,
  rk.is_empty,
  rk.is_zero
FROM dashboard_events_mv ev
INNER JOIN dashboard_row_kv_mv rk
  ON  rk.log_id = ev.log_id
  -- column scoping lives in the join condition (same result for an inner join)
  AND (ev.rule_columns IS NULL OR array_contains(ev.rule_columns, rk.column));

COMMENT ON TABLE dashboard_event_kv_mv IS
'Event rows joined to the relevant per-row KV pairs (restricted to columns referenced by the rule when provided).';


-- ======================================================================
-- 4) Helper MV: event_id → individual check_id (many-to-many)
--    Unnests the per-event check_id array here so dashboard_events_mv
--    itself stays at one row per event, and joins to dq_dev.dqx.checks
--    become a plain equality join.
--    OUTER keeps events whose check_id array yields nothing, as a single
--    row with a NULL check_id.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_event_check_ids_mv AS
SELECT
  ev.event_id,
  ids.cid AS check_id
FROM dashboard_events_mv ev
LATERAL VIEW OUTER explode(ev.check_id) ids AS cid;

COMMENT ON TABLE dashboard_event_check_ids_mv IS
'Mapping of event_id to each check_id (1 row per check_id per event).';


-- ======================================================================
-- 5) Enriched events: attach rule metadata/arguments through check_id
--    - Both joins are LEFT JOINs, so every event is kept; an event yields
--      one row per matching check_id mapping / rule row, and the config_*
--      columns are NULL when nothing matches
--    - Rule definition and its arguments are exposed as JSON strings for
--      parameterized post-analytics
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_events_enriched_mv AS
SELECT
  ev.event_id,
  ev.severity,
  ev.log_id,
  ev.table_name,
  ev.run_config_name,
  ev.check_name,
  ids.check_id                  AS check_id,
  cfg.name                      AS config_rule_name,
  cfg.criticality               AS config_criticality,
  cfg.filter                    AS config_filter,
  cfg.run_config_name           AS config_run_config_name,
  to_json(cfg.check)            AS config_check_json,
  to_json(cfg.check.arguments)  AS config_arguments_json,
  ev.function,
  ev.filter,
  ev.rule_columns,
  ev.message,
  ev.user_metadata,
  ev.run_time,
  ev.created_at,
  ev.pattern_fingerprint,
  ev.row_fp
FROM dashboard_events_mv ev
LEFT JOIN dashboard_event_check_ids_mv ids
  ON ids.event_id = ev.event_id
LEFT JOIN dq_dev.dqx.checks cfg
  ON cfg.check_id = ids.check_id;

COMMENT ON TABLE dashboard_events_enriched_mv IS
'Events with per-check enrichment (joins dq_dev.dqx.checks via check_id). Includes arguments JSON for parameterized analysis.';


-- ======================================================================
-- 6) Common aggregations (materialized for speed)
-- ======================================================================

-- By day (use run_time when present, else created_at)
-- Events whose run_time is NULL would otherwise all collapse into a single
-- NULL "day" bucket; falling back to the log row's created_at keeps them on
-- a real date and matches the later definition of this view in this notebook.
CREATE MATERIALIZED VIEW dashboard_events_by_day_mv AS
SELECT
  date_trunc('day', COALESCE(run_time, created_at)) AS day,
  SUM(CASE WHEN severity='ERROR' THEN 1 ELSE 0 END) AS error_events,
  SUM(CASE WHEN severity='WARN'  THEN 1 ELSE 0 END) AS warning_events
FROM dashboard_events_mv
GROUP BY date_trunc('day', COALESCE(run_time, created_at));

COMMENT ON TABLE dashboard_events_by_day_mv IS
'Daily counts of error/warn events. Uses run_time when present; otherwise created_at.';


-- Per-table totals: one row per table_name with separate ERROR / WARN counts.
CREATE MATERIALIZED VIEW dashboard_events_by_table_mv AS
SELECT
  ev.table_name,
  COUNT(CASE WHEN ev.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ev.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_events_mv ev
GROUP BY ev.table_name;

COMMENT ON TABLE dashboard_events_by_table_mv IS
'Counts of error/warn events per table.';


-- Per-check totals, keyed by the issue name recorded in the log
-- (use *_enriched_mv when the configured rule name is needed).
-- A NULL check_name is reported under the literal 'unknown'.
CREATE MATERIALIZED VIEW dashboard_events_by_check_mv AS
WITH named AS (
  SELECT
    COALESCE(check_name, 'unknown') AS check_name,
    severity
  FROM dashboard_events_mv
)
SELECT
  check_name,
  COUNT(CASE WHEN severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN severity = 'WARN'  THEN 1 END) AS warning_events
FROM named
GROUP BY check_name;

COMMENT ON TABLE dashboard_events_by_check_mv IS
'Counts of error/warn events per check_name (from the log).';


-- Per-run_config totals.
CREATE MATERIALIZED VIEW dashboard_events_by_run_config_mv AS
SELECT
  ev.run_config_name,
  COUNT(CASE WHEN ev.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ev.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_events_mv ev
GROUP BY ev.run_config_name;

COMMENT ON TABLE dashboard_events_by_run_config_mv IS
'Counts of error/warn events per run_config_name.';


-- Per-column totals, read from the scoped event × KV view, so an event is
-- counted once for every column row it is paired with there.
CREATE MATERIALIZED VIEW dashboard_events_by_column_mv AS
SELECT
  ekv.column,
  COUNT(CASE WHEN ekv.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ekv.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_event_kv_mv ekv
GROUP BY ekv.column;

COMMENT ON TABLE dashboard_events_by_column_mv IS
'Counts of error/warn events per column, considering only columns referenced by the rule when available.';


-- Error messages ranked by frequency (one row per distinct message text).
-- NOTE(review): readers should still apply their own ORDER BY; confirm whether
-- the ordering inside a materialized view definition is preserved on read.
CREATE MATERIALIZED VIEW dashboard_top_error_messages_mv AS
SELECT
  ev.message,
  COUNT(1) AS error_events
FROM dashboard_events_mv ev
WHERE ev.severity = 'ERROR'
GROUP BY ev.message
ORDER BY error_events DESC;

COMMENT ON TABLE dashboard_top_error_messages_mv IS
'Most frequent error messages across all events.';

### Lakeview DQX Dashboard via SDK

In [0]:
%pip install -q databricks-sdk
# NOTE(review): the SDK version is unpinned here, while the Lakeview calls in the
# cell below (create / publish / create_schedule / update_schedule) depend on the
# installed SDK's signatures — consider pinning a known-good version.

In [0]:
# Restart the Python process after the %pip install in the previous cell.
# NOTE(review): presumably so the newly installed databricks-sdk is the version
# imported below — confirm against the Databricks library docs.
dbutils.library.restartPython()

In [0]:
import json, os
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import (
    Dashboard as LvDashboard,
    Schedule, CronSchedule, SchedulePauseStatus
)

# --- config ---
WAREHOUSE_ID   = "ca5a45e27debab49"
SCHEDULE_EVERY_MIN = 30
SCHEDULE_TZ    = "America/Chicago"     # pick your workspace/viewer TZ
DISPLAY_NAME   = "DQX – Quality Overview"
# NOTE(review): user-specific workspace folder; adjust before running as another user.
PARENT_PATH    = "/Users/levi.gagne@claconnect.com/DQX"
JSON_REL_PATH  = "dashboards/DQX_Overview.lvdash.json"

# The schedule below is a Quartz "0 0/N ..." expression; fail early with a clear
# message instead of sending an invalid minutes increment to the API.
if not 1 <= SCHEDULE_EVERY_MIN <= 59:
    raise ValueError(f"SCHEDULE_EVERY_MIN must be between 1 and 59, got {SCHEDULE_EVERY_MIN!r}")

# --- resolve repo file to /Workspace path ---
# The dashboard JSON is looked up relative to the folder containing this notebook.
nb_path = dbutils.entry_point.getDbutils().notebook().getContext().notebookPath().get()
repo_dir = os.path.dirname(nb_path)
json_ws_path = f"{repo_dir}/{JSON_REL_PATH}".replace("//", "/")
json_local = "/Workspace" + json_ws_path

print(f"Reading dashboard JSON: {json_local}")
with open(json_local, "r", encoding="utf-8") as f:
    serialized = f.read()

# optional templating: a no-op when the file contains no {{WAREHOUSE_ID}} placeholder
serialized = serialized.replace("{{WAREHOUSE_ID}}", WAREHOUSE_ID)
_ = json.loads(serialized)  # validate JSON (raises before anything is created)

w = WorkspaceClient()

# ensure parent folder
# mkdirs is called unconditionally rather than wrapping get_status in a broad
# `except Exception`, which treated every failure as "folder missing".
# NOTE(review): relies on mkdirs succeeding for an already existing directory.
w.workspace.mkdirs(PARENT_PATH)

# create draft dashboard from serialized JSON
# NOTE: every run of this cell creates a new draft dashboard; nothing here looks
# up an existing dashboard by name or path first.
dash = w.lakeview.create(LvDashboard(
    display_name=DISPLAY_NAME,
    warehouse_id=WAREHOUSE_ID,
    parent_path=PARENT_PATH,
    serialized_dashboard=serialized
))
DASHBOARD_ID = dash.dashboard_id
print("dashboard_id:", DASHBOARD_ID)

# publish so we can attach schedules/subscriptions
w.lakeview.publish(DASHBOARD_ID)

# --- upsert schedule (avoid duplicates by display_name) ---
cron = f"0 0/{SCHEDULE_EVERY_MIN} * ? * *"   # every N minutes
sched_payload = Schedule(
    display_name=f"Every {SCHEDULE_EVERY_MIN} minutes",
    cron_schedule=CronSchedule(quartz_cron_expression=cron, timezone_id=SCHEDULE_TZ),
    pause_status=SchedulePauseStatus.UNPAUSED,
    warehouse_id=WAREHOUSE_ID
)

# First schedule on this dashboard with the same display name, if any.
# NOTE(review): on a dashboard created a few lines above this is expected to find
# nothing; presumably kept for when DASHBOARD_ID points at an existing dashboard.
existing = next((s for s in w.lakeview.list_schedules(DASHBOARD_ID)
                 if getattr(s, "display_name", None) == sched_payload.display_name), None)

if existing:
    upd = w.lakeview.update_schedule(DASHBOARD_ID, existing.schedule_id, sched_payload)
    schedule_id = upd.schedule_id
    print("schedule_id (updated):", schedule_id)
else:
    created = w.lakeview.create_schedule(DASHBOARD_ID, sched_payload)
    schedule_id = created.schedule_id
    print("schedule_id (created):", schedule_id)

# URL to open
base = w.config.host.rstrip("/")
print("dashboard url:", f"{base}/dashboardsv3/{DASHBOARD_ID}")

# export the server-enriched snapshot for Git
server_def = w.lakeview.get(DASHBOARD_ID)
snapshot = server_def.serialized_dashboard or ""
snap_path = "/Workspace" + f"{repo_dir}/dashboards/DQX_Overview.snapshot.lvdash.json"
if snapshot:
    with open(snap_path, "w", encoding="utf-8") as f:
        f.write(snapshot)
    print("snapshot written:", snap_path)
else:
    # Do not replace an existing snapshot file with an empty one.
    print("snapshot skipped (server returned no serialized_dashboard):", snap_path)

In [0]:
%sql
-- Dashboard queries (the by-day aggregation buckets on a timestamp:
-- run_time when present, otherwise created_at).
-- The %sql magic on the first line is required: the notebook's other cells
-- are Python, so without it this cell is parsed as Python and fails. The
-- original leading "#" line was also not a valid SQL comment.


-- ======================================================================
-- DQX Dashboard Layer (Materialized Views)
-- Source of truth: dq_dev.dqx.checks_log  (arrays with per-issue check_id[])
-- Rules table:      dq_dev.dqx.checks
-- ======================================================================

USE CATALOG dq_dev;
USE SCHEMA dqx;

-- ----------------------------------------------------------------------
-- Clean old objects (idempotent)
-- Each DROP uses IF EXISTS so the cell can be re-run from any state; the
-- CREATE statements that follow rebuild every object dropped here.
-- ----------------------------------------------------------------------
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_row_kv_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_event_kv_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_day_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_table_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_check_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_run_config_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_by_column_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_top_error_messages_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_event_check_ids_mv;
DROP MATERIALIZED VIEW IF EXISTS dashboard_events_enriched_mv;

-- ======================================================================
-- 1) Flatten issues (1 row per issue element), both ERROR and WARN
--    - Preserves issue-level check_id ARRAY
--    - Adds stable event_id (log_id + side + position)
--    - Uses a plain (non-OUTER) posexplode: with OUTER, every log row whose
--      _errors / _warnings array is NULL or empty produced an all-NULL
--      "event" (NULL pos, hence NULL event_id) that was still labelled
--      ERROR / WARN and therefore counted by every aggregation built on
--      this view.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_events_mv AS
WITH errs AS (
  SELECT
    'ERROR'                       AS severity,
    l.log_id,
    l.table_name,
    l.run_config_name,
    e.name                        AS check_name,
    e.message,
    e.function,
    e.filter,
    e.columns                     AS rule_columns,          -- ARRAY<STRING>
    e.user_metadata,
    e.check_id                    AS check_id,              -- ARRAY<STRING>
    e.run_time,
    l.created_at,
    l._errors_fingerprint         AS pattern_fingerprint,
    l.row_snapshot_fingerprint    AS row_fp,
    p.pos                         AS pos
  FROM dq_dev.dqx.checks_log l
  LATERAL VIEW posexplode(l._errors) p AS pos, e
),
warns AS (
  SELECT
    'WARN'                        AS severity,
    l.log_id,
    l.table_name,
    l.run_config_name,
    w.name                        AS check_name,
    w.message,
    w.function,
    w.filter,
    w.columns                     AS rule_columns,          -- ARRAY<STRING>
    w.user_metadata,
    w.check_id                    AS check_id,              -- ARRAY<STRING>
    w.run_time,
    l.created_at,
    l._warnings_fingerprint       AS pattern_fingerprint,
    l.row_snapshot_fingerprint    AS row_fp,
    p.pos                         AS pos
  FROM dq_dev.dqx.checks_log l
  LATERAL VIEW posexplode(l._warnings) p AS pos, w
)
-- event_id hashes log_id + side marker + array position; pos is only needed
-- for that hash and is dropped from the output.
SELECT
  sha2(concat(log_id, CASE WHEN severity='ERROR' THEN ':E:' ELSE ':W:' END, cast(pos AS STRING)), 256) AS event_id,
  *
EXCEPT(pos)
FROM (SELECT * FROM errs UNION ALL SELECT * FROM warns);

COMMENT ON TABLE dashboard_events_mv IS
'Issue-level events (1 row per error/warn element). Includes check_id ARRAY inside each issue.';

-- ======================================================================
-- 2) Row KV: one output row per (column, value) entry of row_snapshot
--    - OUTER keeps log rows whose row_snapshot yields no entries; those
--      rows come out with NULL column/value.
--    - is_null / is_empty / is_zero are always TRUE or FALSE, never NULL.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_row_kv_mv AS
SELECT
  src.log_id,
  src.table_name,
  src.run_config_name,
  src.created_at,
  pair.column                                          AS column,
  pair.value                                           AS value,
  pair.value IS NULL                                   AS is_null,
  COALESCE(length(trim(pair.value)) = 0, FALSE)        AS is_empty,   -- NULL value => FALSE
  COALESCE(TRY_CAST(pair.value AS DOUBLE) = 0, FALSE)  AS is_zero     -- non-numeric => FALSE
FROM dq_dev.dqx.checks_log src
LATERAL VIEW OUTER explode(src.row_snapshot) snap AS pair;

COMMENT ON TABLE dashboard_row_kv_mv IS
'Row snapshot exploded into key/value pairs with simple null/empty/zero flags.';

-- ======================================================================
-- 3) Event × KV (scoped to rule columns): pair each event with the
--    snapshot entries of its log row, keeping only the columns the rule
--    names. Events whose rule_columns is NULL are paired with every column.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_event_kv_mv AS
SELECT
  ev.event_id,
  ev.severity,
  ev.log_id,
  ev.table_name,
  ev.run_config_name,
  ev.check_name,
  ev.function,
  ev.filter,
  ev.rule_columns,     -- ARRAY<STRING>
  ev.check_id,         -- ARRAY<STRING>
  ev.run_time,
  ev.created_at,
  ev.pattern_fingerprint,
  ev.row_fp,
  rk.column,
  rk.value,
  rk.is_null,
  rk.is_empty,
  rk.is_zero
FROM dashboard_events_mv ev
INNER JOIN dashboard_row_kv_mv rk
  ON  rk.log_id = ev.log_id
  -- column scoping lives in the join condition (same result for an inner join)
  AND (ev.rule_columns IS NULL OR array_contains(ev.rule_columns, rk.column));

COMMENT ON TABLE dashboard_event_kv_mv IS
'Event rows joined to relevant KV pairs (restricted to columns referenced by the rule when provided).';

-- ======================================================================
-- 4) event_id → individual check_id (many-to-many)
--    Unnests the per-event check_id array. OUTER keeps events whose array
--    yields nothing, as a single row with a NULL check_id.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_event_check_ids_mv AS
SELECT
  ev.event_id,
  ids.cid AS check_id
FROM dashboard_events_mv ev
LATERAL VIEW OUTER explode(ev.check_id) ids AS cid;

COMMENT ON TABLE dashboard_event_check_ids_mv IS
'Mapping of event_id to each check_id (1 row per check_id per event).';

-- ======================================================================
-- 5) Enrich events with rule metadata via check_id
--    Both joins are LEFT JOINs, so every event is kept; the config_*
--    columns are NULL when no rule row matches. Rule definition and
--    arguments are exposed as JSON strings.
-- ======================================================================
CREATE MATERIALIZED VIEW dashboard_events_enriched_mv AS
SELECT
  ev.event_id,
  ev.severity,
  ev.log_id,
  ev.table_name,
  ev.run_config_name,
  ev.check_name,
  ids.check_id                  AS check_id,
  cfg.name                      AS config_rule_name,
  cfg.criticality               AS config_criticality,
  cfg.filter                    AS config_filter,
  cfg.run_config_name           AS config_run_config_name,
  to_json(cfg.check)            AS config_check_json,
  to_json(cfg.check.arguments)  AS config_arguments_json,
  ev.function,
  ev.filter,
  ev.rule_columns,
  ev.message,
  ev.user_metadata,
  ev.run_time,
  ev.created_at,
  ev.pattern_fingerprint,
  ev.row_fp
FROM dashboard_events_mv ev
LEFT JOIN dashboard_event_check_ids_mv ids
  ON ids.event_id = ev.event_id
LEFT JOIN dq_dev.dqx.checks cfg
  ON cfg.check_id = ids.check_id;

COMMENT ON TABLE dashboard_events_enriched_mv IS
'Events with per-check enrichment (joins dq_dev.dqx.checks via check_id). Includes arguments JSON for parameterized analysis.';

-- ======================================================================
-- 6) Common aggregations (materialized for speed)
--    Every view below reports separate ERROR and WARN counts per group.
-- ======================================================================

-- Daily totals. The day comes from run_time, falling back to created_at
-- when run_time is NULL.
CREATE MATERIALIZED VIEW dashboard_events_by_day_mv AS
WITH dated AS (
  SELECT
    date_trunc('day', COALESCE(run_time, created_at)) AS day,
    severity
  FROM dashboard_events_mv
)
SELECT
  day,
  COUNT(CASE WHEN severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN severity = 'WARN'  THEN 1 END) AS warning_events
FROM dated
GROUP BY day;

COMMENT ON TABLE dashboard_events_by_day_mv IS
'Daily counts of error/warn events. Uses run_time when present; otherwise created_at.';

-- Per-table totals.
CREATE MATERIALIZED VIEW dashboard_events_by_table_mv AS
SELECT
  ev.table_name,
  COUNT(CASE WHEN ev.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ev.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_events_mv ev
GROUP BY ev.table_name;

COMMENT ON TABLE dashboard_events_by_table_mv IS
'Counts of error/warn events per table.';

-- Per-check totals, keyed by the issue name recorded in the log
-- (use *_enriched_mv when the configured rule name is needed).
-- A NULL check_name is reported under the literal 'unknown'.
CREATE MATERIALIZED VIEW dashboard_events_by_check_mv AS
WITH named AS (
  SELECT
    COALESCE(check_name, 'unknown') AS check_name,
    severity
  FROM dashboard_events_mv
)
SELECT
  check_name,
  COUNT(CASE WHEN severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN severity = 'WARN'  THEN 1 END) AS warning_events
FROM named
GROUP BY check_name;

COMMENT ON TABLE dashboard_events_by_check_mv IS
'Counts of error/warn events per check_name (from the log).';

-- Per-run_config totals.
CREATE MATERIALIZED VIEW dashboard_events_by_run_config_mv AS
SELECT
  ev.run_config_name,
  COUNT(CASE WHEN ev.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ev.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_events_mv ev
GROUP BY ev.run_config_name;

COMMENT ON TABLE dashboard_events_by_run_config_mv IS
'Counts of error/warn events per run_config_name.';

-- Per-column totals, read from the scoped event × KV view, so an event is
-- counted once for every column row it is paired with there.
CREATE MATERIALIZED VIEW dashboard_events_by_column_mv AS
SELECT
  ekv.column,
  COUNT(CASE WHEN ekv.severity = 'ERROR' THEN 1 END) AS error_events,
  COUNT(CASE WHEN ekv.severity = 'WARN'  THEN 1 END) AS warning_events
FROM dashboard_event_kv_mv ekv
GROUP BY ekv.column;

COMMENT ON TABLE dashboard_events_by_column_mv IS
'Counts of error/warn events per column, scoped to rule columns when available.';

-- Error messages ranked by frequency (one row per distinct message text).
-- NOTE(review): readers should still apply their own ORDER BY; confirm whether
-- the ordering inside a materialized view definition is preserved on read.
CREATE MATERIALIZED VIEW dashboard_top_error_messages_mv AS
SELECT
  ev.message,
  COUNT(1) AS error_events
FROM dashboard_events_mv ev
WHERE ev.severity = 'ERROR'
GROUP BY ev.message
ORDER BY error_events DESC;

COMMENT ON TABLE dashboard_top_error_messages_mv IS
'Most frequent error messages across all events.';