diff --git a/examples/basic_demo/project.yml b/examples/basic_demo/project.yml index 1b380e7..5756004 100644 --- a/examples/basic_demo/project.yml +++ b/examples/basic_demo/project.yml @@ -7,6 +7,15 @@ models_dir: models docs: # Adjust `dag_dir` to change where `fft dag --html` writes documentation (docs/Technical_Overview.md#documentation). dag_dir: site/dag + include_rendered_sql: true + models: + users_clean.ff: + description: "Normalizes CRM users and extracts email_domain." + columns: + email_domain: + description: "Lowercased domain extracted from email." + mart_users_by_domain.ff: + description: "Aggregates signup counts per email domain." # Project-level variables accessible via {{ var('key') }} inside models. # Example: diff --git a/pyproject.toml b/pyproject.toml index 9aa566d..276da9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "pydantic-settings>=2.4", "python-dotenv>=1.0", "httpx>=0.28.1", + "sqlparse>=0.5.5", ] [project.optional-dependencies] @@ -153,7 +154,7 @@ known-third-party = ["duckdb", "pandas"] combine-as-imports = true [tool.ruff.lint.pylint] -max-args = 15 +max-args = 18 max-returns = 15 max-branches = 20 max-statements = 60 diff --git a/src/fastflowtransform/artifacts.py b/src/fastflowtransform/artifacts.py index f2bf6c7..513111e 100644 --- a/src/fastflowtransform/artifacts.py +++ b/src/fastflowtransform/artifacts.py @@ -348,3 +348,88 @@ def load_last_run_durations(project_dir: Path) -> dict[str, float]: if isinstance(name, str) and isinstance(dur_ms, (int, float)): out[name] = float(dur_ms) / 1000.0 return out + + +# ---------- TEST RESULTS ---------- + + +@dataclass +class TestResult: + kind: str + table: str # display label (may include arrows for relationships) + relation: str | None # machine-join key (best-effort; usually the tested table) + column: str | None + ok: bool + severity: str # "error" | "warn" + duration_ms: int + msg: str | None = None + param_str: str = "" + example_sql: str | None 
= None + + +def write_test_results( + project_dir: Path, + *, + started_at: str, + finished_at: str, + results: list[TestResult], +) -> Path: + """ + Write test_results.json containing a run envelope + individual test outcomes. + """ + project_dir = Path(project_dir) + out_dir = _target_dir(project_dir) + path = out_dir / "test_results.json" + + data = { + "metadata": {"tool": "fastflowtransform", "generated_at": _iso_now()}, + "test_started_at": started_at, + "test_finished_at": finished_at, + "results": [asdict(r) for r in results], + } + _json_dump(path, data) + return path + + +# ---------- UNIT TEST RESULTS ---------- + + +@dataclass +class UTestResult: + model: str + case: str + status: str # "pass" | "fail" | "error" | "skip" + duration_ms: int + cache_hit: bool = False + + message: str | None = None + target_relation: str | None = None + spec_path: str = "" + + +def write_utest_results( + project_dir: Path, + *, + started_at: str, + finished_at: str, + failures: int, + results: list[UTestResult], + engine: str | None = None, +) -> Path: + """ + Write utest_results.json containing a run envelope + per-case results. 
+ """ + project_dir = Path(project_dir) + out_dir = _target_dir(project_dir) + path = out_dir / "utest_results.json" + + data = { + "metadata": {"tool": "fastflowtransform", "generated_at": _iso_now()}, + "utest_started_at": started_at, + "utest_finished_at": finished_at, + "engine": engine or "", + "failures": int(failures or 0), + "results": [asdict(r) for r in results], + } + _json_dump(path, data) + return path diff --git a/src/fastflowtransform/cli/docs_utils.py b/src/fastflowtransform/cli/docs_utils.py index 51645c9..90e1ac0 100644 --- a/src/fastflowtransform/cli/docs_utils.py +++ b/src/fastflowtransform/cli/docs_utils.py @@ -1,6 +1,7 @@ # fastflowtransform/cli/docs_utils.py from __future__ import annotations +import inspect import re from datetime import UTC, datetime from pathlib import Path @@ -99,7 +100,8 @@ def _build_docs_manifest( lineage_map = lineage_mod.infer_sql_lineage(rendered, alias_map) elif n.kind == "python": func = REGISTRY.py_funcs[n.name] - lineage_map = lineage_mod.infer_py_lineage(func, getattr(n, "requires", None), None) + src = inspect.getsource(func) + lineage_map = lineage_mod.infer_py_lineage(src, getattr(n, "requires", None)) except Exception: lineage_map = {} diff --git a/src/fastflowtransform/cli/test_cmd.py b/src/fastflowtransform/cli/test_cmd.py index de48573..0f6deb4 100644 --- a/src/fastflowtransform/cli/test_cmd.py +++ b/src/fastflowtransform/cli/test_cmd.py @@ -4,12 +4,15 @@ import re import time from collections.abc import Callable, Iterable, Mapping +from contextlib import suppress from dataclasses import dataclass +from datetime import UTC, datetime from pathlib import Path from typing import Any import typer +from fastflowtransform.artifacts import TestResult, write_test_results from fastflowtransform.cli.bootstrap import _prepare_context, configure_executor_contracts from fastflowtransform.cli.options import ( EngineOpt, @@ -49,6 +52,7 @@ class DQResult: severity: Severity = "error" param_str: str = "" example_sql: str 
| None = None + relation: str | None = None _REF_CALL_RE = re.compile(r"^ref\(\s*(['\"])([^'\"]+)\1\s*\)$") @@ -351,6 +355,10 @@ def _run_dq_tests(executor: BaseExecutor, tests: Iterable[Any]) -> list[DQResult table_for_exec, ) = _prepare_test(raw_test, executor) + relation: str | None = None + if isinstance(table_for_exec, str) and table_for_exec.strip(): + relation = table_for_exec.strip() + t0 = time.perf_counter() runner: Runner | None = TESTS.get(kind) @@ -373,6 +381,7 @@ def _run_dq_tests(executor: BaseExecutor, tests: Iterable[Any]) -> list[DQResult severity=severity, param_str=param_str, example_sql=None, + relation=relation, ) ) continue @@ -392,6 +401,7 @@ def _run_dq_tests(executor: BaseExecutor, tests: Iterable[Any]) -> list[DQResult severity=severity, param_str=param_str, example_sql=example, + relation=relation, ) ) @@ -494,9 +504,36 @@ def test( typer.secho("No tests configured.", fg="bright_black") raise typer.Exit(code=0) + started_at = datetime.now(UTC).isoformat(timespec="seconds") + results = _run_dq_tests(execu, tests) _print_summary(results) + finished_at = datetime.now(UTC).isoformat(timespec="seconds") + + # Persist for docs (best-effort; never fail the command because of artifact IO) + with suppress(Exception): + write_test_results( + ctx.project, + started_at=started_at, + finished_at=finished_at, + results=[ + TestResult( + kind=r.kind, + table=r.table, + relation=r.relation, + column=r.column, + ok=bool(r.ok), + severity=str(r.severity), + duration_ms=int(r.ms), + msg=r.msg, + param_str=r.param_str, + example_sql=r.example_sql, + ) + for r in results + ], + ) + # Exit code: count only ERROR fails failed = sum((not r.ok) and (r.severity != "warn") for r in results) raise typer.Exit(code=2 if failed > 0 else 0) diff --git a/src/fastflowtransform/cli/utest_cmd.py b/src/fastflowtransform/cli/utest_cmd.py index 99c77ff..61992e1 100644 --- a/src/fastflowtransform/cli/utest_cmd.py +++ b/src/fastflowtransform/cli/utest_cmd.py @@ -1,7 +1,12 @@ +# 
fastflowtransform/cli/utest_cmd.py from __future__ import annotations +from contextlib import suppress +from datetime import UTC, datetime + import typer +from fastflowtransform.artifacts import UTestResult, write_utest_results from fastflowtransform.cli.bootstrap import _prepare_context from fastflowtransform.cli.options import ( CaseOpt, @@ -38,6 +43,9 @@ def utest( echo("ℹ️ No unit tests found (tests/unit/*.yml).") # noqa: RUF001 raise typer.Exit(0) + started_at = datetime.now(UTC).isoformat(timespec="seconds") + collected: list[dict] = [] + failures = run_unit_specs( specs, ex, @@ -45,7 +53,36 @@ def utest( only_case=case, cache_mode=getattr(cache, "value", str(cache)) if cache is not None else "off", reuse_meta=bool(reuse_meta), + results_out=collected, ) + finished_at = datetime.now(UTC).isoformat(timespec="seconds") + + # Write artifact for docs (best-effort; never block exit) + with suppress(Exception): + write_utest_results( + ctx.project, + started_at=started_at, + finished_at=finished_at, + failures=failures, + engine=getattr(ex, "engine_name", None), + results=[ + UTestResult( + model=str(r.get("model") or ""), + case=str(r.get("case") or ""), + status=str(r.get("status") or ""), + duration_ms=int(r.get("duration_ms") or 0), + cache_hit=bool(r.get("cache_hit")), + message=(str(r.get("message")) if r.get("message") else None), + target_relation=( + str(r.get("target_relation")) if r.get("target_relation") else None + ), + spec_path=str(r.get("spec_path") or ""), + ) + for r in collected + if (r.get("model") and r.get("case")) + ], + ) + raise typer.Exit(code=2 if failures > 0 else 0) diff --git a/src/fastflowtransform/config/project.py b/src/fastflowtransform/config/project.py index ca7a71f..7c4f645 100644 --- a/src/fastflowtransform/config/project.py +++ b/src/fastflowtransform/config/project.py @@ -132,6 +132,24 @@ class SeedsBlock(BaseModel): # --------------------------------------------------------------------------- +class 
DocsColumnConfig(BaseModel): + """Column-level docs surfaced in generated documentation.""" + + model_config = ConfigDict(extra="allow") + + description: str | None = None + + +class DocsModelConfig(BaseModel): + """Model-level docs surfaced in generated documentation.""" + + # Allow extra keys so docs stay flexible. + model_config = ConfigDict(extra="allow") + + description: str | None = None + columns: dict[str, DocsColumnConfig] = Field(default_factory=dict) + + class DocsConfig(BaseModel): """ Optional documentation-related configuration. @@ -140,11 +158,19 @@ class DocsConfig(BaseModel): docs: dag_dir: "site/dag" + include_rendered_sql: true + models: + users: + description: "Raw users table" + columns: + id: "Primary key" """ model_config = ConfigDict(extra="forbid") dag_dir: str | None = None + include_rendered_sql: bool = Field(default=False) + models: dict[str, DocsModelConfig] = Field(default_factory=dict) # --------------------------------------------------------------------------- diff --git a/src/fastflowtransform/docs.py b/src/fastflowtransform/docs.py index 2d4cba5..f6eee7b 100644 --- a/src/fastflowtransform/docs.py +++ b/src/fastflowtransform/docs.py @@ -1,9 +1,12 @@ # fastflowtransform/docs.py from __future__ import annotations +import inspect import json import re import shutil +from collections.abc import Mapping +from contextlib import suppress from dataclasses import dataclass, field from datetime import UTC, datetime from pathlib import Path @@ -32,8 +35,17 @@ class ModelDoc: relation: str deps: list[str] materialized: str + owners: list[str] = field(default_factory=list) + tags: list[str] = field(default_factory=list) + domain: str | None = None description_html: str | None = None description_short: str | None = None + contract: dict[str, Any] | None = None + python_signature: str | None = None + python_docstring: str | None = None + python_source: str | None = None + python_requires: dict[str, list[str]] | None = None # dep -> required 
columns (best-effort) + inferred_lineage: dict[str, list[dict[str, Any]]] | None = None # out_col -> lineage refs @dataclass @@ -55,6 +67,30 @@ def _safe_filename(name: str) -> str: return s or "_model" +def _as_list(v: Any) -> list[str]: + if v is None: + return [] + if isinstance(v, list): + return [str(x).strip() for x in v if str(x).strip()] + if isinstance(v, str): + # allow comma-separated + return [s.strip() for s in v.split(",") if s.strip()] + s = str(v).strip() + return [s] if s else [] + + +def _domain_from_path(path: str) -> str: + p = (path or "").replace("\\", "/").lstrip("/") + # try to make domain stable even if absolute paths leak in + # common case: ".../models//..." + if "/models/" in p.lower(): + p = p.lower().split("/models/", 1)[1] + elif p.lower().startswith("models/"): + p = p[7:] + parts = [x for x in p.split("/") if x] + return parts[0] if parts else "" + + def _collect_columns(executor: Any) -> dict[str, list[ColumnInfo]]: """ Best-effort schema discovery delegated to the executor. @@ -94,6 +130,25 @@ def _read_project_yaml_docs(project_dir: Path) -> dict[str, Any]: return models if isinstance(models, dict) else {} +def _read_project_yaml_docs_settings(project_dir: Path) -> dict[str, Any]: + """ + Read docs settings from project.yml: + docs: + include_rendered_sql: true|false + ... + Returns the whole docs dict (or {}). 
+ """ + cfg_path = project_dir / "project.yml" + if not cfg_path.exists(): + return {} + try: + cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {} + except Exception: + return {} + docs = (cfg or {}).get("docs") or {} + return docs if isinstance(docs, dict) else {} + + _FRONT_MATTER_RE = re.compile(r"^\s*---\s*\n(.*?)\n---\s*\n?", re.DOTALL) _LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") _CODE_RE = re.compile(r"`([^`]+)`") @@ -118,6 +173,152 @@ def _render_minimarkdown(md: str) -> str: return html +def _rendered_refs_for_docs(executor: Any, node: Node) -> list[dict[str, str]]: + """ + Best-effort list of resolved refs for the "Refs resolved" UI. + Avoids inlining ephemeral SQL (which can be huge); marks ephemeral as inlined. + """ + out: list[dict[str, str]] = [] + for d in node.deps or []: + try: + dep = REGISTRY.get_node(d) if hasattr(REGISTRY, "get_node") else REGISTRY.nodes.get(d) + except Exception: + dep = None + try: + if ( + dep is not None + and str((dep.meta or {}).get("materialized", "")).lower() == "ephemeral" + ): + rel = "" + else: + rel = executor._format_relation_for_ref(d) + except Exception: + rel = relation_for(d) + out.append({"name": str(d), "relation": str(rel)}) + return out + + +def _compile_sql_for_docs(executor: Any, node: Node, rendered_sql: str) -> str: + """ + Best-effort "compiled" SQL preview for docs: + - strips leading {{ config(...) }} + - if DDL-looking, returns the statement as-is + - else wraps selectable body into a generic CREATE OR REPLACE {TABLE|VIEW} ... AS + For incremental/snapshot/ephemeral, returns "" (too engine/flow-specific). 
+ """ + meta: Mapping[str, Any] = getattr(node, "meta", {}) or {} + try: + if getattr(executor, "_meta_is_incremental", None) and executor._meta_is_incremental(meta): + return "" + if getattr(executor, "_meta_is_snapshot", None) and executor._meta_is_snapshot(meta): + return "" + except Exception: + pass + if str(meta.get("materialized", "")).lower() == "ephemeral": + return "" + + try: + sql = executor._strip_leading_config(rendered_sql).strip() + except Exception: + sql = (rendered_sql or "").strip() + if not sql: + return "" + + try: + if executor._looks_like_direct_ddl(sql): + return sql + except Exception: + pass + + try: + body = executor._selectable_body(sql).rstrip(" ;\n\t") + target_sql = executor._format_relation_for_ref(node.name) + mat = str(meta.get("materialized", "table")).lower() + if mat == "view": + return f"CREATE OR REPLACE VIEW {target_sql} AS\n{body}" + return f"CREATE OR REPLACE TABLE {target_sql} AS\n{body}" + except Exception: + return "" + + +_LEADING_BLANK_LINES = re.compile(r"^(?:[ \t]*\n)+") + + +def _drop_leading_blank_lines(s: str) -> str: + return _LEADING_BLANK_LINES.sub("", s or "") + + +def _copy_runtime_artifacts(out_dir: Path, proj_dir: Path | None) -> None: + if not proj_dir: + return + src_dir = proj_dir / ".fastflowtransform" / "target" + if not src_dir.exists(): + return + + assets_dir = out_dir / "assets" + assets_dir.mkdir(parents=True, exist_ok=True) + + for fname in ("run_results.json", "test_results.json", "utest_results.json"): + src = src_dir / fname + if src.exists(): + shutil.copy2(src, assets_dir / fname) + + +def _render_sql_for_docs( + nodes: dict[str, Node], + executor: Any, + *, + include_payload: bool, + project_dir: Path | None = None, +) -> tuple[dict[str, str], dict[str, str], dict[str, str], dict[str, list[dict[str, str]]]]: + """ + Render SQL once and reuse for: + - lineage inference + - docs manifest SQL viewer payload + Returns: + rendered_by_model, raw_by_model, compiled_by_model, 
rendered_refs_by_model + """ + rendered_by: dict[str, str] = {} + raw_by: dict[str, str] = {} + compiled_by: dict[str, str] = {} + refs_by: dict[str, list[dict[str, str]]] = {} + + for n in nodes.values(): + if n.kind != "sql": + continue + + # Always try to read raw SQL (UI baseline), regardless of include_payload + try: + p = Path(n.path) + if not p.is_absolute() and project_dir is not None: + p = project_dir / p + raw_by[n.name] = p.read_text(encoding="utf-8") + except Exception: + raw_by[n.name] = "" + + rendered = "" + if include_payload: + try: + rendered = executor.render_sql( + n, + REGISTRY.env, + ref_resolver=lambda nm: executor._resolve_ref(nm, REGISTRY.env), + source_resolver=executor._resolve_source, + ) + rendered = _drop_leading_blank_lines(rendered) + except Exception: + rendered = "" + + if rendered: + rendered_by[n.name] = rendered + + if include_payload: + compiled_by[n.name] = _compile_sql_for_docs(executor, n, rendered) + refs_by[n.name] = _rendered_refs_for_docs(executor, n) + + return rendered_by, raw_by, compiled_by, refs_by + + def _strip_html(text: str) -> str: """Remove very simple HTML tags for generating a short preview snippet.""" if not text: @@ -214,6 +415,11 @@ def _build_spa_manifest( cols_by_table: dict[str, list[ColumnInfo]], model_source_refs: dict[str, list[tuple[str, str]]], sources_by_key: dict[tuple[str, str], SourceDoc], + raw_sql_by_model: dict[str, str] | None = None, + rendered_sql_by_model: dict[str, str] | None = None, + compiled_sql_by_model: dict[str, str] | None = None, + rendered_refs_by_model: dict[str, list[dict[str, str]]] | None = None, + include_rendered_sql: bool = False, ) -> dict[str, Any]: def _col_to_dict(c: ColumnInfo) -> dict[str, Any]: html = c.description_html @@ -260,13 +466,31 @@ def _col_to_dict(c: ColumnInfo) -> dict[str, Any]: "deps": list(m.deps or []), "used_by": list(used_by.get(m.name, []) or []), "materialized": m.materialized, + "owners": list(m.owners or []), + "tags": list(m.tags or []), 
+ "domain": m.domain, "description_html": m.description_html, "description_text": _html_to_text(model_desc_html_s), "description_short": m.description_short, "sources_used": src_used, "columns": cols, + "contract": m.contract, + "python_signature": m.python_signature, + "python_docstring": m.python_docstring, + "python_source": m.python_source, + "python_requires": m.python_requires, + "inferred_lineage": m.inferred_lineage, } ) + if m.kind == "sql": + md = out_models[-1] + md["raw_sql"] = (raw_sql_by_model or {}).get(m.name, "") + + if include_rendered_sql and m.kind == "sql": + md = out_models[-1] + md["rendered_sql"] = (rendered_sql_by_model or {}).get(m.name, "") + md["compiled_sql"] = (compiled_sql_by_model or {}).get(m.name, "") + md["rendered_refs"] = (rendered_refs_by_model or {}).get(m.name, []) out_sources: list[dict[str, Any]] = [] for s in sources: @@ -293,6 +517,7 @@ def _col_to_dict(c: ColumnInfo) -> dict[str, Any]: "generated_at": datetime.now(UTC).isoformat(), "env": env_name, "with_schema": bool(with_schema), + "include_rendered_sql": bool(include_rendered_sql), }, "dag": {"graph": graph, "mermaid": mermaid_src}, "models": out_models, @@ -340,17 +565,33 @@ def _build_macro_list(proj_dir: Path | None) -> list[dict[str, str]]: def _collect_models(nodes: dict[str, Node]) -> list[ModelDoc]: - models = [ - ModelDoc( - name=n.name, - kind=n.kind, - path=str(n.path), - relation=relation_for(n.name), - deps=list(n.deps or []), - materialized=(getattr(n, "meta", {}) or {}).get("materialized", "table"), + models: list[ModelDoc] = [] + for n in nodes.values(): + meta = getattr(n, "meta", {}) or {} + if not isinstance(meta, dict): + meta = {} + + # Prefer explicit meta; fall back to deriving domain from path + owners = _as_list(meta.get("owners") or meta.get("owner")) + tags = _as_list(meta.get("tags") or meta.get("tag")) + domain = meta.get("domain") or meta.get("group") + domain_s = str(domain).strip() if domain is not None else "" + if not domain_s: + 
domain_s = _domain_from_path(str(n.path)) + + models.append( + ModelDoc( + name=n.name, + kind=n.kind, + path=str(n.path), + relation=relation_for(n.name), + deps=list(n.deps or []), + materialized=(meta or {}).get("materialized", "table"), + owners=owners, + tags=tags, + domain=domain_s or None, + ) ) - for n in nodes.values() - ] models.sort(key=lambda m: m.name) return models @@ -475,6 +716,52 @@ def _attach_consumers_to_sources( s.consumers = source_consumers.get((s.source_name, s.table_name), []) +def _attach_python_model_details(models: list[ModelDoc]) -> None: + py_funcs = getattr(REGISTRY, "py_funcs", {}) or {} + py_requires = getattr(REGISTRY, "py_requires", {}) or {} + + for m in models: + if m.kind != "python": + continue + + fn = py_funcs.get(m.name) + if callable(fn): + # Signature + try: + sig = str(inspect.signature(fn)) + except Exception: + sig = "(...)" + qn = getattr(fn, "__qualname__", getattr(fn, "__name__", m.name)) + mod = getattr(fn, "__module__", None) + prefix = f"{mod}." 
if mod else "" + m.python_signature = f"{prefix}{qn}{sig}" + + # Docstring (cleaned) + doc = inspect.getdoc(fn) or "" + m.python_docstring = doc.strip() or None + + # Required-columns hints (best-effort) + req = py_requires.get(m.name) + if isinstance(req, dict): + norm: dict[str, list[str]] = {} + for dep, cols in req.items(): + if not cols: + continue + norm[str(dep)] = sorted(str(c) for c in cols) + m.python_requires = norm or None + + # Python source (best-effort) + try: + p = Path(m.path) + if p.exists() and p.is_file(): + txt = p.read_text(encoding="utf-8") + if len(txt) > 300_000: + txt = txt[:300_000] + "\n\n# … truncated …\n" + m.python_source = txt + except Exception: + pass + + def _apply_descriptions_to_models( models: list[ModelDoc], docs_meta: dict[str, Any], @@ -497,6 +784,11 @@ def _apply_descriptions_to_models( m.description_short = (short[:char_limit] + "…") if len(short) > char_limit else short else: m.description_short = None + m.contract = ( + _normalize_contract(model_meta.get("contract")) + if isinstance(model_meta, dict) + else None + ) if not with_schema or m.relation not in cols_by_table: continue @@ -506,6 +798,40 @@ def _apply_descriptions_to_models( c.description_html = rel_desc_map.get(c.name) or mdl_desc_map.get(c.name) +def _mark_lineage_confidence(lin: dict[str, list[dict[str, Any]]], conf: str) -> None: + for items in (lin or {}).values(): + if not isinstance(items, list): + continue + for it in items: + if isinstance(it, dict): + it.setdefault("confidence", conf) + + +def _clean_lineage(lin: dict[str, list[dict[str, Any]]]) -> dict[str, list[dict[str, Any]]]: + out: dict[str, list[dict[str, Any]]] = {} + for out_col, items in (lin or {}).items(): + if not isinstance(items, list): + continue + keep: list[dict[str, Any]] = [] + for it in items: + if not isinstance(it, dict): + continue + fr = str(it.get("from_relation") or "").strip() + fc = str(it.get("from_column") or "").strip() + + # drop empty / unknown placeholders like "?" 
so UI doesn't show "?.?" + if not fr or not fc: + continue + if fr in {"?", "unknown"} or fc in {"?", "unknown"}: + continue + + keep.append(it) + + if keep: + out[out_col] = keep + return out + + def _infer_and_attach_lineage( models: list[ModelDoc], executor: Any | None, @@ -513,31 +839,45 @@ def _infer_and_attach_lineage( cols_by_table: dict[str, list[ColumnInfo]], *, with_schema: bool, + rendered_sql_by_model: dict[str, str] | None = None, ) -> None: """Best-effort Lineage ermitteln (SQL/Python) und auf Columns mappen.""" for m in models: inferred: dict[str, list[dict[str, Any]]] = {} try: if m.kind == "sql" and executor is not None: - try: - rendered = executor.render_sql( - REGISTRY.nodes[m.name], - REGISTRY.env, - ref_resolver=lambda nm: executor._resolve_ref(nm, REGISTRY.env), - source_resolver=executor._resolve_source, - ) - except Exception: - rendered = None + rendered = (rendered_sql_by_model or {}).get(m.name) + if not rendered: + try: + rendered = executor.render_sql( + REGISTRY.nodes[m.name], + REGISTRY.env, + ref_resolver=lambda nm: executor._resolve_ref(nm, REGISTRY.env), + source_resolver=executor._resolve_source, + ) + except Exception: + rendered = None if rendered: inferred = infer_sql_lineage(rendered) + inferred = _clean_lineage(inferred) + _mark_lineage_confidence(inferred, "inferred") + overrides = parse_sql_lineage_overrides(rendered) + overrides = _clean_lineage(overrides) + _mark_lineage_confidence(overrides, "annotated") + inferred = merge_lineage(inferred, overrides) elif m.kind == "python": func = getattr(REGISTRY, "py_funcs", {}).get(m.name) - inferred = infer_py_lineage(func) + src = m.python_source or (inspect.getsource(func) if callable(func) else "") + inferred = infer_py_lineage(src) + _mark_lineage_confidence(inferred, "inferred") except Exception: inferred = {} + if m.kind == "python": + m.inferred_lineage = inferred or None + # YAML overrides (bereits in docs_meta gemerged) model_meta = ( (docs_meta.get("models", {}) or 
{}).get(m.name, {}) @@ -560,6 +900,7 @@ def _infer_and_attach_lineage( "from_relation": s["table"], "from_column": s["column"], "transformed": transformed_flag, + "confidence": "annotated", } ) if items: @@ -567,10 +908,30 @@ def _infer_and_attach_lineage( if norm: inferred = merge_lineage(inferred, norm) + _mark_lineage_confidence(inferred, "inferred") + + # Always expose inferred lineage on the model as a fallback/debug view (SQL + Python) + m.inferred_lineage = inferred or None + if with_schema and (m.relation in cols_by_table) and inferred: + + def _norm_col(s: object) -> str: + x = str(s or "").strip() + # strip common identifier quotes + if (x.startswith("`") and x.endswith("`")) or ( + x.startswith('"') and x.endswith('"') + ): + x = x[1:-1] + if x.startswith("[") and x.endswith("]"): + x = x[1:-1] + return x.lower() + + inferred_norm = {_norm_col(k): v for k, v in inferred.items()} + for c in cols_by_table[m.relation]: - if c.name in inferred: - c.lineage = inferred[c.name] + items = inferred_norm.get(_norm_col(c.name), []) + if items: + c.lineage = items def _reverse_deps(nodes: dict[str, Node]) -> dict[str, list[str]]: @@ -657,6 +1018,7 @@ def render_site( with_schema: bool = True, spa: bool = True, legacy_pages: bool = False, + include_rendered_sql: bool | None = None, ) -> None: out_dir.mkdir(parents=True, exist_ok=True) _copy_template_assets(out_dir) @@ -681,14 +1043,47 @@ def render_site( direction="LR", ) proj_dir = _get_project_dir() + docs_settings = _read_project_yaml_docs_settings(proj_dir) if proj_dir else {} + if include_rendered_sql is None: + include_rendered_sql = bool((docs_settings or {}).get("include_rendered_sql")) + docs_meta = read_docs_metadata(proj_dir) if proj_dir else {"models": {}, "columns": {}} + + rendered_sql_by_model: dict[str, str] = {} + raw_sql_by_model: dict[str, str] = {} + compiled_sql_by_model: dict[str, str] = {} + rendered_refs_by_model: dict[str, list[dict[str, str]]] = {} + + if executor is not None and 
(include_rendered_sql or with_schema): + rendered_sql_by_model, raw_sql_by_model, compiled_sql_by_model, rendered_refs_by_model = ( + _render_sql_for_docs( + nodes, executor, include_payload=bool(include_rendered_sql), project_dir=proj_dir + ) + ) + + # If we can, also resolve source relations to the physical reference the executor uses. + # This improves the "Refs resolved" view in the SPA. + for s in sources: + with suppress(Exception): + s.relation = executor._resolve_source(s.source_name, s.table_name) + models = _collect_models(nodes) + + _attach_python_model_details(models) + mat_legend = _materialization_legend() macro_list = _build_macro_list(proj_dir) cols_by_table = _collect_columns(executor) if (executor and with_schema) else {} _apply_descriptions_to_models(models, docs_meta, cols_by_table, with_schema=with_schema) - _infer_and_attach_lineage(models, executor, docs_meta, cols_by_table, with_schema=with_schema) + _infer_and_attach_lineage( + models, + executor, + docs_meta, + cols_by_table, + with_schema=with_schema, + rendered_sql_by_model=(rendered_sql_by_model or None), + ) used_by = _reverse_deps(nodes) @@ -709,6 +1104,11 @@ def render_site( cols_by_table=cols_by_table, model_source_refs=model_source_refs, sources_by_key=sources_by_key, + raw_sql_by_model=(raw_sql_by_model or None), + rendered_sql_by_model=(rendered_sql_by_model or None), + compiled_sql_by_model=(compiled_sql_by_model or None), + rendered_refs_by_model=(rendered_refs_by_model or None), + include_rendered_sql=bool(include_rendered_sql), ) assets_dir = out_dir / "assets" assets_dir.mkdir(parents=True, exist_ok=True) @@ -716,6 +1116,8 @@ def render_site( json.dumps(manifest, indent=2), encoding="utf-8" ) + _copy_runtime_artifacts(out_dir, proj_dir) + # SPA shell (index.html.j2) _render_index( env, @@ -763,6 +1165,89 @@ def render_site( _render_source_pages(env, out_dir, sources) +def _normalize_contract(obj: Any) -> dict[str, Any] | None: + """ + Normalize contract specs from 
YAML/front-matter into a manifest-friendly shape: + { + "enforced": bool | None, + "columns": [ + {"name": str, "dtype": str|None, "nullable": bool|None, "constraints": list[Any]} + ], + "constraints": list[Any], + } + """ + if obj is None or obj is False: + return None + + # Allow "contract: true" as a lightweight marker + if obj is True: + return {"enforced": None, "columns": [], "constraints": []} + + if not isinstance(obj, dict): + return None + + enforced = obj.get("enforced", None) + if enforced is None: + enforced = obj.get("enabled", None) + if enforced is not None: + enforced = bool(enforced) + + # Accept aliases: columns/schema/fields + cols_spec = obj.get("columns", None) + if cols_spec is None: + cols_spec = obj.get("schema", None) + if cols_spec is None: + cols_spec = obj.get("fields", None) + + cols_out: list[dict[str, Any]] = [] + + def _constraints(v: Any) -> list[Any]: + if v is None or v == "": + return [] + if isinstance(v, list): + return v + return [v] + + if isinstance(cols_spec, dict): + for name, spec in cols_spec.items(): + row: dict[str, Any] = {"name": str(name)} + if isinstance(spec, str): + row["dtype"] = spec + elif isinstance(spec, dict): + row["dtype"] = spec.get("dtype") or spec.get("type") or spec.get("data_type") + if "nullable" in spec: + row["nullable"] = bool(spec.get("nullable")) + elif "not_null" in spec: + row["nullable"] = not bool(spec.get("not_null")) + row["constraints"] = _constraints(spec.get("constraints") or spec.get("tests")) + cols_out.append(row) + elif isinstance(cols_spec, list): + for it in cols_spec: + if not isinstance(it, dict): + continue + nm = it.get("name") + if not nm: + continue + row = {"name": str(nm)} + row["dtype"] = it.get("dtype") or it.get("type") or it.get("data_type") + if "nullable" in it: + row["nullable"] = bool(it.get("nullable")) + elif "not_null" in it: + row["nullable"] = not bool(it.get("not_null")) + row["constraints"] = _constraints(it.get("constraints") or it.get("tests")) + 
cols_out.append(row) + + tbl_constraints = obj.get("constraints") or obj.get("table_constraints") or [] + if tbl_constraints and not isinstance(tbl_constraints, list): + tbl_constraints = [tbl_constraints] + + return { + "enforced": enforced, + "columns": cols_out, + "constraints": tbl_constraints or [], + } + + def read_docs_metadata(project_dir: Path) -> dict[str, Any]: """ Merge YAML + Markdown descriptions with priority: Markdown > YAML. @@ -771,7 +1256,10 @@ def read_docs_metadata(project_dir: Path) -> dict[str, Any]: "models": { : { "description_html": "

" | None, - "columns": { : "

" } + "columns": { + : + "description_html": "

" | None, + } }, }, "columns": { : { : "

" } } @@ -784,27 +1272,46 @@ def read_docs_metadata(project_dir: Path) -> dict[str, Any]: desc = (meta or {}).get("description") cols = (meta or {}).get("columns") or {} lineage_yaml = (meta or {}).get("lineage") + contract_yaml = (meta or {}).get("contract") + contract_norm = _normalize_contract(contract_yaml) + + def _col_desc(v: Any) -> str | None: + if v is None: + return None + if isinstance(v, dict): + v = v.get("description") or v.get("desc") + if not v: + return None + return _render_minimarkdown(str(v)) out_models[model] = { "description_html": _render_minimarkdown(desc) if desc else None, "columns": { - str(k): _render_minimarkdown(str(v)) - for k, v in (cols.items() if isinstance(cols, dict) else []) + str(k): _col_desc(v) for k, v in (cols.items() if isinstance(cols, dict) else []) }, } if isinstance(lineage_yaml, dict): out_models[model]["lineage"] = lineage_yaml + if contract_norm is not None: + out_models[model]["contract"] = contract_norm # 2) Markdown model overrides: docs/models/.md md_models_dir = project_dir / "docs" / "models" if md_models_dir.exists(): for p in md_models_dir.glob("*.md"): model_name = p.stem - _, body = _read_markdown_file(p) + fm, body = _read_markdown_file(p) if body.strip(): out_models.setdefault(model_name, {"description_html": None, "columns": {}}) out_models[model_name]["description_html"] = _render_minimarkdown(body) + # contract in front matter overrides YAML + if isinstance(fm, dict) and "contract" in fm: + c = _normalize_contract(fm.get("contract")) + if c is not None: + out_models.setdefault(model_name, {"description_html": None, "columns": {}}) + out_models[model_name]["contract"] = c + # 3) Markdown column overrides: docs/columns//.md out_columns: dict[str, dict[str, str]] = {} cols_root = project_dir / "docs" / "columns" diff --git a/src/fastflowtransform/lineage.py b/src/fastflowtransform/lineage.py index bd6d5a3..af1fa4d 100644 --- a/src/fastflowtransform/lineage.py +++ b/src/fastflowtransform/lineage.py @@ 
-1,271 +1,872 @@ +# fastflowtransform/lineage.py from __future__ import annotations -import inspect +import ast +import json import re +from collections.abc import Callable from typing import Any -# ──────────────────────────────────────────────────────────────────────────────── -# Data structures -# ──────────────────────────────────────────────────────────────────────────────── +import sqlparse +from sqlparse.sql import ( + Function, + Identifier, + IdentifierList, + Parenthesis, + Statement, + Token, + TokenList, +) +from sqlparse.tokens import DML, Keyword, Name, Newline, Whitespace -LineageItem = dict[str, Any] # { "from_relation": str, "from_column": str, "transformed": bool } -LineageMap = dict[str, list[LineageItem]] # out_col -> [LineageItem, ...] +LineageItem = dict[str, Any] +LineageMap = dict[str, list[LineageItem]] +Projection = Token | TokenList -# ──────────────────────────────────────────────────────────────────────────────── -# SQL lineage (heuristic) -# ──────────────────────────────────────────────────────────────────────────────── +# --------------------------- +# Public API +# --------------------------- -_FROM_RE = re.compile( - r"\b(from|join)\s+([a-zA-Z_][\w\.\$\"`]*)\s+(?:as\s+)?([a-zA-Z_][\w\$]*)", - flags=re.IGNORECASE, -) -_SEL_RE = re.compile(r"\bselect\b(.*?)\bfrom\b", flags=re.IGNORECASE | re.DOTALL) + +def infer_sql_lineage(rendered_sql: str, ref_map: dict[str, str] | None = None) -> LineageMap: + """ + Infer column-level lineage for SQL: + - CTE-aware (WITH ... AS (...)) + - tracks simple transforms (lower/cast/trim/upper/etc.) + - expands CTE edges to base relations + - does NOT emit placeholder unknown edges; if ambiguous/unresolved -> no edge + """ + sql = (rendered_sql or "").strip() + if not sql: + return {} + + sql = _strip_to_query(sql) + stmts = sqlparse.parse(sql) + if not stmts: + return {} + + # Use last statement (CREATE VIEW ...; SELECT ...; etc.) + stmt = stmts[-1] + + # If statement still isn't query-like (e.g. 
CREATE ... AS SELECT ...), + # strip again with token-based method and reparse. + if not _contains_select(stmt): + sql2 = _strip_to_query(str(stmt)) + stmts2 = sqlparse.parse(sql2) + if not stmts2: + return {} + stmt = stmts2[-1] + + ctes, main_stmt = _split_ctes(stmt) + + # infer CTEs in order (CTEs can reference earlier CTEs) + cte_lineage: dict[str, LineageMap] = {} + for name, cte_sql in ctes: + cte_map = infer_sql_lineage(cte_sql, ref_map=ref_map) + # Expand references to already known CTEs inside this CTE (chained CTEs) + cte_map = _expand_cte_edges(cte_map, cte_lineage) + cte_lineage[name] = cte_map + + # infer main statement and expand through CTEs + out = _infer_select_stmt(main_stmt, ref_map=ref_map) + out = _expand_cte_edges(out, cte_lineage) + return out -def _split_select_list(select_clause: str) -> list[str]: +def parse_sql_lineage_overrides(rendered_sql: str) -> LineageMap: """ - Split a SELECT clause into top-level comma-separated expressions. - Handles parentheses depth; does not handle quoted commas - good enough for common cases. + Parse inline overrides from SQL comments. 
+ + Supported: + -- lineage: out_col <- relation.column + -- lineage: out_col <- relation.column xform + -- lineage: out_col <- relation.column, other_rel.other_col + /* lineage: + out_col <- relation.column xform + other <- rel.col + */ + + Also supports JSON: + -- lineage-json: {"out_col":[{"from_relation":"t","from_column":"c","transformed":true}]} + /* lineage-json: {...} */ """ - parts: list[str] = [] - buf: list[str] = [] + sql = rendered_sql or "" + if not sql.strip(): + return {} + + overrides: LineageMap = {} + + # JSON override blocks first (if present) + for payload in _extract_comment_payloads(sql, keys=("lineage-json", "fft-lineage-json")): + try: + obj = json.loads(payload) + if isinstance(obj, dict): + parsed = _normalize_lineage_map(obj) + overrides = merge_lineage(overrides, parsed) + except Exception: + # ignore malformed JSON override blocks + pass + + # Text override lines + for payload in _extract_comment_payloads(sql, keys=("lineage", "fft-lineage")): + parsed = _parse_lineage_text_block(payload) + overrides = merge_lineage(overrides, parsed) + + return overrides + + +def merge_lineage(base: LineageMap, overlay: LineageMap) -> LineageMap: + """ + Union-merge two lineage maps with dedupe. + """ + out: LineageMap = {k: list(v) for k, v in (base or {}).items()} + for col, items in (overlay or {}).items(): + if not isinstance(items, list): + continue + out.setdefault(col, []) + out[col].extend(items) + out[col] = _dedupe_items(out[col]) + # drop empties + return {k: v for k, v in out.items() if v} + + +def infer_py_lineage(py_source: str, ref_map: dict[str, str] | None = None) -> LineageMap: + """ + Minimal python lineage: + - If the file defines __lineage__ = {...} or LINEAGE = {...}, we literal-eval it. + - Otherwise return {}. + + This keeps python models supported without forcing heavy AST/dataframe analysis. 
+ """ + src = py_source or "" + if not src.strip(): + return {} + + try: + tree = ast.parse(src) + except SyntaxError: + return {} + + for node in tree.body: + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id in { + "__lineage__", + "LINEAGE", + "lineage", + }: + try: + val = ast.literal_eval(node.value) + if isinstance(val, dict): + lm = _normalize_lineage_map(val) + # Optional: apply ref_map rewriting of relations + if ref_map: + lm = _apply_ref_map(lm, ref_map) + return lm + except Exception: + pass + return {} + + +# --------------------------- +# SQL inference internals +# --------------------------- + +_SIMPLE_WRAPPER_FUNCS = { + # common "simple" wrappers where arg lineage should be preserved + "lower", + "upper", + "trim", + "ltrim", + "rtrim", + "cast", + "date", + "timestamp", + "coalesce", # if multiple args, we keep all refs + "nullif", +} + +_CLAUSE_TERMINATORS = { + "WHERE", + "GROUP BY", + "HAVING", + "ORDER BY", + "QUALIFY", + "LIMIT", + "FETCH", + "UNION", + "EXCEPT", + "INTERSECT", +} + + +def _strip_to_query(sql: str) -> str: + """ + Try to slice down to the query part for statements like: + CREATE VIEW x AS WITH ... SELECT ... + CREATE TABLE x AS SELECT ... + """ + s = sql.strip() + + # Prefer "... AS WITH|SELECT" + m = re.search(r"\bAS\s+(WITH|SELECT)\b", s, flags=re.IGNORECASE) + if m: + return s[m.start(1) :].strip() + + # Otherwise start at first WITH or SELECT + m2 = re.search(r"\b(WITH|SELECT)\b", s, flags=re.IGNORECASE) + if m2: + return s[m2.start(1) :].strip() + + return s + + +def _contains_select(stmt: Statement) -> bool: + return any(t.ttype is DML and t.value.lower() == "select" for t in stmt.flatten()) + + +def _split_ctes(stmt: Statement) -> tuple[list[tuple[str, str]], Statement]: + """ + If stmt begins with WITH (or WITH RECURSIVE), extract (cte_name, cte_sql) in order + and return the main SELECT statement. Otherwise returns ([], stmt). 
+ + Fix: when reconstructing the "main" SQL, preserve whitespace from the original + token stream (joining the no-whitespace stream breaks parsing: e.g. "selectcol"). + """ + tokens_no_ws = [t for t in stmt.tokens if not _is_ws(t)] + if not tokens_no_ws: + return [], stmt + + # Robust WITH detection across sqlparse versions + if tokens_no_ws[0].value.lower() != "with": + return [], stmt + + # Handle optional "RECURSIVE" + start = 1 + if ( + len(tokens_no_ws) > 1 + and tokens_no_ws[1].ttype is Keyword + and tokens_no_ws[1].value.upper() == "RECURSIVE" + ): + start = 2 + + # Find the first TOP-LEVEL SELECT token after the CTE definitions. + select_idx = None + for i in range(start, len(tokens_no_ws)): + t = tokens_no_ws[i] + if t.ttype is DML and t.value.lower() == "select": + select_idx = i + break + if select_idx is None: + return [], stmt + + # Collect CTE identifiers between WITH[..] and main SELECT + cte_tokens = tokens_no_ws[start:select_idx] + cte_defs: list[Identifier] = [] + for ct in cte_tokens: + if isinstance(ct, IdentifierList): + for it in ct.get_identifiers(): + if isinstance(it, Identifier): + cte_defs.append(it) + elif isinstance(ct, Identifier): + cte_defs.append(ct) + + ctes: list[tuple[str, str]] = [] + for ident in cte_defs: + name = ident.get_real_name() or ident.get_name() + if not name: + continue + parens = [t for t in ident.tokens if isinstance(t, Parenthesis)] + if not parens: + continue + inner = parens[0].value.strip() + if inner.startswith("(") and inner.endswith(")"): + inner = inner[1:-1].strip() + if inner: + ctes.append((name, inner)) + + # Reconstruct main SQL from the ORIGINAL token stream (preserve whitespace) + sel_tok = tokens_no_ws[select_idx] + orig_tokens = list(stmt.tokens) + try: + orig_sel_idx = next(i for i, t in enumerate(orig_tokens) if t is sel_tok) + except StopIteration: + orig_sel_idx = None + + if orig_sel_idx is None: + main_sql = str(stmt).strip() + else: + main_sql = "".join(t.value for t in 
orig_tokens[orig_sel_idx:]).strip() + + main_parsed = sqlparse.parse(main_sql) + main_stmt = main_parsed[0] if main_parsed else stmt + return ctes, main_stmt + + +def _infer_select_stmt(stmt: Statement, ref_map: dict[str, str] | None = None) -> LineageMap: + """ + Infer lineage for a single SELECT statement (no CTE expansion here). + """ + # Find SELECT list + FROM clause + projections = _get_projections(stmt) + alias_map, base_relations = _get_from_sources(stmt) + + # allow externally provided mapping (e.g. ref('x') -> schema.table) + if ref_map: + alias_map.update(ref_map) + + out: LineageMap = {} + + for proj in projections: + out_col = _projection_output_name(proj) + if not out_col: + continue + + refs = _extract_column_refs(proj) + if not refs: + # no column references found -> no lineage edge + continue + + transformed = _is_transformed_projection(proj) + + edges: list[LineageItem] = [] + for parent, col in refs: + rel = None + if parent: + rel = alias_map.get(parent) or parent + # only safe: single base relation + elif len(base_relations) == 1: + rel = base_relations[0] + + if rel and col: + edges.append({"from_relation": rel, "from_column": col, "transformed": transformed}) + + if edges: + out[out_col] = _dedupe_items(edges) + + return out + + +def _get_projections(stmt: Statement) -> list[Projection]: + """ + Returns a list of projection tokens from the top-level SELECT. 
+ """ + tokens = [t for t in stmt.tokens if not _is_ws(t)] + seen_select = False + projs: list[Projection] = [] + + for t in tokens: + if t.ttype is DML and t.value.lower() == "select": + seen_select = True + continue + if not seen_select: + continue + if t.ttype is Keyword and t.value.upper() == "FROM": + break + + if isinstance(t, IdentifierList): + for it in t.get_identifiers(): + if isinstance(it, (Identifier, Function, Parenthesis, TokenList)): + projs.append(it) + elif isinstance(it, Token) and it.ttype is Name: + projs.append(it) # bare column name + elif isinstance(t, (Identifier, Function, Parenthesis)): + projs.append(t) + elif isinstance(t, Token) and t.ttype is Name: + projs.append(t) # bare column name + + # fallback: if sqlparse didn't group things nicely, reparse select list by string + if not projs: + select_list = _select_list_text(stmt) + for expr in _split_top_level_commas(select_list): + tmp = sqlparse.parse(f"SELECT {expr} FROM __dummy__") + if tmp: + projs.extend(_get_projections(tmp[0])) + + return projs + + +def _select_list_text(stmt: Statement) -> str: + """ + Extract SELECT list as raw text between SELECT and first top-level FROM. 
+ """ + s = str(stmt) + # crude but effective for fallback; we only use when sqlparse didn't group + m = re.search(r"\bselect\b", s, flags=re.IGNORECASE) + if not m: + return "" + start = m.end() depth = 0 - for ch in select_clause: + for i in range(start, len(s)): + ch = s[i] if ch == "(": depth += 1 elif ch == ")": depth = max(0, depth - 1) - if ch == "," and depth == 0: - parts.append("".join(buf).strip()) + if depth == 0 and s[i : i + 4].lower() == "from": + return s[start:i].strip() + return "" + + +def _split_top_level_commas(s: str) -> list[str]: + parts: list[str] = [] + buf: list[str] = [] + depth = 0 + in_single = False + + i = 0 + while i < len(s): + ch = s[i] + + # basic single-quote string skipping + if ch == "'" and (i == 0 or s[i - 1] != "\\"): + in_single = not in_single + + if not in_single: + if ch == "(": + depth += 1 + elif ch == ")": + depth = max(0, depth - 1) + + if ch == "," and depth == 0 and not in_single: + part = "".join(buf).strip() + if part: + parts.append(part) buf = [] else: buf.append(ch) + + i += 1 + tail = "".join(buf).strip() if tail: parts.append(tail) return parts -def _alias_map_from_sql(sql: str) -> dict[str, str]: +def _get_from_sources(stmt: Statement) -> tuple[dict[str, str], list[str]]: """ - Build alias -> relation map by scanning FROM/JOIN clauses in the *rendered* SQL. + Parse top-level FROM/JOIN sources into: + - alias_map: alias -> relation (and relation -> relation) + - base_relations: list of relations encountered (FROM/JOIN order) """ - out: dict[str, str] = {} - for _, rel, alias in _FROM_RE.findall(sql): - # Strip quoting if present - rel_clean = rel.strip('"`') - out[alias] = rel_clean - return out + from_clause = _extract_from_clause_text(stmt) + if not from_clause: + return {}, [] + + # Match: FROM rel [AS alias] ; JOIN rel [AS alias] + # rel supports schema.table and quoted identifiers. 
+ pat = re.compile( + r"\b(from|join)\s+" + r"(?P(?:`[^`]+`|\"[^\"]+\"|\[[^\]]+\]|[a-zA-Z_][\w\$]*)(?:\.(?:`[^`]+`|\"[^\"]+\"|\[[^\]]+\]|[a-zA-Z_][\w\$]*))*)" + r"(?:\s+(?:as\s+)?(?P[a-zA-Z_][\w\$]*))?", + flags=re.IGNORECASE, + ) + + alias_map: dict[str, str] = {} + base: list[str] = [] + for m in pat.finditer(from_clause): + rel = _strip_ident_quotes(m.group("rel")) + alias = m.group("alias") + if not rel: + continue + base.append(rel) + alias_map.setdefault(rel, rel) + if alias: + alias_map[alias] = rel + + return alias_map, base -def _append_lineage(lineage: LineageMap, out_col: str | None, item: LineageItem) -> None: +def _extract_from_clause_text(stmt: Statement) -> str: """ - Append a lineage item to the map if an output column name is available. - This guards Optional[str] and keeps type-checkers happy. + Extract raw FROM ... part until next clause terminator. """ - if not out_col: - return - lineage.setdefault(out_col, []).append(item) + s = str(stmt) + m = re.search(r"\bfrom\b", s, flags=re.IGNORECASE) + if not m: + return "" + start = m.start() + # stop at next clause terminator keyword outside parentheses + depth = 0 + i = start + end = len(s) + while i < len(s): + ch = s[i] + if ch == "(": + depth += 1 + elif ch == ")": + depth = max(0, depth - 1) -def infer_sql_lineage(rendered_sql: str, ref_map: dict[str, str] | None = None) -> LineageMap: + if depth == 0: + # check terminators + for term in _CLAUSE_TERMINATORS: + if s[i : i + len(term)].upper() == term: + end = i + i = len(s) + break + i += 1 + + return s[start:end].strip() + + +def _projection_output_name(proj: Projection) -> str | None: """ - Infer a mapping from output column -> upstream sources (relation.column) for common patterns: - - . AS - - AS (relation unknown) - - FUNC(.) AS → transformed=True - - bare . → out=, direct - Joins with aliases are resolved via → relation from FROM/JOIN. 
+ Determine output column name for a projection: + - prefer alias + - else, for simple identifiers return column name + - otherwise None (we need an alias to attach lineage reliably) """ - lineage: LineageMap = {} - if not rendered_sql: - return lineage + if isinstance(proj, Identifier): + return proj.get_alias() or proj.get_name() or proj.get_real_name() + if isinstance(proj, Token) and proj.ttype is Name: + return proj.value + return None - alias_map = ref_map or _alias_map_from_sql(rendered_sql) - m = _SEL_RE.search(rendered_sql) - if not m: - return lineage - select_clause = m.group(1) - exprs = _split_select_list(select_clause) - - # Patterns - as_pat = re.compile(r"^(?P.+?)\s+as\s+(?P[a-zA-Z_][\w\$]*)$", re.IGNORECASE) - qual_col = re.compile(r"^(?P[a-zA-Z_]\w*)\.(?P[a-zA-Z_]\w*)$") - func_of_qual = re.compile( - r"^[a-zA-Z_]\w*\s*\(\s*(?P[a-zA-Z_]\w*)\.(?P[a-zA-Z_]\w*)\s*\)\s*$", re.IGNORECASE - ) +def _is_transformed_projection(proj: Projection) -> bool: + """ + Heuristic transform flag: + - False only for direct column references (optionally qualified), + with alias == column if present + - True otherwise (functions, ops, casts, renames, etc.) 
+ """ + if isinstance(proj, Identifier): + # direct a.b or b + alias = proj.get_alias() + col = proj.get_real_name() + parent = proj.get_parent_name() + + # Remove alias portion (e.g., "AS alias" or trailing alias) before checking for direct refs + expr_tokens = list(proj.tokens) + if alias: + cut = None + for i, t in enumerate(expr_tokens): + if t.ttype is Keyword and t.value.upper() == "AS": + cut = i + break + if cut is not None: + expr_tokens = expr_tokens[:cut] + else: + alias_norm = alias.strip("\"'").lower() + for i in range(len(expr_tokens) - 1, -1, -1): + t = expr_tokens[i] + if ( + isinstance(t, Token) + and t.ttype is Name + and t.value.strip("\"'").lower() == alias_norm + ): + expr_tokens = expr_tokens[:i] + break + + base_expr = "".join(t.value for t in expr_tokens).strip() + direct = col and ( + base_expr == col or (parent and base_expr == f"{parent}.{col}") or base_expr == f"{col}" + ) + if direct and (alias is None or alias == col): + return False - for raw in exprs: - expr = raw.strip() - if expr == "*" or not expr: - continue + # function inside identifier => transformed + if any(isinstance(t, Function) for t in proj.tokens): + return True - out_col: str | None = None - expr_only = expr - m_as = as_pat.match(expr) - if m_as: - out_col = m_as.group("alias") - expr_only = m_as.group("expr").strip() - - # func(alias.col) - m_func = func_of_qual.match(expr_only) - if m_func: - a, c = m_func.group("a"), m_func.group("c") - rel = alias_map.get(a) - item = { - "from_relation": rel or "?", - "from_column": c, - "transformed": True, - } - if out_col is None: - out_col = c # best-effort - _append_lineage(lineage, out_col, item) - continue + # rename-only is still "transformed" per docs UI expectation + if alias and col and alias != col: + return True - # alias.col - m_q = qual_col.match(expr_only) - if m_q: - a, c = m_q.group("a"), m_q.group("c") - rel = alias_map.get(a) - item = { - "from_relation": rel or "?", - "from_column": c, - "transformed": False, - 
} - if out_col is None: - out_col = c - _append_lineage(lineage, out_col, item) - continue + # default: transformed + return True - # plain col (no qualifier) - we can only map column name with unknown relation - m_col = re.match(r"^[a-zA-Z_]\w*$", expr_only) - if m_col: - c = expr_only - item = {"from_relation": "?", "from_column": c, "transformed": False} - if out_col is None: - out_col = c - _append_lineage(lineage, out_col, item) - continue - # func(col) or complex expression → mark as transformed with unknown relation/col - _append_lineage( - lineage, out_col, {"from_relation": "?", "from_column": "?", "transformed": True} +def _extract_column_refs(tok: Any) -> list[tuple[str | None, str]]: + """ + Extract column references as (parent/table_alias, column_name). + + Fix: don't treat output aliases as input column refs (e.g. "... as first_signup"). + """ + refs: list[tuple[str | None, str]] = [] + + def walk_children(node: Any) -> list[tuple[str | None, str]]: + out: list[tuple[str | None, str]] = [] + for child in getattr(node, "tokens", []) or []: + out.extend(_extract_column_refs(child)) + return out + + def handle_name(node: Token) -> list[tuple[str | None, str]]: + return [(None, node.value)] + + def handle_identifier(node: Identifier) -> list[tuple[str | None, str]]: + alias = node.get_alias() + + is_expr = any(isinstance(t, (Function, Parenthesis)) for t in node.tokens) or any( + getattr(t, "is_group", False) for t in node.tokens ) + if is_expr: + expr_tokens = list(node.tokens) + + if alias: + cut = None + for i, t in enumerate(expr_tokens): + if t.ttype is Keyword and t.value.upper() == "AS": + cut = i + break + + if cut is not None: + expr_tokens = expr_tokens[:cut] + else: + alias_norm = alias.strip("\"'").lower() + for i in range(len(expr_tokens) - 1, -1, -1): + t = expr_tokens[i] + if ( + isinstance(t, Token) + and t.ttype is Name + and t.value.strip("\"'").lower() == alias_norm + ): + expr_tokens = expr_tokens[:i] + break + + out: list[tuple[str | 
None, str]] = [] + for t in expr_tokens: + out.extend(_extract_column_refs(t)) + return out + + col = node.get_real_name() + parent = node.get_parent_name() + return [(parent, col)] if col else [] + + def handle_identifier_list(node: IdentifierList) -> list[tuple[str | None, str]]: + out: list[tuple[str | None, str]] = [] + for it in node.get_identifiers(): + out.extend(_extract_column_refs(it)) + return out + + def handle_function(node: Function) -> list[tuple[str | None, str]]: + fn_name = (node.get_name() or "").lower() + out: list[tuple[str | None, str]] = [] + for t in node.tokens: + if isinstance(t, Identifier) and (t.get_name() or "").lower() == fn_name: + continue + out.extend(_extract_column_refs(t)) + return out + + handlers: dict[type[Any], Callable[[Any], list[tuple[str | None, str]]]] = { + Identifier: handle_identifier, + IdentifierList: handle_identifier_list, + Function: handle_function, + Parenthesis: walk_children, + } + + # Bare Name token (rarely used directly in projections, but kept for compatibility) + if isinstance(tok, Token) and tok.ttype is Name: + return handle_name(tok) + + for typ, fn in handlers.items(): + if isinstance(tok, typ): + return fn(tok) + + if getattr(tok, "is_group", False): + return walk_children(tok) + + return refs + + +def _expand_cte_edges(lin: LineageMap, cte_lineage: dict[str, LineageMap]) -> LineageMap: + """ + Replace edges pointing to a CTE relation with that CTE's own lineage for that column. 
+ """ + if not lin: + return {} - return lineage + out: LineageMap = {} + for out_col, edges in lin.items(): + expanded: list[LineageItem] = [] + for e in edges or []: + rel = str(e.get("from_relation") or "") + col = str(e.get("from_column") or "") + if rel in cte_lineage and col in (cte_lineage[rel] or {}): + for sub in cte_lineage[rel][col]: + expanded.append( + { + "from_relation": sub.get("from_relation"), + "from_column": sub.get("from_column"), + "transformed": bool(e.get("transformed")) + or bool(sub.get("transformed")), + } + ) + else: + expanded.append(e) + expanded = [x for x in expanded if x.get("from_relation") and x.get("from_column")] + if expanded: + out[out_col] = _dedupe_items(expanded) + return out -# ──────────────────────────────────────────────────────────────────────────────── -# Python (pandas) lineage (very light heuristic) -# ──────────────────────────────────────────────────────────────────────────────── +# --------------------------- +# Override parsing internals +# --------------------------- -_ASSIGN_RE = re.compile( - r"""\b(?P[_a-zA-Z]\w*)\s*\[\s*['"](?P[_a-zA-Z]\w*)['"]\s*\]\s*=\s* - (?P[_a-zA-Z]\w*)\s*\[\s*['"](?P[_a-zA-Z]\w*)['"]\s*\]""", - re.VERBOSE, -) -_RENAME_RE = re.compile( - r"""\.rename\s*\(\s*columns\s*=\s*\{(?P.*?)\}\s*\)""", - re.DOTALL, + +def _extract_comment_payloads(sql: str, keys: tuple[str, ...]) -> list[str]: + """ + Extract payloads from comments like: + -- key: payload + /* key: payload */ + Returns a list of payload strings (not including the key). 
+ """ + out: list[str] = [] + + # line comments + for key in keys: + line_pat = re.compile(rf"--\s*{re.escape(key)}\s*:\s*(.+)$", re.IGNORECASE | re.MULTILINE) + out.extend(m.group(1).strip() for m in line_pat.finditer(sql)) + + # block comments + for key in keys: + block_pat = re.compile(rf"/\*\s*{re.escape(key)}\s*:\s*(.*?)\*/", re.IGNORECASE | re.DOTALL) + out.extend(m.group(1).strip() for m in block_pat.finditer(sql)) + + return out + + +_LINEAGE_TEXT_LINE = re.compile( + r"^\s*(?P[a-zA-Z_][\w\$]*)\s*(?:<-|<=|=|:)\s*(?P.+?)\s*$" ) -_PAIR_RE = re.compile(r"""['"](?P[_a-zA-Z]\w*)['"]\s*:\s*['"](?P[_a-zA-Z]\w*)['"]""") -_ASSIGN_LAMBDA_RE = re.compile( - r"""\.assign\s*\(\s*([_a-zA-Z]\w*)\s*=\s*lambda\s+\w+\s*:\s*(?P[^,)]+)\)""", - re.DOTALL, + +_SRC_REF = re.compile( + r"(?P(?:`[^`]+`|\"[^\"]+\"|\[[^\]]+\]|[a-zA-Z_][\w\$]*)(?:\.(?:`[^`]+`|\"[^\"]+\"|\[[^\]]+\]|[a-zA-Z_][\w\$]*))*)" + r"\.(?P[a-zA-Z_][\w\$]*)" ) -_BODY_SRC_COL = re.compile(r"""\[\s*['"](?P[_a-zA-Z]\w*)['"]\s*\]""") -def infer_py_lineage( - func: Any, requires: dict | None = None, source_code: str | None = None -) -> LineageMap: +def _parse_lineage_text_block(block: str) -> LineageMap: """ - Very small regex-based inference for common pandas patterns: - - out["x"] = df["y"] → x <- y (direct) - - df.rename(columns={"y": "x"}) → x <- y (transformed=True) - - .assign(x=lambda d: d["y"].str.upper()) → x <- y (transformed=True) [best-effort] - Relation is unknown ("?"); full mapping across multiple inputs would require deeper analysis. + Parse a multi-line block of text overrides. 
""" - code = source_code or "" - try: - if not code and func is not None: - code = inspect.getsource(func) - except Exception: - pass + out: LineageMap = {} + for raw_line in (block or "").splitlines(): + line = raw_line.strip() + if not line: + continue - lineage: LineageMap = {} - if not code: - return lineage + m = _LINEAGE_TEXT_LINE.match(line) + if not m: + continue - # Assign pattern: out["x"] = df["y"] - for m in _ASSIGN_RE.finditer(code): out_col = m.group("out") - src_col = m.group("col") - _append_lineage( - lineage, out_col, {"from_relation": "?", "from_column": src_col, "transformed": False} - ) + srcs = m.group("srcs") + + # xform flag + xform = False + if re.search(r"\b(xform|transformed)\b", srcs, flags=re.IGNORECASE): + xform = True + srcs = re.sub(r"\b(xform|transformed)\b", "", srcs, flags=re.IGNORECASE).strip() + + items: list[LineageItem] = [] + for src in re.split(r"\s*,\s*", srcs): + sm = _SRC_REF.search(src) + if not sm: + continue + rel = _strip_ident_quotes(sm.group("rel")) + col = sm.group("col") + items.append({"from_relation": rel, "from_column": col, "transformed": xform}) + + if items: + out[out_col] = _dedupe_items(out.get(out_col, []) + items) - # Rename pattern: .rename(columns={"old":"new"}) - for m in _RENAME_RE.finditer(code): - pairs = m.group("pairs") - for p in _PAIR_RE.finditer(pairs): - old, new = p.group("old"), p.group("new") - _append_lineage( - lineage, new, {"from_relation": "?", "from_column": old, "transformed": True} - ) - - # assign(x=lambda d: ...) 
- for m in _ASSIGN_LAMBDA_RE.finditer(code): - out_col = m.group(1) - body = m.group("body") - m2 = _BODY_SRC_COL.search(body) - if m2: - src_col = m2.group("col") - _append_lineage( - lineage, - out_col, - {"from_relation": "?", "from_column": src_col, "transformed": True}, - ) - - return lineage - - -# ──────────────────────────────────────────────────────────────────────────────── -# Overrides (YAML / SQL comment directives) -# ──────────────────────────────────────────────────────────────────────────────── - -_LINEAGE_DIRECTIVE = re.compile( - r"""--\s*@lineage\s+([_a-zA-Z]\w*)\s*:\s*([_a-zA-Z]\w*)\.([_a-zA-Z]\w*) - (?:\s*\(\s*(transformed)\s*\))?""", - re.IGNORECASE | re.VERBOSE, -) + return out -def parse_sql_lineage_overrides(sql_text: str) -> LineageMap: +def _normalize_lineage_map(obj: Any) -> LineageMap: """ - Parse optional SQL comment directives: - -- @lineage email_upper: users.email (transformed) + Accepts either: + {"col":[{...},{...}], "col2":[...]} + or: + {"col": {...single...}} + and normalizes to LineageMap. 
""" + if not isinstance(obj, dict): + return {} + out: LineageMap = {} - for m in _LINEAGE_DIRECTIVE.finditer(sql_text or ""): - out_col, rel, col, tr = m.group(1), m.group(2), m.group(3), m.group(4) - out.setdefault(out_col, []).append( - {"from_relation": rel, "from_column": col, "transformed": bool(tr)} + for k, v in obj.items(): + if isinstance(v, dict): + out[str(k)] = [v] + elif isinstance(v, list): + out[str(k)] = [x for x in v if isinstance(x, dict)] + for k in list(out.keys()): + out[k] = _dedupe_items(out[k]) + return out + + +def _apply_ref_map(lm: LineageMap, ref_map: dict[str, str]) -> LineageMap: + out: LineageMap = {} + for col, items in (lm or {}).items(): + new_items: list[LineageItem] = [] + for it in items: + rel = it.get("from_relation") + if isinstance(rel, str) and rel in ref_map: + item_copy = dict(it) + item_copy["from_relation"] = ref_map[rel] + new_items.append(item_copy) + else: + new_items.append(it) + out[col] = _dedupe_items(new_items) + return out + + +# --------------------------- +# Utilities +# --------------------------- + + +def _dedupe_items(items: list[LineageItem]) -> list[LineageItem]: + seen = set() + out: list[LineageItem] = [] + for it in items or []: + rel = it.get("from_relation") + col = it.get("from_column") + if not rel or not col: + continue + key = ( + str(rel), + str(col), + bool(it.get("transformed")), + str(it.get("confidence") or ""), ) + if key in seen: + continue + seen.add(key) + out.append(it) return out -def merge_lineage(*maps: LineageMap | None) -> LineageMap: +def _strip_ident_quotes(s: str) -> str: """ - Merge multiple lineage maps. Later maps override/extend earlier ones by output column. - If a later map provides any entries for a column, it replaces previous entries for that column. + Remove simple quoting from identifiers: `x`, "x", [x] + Leaves internal dots intact. 
""" - merged: LineageMap = {} - for mp in maps: - if not mp: - continue - for out_col, items in mp.items(): - merged[out_col] = list(items) - return merged + s = (s or "").strip() + if not s: + return s + + def strip_one(part: str) -> str: + part = part.strip() + if part.startswith("`") and part.endswith("`"): + return part[1:-1] + if part.startswith('"') and part.endswith('"'): + return part[1:-1] + if part.startswith("[") and part.endswith("]"): + return part[1:-1] + return part + + return ".".join(strip_one(p) for p in s.split(".")) + + +def _is_ws(t: Any) -> bool: + return ( + t is None + or t.ttype in (Whitespace, Newline) + or (hasattr(t, "is_whitespace") and t.is_whitespace) + ) diff --git a/src/fastflowtransform/templates/assets/spa.css b/src/fastflowtransform/templates/assets/spa.css index dc59ec7..d61d054 100644 --- a/src/fastflowtransform/templates/assets/spa.css +++ b/src/fastflowtransform/templates/assets/spa.css @@ -530,3 +530,179 @@ tr.colHit{ cursor: pointer; } .facetClear:disabled { opacity: 0.5; cursor: not-allowed; } + +/* Docs coverage UI */ +.docPills{ + display:flex; + gap:8px; + flex-wrap:wrap; + margin-top:8px; +} +.docPillsCompact{ margin-top:0; } + +.docList{ + list-style:none; + margin:12px 0 0 0; + padding:0; + display:flex; + flex-direction:column; + gap:8px; +} +.docRow a{ + display:flex; + justify-content:space-between; + align-items:flex-start; + gap:12px; + border:1px solid var(--border); + border-radius:14px; + padding:10px 12px; + text-decoration:none; + color:inherit; +} +.docRow a:hover{ + border-color:var(--accent); + background: color-mix(in srgb, var(--accent), transparent 94%); +} +.docRowMain{ min-width: 0; } +.docRowTitle{ font-weight:600; } +.docRowSub{ + margin-top:4px; + font-size:12px; + color:var(--muted); + overflow:hidden; + text-overflow:ellipsis; + white-space:nowrap; + max-width: 60ch; +} +.docRowPills{ + display:flex; + gap:8px; + flex-wrap:wrap; + justify-content:flex-end; + align-items:center; + 
margin-top:2px; + flex: 0 0 auto; +} +.docPillsHeader { margin-bottom: 10px; } + +.pillSmall.pillWarn{ + border-color: color-mix(in srgb, #f59e0b, var(--border) 60%); + background: color-mix(in srgb, #f59e0b, transparent 88%); +} + +.codeDetails { margin-top: 8px; } +.codeSummary { cursor: pointer; color: var(--fg); } +.codeBlock { + margin-top: 10px; + padding: 10px 12px; + border: 1px solid var(--border); + border-radius: 14px; + overflow: auto; + background: color-mix(in srgb, var(--card), transparent 10%); +} + +.codeSummary { cursor: pointer; } +.codeDetails { margin-top: 8px; } + +.facetChipsWrap { flex-wrap: wrap; } + +.groupHeader { margin-top: 10px; list-style:none; } +.groupHeaderRow{ + display:flex; + align-items:center; + justify-content:space-between; + gap:10px; + color: var(--muted); + font-size: 11px; + letter-spacing: .06em; + text-transform: uppercase; + padding: 6px 2px; +} + +.codeBlock{ + margin-top: 10px; + padding: 10px 12px; + border: 1px solid var(--border); + border-radius: 14px; + overflow: auto; + background: color-mix(in srgb, var(--card), transparent 10%); + max-height: 520px; +} +.codeWrap{ + white-space: pre-wrap; + word-break: break-word; +} + +/* --- SQL highlighting (no deps) --- */ +.codeBlock code { + display: block; +} + +/* defaults (light) */ +.codeBlock .tok-kw { color: #6d28d9; font-weight: 600; } /* keywords */ +.codeBlock .tok-fn { color: #1d4ed8; } /* functions */ +.codeBlock .tok-str { color: #15803d; } /* strings */ +.codeBlock .tok-num { color: #b45309; } /* numbers */ +.codeBlock .tok-com { color: #6b7280; font-style: italic; } /* comments */ +.codeBlock .tok-id { color: inherit; } /* identifiers */ + +/* dark mode */ +@media (prefers-color-scheme: dark) { + .codeBlock .tok-kw { color: #c084fc; } + .codeBlock .tok-fn { color: #93c5fd; } + .codeBlock .tok-str { color: #86efac; } + .codeBlock .tok-num { color: #fbbf24; } + .codeBlock .tok-com { color: #9ca3af; } +} + +/* Health strip layout */ +.healthSplit{ + 
display:flex; + gap:14px; /* controls spacing between Last run and Tests */ + align-items:flex-start; + justify-content:flex-start; + flex-wrap:wrap; /* stacks on narrow screens */ +} + +.healthPane{ + flex: 1 1 360px; /* two columns when space allows */ + min-width: 320px; /* prevent ultra-narrow columns */ +} + +/* Optional: tighten headings inside health */ +.healthPane h4{ + margin: 0 0 8px 0; +} + +.healthStack{ + display:flex; + flex-direction:column; + gap:12px; +} + +.healthRow{ + border: 1px solid var(--border, rgba(0,0,0,.12)); + border-radius: 10px; + padding: 10px 12px; + background: var(--card, transparent); +} + +.healthRowSummary{ + cursor: pointer; + list-style: none; + display:flex; + align-items:center; + gap:10px; +} + +/* remove default marker in some browsers */ +.healthRowSummary::-webkit-details-marker{ display:none; } + +.healthRowHead{ + flex: 0 0 auto; +} + +.healthRowBody{ + margin-top:10px; +} + diff --git a/src/fastflowtransform/templates/assets/spa.js b/src/fastflowtransform/templates/assets/spa.js index 02a2ce1..91ee60c 100644 --- a/src/fastflowtransform/templates/assets/spa.js +++ b/src/fastflowtransform/templates/assets/spa.js @@ -1,13 +1,28 @@ const MANIFEST_URL = window.__FFT_MANIFEST_PATH__ || "assets/docs_manifest.json"; +const RUN_RESULTS_URL = window.__FFT_RUN_RESULTS_PATH__ || "assets/run_results.json"; +const TEST_RESULTS_URL = window.__FFT_TEST_RESULTS_PATH__ || "assets/test_results.json"; +const UTEST_RESULTS_URL = window.__FFT_UTEST_RESULTS_PATH__ || "assets/utest_results.json"; function el(tag, attrs = {}, ...children) { const n = document.createElement(tag); for (const [k, v] of Object.entries(attrs || {})) { + if (v == null) continue; + if (k === "class") n.className = v; else if (k === "html") n.innerHTML = v; - else if (k.startsWith("on") && typeof v === "function") n.addEventListener(k.slice(2), v); + else if (k.startsWith("on") && typeof v === "function") + n.addEventListener(k.slice(2).toLowerCase(), v); + + // ✅ 
critical: boolean attributes + else if (k === "disabled") n.disabled = !!v; + else if (typeof v === "boolean") { + if (v) n.setAttribute(k, ""); + // if false: omit attribute + } + else n.setAttribute(k, String(v)); } + for (const c of children) { if (c == null) continue; n.appendChild(typeof c === "string" ? document.createTextNode(c) : c); @@ -94,6 +109,9 @@ const FACET_Q = { kind: "mk", // "sql" | "python" (omit => both) materialized: "mm", // normalized materialization, or "__unknown__" path: "mp", // path prefix + tags:"mt", + owner: "mo", + group:"mg" }; const MAT_UNKNOWN = "__unknown__"; @@ -121,6 +139,15 @@ function modelMatchesPathPrefix(modelPath, prefix) { function readModelFacetsFromQuery(query) { const mk = (query.get(FACET_Q.kind) || "").trim().toLowerCase(); + const mt = (query.get(FACET_Q.tags) || "").trim(); + const tags = mt ? mt.split(",").map(s => s.trim()).filter(Boolean) : []; + + const mo = (query.get(FACET_Q.owner) || "").trim(); + const owners = mo ? mo.split(",").map(s => s.trim()).filter(Boolean) : []; + + const mg = (query.get(FACET_Q.group) || "").trim().toLowerCase(); + const groupBy = (mg === "owner" || mg === "domain") ? mg : ""; + const kinds = mk ? 
mk.split(",").map(s => s.trim()).filter(Boolean) : ["sql", "python"]; @@ -132,6 +159,9 @@ function readModelFacetsFromQuery(query) { kinds: Array.from(validKinds), materialized: normalizeMaterialized(query.get(FACET_Q.materialized) || ""), pathPrefix: query.get(FACET_Q.path) || "", + tags, + owners, + groupBy }; } @@ -149,6 +179,15 @@ function writeModelFacetsToQuery(query, facets) { if (facets.pathPrefix) query.set(FACET_Q.path, facets.pathPrefix); else query.delete(FACET_Q.path); + + if (facets.tags && facets.tags.length) query.set(FACET_Q.tags, facets.tags.join(",")); + else query.delete(FACET_Q.tags); + + if (facets.owners && facets.owners.length) query.set(FACET_Q.owner, facets.owners.join(",")); + else query.delete(FACET_Q.owner); + + if (facets.groupBy) query.set(FACET_Q.group, facets.groupBy); + else query.delete(FACET_Q.group); } function currentModelFacets() { @@ -158,13 +197,23 @@ function currentModelFacets() { function facetsActiveCount(f) { const kinds = new Set(f.kinds || []); const kindActive = !(kinds.has("sql") && kinds.has("python")); - return (kindActive ? 1 : 0) + (f.materialized ? 1 : 0) + (f.pathPrefix ? 1 : 0); + const tagsN = (f.tags || []).length; + const ownersN = (f.owners || []).length; + const groupActive = f.groupBy ? 1 : 0; + return (kindActive ? 1 : 0) + + (f.materialized ? 1 : 0) + + (f.pathPrefix ? 
1 : 0) + + tagsN + + ownersN + + groupActive; } function filterModelsWithFacets(models, facets) { const kinds = new Set((facets.kinds || []).map(k => (k || "").toLowerCase())); const mat = normalizeMaterialized(facets.materialized || ""); const pref = facets.pathPrefix || ""; + const tagSet = new Set((facets.tags || []).map(t => t.toLowerCase())); + const ownerSet = new Set((facets.owners || []).map(o => String(o).toLowerCase())); return (models || []).filter(m => { const kind = normalizeModelKind(m.kind); @@ -180,17 +229,126 @@ function filterModelsWithFacets(models, facets) { } if (pref && !modelMatchesPathPrefix(m.path || "", pref)) return false; + + if (tagSet.size) { + const mtags = Array.isArray(m.tags) ? m.tags : []; + const ok = mtags.some(t => tagSet.has(String(t).toLowerCase())); + if (!ok) return false; + } + + if (ownerSet.size) { + const owners = Array.isArray(m.owners) ? m.owners : []; + const ok = owners.some(o => ownerSet.has(String(o).toLowerCase())); + if (!ok) return false; + } return true; }); } +// Shareable UI state (URL-encoded) +const SHARE_Q = { + sidebar: "sf", // sidebar filter text + gPin: "gp", // pinned node id + gMode: "gm", // up|down|both|off + gDepth: "gd", // 1..8 + gDir: "gr", // LR|TB +}; + +function readShareStateFromUrl(state) { + const { query } = parseHashWithQuery(); + + // Sidebar search (treat URL as source of truth) + state.filter = query.get(SHARE_Q.sidebar) || ""; + + // Graph state (defaults if absent) + state.graphUI ||= { mode: "both", depth: 2, pinned: "", dir: "LR" }; + + const mode = (query.get(SHARE_Q.gMode) || "").toLowerCase(); + if (["up","down","both","off"].includes(mode)) state.graphUI.mode = mode; + else state.graphUI.mode = "both"; + + const depthRaw = Number(query.get(SHARE_Q.gDepth)); + if (Number.isFinite(depthRaw)) state.graphUI.depth = Math.max(1, Math.min(8, depthRaw)); + else state.graphUI.depth = 2; + + state.graphUI.pinned = query.get(SHARE_Q.gPin) || ""; + + const dir = (query.get(SHARE_Q.gDir) 
|| "").toUpperCase(); + if (dir === "LR" || dir === "TB") state.graphUI.dir = dir; + else state.graphUI.dir = (state.manifest?.dag?.graph?.direction || "LR").toUpperCase(); + + // Keep sidebar input in sync if it exists + if (state.ui?.sidebar?.input) state.ui.sidebar.input.value = state.filter; +} + +function writeShareStateToQuery(q, state) { + const sf = String(state.filter || ""); + if (sf) q.set(SHARE_Q.sidebar, sf); + else q.delete(SHARE_Q.sidebar); + + const g = state.graphUI || {}; + if ((g.mode || "both") !== "both") q.set(SHARE_Q.gMode, g.mode); + else q.delete(SHARE_Q.gMode); + + if (Number(g.depth || 2) !== 2) q.set(SHARE_Q.gDepth, String(g.depth)); + else q.delete(SHARE_Q.gDepth); + + if (g.pinned) q.set(SHARE_Q.gPin, g.pinned); + else q.delete(SHARE_Q.gPin); + + if ((g.dir || "LR").toUpperCase() !== "LR") q.set(SHARE_Q.gDir, (g.dir || "LR").toUpperCase()); + else q.delete(SHARE_Q.gDir); +} + +function syncShareStateToUrl(state) { + replaceHashQuery((q) => writeShareStateToQuery(q, state)); +} + +function copyCurrentLink(state) { + // Ensure URL contains current search + graph state + syncShareStateToUrl(state); + copyToClipboard(location.href); +} + +function copyLinkBtn(state, cls = "btnTiny") { + return el("button", { + class: cls, + type: "button", + title: "Copy a shareable link to this view", + onclick: () => copyCurrentLink(state), + }, "Copy link"); +} + // Merge current facet params into an arbitrary hash route (e.g. "#/model/x?tab=columns") function routeWithFacets(route) { const facets = currentModelFacets(); const r = (route || "#/").startsWith("#") ? 
(route || "#/").slice(1) : (route || "/"); const [pathPart, queryPart] = r.split("?", 2); const q = new URLSearchParams(queryPart || ""); + + if (String(pathPart || "").startsWith("/model/")) { + const curQ = parseHashWithQuery().query; + const curTab = curQ.get("tab") || ""; + const curCode = curQ.get("code") || ""; + + if (!q.has("tab") && curTab) q.set("tab", curTab); + + const effectiveTab = q.get("tab") || curTab; + if (effectiveTab === "code") { + if (!q.has("code") && curCode) q.set("code", curCode); + } else { + // avoid leaking stale code=... into non-code tabs + q.delete("code"); + } + } + + // Preserve share-state params across navigation unless target already sets them + const curQ = parseHashWithQuery().query; + for (const k of Object.values(SHARE_Q)) { + if (!q.has(k) && curQ.has(k)) q.set(k, curQ.get(k)); + } + writeModelFacetsToQuery(q, facets); const next = q.toString() ? `${pathPart}?${q.toString()}` : `${pathPart}`; @@ -222,6 +380,47 @@ function debounce(fn, ms = 180) { return wrapped; } + +// -------- Source freshness helpers ----------------------------------------- +function toNumOrNull(v) { + if (v == null) return null; + const n = Number(v); + return Number.isFinite(n) ? 
n : null; +} + +function formatMinutesCompact(mins) { + const m = toNumOrNull(mins); + if (m == null) return "—"; + const total = Math.max(0, Math.round(m)); + const d = Math.floor(total / 1440); + const h = Math.floor((total % 1440) / 60); + const mm = total % 60; + const parts = []; + if (d) parts.push(`${d}d`); + if (h) parts.push(`${h}h`); + if (mm || parts.length === 0) parts.push(`${mm}m`); + return parts.join(" "); +} + +function sourceFreshnessConfig(s) { + const loadedAtField = String((s && s.loaded_at_field) || "").trim(); + const warnMinutes = toNumOrNull(s && s.warn_after_minutes); + const errorMinutes = toNumOrNull(s && s.error_after_minutes); + + const hasLoaded = !!loadedAtField; + const hasThresh = warnMinutes != null || errorMinutes != null; + const configured = hasLoaded && hasThresh; + + let reason = ""; + if (configured) reason = "Freshness is configured (loaded_at field + thresholds)."; + else if (!hasLoaded && hasThresh) reason = "Warn/error thresholds are set but loaded_at field is missing."; + else if (hasLoaded && !hasThresh) reason = "loaded_at field is set but warn/error thresholds are missing."; + else reason = "No freshness configuration found for this source."; + + return { configured, reason, loadedAtField, warnMinutes, errorMinutes }; +} + + function setTabInHash(tab) { const full = (location.hash || "#/").slice(1); const [pathPart, queryPart] = full.split("?", 2); @@ -244,6 +443,19 @@ function setModelQuery({ tab, col }) { location.hash = `#${next.startsWith("/") ? "" : "/"}${next}`; } +function setModelCodeQuery({ code }) { + const full = (location.hash || "#/").slice(1); + const [pathPart, queryPart] = full.split("?", 2); + const q = new URLSearchParams(queryPart || ""); + + q.set("tab", "code"); + if (code) q.set("code", code); + else q.delete("code"); + + const next = q.toString() ? `${pathPart}?${q.toString()}` : `${pathPart}`; + location.hash = `#${next.startsWith("/") ? 
"" : "/"}${next}`; +} + function parseRoute() { const { parts, query } = parseHashWithQuery(); if (parts.length === 0) { @@ -256,12 +468,18 @@ function parseRoute() { name: decodeURIComponent(parts.slice(1).join("/")), tab: query.get("tab") || "", col: query.get("col") || "", + code: query.get("code") || "", // "rendered" | "raw" | "refs" }; } if (parts[0] === "source" && parts[1] && parts[2]) { return { route: "source", source: decodeURIComponent(parts[1]), table: decodeURIComponent(parts[2]) }; } - if (parts[0] === "macros") return { route: "macros" }; + + if (parts[0] === "macro" && parts[1]) { + return { route: "macro", name: decodeURIComponent(parts.slice(1).join("/")) }; + } + + if (parts[0] === "macros") return { route: "macros", q: query.get("mq") || "" }; return { route: "home" }; } @@ -500,202 +718,1515 @@ function buildNeighborhoodGraph(fullGraph, centerId, opts) { return { ...fullGraph, nodes, edges, bounds, direction: "LR" }; } -function renderHome(state) { - const { manifest } = state; - const graph = manifest.dag?.graph; - - const graphHost = el("div", { class: "graphHost" }); - const miniHost = el("div", { class: "minimapHost" }); - - const modeBtn = (id, label) => - el("button", { - class: `btn ${state.graphUI.mode === id ? 
"active" : ""}`, - onclick: () => { state.graphUI.mode = id; state._graphCtl?.refresh?.(); } - }, label); - const depthPill = el("span", { class: "pill" }, `Depth ${state.graphUI.depth}`); - const depthSlider = el("input", { - type: "range", min: "1", max: "8", - value: String(state.graphUI.depth), - oninput: (e) => { - state.graphUI.depth = Number(e.target.value || 2); - depthPill.textContent = `Depth ${state.graphUI.depth}`; - rerenderMini(); - } - }); +function hasDocs(obj) { + const txt = (obj?.description_text || "").trim(); + const html = (obj?.description_html || "").trim(); + return !!(txt || html); +} - const fitBtn = el("button", { class: "btnTiny", title: "Fit to screen", onclick: () => state._graphCtl?.fit?.() }, "Fit"); - const resetBtn = el("button", { class: "btnTiny", title: "Reset pan/zoom", onclick: () => state._graphCtl?.reset?.() }, "Reset"); - const zoomOutBtn = el("button", { class: "btnTiny", title: "Zoom out", onclick: () => state._graphCtl?.zoomOut?.() }, "–"); - const zoomInBtn = el("button", { class: "btnTiny", title: "Zoom in", onclick: () => state._graphCtl?.zoomIn?.() }, "+"); +function isColumnDocumented(c) { + const txt = (c?.description_text || "").trim(); + const html = (c?.description_html || "").trim(); + return !!(txt || html); +} - const dirPill = el("span", { class: "pillSmall" }, - state.graphUI.dir === "TB" ? "Top → Bottom" : "Left → Right" - ); +function modelDocsStatus(state, m) { + const described = hasDocs(m); + const withSchema = !!state.manifest.project?.with_schema; - const lrBtn = el("button", { class: `tab ${state.graphUI.dir === "LR" ? "active" : ""}` }, "LR"); - const tbBtn = el("button", { class: `tab ${state.graphUI.dir === "TB" ? 
"active" : ""}` }, "TB"); + if (!withSchema) { + return { described, withSchema: false, colDoc: 0, colTotal: 0, colMissing: 0 }; + } - function setDir(dir) { - dir = (dir || "LR").toUpperCase(); - if (state.graphUI.dir === dir) return; + const cols = m.columns || []; + const colTotal = cols.length; + const colDoc = cols.filter(isColumnDocumented).length; + const colMissing = colTotal - colDoc; - state.graphUI.dir = dir; + return { described, withSchema: true, colDoc, colTotal, colMissing }; +} - // update segmented control UI - lrBtn.classList.toggle("active", dir === "LR"); - tbBtn.classList.toggle("active", dir === "TB"); - dirPill.textContent = dir === "TB" ? "Top → Bottom" : "Left → Right"; +function computeDocsCoverage(state, modelsOverride) { + const models = modelsOverride || state.manifest.models || []; + const withSchema = !!state.manifest.project?.with_schema; - // remount graph - const g = graphTransformDirection(state.manifest.dag.graph, dir); - state._graphCtl = mountGraph(state, graphHost, g, { miniHost, showMini: true }); - } + let modelsDescribed = 0; + let colsTotal = 0; + let colsDoc = 0; + + const perModel = models.map(m => { + const described = hasDocs(m); + if (described) modelsDescribed += 1; + + let colTotal = 0, colDoc = 0, colMissing = 0; + if (withSchema) { + const cols = m.columns || []; + colTotal = cols.length; + colDoc = cols.filter(isColumnDocumented).length; + colMissing = colTotal - colDoc; + colsTotal += colTotal; + colsDoc += colDoc; + } - lrBtn.onclick = () => setDir("LR"); - tbBtn.onclick = () => setDir("TB"); + return { + name: m.name, + kind: m.kind, + path: m.path || "", + described, + colDoc, colTotal, colMissing, + }; + }); - // use this in your toolbar row: - const layoutTabs = el("div", { class: "tabs" }, lrBtn, tbBtn); + const undocumented = perModel.filter(p => !p.described || (withSchema && p.colMissing > 0)); + const missingModelDesc = perModel.filter(p => !p.described); + const missingColDocs = perModel.filter(p => 
withSchema && p.colMissing > 0); + const fullyDocumented = perModel.filter(p => p.described && (!withSchema || p.colMissing === 0)); - const graphCard = el("div", { class: "card" }, - el("div", { class: "grid" }, - el("div", { class: "dagHeader" }, - el("div", { class: "dagHeaderLeft" }, - el("div", { class: "dagTitleRow" }, - el("h2", {}, "DAG"), - dirPill - ), - el("p", { class: "dagSubtle" }, - "Pan/zoom • click a node to pin • click again to unpin • Ctrl/Cmd-click opens." - ) - ), + return { + withSchema, + modelsTotal: perModel.length, + modelsDescribed, + colsTotal, + colsDoc, + perModel, + undocumented, + missingModelDesc, + missingColDocs, + fullyDocumented, + }; +} - el("div", { class: "dagHeaderRight" }, - el("div", { class: "dagToolsRow" }, - fitBtn, resetBtn, zoomOutBtn, zoomInBtn, - layoutTabs - ), - el("div", { class: "dagToolsRow" }, - el("div", { class: "tabs dagModeTabs" }, - modeBtn("up", "Up"), - modeBtn("down", "Down"), - modeBtn("both", "Both"), - modeBtn("off", "Off"), - ), - el("div", { class: "dagDepth" }, depthPill, depthSlider), - ) - ) - ), +function renderDocsBadges(state, m, { compact = false } = {}) { + const st = modelDocsStatus(state, m); - el("div", { class: "graphWrap" }, graphHost, miniHost) - ) - ); + const modelPill = st.described + ? el("span", { class: "pillSmall pillGood" }, compact ? "Model docs" : "Model described") + : el("span", { class: "pillSmall pillBad" }, compact ? "No model docs" : "No model docs"); - queueMicrotask(() => { - const g0 = graphTransformDirection(graph, state.graphUI.dir); - state._graphCtl = mountGraph(state, graphHost, g0, { miniHost, showMini: true }); + let colPill = null; + if (!st.withSchema) { + colPill = el("span", { class: "pillSmall" }, compact ? "Schema off" : "Schema disabled"); + } else if (!st.colTotal) { + colPill = el("span", { class: "pillSmall" }, compact ? "No cols" : "No columns found"); + } else { + const ok = st.colMissing === 0; + colPill = el("span", { class: `pillSmall ${ok ? 
"pillGood" : "pillBad"}` }, + compact ? `${st.colDoc}/${st.colTotal} cols` : `${st.colDoc}/${st.colTotal} columns documented` + ); + } - const r = parseRoute(); - if (r.route === "home" && r.focus) { - state._graphCtl?.focus?.(r.focus, { zoom: 1.25, pin: true }); - } - }); + const contracted = hasContract(m); + const contractPill = contracted + ? el("span", { class:"pillSmall pillGood", title:"Model contract defined" }, "Contracted") + : null; - return graphCard; + const cls = compact ? "docPills docPillsCompact" : "docPills docPillsHeader"; + return el("div", { class: cls }, modelPill, colPill, contractPill); } -function renderModel(state, name, tabFromRoute, colFromRoute) { - const m = state.byModel.get(name); - if (!m) { - return el("div", { class: "card" }, el("h2", {}, "Model not found"), el("p", { class: "empty" }, name)); - } - - const active = (tabFromRoute || state.modelTabDefault || "overview").toLowerCase(); - const hasCol = !!(colFromRoute && String(colFromRoute).trim()); +function renderDocsCoverageCard(state, cov) { + const jumpBtn = el("button", { + class: "btn", + onclick: () => document.getElementById("undocModels")?.scrollIntoView({ behavior: "smooth", block: "start" }) + }, "Undocumented models ↓"); - let tab = ["overview","columns","lineage","code","meta"].includes(active) ? active : "overview"; - // Only force columns if col is present AND the URL didn't explicitly set a tab - if (hasCol && !tabFromRoute) tab = "columns"; + const colsNode = cov.withSchema + ? el("span", {}, `${cov.colsDoc}/${cov.colsTotal}`) + : el("span", { class: "empty" }, "Schema disabled"); - const header = el("div", { class: "card" }, + return el("div", { class: "card", id: "docsCoverage" }, el("div", { class: "grid2" }, el("div", {}, - el("h2", {}, m.name), - el("p", { class: "empty" }, m.relation ? 
`Relation: ${m.relation}` : "") + el("h2", {}, "Docs coverage"), + el("p", { class: "empty" }, "How complete your model + column descriptions are.") ), - el("div", {}, - el("button", { - class: "btn", - onclick: () => { location.hash = routeWithFacets("#/"); } - }, "← Overview"), - el("button", { - class: "btn", - onclick: async () => { try { await navigator.clipboard.writeText(m.path || ""); } catch {} } - }, "Copy path") - ) + el("div", {}, jumpBtn) ), - renderTabs(tab, (next) => { - // Persist default for convenience - state.modelTabDefault = next; - safeSet(state.STORE.modelTab, next); + el("div", { class: "kv" }, + el("div", { class: "k" }, "Models described"), + el("div", {}, `${cov.modelsDescribed}/${cov.modelsTotal}`), - setModelQuery({ - tab: next, - col: (next === "columns") ? (colFromRoute || "") : "" // clear col when leaving Columns - }); + el("div", { class: "k" }, "Columns documented"), + el("div", {}, colsNode), - }) + el("div", { class: "k" }, "Undocumented models"), + el("div", {}, `${cov.undocumented.length}/${cov.modelsTotal}`) + ) ); - - const panel = el("div", { class: "tabPanel" }, renderModelPanel(state, m, tab, colFromRoute)); - - return el("div", { class: "grid" }, header, panel); } -function renderModelPanel(state, m, tab, colFromRoute) { - if (tab === "overview") { - const deps = (m.deps || []).map(d => el("a", { href: routeWithFacets(`#/model/${escapeHashPart(d)}`) }, d)); - const usedBy = (m.used_by || []).map(u => el("a", { href: `#/model/${escapeHashPart(u)}` }, u)); - const sourcesUsed = (m.sources_used || []).map(s => - el("a", { href: routeWithFacets(`#/source/${escapeHashPart(s.source_name)}/${escapeHashPart(s.table_name)}`) }, `${s.source_name}.${s.table_name}`) - ); - const modelId = m.name; +function renderUndocumentedModelsCard(state, cov) { + state.coverageUI ||= { tab: "undoc", q: "" }; + const uiState = state.coverageUI; - // --- Neighborhood mini-graph (MODEL PAGE) --- - state.modelMini = state.modelMini || { mode: 
"both", depth: 2 }; + const countNode = el("span", { class: "colCount" }, ""); - const miniGraphHost = el("div", { class: "miniGraphHost" }); - let miniCtl = null; + const qInput = el("input", { + class: "search", + type: "search", + placeholder: "Filter models… (name or path)", + value: uiState.q || "", + oninput: (e) => { + uiState.q = e.target.value || ""; + renderList(); + }, + }); - const depthPill = el("span", { class: "pill" }, `Depth ${state.modelMini.depth}`); - const depthSlider = el("input", { - type: "range", min: "1", max: "6", - value: String(state.modelMini.depth), - oninput: (e) => { - state.modelMini.depth = Number(e.target.value || 2); - depthPill.textContent = `Depth ${state.modelMini.depth}`; - rerenderMini(); - } - }); + const tabs = [ + ["undoc", "Undocumented"], + ["noDesc", "Missing model docs"], + ["missingCols", "Undocumented columns"], + ["fully", "Fully documented"], + ["all", "All models"], + ]; - const miniModeTabs = el("div", { class: "tabs" }, - el("button", { class: `tab ${state.modelMini.mode==="up"?"active":""}`, onclick:()=>{ state.modelMini.mode="up"; syncMiniTabs(); rerenderMini(); } }, "Upstream"), - el("button", { class: `tab ${state.modelMini.mode==="down"?"active":""}`, onclick:()=>{ state.modelMini.mode="down"; syncMiniTabs(); rerenderMini(); } }, "Downstream"), - el("button", { class: `tab ${state.modelMini.mode==="both"?"active":""}`, onclick:()=>{ state.modelMini.mode="both"; syncMiniTabs(); rerenderMini(); } }, "Both"), - ); + const tabBtns = new Map(); - function syncMiniTabs() { - const btns = miniModeTabs.querySelectorAll(".tab"); - btns.forEach(b => b.classList.remove("active")); - const idx = state.modelMini.mode === "up" ? 0 : state.modelMini.mode === "down" ? 1 : 2; - btns[idx]?.classList.add("active"); + const tabRow = el("div", { class: "tabs" }, + ...tabs.map(([id, label]) => { + const btn = el("button", { + class: `tab ${uiState.tab === id ? 
"active" : ""}`, + onclick: () => { + uiState.tab = id; + syncTabs(); + renderList(); + queueMicrotask(() => qInput.focus()); + }, + }, label); + tabBtns.set(id, btn); + return btn; + }) + ); + + function syncTabs() { + for (const [id, btn] of tabBtns.entries()) { + btn.classList.toggle("active", uiState.tab === id); } + } - function rerenderMini() { - miniGraphHost.textContent = ""; - const centerNode = (state.manifest.dag?.graph?.nodes || []) - .find(n => n.kind === "model" && n.name === m.name); + const list = el("ul", { class: "docList" }); + + function baseRows() { + if (uiState.tab === "undoc") return cov.undocumented.slice(); + if (uiState.tab === "noDesc") return cov.missingModelDesc.slice(); + if (uiState.tab === "missingCols") return cov.missingColDocs.slice(); + if (uiState.tab === "fully") return cov.fullyDocumented.slice(); + return cov.perModel.slice(); + } + + function getRows() { + let rows = baseRows(); + + const q = (uiState.q || "").trim().toLowerCase(); + if (q) { + rows = rows.filter(r => + (r.name || "").toLowerCase().includes(q) || + (r.path || "").toLowerCase().includes(q) + ); + } + + // Most "work" first: missing model docs, then missing cols, then name + rows.sort((a, b) => { + const aw = (a.described ? 0 : 100000) + (a.colMissing || 0); + const bw = (b.described ? 0 : 100000) + (b.colMissing || 0); + if (bw !== aw) return bw - aw; + return String(a.name).localeCompare(String(b.name)); + }); + + return rows; + } + + function rowNode(r) { + const pills = el("div", { class: "docRowPills" }, + r.described + ? el("span", { class: "pillSmall pillGood" }, "Model described") + : el("span", { class: "pillSmall pillBad" }, "No model docs"), + cov.withSchema + ? (r.colTotal + ? el("span", { class: `pillSmall ${r.colMissing ? 
"pillBad" : "pillGood"}` }, `${r.colDoc}/${r.colTotal} cols`) + : el("span", { class: "pillSmall" }, "No cols") + ) + : el("span", { class: "pillSmall" }, "Schema off"), + ); + + const main = el("div", { class: "docRowMain" }, + el("div", { class: "docRowTitle" }, el("code", {}, r.name)), + r.path ? el("div", { class: "docRowSub" }, r.path) : null + ); + + return el("li", { class: "docRow" }, + el("a", { + href: routeWithFacets(`#/model/${escapeHashPart(r.name)}`), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets(`#/model/${escapeHashPart(r.name)}`); }, + title: r.path || r.name, + }, main, pills) + ); + } + + function renderList() { + const rows = getRows(); + countNode.textContent = `${rows.length} model${rows.length === 1 ? "" : "s"}`; + + if (!rows.length) { + list.replaceChildren(el("li", { class: "empty" }, "No matches.")); + return; + } + list.replaceChildren(...rows.map(rowNode)); + } + + renderList(); + + return el("div", { class: "card", id: "undocModels" }, + el("h2", {}, "Undocumented models"), + el("p", { class: "empty" }, "Jump straight to models that need documentation."), + el("div", { class: "colTools" }, qInput, countNode), + tabRow, + list + ); +} + +// ----------- Contracts -------------------- +function hasContract(m) { + const c = m && m.contract; + if (!c) return false; + if (c === true) return true; + const cols = contractColumnsFrom(m); + const tbl = contractTableConstraintsFrom(m); + return (cols && cols.length) || (tbl && tbl.length) || (c.enforced != null); +} + +function contractColumnsFrom(m) { + const c = m && m.contract; + if (!c) return []; + if (Array.isArray(c)) return c; + + const colsSpec = c.columns ?? c.schema ?? 
c.fields; + if (!colsSpec) return []; + + if (Array.isArray(colsSpec)) return colsSpec.filter(x => x && typeof x === "object"); + + if (colsSpec && typeof colsSpec === "object") { + return Object.entries(colsSpec).map(([name, spec]) => { + if (spec && typeof spec === "object" && !Array.isArray(spec)) return { name, ...spec }; + if (typeof spec === "string") return { name, dtype: spec }; + return { name }; + }); + } + return []; +} + +function contractTableConstraintsFrom(m) { + const c = m && m.contract; + if (!c || typeof c !== "object") return []; + const v = c.constraints ?? c.table_constraints; + if (!v) return []; + return Array.isArray(v) ? v : [v]; +} + +function normalizeContractCol(col) { + const name = String(col?.name || "").trim(); + if (!name) return null; + + const dtype = col?.dtype ?? col?.type ?? col?.data_type; + let nullable = col?.nullable; + if (nullable == null && col?.not_null != null) nullable = !col.not_null; + + let constraints = col?.constraints ?? col?.tests ?? []; + if (constraints && !Array.isArray(constraints)) constraints = [constraints]; + + return { + name, + dtype: dtype != null ? String(dtype) : "", + nullable: (nullable === true || nullable === false) ? nullable : null, + constraints: constraints || [], + }; +} + +function renderConstraintsList(items) { + const arr = Array.isArray(items) ? items : (items ? 
[items] : []); + if (!arr.length) return el("span", { class:"empty" }, "—"); + + const wrap = el("span", { class:"pillRow" }); + for (const it of arr) { + if (typeof it === "string") { + wrap.appendChild(el("span", { class:"pillSmall" }, it)); + } else if (it && typeof it === "object") { + wrap.appendChild( + el("details", { style:"display:inline-block;" }, + el("summary", { class:"codeSummary" }, "constraint"), + el("pre", { class:"mono", style:"white-space:pre-wrap; margin:8px 0 0 0;" }, jsonPreview(it)) + ) + ); + } + } + return wrap; +} + +function canonicalType(t) { + const s = String(t || "").trim().toLowerCase(); + if (!s) return ""; + // normalize whitespace + strip trailing params like varchar(255), numeric(10,2) + return s.replace(/\s+/g, " ").replace(/\(.*\)\s*$/, "").trim(); +} + +function computeContractDrift(m, withSchema) { + const contracted = hasContract(m); + if (!contracted) { + return { status: "none", missing: [], extra: [], mismatches: [], byName: new Map(), schemaAvailable: false, constraintsComparable: false }; + } + + const schemaAvailable = !!withSchema && Array.isArray(m.columns) && m.columns.length > 0; + if (!schemaAvailable) { + return { status: "unavailable", missing: [], extra: [], mismatches: [], byName: new Map(), schemaAvailable: false, constraintsComparable: false }; + } + + const contractCols = contractColumnsFrom(m).map(normalizeContractCol).filter(Boolean); + const actualCols = (m.columns || []).map(c => ({ + name: String(c.name || ""), + dtype: c.dtype != null ? String(c.dtype) : "", + nullable: !!c.nullable, + constraints: c.constraints ?? 
null, // only comparable if your schema collector starts emitting it + })); + + const cMap = new Map(); // lowerName -> contract col + for (const c of contractCols) cMap.set(c.name.toLowerCase(), c); + + const aMap = new Map(); // lowerName -> actual col + for (const a of actualCols) aMap.set(a.name.toLowerCase(), a); + + const constraintsComparable = actualCols.some(a => Array.isArray(a.constraints)); + + const missing = []; + const mismatches = []; + const byName = new Map(); // lowerName -> { missing?:true, extra?:true, issues:[], expected, actual } + + for (const c of contractCols) { + const key = c.name.toLowerCase(); + const a = aMap.get(key); + + if (!a) { + missing.push(c.name); + byName.set(key, { missing: true, issues: ["missing"], expected: c, actual: null }); + continue; + } + + const issues = []; + + // type mismatch (if contract specifies dtype) + if (c.dtype) { + const expT = canonicalType(c.dtype); + const actT = canonicalType(a.dtype); + if (expT && actT && expT !== actT) issues.push("type"); + } + + // nullability mismatch (if contract specifies nullable) + if (c.nullable != null) { + if (!!c.nullable !== !!a.nullable) issues.push("nullability"); + } + + // constraints mismatch (only if warehouse schema exposes constraints) + if (constraintsComparable && (c.constraints || []).length) { + const exp = JSON.stringify(c.constraints || []); + const act = JSON.stringify(a.constraints || []); + if (exp !== act) issues.push("constraints"); + } + + if (issues.length) { + mismatches.push({ name: c.name, issues }); + byName.set(key, { issues, expected: c, actual: a }); + } + } + + const extra = []; + for (const a of actualCols) { + const key = a.name.toLowerCase(); + if (!cMap.has(key)) { + extra.push(a.name); + byName.set(key, { extra: true, issues: ["extra"], expected: null, actual: a }); + } + } + + const status = (missing.length || extra.length || mismatches.length) ? 
// -------- Code Viewer helpers ---------------------

/**
 * Copy `text` to the clipboard on a best-effort basis.
 *
 * Uses `navigator.clipboard.writeText` when it exists; otherwise falls back to
 * a temporary <textarea> plus `document.execCommand("copy")`. Empty input is a
 * no-op and failures are deliberately swallowed (nothing is reported to the
 * caller).
 *
 * @param {*} text Value to copy; stringified first.
 */
function copyToClipboard(text) {
  const s = String(text || "");
  if (!s) return;

  if (navigator.clipboard && navigator.clipboard.writeText) {
    // Rejections (e.g. permission denied) are intentionally ignored.
    navigator.clipboard.writeText(s).catch(() => {});
    return;
  }

  // Fallback path for environments without the async Clipboard API.
  const ta = document.createElement("textarea");
  ta.value = s;
  document.body.appendChild(ta);
  try {
    ta.select();
    document.execCommand("copy");
  } catch {
    // best effort: nothing useful to do if the legacy copy fails
  } finally {
    // Always detach the helper element, even when select/copy throws.
    ta.remove();
  }
}

/**
 * Escape the five HTML-significant characters so `s` can be embedded in
 * markup or in a quoted attribute value.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {*} s Value to escape; stringified first.
 * @returns {string} HTML-safe text.
 */
function escapeHtml(s) {
  return String(s)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#39;");
}

// Upper-cased SQL words rendered with the keyword style.
const SQL_KW = new Set([
  "SELECT","FROM","WHERE","GROUP","BY","ORDER","HAVING","LIMIT","QUALIFY",
  "JOIN","INNER","LEFT","RIGHT","FULL","CROSS","OUTER","ON","USING",
  "UNION","ALL","DISTINCT","EXCEPT","INTERSECT",
  "WITH","AS",
  "CASE","WHEN","THEN","ELSE","END",
  "AND","OR","NOT","IN","IS","NULL","LIKE","ILIKE","BETWEEN","EXISTS",
  "CAST","TRY_CAST",
  "CREATE","TABLE","VIEW","MATERIALIZED","REPLACE",
  "INSERT","INTO","VALUES","UPDATE","SET","DELETE",
  "OVER","PARTITION","ROWS","RANGE","CURRENT","ROW","FOLLOWING","PRECEDING",
  "TRUE","FALSE"
]);

/**
 * Tokenize SQL with a small hand-written scanner and return HTML in which
 * every token is wrapped in `<span class="tok-…">`.
 *
 * Classes used: tok-com (comments), tok-str (single-quoted strings),
 * tok-id (identifiers, quoted identifiers), tok-num, tok-kw, tok-fn (a word
 * followed by "("). Punctuation and whitespace are escaped but not wrapped.
 *
 * @param {*} sql SQL text; falsy values are treated as "".
 * @returns {string} HTML-escaped markup.
 */
function highlightSqlToHtml(sql) {
  const s = String(sql || "");
  const parts = [];
  let i = 0;

  const emit = (cls, chunk) => {
    parts.push(`<span class="${cls}">${escapeHtml(chunk)}</span>`);
  };

  const isWordStart = (c) => /[A-Za-z_]/.test(c);
  const isWord = (c) => /[A-Za-z0-9_$]/.test(c);
  const isDigit = (c) => /[0-9]/.test(c);

  // Index just past the quoted run opened at `start`; a doubled quote
  // character inside the run is an escape, not a terminator.
  const scanQuoted = (start, q) => {
    let j = start + 1;
    while (j < s.length) {
      if (s[j] === q) {
        if (s[j + 1] === q) { j += 2; continue; }
        return j + 1;
      }
      j++;
    }
    return j;
  };

  while (i < s.length) {
    const c = s[i];
    const n = s[i + 1];

    // line comment: -- … (up to, not including, the newline)
    if (c === "-" && n === "-") {
      let j = i + 2;
      while (j < s.length && s[j] !== "\n") j++;
      emit("tok-com", s.slice(i, j));
      i = j;
      continue;
    }

    // block comment: /* … */ (an unterminated comment runs to the end)
    if (c === "/" && n === "*") {
      let j = i + 2;
      while (j < s.length && !(s[j] === "*" && s[j + 1] === "/")) j++;
      j = Math.min(s.length, j + 2);
      emit("tok-com", s.slice(i, j));
      i = j;
      continue;
    }

    // single-quoted string literal
    if (c === "'") {
      const j = scanQuoted(i, c);
      emit("tok-str", s.slice(i, j));
      i = j;
      continue;
    }

    // quoted identifier: "…" or `…`
    if (c === '"' || c === "`") {
      const j = scanQuoted(i, c);
      emit("tok-id", s.slice(i, j));
      i = j;
      continue;
    }

    // number (digits and dots only)
    if (isDigit(c)) {
      let j = i + 1;
      while (j < s.length && /[0-9.]/.test(s[j])) j++;
      emit("tok-num", s.slice(i, j));
      i = j;
      continue;
    }

    // word: keyword / function call / identifier
    if (isWordStart(c)) {
      let j = i + 1;
      while (j < s.length && isWord(s[j])) j++;
      const word = s.slice(i, j);

      // Peek at the next non-space character to spot a call.
      let k = j;
      while (k < s.length && /\s/.test(s[k])) k++;
      const isFn = s[k] === "(";

      if (SQL_KW.has(word.toUpperCase())) emit("tok-kw", word);
      else if (isFn) emit("tok-fn", word);
      else emit("tok-id", word);

      i = j;
      continue;
    }

    // everything else (punctuation / whitespace)
    parts.push(escapeHtml(c));
    i++;
  }

  return parts.join("");
}

// Python reserved words rendered with the keyword style (case-sensitive).
const PY_KW = new Set([
  "False","None","True",
  "and","as","assert","async","await",
  "break","class","continue",
  "def","del",
  "elif","else","except",
  "finally","for","from",
  "global","if","import","in","is",
  "lambda","nonlocal","not",
  "or","pass",
  "raise","return",
  "try","while","with",
  "yield"
]);

/**
 * Tokenize Python source with a small hand-written scanner and return HTML in
 * which every token is wrapped in `<span class="tok-…">`.
 *
 * Classes used: tok-com (# comments), tok-str (string literals including an
 * r/u/b/f prefix), tok-num, tok-kw, tok-fn (decorator lines, the name after
 * `def`, and names followed by "("), tok-id (other names, including the name
 * after `class`). Everything else is escaped but not wrapped.
 *
 * @param {*} code Python text; falsy values are treated as "".
 * @returns {string} HTML-escaped markup.
 */
function highlightPythonToHtml(code) {
  const s = String(code || "");
  const parts = [];
  let i = 0;

  const emit = (cls, chunk) => {
    parts.push(`<span class="${cls}">${escapeHtml(chunk)}</span>`);
  };

  const isWordStart = (c) => /[A-Za-z_]/.test(c);
  const isWord = (c) => /[A-Za-z0-9_]/.test(c);
  const isDigit = (c) => /[0-9]/.test(c);
  const isQuote = (ch) => ch === "'" || ch === '"';
  const isPrefixChar = (ch) => /[rRuUbBfF]/.test(ch);

  const peekNonSpace = (idx) => {
    let j = idx;
    while (j < s.length && /\s/.test(s[j])) j++;
    return s[j] || "";
  };

  // True when only spaces/tabs separate `idx` from the start of its line.
  const atLineStart = (idx) => {
    let k = idx - 1;
    while (k >= 0 && (s[k] === " " || s[k] === "\t")) k--;
    return k < 0 || s[k] === "\n";
  };

  // Set after `def` / `class` so the following identifier gets its own style.
  let expectDefName = false;
  let expectClassName = false;

  while (i < s.length) {
    const c = s[i];

    // Comments: # … (until newline)
    if (c === "#") {
      let j = i + 1;
      while (j < s.length && s[j] !== "\n") j++;
      emit("tok-com", s.slice(i, j));
      i = j;
      continue;
    }

    // String literals, with an optional prefix such as r"…", f'…', rb"""…""".
    // `q` is the index of the opening quote; it equals `i` when there is no
    // prefix. A prefix only counts when it directly precedes a quote and is
    // not the tail of a longer identifier.
    let q = i;
    if (isPrefixChar(c)) {
      let j = i;
      while (j < s.length && isPrefixChar(s[j]) && (j - i) < 3) j++;
      const prev = s[i - 1] || "";
      if (!isWord(prev) && isQuote(s[j])) q = j;
    }

    if (isQuote(s[q])) {
      const quote = s[q];
      // Look ahead from the quote itself, not from the start of the prefix.
      const triple = s[q + 1] === quote && s[q + 2] === quote;
      const width = triple ? 3 : 1;

      let j = q + width;
      while (j < s.length) {
        if (s[j] === "\\") { j += 2; continue; } // escaped char never closes
        if (triple) {
          if (s[j] === quote && s[j + 1] === quote && s[j + 2] === quote) { j += 3; break; }
        } else {
          if (s[j] === quote) { j++; break; }
          // A one-line string cannot span lines; stop so a stray quote does
          // not swallow the rest of the file.
          if (s[j] === "\n") break;
        }
        j++;
      }
      j = Math.min(j, s.length);

      emit("tok-str", s.slice(i, j)); // slice from `i` keeps the prefix
      i = j;
      continue;
    }

    // Numbers (simple: digits, dots and underscores)
    if (isDigit(c)) {
      let j = i + 1;
      while (j < s.length && /[0-9._]/.test(s[j])) j++;
      emit("tok-num", s.slice(i, j));
      i = j;
      continue;
    }

    // Decorators: only when "@" starts the line's content, so the matrix
    // multiplication operator (a @ b) is left alone.
    if (c === "@" && atLineStart(i)) {
      let j = i + 1;
      while (j < s.length && s[j] !== "\n") j++;
      emit("tok-fn", s.slice(i, j)); // reuse tok-fn styling for decorators
      i = j;
      continue;
    }

    // Identifiers / keywords
    if (isWordStart(c)) {
      let j = i + 1;
      while (j < s.length && isWord(s[j])) j++;
      const word = s.slice(i, j);

      if (expectDefName) {
        emit("tok-fn", word);
        expectDefName = false;
      } else if (expectClassName) {
        emit("tok-id", word);
        expectClassName = false;
      } else if (PY_KW.has(word)) {
        emit("tok-kw", word);
        if (word === "def") expectDefName = true;
        if (word === "class") expectClassName = true;
      } else if (peekNonSpace(j) === "(") {
        emit("tok-fn", word); // call-like: name(...)
      } else {
        emit("tok-id", word);
      }

      i = j;
      continue;
    }

    // Everything else
    parts.push(escapeHtml(c));
    i++;
  }

  return parts.join("");
}

/**
 * Build a <pre> element showing `text`.
 *
 * With `highlight` set and `lang` equal to "sql" or "python", the text is
 * tokenized and placed inside a <code> child via its `html` attribute;
 * otherwise the raw text is passed to `el` as the <pre> child.
 *
 * @param {*} text Source to display; falsy values are treated as "".
 * @param {{wrap?: boolean, lang?: string, highlight?: boolean}} [opts]
 *   `wrap` adds the "codeWrap" class.
 * @returns {*} Whatever `el` returns for the <pre> node.
 */
function renderCodeBlock(text, { wrap = false, lang = "", highlight = false } = {}) {
  const s = String(text || "");
  const cls = `codeBlock ${wrap ? "codeWrap" : ""}`;

  if (highlight && lang === "sql") {
    return el("pre", { class: cls },
      el("code", { class: "language-sql", html: highlightSqlToHtml(s) })
    );
  }

  if (highlight && lang === "python") {
    return el("pre", { class: cls },
      el("code", { class: "language-python", html: highlightPythonToHtml(s) })
    );
  }

  return el("pre", { class: cls }, s);
}
raw + : ""; + copyToClipboard(txt); + } + }, "Copy") + ); + + // ---- Python models: no SQL sub-tabs ---- + if (!isSql) { + const py = m.python_source || m.source || ""; + const body = py + ? renderCodeBlock(py, { wrap: ui.wrap, lang: "python", highlight: true }) + : el("p", { class:"empty" }, "No source available for this model."); + + return el("div", { class:"card" }, + el("div", { class:"row", style:"align-items:center; justify-content:space-between;" }, + el("h3", { style:"margin:0;" }, "Python"), + headRight + ), + body + ); + } + + const tabs = el("div", { class: "pillRow" }, + el("button", { class: `tab ${view === "rendered" ? "active" : ""}`, type:"button", onclick: () => setView("rendered") }, "Rendered"), + el("button", { class: `tab ${view === "raw" ? "active" : ""}`, type:"button", onclick: () => setView("raw") }, "Raw"), + el("button", { class: `tab ${view === "refs" ? "active" : ""}`, type:"button", onclick: () => setView("refs") }, "Refs resolved"), + ); + + const body = (() => { + if (view === "rendered") { + return rendered + ? renderCodeBlock(rendered, { wrap: ui.wrap, lang: "sql", highlight: true }) + : el("p", { class:"empty" }, "Rendered SQL not available. Enable docs.include_rendered_sql in the generator."); + } + + if (view === "raw") { + return raw + ? renderCodeBlock(raw, { wrap: ui.wrap, lang: "sql", highlight: true }) + : el("p", { class:"empty" }, "Raw SQL not available in the manifest."); + } + + // refs resolved + // Prefer explicit rendered_refs mapping if provided, else derive from deps/sources_used. 
+ const rows = []; + const seen = new Set(); + const pushRow = (r) => { + const key = `${r.kind}:${r.name}`; + if (seen.has(key)) return; + seen.add(key); + rows.push(r); + }; + + // Models: prefer rendered_refs, else deps + if (m.rendered_refs) { + if (Array.isArray(m.rendered_refs)) { + for (const r of m.rendered_refs) pushRow({ kind:"model", name:r.name || "", relation:r.relation || "" }); + } else if (typeof m.rendered_refs === "object") { + for (const [k, v] of Object.entries(m.rendered_refs)) pushRow({ kind:"model", name:k, relation:String(v || "") }); + } + } else { + const byName = new Map((state.manifest.models || []).map(x => [x.name, x])); + for (const d of (m.deps || [])) { + const md = byName.get(d); + rows.push({ kind:"model", name:d, relation: md?.relation || "" }); + } + } + + // Sources: ALWAYS include + for (const s of (m.sources_used || [])) { + const nm = `${s.source_name}.${s.table_name}`; + rows.push({ + kind: "source", + name: `${s.source_name}.${s.table_name}`, + source_name: s.source_name, + table_name: s.table_name, + relation: s.relation || "" + }); + } + + if (!rows.length) return el("p", { class:"empty" }, "No references detected for this model."); + + return el("table", { class:"table" }, + el("thead", {}, el("tr", {}, + el("th", {}, "Kind"), + el("th", {}, "Reference"), + el("th", {}, "Resolved relation"), + )), + el("tbody", {}, + ...rows.map(r => { + const refCell = (() => { + if (r.kind === "model") { + const href = routeWithFacets(`#/model/${escapeHashPart(r.name)}`); + return el("a", { + href, + onclick: (e) => { e.preventDefault(); location.hash = href; } + }, r.name); + } + + if (r.kind === "source") { + const href = routeWithFacets(`#/source/${escapeHashPart(r.source_name)}/${escapeHashPart(r.table_name)}`); + return el("a", { + href, + onclick: (e) => { e.preventDefault(); location.hash = href; } + }, r.name); + } + + return el("span", {}, r.name); + })(); + + return el("tr", {}, + el("td", {}, el("span", { class:"pillSmall" 
// -------- Landing page (overview dashboard) helpers ---------------------

/**
 * Write `nextFacets` into the current hash query and navigate to the home
 * route ("#/").
 *
 * @param {*} nextFacets Facet object handed to `writeModelFacetsToQuery`.
 * @param {Object} [extra] Additional query params (e.g. { home: "undoc" });
 *   a null or blank value removes that key.
 */
function setModelFacetsAndGoHome(nextFacets, extra = {}) {
  const { query } = parseHashWithQuery();
  const q = new URLSearchParams(query.toString());
  writeModelFacetsToQuery(q, nextFacets);

  for (const [k, v] of Object.entries(extra || {})) {
    if (v == null || String(v).trim() === "") q.delete(k);
    else q.set(k, String(v));
  }

  const qs = q.toString();
  location.hash = `#/${qs ? "?" + qs : ""}`;
}

/**
 * Find the first usable change timestamp on a model entry.
 *
 * Looks at updated_at, modified_at, last_modified, changed_at and created_at,
 * first on the model itself and then on `model.meta`. Finite numbers below
 * 1e12 are treated as seconds and converted to milliseconds; strings and Date
 * objects are parsed with `Date`. Other value types are ignored.
 *
 * @param {*} m Model entry; non-objects (including null) yield null.
 * @returns {number|null} Epoch milliseconds, or null when nothing parses.
 */
function getModelChangeTs(m) {
  // Normalize first so a null/primitive `m` cannot throw on property access.
  const model = (m && typeof m === "object") ? m : {};
  const meta = model.meta || {};
  const candidates = [
    model.updated_at, model.modified_at, model.last_modified, model.changed_at, model.created_at,
    meta.updated_at, meta.modified_at, meta.last_modified, meta.changed_at, meta.created_at,
  ];

  for (const v of candidates) {
    if (v == null || v === "") continue;

    if (typeof v === "number") {
      if (!Number.isFinite(v)) continue;
      // seconds vs ms
      return v < 1e12 ? Math.floor(v * 1000) : Math.floor(v);
    }

    // Only strings and Dates are meaningful date inputs; `new Date(true)`
    // or `new Date([..])` would otherwise yield bogus timestamps.
    if (typeof v !== "string" && !(v instanceof Date)) continue;

    const ms = new Date(v).getTime();
    if (!Number.isNaN(ms)) return ms;
  }
  return null;
}

/**
 * Format epoch milliseconds as a short, locale-dependent date
 * (year, abbreviated month, two-digit day).
 *
 * @param {*} ms Value accepted by the `Date` constructor.
 * @returns {string} Formatted date, or "" when the value is not a valid date
 *   or formatting throws.
 */
function fmtDateShort(ms) {
  try {
    const d = new Date(ms);
    if (Number.isNaN(d.getTime())) return "";
    return d.toLocaleDateString(undefined, { year: "numeric", month: "short", day: "2-digit" });
  } catch {
    return "";
  }
}

/**
 * Derive per-model impact numbers and two top-10 rankings.
 *
 * score = (usedBy + 1) * (deps + 1), where `deps` and `usedBy` are the
 * lengths of the model's `deps` and `used_by` arrays.
 *
 * @param {*} state Unused; kept for signature parity with other helpers.
 * @param {Array<Object>} models Model entries (falsy is treated as empty).
 * @returns {{rows: Object[], topFanOut: Object[], topCritical: Object[], edges: number}}
 *   `edges` is the sum of `deps` over all rows.
 */
function computeImpactOverview(state, models) {
  const rows = (models || []).map(m => {
    const deps = (m.deps || []).length;
    const usedBy = (m.used_by || []).length;
    return {
      name: m.name,
      kind: m.kind,
      materialized: m.materialized || "",
      path: m.path || "",
      deps,
      usedBy,
      score: (usedBy + 1) * (deps + 1),
    };
  });

  const byName = (a, b) => a.name.localeCompare(b.name);

  const topFanOut = [...rows]
    .sort((a, b) => (b.usedBy - a.usedBy) || byName(a, b))
    .slice(0, 10);

  const topCritical = [...rows]
    .sort((a, b) => (b.score - a.score) || (b.usedBy - a.usedBy) || (b.deps - a.deps) || byName(a, b))
    .slice(0, 10);

  const edges = rows.reduce((acc, r) => acc + (r.deps || 0), 0);

  return { rows, topFanOut, topCritical, edges };
}

/**
 * List the ten most recently changed models, newest first.
 *
 * Models for which `getModelChangeTs` returns a falsy value are left out.
 *
 * @param {Array<Object>} models Model entries (falsy is treated as empty).
 * @returns {{available: boolean, rows: Object[]}} `available` is false when
 *   no model carried a usable timestamp.
 */
function computeRecentChangedOverview(models) {
  const rows = [];
  for (const m of (models || [])) {
    const ts = getModelChangeTs(m);
    if (!ts) continue;
    rows.push({ name: m.name, kind: m.kind, materialized: m.materialized || "", path: m.path || "", ts });
  }
  rows.sort((a, b) => b.ts - a.ts);

  const top = rows.slice(0, 10);
  return { available: top.length > 0, rows: top };
}

/**
 * Render ranking rows as a <ul class="docList"> of links to model pages.
 *
 * @param {*} state Unused here; kept for signature parity with callers.
 * @param {Array<Object>} rows Entries with name/path/kind/materialized.
 * @param {function(Object): *} metricNode Returns the trailing pill for a row.
 * @returns {*} The list node; a single "—" item when `rows` is empty.
 */
function renderRankList(state, rows, metricNode) {
  const list = el("ul", { class: "docList" });
  const items = rows || [];

  if (!items.length) {
    list.replaceChildren(el("li", { class: "empty" }, "—"));
    return list;
  }

  list.replaceChildren(
    ...items.map(r => {
      // Build the route once; both href and the click handler use it.
      const href = routeWithFacets(`#/model/${escapeHashPart(r.name)}`);
      return el("li", { class: "docRow" },
        el("a", {
            href,
            onclick: (e) => { e.preventDefault(); location.hash = href; },
            title: r.path || r.name,
          },
          el("div", { class: "docRowMain" },
            el("div", { class: "docRowTitle" }, r.name),
            r.path ? el("div", { class: "docRowSub" }, r.path) : null
          ),
          el("div", { class: "docRowPills" },
            pillForKind(normalizeModelKind(r.kind)),
            r.materialized ? el("span", { class: "pillSmall" }, r.materialized) : null,
            metricNode(r)
          )
        )
      );
    })
  );

  return list;
}
"display:flex; gap:10px; flex-wrap:wrap; margin-top:10px;" }, + el("a", { + class: "btnTiny", + href: routeWithFacets("#/?home=undoc"), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/?home=undoc"); }, + }, `Undocumented (${cov.undocumented.length})`), + + el("a", { + class: "btnTiny", + href: routeWithFacets("#/?home=impact"), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/?home=impact"); }, + }, "High impact"), + + el("button", { + class: "btnTiny", + type: "button", + onclick: () => { + // Set facets to python-only, clearing other facet constraints. + setModelFacetsAndGoHome({ kinds: ["python"], materialized: "", pathPrefix: "" }, { home: "" }); + }, + }, `Python models (${(state.manifest.models || []).filter(m => normalizeModelKind(m.kind) === "python").length})`), + + filtN + ? el("button", { + class: "btnTiny", + type: "button", + onclick: () => setModelFacetsAndGoHome({ kinds: ["sql", "python"], materialized: "", pathPrefix: "" }, { home: "" }), + }, "Clear filters") + : null + ); + + const newestLine = changed.available + ? (() => { + const r = changed.rows[0]; + return el("div", {}, + el("span", { class: "pillSmall" }, "Newest"), + " ", + el("span", {}, `${r.name} • ${fmtDateShort(r.ts)}`) + ); + })() + : el("span", { class: "empty" }, "No change timestamps available in manifest."); + + return el("div", { class: "card" }, + el("div", { class: "grid2" }, + el("div", {}, + el("h2", {}, "Overview dashboard"), + el("p", { class: "empty" }, "Stats, impact hotspots, docs coverage, and quick links."), + filterPills.length + ? el("div", { class: "docPills docPillsCompact", style: "margin-top:8px;" }, ...filterPills) + : null, + links + ), + el("div", {}, + el("h3", {}, "Stats"), + el("div", { class: "kv" }, + el("div", { class: "k" }, "Models"), + el("div", {}, filtN ? 
/**
 * Card listing the models from `impact.topFanOut` that have at least one
 * direct downstream consumer.
 *
 * @param {*} state Passed through to `renderRankList`.
 * @param {*} modelsSubset Unused; kept for the existing call signature.
 * @param {{topFanOut?: Object[]}} impact Result of `computeImpactOverview`.
 * @returns {*} Card node with id "fanoutModels".
 */
function renderTopFanOutCard(state, modelsSubset, impact) {
  const consumed = (impact.topFanOut || []).filter(row => row.usedBy > 0);
  const usedByPill = (row) => el("span", { class: "pillSmall" }, `used by ${row.usedBy}`);

  return el("div", { class: "card", id: "fanoutModels" },
    el("h2", {}, "Top fan-out nodes"),
    el("p", { class: "empty" }, "Models with the most downstream consumers (direct)."),
    renderRankList(state, consumed, usedByPill)
  );
}

/**
 * Card listing `impact.topCritical`, each row tagged with its score.
 *
 * @param {*} state Passed through to `renderRankList`.
 * @param {{topCritical?: Object[]}} impact Result of `computeImpactOverview`.
 * @returns {*} Card node with id "impactModels".
 */
function renderCriticalModelsCard(state, impact) {
  const ranked = [...(impact.topCritical || [])];
  const scorePill = (row) => el("span", { class: "pillSmall" }, `score ${row.score}`);

  return el("div", { class: "card", id: "impactModels" },
    el("h2", {}, "Most critical models"),
    el("p", { class: "empty" }, "Heuristic score combining upstream deps and downstream usage."),
    renderRankList(state, ranked, scorePill)
  );
}

/**
 * Card listing recently changed models, or an explanatory placeholder when
 * the manifest carried no change timestamps.
 *
 * @param {*} state Passed through to `renderRankList`.
 * @param {{available: boolean, rows?: Object[]}} changed Result of
 *   `computeRecentChangedOverview`.
 * @returns {*} Card node with id "changedModels".
 */
function renderRecentChangedCard(state, changed) {
  const cardAttrs = { class: "card", id: "changedModels" };

  if (changed.available) {
    const datePill = (row) => el("span", { class: "pillSmall" }, fmtDateShort(row.ts));
    return el("div", cardAttrs,
      el("h2", {}, "Newest / changed"),
      el("p", { class: "empty" }, "Models sorted by last change timestamp (if provided)."),
      renderRankList(state, changed.rows || [], datePill)
    );
  }

  return el("div", cardAttrs,
    el("h2", {}, "Newest / changed"),
    el("p", { class: "empty" }, "No per-model change timestamps were found in the manifest."),
    el("p", { class: "empty" }, "If you add fields like updated_at / modified_at to model entries, this list will populate.")
  );
}

/**
 * Restrict a laid-out graph to the models that pass `facets`.
 *
 * Model nodes survive when `filterModelsWithFacets` keeps their name. An edge
 * survives when both endpoints exist, no model endpoint was dropped and at
 * least one endpoint is a model; non-model endpoints of surviving edges are
 * pulled in. Surviving nodes and edges are shallow-copied and node
 * coordinates are shifted so the minimum x/y equals the padding.
 *
 * @param {*} graph Graph with `nodes`/`edges`; returned untouched when it is
 *   falsy or has no `nodes` array.
 * @param {Array<Object>} models All model entries.
 * @param {*} facets Facet selection handed to `filterModelsWithFacets`.
 * @returns {*} New graph `{ nodes, edges, direction, bounds }`.
 */
function filterGraphWithModelFacets(graph, models, facets) {
  if (!graph || !Array.isArray(graph.nodes)) return graph;

  const direction = graph.direction || "LR";
  const sourceNodes = graph.nodes || [];

  // Names of the models that pass the facets.
  const survivingNames = new Set(
    filterModelsWithFacets(models || [], facets).map(m => m.name)
  );
  const nodeById = new Map(sourceNodes.map(n => [n.id, n]));

  // Seed with the ids of surviving model nodes.
  const keptIds = new Set(
    sourceNodes
      .filter(n => n.kind === "model" && survivingNames.has(n.name))
      .map(n => n.id)
  );

  const isDroppedModel = (n) => n.kind === "model" && !keptIds.has(n.id);

  const keptEdges = [];
  for (const edge of (graph.edges || [])) {
    const src = nodeById.get(edge.from);
    const dst = nodeById.get(edge.to);
    if (!src || !dst) continue;
    if (isDroppedModel(src) || isDroppedModel(dst)) continue;
    // skip source->source noise (shouldn't happen, but safe)
    if (src.kind !== "model" && dst.kind !== "model") continue;

    keptEdges.push({ ...edge });
    keptIds.add(src.id);
    keptIds.add(dst.id);
  }

  const keptNodes = sourceNodes
    .filter(n => keptIds.has(n.id))
    .map(n => ({ ...n }));

  // Empty result: small fixed bounds so a "fit" on the result stays sane.
  if (keptNodes.length === 0) {
    return {
      nodes: [],
      edges: [],
      direction,
      bounds: { minx:0, miny:0, maxx:100, maxy:100, width:100, height:100 }
    };
  }

  // Shift everything so the top-left-most node sits at (PAD, PAD).
  const PAD = 24;
  const num = (v) => Number(v || 0);

  let minx = Infinity, miny = Infinity, maxx = -Infinity, maxy = -Infinity;
  for (const node of keptNodes) {
    const left = num(node.x), top = num(node.y);
    minx = Math.min(minx, left);
    miny = Math.min(miny, top);
    maxx = Math.max(maxx, left + num(node.w));
    maxy = Math.max(maxy, top + num(node.h));
  }

  const dx = PAD - minx;
  const dy = PAD - miny;
  for (const node of keptNodes) {
    node.x = num(node.x) + dx;
    node.y = num(node.y) + dy;
  }
  maxx += dx;
  maxy += dy;

  return {
    nodes: keptNodes,
    edges: keptEdges,
    direction,
    bounds: {
      minx: PAD,
      miny: PAD,
      maxx,
      maxy,
      width: (maxx - PAD + PAD),
      height: (maxy - PAD + PAD)
    }
  };
}
"active" : ""}`, + onclick: () => { + state.graphUI.mode = id; + syncShareStateToUrl(state); + state._graphCtl?.refresh?.(); + } + }, label); + + const depthPill = el("span", { class: "pill" }, `Depth ${state.graphUI.depth}`); + const depthSlider = el("input", { + type: "range", min: "1", max: "8", + value: String(state.graphUI.depth), + oninput: (e) => { + state.graphUI.depth = Number(e.target.value || 2); + syncShareStateToUrl(state); + depthPill.textContent = `Depth ${state.graphUI.depth}`; + rerenderMini(); + } + }); + + const fitBtn = el("button", { class: "btnTiny", title: "Fit to screen", onclick: () => state._graphCtl?.fit?.() }, "Fit"); + const resetBtn = el("button", { class: "btnTiny", title: "Reset pan/zoom", onclick: () => state._graphCtl?.reset?.() }, "Reset"); + const zoomOutBtn = el("button", { class: "btnTiny", title: "Zoom out", onclick: () => state._graphCtl?.zoomOut?.() }, "–"); + const zoomInBtn = el("button", { class: "btnTiny", title: "Zoom in", onclick: () => state._graphCtl?.zoomIn?.() }, "+"); + + const dirPill = el("span", { class: "pillSmall" }, + state.graphUI.dir === "TB" ? "Top → Bottom" : "Left → Right" + ); + + const lrBtn = el("button", { class: `tab ${state.graphUI.dir === "LR" ? "active" : ""}` }, "LR"); + const tbBtn = el("button", { class: `tab ${state.graphUI.dir === "TB" ? "active" : ""}` }, "TB"); + + function setDir(dir) { + dir = (dir || "LR").toUpperCase(); + if (state.graphUI.dir === dir) return; + + state.graphUI.dir = dir; + syncShareStateToUrl(state); + + // update segmented control UI + lrBtn.classList.toggle("active", dir === "LR"); + tbBtn.classList.toggle("active", dir === "TB"); + dirPill.textContent = dir === "TB" ? 
"Top → Bottom" : "Left → Right"; + + // remount graph + const g = graphTransformDirection(graphBase, dir); + state._graphCtl = mountGraph(state, graphHost, g, { miniHost, showMini: true }); + } + + lrBtn.onclick = () => setDir("LR"); + tbBtn.onclick = () => setDir("TB"); + + // use this in your toolbar row: + const layoutTabs = el("div", { class: "tabs" }, lrBtn, tbBtn); + + const graphCard = el("div", { class: "card" }, + el("div", { class: "grid" }, + el("div", { class: "dagHeader" }, + el("div", { class: "dagHeaderLeft" }, + el("div", { class: "dagTitleRow" }, + el("h2", {}, "DAG"), + dirPill + ), + el("p", { class: "dagSubtle" }, + "Pan/zoom • click a node to pin • click again to unpin • Ctrl/Cmd-click opens." + ) + ), + + el("div", { class: "dagHeaderRight" }, + el("div", { class: "dagToolsRow" }, + copyLinkBtn(state), + fitBtn, resetBtn, zoomOutBtn, zoomInBtn, + layoutTabs + ), + el("div", { class: "dagToolsRow" }, + el("div", { class: "tabs dagModeTabs" }, + modeBtn("up", "Up"), + modeBtn("down", "Down"), + modeBtn("both", "Both"), + modeBtn("off", "Off"), + ), + el("div", { class: "dagDepth" }, depthPill, depthSlider), + ) + ) + ), + + el("div", { class: "graphWrap" }, graphHost, miniHost) + ) + ); + + queueMicrotask(() => { + const g0 = graphTransformDirection(graphBase, state.graphUI.dir); + state._graphCtl = mountGraph(state, graphHost, g0, { miniHost, showMini: true }); + + const r = parseRoute(); + if (r.route === "home" && r.focus) { + state._graphCtl?.focus?.(r.focus, { zoom: 1.25, pin: true }); + } + }); + + // Dashboard data respects the current model facets (kind/materialized/path prefix). + const modelsSubset = filterModelsWithFacets(allModels, facets); + + // If the URL requests a section, set the default undoc tab. 
/**
 * Render the page for a single model: a header card (title, relation, docs
 * badges, navigation/copy buttons, tab strip) followed by the active tab's
 * panel.
 *
 * The active tab is `tabFromRoute`, else `state.modelTabDefault`, else
 * "overview"; unknown names fall back to "overview". When a column is given
 * in the route but no tab is, the "columns" tab is selected.
 *
 * @param {Object} state App state; reads `byModel`, `modelTabDefault`, `STORE`.
 * @param {string} name Model name looked up in `state.byModel`.
 * @param {string} [tabFromRoute] Tab requested by the URL, if any.
 * @param {string} [colFromRoute] Column requested by the URL, if any.
 * @returns {*} Root node, or a "Model not found" card for unknown names.
 */
function renderModel(state, name, tabFromRoute, colFromRoute) {
  const m = state.byModel.get(name);
  if (!m) {
    return el("div", { class: "card" }, el("h2", {}, "Model not found"), el("p", { class: "empty" }, name));
  }

  const knownTabs = ["overview","columns","contract","lineage","code","meta"];
  // String() guards against a non-string tab value reaching toLowerCase().
  const requested = String(tabFromRoute || state.modelTabDefault || "overview").toLowerCase();
  const hasCol = !!(colFromRoute && String(colFromRoute).trim());

  let tab = knownTabs.includes(requested) ? requested : "overview";
  // Only force columns if col is present AND the URL didn't explicitly set a tab
  if (hasCol && !tabFromRoute) tab = "columns";

  const actions = el("div", {},
    el("button", {
      class: "btn",
      onclick: () => { location.hash = routeWithFacets("#/"); }
    }, "← Overview"),
    copyLinkBtn(state, "btn"),
    el("button", {
      class: "btn",
      // Go through the shared helper so the copy also works where the async
      // Clipboard API is unavailable (it falls back to execCommand).
      onclick: () => { copyToClipboard(m.path || ""); }
    }, "Copy path")
  );

  const onTabChange = (next) => {
    // Persist default for convenience
    state.modelTabDefault = next;
    safeSet(state.STORE.modelTab, next);

    setModelQuery({
      tab: next,
      col: (next === "columns") ? (colFromRoute || "") : "" // clear col when leaving Columns
    });
  };

  const header = el("div", { class: "card" },
    el("div", { class: "grid2" },
      el("div", {},
        el("h2", {}, m.name),
        el("p", { class: "empty" }, m.relation ? `Relation: ${m.relation}` : ""),
        renderDocsBadges(state, m)
      ),
      actions
    ),
    renderTabs(tab, onTabChange)
  );

  const panel = el("div", { class: "tabPanel" }, renderModelPanel(state, m, tab, colFromRoute));

  return el("div", { class: "grid" }, header, panel);
}
`Depth ${state.modelMini.depth}`); + const depthSlider = el("input", { + type: "range", min: "1", max: "6", + value: String(state.modelMini.depth), + oninput: (e) => { + state.modelMini.depth = Number(e.target.value || 2); + depthPill.textContent = `Depth ${state.modelMini.depth}`; + rerenderMini(); + } + }); + + const miniModeTabs = el("div", { class: "tabs" }, + el("button", { class: `tab ${state.modelMini.mode==="up"?"active":""}`, onclick:()=>{ state.modelMini.mode="up"; syncMiniTabs(); rerenderMini(); } }, "Upstream"), + el("button", { class: `tab ${state.modelMini.mode==="down"?"active":""}`, onclick:()=>{ state.modelMini.mode="down"; syncMiniTabs(); rerenderMini(); } }, "Downstream"), + el("button", { class: `tab ${state.modelMini.mode==="both"?"active":""}`, onclick:()=>{ state.modelMini.mode="both"; syncMiniTabs(); rerenderMini(); } }, "Both"), + ); + + function syncMiniTabs() { + const btns = miniModeTabs.querySelectorAll(".tab"); + btns.forEach(b => b.classList.remove("active")); + const idx = state.modelMini.mode === "up" ? 0 : state.modelMini.mode === "down" ? 1 : 2; + btns[idx]?.classList.add("active"); + } + + function rerenderMini() { + miniGraphHost.textContent = ""; + const centerNode = (state.manifest.dag?.graph?.nodes || []) + .find(n => n.kind === "model" && n.name === m.name); const centerId = centerNode?.id || `m:${m.name}`; @@ -730,11 +2261,16 @@ function renderModelPanel(state, m, tab, colFromRoute) { el("div", { class: "k" }, "Kind"), el("div", {}, m.kind), el("div", { class: "k" }, "Materialized"), el("div", {}, m.materialized || "—"), el("div", { class: "k" }, "Path"), el("div", {}, el("code", {}, m.path || "—")), + el("div", { class: "k" }, "Model docs"), el("div", {}, modelDocsStatus(state, m).described ? 
el("span", { class: "pillSmall pillGood" }, "Model described") : el("span", { class: "pillSmall pillBad" }, "No model docs")), + el("div", { class: "k" }, "Columns docs"), el("div", {}, (() => { const st = modelDocsStatus(state, m); if (!st.withSchema) return el("span", { class: "empty" }, "Schema disabled"); if (!st.colTotal) return el("span", { class: "empty" }, "No columns found"); return el("span", { class: `pillSmall ${st.colMissing ? "pillBad" : "pillGood"}` }, `${st.colDoc}/${st.colTotal} columns documented`); })()), el("div", { class: "k" }, "Deps"), el("div", {}, deps.length ? joinInline(deps) : el("span", { class: "empty" }, "—")), el("div", { class: "k" }, "Used by"), el("div", {}, usedBy.length ? joinInline(usedBy) : el("span", { class: "empty" }, "—")), el("div", { class: "k" }, "Sources"), el("div", {}, sourcesUsed.length ? joinInline(sourcesUsed) : el("span", { class: "empty" }, "—")), ) ), + healthCard ? healthCard : null, + renderModelConfigMetaCard(m), + renderPythonModelCard(state, m), miniPanel, m.description_html ? 
el("div", { class: "card" }, el("h3", {}, "Description"), el("div", { class: "desc", html: m.description_html })) @@ -748,52 +2284,60 @@ function renderModelPanel(state, m, tab, colFromRoute) { if (tab === "lineage") { const cols = m.columns || []; - const rows = cols + const colRows = cols .filter(c => (c.lineage || []).length) .map(c => el("tr", {}, - el("td", {}, el("code", {}, c.name)), - el("td", {}, renderLineage(c.lineage || [])) + el("td", {}, + (() => { + const href = routeWithFacets(`#/model/${escapeHashPart(m.name)}?tab=columns&col=${encodeURIComponent(c.name)}`); + return el("a", { href, onclick:(e)=>{ e.preventDefault(); location.hash = href; } }, el("code", {}, c.name)); + })() + ), + el("td", {}, renderLineage(state, c.lineage || [])) + ) + ) + + const inferred = m.inferred_lineage || {}; + const infRows = Object.entries(inferred) + .filter(([_, lin]) => Array.isArray(lin) && lin.length) + .map(([outCol, lin]) => + el("tr", {}, + el("td", {}, el("code", {}, outCol)), + el("td", {}, renderLineage(state, lin || [])) ) ); + const rows = colRows.length ? colRows : infRows; + const title = colRows.length ? "Column lineage" : "Inferred lineage"; + return el("div", { class: "card" }, - el("h3", {}, "Column lineage"), + el("h3", {}, title), rows.length ? el("table", { class: "table" }, el("thead", {}, el("tr", {}, el("th", {}, "Column"), el("th", {}, "Lineage"))), el("tbody", {}, ...rows) ) - : el("p", { class: "empty" }, "No lineage available for this model’s columns.") + : el("p", { class: "empty" }, "No lineage available for this model.") ); } if (tab === "code") { - // Placeholder until we add compiled SQL / python source to manifest - return el("div", { class: "card" }, - el("h3", {}, "Code"), - el("p", { class: "empty" }, "Code view not yet available. 
Next step: include rendered SQL / Python source in the manifest.") - ); + const { query } = parseHashWithQuery(); + const codeView = query.get("code") || ""; + return renderModelCodeTab(state, m, codeView); } if (tab === "meta") { - // Show a structured dump of whatever we have - const meta = { - name: m.name, - kind: m.kind, - relation: m.relation, - materialized: m.materialized, - path: m.path, - deps: m.deps || [], - used_by: m.used_by || [], - sources_used: m.sources_used || [], - }; - return el("div", { class: "card" }, - el("h3", {}, "Meta"), - el("pre", { class: "mono", style: "white-space:pre-wrap; margin:0;" }, JSON.stringify(meta, null, 2)) + return el("div", {}, + renderModelConfigMetaCard(m, { includeRaw: true }) ); } + if (tab === "contract") { + return buildContractCard(state, m); + } + return el("div", { class: "card" }, el("p", { class: "empty" }, "Unknown tab.")); } @@ -809,77 +2353,417 @@ function renderSource(state, sourceName, tableName) { return el("div", { class: "card" }, el("h2", {}, "Source not found"), el("p", { class: "empty" }, key)); } - const consumers = (s.consumers || []).map(m => el("a", { href: routeWithFacets(`#/model/${escapeHashPart(m)}`) }, m)); + const consumers = (s.consumers || []).map(m => + el("a", { href: routeWithFacets(`#/model/${escapeHashPart(m)}`) }, m) + ); - const freshness = (() => { - const warn = s.warn_after_minutes != null ? `${s.warn_after_minutes}m warn` : null; - const err = s.error_after_minutes != null ? `${s.error_after_minutes}m error` : null; - const parts = [warn, err].filter(Boolean); - return parts.length ? parts.join(" • ") : "—"; - })(); + const fc = sourceFreshnessConfig(s); + + const statusBadge = fc.configured + ? el("span", { class: "pillSmall pillGood", title: fc.reason }, "Configured") + : el("span", { class: "pillSmall pillBad", title: fc.reason }, "Missing freshness"); + + const loadedAtNode = fc.loadedAtField + ? 
el("code", {}, fc.loadedAtField) + : el("span", { class: "empty" }, "—"); + + const warnNode = fc.warnMinutes != null + ? el("span", { class: "pillSmall pillWarn", title: `${Math.round(fc.warnMinutes)} minutes` }, `Warn after ${formatMinutesCompact(fc.warnMinutes)}`) + : el("span", { class: "empty" }, "—"); + + const errNode = fc.errorMinutes != null + ? el("span", { class: "pillSmall pillBad", title: `${Math.round(fc.errorMinutes)} minutes` }, `Error after ${formatMinutesCompact(fc.errorMinutes)}`) + : el("span", { class: "empty" }, "—"); return el("div", { class: "grid" }, el("div", { class: "card" }, el("div", { class: "grid2" }, el("div", {}, el("h2", {}, key)), el("div", {}, - el("button", { class: "btn", onclick: () => { location.hash = "#/"; } }, "← Overview") + el("button", { class: "btn", onclick: () => { location.hash = routeWithFacets("#/"); } }, "← Overview"), + copyLinkBtn(state, "btn"), + ) + ), + el("div", { class: "kv" }, + el("div", { class: "k" }, "Relation"), el("div", {}, el("code", {}, s.relation || "—")), + el("div", { class: "k" }, "Freshness"), el("div", {}, statusBadge), + el("div", { class: "k" }, "Loaded at field"), el("div", {}, loadedAtNode), + el("div", { class: "k" }, "Warn threshold"), el("div", {}, warnNode), + el("div", { class: "k" }, "Error threshold"), el("div", {}, errNode), + el("div", { class: "k" }, "Consumers"), el("div", {}, consumers.length ? joinInline(consumers) : el("span", { class: "empty" }, "—")) + ) + ), + s.description_html + ? 
el("div", { class: "card" }, el("h2", {}, "Description"), el("div", { class: "desc", html: s.description_html })) + : null + ); +} + +function macroSourceText(m) { + if (!m) return ""; + return ( + m.source || + m.raw_sql || + m.sql || + m.definition || + m.code || + (m.meta && (m.meta.source || m.meta.raw_sql || m.meta.sql || m.meta.code)) || + "" + ); +} + +function renderMacros(state, qFromRoute) { + const ms = state.manifest.macros || []; + const wrap = el("div", { class: "card" }, el("h2", {}, "Macros")); + + if (!ms.length) { + wrap.appendChild(el("p", { class: "empty" }, "No macros discovered.")); + return wrap; + } + + const q0 = (qFromRoute || "").trim(); + state.macroQuery = q0; + + const countEl = el("div", { class: "muted", style: "margin-top:6px;" }, ""); + const list = el("ul", { class: "docList" }); + + const input = el("input", { + class: "search", + type: "search", + placeholder: "Search macros…", + value: q0, + }); + + const apply = () => { + const q = (input.value || "").trim().toLowerCase(); + + const filtered = q + ? ms.filter(m => { + const name = (m.name || "").toLowerCase(); + const kind = (m.kind || "").toLowerCase(); + const path = (m.path || "").toLowerCase(); + const src = macroSourceText(m).toLowerCase(); + return name.includes(q) || kind.includes(q) || path.includes(q) || src.includes(q); + }) + : ms.slice(); + + filtered.sort((a, b) => (a.name || "").localeCompare(b.name || "")); + + countEl.textContent = q + ? `${filtered.length} of ${ms.length} macros` + : `${ms.length} macros`; + + list.replaceChildren( + ...filtered.map(m => { + const src = macroSourceText(m); + const snip = q ? makeSnippet(src, q, 90) : ""; + const subParts = [ + (m.kind || "macro").toUpperCase(), + m.path ? `• ${m.path}` : "", + snip ? 
`• ${snip}` : "", + ].filter(Boolean).join(" "); + + const href = routeWithFacets(`#/macro/${escapeHashPart(m.name)}`); + + return el("li", { class: "docRow" }, + el("a", { + href, + onclick: (e) => { e.preventDefault(); location.hash = href; }, + title: m.path || m.name, + }, + el("div", { class: "docRowMain" }, + el("div", { class: "docRowTitle" }, m.name), + el("div", { class: "docRowSub" }, subParts) + ), + el("div", { class: "docRowPills" }, + el("span", { class: "pillSmall" }, m.kind || "macro") + ) + ), + copyLinkBtn(state, "btn"), + ); + }) + ); + }; + + const syncUrl = debounce(() => { + const v = (input.value || "").trim(); + replaceHashQuery((q) => { + if (v) q.set("mq", v); + else q.delete("mq"); + }); + }, 200); + + input.oninput = () => { + syncUrl(); + apply(); + }; + + input.onkeydown = (e) => { + if (e.key === "Escape") { + e.preventDefault(); + input.value = ""; + syncUrl(); + apply(); + } + }; + + wrap.appendChild(input); + wrap.appendChild(countEl); + wrap.appendChild(list); + + apply(); + return wrap; +} + +function renderMacro(state, name) { + const ms = state.manifest.macros || []; + const m = + ms.find(x => x.name === name) || + ms.find(x => (x.name || "").toLowerCase() === (name || "").toLowerCase()); + + const back = state.macroQuery + ? `#/macros?mq=${encodeURIComponent(state.macroQuery)}` + : "#/macros"; + + if (!m) { + return el("div", { class: "card" }, + el("div", { class: "row" }, + el("a", { + class: "btn", + href: routeWithFacets(back), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets(back); } + }, "← Macros") + ), + el("h2", {}, "Macro not found"), + el("p", { class: "empty" }, name) + ); + } + + const src = macroSourceText(m); + const maxChars = 16000; + const clipped = src && src.length > maxChars ? 
(src.slice(0, maxChars) + "\n\n… (truncated)") : src; + + return el("div", { class: "card" }, + el("div", { class: "row" }, + el("a", { + class: "btn", + href: routeWithFacets(back), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets(back); } + }, "← Macros"), + el("span", { class: "pill" }, m.kind || "macro") + ), + el("h2", {}, m.name), + el("div", { class: "kvRows" }, + el("div", { class: "kvRow" }, el("span", { class: "k" }, "Kind"), el("span", { class: "v" }, m.kind || "macro")), + el("div", { class: "kvRow" }, el("span", { class: "k" }, "Path"), el("span", { class: "v" }, m.path ? el("code", {}, m.path) : "—")) + ), + clipped + ? el("details", { class: "codeDetails" }, + el("summary", { class: "codeSummary" }, "Show macro source"), + el("pre", { class: "codeBlock" }, clipped) + ) + : el("p", { class: "empty" }, "No macro source available in manifest.") + ); +} + +function joinInline(nodes) { + const wrap = el("span", {}); + nodes.forEach((n, i) => { + if (i) wrap.appendChild(document.createTextNode(", ")); + wrap.appendChild(n); + }); + return wrap; +} + +// -------- Structured meta/config panel --------------------------------- + +function tryParseRelationParts(rel) { + const s = String(rel || "").trim(); + if (!s) return {}; + // Avoid guessing when relation contains quoting or brackets + if (/[`"\[\]]/.test(s)) return {}; + const parts = s.split(".").map(p => p.trim()).filter(Boolean); + if (parts.length === 3) return { database: parts[0], schema: parts[1], identifier: parts[2] }; + if (parts.length === 2) return { schema: parts[0], identifier: parts[1] }; + return {}; +} + +function asStringArray(v) { + if (!v) return []; + if (Array.isArray(v)) return v.map(x => String(x)).filter(Boolean); + if (typeof v === "string") { + // allow comma-separated + return v.split(",").map(s => s.trim()).filter(Boolean); + } + return [String(v)]; +} + +function renderPillList(values) { + const vs = asStringArray(values); + if (!vs.length) return 
el("span", { class: "empty" }, "—"); + return joinInline(vs.map(x => el("span", { class: "pillSmall" }, x))); +} + +function jsonPreview(obj, maxChars = 6000) { + let s; + try { s = JSON.stringify(obj, null, 2); } + catch { s = String(obj); } + if (s.length > maxChars) s = s.slice(0, maxChars) + "\n… (truncated)"; + return s; +} + +function renderMetaValue(v) { + if (v == null || v === "") return el("span", { class: "empty" }, "—"); + if (typeof v === "boolean") return el("span", { class: "pillSmall" }, v ? "true" : "false"); + if (typeof v === "number") return el("code", {}, String(v)); + if (Array.isArray(v)) return renderPillList(v); + if (typeof v === "object") { + return el("details", {}, + el("summary", { class: "codeSummary" }, "View"), + el("pre", { class: "mono", style: "white-space:pre-wrap; margin:8px 0 0 0;" }, jsonPreview(v)) + ); + } + const s = String(v); + // prefer code styling for short config-y strings + return s.length <= 80 && !/\s/.test(s) ? el("code", {}, s) : el("span", {}, s); +} + +function pick(obj, keys) { + for (const k of keys) { + const v = obj && obj[k]; + if (v != null && String(v).trim() !== "") return v; + } + return ""; +} + +function renderModelConfigMetaCard(m, { includeRaw = false } = {}) { + const relParts = tryParseRelationParts(m.relation); + + const database = pick(m, ["database"]) || relParts.database || ""; + const schema = pick(m, ["schema"]) || relParts.schema || ""; + const alias = pick(m, ["alias", "identifier", "name"]) || relParts.identifier || ""; + + const tags = pick(m, ["tags"]) || (m.meta && (m.meta.tags || m.meta.tag)) || ""; + const owners = pick(m, ["owners", "owner"]) || (m.meta && (m.meta.owners || m.meta.owner)) || ""; + + const reserved = new Set([ + "name","kind","relation","materialized","path", + "database","schema","alias","identifier", + "tags","tag","owners","owner", + ]); + + // Custom meta/config: show whatever else is present without duplicating known fields. 
+ const custom = {}; + const metaObj = (m.meta && typeof m.meta === "object" && !Array.isArray(m.meta)) ? m.meta : null; + const cfgObj = (m.config && typeof m.config === "object" && !Array.isArray(m.config)) ? m.config : null; + + function addCustomFrom(obj) { + if (!obj) return; + for (const [k, v] of Object.entries(obj)) { + if (reserved.has(k)) continue; + if (v == null || v === "" || (Array.isArray(v) && !v.length)) continue; + if (custom[k] == null) custom[k] = v; + } + } + addCustomFrom(cfgObj); + addCustomFrom(metaObj); + + const customKeys = Object.keys(custom).sort((a, b) => a.localeCompare(b)); + + const customDetails = customKeys.length + ? el("details", {}, + el("summary", { class: "codeSummary" }, `Custom meta (${customKeys.length})`), + el("table", { class: "table", style: "margin-top:10px;" }, + el("thead", {}, el("tr", {}, el("th", {}, "Key"), el("th", {}, "Value"))), + el("tbody", {}, + ...customKeys.map(k => + el("tr", {}, + el("td", {}, el("code", {}, k)), + el("td", {}, renderMetaValue(custom[k])) + ) + ) + ) ) - ), - el("div", { class: "kv" }, - el("div", { class: "k" }, "Relation"), el("div", {}, el("code", {}, s.relation || "—")), - el("div", { class: "k" }, "Loaded at field"), el("div", {}, el("code", {}, s.loaded_at_field || "—")), - el("div", { class: "k" }, "Freshness"), el("div", {}, freshness), - el("div", { class: "k" }, "Consumers"), el("div", {}, consumers.length ? joinInline(consumers) : el("span", { class: "empty" }, "—")), ) + : el("p", { class: "empty", style: "margin:10px 0 0 0;" }, "No custom meta."); + + const rawBlock = includeRaw + ? 
el("details", { class: "codeDetails", style: "margin-top:10px;" }, + el("summary", { class: "codeSummary" }, "Raw meta/config JSON"), + el("pre", { class: "mono", style: "white-space:pre-wrap; margin:8px 0 0 0;" }, + jsonPreview({ config: cfgObj || null, meta: metaObj || null }) + ) + ) + : null; + + return el("div", { class: "card" }, + el("h3", {}, "Config & meta"), + el("div", { class: "kv" }, + el("div", { class: "k" }, "Materialized"), el("div", {}, renderMetaValue(m.materialized || "")), + el("div", { class: "k" }, "Database"), el("div", {}, renderMetaValue(database)), + el("div", { class: "k" }, "Schema"), el("div", {}, renderMetaValue(schema)), + el("div", { class: "k" }, "Alias"), el("div", {}, renderMetaValue(alias)), + el("div", { class: "k" }, "Relation"), el("div", {}, m.relation ? el("code", {}, m.relation) : el("span", { class: "empty" }, "—")), + el("div", { class: "k" }, "Path"), el("div", {}, el("code", {}, m.path || "—")), + el("div", { class: "k" }, "Tags"), el("div", {}, renderPillList(tags)), + el("div", { class: "k" }, "Owners"), el("div", {}, renderPillList(owners)), ), - s.description_html - ? el("div", { class: "card" }, el("h2", {}, "Description"), el("div", { class: "desc", html: s.description_html })) - : null + customDetails, + rawBlock ); } -function renderMacros(state) { - const ms = state.manifest.macros || []; +function renderPythonModelCard(state, m) { + if ((m.kind || "sql") !== "python") return null; + + const sig = (m.python_signature || "").trim(); + const doc = (m.python_docstring || "").trim(); + const req = m.python_requires || {}; + + const reqRows = Object.entries(req) + .sort((a,b) => a[0].localeCompare(b[0])) + .map(([dep, cols]) => + el("tr", {}, + el("td", {}, el("a", { href: routeWithFacets(`#/model/${escapeHashPart(dep)}`) }, dep)), + el("td", {}, (cols && cols.length) + ? 
el("code", {}, cols.join(", ")) + : el("span", { class: "empty" }, "—") + ), + ) + ); + return el("div", { class: "card" }, - el("h2", {}, "Macros"), - ms.length + el("h3", {}, "Python model"), + el("div", { class: "k" }, "Function signature"), + sig ? renderCodeBlock(sig, { wrap: true }) : el("p", { class: "empty" }, "Signature not available."), + el("div", { class: "k", style: "margin-top:10px;" }, "Docstring"), + doc ? el("pre", { class: "codeBlock codeWrap" }, doc) : el("p", { class: "empty" }, "No docstring."), + el("div", { class: "k", style: "margin-top:10px;" }, "Required inputs (best-effort)"), + reqRows.length ? el("table", { class: "table" }, - el("thead", {}, el("tr", {}, - el("th", {}, "Name"), - el("th", {}, "Kind"), - el("th", {}, "Path"), - )), - el("tbody", {}, - ...ms.map(m => el("tr", {}, - el("td", {}, el("code", {}, m.name)), - el("td", {}, m.kind), - el("td", {}, el("code", {}, m.path)), - )) - ) + el("thead", {}, el("tr", {}, el("th", {}, "Dependency"), el("th", {}, "Required columns"))), + el("tbody", {}, ...reqRows) ) - : el("p", { class: "empty" }, "No macros discovered.") + : el("p", { class: "empty" }, "No required-column hints found.") ); } -function joinInline(nodes) { - const wrap = el("span", {}); - nodes.forEach((n, i) => { - if (i) wrap.appendChild(document.createTextNode(", ")); - wrap.appendChild(n); - }); - return wrap; -} +function renderLineage(state, items) { + const ul = el("ul", { class: "lineage" }); + + for (const it of (items || [])) { + const conf = (it.confidence || "inferred").toLowerCase(); -function renderLineage(items) { - if (!items || !items.length) return el("span", { class: "empty" }, "—"); - // items are already normalized by docs.py lineage logic: - // { from_relation, from_column, transformed } - const ul = el("ul", { style: "margin:0; padding-left:16px;" }); - for (const it of items) { - const label = `${it.from_relation}.${it.from_column}` + (it.transformed ? 
" (xform)" : ""); - ul.appendChild(el("li", {}, el("code", {}, label))); + ul.append( + el("li", {}, + renderRelationColRef(state, it.from_relation, it.from_column), + " ", + renderConfPill(conf), + it.transformed ? el("span", { class: "pillSmall" }, "XFORM") : "" + ) + ); } + return ul; } @@ -908,6 +2792,7 @@ function renderTabs(active, onPick) { const tabs = [ ["overview", "Overview"], ["columns", "Columns"], + ["contract", "Contract"], ["lineage", "Lineage"], ["code", "Code"], ["meta", "Meta"], @@ -966,7 +2851,7 @@ function buildColumnsCard(state, m, colFromRoute) { const tools = el("div", { class: "colTools" }); const qInput = el("input", { - class: "input", + class: "search", type: "search", placeholder: "Filter columns…", value: uiState.q || "", @@ -1079,7 +2964,7 @@ function buildColumnsCard(state, m, colFromRoute) { const lin = c.lineage || []; const linNode = lin.length - ? renderLineage(lin) + ? renderLineage(state, lin) : el("span", { class: "empty" }, "No lineage available."); const copyName = el("button", { @@ -1119,12 +3004,72 @@ function buildColumnsCard(state, m, colFromRoute) { onclick: async (e) => { e.stopPropagation(); const rows = (lin || []).map(x => - [x.from_relation ?? "", x.from_column ?? "", x.transformed ? "1" : "0"].join(",") + [x.from_relation ?? "", x.from_column ?? "", (x.confidence ?? "inferred"), x.transformed ? 
"1" : "0"].join(",") ); - await copyText(["from_relation,from_column,transformed", ...rows].join("\n")); + await copyText(["from_relation,from_column,confidence,transformed", ...rows].join("\n")); } }, "Copy lineage CSV"); + ensureLineageIndex(state); + const idx = state.lineageIndex; + const selfKey = colKey(m.relation || "", c.name); + + const upstream = idx.forward.get(selfKey) || []; + const downstream = idx.reverse.get(selfKey) || []; + + // dedupe downstream by model+col + const seenDown = new Set(); + const downRows = []; + for (const e of downstream) { + const k = `${e.to_model}::${e.to_column}`; + if (seenDown.has(k)) continue; + seenDown.add(k); + downRows.push(e); + } + + const impactNode = el("div", {}, + el("div", { style: "margin-bottom:10px;" }, + el("div", { class: "drawerTitle", style: "margin-bottom:6px;" }, "Upstream columns"), + upstream.length + ? el("ul", { class: "lineage" }, + ...upstream.map(e => + el("li", {}, + renderRelationColRef(state, e.from_relation, e.from_column), + " ", + renderConfPill(e.confidence), + e.transformed ? el("span", { class: "pillSmall" }, "XFORM") : "" + ) + ) + ) + : el("span", { class: "empty" }, "No upstream columns detected.") + ), + + el("div", {}, + el("div", { class: "drawerTitle", style: "margin-bottom:6px;" }, "Used downstream in"), + downRows.length + ? 
el("table", { class: "table" }, + el("thead", {}, el("tr", {}, + el("th", {}, "Model"), + el("th", {}, "Column"), + el("th", {}, "Confidence"), + )), + el("tbody", {}, + ...downRows.map(e => { + const href = routeWithFacets(`#/model/${escapeHashPart(e.to_model)}?tab=columns&col=${encodeURIComponent(e.to_column)}`); + return el("tr", {}, + el("td", {}, + el("a", { href, onclick:(ev)=>{ ev.preventDefault(); location.hash = href; } }, e.to_model) + ), + el("td", {}, el("code", {}, e.to_column)), + el("td", {}, renderConfPill(e.confidence)), + ); + }) + ) + ) + : el("span", { class: "empty" }, "No downstream usage detected.") + ) + ); + return el("div", { class: "drawer" }, el("div", { class: "colTools" }, el("span", { class: "pillSmall" }, "COLUMN"), @@ -1146,6 +3091,10 @@ function buildColumnsCard(state, m, colFromRoute) { el("div", { class: "drawerBox" }, el("div", { class: "drawerTitle" }, "Lineage"), linNode + ), + el("div", { class: "drawerBox" }, + el("div", { class: "drawerTitle" }, "Impact analysis"), + impactNode ) ) ); @@ -1328,6 +3277,202 @@ function buildColumnsCard(state, m, colFromRoute) { return card; } +function buildContractCard(state, m) { + const withSchema = !!state.manifest.project?.with_schema; + const drift = computeContractDrift(m, withSchema); + + const rawCols = contractColumnsFrom(m).map(normalizeContractCol).filter(Boolean); + const tblConstraints = contractTableConstraintsFrom(m); + + // UI state per model + state.contractUI ||= {}; + const uiState = (state.contractUI[m.name] ||= { q: "" }); + + const card = el("div", { class:"card" }); + + const tools = el("div", { class:"colTools" }); + const qInput = el("input", { + class:"search", + type:"search", + placeholder:"Filter contract columns…", + value: uiState.q || "", + oninput: (e) => { uiState.q = e.target.value || ""; renderBody(); } + }); + + const headRow = el("div", { class:"row", style:"align-items:center; justify-content:space-between;" }, + el("h3", { style:"margin:0;" }, 
"Contract"), + hasContract(m) + ? el("div", { class:"pillRow" }, + el("span", { class:"pillSmall pillGood" }, "Contracted"), + drift.status === "verified" + ? el("span", { class:"pillSmall pillGood", title:"Contract matches warehouse schema" }, "Verified") + : drift.status === "drift" + ? el("span", { class:"pillSmall pillBad", title:"Contract differs from warehouse schema" }, "Drift detected") + : el("span", { class:"pillSmall pillWarn", title:"Warehouse schema not available (run with schema collection enabled)" }, "Schema unavailable"), + (m.contract && typeof m.contract === "object" && m.contract.enforced != null) + ? el("span", { class:"pillSmall" }, m.contract.enforced ? "enforced" : "not enforced") + : null + ) + : null + ); + + tools.appendChild(qInput); + + const body = el("div", {}); + + function renderBody() { + const q = (uiState.q || "").trim().toLowerCase(); + + const rows = rawCols + .filter(c => { + if (!q) return true; + const cstr = [ + c.name, + c.dtype || "", + (c.nullable === true ? "nullable" : c.nullable === false ? "not null" : ""), + JSON.stringify(c.constraints || []), + ].join(" ").toLowerCase(); + return cstr.includes(q); + }) + .sort((a,b) => a.name.localeCompare(b.name)) + .map(c => { + // Optional: show “missing/mismatch” if schema is available + let statusNode = null; + if (drift.schemaAvailable) { + const key = c.name.toLowerCase(); + const rec = drift.byName.get(key); + if (!rec || rec.missing) { + statusNode = el("span", { class:"pillSmall pillBad" }, "missing"); + } else if (rec.issues && rec.issues.length) { + const title = (() => { + const exp = rec.expected ? `${rec.expected.dtype || "—"} / ${rec.expected.nullable == null ? "—" : (rec.expected.nullable ? "nullable" : "not null")}` : ""; + const act = rec.actual ? `${rec.actual.dtype || "—"} / ${(rec.actual.nullable ? "nullable" : "not null")}` : ""; + return (exp && act) ? 
`expected: ${exp}\nactual: ${act}` : ""; + })(); + statusNode = el("span", { class:"pillSmall pillWarn", title }, `mismatch: ${rec.issues.join(", ")}`); + } else { + statusNode = el("span", { class:"pillSmall pillGood" }, "ok"); + } + } else if (withSchema) { + // with_schema enabled but no columns returned for this model + statusNode = el("span", { class:"pillSmall pillWarn" }, "unavailable"); + } + + const colLink = routeWithFacets( + `#/model/${escapeHashPart(m.name)}?tab=columns&col=${encodeURIComponent(c.name)}` + ); + + return el("tr", {}, + el("td", {}, + el("a", { + href: colLink, + onclick: (e) => { e.preventDefault(); location.hash = colLink; }, + style:"text-decoration:none;" + }, el("code", {}, c.name)) + ), + el("td", {}, c.dtype ? el("code", {}, c.dtype) : el("span", { class:"empty" }, "—")), + el("td", {}, + c.nullable === true ? el("span", { class:"pillSmall" }, "nullable") : + c.nullable === false ? el("span", { class:"pillSmall pillBad" }, "not null") : + el("span", { class:"empty" }, "—") + ), + el("td", {}, renderConstraintsList(c.constraints)), + el("td", {}, statusNode || el("span", { class:"empty" }, "—")) + ); + }); + + const hasAnything = rawCols.length || (tblConstraints && tblConstraints.length); + + body.replaceChildren( + !hasAnything + ? el("p", { class:"empty", style:"margin:10px 0 0 0;" }, + "No contract defined for this model." + ) + : el("div", {}, + drift.status !== "none" + ? el("div", { style:"margin:10px 0 14px 0;" }, + el("div", { class:"k", style:"margin-bottom:6px;" }, "Drift summary"), + drift.status === "unavailable" + ? el("p", { class:"empty", style:"margin:0;" }, "Schema unavailable for diff. 
Enable schema collection to verify the contract.") + : el("div", { class:"pillRow" }, + el("span", { class:"pillSmall" }, `missing: ${drift.missing.length}`), + el("span", { class:"pillSmall" }, `extra: ${drift.extra.length}`), + el("span", { class:"pillSmall" }, `mismatched: ${drift.mismatches.length}`), + (!drift.constraintsComparable && rawCols.some(c => (c.constraints || []).length)) + ? el("span", { class:"pillSmall pillWarn", title:"Warehouse schema does not include constraints yet" }, "constraints: not verifiable") + : null + ) + ) + : null, + + (tblConstraints && tblConstraints.length) + ? el("div", { style:"margin:10px 0 14px 0;" }, + el("div", { class:"k", style:"margin-bottom:6px;" }, "Table constraints"), + el("div", {}, renderConstraintsList(tblConstraints)) + ) + : null, + + (drift.status !== "unavailable" && drift.extra.length) + ? el("div", { style:"margin:10px 0 14px 0;" }, + el("div", { class:"k", style:"margin-bottom:6px;" }, "Extra columns in warehouse (not in contract)"), + el("div", {}, + joinInline( + drift.extra + .slice(0, 60) + .map(nm => { + const href = routeWithFacets(`#/model/${escapeHashPart(m.name)}?tab=columns&col=${encodeURIComponent(nm)}`); + return el("a", { href, onclick:(e)=>{ e.preventDefault(); location.hash = href; } }, nm); + }) + ), + drift.extra.length > 60 ? el("span", { class:"empty" }, ` … +${drift.extra.length - 60} more`) : null + ) + ) + : null, + + rawCols.length + ? el("table", { class:"table" }, + el("thead", {}, el("tr", {}, + el("th", {}, "Column"), + el("th", {}, "Type"), + el("th", {}, "Nullability"), + el("th", {}, "Constraints"), + el("th", {}, withSchema ? 
"Status vs actual" : "Status"), + )), + el("tbody", {}, ...rows) + ) + : el("p", { class:"empty" }, "No contract columns specified.") + ) + ); + } + + renderBody(); + card.appendChild(headRow); + card.appendChild(tools); + card.appendChild(body); + + // Small “how to define” hint (kept lightweight + collapsible) + card.appendChild( + el("details", { class:"codeDetails", style:"margin-top:12px;" }, + el("summary", { class:"codeSummary" }, "How to define a contract"), + el("pre", { class:"mono", style:"white-space:pre-wrap; margin:8px 0 0 0;" }, +`# project.yml +docs: + models: + ${m.name}: + contract: + enforced: true + columns: + some_col: + dtype: text + nullable: false + constraints: ["unique"]` + ) + ) + ); + + return card; +} + function normalizeMermaidKey(s) { s = (s || "").trim(); if (!s) return ""; @@ -1523,6 +3668,7 @@ function mountGraph(state, host, graph, opts = {}) { function setPinned(id) { state.graphUI.pinned = id || ""; + syncShareStateToUrl(state); applyHighlight(); } @@ -1818,83 +3964,499 @@ function mountGraph(state, host, graph, opts = {}) { const dx = p.x - miniDrag.p0.x; const dy = p.y - miniDrag.p0.y; - // visible size (graph coords) - const sr = svg.getBoundingClientRect(); - const vw = sr.width / scale; - const vh = sr.height / scale; + // visible size (graph coords) + const sr = svg.getBoundingClientRect(); + const vw = sr.width / scale; + const vh = sr.height / scale; + + const vb = miniSvg.viewBox.baseVal; + const boundX = vb.x + vb.width - vw; + const boundY = vb.y + vb.height - vh; + + const vx = clamp(miniDrag.vx0 + dx, Math.min(vb.x, boundX), Math.max(vb.x, boundX)); + const vy = clamp(miniDrag.vy0 + dy, Math.min(vb.y, boundY), Math.max(vb.y, boundY)); + + tx = -vx * scale; + ty = -vy * scale; + + if (Math.abs(dx) + Math.abs(dy) > 0.5) miniDrag.moved = true; + apply(); + }); + + function endMiniDrag(ev) { + if (!miniDrag || ev.pointerId !== miniDrag.id) return; + ev.preventDefault(); + + miniSuppressClick = miniDrag.moved; + miniDrag 
= null; + + try { miniSvg.releasePointerCapture(ev.pointerId); } catch {} + miniView.style.cursor = "grab"; + } + + miniSvg.addEventListener("pointerup", endMiniDrag); + miniSvg.addEventListener("pointercancel", endMiniDrag); + + miniSvg.addEventListener("click", (ev) => { + if (miniSuppressClick) { miniSuppressClick = false; return; } + const rect = miniSvg.getBoundingClientRect(); + const px = (ev.clientX - rect.left) / rect.width; + const py = (ev.clientY - rect.top) / rect.height; + + const vb = miniSvg.viewBox.baseVal; + const gx = vb.x + px * vb.width; + const gy = vb.y + py * vb.height; + + // center clicked point + const sr = svg.getBoundingClientRect(); + tx = sr.width / 2 - gx * scale; + ty = sr.height / 2 - gy * scale; + apply(); + }); + + miniHost.appendChild(miniSvg); + updateMini(); + } + + function centerOn(id, zoom = 1.25) { + const n = byId.get(id); + if (!n) return; + + const sr = svg.getBoundingClientRect(); + const cx = n.x + n.w / 2; + const cy = n.y + n.h / 2; + + scale = Math.max(0.1, Math.min(3.0, zoom)); + tx = sr.width / 2 - cx * scale; + ty = sr.height / 2 - cy * scale; + apply(); + } + + return { + fit, reset, + zoomIn: () => zoomBy(1.12, svg.getBoundingClientRect().left + 10, svg.getBoundingClientRect().top + 10), + zoomOut: () => zoomBy(1 / 1.12, svg.getBoundingClientRect().left + 10, svg.getBoundingClientRect().top + 10), + svg, + focus: (id, { zoom = 1.25, pin = true } = {}) => { centerOn(id, zoom); if (pin) setPinned(id); }, + refresh: () => applyHighlight(), + setPinned, + }; +} + +function normRel(r) { + return String(r || "") + .trim() + .replace(/["`]/g, "") + .replace(/\s+/g, " ") + .toLowerCase(); +} +function normCol(c) { + return String(c || "").trim().toLowerCase(); +} +function colKey(rel, col) { + return `${normRel(rel)}::${normCol(col)}`; +} + +function buildLineageIndex(manifest) { + const relToEntity = new Map(); + + for (const m of (manifest.models || [])) { + if (m.relation) relToEntity.set(normRel(m.relation), { 
kind: "model", name: m.name, relation: m.relation }); + } + for (const s of (manifest.sources || [])) { + if (s.relation) relToEntity.set(normRel(s.relation), { kind: "source", source_name: s.source_name, table_name: s.table_name, relation: s.relation }); + } + + const forward = new Map(); // toKey -> edges[] + const reverse = new Map(); // fromKey -> edges[] + + const push = (mp, k, v) => { + if (!mp.has(k)) mp.set(k, []); + mp.get(k).push(v); + }; + + for (const m of (manifest.models || [])) { + if (!m.relation) continue; + for (const c of (m.columns || [])) { + const toKey = colKey(m.relation, c.name); + for (const it of (c.lineage || [])) { + const fromRel = it.from_relation || ""; + const fromCol = it.from_column || ""; + if (!fromRel || !fromCol) continue; + + const fromKey = colKey(fromRel, fromCol); + const edge = { + from_relation: fromRel, + from_column: fromCol, + confidence: (it.confidence || "inferred").toLowerCase(), + transformed: !!it.transformed, + + // downstream target + to_model: m.name, + to_column: c.name, + to_relation: m.relation, + + fromKey, + toKey, + }; + + push(forward, toKey, edge); + push(reverse, fromKey, edge); + } + } + } + + return { relToEntity, forward, reverse }; +} + +function ensureLineageIndex(state) { + if (!state.lineageIndex) state.lineageIndex = buildLineageIndex(state.manifest); +} + +function renderConfPill(conf) { + const c = String(conf || "inferred").toLowerCase(); + if (c === "annotated") return el("span", { class: "pillSmall pillGood", title: "Annotated (manual override / YAML)" }, "ANNOTATED"); + if (c === "inferred") return el("span", { class: "pillSmall pillWarn", title: "Inferred from SQL/Python" }, "INFERRED"); + return el("span", { class: "pillSmall" }, c.toUpperCase()); +} + +function renderRelationColRef(state, rel, col) { + ensureLineageIndex(state); + const ent = state.lineageIndex.relToEntity.get(normRel(rel)); + + if (ent?.kind === "model") { + const href = 
routeWithFacets(`#/model/${escapeHashPart(ent.name)}?tab=columns&col=${encodeURIComponent(col)}`); + return el("a", { + href, + onclick: (e) => { e.preventDefault(); location.hash = href; } + }, `${ent.name}.${col}`); + } + + if (ent?.kind === "source") { + const href = routeWithFacets(`#/source/${escapeHashPart(ent.source_name)}/${escapeHashPart(ent.table_name)}`); + return el("a", { + href, + onclick: (e) => { e.preventDefault(); location.hash = href; } + }, `${ent.source_name}.${ent.table_name}.${col}`); + } + + return el("code", {}, `${rel}.${col}`); +} + +// -------- Health helpers ----------------------------------------- + +function fmtDateTime(v) { + const d = (typeof v === "number") ? new Date(v) : new Date(String(v || "")); + if (Number.isNaN(d.getTime())) return ""; + return d.toLocaleString(undefined, { + year: "numeric", month: "short", day: "2-digit", + hour: "2-digit", minute: "2-digit", + }); +} + +function fmtDurationMs(ms) { + ms = Number(ms || 0); + if (!Number.isFinite(ms) || ms <= 0) return "—"; + const s = Math.round(ms / 1000); + if (s < 60) return `${s}s`; + const m = Math.floor(s / 60), rs = s % 60; + if (m < 60) return `${m}m ${rs}s`; + const h = Math.floor(m / 60), rm = m % 60; + return `${h}h ${rm}m`; +} + +function testBucket(t) { + const st = String(t.status || t.state || "").toLowerCase(); + if (st === "pass" || st === "ok" || st === "success") return "pass"; + if (st === "warn" || st === "warning") return "warn"; + if (st === "skip" || st === "skipped") return "skip"; + // fallback if you ever store boolean ok + if (t.ok === true) return "pass"; + if (t.ok === false) return "fail"; + return "fail"; +} + +function buildTestIndex(state) { + const tests = (state.testResults?.results || state.testResults?.tests || []); + const relToEnt = new Map(); + + for (const m of (state.manifest.models || [])) { + if (m.relation) relToEnt.set(normRel(m.relation), { kind: "model", name: m.name }); + } + for (const s of (state.manifest.sources || [])) 
{ + if (s.relation) relToEnt.set(normRel(s.relation), { + kind: "source", + key: `${s.source_name}.${s.table_name}`, + source_name: s.source_name, + table_name: s.table_name, + }); + } + + const byModel = new Map(); + const bySource = new Map(); + const byModelCol = new Map(); + const summary = { pass: 0, warn: 0, fail: 0, skip: 0, total: 0 }; + + for (const t of tests) { + const bucket = testBucket(t); + summary[bucket] = (summary[bucket] || 0) + 1; + summary.total++; + + // Figure out the target entity + let ent = null; + + if (t.model_name) { + ent = { kind: "model", name: String(t.model_name) }; + } else if (t.source_name && t.table_name) { + ent = { kind: "source", key: `${t.source_name}.${t.table_name}`, source_name: t.source_name, table_name: t.table_name }; + } else { + const rel = normRel(t.relation || t.table || t.target_relation || t.target_table || ""); + ent = rel ? relToEnt.get(rel) : null; + } + + if (!ent) continue; + + const rec = { ...t, _bucket: bucket }; + const col = String(t.column || t.target_column || "").trim(); + + if (ent.kind === "model") { + const arr = byModel.get(ent.name) || []; + arr.push(rec); + byModel.set(ent.name, arr); + + if (col) { + const k = `${ent.name}.${col}`; + const arr2 = byModelCol.get(k) || []; + arr2.push(rec); + byModelCol.set(k, arr2); + } + } else if (ent.kind === "source") { + const arr = bySource.get(ent.key) || []; + arr.push(rec); + bySource.set(ent.key, arr); + } + } + + return { byModel, bySource, byModelCol, summary }; +} + +function utestBucket(t) { + const st = String(t.status || t.state || "").toLowerCase(); + if (st === "pass" || st === "ok" || st === "success") return "pass"; + if (st === "skip" || st === "skipped") return "skip"; + if (st === "error") return "error"; + if (st === "fail" || st === "failed") return "fail"; + if (t.ok === true) return "pass"; + if (t.ok === false) return "fail"; + return st || "fail"; +} + +function buildUTestIndex(state) { + const tests = (state.utestResults?.results 
|| state.utestResults?.tests || []); + const byModel = new Map(); + const summary = { pass: 0, fail: 0, error: 0, skip: 0, total: 0 }; + + for (const t of tests) { + const model = String(t.model || t.model_name || "").trim(); + if (!model) continue; + + const bucket = utestBucket(t); + summary[bucket] = (summary[bucket] || 0) + 1; + summary.total++; + + const rec = { ...t, _bucket: bucket }; + const arr = byModel.get(model) || []; + arr.push(rec); + byModel.set(model, arr); + } + + return { byModel, summary }; +} + +function pillForRunStatus(status) { + const st = String(status || "").toLowerCase(); + const cls = + (st === "success" || st === "ok" || st === "pass") ? "pillGood" : + (st === "skipped" || st === "warn" || st === "warning") ? "pillWarn" : + (st ? "pillBad" : "pill"); + return el("span", { class: `pillSmall ${cls}` }, st || "unknown"); +} + +function pillForTestBucket(bucket) { + const cls = + bucket === "pass" ? "pillGood" : + bucket === "warn" ? "pillWarn" : + bucket === "skip" ? "" : + "pillBad"; + return el("span", { class: `pillSmall ${cls}` }, bucket); +} + +function pillForUTestBucket(bucket) { + const cls = + bucket === "pass" ? "pillGood" : + bucket === "skip" ? 
"" : + "pillBad"; // fail + error both bad + return el("span", { class: `pillSmall ${cls}` }, bucket); +} + +function renderHealthCardForModel(state, m) { + const hasRuns = !!state.runResults; + const hasTests = !!state.testResults; + const hasUTests = !!state.utestResults; + + if (!hasRuns && !hasTests && !hasUTests) return null; + + const run = state.byRun?.[m.name] || state.byRun?.get?.(m.name); + const tests = state.testIndex?.byModel?.get?.(m.name) || []; + const utests = state.utestIndex?.byModel?.get?.(m.name) || []; + + const runBlock = (() => { + if (!hasRuns) return el("p", { class: "empty" }, "No run results were loaded."); + if (!run) return el("p", { class: "empty" }, "No run info found for this model in run_results.json."); + + return el("div", { class: "kv" }, + el("div", { class: "k" }, "Status"), + el("div", {}, pillForRunStatus(run.status)), + + el("div", { class: "k" }, "Finished"), + el("div", {}, fmtDateTime(run.finished_at || run.started_at) || "—"), + + el("div", { class: "k" }, "Duration"), + el("div", {}, fmtDurationMs(run.duration_ms)), + + el("div", { class: "k" }, "Rows"), + el("div", {}, (run.rows == null ? "—" : String(run.rows))), + + el("div", { class: "k" }, "Bytes scanned"), + el("div", {}, (run.bytes_scanned == null ? 
"—" : String(run.bytes_scanned))), + ); + })(); - const vb = miniSvg.viewBox.baseVal; - const boundX = vb.x + vb.width - vw; - const boundY = vb.y + vb.height - vh; + const testsBlock = (() => { + if (!hasTests) return el("p", { class: "empty" }, "No test results were loaded."); + if (!tests.length) return el("p", { class: "empty" }, "No tests recorded for this model."); - const vx = clamp(miniDrag.vx0 + dx, Math.min(vb.x, boundX), Math.max(vb.x, boundX)); - const vy = clamp(miniDrag.vy0 + dy, Math.min(vb.y, boundY), Math.max(vb.y, boundY)); + const counts = { pass: 0, warn: 0, fail: 0, skip: 0 }; + for (const t of tests) counts[t._bucket] = (counts[t._bucket] || 0) + 1; - tx = -vx * scale; - ty = -vy * scale; + const header = el("div", { class: "row", style: "gap:8px; flex-wrap:wrap;" }, + el("span", { class: "pillSmall pillGood" }, `pass ${counts.pass || 0}`), + el("span", { class: "pillSmall pillWarn" }, `warn ${counts.warn || 0}`), + el("span", { class: "pillSmall pillBad" }, `fail ${counts.fail || 0}`), + el("span", { class: "pillSmall" }, `skip ${counts.skip || 0}`), + ); - if (Math.abs(dx) + Math.abs(dy) > 0.5) miniDrag.moved = true; - apply(); - }); + const rows = tests + .slice() + .sort((a, b) => String(a._bucket).localeCompare(String(b._bucket))) // keeps fails near top-ish + .slice(0, 50); - function endMiniDrag(ev) { - if (!miniDrag || ev.pointerId !== miniDrag.id) return; - ev.preventDefault(); + return el("div", {}, + header, + el("details", { style: "margin-top:10px;" }, + el("summary", {}, "Show test details"), + el("table", { class: "table", style: "margin-top:10px;" }, + el("thead", {}, el("tr", {}, + el("th", {}, "Status"), + el("th", {}, "Test"), + el("th", {}, "Column"), + el("th", {}, "Message"), + )), + el("tbody", {}, + ...rows.map(t => el("tr", {}, + el("td", {}, pillForTestBucket(t._bucket)), + el("td", {}, el("code", {}, String(t.name || t.test_name || t.test || t.kind || "test"))), + el("td", {}, t.column ? 
el("code", {}, String(t.column)) : el("span", { class: "empty" }, "—")), + el("td", {}, String(t.message || t.msg || t.error || "")), + )) + ) + ) + ) + ); + })(); - miniSuppressClick = miniDrag.moved; - miniDrag = null; + const utestsBlock = (() => { + if (!hasUTests) return el("p", { class: "empty" }, "No unit test results were loaded."); + if (!utests.length) return el("p", { class: "empty" }, "No unit tests recorded for this model."); - try { miniSvg.releasePointerCapture(ev.pointerId); } catch {} - miniView.style.cursor = "grab"; - } + const counts = { pass: 0, fail: 0, error: 0, skip: 0 }; + for (const t of utests) counts[t._bucket] = (counts[t._bucket] || 0) + 1; - miniSvg.addEventListener("pointerup", endMiniDrag); - miniSvg.addEventListener("pointercancel", endMiniDrag); + const header = el("div", { class: "row", style: "gap:8px; flex-wrap:wrap;" }, + el("span", { class: "pillSmall pillGood" }, `pass ${counts.pass || 0}`), + el("span", { class: "pillSmall pillBad" }, `fail ${counts.fail || 0}`), + el("span", { class: "pillSmall pillBad" }, `error ${counts.error || 0}`), + el("span", { class: "pillSmall" }, `skip ${counts.skip || 0}`), + ); - miniSvg.addEventListener("click", (ev) => { - if (miniSuppressClick) { miniSuppressClick = false; return; } - const rect = miniSvg.getBoundingClientRect(); - const px = (ev.clientX - rect.left) / rect.width; - const py = (ev.clientY - rect.top) / rect.height; + const rows = utests + .slice() + .sort((a, b) => String(a._bucket).localeCompare(String(b._bucket))) + .slice(0, 50); - const vb = miniSvg.viewBox.baseVal; - const gx = vb.x + px * vb.width; - const gy = vb.y + py * vb.height; + return el("div", {}, + header, + el("details", { style: "margin-top:10px;" }, + el("summary", {}, "Show unit test details"), + el("table", { class: "table", style: "margin-top:10px;" }, + el("thead", {}, el("tr", {}, + el("th", {}, "Status"), + el("th", {}, "Case"), + el("th", {}, "Duration"), + el("th", {}, "Cache"), + el("th", {}, 
"Message"), + )), + el("tbody", {}, + ...rows.map(t => el("tr", {}, + el("td", {}, pillForUTestBucket(t._bucket)), + el("td", {}, el("code", {}, String(t.case || ""))), + el("td", {}, fmtDurationMs(t.duration_ms)), + el("td", {}, t.cache_hit ? el("span", { class: "pillSmall" }, "hit") : el("span", { class: "empty" }, "—")), + el("td", {}, String(t.message || "")), + )) + ) + ) + ) + ); + })(); - // center clicked point - const sr = svg.getBoundingClientRect(); - tx = sr.width / 2 - gx * scale; - ty = sr.height / 2 - gy * scale; - apply(); - }); + return el("div", { class: "card" }, + el("h3", { style: "margin-top:0;" }, "Health"), - miniHost.appendChild(miniSvg); - updateMini(); - } + el("div", { class: "healthStack" }, + // Row 1: Last run (always visible) + el("div", { class: "healthRow" }, + el("div", { class: "healthRowHead" }, el("h4", { style:"margin:0;" }, "Last run")), + el("div", { class: "healthRowBody" }, runBlock) + ), - function centerOn(id, zoom = 1.25) { - const n = byId.get(id); - if (!n) return; + // Row 2: Tests (collapsible) + hasTests ? el("details", { class: "healthRow", open: false }, + el("summary", { class: "healthRowSummary" }, + el("div", { class: "healthRowHead" }, el("h4", { style:"margin:0;" }, "Tests")), + ), + el("div", { class: "healthRowBody" }, testsBlock) + ) : null, - const sr = svg.getBoundingClientRect(); - const cx = n.x + n.w / 2; - const cy = n.y + n.h / 2; + // Row 3: Unit tests (collapsible) + hasUTests ? 
el("details", { class: "healthRow", open: false }, + el("summary", { class: "healthRowSummary" }, + el("div", { class: "healthRowHead" }, el("h4", { style:"margin:0;" }, "Unit tests")), + ), + el("div", { class: "healthRowBody" }, utestsBlock) + ) : null + ) + ); +} - scale = Math.max(0.1, Math.min(3.0, zoom)); - tx = sr.width / 2 - cx * scale; - ty = sr.height / 2 - cy * scale; - apply(); +async function loadOptionalJson(url) { + try { + const res = await fetch(url, { cache: "no-store" }); + if (!res.ok) return null; + return await res.json(); + } catch { + return null; } - - return { - fit, reset, - zoomIn: () => zoomBy(1.12, svg.getBoundingClientRect().left + 10, svg.getBoundingClientRect().top + 10), - zoomOut: () => zoomBy(1 / 1.12, svg.getBoundingClientRect().left + 10, svg.getBoundingClientRect().top + 10), - svg, - focus: (id, { zoom = 1.25, pin = true } = {}) => { centerOn(id, zoom); if (pin) setPinned(id); }, - refresh: () => applyHighlight(), - setPinned, - }; } async function copyText(text) { @@ -1913,12 +4475,25 @@ async function main() { app.textContent = "Loading…"; const manifest = await loadManifest(); + const [runResults, testResults, utestResults] = await Promise.all([ + loadOptionalJson(RUN_RESULTS_URL), + loadOptionalJson(TEST_RESULTS_URL), + loadOptionalJson(UTEST_RESULTS_URL), + ]); + const state = { manifest, filter: "", byModel: byName(manifest.models || [], (m) => m.name), bySource: byName(manifest.sources || [], (s) => `${s.source_name}.${s.table_name}`), + runResults, + testResults, + utestResults, }; + state.byRun = byName((runResults?.results || []), (r) => r.name); + state.testIndex = buildTestIndex(state); + state.utestIndex = buildUTestIndex(state); + state.sidebarMatches = { models: 0, sources: 0 }; state.graphUI = { mode: "both", // "up" | "down" | "both" | "off" @@ -1936,6 +4511,7 @@ async function main() { paletteList: null, }; state.ui = ui; + state.lineageIndex = buildLineageIndex(state.manifest); // Mount shell once const 
shell = el("div", { class: "shell" }, @@ -1956,11 +4532,16 @@ async function main() { paletteQuery: `fft_docs:${projKey}:palette_query`, }; STORE.modelTab = `fft_docs:${projKey}:model_tab_default`; + STORE.modelCodeView = `fft_docs:${projKey}:model_code_view_default`; + state.modelTabDefault = safeGet(STORE.modelTab) || "overview"; + state.modelCodeViewDefault = safeGet(STORE.modelCodeView) || ""; state.STORE = STORE; + // Allow replaceHashQuery() (global) to keep lastHash in sync even when we use history.replaceState. window.__fftLastHashKey = STORE.lastHash; + // Persisted UI state state.filter = safeGet(STORE.filter) ?? ""; state.sidebarCollapsed = safeGetJSON(STORE.collapsed, { @@ -1977,6 +4558,7 @@ async function main() { // Initialize model facets from URL (shareable filters) state.modelFacets = currentModelFacets(); + readShareStateFromUrl(state); toastOnce({ key: `fft_docs_search_toast_seen:${projKey}`, @@ -2102,32 +4684,6 @@ async function main() { const sel = Math.max(0, Math.min(state.search.selected || 0, results.length - 1)); const q = (state.search.query || "").trim(); - // const sub = (() => { - // if (r.kind === "column") { - // const parts = [ - // "COLUMN", - // r.model || "", - // r.relation ? `• ${r.relation}` : "", - // r.dtype ? `• ${r.dtype}` : "", - // ].filter(Boolean).join(" "); - // const snip = makeSnippet(r.descText || "", q, 90); - // return snip ? `${parts} • ${snip}` : parts; - // } - // if (r.kind === "model") { - // const snip = makeSnippet((r.descText || ""), q, 90); - // return snip ? `MODEL • ${r.subtitle || ""} • ${snip}` : `MODEL • ${r.subtitle || ""}`; - // } - // if (r.kind === "source") { - // const snip = makeSnippet((r.descText || ""), q, 90); - // return snip ? `SOURCE • ${r.subtitle || ""} • ${snip}` : `SOURCE • ${r.subtitle || ""}`; - // } - // return `${(r.kind || "").toUpperCase()} • ${r.subtitle || ""}`; - // })(); - - // const right = r.kind === "column" && r.dtype - // ? 
el("span", { class: "pill" }, r.dtype) - // : el("div", { class: "kbd" }, "↵"); - const subFor = (r) => { if (r.kind === "column") { const parts = [ @@ -2152,8 +4708,8 @@ async function main() { const rightFor = (r) => (r.kind === "column" && r.dtype) - ? el("span", { class: "pill" }, r.dtype) - : el("div", { class: "kbd" }, "↵"); + ? el("span", { class: "pill" }, r.dtype) + : el("div", { class: "kbd" }, "↵"); state.ui.paletteList.replaceChildren( ...(results.length @@ -2357,211 +4913,285 @@ async function main() { } function buildSidebar() { - if (ui.sidebar.root) return; + if (ui.sidebar.root) return; - ui.sidebar.input = el("input", { - class: "search", - type: "search", - placeholder: "Filter sidebar… (press /)", - value: state.filter || "", - oninput: (e) => { - state.filter = e.target.value || ""; - safeSet(STORE.filter, state.filter); - updateSidebarLists(); - }, - onkeydown: (e) => { - if (e.key !== "Enter") return; + ui.sidebar.input = el("input", { + class: "search", + type: "search", + placeholder: "Filter sidebar… (press /)", + value: state.filter || "", + oninput: (e) => { + state.filter = e.target.value || ""; + safeSet(STORE.filter, state.filter); + syncShareStateToUrl(state); + updateSidebarLists(); + }, + onkeydown: (e) => { + if (e.key !== "Enter") return; - const q = (state.filter || "").trim(); - const total = (state.sidebarMatches.models || 0) + (state.sidebarMatches.sources || 0); + const q = (state.filter || "").trim(); + const total = (state.sidebarMatches.models || 0) + (state.sidebarMatches.sources || 0); - if (!q) { - e.preventDefault(); - openPalette(""); - return; - } + if (!q) { + e.preventDefault(); + openPalette(""); + return; + } - if (total === 0) { - e.preventDefault(); - openPalette(q); - return; - } - }, - }); + if (total === 0) { + e.preventDefault(); + openPalette(q); + return; + } + }, + }); - // --- Facets live next to the sidebar search (not under Models) --- - const applyFacetsToUrl = () => { - replaceHashQuery((q) => 
writeModelFacetsToQuery(q, state.modelFacets)); - // keep state in sync (routeWithFacets reads from URL) - state.modelFacets = currentModelFacets(); - }; + // --- Facets live next to the sidebar search (not under Models) --- + const applyFacetsToUrl = () => { + replaceHashQuery((q) => writeModelFacetsToQuery(q, state.modelFacets)); + // keep state in sync (routeWithFacets reads from URL) + state.modelFacets = currentModelFacets(); + }; + + const debouncedPath = debounce((val) => { + state.modelFacets.pathPrefix = (val || "").trim(); + applyFacetsToUrl(); + updateSidebarLists(); + }, 180); + + ui.sidebar.kindSqlBtn = el("button", { + class: "facetChip", + type: "button", + onclick: () => { + const kinds = new Set(state.modelFacets.kinds || ["sql", "python"]); + if (kinds.has("sql")) kinds.delete("sql"); else kinds.add("sql"); + if (kinds.size === 0) { kinds.add("sql"); kinds.add("python"); } // avoid empty selection + state.modelFacets.kinds = Array.from(kinds); - const debouncedPath = debounce((val) => { - state.modelFacets.pathPrefix = (val || "").trim(); applyFacetsToUrl(); updateSidebarLists(); - }, 180); + } + }, "SQL"); - ui.sidebar.kindSqlBtn = el("button", { - class: "facetChip", - type: "button", - onclick: () => { - const kinds = new Set(state.modelFacets.kinds || ["sql", "python"]); - if (kinds.has("sql")) kinds.delete("sql"); else kinds.add("sql"); - if (kinds.size === 0) { kinds.add("sql"); kinds.add("python"); } // avoid empty selection - state.modelFacets.kinds = Array.from(kinds); + ui.sidebar.kindPyBtn = el("button", { + class: "facetChip", + type: "button", + onclick: () => { + const kinds = new Set(state.modelFacets.kinds || ["sql", "python"]); + if (kinds.has("python")) kinds.delete("python"); else kinds.add("python"); + if (kinds.size === 0) { kinds.add("sql"); kinds.add("python"); } + state.modelFacets.kinds = Array.from(kinds); - applyFacetsToUrl(); - updateSidebarLists(); - } - }, "SQL"); + applyFacetsToUrl(); + updateSidebarLists(); + } + }, 
"Python"); - ui.sidebar.kindPyBtn = el("button", { - class: "facetChip", - type: "button", - onclick: () => { - const kinds = new Set(state.modelFacets.kinds || ["sql", "python"]); - if (kinds.has("python")) kinds.delete("python"); else kinds.add("python"); - if (kinds.size === 0) { kinds.add("sql"); kinds.add("python"); } - state.modelFacets.kinds = Array.from(kinds); + ui.sidebar.matSelect = el("select", { + class: "facetSelect", + onchange: (e) => { + state.modelFacets.materialized = normalizeMaterialized(e.target.value || ""); + applyFacetsToUrl(); + updateSidebarLists(); + } + }); - applyFacetsToUrl(); - updateSidebarLists(); - } - }, "Python"); - - ui.sidebar.matSelect = el("select", { - class: "facetSelect", - onchange: (e) => { - state.modelFacets.materialized = normalizeMaterialized(e.target.value || ""); - applyFacetsToUrl(); - updateSidebarLists(); - } - }); + ui.sidebar.pathDatalist = el("datalist", { id: "modelPathPrefixes" }); + ui.sidebar.pathInput = el("input", { + class: "facetInput", + type: "search", + placeholder: "Path prefix…", + list: "modelPathPrefixes", + value: state.modelFacets.pathPrefix || "", + oninput: (e) => debouncedPath(e.target.value || ""), + onkeydown: (e) => { + if (e.key !== "Enter") return; + debouncedPath.cancel(); + state.modelFacets.pathPrefix = (e.target.value || "").trim(); + applyFacetsToUrl(); + updateSidebarLists(); + } + }); - ui.sidebar.pathDatalist = el("datalist", { id: "modelPathPrefixes" }); - ui.sidebar.pathInput = el("input", { - class: "facetInput", - type: "search", - placeholder: "Path prefix…", - list: "modelPathPrefixes", - value: state.modelFacets.pathPrefix || "", - oninput: (e) => debouncedPath(e.target.value || ""), - onkeydown: (e) => { - if (e.key !== "Enter") return; - debouncedPath.cancel(); - state.modelFacets.pathPrefix = (e.target.value || "").trim(); - applyFacetsToUrl(); - updateSidebarLists(); - } - }); + ui.sidebar.clearFacetsBtn = el("button", { + class: "facetClear", + type: "button", + 
onclick: () => { + debouncedPath.cancel(); + state.modelFacets = { + kinds: ["sql", "python"], + materialized: "", + pathPrefix: "", + tags: [], + owners: [], + groupBy: "" + }; + ui.sidebar.pathInput.value = ""; + applyFacetsToUrl(); + updateSidebarLists(); + } + }, "Clear"); - ui.sidebar.clearFacetsBtn = el("button", { - class: "facetClear", - type: "button", - onclick: () => { - debouncedPath.cancel(); - state.modelFacets = { kinds: ["sql", "python"], materialized: "", pathPrefix: "" }; - ui.sidebar.pathInput.value = ""; - applyFacetsToUrl(); - updateSidebarLists(); - } - }, "Clear"); + ui.sidebar.groupSelect = el("select", { class:"facetSelect", onchange:(e)=>{ + state.modelFacets.groupBy = (e.target.value === "owner" || e.target.value === "domain") ? e.target.value : ""; + replaceHashQuery((q)=> writeModelFacetsToQuery(q, state.modelFacets)); + state.modelFacets = currentModelFacets(); + updateSidebarLists(); + }}); + ui.sidebar.tagBox = el("div", { class:"facetChips facetChipsWrap" }); + ui.sidebar.tagInput = el("input", { class:"facetInput", placeholder:"Add tag…", list:"modelTags" }); + ui.sidebar.tagDatalist = el("datalist", { id:"modelTags" }); + ui.sidebar.groupSelect.replaceChildren( + el("option", { value:"" }, "No grouping"), + el("option", { value:"owner" }, "Group: owner"), + el("option", { value:"domain" }, "Group: domain"), + ); + ui.sidebar.tagInput.onkeydown = (e) => { + if (e.key === "Enter") { + e.preventDefault(); + const t = (ui.sidebar.tagInput.value || "").trim(); + if (!t) return; + const tags = new Set(state.modelFacets.tags || []); + tags.add(t); + state.modelFacets.tags = Array.from(tags); + ui.sidebar.tagInput.value = ""; + replaceHashQuery((q)=> writeModelFacetsToQuery(q, state.modelFacets)); + state.modelFacets = currentModelFacets(); + updateSidebarLists(); + } + if (e.key === "Escape") { + e.preventDefault(); + ui.sidebar.tagInput.value = ""; + } + }; - ui.sidebar.facetBox = el("div", { class: "facetBox" }, - el("div", { class: 
"facetRow" }, - el("div", { class: "facetChips" }, ui.sidebar.kindSqlBtn, ui.sidebar.kindPyBtn), - ui.sidebar.matSelect - ), - el("div", { class: "facetRow" }, - ui.sidebar.pathInput, - ui.sidebar.clearFacetsBtn - ), - ui.sidebar.pathDatalist - ); - const overviewSection = el("div", { class: "section" }, - el("div", {}, - (ui.sidebar.overviewLink = el("a", { - href: routeWithFacets("#/"), - onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/"); }, - class: "itemLink", - style: "display:flex; align-items:center; justify-content:space-between; padding:8px 10px; border:1px solid var(--border); border-radius:12px; text-decoration:none; color:inherit;" - }, - el("span", {}, "Overview (DAG)"), - el("span", { class: "pill" }, "Home") - )) - ) - ); + ui.sidebar.ownerBox = el("div", { class:"facetChips facetChipsWrap" }); + ui.sidebar.ownerInput = el("input", { class:"facetInput", placeholder:"Add owner…", list:"modelOwners" }); + ui.sidebar.ownerDatalist = el("datalist", { id:"modelOwners" }); + + ui.sidebar.ownerInput.onkeydown = (e) => { + if (e.key === "Enter") { + e.preventDefault(); + const o = (ui.sidebar.ownerInput.value || "").trim(); + if (!o) return; + const owners = new Set(state.modelFacets.owners || []); + owners.add(o); + state.modelFacets.owners = Array.from(owners); + ui.sidebar.ownerInput.value = ""; + replaceHashQuery((q)=> writeModelFacetsToQuery(q, state.modelFacets)); + state.modelFacets = currentModelFacets(); + updateSidebarLists(); + } + }; + + ui.sidebar.facetBox = el("div", { class:"facetBox" }, + el("div", { class:"facetRow" }, + el("div", { class:"facetChips" }, ui.sidebar.kindSqlBtn, ui.sidebar.kindPyBtn), + ui.sidebar.matSelect, + ui.sidebar.groupSelect + ), + el("div", { class:"facetRow" }, + ui.sidebar.pathInput, + ui.sidebar.clearFacetsBtn + ), + el("div", { class:"facetRow" }, + el("div", { class:"facetLabel" }, "Tags"), + ui.sidebar.tagInput + ), + ui.sidebar.tagBox, + ui.sidebar.tagDatalist, + el("div", { 
class:"facetRow" }, + el("div", { class:"facetLabel" }, "Owners"), + ui.sidebar.ownerInput + ), + ui.sidebar.ownerBox, + ui.sidebar.ownerDatalist, + ui.sidebar.pathDatalist + ); - ui.sidebar.modelsTitle = el("div"); - ui.sidebar.sourcesTitle = el("div"); - ui.sidebar.macrosTitle = el("div"); + const overviewSection = el("div", { class: "section" }, + el("div", {}, + (ui.sidebar.overviewLink = el("a", { + href: routeWithFacets("#/"), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/"); }, + class: "itemLink", + style: "display:flex; align-items:center; justify-content:space-between; padding:8px 10px; border:1px solid var(--border); border-radius:12px; text-decoration:none; color:inherit;" + }, + el("span", {}, "Overview (DAG)"), + el("span", { class: "pill" }, "Home") + )) + ) + ); - ui.sidebar.modelsList = el("ul", { class: "list" }); - ui.sidebar.sourcesList = el("ul", { class: "list" }); - ui.sidebar.macrosList = el("ul", { class: "list" }); + ui.sidebar.modelsTitle = el("div"); + ui.sidebar.sourcesTitle = el("div"); + ui.sidebar.macrosTitle = el("div"); - ui.sidebar.modelsSection = el("div", { class: "section" }, ui.sidebar.modelsTitle, ui.sidebar.modelsList); - ui.sidebar.sourcesSection = el("div", { class: "section" }, ui.sidebar.sourcesTitle, ui.sidebar.sourcesList); - ui.sidebar.macrosSection = el("div", { class: "section" }, ui.sidebar.macrosTitle, ui.sidebar.macrosList); + ui.sidebar.modelsList = el("ul", { class: "list" }); + ui.sidebar.sourcesList = el("ul", { class: "list" }); + ui.sidebar.macrosList = el("ul", { class: "list" }); - ui.sidebar.projectTitle = el("div"); - const statRow = (k, v) => - el("div", { class: "kvRow" }, - el("span", { class: "k" }, k), - el("span", { class: "v" }, v) - ); + ui.sidebar.modelsSection = el("div", { class: "section" }, ui.sidebar.modelsTitle, ui.sidebar.modelsList); + ui.sidebar.sourcesSection = el("div", { class: "section" }, ui.sidebar.sourcesTitle, ui.sidebar.sourcesList); + 
ui.sidebar.macrosSection = el("div", { class: "section" }, ui.sidebar.macrosTitle, ui.sidebar.macrosList); - ui.sidebar.projectBody = el("div", { class: "kvRows" }, - statRow("Models", String((state.manifest.models || []).length)), - statRow("Sources", String((state.manifest.sources || []).length)), - statRow("Macros", String((state.manifest.macros || []).length)), - statRow("Schema", state.manifest.project?.with_schema ? "enabled" : "disabled"), - statRow("Generated", state.manifest.project?.generated_at || "—"), + ui.sidebar.projectTitle = el("div"); + const statRow = (k, v) => + el("div", { class: "kvRow" }, + el("span", { class: "k" }, k), + el("span", { class: "v" }, v) ); - ui.sidebar.projectSection = el("div", { class: "section" }, - ui.sidebar.projectTitle, - ui.sidebar.projectBody - ); + ui.sidebar.projectBody = el("div", { class: "kvRows" }, + statRow("Models", String((state.manifest.models || []).length)), + statRow("Sources", String((state.manifest.sources || []).length)), + statRow("Macros", String((state.manifest.macros || []).length)), + statRow("Schema", state.manifest.project?.with_schema ? 
"enabled" : "disabled"), + statRow("Generated", state.manifest.project?.generated_at || "—"), + ); + + ui.sidebar.projectSection = el("div", { class: "section" }, + ui.sidebar.projectTitle, + ui.sidebar.projectBody + ); - ui.sidebar.root = el( + ui.sidebar.root = el( + "div", + { class: "sidebar" }, + el( "div", - { class: "sidebar" }, - el( - "div", - { class: "brand" }, - (ui.sidebar.brandLink = el("a", { - href: routeWithFacets("#/"), - style: "color:inherit; text-decoration:none;", - onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/"); } - }, el("h1", {}, state.manifest.project?.name || "Docs"))), - el("span", { class: "badge", title: `Generated: ${state.manifest.project?.generated_at || ""}` }, "SPA") - ), - el( - "div", - { class: "searchWrap" }, - ui.sidebar.input, - el("span", { class: "searchKbd kbd" }, "/") - ), - ui.sidebar.facetBox, - el("div", { class: "searchTip" }, "Tip: Press / (or Ctrl+K) to search everything (models, sources, columns)."), - overviewSection, - ui.sidebar.projectSection, - ui.sidebar.modelsSection, - ui.sidebar.sourcesSection, - ui.sidebar.macrosSection, - ); + { class: "brand" }, + (ui.sidebar.brandLink = el("a", { + href: routeWithFacets("#/"), + style: "color:inherit; text-decoration:none;", + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/"); } + }, el("h1", {}, state.manifest.project?.name || "Docs"))), + el("span", { class: "badge", title: `Generated: ${state.manifest.project?.generated_at || ""}` }, "SPA") + ), + el( + "div", + { class: "searchWrap" }, + ui.sidebar.input, + el("span", { class: "searchKbd kbd" }, "/") + ), + ui.sidebar.facetBox, + el("div", { class: "searchTip" }, "Tip: Press / (or Ctrl+K) to search everything (models, sources, columns)."), + overviewSection, + ui.sidebar.projectSection, + ui.sidebar.modelsSection, + ui.sidebar.sourcesSection, + ui.sidebar.macrosSection, + ); - ui.sidebarHost.replaceChildren(ui.sidebar.root); + 
ui.sidebarHost.replaceChildren(ui.sidebar.root); - // Turn titles into toggle headers - sectionHeader(ui.sidebar.modelsTitle, "models", "Models"); - sectionHeader(ui.sidebar.sourcesTitle, "sources", "Sources"); - sectionHeader(ui.sidebar.macrosTitle, "macros", "Macros"); - sectionHeader(ui.sidebar.projectTitle, "project", "Project"); - } + // Turn titles into toggle headers + sectionHeader(ui.sidebar.modelsTitle, "models", "Models"); + sectionHeader(ui.sidebar.sourcesTitle, "sources", "Sources"); + sectionHeader(ui.sidebar.macrosTitle, "macros", "Macros"); + sectionHeader(ui.sidebar.projectTitle, "project", "Project"); +} function applySidebarCollapse() { const c = state.sidebarCollapsed || {}; @@ -2574,6 +5204,7 @@ async function main() { function updateSidebarLists() { // Keep facets in sync with URL in case of back/forward or manual edits state.modelFacets = currentModelFacets(); + ui.sidebar.groupSelect.value = state.modelFacets.groupBy || ""; const q = (state.filter || "").trim().toLowerCase(); const models = state.manifest.models || []; @@ -2675,6 +5306,19 @@ async function main() { ui.sidebar.clearFacetsBtn.textContent = activeN ? `Clear (${activeN})` : "Clear"; ui.sidebar.clearFacetsBtn.disabled = activeN === 0; + const baseForTags = filterModelsWithFacets(modelsAfterText, { ...state.modelFacets, tags: [] }); + const tagCounts = new Map(); + for (const m of baseForTags) { + for (const t of (Array.isArray(m.tags) ? 
m.tags : [])) { + const key = String(t).trim(); + if (!key) continue; + tagCounts.set(key, (tagCounts.get(key) || 0) + 1); + } + } + const topTags = Array.from(tagCounts.entries()) + .sort((a,b)=> (b[1]-a[1]) || a[0].localeCompare(b[0])) + .slice(0, 30); + // Keep "home" hrefs up-to-date for copy/open-in-new-tab if (ui.sidebar.brandLink) ui.sidebar.brandLink.href = routeWithFacets("#/"); if (ui.sidebar.overviewLink) ui.sidebar.overviewLink.href = routeWithFacets("#/"); @@ -2683,21 +5327,128 @@ async function main() { ui.sidebar.modelsTitle.textContent = `Models (${filteredModels.length})`; ui.sidebar.sourcesTitle.textContent = `Sources (${filteredSources.length})`; - ui.sidebar.modelsList.replaceChildren( - ...filteredModels.map(m => - el("li", { class: "item" }, - el("a", { - href: routeWithFacets(`#/model/${escapeHashPart(m.name)}`), - onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets(`#/model/${escapeHashPart(m.name)}`); }, - title: [m.description_short || "", m.path ? `(${m.path})` : ""].filter(Boolean).join(" ") || m.name, - }, - el("span", {}, m.name), - pillForKind(m.kind === "python" ? "python" : "sql") - ) - ) - ) + ui.sidebar.tagDatalist.replaceChildren( + ...topTags.map(([t,n]) => el("option", { value:t }, `${t} (${n})`)) + ); + + const activeTags = new Set((state.modelFacets.tags || []).map(x => x.toLowerCase())); + + ui.sidebar.tagBox.replaceChildren( + ...topTags.map(([t,n]) => { + const on = activeTags.has(t.toLowerCase()); + const b = el("button", { + class: `facetChip ${on ? 
"active" : ""}`, + type:"button", + onclick: () => { + const tags = new Set(state.modelFacets.tags || []); + if (on) { + // remove case-insensitively + for (const x of Array.from(tags)) if (String(x).toLowerCase() === t.toLowerCase()) tags.delete(x); + } else { + tags.add(t); + } + state.modelFacets.tags = Array.from(tags); + replaceHashQuery((q)=> writeModelFacetsToQuery(q, state.modelFacets)); + state.modelFacets = currentModelFacets(); + updateSidebarLists(); + } + }, `${t} (${n})`); + return b; + }) + ); + + const baseForOwners = filterModelsWithFacets(modelsAfterText, { ...state.modelFacets, owners: [] }); + const ownerCounts = new Map(); + for (const m of baseForOwners) { + for (const o of (Array.isArray(m.owners) ? m.owners : [])) { + const key = String(o).trim(); + if (!key) continue; + ownerCounts.set(key, (ownerCounts.get(key) || 0) + 1); + } + } + const topOwners = Array.from(ownerCounts.entries()) + .sort((a,b)=> (b[1]-a[1]) || a[0].localeCompare(b[0])) + .slice(0, 30); + + ui.sidebar.ownerDatalist.replaceChildren( + ...topOwners.map(([o,n]) => el("option", { value:o }, `${o} (${n})`)) + ); + + const activeOwners = new Set((state.modelFacets.owners || []).map(x => String(x).toLowerCase())); + ui.sidebar.ownerBox.replaceChildren( + ...topOwners.map(([o,n]) => { + const on = activeOwners.has(o.toLowerCase()); + return el("button", { + class: `facetChip ${on ? "active" : ""}`, + type:"button", + onclick: () => { + const owners = new Set(state.modelFacets.owners || []); + if (on) { + for (const x of Array.from(owners)) if (String(x).toLowerCase() === o.toLowerCase()) owners.delete(x); + } else { + owners.add(o); + } + state.modelFacets.owners = Array.from(owners); + replaceHashQuery((q)=> writeModelFacetsToQuery(q, state.modelFacets)); + state.modelFacets = currentModelFacets(); + updateSidebarLists(); + } + }, `${o} (${n})`); + }) ); + function modelGroupKey(m) { + if (state.modelFacets.groupBy === "owner") { + const owners = Array.isArray(m.owners) ? 
m.owners : []; + return owners.length ? String(owners[0]) : "(unowned)"; + } + if (state.modelFacets.groupBy === "domain") { + return (m.domain || stripModelsPrefix(m.path || "").split("/")[0] || "(no domain)"); + } + return ""; + } + + function renderModelLi(m) { + const href = routeWithFacets(`#/model/${escapeHashPart(m.name)}`); + return el("li", { class:"item" }, + el("a", { + href, + onclick:(e)=>{ e.preventDefault(); location.hash = href; }, + title: [m.description_short || "", m.path ? `(${m.path})` : ""].filter(Boolean).join(" ") || m.name, + }, + el("span", {}, m.name), + pillForKind(m.kind === "python" ? "python" : "sql") + ) + ); + } + + if (!state.modelFacets.groupBy) { + ui.sidebar.modelsList.replaceChildren(...filteredModels.map(renderModelLi)); + } else { + const groups = new Map(); + for (const m of filteredModels) { + const k = modelGroupKey(m); + if (!groups.has(k)) groups.set(k, []); + groups.get(k).push(m); + } + const keys = Array.from(groups.keys()).sort((a,b)=> a.localeCompare(b)); + + const children = []; + for (const k of keys) { + const items = groups.get(k); + items.sort((a,b)=> (a.name||"").localeCompare(b.name||"")); + + children.push(el("li", { class:"groupHeader" }, + el("div", { class:"groupHeaderRow" }, + el("span", {}, k), + el("span", { class:"pill" }, String(items.length)) + ) + )); + for (const m of items) children.push(renderModelLi(m)); + } + ui.sidebar.modelsList.replaceChildren(...children); + } + ui.sidebar.sourcesList.replaceChildren( ...filteredSources.map(s => { const key = `${s.source_name}.${s.table_name}`; @@ -2721,8 +5472,8 @@ async function main() { ...macros.map(m => el("li", { class: "item" }, el("a", { - href: routeWithFacets("#/macros"), - onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets("#/macros"); }, + href: routeWithFacets(`#/macro/${escapeHashPart(m.name)}`), + onclick: (e) => { e.preventDefault(); location.hash = routeWithFacets(`#/macro/${escapeHashPart(m.name)}`); }, title: m.path || 
m.name, }, el("span", {}, m.name), @@ -2741,30 +5492,47 @@ async function main() { } function updateMain() { - const route = parseRoute(); - let view; - if (route.route === "model") view = renderModel(state, route.name, route.tab, route.col); - else if (route.route === "source") view = renderSource(state, route.source, route.table); - else if (route.route === "macros") view = renderMacros(state); - else view = renderHome(state); - - state.ui.mainHost.replaceChildren(view); - - // If home view contains mermaid, render it now (same as before) - if (route.route === "home") { - queueMicrotask(async () => { - const target = document.getElementById("mermaidTarget"); - if (!target) return; - const dagSrc = state.manifest.dag?.mermaid || ""; - if (!state.mermaid) { - target.textContent = dagSrc; - return; - } - target.innerHTML = `
${dagSrc}
`; - try { await state.mermaid.run({ querySelector: "#mermaidTarget .mermaid" }); } catch {} - }); + try { + const route = parseRoute(); + let view; + if (route.route === "model") view = renderModel(state, route.name, route.tab, route.col); + else if (route.route === "source") view = renderSource(state, route.source, route.table); + else if (route.route === "macro") view = renderMacro(state, route.name); + else if (route.route === "macros") { state.macroQuery = route.q || ""; view = renderMacros(state, state.macroQuery); } + else view = renderHome(state); + + state.ui.mainHost.replaceChildren(view); + + // If home view contains mermaid, render it now (same as before) + if (route.route === "home") { + queueMicrotask(async () => { + const target = document.getElementById("mermaidTarget"); + if (!target) return; + const dagSrc = state.manifest.dag?.mermaid || ""; + if (!state.mermaid) { + target.textContent = dagSrc; + return; + } + target.innerHTML = `
${dagSrc}
`; + try { await state.mermaid.run({ querySelector: "#mermaidTarget .mermaid" }); } catch {} + }); + } + } catch (e) { + console.error(e); + state.ui.mainHost.replaceChildren( + el("div", { class: "main" }, + el("div", { class: "card" }, + el("h2", {}, "Render failed"), + el("p", { class: "empty" }, String(e?.message || e)), + el("pre", { class: "mono", style: "white-space:pre-wrap; margin:10px 0 0 0;" }, + String(e?.stack || "") + ) + ) + ) + ); } } + window.addEventListener("keydown", (e) => { const tag = e.target?.tagName?.toLowerCase(); @@ -2783,8 +5551,11 @@ async function main() { window.addEventListener("hashchange", () => { safeSet(STORE.lastHash, location.hash || "#/"); - closePalette(); - updateSidebarLists(); // facets live in URL query params + closePalette(); // optional: close palette on navigation + + readShareStateFromUrl(state); + + updateSidebarLists(); updateMain(); }); @@ -2808,4 +5579,4 @@ main().catch((e) => { ) ) ); -}); +}); \ No newline at end of file diff --git a/src/fastflowtransform/utest.py b/src/fastflowtransform/utest.py index f9231dd..1a3e47e 100644 --- a/src/fastflowtransform/utest.py +++ b/src/fastflowtransform/utest.py @@ -4,10 +4,11 @@ import hashlib import json import os -from collections.abc import Iterable, Mapping +from collections.abc import Generator, Iterable, Mapping from contextlib import suppress from dataclasses import dataclass, field from pathlib import Path +from time import perf_counter from typing import Any, cast import pandas as pd @@ -655,55 +656,61 @@ def _maybe_skip_by_cache(node: Any, cand_fp: str | None, ctx: UtestCtx) -> bool: return False -def _execute_and_update_cache(node: Any, cand_fp: str | None, ctx: UtestCtx) -> bool: +def _execute_and_update_cache( + node: Any, cand_fp: str | None, ctx: UtestCtx +) -> tuple[bool, str | None]: + """ + Execute the model node and update cache fingerprint if enabled. + + Returns: + (ok, message) where message is only set on failure. 
+ """ ok, err = _execute_node(ctx.executor, node, ctx.jenv) if not ok: - print(f" ❌ execution failed: {err}") + msg = f"execution failed: {err}" + print(f" ❌ {msg}") ctx.failures += 1 - return False + return False, msg + if cand_fp and ctx.cache and ctx.cache_mode == "rw": ctx.computed_fps[node.name] = cand_fp - return True + + return True, None -def _read_and_assert(spec: Any, case: Any, ctx: UtestCtx) -> None: +def _read_and_assert(spec: Any, case: Any, ctx: UtestCtx) -> tuple[bool, str | None]: + """ + Read the target relation and assert expected rows. + + Returns: + (ok, message) where message is set when read/assert fails. + """ ok, df_or_exc, target_rel = _read_target_df(ctx.executor, spec, case) + if not ok: - print(f" ❌ cannot read result '{target_rel}': {df_or_exc}") + msg: str | None = f"cannot read result '{target_rel}': {df_or_exc}" + print(f" ❌ {msg}") ctx.failures += 1 - return + return False, msg + ok2, msg = _assert_expected_rows(df_or_exc, case) if ok2: print(" ✅ ok") - else: - print(f" ❌ {msg}") - ctx.failures += 1 + return True, None + # msg is already a human-readable diff/summary from your assert helper + print(f" ❌ {msg}") + ctx.failures += 1 + return False, msg -def run_unit_specs( - specs: list[UnitSpec], - executor: Any, - jenv: Any, - only_case: str | None = None, - *, - cache_mode: str = "off", - reuse_meta: bool = False, -) -> int: - """ - Execute discovered unit-test specs. Returns the number of failed cases. - - Args: - cache_mode: 'off' | 'ro' | 'rw'. Default 'off' for deterministic runs. - reuse_meta: reserved (no-op). 
- """ - cache_mode = _normalize_cache_mode(cache_mode) +def _build_utest_ctx(executor: Any, jenv: Any, cache_mode: str) -> UtestCtx: project_dir = _get_project_dir_safe() engine_name = _detect_engine_name(executor) env_ctx = _make_env_ctx(engine_name) cache = _make_cache(project_dir, engine_name) - ctx = UtestCtx( + return UtestCtx( executor=executor, jenv=jenv, engine_name=engine_name, @@ -712,64 +719,230 @@ def run_unit_specs( cache_mode=cache_mode, ) - for spec in specs: - if spec.engine and spec.engine != engine_name: - continue - node = REGISTRY.nodes.get(spec.model) - if not node: - print(f"⚠️ Model '{spec.model}' not found (in {spec.path})") - ctx.failures += 1 +def _append_result( + results_out: list[dict[str, Any]] | None, + *, + spec: UnitSpec, + case_name: str, + status: str, + message: str | None, + duration_ms: int, + cache_hit: bool, + target_relation: str, +) -> None: + if results_out is None: + return + + results_out.append( + { + "model": spec.model, + "case": case_name, + "status": status, + "message": (message or ""), + "duration_ms": duration_ms, + "cache_hit": cache_hit, + "target_relation": target_relation, + "spec_path": str(spec.path), + } + ) + + +def _record_model_not_found( + ctx: UtestCtx, + *, + spec: UnitSpec, + results_out: list[dict[str, Any]] | None, +) -> None: + print(f"⚠️ Model '{spec.model}' not found (in {spec.path})") + ctx.failures += 1 + _append_result( + results_out, + spec=spec, + case_name="", + status="error", + message="model not found", + duration_ms=0, + cache_hit=False, + target_relation="", + ) + + +def _compute_target_relation(spec_model: str, case: Any) -> str: + target_rel_cfg = getattr(case, "expect", None) + if isinstance(target_rel_cfg, UnitExpect): + return target_rel_cfg.relation or relation_for(spec_model) + if isinstance(target_rel_cfg, Mapping): + return target_rel_cfg.get("relation") or relation_for(spec_model) + return relation_for(spec_model) + + +def _iter_cases(spec: UnitSpec, only_case: str | None) 
-> Generator[UnitCase]: + for raw_case in spec.cases: + case = spec.merged_case(raw_case) + if only_case and case.name != only_case: continue + yield case - for raw_case in spec.cases: - # Apply spec.defaults to each case (merged view) - case = spec.merged_case(raw_case) - if only_case and case.name != only_case: - continue - print(f"→ {spec.model} :: {case.name}") +def _duration_ms(t0: float, t1: float) -> int: + return int((t1 - t0) * 1000) - if not reuse_meta: - with suppress(Exception): - delete_meta_for_node(executor, node.name) - cand_fp = _fingerprint_case(node, spec, case, ctx) +def _skip_due_to_input_failure( + ctx: UtestCtx, + *, + spec: UnitSpec, + case: Any, + t0: float, + results_out: list[dict[str, Any]] | None, +) -> None: + print(" ⚠️ skipping execution due to input load failure") + t1 = perf_counter() + _append_result( + results_out, + spec=spec, + case_name=case.name, + status="error", + message="input load failure", + duration_ms=_duration_ms(t0, t1), + cache_hit=False, + target_relation="", + ) - before_failures = ctx.failures - ctx.failures += _load_inputs_for_case(executor, spec, case, node) - # If any input failed to load, skip execution & assertion for this case. 
- if ctx.failures > before_failures: - print(" ⚠️ skipping execution due to input load failure") - continue +def _run_one_case( + ctx: UtestCtx, + *, + spec: UnitSpec, + case: Any, + node: Any, + reuse_meta: bool, + results_out: list[dict[str, Any]] | None, +) -> None: + print(f"→ {spec.model} :: {case.name}") + t0 = perf_counter() - if _maybe_skip_by_cache(node, cand_fp, ctx): - _read_and_assert(spec, case, ctx) - _cleanup_inputs_for_case(executor, case) - continue + status = "pass" + message: str | None = None - target_rel_cfg = getattr(case, "expect", None) - if isinstance(target_rel_cfg, UnitExpect): - target_rel = target_rel_cfg.relation or relation_for(spec.model) - elif isinstance(target_rel_cfg, Mapping): - target_rel = target_rel_cfg.get("relation") or relation_for(spec.model) - else: - target_rel = relation_for(spec.model) + if not reuse_meta: + with suppress(Exception): + delete_meta_for_node(ctx.executor, node.name) - _reset_utest_relation(executor, target_rel) + cand_fp = _fingerprint_case(node, spec, case, ctx) - if not _execute_and_update_cache(node, cand_fp, ctx): - _cleanup_inputs_for_case(executor, case) - continue + before_failures = ctx.failures + ctx.failures += _load_inputs_for_case(ctx.executor, spec, case, node) + if ctx.failures > before_failures: + _skip_due_to_input_failure(ctx, spec=spec, case=case, t0=t0, results_out=results_out) + return + + target_rel = _compute_target_relation(spec.model, case) + + if _maybe_skip_by_cache(node, cand_fp, ctx): + ok, msg = _read_and_assert(spec, case, ctx) + if not ok: + status = "fail" + message = msg + + _cleanup_inputs_for_case(ctx.executor, case) + t1 = perf_counter() + _append_result( + results_out, + spec=spec, + case_name=case.name, + status=status, + message=message, + duration_ms=_duration_ms(t0, t1), + cache_hit=True, + target_relation=target_rel, + ) + return + + _reset_utest_relation(ctx.executor, target_rel) + + ok_exec, exec_msg = _execute_and_update_cache(node, cand_fp, ctx) + if not 
ok_exec: + _cleanup_inputs_for_case(ctx.executor, case) + t1 = perf_counter() + _append_result( + results_out, + spec=spec, + case_name=case.name, + status="error", + message=exec_msg, + duration_ms=_duration_ms(t0, t1), + cache_hit=False, + target_relation=target_rel, + ) + return + + ok, msg = _read_and_assert(spec, case, ctx) + if not ok: + status = "fail" + message = msg + + _cleanup_inputs_for_case(ctx.executor, case) + t1 = perf_counter() + _append_result( + results_out, + spec=spec, + case_name=case.name, + status=status, + message=message, + duration_ms=_duration_ms(t0, t1), + cache_hit=False, + target_relation=target_rel, + ) - _read_and_assert(spec, case, ctx) - _cleanup_inputs_for_case(executor, case) +def _finalize_cache(ctx: UtestCtx) -> None: if ctx.cache and ctx.computed_fps and ctx.cache_mode == "rw": # pragma: no cover ctx.cache.update_many(ctx.computed_fps) ctx.cache.save() + +def run_unit_specs( + specs: list[UnitSpec], + executor: Any, + jenv: Any, + only_case: str | None = None, + *, + cache_mode: str = "off", + reuse_meta: bool = False, + results_out: list[dict[str, Any]] | None = None, +) -> int: + """ + Execute discovered unit-test specs. Returns the number of failed cases. + + Args: + cache_mode: 'off' | 'ro' | 'rw'. Default 'off' for deterministic runs. + reuse_meta: reserved (no-op). 
+ """ + cache_mode = _normalize_cache_mode(cache_mode) + ctx = _build_utest_ctx(executor, jenv, cache_mode) + + for spec in specs: + if spec.engine and spec.engine != ctx.engine_name: + continue + + node = REGISTRY.nodes.get(spec.model) + if not node: + _record_model_not_found(ctx, spec=spec, results_out=results_out) + continue + + for case in _iter_cases(spec, only_case): + _run_one_case( + ctx, + spec=spec, + case=case, + node=node, + reuse_meta=reuse_meta, + results_out=results_out, + ) + + _finalize_cache(ctx) return ctx.failures diff --git a/tests/unit/docs/test_docs_unit.py b/tests/unit/docs/test_docs_unit.py index ff97c4c..4db32d8 100644 --- a/tests/unit/docs/test_docs_unit.py +++ b/tests/unit/docs/test_docs_unit.py @@ -382,7 +382,9 @@ def _resolve_source(self, source, table): # ASSERT col_id = cols_by_table["project.dataset.model_sql"][0] assert col_id.name == "id" - assert col_id.lineage == [{"from_relation": "src_table", "from_column": "id"}] + assert col_id.lineage == [ + {"from_relation": "src_table", "from_column": "id", "confidence": "inferred"} + ] @pytest.mark.unit @@ -448,5 +450,6 @@ def test_infer_and_attach_lineage_yaml_override_branch_is_used(monkeypatch: pyte "from_relation": "project.dataset.orders", "from_column": "amount", "transformed": True, + "confidence": "annotated", } ] diff --git a/tests/unit/lineage/test_lineage_py_unit.py b/tests/unit/lineage/test_lineage_py_unit.py deleted file mode 100644 index dc9083f..0000000 --- a/tests/unit/lineage/test_lineage_py_unit.py +++ /dev/null @@ -1,25 +0,0 @@ -import pandas as pd -import pytest - -from fastflowtransform.lineage import infer_py_lineage - - -@pytest.mark.unit -def test_pandas_rename_and_assign(): - """Basic pandas lineage patterns: rename + new column from existing.""" - - # Build a small function dynamically to pass to infer_py_lineage - def fn(df: pd.DataFrame) -> pd.DataFrame: - out = df.rename(columns={"email": "email_upper"}) - out["flag"] = df["email"] - return out - - lin = 
infer_py_lineage(fn) - # email_upper comes from email (transformed=True due to rename heuristic) - assert "email_upper" in lin - assert any(s["from_column"] == "email" for s in lin["email_upper"]) - # flag comes from email (direct/unknown transform - # -> direct=False or True both acceptable by heuristic, - # but we require at least mapping to email) - assert "flag" in lin - assert any(s["from_column"] == "email" for s in lin["flag"]) diff --git a/tests/unit/test_utest_unit.py b/tests/unit/test_utest_unit.py index 5e6c0fb..25c41ec 100644 --- a/tests/unit/test_utest_unit.py +++ b/tests/unit/test_utest_unit.py @@ -525,8 +525,9 @@ def test_execute_and_update_cache_success(fake_registry, duckdbutor): cache_mode="rw", ) node = fake_registry.nodes["model_a"] - ok = utest._execute_and_update_cache(node, "abc123", ctx) + ok, msg = utest._execute_and_update_cache(node, "abc123", ctx) assert ok is True + assert msg is None assert ctx.computed_fps["model_a"] == "abc123" @@ -545,8 +546,9 @@ def test_execute_and_update_cache_failure(fake_registry, duckdbutor): cache_mode="off", ) node = fake_registry.nodes["model_a"] - ok = utest._execute_and_update_cache(node, None, ctx) + ok, msg = utest._execute_and_update_cache(node, None, ctx) assert ok is False + assert msg is not None assert ctx.failures == 1 diff --git a/uv.lock b/uv.lock index 22707db..b2c7c77 100644 --- a/uv.lock +++ b/uv.lock @@ -745,6 +745,7 @@ dependencies = [ { name = "python-dotenv" }, { name = "pyyaml" }, { name = "sqlalchemy" }, + { name = "sqlparse" }, { name = "typer" }, ] @@ -842,6 +843,7 @@ requires-dist = [ { name = "snowflake-snowpark-python", marker = "extra == 'full'", specifier = ">=1.40.0" }, { name = "snowflake-snowpark-python", marker = "extra == 'snowflake'", specifier = ">=1.40.0" }, { name = "sqlalchemy", specifier = ">=2.0" }, + { name = "sqlparse", specifier = ">=0.5.5" }, { name = "typer", specifier = ">=0.12" }, { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12" }, ] 
@@ -3362,6 +3364,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/c6/898d67661b1109833f90a804ac242d7581003d4e5bc5392a40a4bd4bb2d9/sqlglot-27.27.0-py3-none-any.whl", hash = "sha256:7f1e91f04e9dbedf08fcd703b54cb78b548d9c25b5b1829951e65c5a2380806e", size = 523420, upload-time = "2025-10-13T06:28:04.852Z" }, ] +[[package]] +name = "sqlparse" +version = "0.5.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/76/437d71068094df0726366574cf3432a4ed754217b436eb7429415cf2d480/sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e", size = 120815, upload-time = "2025-12-19T07:17:45.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/4b/359f28a903c13438ef59ebeee215fb25da53066db67b305c125f1c6d2a25/sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba", size = 46138, upload-time = "2025-12-19T07:17:46.573Z" }, +] + [[package]] name = "stack-data" version = "0.6.3"