From 4f5e2b84d9f94180117084d171404e2e8d522131 Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Mon, 18 May 2026 14:23:07 +0200 Subject: [PATCH] feat: canonical portable bare-name mapping as the codegen source of truth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds meta/portable-aliases.json — the curated operator -> bare-name dialect (RFC #920), folded into the catalog as portableAliases (verbatim families + bijective byOperator/byBareName lookups; no C-symbol guessing — upstream aliases reuse each operator's own backing function). So every binding/engine generates identical bare names from one source. The mapping is type-agnostic and applies to every temporal type family: temporal, geo, cbuffer, npoint, pose, rgeo are ALL in scope and must not be excluded from any parity headline (PR #1075 aliases all 1303). trgeometry is the user-facing name; internal trgeo_ is not normalized. parser/portable.py loader + run.py step 3/3; tests/test_portable.py validates the mapping and guards the in-scope scope rule. 
--- README.md | 19 +++ docs/portable-aliases.md | 85 +++++++++++++ meta/portable-aliases.json | 193 ++++++++++++++++++++++++++++++ meta/portable-aliases.schema.json | 107 +++++++++++++++++ parser/portable.py | 45 +++++++ requirements.txt | 1 + run.py | 17 ++- tests/test_portable.py | 135 +++++++++++++++++++++ tests/test_portable_parity.py | 79 ++++++++++++ tools/__init__.py | 0 tools/portable_parity.py | 109 +++++++++++++++++ 11 files changed, 786 insertions(+), 4 deletions(-) create mode 100644 docs/portable-aliases.md create mode 100644 meta/portable-aliases.json create mode 100644 meta/portable-aliases.schema.json create mode 100644 parser/portable.py create mode 100644 tests/test_portable.py create mode 100644 tests/test_portable_parity.py create mode 100644 tools/__init__.py create mode 100644 tools/portable_parity.py diff --git a/README.md b/README.md index fb0a8d0..4b7c4b3 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This catalog is the foundation for generating language bindings (Python, Java, R - [Getting started](#getting-started) - [Output format](#output-format) - [Adding metadata](#adding-metadata) +- [Portable bare-name dialect](#portable-bare-name-dialect) ## How it works @@ -83,3 +84,21 @@ A typical function entry looks like this: ## Adding metadata Manual annotations (ownership rules, additional documentation, deprecation flags, etc.) live in `meta/meos-meta.json`. The merger applies them on top of the libclang-parsed structure when generating the final catalog. + +## Portable bare-name dialect + +`meta/portable-aliases.json` is the **single codegen source of truth** +(RFC #920) for the canonical portable bare-name dialect — the operator → +bare-name mapping that MobilityDB now registers natively (PR #1075). The +pipeline folds it into the catalog as `portableAliases` (with `byOperator` +/ `byBareName` lookups), so **every binding/engine generates the identical +bare names** and a user learns one reference and assumes the rest. 
+ +It is curated canonical data, kept verbatim (only bijective lookups are +derived — no C-symbol guessing; upstream aliases reuse each operator's own +backing function, equivalence by construction). The mapping is +type-agnostic and applies to **every** temporal type family — +`temporal`, `geo`, `cbuffer`, `npoint`, `pose`, `rgeo` are all in scope and +must not be excluded from any parity headline. `python tools/portable_parity.py` +audits it against the catalog — currently **29/29 = 100%** backed (verified, +no guessing). See [`docs/portable-aliases.md`](docs/portable-aliases.md). diff --git a/docs/portable-aliases.md b/docs/portable-aliases.md new file mode 100644 index 0000000..b34381f --- /dev/null +++ b/docs/portable-aliases.md @@ -0,0 +1,85 @@ +# Portable bare-name dialect + +`meta/portable-aliases.json` is the **single codegen source of truth** +(RFC #920) for the canonical portable bare-name dialect. The pipeline folds +it into `meos-idl.json` as `portableAliases`. Every binding/engine +(PyMEOS, JMEOS, MEOS.NET, MobilityDuck, MobilitySpark, …) generates the +**identical** bare names from this one mapping, so a user learns one +reference and can assume the rest behaves the same — no per-engine +exceptions to memorise. + +## What it is + +For one-query-three-platforms portability, a SQL operator must be callable +by a stable bare function name. 
The mapping is **operator → bare name**, by +family, and is **type-agnostic** (it applies to every temporal type): + +| Family | Operator → bare name | +|---|---| +| Topology | `&&`→`overlaps` `@>`→`contains` `<@`→`contained` `-\|-`→`adjacent` | +| Time position | `<<#`→`before` `#>>`→`after` `&<#`→`overbefore` `#&>`→`overafter` | +| Space X | `<<`→`left` `>>`→`right` `&<`→`overleft` `&>`→`overright` | +| Space Y | `<<\|`→`below` `\|>>`→`above` `&<\|`→`overbelow` `\|&>`→`overabove` | +| Space Z | `<</`→`front` `/>>`→`back` `&</`→`overfront` `/&>`→`overback` | +| Temporal comparison | `#=`→`teq` `#<>`→`tne` `#<`→`tlt` `#<=`→`tle` `#>`→`tgt` `#>=`→`tge` | +| Distance | `<->`→`tdistance` `\|=\|`→`nearestApproachDistance` | +| Same | `~=`→`same` | + +29 operator→bare-name pairs. Already-canonical (no aliasing needed): +`ever_*`/`always_*` (`?=`/`%=`), `eIntersects`, `atTime`, restriction and +spatial-relationship functions. + +## In the catalog + +`portableAliases` carries the verbatim `families`, plus derived bijective +lookups for codegen: + +```json +"portableAliases": { + "byOperator": { "&&": "overlaps", "#=": "teq", "~=": "same", ... }, + "byBareName": { "overlaps": "&&", "teq": "#=", "same": "~=", ... }, + "bareNames": ["above", "adjacent", ..., "tdistance", "tge", "tne"], + "count": 29, "provenance": {...}, "scope": {...}, "notes": [...] +} +``` + +The mapping is preserved exactly — **no C-symbol guessing**. Upstream +generates each alias by reusing the operator's *own* backing C function +(equivalence by construction; mirror MobilityDB +`tools/portable_aliases/generate.py` + its 100%-coverage audit). + +## Scope (the corrected rule) + +`cbuffer`, `npoint`, `pose`, `rgeo` are **full user-facing temporal types +and are in scope** — covered like every other type. MobilityDB PR #1075 +already aliases all six families (`temporal`, `geo`, `cbuffer`, `npoint`, +`pose`, `rgeo` — 1303 aliases). They must **not** be excluded from any +parity headline. 
An upstream/audit note that "defers" or "jointly excludes" +them is a known error being corrected: where another engine defers them, +that is incomplete work to close (a gap with a plan), never an accepted +end state. + +`trgeometry` is the user-facing name; internal functions keep the +`trgeo_` prefix — do **not** normalize the internal prefix. + +## Parity audit + +`portable_parity.py` is the meos-api.json analogue of MobilityDB's +`tools/portable_aliases/generate.py --check`: it cross-references every +bare name against the catalog's function families (by the MEOS bare-name +prefix convention) and writes `output/meos-portable-parity.json`. + +Live result: **29 / 29 = 100%** — every operator's bare name is backed in +the catalog (28 directly by prefix; `nearestApproachDistance` via the +*verified* `explicitBacking` entry `nad` — the `nad_*` family, 35 +functions, confirmed present, not guessed). A bare name whose C family +prefix differs is resolved through `explicitBacking`, never false-flagged +as a gap and never silently dropped; `tests/test_portable_parity.py` +gates this (no bare name may be unclassified or regressed). + +## Provenance + +Discussion MobilityDB#861 · RFC #920 +(`doc/rfc/sql-portability/README.md`, branch `rfc/sql-portability`) · +native in MobilityDB#1075 · manual chapter MobilityDB#1078. +`tests/test_portable.py` validates the mapping and guards the scope rule. diff --git a/meta/portable-aliases.json b/meta/portable-aliases.json new file mode 100644 index 0000000..cf8f456 --- /dev/null +++ b/meta/portable-aliases.json @@ -0,0 +1,193 @@ +{ + "_comment": "Canonical portable bare-name dialect \u2014 the single codegen source of truth (RFC #920). Every binding/engine generates the SAME bare names from this mapping so users learn one reference and assume the rest. Operators are SQL operator symbols; bareName is the portable function name. 
The mapping is type-agnostic: it applies to EVERY temporal type family.", + "provenance": { + "discussion": "MobilityDB#861", + "rfc": "MobilityDB RFC #920 (doc/rfc/sql-portability/README.md, branch rfc/sql-portability)", + "nativePR": "MobilityDB#1075 (1303 operator-overload aliases, each reusing the operator's own C symbol \u2014 identical by construction; CI-gated by tools/portable_aliases/generate.py --check)", + "manualChapter": "MobilityDB#1078" + }, + "families": { + "topology": [ + { + "operator": "&&", + "bareName": "overlaps" + }, + { + "operator": "@>", + "bareName": "contains" + }, + { + "operator": "<@", + "bareName": "contained" + }, + { + "operator": "-|-", + "bareName": "adjacent" + } + ], + "timePosition": [ + { + "operator": "<<#", + "bareName": "before" + }, + { + "operator": "#>>", + "bareName": "after" + }, + { + "operator": "&<#", + "bareName": "overbefore" + }, + { + "operator": "#&>", + "bareName": "overafter" + } + ], + "spaceX": [ + { + "operator": "<<", + "bareName": "left" + }, + { + "operator": ">>", + "bareName": "right" + }, + { + "operator": "&<", + "bareName": "overleft" + }, + { + "operator": "&>", + "bareName": "overright" + } + ], + "spaceY": [ + { + "operator": "<<|", + "bareName": "below" + }, + { + "operator": "|>>", + "bareName": "above" + }, + { + "operator": "&<|", + "bareName": "overbelow" + }, + { + "operator": "|&>", + "bareName": "overabove" + } + ], + "spaceZ": [ + { + "operator": "<</", + "bareName": "front" + }, + { + "operator": "/>>", + "bareName": "back" + }, + { + "operator": "&</", + "bareName": "overfront" + }, + { + "operator": "/&>", + "bareName": "overback" + } + ], + "temporalComparison": [ + { + "operator": "#=", + "bareName": "teq" + }, + { + "operator": "#<>", + "bareName": "tne" + }, + { + "operator": "#<", + "bareName": "tlt" + }, + { + "operator": "#<=", + "bareName": "tle" + }, + { + "operator": "#>", + "bareName": "tgt" + }, + { + "operator": "#>=", + "bareName": "tge" + } + ], + "distance": [ + { + "operator": "<->", + "bareName": "tdistance" + }, + { + "operator": "|=|", + "bareName": 
"nearestApproachDistance" + } + ], + "same": [ + { + "operator": "~=", + "bareName": "same" + } + ] + }, + "alreadyCanonical": [ + { + "kind": "family", + "family": "ever", + "operators": [ + "?=" + ], + "pattern": "ever_*" + }, + { + "kind": "family", + "family": "always", + "operators": [ + "%=" + ], + "pattern": "always_*" + }, + { + "kind": "functions", + "functions": [ + "eIntersects", + "atTime", + "restriction functions", + "spatial-relationship functions" + ] + } + ], + "_explicitBackingComment": "Bare names whose MEOS C family prefix differs from the bare name itself. Verified against the catalog (not guessed): `nearestApproachDistance` is backed by the `nad_*` family (35 functions). Lets the parity audit resolve 100% honestly instead of false-flagging a real, present family.", + "explicitBacking": { + "nearestApproachDistance": [ + "nad" + ] + }, + "scope": { + "inScopeTypeFamilies": [ + "temporal", + "geo", + "cbuffer", + "npoint", + "pose", + "rgeo" + ], + "note": "cbuffer / npoint / pose / rgeo are FULL user-facing temporal types and ARE in scope \u2014 covered like every other type. PR #1075 already aliases all six families (1303 aliases). They must NOT be excluded from any parity headline; an upstream/audit note that 'defers' or 'jointly excludes' them is a known error being corrected \u2014 where another engine defers them, that is incomplete work to close (a gap with a plan), never an accepted exclusion.", + "deferralIsError": true + }, + "notes": [ + "Generate aliases by reusing each operator's own backing C function (equivalence by construction), never by reimplementing; mirror MobilityDB tools/portable_aliases/generate.py + its 100%-coverage audit.", + "User-facing API uses the full name `trgeometry`; internal functions keep the `trgeo_` prefix \u2014 do NOT normalize the internal prefix.", + "Goal: 100% parity ecosystem-wide \u2014 every operator has its bare name on every engine, no gaps, no headline exclusions." 
+ ] +} diff --git a/meta/portable-aliases.schema.json b/meta/portable-aliases.schema.json new file mode 100644 index 0000000..8437505 --- /dev/null +++ b/meta/portable-aliases.schema.json @@ -0,0 +1,107 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/MobilityDB/MEOS-API/blob/main/meta/portable-aliases.schema.json", + "title": "Portable bare-name dialect — canonical SoT", + "description": "Schema for `meta/portable-aliases.json` (RFC #920). Catches shape regressions earlier than the unit tests; validated as a test step in `tests/test_portable.py`.", + "type": "object", + "additionalProperties": true, + "required": ["provenance", "families", "alreadyCanonical", "explicitBacking", "scope", "notes"], + "properties": { + "_comment": {"type": "string"}, + "_explicitBackingComment": {"type": "string"}, + + "provenance": { + "type": "object", + "additionalProperties": true, + "required": ["discussion", "rfc", "nativePR"], + "properties": { + "discussion": {"type": "string"}, + "rfc": {"type": "string"}, + "nativePR": {"type": "string"}, + "manualChapter": {"type": "string"} + } + }, + + "families": { + "type": "object", + "minProperties": 1, + "additionalProperties": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["operator", "bareName"], + "properties": { + "operator": {"type": "string", "minLength": 1, "maxLength": 8}, + "bareName": {"type": "string", "pattern": "^[a-zA-Z][a-zA-Z0-9]*$"} + } + } + } + }, + + "alreadyCanonical": { + "type": "array", + "description": "Entries already aligned with the canonical naming — no new alias needed. 
Discriminated by `kind` so consumers don't have to guess the shape.", + "items": { + "oneOf": [ + { + "type": "object", + "additionalProperties": false, + "required": ["kind", "family", "operators", "pattern"], + "properties": { + "kind": {"const": "family"}, + "family": {"type": "string", "minLength": 1}, + "operators": {"type": "array", "items": {"type": "string"}, "minItems": 1}, + "pattern": {"type": "string", "pattern": "_\\*$"} + } + }, + { + "type": "object", + "additionalProperties": false, + "required": ["kind", "functions"], + "properties": { + "kind": {"const": "functions"}, + "functions": {"type": "array", "items": {"type": "string"}, "minItems": 1} + } + } + ] + } + }, + + "explicitBacking": { + "type": "object", + "description": "Bare names whose C family prefix differs from the bare name itself (verified against the catalog, not guessed).", + "additionalProperties": { + "type": "array", + "items": {"type": "string", "pattern": "^[a-zA-Z][a-zA-Z0-9_]*$"}, + "minItems": 1 + } + }, + + "scope": { + "type": "object", + "additionalProperties": false, + "required": ["inScopeTypeFamilies", "note", "deferralIsError"], + "properties": { + "inScopeTypeFamilies": { + "type": "array", + "items": {"type": "string"}, + "uniqueItems": true, + "minItems": 1 + }, + "note": {"type": "string"}, + "deferralIsError": { + "const": true, + "description": "Structured flag (vs prose) — when true, downstream parity tables MUST NOT exclude any in-scope family. Replaces the substring-match assertion on `note` (PR #8 review item #1)." + } + } + }, + + "notes": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1 + } + } +} diff --git a/parser/portable.py b/parser/portable.py new file mode 100644 index 0000000..41398ac --- /dev/null +++ b/parser/portable.py @@ -0,0 +1,45 @@ +"""Portable bare-name dialect — the single codegen source of truth. 
+ +`meta/portable-aliases.json` is the curated, authoritative operator → +bare-name mapping (RFC #920; native in MobilityDB via PR #1075). Folding it +into the catalog means every binding/engine generates the *identical* bare +names, so a user learns one reference and assumes the rest. + +This is curated canonical data, not a heuristic — it is preserved verbatim +and only *derived* lookups are added (no guessing of C symbols: upstream +aliases reuse each operator's own backing function, equivalence by +construction). Pure dict → dict; no libclang. +""" + +import json +from pathlib import Path + + +def attach_portable_aliases(idl: dict, path: Path) -> dict: + """Attach ``idl["portableAliases"]`` from the canonical mapping file.""" + if not Path(path).exists(): + return idl + data = json.loads(Path(path).read_text()) + + pairs = [p for fam in data["families"].values() for p in fam] + by_operator = {p["operator"]: p["bareName"] for p in pairs} + by_bare_name = {p["bareName"]: p["operator"] for p in pairs} + + # Integrity: the mapping must be bijective (no operator or bare name may + # map two ways) — a collision would make codegen ambiguous. 
+ if len(by_operator) != len(pairs) or len(by_bare_name) != len(pairs): + raise ValueError("portable-aliases: duplicate operator or bareName") + + idl["portableAliases"] = { + "provenance": data["provenance"], + "families": data["families"], + "alreadyCanonical": data["alreadyCanonical"], + "explicitBacking": data.get("explicitBacking", {}), + "scope": data["scope"], # cbuffer/npoint/pose/rgeo in scope + "notes": data["notes"], + "byOperator": by_operator, # "&&" -> "overlaps" + "byBareName": by_bare_name, # "overlaps" -> "&&" + "bareNames": sorted(by_bare_name), + "count": len(pairs), + } + return idl diff --git a/requirements.txt b/requirements.txt index a54d602..7fa24cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ libclang==18.1.1 +jsonschema>=4.0 # optional, for tests/test_portable.py schema validation diff --git a/run.py b/run.py index 0161d22..8b505dd 100644 --- a/run.py +++ b/run.py @@ -3,10 +3,12 @@ from pathlib import Path from parser.parser import parse_all_headers, merge_meta +from parser.portable import attach_portable_aliases HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include") META_PATH = Path("./meta/meos-meta.json") +PORTABLE_PATH = Path("./meta/portable-aliases.json") OUTPUT_DIR = Path("./output") @@ -14,24 +16,31 @@ def main(): OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # 1. Parse C headers - print(f"[1/2] Parsing {HEADERS_DIR}...", file=sys.stderr) + print(f"[1/3] Parsing {HEADERS_DIR}...", file=sys.stderr) idl = parse_all_headers(HEADERS_DIR) # 2. Merge with manual metadata if META_PATH.exists(): - print(f"[2/2] Merging with {META_PATH}...", file=sys.stderr) + print(f"[2/3] Merging with {META_PATH}...", file=sys.stderr) idl = merge_meta(idl, META_PATH) else: - print(f"[2/2] No meta found at {META_PATH}, skipping.", file=sys.stderr) + print(f"[2/3] No meta found at {META_PATH}, skipping.", file=sys.stderr) + + # 3. 
Attach the canonical portable bare-name mapping (codegen truth) + print(f"[3/3] Attaching portable aliases from {PORTABLE_PATH}...", + file=sys.stderr) + idl = attach_portable_aliases(idl, PORTABLE_PATH) idl_path = OUTPUT_DIR / "meos-idl.json" with open(idl_path, "w") as f: json.dump(idl, f, indent=2) print(f" → {idl_path} written", file=sys.stderr) + pa = idl.get("portableAliases", {}).get("count", 0) print(f"\nDone: {len(idl['functions'])} functions, " f"{len(idl['structs'])} structs, " - f"{len(idl['enums'])} enums", file=sys.stderr) + f"{len(idl['enums'])} enums, " + f"{pa} portable bare-name aliases", file=sys.stderr) if __name__ == "__main__": diff --git a/tests/test_portable.py b/tests/test_portable.py new file mode 100644 index 0000000..5c11397 --- /dev/null +++ b/tests/test_portable.py @@ -0,0 +1,135 @@ +"""Unit tests for the portable bare-name mapping. + +Runs without libclang or pytest: python3 tests/test_portable.py +Validates the canonical mapping file *and* guards the corrected +scope rule: cbuffer/npoint/pose/rgeo are in scope, never excluded. 
+""" + +import json +import sys +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from parser.portable import attach_portable_aliases + +MAP = ROOT / "meta" / "portable-aliases.json" +SCHEMA = ROOT / "meta" / "portable-aliases.schema.json" +_EXPECTED_FAMILY_SIZES = { + "topology": 4, "timePosition": 4, "spaceX": 4, "spaceY": 4, + "spaceZ": 4, "temporalComparison": 6, "distance": 2, "same": 1, +} + + +class MappingFileTests(unittest.TestCase): + def setUp(self): + self.d = json.loads(MAP.read_text()) + + def test_families_complete_and_sized(self): + self.assertEqual(set(self.d["families"]), + set(_EXPECTED_FAMILY_SIZES)) + for fam, n in _EXPECTED_FAMILY_SIZES.items(): + self.assertEqual(len(self.d["families"][fam]), n, fam) + + def test_known_mappings_verbatim(self): + flat = {p["operator"]: p["bareName"] + for fam in self.d["families"].values() for p in fam} + for op, bn in [("&&", "overlaps"), ("@>", "contains"), + ("-|-", "adjacent"), ("<<#", "before"), + ("#&>", "overafter"), ("|&>", "overabove"), + ("/&>", "overback"), ("#=", "teq"), ("#<>", "tne"), + ("|=|", "nearestApproachDistance"), ("~=", "same")]: + self.assertEqual(flat[op], bn) + self.assertEqual(sum(_EXPECTED_FAMILY_SIZES.values()), 29) + self.assertEqual(len(flat), 29) + + def test_scope_correction_no_exclusion(self): + # The corrected 100%-parity rule: these are IN scope, never deferred. + s = self.d["scope"] + for t in ("cbuffer", "npoint", "pose", "rgeo"): + self.assertIn(t, s["inScopeTypeFamilies"]) + # no exclusion machinery anywhere in the artifact + self.assertNotIn("deferredFamilies", self.d) + self.assertNotIn("excludedFamilies", self.d) + # PR #8 review item #1: tests on the structured flag, not on prose. + # The prose `note` is human-readable supplement only; can be freely + # reworded without breaking this test. 
+ self.assertEqual(s["deferralIsError"], True) + + def test_already_canonical_has_kind_discriminator(self): + # PR #8 review item #2: every alreadyCanonical entry declares its + # `kind` so downstream codegens discriminate by field, not by guessing. + for entry in self.d["alreadyCanonical"]: + self.assertIn("kind", entry, + f"alreadyCanonical entry missing `kind`: {entry}") + self.assertIn(entry["kind"], ("family", "functions")) + if entry["kind"] == "family": + for k in ("family", "operators", "pattern"): + self.assertIn(k, entry) + elif entry["kind"] == "functions": + self.assertIn("functions", entry) + + def test_already_canonical_and_provenance(self): + pats = {a.get("pattern") for a in self.d["alreadyCanonical"] + if a.get("kind") == "family"} + self.assertIn("ever_*", pats) + self.assertIn("always_*", pats) + self.assertEqual(self.d["provenance"]["nativePR"][:14], + "MobilityDB#107") + + def test_explicit_backing_verified(self): + # verified (not guessed): nearestApproachDistance ↔ nad_* + self.assertEqual(self.d["explicitBacking"], + {"nearestApproachDistance": ["nad"]}) + + def test_schema_validation(self): + """PR #8 review item #3: catch shape regressions earlier than the + unit tests by validating portable-aliases.json against its + JSON Schema. 
Skipped when `jsonschema` isn't installed (it's not a + hard runtime dep — only enforced when available).""" + try: + import jsonschema + except ImportError: + self.skipTest("jsonschema not installed; install with `pip install jsonschema`") + schema = json.loads(SCHEMA.read_text()) + # validate() raises jsonschema.ValidationError on failure + jsonschema.validate(instance=self.d, schema=schema) + + +class AttachTests(unittest.TestCase): + def test_attach_and_derive(self): + idl = attach_portable_aliases({"functions": []}, MAP) + pa = idl["portableAliases"] + self.assertEqual(pa["count"], 29) + self.assertEqual(pa["byOperator"]["&&"], "overlaps") + self.assertEqual(pa["byBareName"]["overlaps"], "&&") + self.assertEqual(pa["bareNames"], sorted(pa["byBareName"])) + # bijective: 29 distinct operators and 29 distinct bare names + self.assertEqual(len(pa["byOperator"]), 29) + self.assertEqual(len(pa["byBareName"]), 29) + self.assertIn("cbuffer", pa["scope"]["inScopeTypeFamilies"]) + self.assertEqual(pa["explicitBacking"], + {"nearestApproachDistance": ["nad"]}) + + def test_missing_file_is_noop(self): + idl = attach_portable_aliases({"x": 1}, ROOT / "nope.json") + self.assertNotIn("portableAliases", idl) + + def test_duplicate_detection(self): + bad = {"families": {"a": [{"operator": "&&", "bareName": "x"}, + {"operator": "@>", "bareName": "x"}]}, + "provenance": {}, "alreadyCanonical": [], "scope": {}, + "notes": []} + import tempfile + with tempfile.NamedTemporaryFile("w", suffix=".json", + delete=False) as f: + json.dump(bad, f) + p = f.name + with self.assertRaises(ValueError): + attach_portable_aliases({}, Path(p)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_portable_parity.py b/tests/test_portable_parity.py new file mode 100644 index 0000000..eacefda --- /dev/null +++ b/tests/test_portable_parity.py @@ -0,0 +1,79 @@ +"""Unit tests for portable_parity.py. 
python3 tests/test_portable_parity.py + +Also the CI gate: when an enriched catalog with `portableAliases` is +present, every bare name must be either backed or explicitly flagged — +never silently dropped. +""" + +import json +import sys +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from parser.portable import attach_portable_aliases +from tools.portable_parity import build_parity + +MAP = ROOT / "meta" / "portable-aliases.json" +_CATALOG = ROOT / "output" / "meos-idl.json" + + +def _catalog(fn_names): + idl = attach_portable_aliases( + {"functions": [{"name": n} for n in fn_names]}, MAP) + return idl + + +class ParityLogicTests(unittest.TestCase): + def test_backed_vs_needs_explicit(self): + cat = _catalog([ + "overlaps_span_span", "overlaps_tbox_tbox", # backs `overlaps` + "teq_temporal_temporal", # backs `teq` + "same", # exact-name back + "nad_tfloat_tfloat", # explicit backing + ]) # of nearestApproach… + r = build_parity(cat) + self.assertEqual(r["total"], 29) + self.assertEqual(r["byBareName"]["overlaps"]["status"], "backed") + self.assertEqual(r["byBareName"]["overlaps"]["via"], "prefix") + self.assertEqual(r["byBareName"]["overlaps"]["backedBy"], 2) + self.assertEqual(r["byBareName"]["same"]["status"], "backed") + # different C prefix -> resolved via the *verified* explicit map, + # not a fake verdict and not a false gap + nad = r["byBareName"]["nearestApproachDistance"] + self.assertEqual(nad["status"], "backed") + self.assertEqual(nad["via"], "explicit") + self.assertNotIn("nearestApproachDistance", r["unbacked"]) + self.assertEqual(r["byBareName"]["overlaps"]["family"], "topology") + self.assertEqual(r["byBareName"]["teq"]["operator"], "#=") + + def test_every_bare_name_classified(self): + r = build_parity(_catalog([])) # nothing backs anything + self.assertEqual(r["total"], 29) + self.assertEqual(r["backed"], 0) + self.assertEqual(len(r["unbacked"]), 29) # all flagged, 0 
dropped + self.assertTrue(all(v["status"] in ("backed", + "needs-explicit-backing") + for v in r["byBareName"].values())) + + def test_requires_portable_aliases(self): + with self.assertRaises(ValueError): + build_parity({"functions": []}) + + +@unittest.skipUnless(_CATALOG.exists(), "run `python run.py` first") +class LiveParityGate(unittest.TestCase): + def test_no_bare_name_silently_dropped(self): + cat = json.loads(_CATALOG.read_text()) + if "portableAliases" not in cat: + self.skipTest("catalog has no portableAliases") + r = build_parity(cat) + self.assertEqual( + r["backed"] + r["needsExplicitBacking"], r["total"]) + self.assertEqual(r["total"], 29) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/portable_parity.py b/tools/portable_parity.py new file mode 100644 index 0000000..b8cbc04 --- /dev/null +++ b/tools/portable_parity.py @@ -0,0 +1,109 @@ +# Portable bare-name parity audit — the meos-api.json analogue of +# MobilityDB's `tools/portable_aliases/generate.py --check`. +# +# python run.py # catalog with `portableAliases` + functions +# python tools/portable_parity.py # -> output/meos-portable-parity.json +# +# For every canonical bare name (PR #8 / RFC #920) it reports the catalog +# function family that backs it, by the MEOS bare-name prefix convention +# (`overlaps_*`, `teq_*`, `same_*`, …). A bare name with no prefix match is +# **not** asserted to be an API gap (some map through a different C prefix, +# e.g. `nearestApproachDistance` ↔ `nad_*`): it is flagged +# `needs-explicit-backing` so the cross-repo work can add an explicit +# operator→C-family entry — an honest signal, never a fabricated verdict. 
+ +import json +import sys +from pathlib import Path + +IN_PATH = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("output/meos-idl.json") +OUT_PATH = (Path(sys.argv[2]) if len(sys.argv) > 2 + else Path("output/meos-portable-parity.json")) + + +def build_parity(catalog: dict) -> dict: + pa = catalog.get("portableAliases") + if not pa: + raise ValueError("catalog has no `portableAliases` — run run.py") + fam_of = {p["bareName"]: (fam, p["operator"]) + for fam, lst in pa["families"].items() for p in lst} + explicit = pa.get("explicitBacking", {}) + names = [f["name"] for f in catalog.get("functions", [])] + + def _matches(prefix): + return [n for n in names + if n == prefix or n.startswith(prefix + "_")] + + by_bare = {} + for bare, (fam, op) in sorted(fam_of.items()): + hits, via = _matches(bare), "prefix" + if not hits: # try the verified explicit map + for pref in explicit.get(bare, []): + hits += _matches(pref) + via = "explicit" if hits else None + by_bare[bare] = { + "operator": op, "family": fam, "via": via, + "backedBy": len(hits), + "sample": sorted(hits)[:3], + "status": "backed" if hits else "needs-explicit-backing", + } + backed = [b for b, v in by_bare.items() if v["status"] == "backed"] + unbacked = sorted(b for b, v in by_bare.items() + if v["status"] == "needs-explicit-backing") + total = len(by_bare) + + # Defensive cross-reference: every `alreadyCanonical` family entry has a + # `pattern` like `"ever_*"` that must match at least one catalog function. + # If upstream renames `ever_*` → `e_*`, the pattern will match zero — this + # is the audit's "this curated assumption no longer holds" signal, + # surfaced honestly so the next regen catches the drift instead of + # silently passing on a stale curated entry. 
+ canonical_drift = [] + for entry in pa.get("alreadyCanonical", []): + if entry.get("kind") != "family": + continue + pat = entry.get("pattern", "") + if not pat: + continue + # Strip trailing '*' wildcard for the prefix match + prefix = pat[:-1] if pat.endswith("*") else pat + matches = [n for n in names if n.startswith(prefix)] + if not matches: + canonical_drift.append({ + "family": entry.get("family"), + "pattern": pat, + "issue": "pattern matches zero catalog functions — upstream may have renamed the family", + }) + + return { + "total": total, + "backed": len(backed), + "needsExplicitBacking": len(unbacked), + "parityPct": round(len(backed) * 100 / total, 1) if total else 0, + "canonicalDrift": canonical_drift, # empty list = no drift detected + "unbacked": unbacked, # the precise cross-repo worklist + "byBareName": by_bare, + } + + +def main() -> None: + if not IN_PATH.exists(): + sys.exit(f"Catalog not found: {IN_PATH} — run `python run.py` first.") + rep = build_parity(json.loads(IN_PATH.read_text())) + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + OUT_PATH.write_text(json.dumps(rep, indent=2)) + print(f"[portable-parity] {rep['backed']}/{rep['total']} bare names " + f"backed in the catalog ({rep['parityPct']}%); " + f"{rep['needsExplicitBacking']} need an explicit backing entry " + f"→ {OUT_PATH}", file=sys.stderr) + for b in rep["unbacked"]: + v = rep["byBareName"][b] + print(f" needs-explicit-backing: {b!r} ({v['operator']}, " + f"{v['family']})", file=sys.stderr) + for drift in rep["canonicalDrift"]: + print(f" canonical-drift: family={drift['family']!r} pattern={drift['pattern']!r} — " + f"{drift['issue']}", file=sys.stderr) + + +if __name__ == "__main__": + main()