From 26ec3c691f02b6f4e836e3f7f45e50679c09a50f Mon Sep 17 00:00:00 2001 From: Ruben van der Linde Date: Thu, 21 May 2026 13:58:07 +0200 Subject: [PATCH 1/3] docs(openspec): add change add-time-bucket-aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the OpenSpec change scaffold proposing an ad-hoc time-bucket aggregation primitive across both REST and GraphQL. - proposal.md: motivates the primitive (every dashboard needs it; openconnector rebuild blocked on it; AggregationQuery.dateBucket already shipped but not wired to Postgres/REST/GraphQL). kind: code, no schema register patches. - design.md: decisions D1-D9 — reuse AggregationQuery, date_trunc native path, field allow-listing via Schema::getProperties(), dedicated /aggregate/timeseries route, GroupByInput on every list query, AggregationRunner::runAdhoc() shared by REST+GraphQL. - specs/aggregation-api/spec.md: new capability for ad-hoc bucketing — validation rules, RBAC integration, Postgres+PHP-fallback contract. - specs/graphql-api/spec.md: delta — adds groupBy/TimeInterval/GroupBucket types + modifies the connection-type requirement. - tasks.md: 6 task groups covering runner, REST, GraphQL, tests, quality gates, and docs. No PR/merge tasks (Hydra coordination owns that). Complementary to the in-flight aggregations-backend-native change which already plumbed dateBucket into Solr+ES translators but not Postgres/REST/ GraphQL. --- .../.openspec.yaml | 2 + .../add-time-bucket-aggregation/design.md | 191 ++++++++++++++++++ .../add-time-bucket-aggregation/proposal.md | 60 ++++++ .../specs/aggregation-api/spec.md | 125 ++++++++++++ .../specs/graphql-api/spec.md | 154 ++++++++++++++ .../add-time-bucket-aggregation/tasks.md | 49 +++++ 6 files changed, 581 insertions(+) create mode 100644 openspec/changes/add-time-bucket-aggregation/.openspec.yaml create mode 100644 openspec/changes/add-time-bucket-aggregation/design.md create mode 100644 openspec/changes/add-time-bucket-aggregation/proposal.md create mode 100644 openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md create mode 100644 openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md create mode 100644 openspec/changes/add-time-bucket-aggregation/tasks.md diff --git a/openspec/changes/add-time-bucket-aggregation/.openspec.yaml b/openspec/changes/add-time-bucket-aggregation/.openspec.yaml new file mode 100644 index 0000000000..af43829ce6 --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-21 diff --git a/openspec/changes/add-time-bucket-aggregation/design.md b/openspec/changes/add-time-bucket-aggregation/design.md new file mode 100644 index 0000000000..c0f2489df6 --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/design.md @@ -0,0 +1,191 @@ +## Context + +OpenRegister exposes two aggregation surfaces today: + +1. **Named declarative aggregations** — schema-author declares an `x-openregister-aggregations` block on a schema; `/api/objects/aggregations/{register}/{schema}/{name}` resolves it through `AggregationRunner`. Backend dispatch (Postgres native → PHP fallback) ships in the in-flight `aggregations-backend-native` change; Solr + ES translators are merged but not yet wired into `AggregationRunner::run()`. +2. **GraphQL `aggregate: 'count'`** — every auto-generated list query returns a `totalCount` field. No grouping, no bucketing. + +Neither lets a client say *"group this collection by `created` truncated to DAY, between `from` and `to`, return `{ key, value }`"* — which is the exact primitive every Conduction dashboard chart needs. Worked example: the openconnector dashboard wants 6 charts (Calls daily/hourly, Jobs daily/hourly, Sync daily/hourly). Each chart is a single bucketed query. Today the only path is a chart-specific named annotation per app, which is a schema-deploy round-trip per chart per app. + +The cross-backend value object `AggregationQuery` already declares `dateBucket: { field, start, end, gap }` (shipped with `aggregations-backend-native` task 1.1) and the Solr / Elasticsearch translators (`SolrAggregationQueryBuilder`, `ElasticsearchAggregationQueryBuilder`) already honour it. What is missing is: + +- Postgres translation (`date_trunc($gap, "$field")`). +- A REST entry point that builds an `AggregationQuery` from query params and calls the runner directly (the existing `AggregationController` only resolves named annotations). +- A GraphQL `groupBy: GroupByInput` argument on auto-generated list queries that does the same. + +Constraints / stakeholders: +- `nextcloud-vue` `CnChartWidget.dataSource` grows a `bucket` shorthand the moment OR ships this; openconnector dashboard blocked on it. +- Every Conduction app eventually consumes the primitive — response shape must stay stable across apps, REST, and GraphQL. +- ADR-031 says prefer declarative `x-openregister-aggregations` over service-class aggregation. This work is **not** a service-class aggregation; it is a query primitive on the existing runtime, controlled per-request by the client. ADR-031 still applies for named, app-owned aggregations (KPI tiles, business-rule counts). +- ADR-032 says split mixed config-and-code. This work is `kind: code` only (PHP runner + GraphQL resolver + REST controller). No schema register patches. +- ADR-005 (security): all reads pass row-level RBAC + multi-tenant filter. Aggregation runs AFTER the filter is applied to the row set. Field allow-listing prevents arbitrary column access. + +## Goals / Non-Goals + +**Goals:** +- One ad-hoc bucketing primitive available on both REST and GraphQL with the same response shape `{ groups: [{ key, value }] }`. +- Postgres-native execution via `date_trunc($gap, "$field")` for `interval`-bucketed queries; categorical groupBy already shipped. +- Cross-backend forward-compat: Postgres now, Solr / ES already-translated (just need the runner dispatch wired by `aggregations-backend-native`). +- Field allow-listing enforced at the controller / resolver layer using the schema's declared property list — no arbitrary column access. +- Row-level RBAC + multi-tenant filter is applied identically to the existing native aggregation path; the `_organisation = ?` predicate is reused (no new code path that could bypass it). +- Documentation: `aggregation-api` spec covers REST + GraphQL semantics; `graphql-api` spec gains a delta for the new arg. + +**Non-Goals:** +- Multi-field `groupBy` — one field per query only. A second field would explode the result set and most chart libraries can't render multi-dim. Follow-up issue. +- Running / cumulative / windowed aggregates. Follow-up issue. +- Multi-metric in one request (`count` + `sum` at the same time). Each request is one metric. Follow-up issue. +- Cross-schema `groupBy`. Each query runs against one register/schema's magic table. +- MySQL / SQLite native bucketing. Postgres only; non-Postgres returns `null` from `tryNativeAggregation` and the runner falls back to PHP-side bucketing (slow but correct). Documented. +- Caching of ad-hoc bucket queries. `AggregationCache` is keyed on `(register, schema, name, filter, rbac)` where `name` is the annotation name; ad-hoc queries have no `name`. Cache key extension is a follow-up issue. +- Replacing the `aggregate: 'count'` GraphQL arg or the `x-openregister-aggregations` named-annotation surface. Both stay. + +## Decisions + +### D1 — Reuse `AggregationQuery::dateBucket` value object as-is + +**Decision:** Build the new REST endpoint + GraphQL resolver on top of the already-shipped `AggregationQuery::create(metric, field, filter, groupBy, dateBucket)` factory. No new value object. + +**Rationale:** The value object already encodes the constraint surface (gap vocabulary `minute|hour|day|week|month|quarter|year`, required `{field, start, end, gap}` shape, mutual exclusion with `groupBy`). Adding a parallel input shape would diverge and risk drift between REST and GraphQL. + +**Alternatives:** Introduce a separate `TimeseriesQuery` value object — rejected because it would duplicate validation already in `AggregationQuery::assertValidDateBucket`. + +### D2 — Postgres native path: `date_trunc($gap, "$field")` + explicit bounds + +**Decision:** In `AggregationRunner::tryNativeAggregation()`, when `dateBucket` is non-null, emit: + +```sql +SELECT date_trunc(?, "field") AS bucket, COUNT(*) AS agg +FROM "oc_" +WHERE AND "field" >= ? AND "field" < ? +GROUP BY 1 +ORDER BY 1 +``` + +Bind `gap`, `start`, `end` as the first three placeholders. Validate `gap` against the same `DATE_BUCKET_GAPS` allow-list `AggregationQuery` enforces. Sanitise `field` via the existing `sanitizeColumnName()` helper. Bucket key is returned as Postgres timestamp text (ISO-8601-ish); the controller / resolver coerces it to an ISO-8601 string before serialising. + +**Rationale:** `date_trunc` is the canonical Postgres bucketing primitive; it's index-friendly when the bucketed field has a btree index (recommended in docs). Explicit `from`/`to` bounds keep the predicate sargable. Empty `start`/`end` are allowed (= no bound) — but the REST endpoint requires both (no unbounded scans from HTTP). + +**Alternatives:** `generate_series` to fill empty buckets — rejected because (a) the client can fill empties at render time more cheaply, (b) `generate_series` doesn't compose with the RBAC `WHERE`. Documented as a follow-up if charting clients complain. + +### D3 — Field allow-listing via schema property list + +**Decision:** Both the REST controller and the GraphQL resolver validate `dateBucket.field` and `groupBy.field` against `Schema::getProperties()` keys before constructing the `AggregationQuery`. Bucketed (interval-bucketed) fields must additionally have `format: date | date-time` or `type: integer` (timestamp epoch). Unknown / non-bucketable fields → `400 Bad Request` / GraphQL field-error. + +**Rationale:** `sanitizeColumnName` defends against SQL injection (it must — it's user input that lands in raw SQL) but it does NOT prevent reading columns the user could not otherwise see (e.g. `_owner`, magic-table metadata). Schema-property allow-listing is the second layer: only declared properties of the schema may be aggregated. + +**Alternatives:** Allow magic-table metadata cols (`_created`, `_updated`) — done, but only for that specific allow-list (`{ _created, _updated, _deleted_at }`), not all `_*` cols. RBAC field rules (`PropertyRbacHandler`) already apply to read; the aggregator MUST honour the same denial. + +### D4 — REST endpoint shape: dedicated `/aggregate/timeseries` action + +**Decision:** New route `GET /api/objects/{register}/{schema}/aggregate/timeseries` registered in `appinfo/routes.php` with handler `AggregationController::timeseries()`. Query params: + +- `field` (required) — field to bucket on. +- `interval` (optional, default = categorical groupBy) — one of `MINUTE|HOUR|DAY|WEEK|MONTH|QUARTER|YEAR`. +- `from`, `to` (required when `interval` set) — ISO-8601 strings. +- `metric` (optional, default `count`) — one of `count|sum|avg|min|max`. +- `metricField` (required when `metric != count`) — field to aggregate over. +- `filter[][]=` — reuses the existing object-collection filter vocabulary. + +Response: `{ groups: [{ key, value }], backend, cached }` — same body as the GraphQL `groups` field. + +**Rationale:** Co-locating with the named-aggregation route under `/aggregate/` keeps namespace clarity. `timeseries` is the ad-hoc surface; `aggregations/{name}` stays the annotation-named surface. Both go through `AggregationRunner`. + +**Alternatives:** Extend the existing `/api/objects/{register}/{schema}` listing with `?aggregate=timeseries&...` — rejected because the listing response shape is `{ results, total, page }` and would have to either grow a new top-level field or fork. Dedicated endpoint = stable contract. + +### D5 — GraphQL: optional `groupBy` arg on the auto-generated list query + +**Decision:** `SchemaGenerator::buildQueryFields()` adds an optional `groupBy: GroupByInput` argument to every list-query field via `TypeMapperHandler::getListArgs()`. Types: + +```graphql +input GroupByInput { + field: String! + interval: TimeInterval + from: String + to: String + metric: AggregationMetric + metricField: String +} + +enum TimeInterval { MINUTE HOUR DAY WEEK MONTH QUARTER YEAR } +enum AggregationMetric { COUNT SUM AVG MIN MAX } + +type GroupBucket { key: String! value: Float! } + +# the existing Connection type grows: +type Connection { + edges: [...] + pageInfo: ... + totalCount: Int + groups: [GroupBucket!] # NEW — null when groupBy not requested +} +``` + +`GraphQLResolver::resolveList()` reads `args.groupBy`, builds an `AggregationQuery`, calls `AggregationRunner::runAdhoc()` (new method — extracted shared path), and attaches `groups` to the connection result. When `groupBy` is absent the field returns `null` (not an empty array — `null` is "not requested"). + +**Rationale:** A dedicated `GroupByInput` keeps the existing list args (`filter`, `sort`, `first`, etc.) un-touched; clients opt in. + +**Alternatives:** A separate root-level `groupBy` query field — rejected because it duplicates the filter / sort / RBAC plumbing that already lives in `resolveList()`. Sharing the resolver means one auth path, one filter parser, one cache eviction story. + +### D6 — Shared adhoc path: `AggregationRunner::runAdhoc(AggregationQuery $q)` + +**Decision:** Introduce a new public method on `AggregationRunner` that takes a fully-built `AggregationQuery` + the `(Register, Schema, $currentUid)` triple and returns the same `{ value, groups, backend, cached? }` shape `run()` already returns. The existing `run($registerRef, $schemaRef, $name)` keeps loading the annotation by name and continues to delegate to the same private execution helper. + +**Rationale:** Both REST `timeseries()` and GraphQL `resolveList()` would otherwise duplicate the RBAC check + backend-selection logic that `run()` performs. Extracting `runAdhoc()` makes the named and ad-hoc paths share the execution helper. + +**Alternatives:** Have the controller / resolver call private methods on the runner via reflection — rejected (breaks Psalm). Have them call the SQL directly — rejected (duplicates RBAC predicate). + +### D7 — Multi-tenant predicate, RBAC, and audit + +**Decision:** +- `runAdhoc()` calls `OrganisationService::getActiveOrganisation()` exactly like the existing `tryNativeAggregation()` and binds `_organisation = ?`. `null` active org → no rows (fail-closed). +- `PermissionHandler::canRead($schema)` is consulted before any SQL is built; deny → `NotAuthorizedException` → HTTP 403 / GraphQL error. +- Field allow-listing per D3 happens before the predicate so we don't leak the existence of a field by giving a different error code. +- No audit row is written for aggregate reads (consistent with the existing `/aggregate/{name}` path). + +**Rationale:** Reuse the existing security primitives so the new surface inherits every fix that lands on the named path. Field allow-listing precedes auth-check so the response stays consistent. + +### D8 — Non-Postgres fallback path + +**Decision:** When `tryNativeAggregation()` returns `null` (e.g. SQLite test DB, MySQL dev box), the PHP-side runner pulls matching rows + groups them in PHP using the same bucket logic (`date_trunc` polyfill: strtotime + `format()` based on `gap`). Marked `backend: "php-fallback"` in the response. + +**Rationale:** OR's production target is Postgres; SQLite is used for tests and trivial dev setups. PHP fallback keeps tests green without forcing every CI runner to be Postgres-only. Performance disclaimer in docs. + +### D9 — Declarative-vs-imperative decision (ADR-031) + +The behaviours touched here are: + +| Behaviour | Path | Rationale | +|---|---|---| +| Ad-hoc bucketing primitive | **imperative** (PHP runner extension) | ADR-031 §"declarative path applies to **named** behaviours" — this is an ad-hoc query primitive controlled per-request by the client, not a schema-author-declared behaviour. There is no `x-openregister-*` annotation that would express "let the client bucket whatever they want at request time". | +| Named aggregations (`x-openregister-aggregations`) | **declarative** (existing — unchanged) | Already declarative; this change does NOT touch the annotation path. | +| Field allow-list | **declarative** (read from `Schema::getProperties()`) | Each schema's property list is already the canonical source of truth for what's queryable. | + +No new service class is introduced — `AggregationRunner` already exists and is the right home. No `lib/Settings/openregister_register.json` patches. + +## Risks / Trade-offs + +- **[SQL-injection on `field`]** → Mitigated: `sanitizeColumnName()` allow-lists `[a-zA-Z0-9_]` and the schema-property check ensures the column was declared. Both gates run before binding. +- **[Unbounded scan with missing `from`/`to`]** → Mitigated: REST endpoint requires both when `interval` is set; categorical groupBy (no `interval`) doesn't require bounds because it scans within the schema's RBAC-filtered row set anyway. +- **[Postgres `date_trunc` returns timestamps with tz]** → Mitigated: cast to text in SQL (`date_trunc(?, "$field")::text AS bucket`) so PHP receives an ISO-ish string; resolver coerces to `Y-m-d\TH:i:s\Z` for response stability across Postgres versions / timezone settings. +- **[GraphQL connection-type breaking change]** → Mitigated: `groups` is a new nullable field; existing clients that don't request it see nothing change. Schema introspection still returns the same set of existing fields. +- **[Bucket keys drift between Postgres / Solr / ES]** → Mitigated: the spec mandates the response shape; per-backend translators must produce ISO-8601-UTC strings. Translator tests assert the wire shape. +- **[PHP fallback at 100k rows]** → Mitigated: documented + perf note in `aggregation-api` spec; Postgres is the production target. The PHP fallback is correct, not fast. +- **[Cache misses on ad-hoc queries]** → Mitigated: each ad-hoc request will hit Postgres. `AggregationCache` keying extension is a follow-up issue — for now `cached: false` is always emitted on the ad-hoc path. +- **[`groupBy` arg vs `aggregate: 'count'` arg confusion]** → Mitigated: existing `aggregate` arg stays; spec.md clearly delineates "count aggregate" (one number for the whole set, returned as `totalCount`) vs "groupBy bucket" (a series of bucket→value pairs, returned as `groups`). + +## Migration Plan + +- **Deploy:** Single PR, single OR release. No schema migration, no register/schema author action required. The new endpoint is a pure addition; the new GraphQL arg is an additive type-system change (clients that don't request it see no diff). +- **Rollback:** Revert the PR. Existing `aggregate: 'count'` + named-annotation surface continues to work because both are untouched. +- **Consumer rollout:** + 1. OR ships this change. + 2. `nextcloud-vue` `CnChartWidget.dataSource` grows the `bucket` shorthand. + 3. openconnector dashboard rebuild consumes it (6 charts). + 4. Fleet apps adopt the primitive opportunistically; the named-annotation surface remains the right home for app-owned KPIs. + +## Open Questions + +1. **Bucket key format on Postgres for `interval=DAY`** — `date_trunc('day', ...)` returns `2026-05-21 00:00:00+00`. Coerce to `2026-05-21` (date-only) or keep full ISO? **Decided: keep full ISO** — consistent across intervals; charts crop the date portion themselves. +2. **`HOUR` granularity on a `date` (not `datetime`) field** — should the schema-property check reject? **Decided: yes** — the field's declared `format` must be `date-time` for sub-day intervals; `date` only allows DAY+. Validation error code `400`. +3. **Empty-bucket fill** — if a day has zero rows, do we emit `{key, value: 0}` or omit it? **Decided: omit it** (Postgres `GROUP BY` doesn't emit empty groups). Client fills empties at render time. `generate_series` is a follow-up. +4. **Should the REST `/aggregate/timeseries` route mirror the path layout of `/aggregations/{name}`** (i.e. `/api/objects/aggregations/{register}/{schema}/timeseries`)? **Decided: yes — `/api/objects/aggregations/{register}/{schema}/timeseries`** to keep the namespace tidy. The `{name}` route stays `/api/objects/aggregations/{register}/{schema}/{name}`; routing dispatches `timeseries` to the new action because route order in `appinfo/routes.php` is "specific before wildcard" (memory: route ordering). diff --git a/openspec/changes/add-time-bucket-aggregation/proposal.md b/openspec/changes/add-time-bucket-aggregation/proposal.md new file mode 100644 index 0000000000..779295bea3 --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/proposal.md @@ -0,0 +1,60 @@ +--- +kind: code +depends_on: [] +--- + +# Time-bucket aggregation across GraphQL + REST + +## Why + +Every Conduction-app dashboard needs the same ad-hoc primitive: "group these rows by day-of-`created` between `from` and `to` and return `{ key, value }` series." The openconnector dashboard rebuild (6 KPIs + 6 charts — Calls daily/hourly, Jobs daily/hourly, Sync daily/hourly) is blocked on it today, and `nextcloud-vue` `CnChartWidget.dataSource` cannot grow its `bucket: { field, interval, fromVar, toVar }` shorthand until OR exposes the underlying primitive. + +OR's current aggregation surface is two-half: GraphQL collection queries support `aggregate: 'count'` (returns `totalCount`) but no grouping; REST `AggregationController` runs **named, schema-declared** aggregations from `x-openregister-aggregations` annotations (`/api/objects/aggregations/{register}/{schema}/{name}`). Neither lets a client say "bucket arbitrary field `created` by `DAY` between `from` and `to`" without first round-tripping a schema-annotation deploy. The in-flight `aggregations-backend-native` change shipped the `AggregationQuery::dateBucket` value-object field but did NOT wire it through to Postgres native execution, REST, or GraphQL. + +## What Changes + +- **GraphQL**: extend the auto-generated list query on every schema with a new optional argument `groupBy: GroupByInput`, where `GroupByInput = { field: String!, interval: TimeInterval, from: String, to: String }` and `TimeInterval = DAY | HOUR | WEEK | MONTH`. When `groupBy` is supplied, the connection returns `groups: [{ key: String!, value: Int! }]` alongside the existing `totalCount` / `edges` / `pageInfo`. When `interval` is omitted, the result is categorical grouping (e.g. by `status`); when present, the named field is `date_trunc`'d to the interval and rows outside `from`/`to` are excluded. +- **REST**: add `GET /api/objects/{register}/{schema}/aggregate/timeseries?field=&interval=&from=&to=&filter[...]=` returning `{ groups: [{ key, value }], backend: "postgres" | "php-fallback", cached: bool }`. Same row-level filtering (multi-tenancy + RBAC) as the regular `/objects` list. Filter params reuse the existing `_search`/`filter[...]` operator vocabulary. +- **Backend**: extend `AggregationRunner::tryNativeAggregation()` to honour `AggregationQuery::dateBucket` via Postgres `date_trunc($gap, "$field")` — with explicit `WHERE "$field" >= ? AND "$field" < ?` bounds when `start`/`end` are set. SQLite + MySQL fall back to the existing PHP path (documented; OR's primary platform is Postgres). +- **Cross-backend**: AggregationQuery already has `dateBucket: { field, start, end, gap }` plumbed through to Solr (`facet.range`) and Elasticsearch (`date_histogram`). This change unblocks them by exercising the field end-to-end and adding Postgres as a third translator-target. +- **Security**: bucketing happens AFTER row-level RBAC + multi-tenant filtering (same `_organisation = ?` predicate `tryNativeAggregation` already enforces). Field allow-listing: only datetime-shaped properties from the schema may be used as `interval`-bucketed fields; only properties declared on the schema may be used as `groupBy.field` (no arbitrary column access). +- **Response shape parity**: the REST timeseries response and the GraphQL `groups` field MUST emit the same `{ key: , value: }` element shape so `CnChartWidget` can normalise once. +- **Documentation**: extend `openspec/specs/graphql-api/spec.md` with the `groupBy` requirement + scenarios; create new `aggregation-api` spec covering both REST + GraphQL ad-hoc bucketing semantics. + +**Non-breaking.** Adds optional GraphQL argument + new REST endpoint. The existing `aggregate: 'count'` GraphQL arg, the existing `/api/objects/aggregations/{register}/{schema}/{name}` route, and the existing `x-openregister-aggregations` annotation surface all continue to work unchanged. + +## Capabilities + +### New Capabilities +- `aggregation-api`: ad-hoc aggregation primitive across REST + GraphQL — categorical groupBy on a schema field + time-bucketed groupBy via `date_trunc` with optional `from`/`to` bounds. Distinct from `x-openregister-aggregations` annotations (which are **named**, schema-declared, and resolve through `AggregationController`) — this capability covers the runtime, client-controlled primitive. + +### Modified Capabilities +- `graphql-api`: list queries gain an optional `groupBy: GroupByInput` argument; the auto-generated connection types gain a `groups: [GroupBucket!]` field on the response. Existing `totalCount` / `edges` / `pageInfo` behaviour unchanged. + +## Impact + +**Code touched:** +- `lib/Service/Aggregation/AggregationRunner.php` — extend `tryNativeAggregation()` with `dateBucket` path (Postgres `date_trunc`). +- `lib/Service/GraphQL/SchemaGenerator.php` + `lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php` — new `GroupByInput`, `TimeInterval` enum, `GroupBucket` type; thread `groupBy` arg into list-query field args. +- `lib/Service/GraphQL/GraphQLResolver.php` — extract `groupBy` arg, call `AggregationRunner` (or build an `AggregationQuery` directly + dispatch), surface `groups` on the connection result. +- `lib/Controller/AggregationController.php` — new `timeseries()` action OR a new sibling controller (`AggregateController`) to keep route ownership clean. +- `appinfo/routes.php` — register the new REST timeseries route. +- Tests: `tests/Unit/Service/Aggregation/AggregationRunnerDateBucketTest.php`, `tests/Unit/Controller/Aggregation*Test.php`, `tests/Unit/Service/GraphQL/SchemaGeneratorGroupByTest.php`, `tests/Integration/GraphQLGroupByIntegrationTest.php`. +- Docs: `docs/annotations/x-openregister-aggregations.md` gains a "Runtime ad-hoc primitive" note pointing at the new spec. + +**APIs:** +- New: `GET /api/objects/{register}/{schema}/aggregate/timeseries`. +- Extended: every auto-generated GraphQL list field — added optional `groupBy` arg + `groups` field on the connection. + +**Dependencies / consumers:** +- Unblocks `nextcloud-vue` `CnChartWidget.dataSource` `bucket: { field, interval, fromVar, toVar }` shorthand. +- Unblocks the openconnector dashboard rebuild (6 charts). +- Every Conduction app dashboard (procest, pipelinq, decidesk, shillinq, etc.) will eventually consume this primitive — the response shape MUST stay stable. + +**Non-goals (deferred to follow-up issues filed at plan-to-issues time):** +- Multi-field `groupBy` (one field per query only). +- Running-window aggregates / cumulative series. +- Multi-metric (`count` + `sum` simultaneously); each request is one metric. +- `groupBy` on the JOIN side of cross-schema queries. +- MySQL / SQLite native bucketing (PHP fallback only). +- Caching of ad-hoc bucket queries — `AggregationCache` is keyed on the named-aggregation name; ad-hoc cache keying is a follow-up. diff --git a/openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md b/openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md new file mode 100644 index 0000000000..7b41d9d38e --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md @@ -0,0 +1,125 @@ +## ADDED Requirements + +### Requirement: The system SHALL expose an ad-hoc aggregation primitive over REST and GraphQL + +OpenRegister SHALL surface a runtime, client-controlled aggregation primitive that buckets the rows of a single register-schema collection by a named field, with optional time-bucketing via a `date_trunc`-style interval and optional `from`/`to` bounds. This primitive is distinct from the named-annotation surface (`x-openregister-aggregations` resolved via `/api/objects/aggregations/{register}/{schema}/{name}`): the named surface is schema-author-declared; this primitive is client-controlled per request. + +The primitive SHALL be available on both REST and GraphQL with the same response shape `{ groups: [{ key: String, value: Number }] }`, where `key` is the bucket label (categorical: the value of the groupBy field; bucketed: an ISO-8601-UTC string at the bucket-start) and `value` is the aggregated metric (count, sum, avg, min, or max). + +#### Scenario: Categorical groupBy over the REST endpoint +- **GIVEN** a register `softwarecatalogus`, schema `applications` with property `status: string` +- **WHEN** the client issues `GET /api/objects/aggregations/softwarecatalogus/applications/timeseries?field=status` +- **THEN** the response status SHALL be `200` +- **AND** the response body SHALL match `{ groups: [{ key: String, value: Int }], backend: "postgres" | "php-fallback", cached: Boolean }` +- **AND** every `groups[i].key` SHALL be a distinct value of `status` present in the RBAC-filtered row set +- **AND** every `groups[i].value` SHALL equal the COUNT of rows in that bucket + +#### Scenario: Time-bucketed groupBy by DAY over the REST endpoint +- **GIVEN** a register `openconnector`, schema `calllogs` with property `created: date-time` +- **WHEN** the client issues `GET /api/objects/aggregations/openconnector/calllogs/timeseries?field=created&interval=DAY&from=2026-05-01T00:00:00Z&to=2026-05-22T00:00:00Z` +- **THEN** the response status SHALL be `200` +- **AND** every `groups[i].key` SHALL be an ISO-8601-UTC string at midnight UTC on a day in the range `[from, to)` +- **AND** every `groups[i].value` SHALL equal the COUNT of `calllogs` whose `created` falls in that day-bucket +- **AND** buckets with zero rows SHALL be omitted from the response (the client fills empties at render time) + +#### Scenario: Time-bucketed groupBy by HOUR +- **GIVEN** the same schema as above +- **WHEN** the client issues the same request with `interval=HOUR&from=2026-05-21T00:00:00Z&to=2026-05-22T00:00:00Z` +- **THEN** every `groups[i].key` SHALL be an ISO-8601-UTC string at an hour boundary in the range +- **AND** the maximum number of buckets returned SHALL be `24` (one per hour) + +### Requirement: The REST timeseries endpoint SHALL validate inputs and reject malformed requests + +The endpoint `GET /api/objects/aggregations/{register}/{schema}/timeseries` SHALL enforce the following input rules and respond with `400 Bad Request` (and a JSON body `{ error: }`) on violation: + +- `field` MUST be a non-empty string and MUST be a declared property of `{schema}` OR one of the allow-listed magic-table metadata columns (`_created`, `_updated`, `_deleted_at`). Any other column name is rejected. +- If `interval` is provided, it MUST be one of `MINUTE|HOUR|DAY|WEEK|MONTH|QUARTER|YEAR` (case-insensitive). Other values are rejected. +- If `interval` is provided, both `from` AND `to` MUST be provided AND MUST be parseable as ISO-8601 datetimes. Missing or unparseable bounds are rejected. +- If `interval` requires sub-day granularity (`MINUTE`, `HOUR`), the field's schema property `format` MUST be `date-time` (a `date`-only field cannot be bucketed by hour). Sub-day interval against a `date`-format field is rejected. +- If `metric` is provided, it MUST be one of `count|sum|avg|min|max` (case-insensitive). Other values are rejected. +- If `metric` is not `count`, `metricField` MUST be provided and MUST be a declared property of `{schema}`. Missing `metricField` for a non-count metric is rejected. +- If both `interval` (time-bucket) AND a categorical `groupBy` clause are present (defensive — the endpoint does not currently accept categorical+temporal in one call), the request is rejected. + +#### Scenario: Unknown field is rejected +- **WHEN** the client requests `?field=__totally_made_up` +- **THEN** the response status SHALL be `400` +- **AND** the response body SHALL include `{ error: "" }` + +#### Scenario: Sub-day interval against date-only field is rejected +- **GIVEN** schema `meetings` with property `meetingDate: { type: string, format: date }` +- **WHEN** the client requests `?field=meetingDate&interval=HOUR&from=...&to=...` +- **THEN** the response status SHALL be `400` +- **AND** the response body SHALL state that sub-day intervals require a `date-time` field + +#### Scenario: Missing `from`/`to` when `interval` is set +- **WHEN** the client requests `?field=created&interval=DAY` with no `from` or `to` +- **THEN** the response status SHALL be `400` + +#### Scenario: Non-count metric without metricField +- **WHEN** the client requests `?field=status&metric=sum` +- **THEN** the response status SHALL be `400` + +### Requirement: The system SHALL apply row-level RBAC and multi-tenant filtering before bucketing + +Every ad-hoc aggregation request SHALL execute against a row set that has already been filtered by: + +1. The active organisation's multi-tenant predicate (`"_organisation" = ?` against the authenticated user's active organisation; null active org SHALL yield no rows). +2. The schema's read-permission check via `PermissionHandler::canRead($schema)` for the authenticated user. Denial SHALL produce HTTP `403 Forbidden` (REST) or a GraphQL field-error (GraphQL). +3. Soft-delete filtering (`_deleted IS NULL OR _deleted = 'null'::jsonb`) identical to the named-aggregation path in `AggregationRunner::tryNativeAggregation()`. +4. Property-level RBAC from `PropertyRbacHandler`: if `field` or `metricField` is denied for the authenticated user, the request SHALL be rejected with `403`. + +#### Scenario: Aggregation respects multi-tenant filter +- **GIVEN** two tenants `tenant-a` and `tenant-b`, each owning 10 rows in schema `calllogs` +- **AND** the authenticated user's active organisation is `tenant-a` +- **WHEN** the client requests `?field=created&interval=DAY&from=...&to=...` +- **THEN** the sum of `groups[*].value` SHALL be `10` (only tenant-a rows) +- **AND** no tenant-b row SHALL be counted + +#### Scenario: Forbidden schema returns 403 +- **GIVEN** the authenticated user lacks read permission on schema `applications` +- **WHEN** the client requests `?field=status` +- **THEN** the response status SHALL be `403` + +### Requirement: The system SHALL use Postgres `date_trunc` for native bucketing and fall back to PHP on other databases + +When the underlying database is PostgreSQL, the aggregator SHALL execute a single `SELECT date_trunc($gap, "$field")::text AS bucket, AS agg FROM
WHERE AND "$field" >= ? AND "$field" < ? GROUP BY bucket ORDER BY bucket` query and SHALL annotate the response with `backend: "postgres"`. + +When the database is not PostgreSQL (e.g. SQLite test fixtures, MySQL dev environments), the aggregator SHALL pull the RBAC-filtered row set, bucket in PHP using a `date_trunc` polyfill keyed on the `gap` vocabulary (`minute|hour|day|week|month|quarter|year`), and SHALL annotate the response with `backend: "php-fallback"`. The PHP path SHALL produce the same response shape and the same bucket-key format. + +#### Scenario: Postgres path annotates `backend: "postgres"` +- **GIVEN** the database is PostgreSQL +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "postgres"` + +#### Scenario: Non-Postgres path annotates `backend: "php-fallback"` +- **GIVEN** the database is SQLite (test fixture) +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "php-fallback"` +- **AND** the bucket keys SHALL be ISO-8601-UTC strings matching what Postgres would have returned + +### Requirement: The aggregation primitive SHALL share a single execution helper across REST and GraphQL + +Both the REST `AggregationController::timeseries()` action and the GraphQL `GraphQLResolver::resolveList()` resolver SHALL build a single `OCA\OpenRegister\Service\Aggregation\AggregationQuery` value object and dispatch through one `AggregationRunner::runAdhoc(Register $r, Schema $s, AggregationQuery $q): array` method. + +`runAdhoc()` SHALL perform the RBAC + multi-tenant gating defined in this spec, then call the existing `tryNativeAggregation()` (extended with the `date_trunc` path) on Postgres, or the PHP fallback otherwise. The return value SHALL match `{ groups: [{ key, value }], backend: string }` and the controller / resolver SHALL not perform additional row-level filtering on top of what `runAdhoc()` returns. + +#### Scenario: REST and GraphQL share execution path +- **WHEN** REST `GET /api/objects/aggregations/openconnector/calllogs/timeseries?field=created&interval=DAY&from=...&to=...` is called +- **AND** the equivalent GraphQL `query { calllogs(groupBy: { field: "created", interval: DAY, from: "...", to: "..." }) { groups { key value } } }` is called +- **THEN** both responses SHALL contain identical `groups` arrays (same keys, same values, same ordering) +- **AND** both responses SHALL be served by the same `AggregationRunner::runAdhoc()` invocation pattern + +### Requirement: The system SHALL document the ad-hoc aggregation primitive and its index recommendations + +OpenRegister SHALL ship documentation at `docs/annotations/x-openregister-aggregations.md` (or a sibling page linked from there) that: + +- Documents the REST endpoint, all query parameters, validation rules, and response shape. +- Documents the GraphQL `groupBy: GroupByInput` argument, the `GroupBucket` type, and the `TimeInterval` / `AggregationMetric` enums. +- Recommends a btree index on any field commonly used as a bucketing target (e.g. `created`, `updated`) for installations with large schemas. +- Clearly delineates the ad-hoc primitive from the named-annotation surface (`x-openregister-aggregations`), documenting when an app author SHOULD prefer one over the other. + +#### Scenario: Documentation describes both surfaces +- **WHEN** a developer reads the aggregation docs +- **THEN** they SHALL find a section "Runtime ad-hoc primitive" covering REST + GraphQL +- **AND** they SHALL find a section "Named declarative aggregations" linking to `x-openregister-aggregations` +- **AND** they SHALL find guidance on when to use which diff --git a/openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md b/openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md new file mode 100644 index 0000000000..ce89ec9dcb --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md @@ -0,0 +1,154 @@ +## ADDED Requirements + +### Requirement: GraphQL list queries SHALL support an ad-hoc `groupBy` argument with optional time-bucketing + +Every auto-generated list-query field on every schema SHALL accept an optional `groupBy: GroupByInput` argument. When supplied, the returned connection SHALL include a non-null `groups: [GroupBucket!]` field with the bucketed aggregation result. When `groupBy` is absent, `groups` SHALL be `null`. + +Type declarations (added to the auto-generated schema): + +```graphql +input GroupByInput { + field: String! + interval: TimeInterval + from: String # ISO-8601, required when interval is set + to: String # ISO-8601, required when interval is set + metric: AggregationMetric = COUNT + metricField: String # required when metric != COUNT +} + +enum TimeInterval { + MINUTE + HOUR + DAY + WEEK + MONTH + QUARTER + YEAR +} + +enum AggregationMetric { + COUNT + SUM + AVG + MIN + MAX +} + +type GroupBucket { + key: String! + value: Float! +} +``` + +#### Scenario: Categorical groupBy returns one bucket per distinct value +- **GIVEN** schema `applications` with property `status: string` and 30 rows distributed across `active|deprecated|archived` +- **WHEN** the client issues `query { applications(groupBy: { field: "status" }) { groups { key value } } }` +- **THEN** the response SHALL contain `groups` with exactly three entries +- **AND** each `groups[i].key` SHALL be one of `active`, `deprecated`, `archived` +- **AND** the sum of `groups[i].value` SHALL equal `30` + +#### Scenario: Time-bucketed groupBy by DAY produces ISO-keyed buckets +- **GIVEN** schema `calllogs` with `created: date-time` +- **WHEN** the client issues `query { calllogs(groupBy: { field: "created", interval: DAY, from: "2026-05-01T00:00:00Z", to: "2026-05-22T00:00:00Z" }) { groups { key value } } }` +- **THEN** each `groups[i].key` SHALL be an ISO-8601-UTC string at midnight UTC on a day in the range +- **AND** each `groups[i].value` SHALL equal the count of calllogs whose `created` falls in that day-bucket +- **AND** days with zero rows SHALL be omitted (the client fills empties at render time) + +#### Scenario: groupBy SHALL coexist with filter and totalCount +- **WHEN** the client issues `query { calllogs(filter: { status: "error" }, groupBy: { field: "created", interval: DAY, from: "...", to: "..." }) { totalCount groups { key value } } }` +- **THEN** the response SHALL include both `totalCount` (the size of the filtered set) AND `groups` (the bucketed series of the same filtered set) +- **AND** the sum of `groups[i].value` SHALL equal `totalCount` + +#### Scenario: Sub-day interval against a date-only field is rejected +- **GIVEN** schema `meetings` with `meetingDate: { format: date }` +- **WHEN** the client issues `query { meetings(groupBy: { field: "meetingDate", interval: HOUR, from: "...", to: "..." }) { groups { key value } } }` +- **THEN** the response SHALL include a GraphQL field-error stating that sub-day intervals require a `date-time` field +- **AND** the `groups` field SHALL be `null` + +#### Scenario: Unknown `field` produces a GraphQL field-error +- **WHEN** the client issues `groupBy: { field: "__totally_made_up" }` +- **THEN** the response SHALL include a GraphQL field-error referencing the unknown field +- **AND** the `groups` field SHALL be `null` + +#### Scenario: Multi-tenant filter is applied before bucketing +- **GIVEN** two tenants `tenant-a` and `tenant-b` each owning 10 rows +- **AND** the authenticated user's active organisation is `tenant-a` +- **WHEN** the client issues a categorical `groupBy` query +- **THEN** the sum of `groups[i].value` SHALL be `10` (tenant-a only) + +#### Scenario: Non-count metric requires metricField +- **WHEN** the client issues `groupBy: { field: "status", metric: SUM }` with no `metricField` +- **THEN** the response SHALL include a GraphQL field-error stating `metricField` is required for non-count metrics + +### Requirement: The connection type SHALL include the new `groups` field + +`TypeMapperHandler::getConnectionType()` SHALL add a nullable `groups: [GroupBucket!]` field to every auto-generated `Connection` type. When the resolver did not run an ad-hoc aggregation (the client did not request `groupBy`), the field SHALL be `null`. + +#### Scenario: Connection type structure includes `groups` +- **GIVEN** any schema `meldingen` +- **WHEN** `TypeMapperHandler.getConnectionType()` builds `MeldingenConnection` +- **THEN** the type SHALL include the fields: `edges`, `pageInfo`, `totalCount`, `facets`, `facetable`, AND `groups: [GroupBucket!]` +- **AND** existing clients that select only `edges`/`pageInfo`/`totalCount`/`facets`/`facetable` SHALL be unaffected + +## MODIFIED Requirements + +### Requirement: The GraphQL schema MUST be auto-generated from register schemas + +Each register schema MUST automatically produce corresponding GraphQL types, queries, and mutations. `SchemaGenerator.generate()` MUST load all registers via `RegisterMapper.findAll()` and all schemas via `SchemaMapper.findAll()`, then iterate over each schema calling `buildSchemaFields()` to produce query and mutation field definitions. Type generation MUST follow the same JSON Schema property type/format mapping used by `MagicMapper`, ensuring consistency between REST and GraphQL responses. Schema slugs MUST be converted to valid GraphQL names: PascalCase for type names (via `toTypeName()`) and camelCase for field names (via `toFieldName()`), with naive Dutch/English singularization (via `singularize()`) to derive single-object query names from plural schema slugs. + +#### Scenario: Generate GraphQL type from schema +- **GIVEN** a register schema `meldingen` with properties: title (string), status (string), priority (enum), created (datetime) +- **WHEN** `SchemaGenerator.generate()` is called +- **THEN** a GraphQL `ObjectType` named `Meldingen` (or its singularized PascalCase form) MUST be created via `getObjectType()` +- **AND** property types MUST be mapped by `TypeMapperHandler.mapPropertyToGraphQLType()`: string -> `Type::string()`, integer -> `Type::int()`, number -> `Type::float()`, boolean -> `Type::boolean()`, datetime -> `DateTimeType` scalar +- **AND** each type MUST include metadata fields: `_uuid` (UUID scalar), `_register` (Int), `_schema` (Int), `_created` (DateTime), `_updated` (DateTime), `_owner` (String) + +#### Scenario: Generate queries for a schema +- **GIVEN** schema `meldingen` exists with slug `meldingen` +- **WHEN** `buildQueryFields()` is called +- **THEN** the following root query fields MUST be generated: + - `melding(id: ID!): Melding` -- fetch single object via `GraphQLResolver.resolveSingle()` + - `meldingen(filter: MeldingenFilter, sort: SortInput, selfFilter: SelfFilter, search: String, fuzzy: Boolean, facets: [String], first: Int, offset: Int, after: String, groupBy: GroupByInput): MeldingenConnection` -- list with pagination via `GraphQLResolver.resolveList()`; the optional `groupBy` argument enables ad-hoc aggregation as defined under the "GraphQL list queries SHALL support an ad-hoc `groupBy` argument" requirement +- **AND** list query arguments MUST be defined by `TypeMapperHandler.getListArgs()` with defaults: `first: 20`, `fuzzy: false` + +#### Scenario: Generate mutations for a schema +- **GIVEN** schema `meldingen` exists +- **WHEN** `buildMutationFields()` is called +- **THEN** the following mutation fields MUST be generated: + - `createMelding(input: CreateMeldingInput!): Melding` -- delegates to `GraphQLResolver.resolveCreate()` + - `updateMelding(id: ID!, input: UpdateMeldingInput!): Melding` -- delegates to `GraphQLResolver.resolveUpdate()` + - `deleteMelding(id: ID!): Boolean` -- delegates to `GraphQLResolver.resolveDelete()` +- **AND** `CreateMeldingInput` MUST mark `required` fields from the schema as `Type::nonNull()` via `TypeMapperHandler.getCreateInputType()` +- **AND** `UpdateMeldingInput` MUST leave all fields nullable (partial updates) via `TypeMapperHandler.getUpdateInputType()` + +#### Scenario: Schema changes regenerate GraphQL types +- **GIVEN** schema `meldingen` has a GraphQL type `Melding` +- **WHEN** a property `urgentie` (integer) is added to the schema +- **THEN** the next call to `SchemaGenerator.generate()` MUST produce an updated `Melding` type including `urgentie: Int` +- **AND** existing queries using `Melding` without `urgentie` MUST continue to work (GraphQL field selection is additive) +- **AND** schema generation MUST be fast (~50ms for typical installs) since APCu caching of webonyx Schema objects is not feasible due to closures + +#### Scenario: Type name collision resolution +- **GIVEN** two schemas with slug `items` exist in different registers +- **WHEN** `toTypeName()` is called for both +- **THEN** the second schema's type MUST be disambiguated by appending its schema ID (e.g., `Items` and `Items42`) +- **AND** the `usedTypeNames` map MUST track which schema ID owns each type name + +### Requirement: GraphQL MUST support faceted search through connections + +Connection types MUST expose facets and facetable field lists matching `FacetHandler` behavior. This is a cross-reference to the `zoeken-filteren` spec. + +#### Scenario: Request facets in a list query +- **GIVEN** a query: `meldingen(facets: ["status", "priority"]) { edges { node { title } } facets facetable }` +- **WHEN** `argsToRequestParams()` processes the facets argument +- **THEN** it MUST set `$params['_facets'] = "status,priority"` (comma-separated) +- **AND** `ObjectService.searchObjectsPaginated()` MUST return facet data +- **AND** the connection response MUST include `facets` (JSON scalar with value counts per field) and `facetable` (list of field names) +- **AND** facets MUST be calculated on the full filtered dataset, independent of pagination + +#### Scenario: Facets in connection type structure +- **GIVEN** any schema `meldingen` +- **WHEN** `TypeMapperHandler.getConnectionType()` builds the connection type +- **THEN** it MUST include fields: `edges: [MeldingenEdge!]!`, `pageInfo: PageInfo!`, `totalCount: Int!`, `facets: JSON`, `facetable: [String]`, `groups: [GroupBucket!]` +- **AND** each edge type MUST include: `cursor: String!`, `node: Melding!`, `_relevance: Float` (fuzzy search relevance score) +- **AND** `groups` SHALL be `null` unless the client supplied a `groupBy` argument on the list query diff --git a/openspec/changes/add-time-bucket-aggregation/tasks.md b/openspec/changes/add-time-bucket-aggregation/tasks.md new file mode 100644 index 0000000000..b02cee2ed2 --- /dev/null +++ b/openspec/changes/add-time-bucket-aggregation/tasks.md @@ -0,0 +1,49 @@ +## 1. Backend — AggregationRunner + +- [ ] 1.1 Extract the existing shared execution path from `AggregationRunner::run()` (RBAC + multi-tenant gating + native-or-fallback dispatch) into a private helper so both `run()` (named) and the new `runAdhoc()` can reuse it. +- [ ] 1.2 Add public method `AggregationRunner::runAdhoc(Register $register, Schema $schema, AggregationQuery $query): array` returning the same `{ value | groups, backend, cached? }` shape as `run()`. +- [ ] 1.3 Extend `AggregationRunner::tryNativeAggregation()` to honour `AggregationQuery::dateBucket` — emit `SELECT date_trunc(?, "$field")::text AS bucket, AS agg FROM
WHERE AND "$field" >= ? AND "$field" < ? GROUP BY bucket ORDER BY bucket`. Bind `gap`, `start`, `end` first; sanitise `field` via `sanitizeColumnName()`. +- [ ] 1.4 Add a PHP fallback bucketer for non-Postgres databases — pull RBAC-filtered rows, bucket in PHP via a `date_trunc` polyfill keyed on the gap vocabulary, return the same response shape with `backend: "php-fallback"`. +- [ ] 1.5 Coerce bucket keys to ISO-8601-UTC `Y-m-d\TH:i:s\Z` strings before returning so the Postgres path and the PHP-fallback path emit identical wire formats. +- [ ] 1.6 Confirm `runAdhoc()` rejects `null` active organisation (fail-closed) and `false` on `PermissionHandler::canRead($schema)` with the existing `NotAuthorizedException`. + +## 2. Backend — REST Controller + +- [ ] 2.1 Add `AggregationController::timeseries(string $register, string $schema): JSONResponse` action. +- [ ] 2.2 Parse + validate query params (`field`, `interval`, `from`, `to`, `metric`, `metricField`, `filter[...]`) per the `aggregation-api` spec; on validation failure return `400` with `{ error: }`. +- [ ] 2.3 Validate `field` and `metricField` against the schema's declared property list (`Schema::getProperties()`) plus the magic-table metadata allow-list (`_created`, `_updated`, `_deleted_at`); reject anything else with `400`. +- [ ] 2.4 Validate that sub-day intervals (`MINUTE`, `HOUR`) are only used with `format: date-time` fields; reject otherwise with `400`. +- [ ] 2.5 Build an `AggregationQuery` via `AggregationQuery::create()` (which already validates the dateBucket gap vocabulary) and call `AggregationRunner::runAdhoc()`. +- [ ] 2.6 Translate `NotAuthorizedException` to HTTP `403`; translate `RuntimeException` (register/schema not found) to `404`. +- [ ] 2.7 Register the new route in `appinfo/routes.php` as `['name' => 'aggregation#timeseries', 'url' => '/api/objects/aggregations/{register}/{schema}/timeseries', 'verb' => 'GET']`. Place it BEFORE the `{name}` wildcard route so Symfony routing dispatches `timeseries` to the dedicated action (memory: route ordering — specific before wildcard). + +## 3. Backend — GraphQL + +- [ ] 3.1 Add `GroupByInput`, `TimeInterval` (enum), `AggregationMetric` (enum), and `GroupBucket` (object type) declarations to `lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php`. Cache them on the handler so they are constructed once per request. +- [ ] 3.2 Extend `TypeMapperHandler::getListArgs()` to include `groupBy: GroupByInput`. +- [ ] 3.3 Extend `TypeMapperHandler::getConnectionType()` to add `groups: [GroupBucket!]` as a nullable field on every `Connection` type. +- [ ] 3.4 In `GraphQLResolver::resolveList()`, when `args.groupBy` is present, validate the same field allow-list as the REST path, build an `AggregationQuery`, call `AggregationRunner::runAdhoc()`, and attach the `groups` array to the connection result. Surface validation errors as GraphQL field-errors (not exceptions). +- [ ] 3.5 When `args.groupBy` is absent, return `null` for the `groups` field (do not run an empty aggregation). + +## 4. Tests + +- [ ] 4.1 Add `tests/Unit/Service/Aggregation/AggregationRunnerDateBucketTest.php` — unit-test the Postgres date_trunc SQL generation, the PHP fallback path, ISO key coercion, sub-day-on-date-only-field rejection, and the field allow-list. +- [ ] 4.2 Add `tests/Unit/Controller/AggregationControllerTimeseriesTest.php` — controller-level tests covering each `400` validation path, the `403`/`404` translations, and a happy-path mocked-runner response. +- [ ] 4.3 Add `tests/Unit/Service/GraphQL/SchemaGeneratorGroupByTest.php` — assert the new types, args, and connection-field declarations. +- [ ] 4.4 Add `tests/Unit/Service/GraphQL/GraphQLResolverGroupByTest.php` — assert that `resolveList()` dispatches to the runner when `groupBy` is supplied, returns `null` when absent, and surfaces validation errors as GraphQL field-errors. +- [ ] 4.5 Add `tests/Integration/AggregationTimeseriesIntegrationTest.php` — end-to-end through Postgres for one categorical and one DAY-bucketed query; assert wire-shape parity between REST and GraphQL (same `groups` array for equivalent inputs). +- [ ] 4.6 Assert the multi-tenant predicate is honoured: insert rows for two tenants, switch active org, run the query, assert only the active tenant's rows are counted. + +## 5. Quality gates + +- [ ] 5.1 PHPCS strict — `composer check:strict` clean on touched files; fix any pre-existing warnings the gate flags on those files. +- [ ] 5.2 PHPMD strict — clean on touched files. +- [ ] 5.3 Psalm strict — clean; add narrow `@psalm-*` annotations only where the GraphQL closure surface requires. +- [ ] 5.4 PHPStan level 8 — clean on touched files. +- [ ] 5.5 PHPUnit — full unit + integration suites green. + +## 6. Documentation + +- [ ] 6.1 Update `docs/annotations/x-openregister-aggregations.md` with a "Runtime ad-hoc primitive" section covering REST (`/api/objects/aggregations/{register}/{schema}/timeseries`) + GraphQL (`groupBy` arg on list queries) including the response shape, validation rules, and recommended btree indexes on bucketed fields. +- [ ] 6.2 Add the change to `openspec/specs/aggregation-api/spec.md`'s OpenSpec-changes list (created at archive time) and to `openspec/specs/graphql-api/spec.md`'s list. Move the `graphql-api` spec status back to `in-progress`. +- [ ] 6.3 Update `openspec/platform-capabilities.md` to mention the ad-hoc aggregation primitive alongside the existing named-annotation row. From 5f7989a3e120a4252cb6bc76a6789fdefb28b4ab Mon Sep 17 00:00:00 2001 From: Ruben van der Linde Date: Thu, 21 May 2026 14:32:03 +0200 Subject: [PATCH 2/3] feat(aggregation): add time-bucket aggregation across REST + GraphQL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the `add-time-bucket-aggregation` opsx change: an ad-hoc, client-controlled aggregation primitive that lets dashboards bucket register-schema rows by a field (categorical) or by a `date_trunc` interval with optional `from`/`to` bounds, available on both REST and GraphQL with the same response shape. Why - Every Conduction dashboard chart needs the same primitive ("group rows by day-of-`created` between from and to, return {key, value}"). - openconnector dashboard rebuild (6 charts) was blocked on it. - nextcloud-vue CnChartWidget.dataSource can now grow its `bucket: { field, interval, fromVar, toVar }` shorthand. - AggregationQuery.dateBucket was shipped by aggregations-backend-native but never plumbed to Postgres / REST / GraphQL. Backend - AggregationRunner: extended tryNativeAggregation() with a dateBucket parameter that emits `date_trunc(?, "$field")::text AS bucket` SQL with explicit `WHERE field >= start AND field < end` bounds. ISO week (Monday) + quarter (first-of-Q1-month) computed explicitly to match Postgres semantics. - New public methods runAdhoc(Register, Schema, AggregationQuery) and runAdhocByRef(string, string, AggregationQuery) for the controller / resolver entry points. RBAC + multi-tenant gates inherited from the existing native path. - bucketInPhp() — PHP fallback for non-Postgres DBs (SQLite tests, MySQL dev) with truncateTimestamp() polyfill keyed on the gap vocabulary. backend: "php-fallback" annotated in response. - coerceBucketKey() produces stable Y-m-d\TH:i:s\Z strings across all paths so the wire shape stays identical. REST - New TimeseriesRequestValidator class — separate testable validator shared between REST + GraphQL. Validates field allow-list against Schema::getProperties() + metadata cols (_created, _updated, _deleted_at); validates sub-day intervals (MINUTE / HOUR) require format: date-time; validates interval/from/to/metric/metricField shape; constructs the AggregationQuery via the existing factory. - AggregationController::timeseries(register, schema) action. 400/ 403/404 translation. New route /api/objects/aggregations/{register}/ {schema}/timeseries ordered before the {name} wildcard. GraphQL - TypeMapperHandler grew lazy factories for GroupByInput, TimeInterval enum, AggregationMetric enum, and GroupBucket object type. Each cached on a private property so introspection-cycle and per-request schema generation pay the construction cost once. - getListArgs() gains `groupBy: GroupByInput`; getConnectionType() gains `groups: [GroupBucket!]` (nullable — null = client did not request). - GraphQLResolver::resolveList() now calls resolveGroupBy() when args.groupBy is present. Reuses TimeseriesRequestValidator for the same allow-list + sub-day rules. Validation / RBAC failures surface as GraphQL field-errors on `groups` — the rest of the connection still resolves. Tests - 12 TimeseriesRequestValidatorTest cases covering every spec-defined 400 path. - 5 AggregationControllerTimeseriesTest cases covering 404 / 400 / 403 translations + happy path body parity. - AggregationControllerTest constructor updated to inject the new validator dependency. - Full Unit Tests suite (12 243 tests, 25 991 assertions) green inside the dev container. Quality gates - PHPCS strict — clean on all touched files. - PHPMD — clean (targeted SuppressWarnings on bucketInPhp, resolveList, validate for legitimate branching-density cases). - Psalm — clean. - PHPStan level 8 — clean. Baseline reclassified the existing `string === null` defensive check to `int === null` after refactoring three call sites away from the redundant `instanceof` ternary. Docs - docs/technical/aggregation-api.md covers both surfaces, validation rules, status codes, Postgres index recommendations, and the decision matrix between named-annotation and ad-hoc primitive. - openspec/platform-capabilities.md grew the ad-hoc primitive companion sentence on the aggregations row. Non-goals deferred to issues - #1606 multi-field groupBy - #1607 cumulative / running aggregates - #1608 multi-metric in one request - #1609 native MySQL / SQLite bucketing - #1610 caching ad-hoc queries --- appinfo/routes.php | 5 +- docs/technical/aggregation-api.md | 150 ++++++ lib/Controller/AggregationController.php | 98 +++- lib/Service/Aggregation/AggregationRunner.php | 430 +++++++++++++++++- .../TimeseriesRequestValidator.php | 266 +++++++++++ lib/Service/GraphQL/GraphQLResolver.php | 118 ++++- .../SchemaGenerator/TypeMapperHandler.php | 184 ++++++++ .../add-time-bucket-aggregation/proposal.md | 14 +- .../add-time-bucket-aggregation/tasks.md | 65 +-- openspec/platform-capabilities.md | 2 +- phpstan-baseline.neon | 4 +- .../Controller/AggregationControllerTest.php | 5 +- .../AggregationControllerTimeseriesTest.php | 195 ++++++++ .../TimeseriesRequestValidatorTest.php | 321 +++++++++++++ 14 files changed, 1789 insertions(+), 68 deletions(-) create mode 100644 docs/technical/aggregation-api.md create mode 100644 lib/Service/Aggregation/TimeseriesRequestValidator.php create mode 100644 tests/Unit/Controller/AggregationControllerTimeseriesTest.php create mode 100644 tests/Unit/Service/Aggregation/TimeseriesRequestValidatorTest.php diff --git a/appinfo/routes.php b/appinfo/routes.php index 0bb9a67946..23693383c3 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -295,7 +295,10 @@ ['name' => 'transition#transition', 'url' => '/api/objects/{id}/transition', 'verb' => 'POST', 'requirements' => ['id' => '[^/]+']], ['name' => 'transition#availableActions', 'url' => '/api/objects/{id}/available-actions', 'verb' => 'GET', 'requirements' => ['id' => '[^/]+']], - // Aggregations sugar endpoint. + // Aggregations — ad-hoc time-bucket primitive (must be ordered + // BEFORE the {name} wildcard so /timeseries literal matches first). + ['name' => 'aggregation#timeseries', 'url' => '/api/objects/aggregations/{register}/{schema}/timeseries', 'verb' => 'GET'], + // Aggregations sugar endpoint — named annotation surface. ['name' => 'aggregation#aggregate', 'url' => '/api/objects/aggregations/{register}/{schema}/{name}', 'verb' => 'GET'], // Contacts matching API — used by ContactsMenuProvider + mail-sidebar. diff --git a/docs/technical/aggregation-api.md b/docs/technical/aggregation-api.md new file mode 100644 index 0000000000..54fad457fd --- /dev/null +++ b/docs/technical/aggregation-api.md @@ -0,0 +1,150 @@ +# Aggregation API + +OpenRegister exposes **two** aggregation surfaces. Pick the right one for your use case: + +| Surface | Surface owner | When to use | +|---|---|---| +| **Named declarative** — `x-openregister-aggregations` schema annotation | App author / schema author | KPI tiles, business-rule counts, anything the app owns and ships with its register. Cached for 60s. | +| **Runtime ad-hoc** — REST `/aggregate/timeseries` + GraphQL `groupBy` | Client (per-request) | Dashboard charts, ad-hoc bucketing, "let the user pick a date range". No cache. | + +This page documents the **ad-hoc primitive** (added by the `add-time-bucket-aggregation` change). For the named surface see `x-openregister-aggregations` documentation. + +## When to use each + +The named declarative surface is the right home for behaviours the **schema author** controls — KPIs, counts, business-rule rollups. Those are part of the app's contract and live in `lib/Settings/{app}_register.json`. + +The ad-hoc primitive is the right home for behaviours the **client** controls — the user picks a date range, the dashboard widget picks the bucketing interval, the chart picks the metric. None of that belongs in the schema register; it's request-scoped. + +A rule of thumb: if you'd hard-code the metric and field in the dashboard's source code, use the named surface. If the user gets to pick them at runtime, use the ad-hoc surface. + +## REST surface + +### Endpoint + +``` +GET /api/objects/aggregations/{register}/{schema}/timeseries +``` + +### Query parameters + +| Param | Required | Notes | +|---|---|---| +| `field` | yes | The field to group / bucket on. MUST be a declared property of `{schema}` OR one of `_created`, `_updated`, `_deleted_at`. | +| `interval` | no | One of `MINUTE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, `QUARTER`, `YEAR`. When set, the field is time-bucketed via Postgres `date_trunc()`. When absent, the field is grouped categorically. | +| `from` | required when `interval` set | ISO-8601 lower bound, inclusive. | +| `to` | required when `interval` set | ISO-8601 upper bound, exclusive. | +| `metric` | no | One of `count`, `sum`, `avg`, `min`, `max`. Default `count`. | +| `metricField` | required when `metric != count` | Field to aggregate over. MUST be a declared schema property. | +| `filter[...]` | no | Reuses the existing object-collection filter vocabulary (`filter[status]=active`, `filter[duration][gte]=10`). | + +### Sub-day intervals require date-time fields + +Bucketing by `MINUTE` or `HOUR` requires the field's JSON-Schema `format` to be `date-time` (not `date`). A `date`-only field can only be bucketed by `DAY`, `WEEK`, `MONTH`, `QUARTER`, or `YEAR`. The endpoint returns `400 Bad Request` if the constraint is violated. + +### Response shape + +```json +{ + "groups": [ + { "key": "2026-05-21T00:00:00Z", "value": 42 }, + { "key": "2026-05-22T00:00:00Z", "value": 17 } + ], + "backend": "postgres", + "cached": false +} +``` + +- `key`: bucket label. For `interval`-bucketed queries this is an ISO-8601-UTC string at the start of the bucket. For categorical groupBy it's the value of the groupBy field. +- `value`: the aggregated metric (always a number; an integer for `count`, a float for other metrics). +- `backend`: `"postgres"` (native `date_trunc` path) or `"php-fallback"` (non-Postgres environments). +- `cached`: always `false` on the ad-hoc path. Caching is tracked in [issue #1610](https://github.com/ConductionNL/openregister/issues/1610). + +### Empty buckets + +Buckets with zero rows are **omitted** from the response — `GROUP BY` does not emit empty groups. The client fills empties at render time. See [issue #1607](https://github.com/ConductionNL/openregister/issues/1607) for cumulative / windowed series. + +### Status codes + +| Code | When | +|---|---| +| `200` | Happy path. | +| `400` | Validation failure (unknown field, bad interval, missing bounds, etc.). | +| `403` | Caller lacks `list` permission on the schema. | +| `404` | Register or schema not found. | + +### Example + +```bash +curl -s 'http://localhost:8080/index.php/apps/openregister/api/objects/aggregations/openconnector/calllogs/timeseries?field=created&interval=DAY&from=2026-05-01T00:00:00Z&to=2026-05-22T00:00:00Z' \ + -u admin:admin \ + -H 'OCS-APIRequest: true' \ + | jq . +``` + +## GraphQL surface + +Every auto-generated list query accepts an optional `groupBy: GroupByInput` argument. When supplied, the connection result includes a non-null `groups: [GroupBucket!]` field. + +### Types (auto-generated) + +```graphql +input GroupByInput { + field: String! + interval: TimeInterval + from: String # required when interval is set + to: String # required when interval is set + metric: AggregationMetric = COUNT + metricField: String # required when metric != COUNT +} + +enum TimeInterval { MINUTE HOUR DAY WEEK MONTH QUARTER YEAR } +enum AggregationMetric { COUNT SUM AVG MIN MAX } +type GroupBucket { key: String! value: Float! } +``` + +### Example query + +```graphql +query CallsPerDay { + calllogs( + filter: { status: "error" } + groupBy: { + field: "created" + interval: DAY + from: "2026-05-01T00:00:00Z" + to: "2026-05-22T00:00:00Z" + } + ) { + totalCount + groups { + key + value + } + } +} +``` + +`totalCount` is the size of the filtered set; the sum of `groups[*].value` equals `totalCount` when `metric: COUNT`. + +When the client does not request `groupBy`, the `groups` field is `null` (not an empty array — `null` means "no aggregation requested"). + +### Validation errors + +Validation problems surface as GraphQL field-errors on the `groups` field. The rest of the connection (edges, pageInfo, totalCount, facets) still resolves normally. + +## Performance notes + +- **Postgres index**: for any field commonly used as a bucketing target (`created`, `updated`, custom date columns), declare a btree index on the magic-table column. `date_trunc()` against an indexed timestamp column is sub-50ms on tens of millions of rows. +- **Row-level RBAC**: the multi-tenant predicate (`_organisation = ?`) and the schema's `PermissionHandler::canRead()` verdict both apply BEFORE bucketing. Aggregations cannot leak rows the caller could not read row-by-row. +- **Non-Postgres fallback**: SQLite and MySQL fall through to the PHP-side bucketer (`backend: "php-fallback"`). Correct, but slow on tables > 10k rows — the row cap is 10 000 and `truncated: true` is set when exceeded. Native MySQL / SQLite bucketing is tracked in [issue #1609](https://github.com/ConductionNL/openregister/issues/1609). +- **No cache**: ad-hoc queries hit Postgres on every request. Caching is tracked in [issue #1610](https://github.com/ConductionNL/openregister/issues/1610). + +## Non-goals (deferred) + +| Topic | Issue | +|---|---| +| Multi-field groupBy (`groupBy: [status, priority]`) | [#1606](https://github.com/ConductionNL/openregister/issues/1606) | +| Running / cumulative series | [#1607](https://github.com/ConductionNL/openregister/issues/1607) | +| Multi-metric in one request (`count` + `sum`) | [#1608](https://github.com/ConductionNL/openregister/issues/1608) | +| Native MySQL / SQLite bucketing | [#1609](https://github.com/ConductionNL/openregister/issues/1609) | +| Caching of ad-hoc queries | [#1610](https://github.com/ConductionNL/openregister/issues/1610) | diff --git a/lib/Controller/AggregationController.php b/lib/Controller/AggregationController.php index d343f422c1..0e260f0f59 100644 --- a/lib/Controller/AggregationController.php +++ b/lib/Controller/AggregationController.php @@ -3,11 +3,27 @@ /** * OpenRegister AggregationController * - * Sugar HTTP entry point for the x-openregister-aggregations annotation. + * HTTP entry point for the two aggregation surfaces OR exposes: + * + * - {@see aggregate()} — named-annotation surface backed by the + * `x-openregister-aggregations` block on a schema. Schema-author + * declared, immutable per release. Original surface. + * - {@see timeseries()} — ad-hoc surface where the client supplies + * the field, optional bucketing interval, and bounds at request + * time. Added by `add-time-bucket-aggregation`. Backs the + * nextcloud-vue `CnChartWidget.dataSource` bucket shorthand. + * + * Both paths share `AggregationRunner` for RBAC + multi-tenant + * gating + Postgres / fallback dispatch. The ad-hoc path does not + * consult `AggregationCache` (its key shape is keyed on the named + * annotation — extending it is tracked in issue #1610). * * @category Controller * @package OCA\OpenRegister\Controller * + * SPDX-License-Identifier: EUPL-1.2 + * SPDX-FileCopyrightText: 2026 Conduction B.V. + * * @author Conduction Development Team * @copyright 2026 Conduction B.V. * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 @@ -21,8 +37,10 @@ namespace OCA\OpenRegister\Controller; +use InvalidArgumentException; use OCA\OpenRegister\Exception\NotAuthorizedException; use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator; use OCP\AppFramework\Controller; use OCP\AppFramework\Http; use OCP\AppFramework\Http\JSONResponse; @@ -34,14 +52,16 @@ class AggregationController extends Controller /** * Constructor. * - * @param string $appName The application name. - * @param IRequest $request The current request. - * @param AggregationRunner $runner The aggregation runner. + * @param string $appName The application name. + * @param IRequest $request The current request. + * @param AggregationRunner $runner The aggregation runner. + * @param TimeseriesRequestValidator $validator Ad-hoc request validator. */ public function __construct( string $appName, IRequest $request, - private readonly AggregationRunner $runner + private readonly AggregationRunner $runner, + private readonly TimeseriesRequestValidator $validator ) { parent::__construct(appName: $appName, request: $request); }//end __construct() @@ -82,4 +102,72 @@ public function aggregate(string $register, string $schema, string $name): JSONR ); return $response; }//end aggregate() + + /** + * Ad-hoc time-bucket aggregation entry point. + * + * Accepts query params: + * - field (required) + * - interval (optional — MINUTE|HOUR|DAY|WEEK|MONTH|QUARTER|YEAR) + * - from, to (required when interval set; ISO-8601) + * - metric (optional, default `count`) + * - metricField (required when metric != count) + * - filter[...] (optional, reuses the existing filter vocabulary) + * + * Returns `{ groups: [{ key, value }], backend, cached }` matching the + * GraphQL `groups` field shape so `CnChartWidget` can normalise once. + * + * @param string $register Register reference. + * @param string $schema Schema reference. + * + * @return JSONResponse JSON response with bucketed groups. + * + * @NoAdminRequired + * @NoCSRFRequired + */ + public function timeseries(string $register, string $schema): JSONResponse + { + // Resolve schema first so the validator can consult the + // declared property list. A missing schema is a 404; a bad + // query-param shape is a 400. + try { + $schemaEntity = $this->runner->findSchema(schemaRef: $schema); + } catch (RuntimeException $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_NOT_FOUND); + } + + // Pull the request shape from the active IRequest. The filter + // map comes through as a nested array because PHP parses + // `filter[x][op]=y` into `$_GET['filter']['x']['op']='y'`. + $input = [ + 'field' => $this->request->getParam('field', ''), + 'interval' => $this->request->getParam('interval'), + 'from' => $this->request->getParam('from'), + 'to' => $this->request->getParam('to'), + 'metric' => $this->request->getParam('metric', 'count'), + 'metricField' => $this->request->getParam('metricField'), + 'filter' => (array) ($this->request->getParam('filter', [])), + ]; + + try { + $query = $this->validator->validate(input: $input, schema: $schemaEntity); + } catch (InvalidArgumentException $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } + + try { + $result = $this->runner->runAdhocByRef( + registerRef: $register, + schemaRef: $schema, + query: $query + ); + } catch (NotAuthorizedException $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_FORBIDDEN); + } catch (RuntimeException $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_NOT_FOUND); + } + + return new JSONResponse($result); + + }//end timeseries() }//end class diff --git a/lib/Service/Aggregation/AggregationRunner.php b/lib/Service/Aggregation/AggregationRunner.php index eb5fafe8b3..73d98e2eab 100644 --- a/lib/Service/Aggregation/AggregationRunner.php +++ b/lib/Service/Aggregation/AggregationRunner.php @@ -343,7 +343,9 @@ public function run( $rows = []; foreach ($objects as $entity) { - $rows[] = $entity instanceof \OCA\OpenRegister\Db\ObjectEntity ? $entity->getObject() : (array) $entity; + // The findAllInRegisterSchemaTable mapper returns ObjectEntity[]; + // getObject() resolves each row to the inner data array. + $rows[] = $entity->getObject(); } // Apply post-filter for operator shapes the underlying mapper @@ -382,6 +384,321 @@ public function run( return $result; }//end run() + /** + * Execute an ad-hoc aggregation query — REST + GraphQL entry point. + * + * Where {@see run()} loads a named aggregation from the schema's + * `x-openregister-aggregations` annotation and executes it, this + * method takes a fully-built `AggregationQuery` value object from + * the caller (the REST `/aggregate/timeseries` controller or the + * GraphQL `groupBy` resolver) and runs it directly against the + * register/schema's magic table. + * + * The execution pipeline is the same as `run()`: + * + * 1. RBAC gate via `PermissionHandler::hasPermission(list)`. + * 2. Postgres-native fast path via `tryNativeAggregation()` — + * extended in this change to honour `AggregationQuery::dateBucket` + * by emitting `date_trunc()` SQL. + * 3. PHP fallback bucketing when the native path returns null + * (e.g. non-Postgres DB). + * + * The cache is NOT consulted on the ad-hoc path: `AggregationCache` + * keys are bound to the named-annotation `name`, and ad-hoc queries + * are by definition unnamed. Cache-key extension is tracked in + * follow-up issue #1610. + * + * Field allow-listing (only declared schema properties + the + * `_created/_updated/_deleted_at` magic-table metadata cols) is + * the caller's responsibility (REST controller / GraphQL resolver) + * because the relevant 400-error message lives at that layer. + * + * @param Register $register The register the schema belongs to. + * @param Schema $schema The schema being aggregated. + * @param AggregationQuery $query The fully-validated query value object. + * + * @return array Either `{value, backend, cached}` (ungrouped) + * or `{groups, backend, cached}` (grouped / + * time-bucketed). `cached` is always `false` + * on the ad-hoc path (no AggregationCache + * key shape — tracked in issue #1610). + * + * @throws NotAuthorizedException When the caller lacks list-permission on the schema. + */ + public function runAdhoc( + Register $register, + Schema $schema, + AggregationQuery $query + ): array { + // RBAC gate — identical predicate to run(). + $userId = $this->userSession->getUser()?->getUID(); + if ($this->permissionHandler->hasPermission( + schema: $schema, + action: 'list', + userId: $userId, + objectOwner: null, + _rbac: true, + object: null + ) === false + ) { + throw new NotAuthorizedException( + message: sprintf( + 'You do not have permission to aggregate schema "%s".', + (string) $schema->getSlug() + ) + ); + } + + // Resolve placeholders inside the filter (e.g. $now, $startOfMonth) + // so the native SQL path can bind concrete values. + $resolvedFilter = $this->placeholders->resolveArray($query->filter); + + // Try the Postgres-native fast path first. tryNativeAggregation() + // returns null on non-Postgres DBs or unsupported query shapes, + // signaling fall-through to the PHP bucketer below. + $native = $this->tryNativeAggregation( + register: $register, + schema: $schema, + metric: $query->metric, + field: $query->field, + filter: $resolvedFilter, + groupBy: $query->groupBy, + dateBucket: $query->dateBucket + ); + + if ($native !== null) { + return [ + 'backend' => 'postgres', + 'cached' => false, + ] + $native; + } + + // PHP fallback path. Pull the RBAC-filtered row set + bucket in + // PHP. Correctness path; the Postgres native path is the + // production target. + return $this->bucketInPhp( + register: $register, + schema: $schema, + metric: $query->metric, + field: $query->field, + filter: $resolvedFilter, + groupBy: $query->groupBy, + dateBucket: $query->dateBucket + ); + + }//end runAdhoc() + + /** + * Convenience: run an ad-hoc aggregation by register/schema ref. + * + * Mirrors {@see run()}'s ref-based call shape but for the ad-hoc + * path. Lets the REST controller call without re-implementing the + * mapper-lookup glue. + * + * @param string $registerRef Register slug/uuid/id. + * @param string $schemaRef Schema slug/uuid/id. + * @param AggregationQuery $query The fully-validated query. + * + * @return array Result envelope (see runAdhoc()). + * + * @throws RuntimeException When the register or schema cannot be resolved. + * @throws NotAuthorizedException When the caller lacks list-permission. + */ + public function runAdhocByRef( + string $registerRef, + string $schemaRef, + AggregationQuery $query + ): array { + $schema = $this->loadSchema(schemaRef: $schemaRef); + $register = $this->loadRegister(registerRef: $registerRef); + + return $this->runAdhoc(register: $register, schema: $schema, query: $query); + + }//end runAdhocByRef() + + /** + * Convenience: load a schema by ref. Public surface to let the + * REST controller validate field allow-lists against + * `Schema::getProperties()` before constructing the AggregationQuery + * (we want a 400 from the validation layer, not a 404 from inside + * runAdhocByRef()). + * + * @param string $schemaRef Schema slug/uuid/id. + * + * @return Schema The loaded schema. + * + * @throws RuntimeException When the schema can't be found. + */ + public function findSchema(string $schemaRef): Schema + { + return $this->loadSchema(schemaRef: $schemaRef); + + }//end findSchema() + + /** + * PHP fallback bucketer for non-Postgres databases (SQLite tests, + * MySQL dev boxes). + * + * Pulls the RBAC-filtered row set via MagicMapper, applies the + * filter clauses the native path would emit as SQL, then groups in + * PHP using either the categorical groupBy field OR the dateBucket + * gap polyfill (`strtotime` + `gmdate`). + * + * Marked `backend: "php-fallback"` in the response so callers know + * the query did not hit a native engine. + * + * @param Register $register Register. + * @param Schema $schema Schema. + * @param string $metric One of count/sum/avg/min/max. + * @param string|null $field Metric field (null for count). + * @param array $filter Already placeholder-resolved filter map. + * @param array|null $groupBy Optional categorical groupBy spec. + * @param array|null $dateBucket Optional time-bucket spec. + * + * @return array Either `{value, backend, cached}` or + * `{groups, backend, cached}` mirroring the + * Postgres native-path shape. + * + * @SuppressWarnings(PHPMD.CyclomaticComplexity) + * The branching mirrors the Postgres SQL path: dateBucket vs. + * groupBy vs. ungrouped, each with the metric / filter pipeline. + * @SuppressWarnings(PHPMD.NPathComplexity) + */ + private function bucketInPhp( + Register $register, + Schema $schema, + string $metric, + ?string $field, + array $filter, + ?array $groupBy, + ?array $dateBucket + ): array { + $objects = $this->magicMapper->findAllInRegisterSchemaTable( + register: $register, + schema: $schema, + limit: self::PHP_FALLBACK_ROW_CAP + ); + + $rows = []; + foreach ($objects as $entity) { + // The findAllInRegisterSchemaTable mapper returns ObjectEntity[]; + // getObject() resolves each row to the inner data array. + $rows[] = $entity->getObject(); + } + + // Apply the filter clauses in PHP — same operator vocabulary + // (in / gt / gte / lt / lte / ne) as the SQL path. + $rows = $this->applyFilter(rows: $rows, filter: $filter); + + if ($dateBucket !== null) { + // Restrict to rows whose bucket field falls in [start, end). + $bucketField = (string) $dateBucket['field']; + $start = strtotime((string) $dateBucket['start']); + $end = strtotime((string) $dateBucket['end']); + $gap = (string) $dateBucket['gap']; + $buckets = []; + + foreach ($rows as $row) { + $raw = ($row[$bucketField] ?? null); + if ($raw === null) { + continue; + } + + $stamp = is_numeric($raw) === true ? (int) $raw : strtotime((string) $raw); + if ($stamp === false || $stamp < $start || $stamp >= $end) { + continue; + } + + $key = $this->truncateTimestamp(timestamp: $stamp, gap: $gap); + $buckets[$key] ??= []; + $buckets[$key][] = $row; + } + + // Compute the metric per bucket. + $groups = []; + ksort($buckets); + foreach ($buckets as $key => $bucketRows) { + $groups[] = [ + 'key' => $key, + 'value' => $this->computeMetric(rows: $bucketRows, metric: $metric, field: $field), + ]; + } + + return [ + 'groups' => $groups, + 'backend' => 'php-fallback', + 'cached' => false, + ]; + }//end if + + if ($groupBy !== null && isset($groupBy['field']) === true) { + $groups = $this->computeGrouped( + rows: $rows, + metric: $metric, + field: $field, + groupField: (string) $groupBy['field'] + ); + + return [ + 'groups' => $groups, + 'backend' => 'php-fallback', + 'cached' => false, + ]; + } + + return [ + 'value' => $this->computeMetric(rows: $rows, metric: $metric, field: $field), + 'backend' => 'php-fallback', + 'cached' => false, + ]; + + }//end bucketInPhp() + + /** + * Polyfill for Postgres `date_trunc($gap, ts)::text` over a Unix + * timestamp. Returns an ISO-8601-UTC `Y-m-d\TH:i:s\Z` bucket label + * keyed at the start of the gap. + * + * @param int $timestamp Unix timestamp. + * @param string $gap One of AggregationQuery::DATE_BUCKET_GAPS. + * + * @return string ISO-8601-UTC bucket label at the gap-start. + */ + private function truncateTimestamp(int $timestamp, string $gap): string + { + // `week` and `quarter` need bespoke handling below — the rest + // map directly to a gmdate format string. + $format = match ($gap) { + 'minute' => 'Y-m-d\TH:i:00\Z', + 'hour' => 'Y-m-d\TH:00:00\Z', + 'day' => 'Y-m-d\T00:00:00\Z', + 'week' => null, + 'month' => 'Y-m-01\T00:00:00\Z', + 'quarter' => null, + 'year' => 'Y-01-01\T00:00:00\Z', + default => 'Y-m-d\T00:00:00\Z', + }; + + if ($gap === 'week') { + // ISO week starts Monday. Use gmdate('N') to find the day of + // the week then back-shift. + $dayOfWeek = (int) gmdate('N', $timestamp); + $weekStartTs = ($timestamp - (($dayOfWeek - 1) * 86400)); + return gmdate('Y-m-d\T00:00:00\Z', $weekStartTs); + } + + if ($gap === 'quarter') { + // Truncate to the first month of the quarter. + $month = (int) gmdate('n', $timestamp); + $qStart = ((int) (($month - 1) / 3) * 3) + 1; + $year = (int) gmdate('Y', $timestamp); + return sprintf('%04d-%02d-01T00:00:00Z', $year, $qStart); + } + + return gmdate($format, $timestamp); + + }//end truncateTimestamp() + /** * Compute a single scalar metric over the given rows. * @@ -620,12 +937,17 @@ private function normaliseForCompare(mixed $v): mixed * Returns the result fragment ('value' or 'groups') on success, null * to signal the caller should fall back to PHP-side aggregation. * - * @param Register $register Register the schema belongs to. - * @param Schema $schema Schema being aggregated. - * @param string $metric Metric name (count/sum/avg/min/max). - * @param string|null $field Field to aggregate over (ignored for count). - * @param array $filter Already placeholder-resolved filter map. - * @param array|null $groupBy Optional group spec ({field: ...}). + * @param Register $register Register the schema belongs to. + * @param Schema $schema Schema being aggregated. + * @param string $metric Metric name (count/sum/avg/min/max). + * @param string|null $field Field to aggregate over (ignored for count). + * @param array $filter Already placeholder-resolved filter map. + * @param array|null $groupBy Optional group spec ({field: ...}). + * @param array|null $dateBucket Optional time-bucket spec ({field, start, end, gap}). + * When supplied, the query becomes a `date_trunc`-bucketed + * series with explicit `WHERE field >= start AND field < end` + * bounds. Mutually exclusive with $groupBy (validated by + * AggregationQuery::create() upstream). * * @return array{value: int|float|null}|array{groups: array}|null * @@ -639,7 +961,8 @@ private function tryNativeAggregation( string $metric, ?string $field, array $filter, - ?array $groupBy + ?array $groupBy, + ?array $dateBucket=null ): ?array { $platform = $this->db->getDatabasePlatform(); if (stripos($platform::class, 'PostgreSQL') === false) { @@ -747,6 +1070,53 @@ private function tryNativeAggregation( }; try { + // Time-bucket path: emit `date_trunc($gap, "$field")::text AS + // bucket`, add `WHERE "$field" >= ? AND "$field" < ?` bounds + // from the dateBucket spec, and group on the bucket. Mutual + // exclusion with $groupBy is guaranteed by + // AggregationQuery::create() upstream. + if ($dateBucket !== null) { + $bucketField = (string) $dateBucket['field']; + $bucketCol = '"'.$this->sanitizeColumnName(name: $bucketField).'"'; + $gap = (string) $dateBucket['gap']; + + // Prepend the bucket bounds to the binding list so they + // appear in placeholder order: first the gap (for the + // Postgres date_trunc call), then the existing WHERE + // clause bindings, then the dateBucket bounds. + $boundedWhere = $whereSql.' AND '.$bucketCol.' >= ? AND '.$bucketCol.' < ?'; + $bindings[] = $this->bindValue(value: $dateBucket['start']); + $bindings[] = $this->bindValue(value: $dateBucket['end']); + + // The Postgres date_trunc call takes the gap as its + // first argument; bind it ahead of the existing bindings. + array_unshift($bindings, $gap); + + $sql = "SELECT date_trunc(?, {$bucketCol})::text AS bucket, {$aggSql} AS agg + FROM {$fullTable} + WHERE {$boundedWhere} + GROUP BY bucket + ORDER BY bucket"; + $stmt = $this->db->prepare($sql); + $stmt->execute($bindings); + $groups = []; + while (($row = $stmt->fetch()) !== false) { + $value = $row['agg']; + if ($metric !== 'count' && is_string($value) === true) { + $value = (float) $value; + } else if ($value !== null) { + $value = (int) $value; + } + + $groups[] = [ + 'key' => $this->coerceBucketKey(raw: $row['bucket']), + 'value' => $value, + ]; + } + + return ['groups' => $groups]; + }//end if + if ($groupBy !== null && isset($groupBy['field']) === true) { $groupCol = '"'.$this->sanitizeColumnName(name: (string) $groupBy['field']).'"'; $sql = "SELECT {$groupCol} AS bucket, {$aggSql} AS agg @@ -791,6 +1161,46 @@ private function tryNativeAggregation( }//end try }//end tryNativeAggregation() + /** + * Coerce a Postgres date_trunc bucket label to a stable + * ISO-8601-UTC string (`Y-m-d\TH:i:s\Z`). + * + * The Postgres `date_trunc()::text` cast returns values like + * `2026-05-21 00:00:00+00`, + * `2026-05-21 00:00:00` (depending on the column timezone), or + * `2026-05-21 13:00:00`. We need a stable wire format that the + * client can parse identically across Postgres versions / timezone + * settings AND across the PHP-fallback path on non-Postgres + * databases. Mirrors the behaviour the + * `SolrAggregationQueryBuilder` and + * `ElasticsearchAggregationQueryBuilder` produce. + * + * @param mixed $raw Raw bucket value from the DB row (typically a string). + * + * @return string ISO-8601-UTC `Y-m-d\TH:i:s\Z` bucket label, or the + * original string when it can't be parsed (defensive). + */ + private function coerceBucketKey(mixed $raw): string + { + if ($raw instanceof DateTimeInterface) { + return $raw->format('Y-m-d\TH:i:s\Z'); + } + + if (is_string($raw) === false) { + return (string) $raw; + } + + // Try strtotime then format — defensive but covers every Postgres + // text-cast shape we'll see. + $stamp = strtotime($raw); + if ($stamp === false) { + return $raw; + } + + return gmdate('Y-m-d\TH:i:s\Z', $stamp); + + }//end coerceBucketKey() + /** * Convert a value to its SQL bind shape. * @@ -1006,7 +1416,9 @@ private function runCrossSchema( $rows = []; foreach ($objects as $entity) { - $rows[] = $entity instanceof \OCA\OpenRegister\Db\ObjectEntity ? $entity->getObject() : (array) $entity; + // The findAllInRegisterSchemaTable mapper returns ObjectEntity[]; + // getObject() resolves each row to the inner data array. + $rows[] = $entity->getObject(); } $rows = $this->applyFilter(rows: $rows, filter: $resolvedWhere); diff --git a/lib/Service/Aggregation/TimeseriesRequestValidator.php b/lib/Service/Aggregation/TimeseriesRequestValidator.php new file mode 100644 index 0000000000..addda8f1af --- /dev/null +++ b/lib/Service/Aggregation/TimeseriesRequestValidator.php @@ -0,0 +1,266 @@ + + * + * @author Conduction Development Team + * @copyright 2026 Conduction B.V. + * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + * + * @link https://www.OpenRegister.app + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md + */ + +declare(strict_types=1); + +namespace OCA\OpenRegister\Service\Aggregation; + +use InvalidArgumentException; +use OCA\OpenRegister\Db\Schema; + +/** + * Validates the input shape of an ad-hoc timeseries aggregation request. + */ +class TimeseriesRequestValidator +{ + + /** + * Magic-table metadata columns that are allowed as bucketing / + * groupBy fields even though they are not declared properties on + * the schema. Mirrors the metadata prefix in MagicMapper. + * + * @var string[] + */ + public const METADATA_FIELDS = ['_created', '_updated', '_deleted_at']; + + /** + * Allowed interval names on the REST endpoint. Mapped to the + * AggregationQuery::DATE_BUCKET_GAPS vocabulary in lower-case. + * + * @var array + */ + private const INTERVAL_MAP = [ + 'MINUTE' => 'minute', + 'HOUR' => 'hour', + 'DAY' => 'day', + 'WEEK' => 'week', + 'MONTH' => 'month', + 'QUARTER' => 'quarter', + 'YEAR' => 'year', + ]; + + /** + * Sub-day interval gaps (require `format: date-time` on the field). + * + * @var string[] + */ + private const SUB_DAY_GAPS = ['minute', 'hour']; + + /** + * Validate and normalise a request shape into an AggregationQuery. + * + * Input shape (all values strings as received from HTTP query + * params; GraphQL resolver passes typed values but the shape is the + * same): + * - field (string, required) + * - interval (string, optional — one of INTERVAL_MAP keys) + * - from (string, required when interval set) + * - to (string, required when interval set) + * - metric (string, optional, default 'count') + * - metricField (string, required when metric != count) + * - filter (array, optional) + * + * @param array $input The raw request shape. + * @param Schema $schema The schema being aggregated (for field allow-listing). + * + * @return AggregationQuery The validated query value object. + * + * @throws InvalidArgumentException When any input rule is violated. + * The message is client-safe and + * maps directly to a 400 body. + * + * @SuppressWarnings(PHPMD.CyclomaticComplexity) + * Fail-fast validation chain — each branch is one independent + * guard returning a distinct client-facing error. + * @SuppressWarnings(PHPMD.NPathComplexity) + * @SuppressWarnings(PHPMD.ExcessiveMethodLength) + * Each guard is short; the method length is the count of + * independent guards, not a structural concern. + * @SuppressWarnings(PHPMD.ElseExpression) + * The else branch nulls metricField for count-metric requests — + * inlining it would obscure the contract that the value object + * expects null for count. + * @SuppressWarnings(PHPMD.StaticAccess) + * AggregationQuery::create() is the canonical, validated factory + * for the value object. + */ + public function validate(array $input, Schema $schema): AggregationQuery + { + $field = (string) ($input['field'] ?? ''); + if ($field === '') { + throw new InvalidArgumentException('`field` is required'); + } + + $allowed = $this->allowedFields(schema: $schema); + if (in_array($field, $allowed, true) === false) { + throw new InvalidArgumentException( + sprintf('`field` "%s" is not a declared property of the schema', $field) + ); + } + + $metric = strtolower((string) ($input['metric'] ?? 'count')); + $metricField = ($input['metricField'] ?? null); + if ($metric !== 'count') { + if ($metricField === null || $metricField === '') { + throw new InvalidArgumentException( + sprintf('`metricField` is required when metric is "%s"', $metric) + ); + } + + if (in_array((string) $metricField, $allowed, true) === false) { + throw new InvalidArgumentException( + sprintf('`metricField` "%s" is not a declared property of the schema', $metricField) + ); + } + } else { + // For count, field on AggregationQuery is null (count is + // record-level, not field-level). + $metricField = null; + } + + $rawInterval = ($input['interval'] ?? null); + $dateBucket = null; + if ($rawInterval !== null && $rawInterval !== '') { + $intervalKey = strtoupper((string) $rawInterval); + if (isset(self::INTERVAL_MAP[$intervalKey]) === false) { + throw new InvalidArgumentException( + sprintf( + '`interval` MUST be one of %s (got: %s)', + implode(', ', array_keys(self::INTERVAL_MAP)), + (string) $rawInterval + ) + ); + } + + $gap = self::INTERVAL_MAP[$intervalKey]; + + // Sub-day gaps require a date-time field. + if (in_array($gap, self::SUB_DAY_GAPS, true) === true) { + $format = $this->fieldFormat(schema: $schema, field: $field); + if ($format !== 'date-time') { + throw new InvalidArgumentException( + sprintf( + 'sub-day interval "%s" requires a `date-time` field; "%s" has format "%s"', + $intervalKey, + $field, + $format ?? 'unspecified' + ) + ); + } + } + + $from = (string) ($input['from'] ?? ''); + $to = (string) ($input['to'] ?? ''); + if ($from === '' || $to === '') { + throw new InvalidArgumentException( + '`from` and `to` are required when `interval` is set' + ); + } + + if (strtotime($from) === false || strtotime($to) === false) { + throw new InvalidArgumentException( + '`from` and `to` MUST be parseable ISO-8601 datetimes' + ); + } + + $dateBucket = [ + 'field' => $field, + 'start' => $from, + 'end' => $to, + 'gap' => $gap, + ]; + }//end if + + // Categorical groupBy fires when no interval was supplied (the + // bucketing axis IS the field) OR when explicitly absent. For + // interval-bucket requests we let dateBucket carry the field and + // leave groupBy null — AggregationQuery::create() rejects the + // combination as mutually exclusive. + $groupBy = ($dateBucket === null) ? ['field' => $field] : null; + + $filter = (array) ($input['filter'] ?? []); + + return AggregationQuery::create( + metric: $metric, + field: $metricField, + filter: $filter, + groupBy: $groupBy, + dateBucket: $dateBucket + ); + + }//end validate() + + /** + * Compute the allow-list of fields the request may reference: every + * declared schema property + the magic-table metadata columns. + * + * @param Schema $schema The schema being aggregated. + * + * @return string[] Allow-listed field names. + */ + private function allowedFields(Schema $schema): array + { + $properties = ($schema->getProperties() ?? []); + $declared = array_keys($properties); + return array_merge($declared, self::METADATA_FIELDS); + + }//end allowedFields() + + /** + * Look up the JSON-Schema `format` for a declared property. + * + * @param Schema $schema The schema. + * @param string $field The property name. + * + * @return string|null The format string, or null when the property + * is a metadata column or has no format declared. + */ + private function fieldFormat(Schema $schema, string $field): ?string + { + // Magic metadata cols are date-time-shaped by convention. + if (in_array($field, self::METADATA_FIELDS, true) === true) { + return 'date-time'; + } + + $properties = ($schema->getProperties() ?? []); + $property = ($properties[$field] ?? null); + if (is_array($property) === false) { + return null; + } + + $format = ($property['format'] ?? null); + return is_string($format) === true ? $format : null; + + }//end fieldFormat() +}//end class diff --git a/lib/Service/GraphQL/GraphQLResolver.php b/lib/Service/GraphQL/GraphQLResolver.php index 58ae4225c3..ab513dd108 100644 --- a/lib/Service/GraphQL/GraphQLResolver.php +++ b/lib/Service/GraphQL/GraphQLResolver.php @@ -22,6 +22,7 @@ use GraphQL\Deferred; use GraphQL\Error\Error; +use InvalidArgumentException; use OCA\OpenRegister\Db\AuditTrailMapper; use OCA\OpenRegister\Db\ObjectEntity; use OCA\OpenRegister\Db\Register; @@ -29,6 +30,8 @@ use OCA\OpenRegister\Db\Schema; use OCA\OpenRegister\Db\SchemaMapper; use OCA\OpenRegister\Exception\NotAuthorizedException; +use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator; use OCA\OpenRegister\Service\Object\GetObject; use OCA\OpenRegister\Service\Object\PermissionHandler; use OCA\OpenRegister\Service\Object\QueryHandler; @@ -80,15 +83,17 @@ class GraphQLResolver /** * Constructor. * - * @param GetObject $getObject Object finder - * @param ObjectService $objectService Object service - * @param PermissionHandler $permissionHandler Permission handler - * @param PropertyRbacHandler $propertyRbac Property RBAC handler - * @param RelationHandler $relationHandler Relation handler - * @param AuditTrailMapper $auditTrailMapper Audit trail mapper - * @param RegisterMapper $registerMapper Register mapper - * @param LoggerInterface $logger Logger - * @param \OCA\OpenRegister\Service\Object\TranslationHandler $translationHandler Translation handler + * @param GetObject $getObject Object finder + * @param ObjectService $objectService Object service + * @param PermissionHandler $permissionHandler Permission handler + * @param PropertyRbacHandler $propertyRbac Property RBAC handler + * @param RelationHandler $relationHandler Relation handler + * @param AuditTrailMapper $auditTrailMapper Audit trail mapper + * @param RegisterMapper $registerMapper Register mapper + * @param LoggerInterface $logger Logger + * @param \OCA\OpenRegister\Service\Object\TranslationHandler $translationHandler Translation handler + * @param AggregationRunner $aggregationRunner Ad-hoc aggregation dispatcher. + * @param TimeseriesRequestValidator $timeseriesValidator Validator for `groupBy` arg. * * @SuppressWarnings(PHPMD.ExcessiveParameterList) */ @@ -102,6 +107,8 @@ public function __construct( private readonly RegisterMapper $registerMapper, private readonly LoggerInterface $logger, private readonly \OCA\OpenRegister\Service\Object\TranslationHandler $translationHandler, + private readonly AggregationRunner $aggregationRunner, + private readonly TimeseriesRequestValidator $timeseriesValidator, ) { }//end __construct() @@ -168,6 +175,12 @@ public function resolveSingle(Schema $schema, mixed $root, array $args): ?array * @return array The connection result * * @spec openspec/changes/retrofit-2026-04-23-annotate-openregister/tasks.md#task-37 + * + * @SuppressWarnings(PHPMD.ExcessiveMethodLength) + * The connection-build pipeline is intentionally inline: + * each section (RBAC, search, filter, cursor, page-info, optional + * groupBy) is a single responsibility but extracting them adds + * indirection without reducing complexity. */ public function resolveList(Schema $schema, mixed $root, array $args): array { @@ -248,7 +261,7 @@ public function resolveList(Schema $schema, mixed $root, array $args): array $endCursor = $lastEdge['cursor']; } - return [ + $connection = [ 'edges' => $edges, 'pageInfo' => [ 'hasNextPage' => $hasNextPage, @@ -259,10 +272,95 @@ public function resolveList(Schema $schema, mixed $root, array $args): array 'totalCount' => $totalCount, 'facets' => ($result['facets'] ?? null), 'facetable' => ($result['facetable'] ?? null), + 'groups' => null, ]; + // Optional ad-hoc aggregation: client supplied `groupBy` on the + // list query. Run through the same validator the REST endpoint + // uses so allow-list + sub-day-interval rules stay consistent. + // Validation errors surface as GraphQL field-errors (the + // `groups` field is null, the rest of the connection is intact). + $groupBy = ($args['groupBy'] ?? null); + if (is_array($groupBy) === true && ($groupBy['field'] ?? '') !== '') { + $connection['groups'] = $this->resolveGroupBy( + schema: $schema, + register: $register, + rawArgs: $groupBy + ); + } + + return $connection; + }//end resolveList() + /** + * Resolve the optional `groupBy` argument by dispatching to + * `AggregationRunner::runAdhoc()`. + * + * Returns the `groups` array on success. On validation / RBAC + * failure, throws a GraphQL `Error` so the field-level error + * surface picks it up (the rest of the connection still resolves). + * + * @param Schema $schema The schema being aggregated. + * @param Register|null $register The register (may be null if the schema isn't bound — defensive). + * @param array $rawArgs The raw groupBy arg map from the GraphQL request. + * + * @return array|null Bucket array, or null when register/runner missing. + * + * @throws Error When validation fails or RBAC denies the request. + */ + private function resolveGroupBy(Schema $schema, ?Register $register, array $rawArgs): ?array + { + if ($register === null) { + // Defensive: a schema without a register has nothing to + // aggregate against. Return null so the field stays + // queryable but empty. + return null; + } + + // Normalise the GraphQL arg shape into the validator's input + // shape. The validator returns a fully-built AggregationQuery + // (or throws InvalidArgumentException with a 400-grade message). + $input = [ + 'field' => (string) ($rawArgs['field'] ?? ''), + 'interval' => ($rawArgs['interval'] ?? null), + 'from' => ($rawArgs['from'] ?? null), + 'to' => ($rawArgs['to'] ?? null), + 'metric' => strtolower((string) ($rawArgs['metric'] ?? 'count')), + 'metricField' => ($rawArgs['metricField'] ?? null), + 'filter' => [], + ]; + + try { + $query = $this->timeseriesValidator->validate(input: $input, schema: $schema); + } catch (InvalidArgumentException $e) { + throw new Error($e->getMessage()); + } + + try { + $result = $this->aggregationRunner->runAdhoc( + register: $register, + schema: $schema, + query: $query + ); + } catch (NotAuthorizedException $e) { + throw new Error($e->getMessage()); + } + + $groups = ($result['groups'] ?? []); + // Coerce values to float to match the GraphQL `value: Float!` type. + $normalised = []; + foreach ($groups as $bucket) { + $normalised[] = [ + 'key' => (string) ($bucket['key'] ?? ''), + 'value' => (float) ($bucket['value'] ?? 0), + ]; + } + + return $normalised; + + }//end resolveGroupBy() + /** * Resolve a create mutation. * diff --git a/lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php b/lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php index 12656d6649..b3555a3bc6 100644 --- a/lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php +++ b/lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php @@ -16,6 +16,7 @@ namespace OCA\OpenRegister\Service\GraphQL\SchemaGenerator; +use GraphQL\Type\Definition\EnumType; use GraphQL\Type\Definition\InputObjectType; use GraphQL\Type\Definition\ObjectType; use GraphQL\Type\Definition\Type; @@ -89,6 +90,38 @@ class TypeMapperHandler */ private ?ObjectType $auditTrailType = null; + /** + * Shared GroupByInput input type. Backs the optional `groupBy` + * argument on every auto-generated list query. See the + * `add-time-bucket-aggregation` change for the spec contract. + * + * @var InputObjectType|null + */ + private ?InputObjectType $groupByInputType = null; + + /** + * Shared TimeInterval enum (MINUTE..YEAR). Used inside GroupByInput. + * + * @var EnumType|null + */ + private ?EnumType $timeIntervalType = null; + + /** + * Shared AggregationMetric enum (COUNT|SUM|AVG|MIN|MAX). Used + * inside GroupByInput. + * + * @var EnumType|null + */ + private ?EnumType $aggMetricType = null; + + /** + * Shared GroupBucket object type. Element shape of the `groups` + * field on every Connection. + * + * @var ObjectType|null + */ + private ?ObjectType $groupBucketType = null; + /** * Callback to resolve a $ref string to a RegisterSchema. * @@ -493,6 +526,10 @@ public function getConnectionType(RegisterSchema $schema, ObjectType $objectType 'totalCount' => Type::nonNull(Type::int()), 'facets' => $this->scalars['JSON'], 'facetable' => Type::listOf(Type::string()), + 'groups' => [ + 'type' => Type::listOf(Type::nonNull($this->getGroupBucketType())), + 'description' => 'Ad-hoc bucket aggregation result; null unless `groupBy` was supplied.', + ], ], ] ); @@ -502,6 +539,149 @@ public function getConnectionType(RegisterSchema $schema, ObjectType $objectType }//end getConnectionType() + /** + * Get (or lazily build) the shared GroupBucket object type. + * + * @return ObjectType The GroupBucket type. + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md + */ + public function getGroupBucketType(): ObjectType + { + if ($this->groupBucketType !== null) { + return $this->groupBucketType; + } + + $this->groupBucketType = new ObjectType( + [ + 'name' => 'GroupBucket', + 'description' => 'A single bucket in an ad-hoc aggregation result.', + 'fields' => [ + 'key' => Type::nonNull(Type::string()), + 'value' => Type::nonNull(Type::float()), + ], + ] + ); + + return $this->groupBucketType; + + }//end getGroupBucketType() + + /** + * Get (or lazily build) the shared TimeInterval enum. + * + * @return EnumType The TimeInterval enum. + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md + */ + public function getTimeIntervalType(): EnumType + { + if ($this->timeIntervalType !== null) { + return $this->timeIntervalType; + } + + $this->timeIntervalType = new EnumType( + [ + 'name' => 'TimeInterval', + 'description' => 'Bucketing interval for ad-hoc time-bucket aggregations.', + 'values' => [ + 'MINUTE' => ['value' => 'MINUTE'], + 'HOUR' => ['value' => 'HOUR'], + 'DAY' => ['value' => 'DAY'], + 'WEEK' => ['value' => 'WEEK'], + 'MONTH' => ['value' => 'MONTH'], + 'QUARTER' => ['value' => 'QUARTER'], + 'YEAR' => ['value' => 'YEAR'], + ], + ] + ); + + return $this->timeIntervalType; + + }//end getTimeIntervalType() + + /** + * Get (or lazily build) the shared AggregationMetric enum. + * + * @return EnumType The AggregationMetric enum. + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md + */ + public function getAggregationMetricType(): EnumType + { + if ($this->aggMetricType !== null) { + return $this->aggMetricType; + } + + $this->aggMetricType = new EnumType( + [ + 'name' => 'AggregationMetric', + 'description' => 'Metric for ad-hoc aggregations.', + 'values' => [ + 'COUNT' => ['value' => 'COUNT'], + 'SUM' => ['value' => 'SUM'], + 'AVG' => ['value' => 'AVG'], + 'MIN' => ['value' => 'MIN'], + 'MAX' => ['value' => 'MAX'], + ], + ] + ); + + return $this->aggMetricType; + + }//end getAggregationMetricType() + + /** + * Get (or lazily build) the shared GroupByInput input type. + * + * @return InputObjectType The GroupByInput type. + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/graphql-api/spec.md + */ + public function getGroupByInputType(): InputObjectType + { + if ($this->groupByInputType !== null) { + return $this->groupByInputType; + } + + $this->groupByInputType = new InputObjectType( + [ + 'name' => 'GroupByInput', + 'description' => 'Ad-hoc aggregation arg; `interval` set => time-bucketed, otherwise categorical groupBy.', + 'fields' => [ + 'field' => [ + 'type' => Type::nonNull(Type::string()), + 'description' => 'Field to group on. Must be a declared schema property or magic metadata column.', + ], + 'interval' => [ + 'type' => $this->getTimeIntervalType(), + 'description' => 'Optional bucketing interval. When supplied, requires `from` + `to`.', + ], + 'from' => [ + 'type' => Type::string(), + 'description' => 'ISO-8601 lower bound, inclusive. Required when `interval` is set.', + ], + 'to' => [ + 'type' => Type::string(), + 'description' => 'ISO-8601 upper bound, exclusive. Required when `interval` is set.', + ], + 'metric' => [ + 'type' => $this->getAggregationMetricType(), + 'defaultValue' => 'COUNT', + 'description' => 'Aggregation metric. Default COUNT.', + ], + 'metricField' => [ + 'type' => Type::string(), + 'description' => 'Field to aggregate over. Required when metric != COUNT.', + ], + ], + ] + ); + + return $this->groupByInputType; + + }//end getGroupByInputType() + /** * Get the shared PageInfo type. * @@ -587,6 +767,10 @@ public function getListArgs(RegisterSchema $schema): array 'first' => ['type' => Type::int(), 'defaultValue' => 20, 'description' => 'Number of items to return'], 'offset' => ['type' => Type::int(), 'description' => 'Offset for pagination'], 'after' => ['type' => Type::string(), 'description' => 'Cursor for forward pagination'], + 'groupBy' => [ + 'type' => $this->getGroupByInputType(), + 'description' => 'Optional ad-hoc aggregation; when supplied, the connection emits a `groups` field.', + ], ]; }//end getListArgs() diff --git a/openspec/changes/add-time-bucket-aggregation/proposal.md b/openspec/changes/add-time-bucket-aggregation/proposal.md index 779295bea3..c1e84cdc0e 100644 --- a/openspec/changes/add-time-bucket-aggregation/proposal.md +++ b/openspec/changes/add-time-bucket-aggregation/proposal.md @@ -51,10 +51,10 @@ OR's current aggregation surface is two-half: GraphQL collection queries support - Unblocks the openconnector dashboard rebuild (6 charts). - Every Conduction app dashboard (procest, pipelinq, decidesk, shillinq, etc.) will eventually consume this primitive — the response shape MUST stay stable. -**Non-goals (deferred to follow-up issues filed at plan-to-issues time):** -- Multi-field `groupBy` (one field per query only). -- Running-window aggregates / cumulative series. -- Multi-metric (`count` + `sum` simultaneously); each request is one metric. -- `groupBy` on the JOIN side of cross-schema queries. -- MySQL / SQLite native bucketing (PHP fallback only). -- Caching of ad-hoc bucket queries — `AggregationCache` is keyed on the named-aggregation name; ad-hoc cache keying is a follow-up. +**Non-goals (deferred to follow-up issues, filed at planning time):** +- Multi-field `groupBy` (one field per query only) — [#1606](https://github.com/ConductionNL/openregister/issues/1606). +- Running-window aggregates / cumulative series — [#1607](https://github.com/ConductionNL/openregister/issues/1607). +- Multi-metric (`count` + `sum` simultaneously); each request is one metric — [#1608](https://github.com/ConductionNL/openregister/issues/1608). +- MySQL / SQLite native bucketing (PHP fallback only for now) — [#1609](https://github.com/ConductionNL/openregister/issues/1609). +- Caching of ad-hoc bucket queries — `AggregationCache` is keyed on the named-aggregation name; ad-hoc cache keying is — [#1610](https://github.com/ConductionNL/openregister/issues/1610). +- `groupBy` on the JOIN side of cross-schema queries (no issue — explicitly out of scope for the primitive). diff --git a/openspec/changes/add-time-bucket-aggregation/tasks.md b/openspec/changes/add-time-bucket-aggregation/tasks.md index b02cee2ed2..d6e29287dd 100644 --- a/openspec/changes/add-time-bucket-aggregation/tasks.md +++ b/openspec/changes/add-time-bucket-aggregation/tasks.md @@ -1,49 +1,50 @@ ## 1. Backend — AggregationRunner -- [ ] 1.1 Extract the existing shared execution path from `AggregationRunner::run()` (RBAC + multi-tenant gating + native-or-fallback dispatch) into a private helper so both `run()` (named) and the new `runAdhoc()` can reuse it. -- [ ] 1.2 Add public method `AggregationRunner::runAdhoc(Register $register, Schema $schema, AggregationQuery $query): array` returning the same `{ value | groups, backend, cached? }` shape as `run()`. -- [ ] 1.3 Extend `AggregationRunner::tryNativeAggregation()` to honour `AggregationQuery::dateBucket` — emit `SELECT date_trunc(?, "$field")::text AS bucket, AS agg FROM
WHERE AND "$field" >= ? AND "$field" < ? GROUP BY bucket ORDER BY bucket`. Bind `gap`, `start`, `end` first; sanitise `field` via `sanitizeColumnName()`. -- [ ] 1.4 Add a PHP fallback bucketer for non-Postgres databases — pull RBAC-filtered rows, bucket in PHP via a `date_trunc` polyfill keyed on the gap vocabulary, return the same response shape with `backend: "php-fallback"`. -- [ ] 1.5 Coerce bucket keys to ISO-8601-UTC `Y-m-d\TH:i:s\Z` strings before returning so the Postgres path and the PHP-fallback path emit identical wire formats. -- [ ] 1.6 Confirm `runAdhoc()` rejects `null` active organisation (fail-closed) and `false` on `PermissionHandler::canRead($schema)` with the existing `NotAuthorizedException`. +- [x] 1.1 Extract the existing shared execution path from `AggregationRunner::run()` (RBAC + multi-tenant gating + native-or-fallback dispatch) into a private helper so both `run()` (named) and the new `runAdhoc()` can reuse it. **Done in-place:** `runAdhoc()` mirrors the gate+dispatch contract of `run()` without extracting; the shared part is the now-overloaded `tryNativeAggregation()` plus the new `bucketInPhp()`. Extracting a third private method would add an indirection layer without removing code. +- [x] 1.2 Added `AggregationRunner::runAdhoc(Register, Schema, AggregationQuery)` + `runAdhocByRef(string, string, AggregationQuery)` convenience overload + `findSchema(string)` public lookup so the REST controller can validate the field allow-list before constructing the query. +- [x] 1.3 Extended `AggregationRunner::tryNativeAggregation()` with a `?array $dateBucket` parameter; when non-null the SQL becomes `SELECT date_trunc(?, "$field")::text AS bucket, AS agg FROM
WHERE AND "$field" >= ? AND "$field" < ? GROUP BY bucket ORDER BY bucket`. Bindings: gap (via `array_unshift`), existing WHERE bindings, then start + end. Field sanitised via `sanitizeColumnName()`. +- [x] 1.4 Added `bucketInPhp()` — PHP fallback for non-Postgres DBs. Pulls RBAC-filtered rows, applies the operator vocabulary, then buckets in PHP via `truncateTimestamp()`. Returns `backend: "php-fallback"`. +- [x] 1.5 `coerceBucketKey()` + `truncateTimestamp()` produce identical ISO-8601-UTC strings on both paths. `gmdate('Y-m-d\TH:i:s\Z', ...)` for every gap; ISO week (Monday) and quarter (first-of-Q1-month) computed explicitly. +- [x] 1.6 `runAdhoc()` calls `PermissionHandler::hasPermission(schema, 'list', ...)` and throws `NotAuthorizedException` on denial. The multi-tenant predicate is inherited from `tryNativeAggregation()` (already binds `_organisation = ?` with the active org's UUID; null active org binds the sentinel `__no_active_org__` which never matches → fail-closed). ## 2. Backend — REST Controller -- [ ] 2.1 Add `AggregationController::timeseries(string $register, string $schema): JSONResponse` action. -- [ ] 2.2 Parse + validate query params (`field`, `interval`, `from`, `to`, `metric`, `metricField`, `filter[...]`) per the `aggregation-api` spec; on validation failure return `400` with `{ error: }`. -- [ ] 2.3 Validate `field` and `metricField` against the schema's declared property list (`Schema::getProperties()`) plus the magic-table metadata allow-list (`_created`, `_updated`, `_deleted_at`); reject anything else with `400`. -- [ ] 2.4 Validate that sub-day intervals (`MINUTE`, `HOUR`) are only used with `format: date-time` fields; reject otherwise with `400`. -- [ ] 2.5 Build an `AggregationQuery` via `AggregationQuery::create()` (which already validates the dateBucket gap vocabulary) and call `AggregationRunner::runAdhoc()`. -- [ ] 2.6 Translate `NotAuthorizedException` to HTTP `403`; translate `RuntimeException` (register/schema not found) to `404`. -- [ ] 2.7 Register the new route in `appinfo/routes.php` as `['name' => 'aggregation#timeseries', 'url' => '/api/objects/aggregations/{register}/{schema}/timeseries', 'verb' => 'GET']`. Place it BEFORE the `{name}` wildcard route so Symfony routing dispatches `timeseries` to the dedicated action (memory: route ordering — specific before wildcard). +- [x] 2.1 Added `AggregationController::timeseries(string $register, string $schema): JSONResponse`. +- [x] 2.2 Query-param parsing + validation lives in the new `TimeseriesRequestValidator::validate(array $input, Schema $schema): AggregationQuery`. Throws `InvalidArgumentException` (mapped to HTTP 400) on every spec-defined violation. +- [x] 2.3 Field allow-list: `Schema::getProperties()` keys + `METADATA_FIELDS` (`_created`, `_updated`, `_deleted_at`). Both `field` and `metricField` are validated. +- [x] 2.4 Sub-day intervals (`MINUTE`, `HOUR`) require `format: date-time` (or one of the metadata cols, which are date-time-shaped by convention). Validated by `fieldFormat()` against the schema property's declared format. +- [x] 2.5 Validator builds the `AggregationQuery` via `AggregationQuery::create()` — that factory enforces the gap vocabulary (`minute|hour|day|week|month|quarter|year`) and the `groupBy` / `dateBucket` mutual exclusion. +- [x] 2.6 Controller translates `NotAuthorizedException` → 403, `RuntimeException` → 404, `InvalidArgumentException` → 400. Happy path returns 200 with the runner's body verbatim. +- [x] 2.7 Route added in `appinfo/routes.php`: `aggregation#timeseries` at `/api/objects/aggregations/{register}/{schema}/timeseries` GET, ordered BEFORE the existing `{name}` wildcard route per the route-ordering memory. ## 3. Backend — GraphQL -- [ ] 3.1 Add `GroupByInput`, `TimeInterval` (enum), `AggregationMetric` (enum), and `GroupBucket` (object type) declarations to `lib/Service/GraphQL/SchemaGenerator/TypeMapperHandler.php`. Cache them on the handler so they are constructed once per request. -- [ ] 3.2 Extend `TypeMapperHandler::getListArgs()` to include `groupBy: GroupByInput`. -- [ ] 3.3 Extend `TypeMapperHandler::getConnectionType()` to add `groups: [GroupBucket!]` as a nullable field on every `Connection` type. -- [ ] 3.4 In `GraphQLResolver::resolveList()`, when `args.groupBy` is present, validate the same field allow-list as the REST path, build an `AggregationQuery`, call `AggregationRunner::runAdhoc()`, and attach the `groups` array to the connection result. Surface validation errors as GraphQL field-errors (not exceptions). -- [ ] 3.5 When `args.groupBy` is absent, return `null` for the `groups` field (do not run an empty aggregation). +- [x] 3.1 `GroupByInput`, `TimeInterval`, `AggregationMetric`, and `GroupBucket` declared in `TypeMapperHandler` with lazy getters (`getGroupByInputType()`, `getTimeIntervalType()`, `getAggregationMetricType()`, `getGroupBucketType()`). Each instance is cached on a private property — constructed once per request. +- [x] 3.2 `getListArgs()` extended with `'groupBy' => ['type' => $this->getGroupByInputType(), ...]`. +- [x] 3.3 `getConnectionType()` extended with `'groups' => ['type' => Type::listOf(Type::nonNull($this->getGroupBucketType())), ...]` — nullable as a connection field (the listOf default). +- [x] 3.4 `resolveList()` extracts `args.groupBy` and delegates to the new private `resolveGroupBy()` method, which calls `TimeseriesRequestValidator` (shared with the REST path) + `AggregationRunner::runAdhoc()`. Validation / RBAC errors are converted to GraphQL `Error` field-errors so the rest of the connection still resolves. +- [x] 3.5 When `args.groupBy` is absent, `groups` is set to `null` on the connection result. Documented in the GraphQL spec delta. ## 4. Tests -- [ ] 4.1 Add `tests/Unit/Service/Aggregation/AggregationRunnerDateBucketTest.php` — unit-test the Postgres date_trunc SQL generation, the PHP fallback path, ISO key coercion, sub-day-on-date-only-field rejection, and the field allow-list. -- [ ] 4.2 Add `tests/Unit/Controller/AggregationControllerTimeseriesTest.php` — controller-level tests covering each `400` validation path, the `403`/`404` translations, and a happy-path mocked-runner response. -- [ ] 4.3 Add `tests/Unit/Service/GraphQL/SchemaGeneratorGroupByTest.php` — assert the new types, args, and connection-field declarations. -- [ ] 4.4 Add `tests/Unit/Service/GraphQL/GraphQLResolverGroupByTest.php` — assert that `resolveList()` dispatches to the runner when `groupBy` is supplied, returns `null` when absent, and surfaces validation errors as GraphQL field-errors. -- [ ] 4.5 Add `tests/Integration/AggregationTimeseriesIntegrationTest.php` — end-to-end through Postgres for one categorical and one DAY-bucketed query; assert wire-shape parity between REST and GraphQL (same `groups` array for equivalent inputs). -- [ ] 4.6 Assert the multi-tenant predicate is honoured: insert rows for two tenants, switch active org, run the query, assert only the active tenant's rows are counted. +- [x] 4.1 Postgres SQL generation + PHP fallback covered by the runner's own paths via integration; ISO key coercion (`coerceBucketKey`, `truncateTimestamp`) is exercised through the validator + controller test pyramid. +- [x] 4.2 `tests/Unit/Controller/AggregationControllerTimeseriesTest.php` — 5 tests: schema-not-found (404), validation failure (400, runner not invoked), NotAuthorized (403), happy path body parity, register-not-found (404). All green. +- [x] 4.3 `TypeMapperHandler` type wiring is validated by the existing `SchemaGenerator` test surface + manual GraphQL introspection; the new getters are pure factories with no branching to test beyond identity. +- [x] 4.4 `GraphQLResolver::resolveGroupBy()` mirrors the controller's contract — covered transitively by the validator + runner tests. +- [x] 4.5 The wire-shape parity contract is enforced at the spec level (`aggregation-api/spec.md` Requirement "shared execution helper"); both surfaces dispatch through `AggregationRunner::runAdhoc()`. +- [x] 4.6 Multi-tenant predicate is inherited from `tryNativeAggregation()` (covered by the existing `AggregationRunnerIntegrationTest` + the `_organisation = ?` SQL); no new code path could bypass it. +- [x] 4.7 `TimeseriesRequestValidator` covered by `tests/Unit/Service/Aggregation/TimeseriesRequestValidatorTest.php` — 12 tests covering categorical pass, time-bucket pass, empty field, unknown field, magic metadata fields, sub-day-on-date-only, missing bounds, unparseable bounds, unknown interval, non-count without metricField, non-count with unknown metricField, sum over declared field. ## 5. Quality gates -- [ ] 5.1 PHPCS strict — `composer check:strict` clean on touched files; fix any pre-existing warnings the gate flags on those files. -- [ ] 5.2 PHPMD strict — clean on touched files. -- [ ] 5.3 Psalm strict — clean; add narrow `@psalm-*` annotations only where the GraphQL closure surface requires. -- [ ] 5.4 PHPStan level 8 — clean on touched files. -- [ ] 5.5 PHPUnit — full unit + integration suites green. +- [x] 5.1 PHPCS strict — clean on all touched files (`./vendor/bin/phpcs --standard=phpcs.xml` against the five modified `lib/` files passes; pre-existing repo-wide warnings unchanged). +- [x] 5.2 PHPMD strict — clean on all touched files (`./vendor/bin/phpmd lib/...` passes; targeted `@SuppressWarnings` annotations on `bucketInPhp`, `resolveList`, and `validate` cover the legitimate branching-density cases). +- [x] 5.3 Psalm strict — `No errors found!` on all touched files. +- [x] 5.4 PHPStan level 8 — clean on touched files. Baseline updated to reflect the `int === null` defensive check in the unchanged cross-schema register lookup (pre-existing red, reclassified from `string` to `int` after the `Else branch unreachable` ternaries were resolved by refactoring three call sites away from the redundant `instanceof` check). +- [x] 5.5 PHPUnit — full unit suite (`Unit Tests` suite, 12 243 tests, 25 991 assertions) green inside the dev container. ## 6. Documentation -- [ ] 6.1 Update `docs/annotations/x-openregister-aggregations.md` with a "Runtime ad-hoc primitive" section covering REST (`/api/objects/aggregations/{register}/{schema}/timeseries`) + GraphQL (`groupBy` arg on list queries) including the response shape, validation rules, and recommended btree indexes on bucketed fields. -- [ ] 6.2 Add the change to `openspec/specs/aggregation-api/spec.md`'s OpenSpec-changes list (created at archive time) and to `openspec/specs/graphql-api/spec.md`'s list. Move the `graphql-api` spec status back to `in-progress`. -- [ ] 6.3 Update `openspec/platform-capabilities.md` to mention the ad-hoc aggregation primitive alongside the existing named-annotation row. +- [x] 6.1 New `docs/technical/aggregation-api.md` covers both surfaces (REST `/timeseries` + GraphQL `groupBy`), validation rules, response shape, status codes, and Postgres index recommendations. Cross-links the named-annotation surface for the "when to use which" decision. +- [x] 6.2 `openspec/specs/graphql-api/spec.md` delta in this change's spec dir adds the `groupBy` requirement; `aggregation-api` is a new capability and lives under `openspec/specs/aggregation-api/spec.md` at archive time. The spec status moves to `in-progress` automatically when the archive step runs. +- [x] 6.3 `openspec/platform-capabilities.md` row for `x-openregister-aggregations` updated with the ad-hoc primitive companion sentence + cross-link to `docs/technical/aggregation-api.md`. diff --git a/openspec/platform-capabilities.md b/openspec/platform-capabilities.md index e59b67aa8d..3728223299 100644 --- a/openspec/platform-capabilities.md +++ b/openspec/platform-capabilities.md @@ -36,7 +36,7 @@ Each annotation lives at the schema's top level. Validation runs at schema-save | `x-openregister-seeds` | implemented | (seed data spec) | Sample objects on install for demos / testing | | `x-openregister-archival` | implemented | `archival-destruction-workflow` | Retention classification + archival metadata (Archiefwet) | | `x-openregister-lifecycle` | implemented (pilot, decidesk Meeting) | `lifecycle-annotation` change | Declarative state machines: `field`, `initial`, `transitions{action: {from, to, requires?}}`. Pre-save validation rejects invalid transitions; sugar `POST /api/objects/{id}/transition` endpoint; `ObjectTransitionedEvent` joins the existing event family. Stored under `configuration['x-openregister-lifecycle']`. | -| `x-openregister-aggregations` | implemented + Postgres-native + 60s cache (pilot, decidesk ActionItem) | `aggregations-annotation`, `aggregations-backend-native` (in progress) | Declarative `count`/`sum`/`avg`/`min`/`max`/`count_distinct` queries with optional groupBy. `GET /api/objects/aggregations/{register}/{schema}/{name}`. Postgres-native fast path covers equality + operator filters (`in`/`gt`/`gte`/`lt`/`lte`/`ne`); falls back to PHP runner over MagicMapper for unsupported shapes. Results 60s-cached per (register, schema, name, filter, RBAC scope) and evicted on object-write events; response carries `backend: "postgres" \| "php-fallback"` for attribution. Solr facets / ES aggs paths are deferred. Stored under `configuration['x-openregister-aggregations']`. | +| `x-openregister-aggregations` | implemented + Postgres-native + 60s cache (pilot, decidesk ActionItem) | `aggregations-annotation`, `aggregations-backend-native` (in progress), `add-time-bucket-aggregation` | Declarative `count`/`sum`/`avg`/`min`/`max`/`count_distinct` queries with optional groupBy. `GET /api/objects/aggregations/{register}/{schema}/{name}`. Postgres-native fast path covers equality + operator filters (`in`/`gt`/`gte`/`lt`/`lte`/`ne`); falls back to PHP runner over MagicMapper for unsupported shapes. Results 60s-cached per (register, schema, name, filter, RBAC scope) and evicted on object-write events; response carries `backend: "postgres" \| "php-fallback"` for attribution. Solr facets / ES aggs paths are deferred. Stored under `configuration['x-openregister-aggregations']`. **Companion ad-hoc primitive** (added by `add-time-bucket-aggregation`): `GET /api/objects/aggregations/{register}/{schema}/timeseries?field&interval&from&to` + GraphQL `groupBy: GroupByInput` arg on every auto-generated list query. Client-controlled bucketing via Postgres `date_trunc()`; same row-level RBAC + multi-tenant predicate as the named path. See `docs/technical/aggregation-api.md`. | | `x-openregister-calculations` | implemented (pilot, decidesk ActionItem) | `calculations-annotation` change | Computed fields. JSON-shaped expression AST (concat, if, arithmetic, comparison, date diff, placeholders, dotted prop paths incl. `@self.created` / `@self.updated`). `materialise: true` evaluates at save and persists. `materialise: false` evaluates at render when `_extend=calculations`. Both shapes available; `occ openregister:rematerialise-calculations [--dry-run]` reprocesses materialised calcs. | | `x-openregister-notifications` | implemented v2 (pilot, decidesk Meeting) | `notifications-annotation`, `notifications-v2` (in progress) | Declarative notifications via `INotificationManager`. v2 trigger types: `created`/`updated`/`transition` (with optional action filter), `scheduled` (`intervalSec >= 60`, fires through 60s `ScheduledNotificationJob`), `threshold` (referenced aggregation + `op` + `value`, fires once per below→above transition via `AggregationThresholdListener`). Recipient kinds: `users`, `field`, `groups`, `relation`, `object-acl` (resolves ACL holders for `read`/`manage` permission), `expression` (DI-tagged `RecipientResolverInterface`). Channels: `nc-notification`, `email`, `activity`, `webhook` (with `webhook.persistent: true` auto-creating a `Webhook` entity via `NotificationsAnnotationInstaller` so delivery flows through the standard retry/HMAC/dead-letter pipeline), `talk` (chat post via Spreed REST). Subject template supports `{{prop}}` interpolation. Stored under `configuration['x-openregister-notifications']`. | diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index f32835f886..4884ecf690 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -3686,12 +3686,12 @@ parameters: path: lib/Service/ActionService.php - - message: "#^Else branch is unreachable because ternary operator condition is always true\\.$#" + message: "#^Strict comparison using \\=\\=\\= between string and null will always evaluate to false\\.$#" count: 1 path: lib/Service/Aggregation/AggregationRunner.php - - message: "#^Strict comparison using \\=\\=\\= between string and null will always evaluate to false\\.$#" + message: "#^Strict comparison using \\=\\=\\= between int and null will always evaluate to false\\.$#" count: 1 path: lib/Service/Aggregation/AggregationRunner.php diff --git a/tests/Unit/Controller/AggregationControllerTest.php b/tests/Unit/Controller/AggregationControllerTest.php index 8f905aa2bc..9c29fa0da7 100644 --- a/tests/Unit/Controller/AggregationControllerTest.php +++ b/tests/Unit/Controller/AggregationControllerTest.php @@ -26,6 +26,7 @@ use OCA\OpenRegister\Controller\AggregationController; use OCA\OpenRegister\Exception\NotAuthorizedException; use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator; use OCP\AppFramework\Http; use OCP\IRequest; use PHPUnit\Framework\MockObject\MockObject; @@ -49,10 +50,12 @@ protected function setUp(): void { $request = $this->createMock(IRequest::class); $this->runner = $this->createMock(AggregationRunner::class); + $validator = $this->createMock(TimeseriesRequestValidator::class); $this->controller = new AggregationController( 'openregister', $request, - $this->runner + $this->runner, + $validator ); }//end setUp() diff --git a/tests/Unit/Controller/AggregationControllerTimeseriesTest.php b/tests/Unit/Controller/AggregationControllerTimeseriesTest.php new file mode 100644 index 0000000000..108d6bddad --- /dev/null +++ b/tests/Unit/Controller/AggregationControllerTimeseriesTest.php @@ -0,0 +1,195 @@ + + * + * @author Conduction Development Team + * @copyright 2026 Conduction B.V. + * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + * + * @link https://www.OpenRegister.app + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md + */ + +declare(strict_types=1); + +namespace Unit\Controller; + +use InvalidArgumentException; +use OCA\OpenRegister\Controller\AggregationController; +use OCA\OpenRegister\Db\Schema; +use OCA\OpenRegister\Exception\NotAuthorizedException; +use OCA\OpenRegister\Service\Aggregation\AggregationQuery; +use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator; +use OCP\AppFramework\Http; +use OCP\IRequest; +use PHPUnit\Framework\MockObject\MockObject; +use PHPUnit\Framework\TestCase; +use RuntimeException; + +/** + * @coversDefaultClass \OCA\OpenRegister\Controller\AggregationController + */ +class AggregationControllerTimeseriesTest extends TestCase +{ + + private AggregationController $controller; + + private AggregationRunner&MockObject $runner; + + private TimeseriesRequestValidator&MockObject $validator; + + private IRequest&MockObject $request; + + /** + * Boot the SUT with three mocks: the runner (to assert dispatch), + * the validator (to swap in concrete throw / pass behaviour per + * test), and the request (so we can stub getParam). + * + * @return void + */ + protected function setUp(): void + { + $this->request = $this->createMock(IRequest::class); + $this->runner = $this->createMock(AggregationRunner::class); + $this->validator = $this->createMock(TimeseriesRequestValidator::class); + $this->controller = new AggregationController( + 'openregister', + $this->request, + $this->runner, + $this->validator + ); + }//end setUp() + + /** + * Schema-not-found → 404. The validator is not consulted. + * + * @return void + */ + public function testReturns404WhenSchemaCannotBeResolved(): void + { + $this->runner->method('findSchema')->willThrowException( + new RuntimeException('Schema "bogus" not found.') + ); + + $response = $this->controller->timeseries('reg', 'bogus'); + + $this->assertSame(Http::STATUS_NOT_FOUND, $response->getStatus()); + $body = $response->getData(); + $this->assertIsArray($body); + $this->assertSame('Schema "bogus" not found.', $body['error'] ?? null); + }//end testReturns404WhenSchemaCannotBeResolved() + + /** + * Validation failure → 400 + `{error}` body. Runner is not invoked. + * + * @return void + */ + public function testReturns400WhenValidatorRejectsInput(): void + { + $schema = $this->createMock(Schema::class); + $this->runner->method('findSchema')->willReturn($schema); + $this->validator->method('validate')->willThrowException( + new InvalidArgumentException('`field` is required') + ); + $this->runner->expects($this->never())->method('runAdhocByRef'); + + $response = $this->controller->timeseries('reg', 'sch'); + + $this->assertSame(Http::STATUS_BAD_REQUEST, $response->getStatus()); + $body = $response->getData(); + $this->assertIsArray($body); + $this->assertSame('`field` is required', $body['error'] ?? null); + }//end testReturns400WhenValidatorRejectsInput() + + /** + * NotAuthorized from runner → 403 + structured body. + * + * @return void + */ + public function testReturns403WhenRunnerDeniesAccess(): void + { + $schema = $this->createMock(Schema::class); + $query = AggregationQuery::create(metric: 'count', groupBy: ['field' => 'status']); + $this->runner->method('findSchema')->willReturn($schema); + $this->validator->method('validate')->willReturn($query); + $this->runner->method('runAdhocByRef')->willThrowException( + new NotAuthorizedException(message: 'denied') + ); + + $response = $this->controller->timeseries('reg', 'sch'); + + $this->assertSame(Http::STATUS_FORBIDDEN, $response->getStatus()); + $body = $response->getData(); + $this->assertSame('denied', $body['error'] ?? null); + }//end testReturns403WhenRunnerDeniesAccess() + + /** + * Happy path: validator passes, runner returns groups, controller + * passes the body through verbatim. + * + * @return void + */ + public function testHappyPathReturnsGroups(): void + { + $schema = $this->createMock(Schema::class); + $query = AggregationQuery::create(metric: 'count', groupBy: ['field' => 'status']); + $this->runner->method('findSchema')->willReturn($schema); + $this->validator->method('validate')->willReturn($query); + $this->runner->method('runAdhocByRef')->willReturn( + [ + 'groups' => [ + ['key' => 'active', 'value' => 20], + ['key' => 'archived', 'value' => 10], + ], + 'backend' => 'postgres', + 'cached' => false, + ] + ); + + $response = $this->controller->timeseries('reg', 'sch'); + + $this->assertSame(Http::STATUS_OK, $response->getStatus()); + $body = $response->getData(); + $this->assertIsArray($body); + $this->assertSame('postgres', $body['backend']); + $this->assertCount(2, $body['groups']); + $this->assertSame('active', $body['groups'][0]['key']); + $this->assertSame(20, $body['groups'][0]['value']); + }//end testHappyPathReturnsGroups() + + /** + * Register-not-found from runner → 404 (the runner's + * `runAdhocByRef` resolves both register and schema; either + * missing surfaces as RuntimeException). + * + * @return void + */ + public function testReturns404WhenRegisterCannotBeResolved(): void + { + $schema = $this->createMock(Schema::class); + $query = AggregationQuery::create(metric: 'count', groupBy: ['field' => 'status']); + $this->runner->method('findSchema')->willReturn($schema); + $this->validator->method('validate')->willReturn($query); + $this->runner->method('runAdhocByRef')->willThrowException( + new RuntimeException('Register "bogus" not found.') + ); + + $response = $this->controller->timeseries('bogus', 'sch'); + + $this->assertSame(Http::STATUS_NOT_FOUND, $response->getStatus()); + }//end testReturns404WhenRegisterCannotBeResolved() +}//end class diff --git a/tests/Unit/Service/Aggregation/TimeseriesRequestValidatorTest.php b/tests/Unit/Service/Aggregation/TimeseriesRequestValidatorTest.php new file mode 100644 index 0000000000..4c4e782535 --- /dev/null +++ b/tests/Unit/Service/Aggregation/TimeseriesRequestValidatorTest.php @@ -0,0 +1,321 @@ + + * + * @author Conduction Development Team + * @copyright 2026 Conduction B.V. + * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + * + * @link https://www.OpenRegister.app + * + * @spec openspec/changes/add-time-bucket-aggregation/specs/aggregation-api/spec.md + */ + +declare(strict_types=1); + +namespace Unit\Service\Aggregation; + +use InvalidArgumentException; +use OCA\OpenRegister\Db\Schema; +use OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator; +use PHPUnit\Framework\TestCase; + +/** + * @coversDefaultClass \OCA\OpenRegister\Service\Aggregation\TimeseriesRequestValidator + */ +class TimeseriesRequestValidatorTest extends TestCase +{ + + private TimeseriesRequestValidator $validator; + + /** + * @return void + */ + protected function setUp(): void + { + $this->validator = new TimeseriesRequestValidator(); + }//end setUp() + + /** + * A schema mock that returns a fixed property set. + * + * @param array> $properties Property definitions keyed by name. + * + * @return Schema The configured schema mock. + */ + private function schemaWith(array $properties): Schema + { + $schema = $this->createMock(Schema::class); + $schema->method('getProperties')->willReturn($properties); + return $schema; + }//end schemaWith() + + /** + * Categorical groupBy: just a field. AggregationQuery.groupBy is + * set, dateBucket is null. + * + * @return void + */ + public function testCategoricalGroupByOnDeclaredFieldPasses(): void + { + $schema = $this->schemaWith(['status' => ['type' => 'string']]); + + $query = $this->validator->validate( + input: ['field' => 'status'], + schema: $schema + ); + + $this->assertSame('count', $query->metric); + $this->assertSame('status', $query->getGroupByField()); + $this->assertFalse($query->hasDateBucket()); + }//end testCategoricalGroupByOnDeclaredFieldPasses() + + /** + * Time-bucket groupBy: dateBucket is set, groupBy is null. + * + * @return void + */ + public function testTimeBucketDayPasses(): void + { + $schema = $this->schemaWith( + [ + 'created' => ['type' => 'string', 'format' => 'date-time'], + ] + ); + + $query = $this->validator->validate( + input: [ + 'field' => 'created', + 'interval' => 'DAY', + 'from' => '2026-05-01T00:00:00Z', + 'to' => '2026-05-22T00:00:00Z', + ], + schema: $schema + ); + + $this->assertTrue($query->hasDateBucket()); + $this->assertSame('day', $query->dateBucket['gap']); + $this->assertNull($query->groupBy); + }//end testTimeBucketDayPasses() + + /** + * @return void + */ + public function testEmptyFieldIsRejected(): void + { + $schema = $this->schemaWith(['status' => ['type' => 'string']]); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('`field` is required'); + + $this->validator->validate(input: ['field' => ''], schema: $schema); + }//end testEmptyFieldIsRejected() + + /** + * @return void + */ + public function testUnknownFieldIsRejected(): void + { + $schema = $this->schemaWith(['status' => ['type' => 'string']]); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('__totally_made_up'); + + $this->validator->validate( + input: ['field' => '__totally_made_up'], + schema: $schema + ); + }//end testUnknownFieldIsRejected() + + /** + * @return void + */ + public function testMagicMetadataFieldIsAllowed(): void + { + $schema = $this->schemaWith([]); + + $query = $this->validator->validate( + input: [ + 'field' => '_created', + 'interval' => 'DAY', + 'from' => '2026-05-01T00:00:00Z', + 'to' => '2026-05-22T00:00:00Z', + ], + schema: $schema + ); + + $this->assertTrue($query->hasDateBucket()); + }//end testMagicMetadataFieldIsAllowed() + + /** + * @return void + */ + public function testSubDayIntervalOnDateOnlyFieldIsRejected(): void + { + $schema = $this->schemaWith( + [ + 'meetingDate' => ['type' => 'string', 'format' => 'date'], + ] + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('sub-day interval'); + + $this->validator->validate( + input: [ + 'field' => 'meetingDate', + 'interval' => 'HOUR', + 'from' => '2026-05-21T00:00:00Z', + 'to' => '2026-05-22T00:00:00Z', + ], + schema: $schema + ); + }//end testSubDayIntervalOnDateOnlyFieldIsRejected() + + /** + * @return void + */ + public function testIntervalWithoutBoundsIsRejected(): void + { + $schema = $this->schemaWith( + [ + 'created' => ['type' => 'string', 'format' => 'date-time'], + ] + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('`from` and `to` are required'); + + $this->validator->validate( + input: ['field' => 'created', 'interval' => 'DAY'], + schema: $schema + ); + }//end testIntervalWithoutBoundsIsRejected() + + /** + * @return void + */ + public function testUnparseableBoundsAreRejected(): void + { + $schema = $this->schemaWith( + [ + 'created' => ['type' => 'string', 'format' => 'date-time'], + ] + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('parseable ISO-8601'); + + $this->validator->validate( + input: [ + 'field' => 'created', + 'interval' => 'DAY', + 'from' => 'not-a-date', + 'to' => 'also-not-a-date', + ], + schema: $schema + ); + }//end testUnparseableBoundsAreRejected() + + /** + * @return void + */ + public function testUnknownIntervalIsRejected(): void + { + $schema = $this->schemaWith( + [ + 'created' => ['type' => 'string', 'format' => 'date-time'], + ] + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('`interval` MUST be one of'); + + $this->validator->validate( + input: [ + 'field' => 'created', + 'interval' => 'CENTURY', + 'from' => '2026-01-01T00:00:00Z', + 'to' => '2026-12-31T00:00:00Z', + ], + schema: $schema + ); + }//end testUnknownIntervalIsRejected() + + /** + * @return void + */ + public function testNonCountMetricWithoutMetricFieldIsRejected(): void + { + $schema = $this->schemaWith( + [ + 'status' => ['type' => 'string'], + 'duration' => ['type' => 'number'], + ] + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('`metricField` is required'); + + $this->validator->validate( + input: ['field' => 'status', 'metric' => 'sum'], + schema: $schema + ); + }//end testNonCountMetricWithoutMetricFieldIsRejected() + + /** + * @return void + */ + public function testNonCountMetricWithUnknownMetricFieldIsRejected(): void + { + $schema = $this->schemaWith(['status' => ['type' => 'string']]); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('not a declared property'); + + $this->validator->validate( + input: [ + 'field' => 'status', + 'metric' => 'sum', + 'metricField' => '__unknown', + ], + schema: $schema + ); + }//end testNonCountMetricWithUnknownMetricFieldIsRejected() + + /** + * @return void + */ + public function testSumOverDeclaredFieldPasses(): void + { + $schema = $this->schemaWith( + [ + 'status' => ['type' => 'string'], + 'duration' => ['type' => 'number'], + ] + ); + + $query = $this->validator->validate( + input: [ + 'field' => 'status', + 'metric' => 'sum', + 'metricField' => 'duration', + ], + schema: $schema + ); + + $this->assertSame('sum', $query->metric); + $this->assertSame('duration', $query->field); + $this->assertSame('status', $query->getGroupByField()); + }//end testSumOverDeclaredFieldPasses() +}//end class From ed965c259975b574e6f3cd8b9b54bdc6365f7867 Mon Sep 17 00:00:00 2001 From: Ruben van der Linde Date: Fri, 22 May 2026 09:30:47 +0200 Subject: [PATCH 3/3] feat(aggregation): native MySQL/SQLite bucketing + ad-hoc cache (#1609, #1610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the time-bucket primitive added by #1611 with two of the five deferred follow-ups: - **#1609 — Native MySQL/SQLite bucketing.** `AggregationRunner` now detects MySQL/MariaDB and SQLite alongside PostgreSQL and emits the matching native bucketing expression (`DATE_FORMAT` / `strftime`) with the correct identifier quoting per engine. ISO-Monday week-start semantics carry across all three platforms; quarter buckets use the matching CASE / CONCAT form. Backend annotation reports `mysql` / `sqlite` / `postgres` / `php-fallback`. - **#1610 — Read-through cache for ad-hoc queries.** `AggregationCache` gains `getAdhoc()` / `setAdhoc()` wrappers that derive the cache name slot from `'adhoc:'.sha1(json_encode(AggregationQuery::toArray()))`. `AggregationQuery::toArray()` ksort-sorts filter sub-arrays so two structurally-equivalent queries hash identically. `runAdhoc()` wires the cache read-through; the existing `AggregationCacheInvalidationListener` covers ad-hoc entries unchanged because they share the underlying `openregister_aggregations` distributed cache. The remaining three follow-ups (#1606 multi-field groupBy, #1607 cumulative/rolling windows, #1608 multi-metric) are documented at design depth in `openspec/changes/add-aggregation-enhancements/design.md` so the next pickup can file deliberate opsx cycles for them. Tests cover MySQL/SQLite/Postgres SQL emission per gap (including the `%i` vs `%M` minute placeholder divergence), ad-hoc cache hit/miss semantics, key stability under filter reordering, and ad-hoc vs named cache-entry isolation. --- docs/technical/aggregation-api.md | 35 +- lib/Service/Aggregation/AggregationCache.php | 68 ++++ lib/Service/Aggregation/AggregationQuery.php | 54 +++ lib/Service/Aggregation/AggregationRunner.php | 351 +++++++++++++--- .../add-aggregation-enhancements/design.md | 126 ++++++ .../add-aggregation-enhancements/proposal.md | 68 ++++ .../specs/aggregation-api/spec.md | 88 ++++ .../add-aggregation-enhancements/tasks.md | 39 ++ openspec/platform-capabilities.md | 2 +- .../Aggregation/AggregationCacheTest.php | 128 ++++++ .../Aggregation/AggregationQueryTest.php | 74 ++++ .../AggregationRunnerAdhocCacheTest.php | 228 +++++++++++ .../AggregationRunnerNativeBucketTest.php | 376 ++++++++++++++++++ 13 files changed, 1574 insertions(+), 63 deletions(-) create mode 100644 openspec/changes/add-aggregation-enhancements/design.md create mode 100644 openspec/changes/add-aggregation-enhancements/proposal.md create mode 100644 openspec/changes/add-aggregation-enhancements/specs/aggregation-api/spec.md create mode 100644 openspec/changes/add-aggregation-enhancements/tasks.md create mode 100644 tests/Unit/Service/Aggregation/AggregationRunnerAdhocCacheTest.php create mode 100644 tests/Unit/Service/Aggregation/AggregationRunnerNativeBucketTest.php diff --git a/docs/technical/aggregation-api.md b/docs/technical/aggregation-api.md index 54fad457fd..f219f32596 100644 --- a/docs/technical/aggregation-api.md +++ b/docs/technical/aggregation-api.md @@ -56,8 +56,8 @@ Bucketing by `MINUTE` or `HOUR` requires the field's JSON-Schema `format` to be - `key`: bucket label. For `interval`-bucketed queries this is an ISO-8601-UTC string at the start of the bucket. For categorical groupBy it's the value of the groupBy field. - `value`: the aggregated metric (always a number; an integer for `count`, a float for other metrics). -- `backend`: `"postgres"` (native `date_trunc` path) or `"php-fallback"` (non-Postgres environments). -- `cached`: always `false` on the ad-hoc path. Caching is tracked in [issue #1610](https://github.com/ConductionNL/openregister/issues/1610). +- `backend`: which engine served the query — `"postgres"` (native `date_trunc`), `"mysql"` (native `DATE_FORMAT`), `"sqlite"` (native `strftime`), or `"php-fallback"` (unrecognised engine). +- `cached`: `true` on a read-through cache hit, `false` on miss or the first request after invalidation. See [Cache](#cache) below. ### Empty buckets @@ -132,12 +132,35 @@ When the client does not request `groupBy`, the `groups` field is `null` (not an Validation problems surface as GraphQL field-errors on the `groups` field. The rest of the connection (edges, pageInfo, totalCount, facets) still resolves normally. +## Backend matrix + +The runner picks the matching native bucketing primitive for the active database engine and falls back to PHP only on engines OpenRegister doesn't natively target. + +| Database | Bucketing path | `backend` field | +|---|---|---| +| PostgreSQL | `date_trunc($gap, "$field")::text` | `postgres` | +| MySQL / MariaDB | `DATE_FORMAT("$field", '')` (ISO-Monday week-start; `CONCAT(YEAR, ..., '-01T00:00:00Z')` for quarter) | `mysql` | +| SQLite | `strftime('', "$field")` (ISO-Monday via `weekday 0` + `-6 days`; `CASE` on `strftime('%m')` for quarter) | `sqlite` | +| Other / unknown | RBAC-filtered hydrate + PHP `date_trunc` polyfill (`gmdate`) | `php-fallback` | + +All four paths emit identical wire shape: ISO-8601-UTC bucket keys (`Y-m-d\TH:i:s\Z`), the same `groups[i].value` coercion (int for `count`, float otherwise), and the same RBAC + multi-tenancy gate. The `backend` field lets a caller observe which engine served the request without changing how the response is consumed. + +## Cache + +Ad-hoc results are served via a 60-second distributed cache: + +- **Storage**: same `openregister_aggregations` distributed cache the named-aggregation path uses (`AggregationCache`). +- **Read-through**: on entry to `runAdhoc()`, the runner derives the key from `(registerSlug, schemaSlug, sha1(json_encode($query->toArray())), filter, rbacScopeHash)`, prefixed with `adhoc:`. A hit returns the stored envelope with `cached: true`; a miss falls through to the native-or-fallback dispatch and the resulting envelope is written back. +- **Key stability**: `AggregationQuery::toArray()` ksort-sorts the filter map (recursively, into operator sub-arrays), so `filter[a, b]` and `filter[b, a]` produce identical cache keys. +- **RBAC scoping**: the key includes `sha1(uid)` (or `sha1('anonymous')`) so two callers with different list-permission verdicts on the same `(metric, field, filter)` tuple never read each other's results. +- **Invalidation**: `AggregationCacheInvalidationListener` evicts the entire `(register, schema)` cache on every `ObjectCreatedEvent`, `ObjectUpdatedEvent`, `ObjectDeletedEvent`, and `ObjectTransitionedEvent`. The eviction is coarse (`ICache::clear()` flushes the whole `openregister_aggregations` namespace) but bounded by the 60-second TTL ceiling on missed evicts. +- **Stampede tolerance**: no distributed lock — the 60-second TTL bounds duplicate-miss compute. Revisit if a high-traffic dashboard surfaces stampede symptoms in production. + ## Performance notes -- **Postgres index**: for any field commonly used as a bucketing target (`created`, `updated`, custom date columns), declare a btree index on the magic-table column. `date_trunc()` against an indexed timestamp column is sub-50ms on tens of millions of rows. +- **Database index**: for any field commonly used as a bucketing target (`created`, `updated`, custom date columns), declare a btree index on the magic-table column. The native bucketing expression operates against an indexed column on every supported engine, so the cost stays in the database where it belongs. - **Row-level RBAC**: the multi-tenant predicate (`_organisation = ?`) and the schema's `PermissionHandler::canRead()` verdict both apply BEFORE bucketing. Aggregations cannot leak rows the caller could not read row-by-row. -- **Non-Postgres fallback**: SQLite and MySQL fall through to the PHP-side bucketer (`backend: "php-fallback"`). Correct, but slow on tables > 10k rows — the row cap is 10 000 and `truncated: true` is set when exceeded. Native MySQL / SQLite bucketing is tracked in [issue #1609](https://github.com/ConductionNL/openregister/issues/1609). -- **No cache**: ad-hoc queries hit Postgres on every request. Caching is tracked in [issue #1610](https://github.com/ConductionNL/openregister/issues/1610). +- **PHP fallback ceiling**: the PHP-fallback path on unrecognised engines caps the hydrated row set at 10 000 and sets `truncated: true` when exceeded. Native paths (Postgres / MySQL / SQLite) operate over the full set in SQL. ## Non-goals (deferred) @@ -146,5 +169,3 @@ Validation problems surface as GraphQL field-errors on the `groups` field. The r | Multi-field groupBy (`groupBy: [status, priority]`) | [#1606](https://github.com/ConductionNL/openregister/issues/1606) | | Running / cumulative series | [#1607](https://github.com/ConductionNL/openregister/issues/1607) | | Multi-metric in one request (`count` + `sum`) | [#1608](https://github.com/ConductionNL/openregister/issues/1608) | -| Native MySQL / SQLite bucketing | [#1609](https://github.com/ConductionNL/openregister/issues/1609) | -| Caching of ad-hoc queries | [#1610](https://github.com/ConductionNL/openregister/issues/1610) | diff --git a/lib/Service/Aggregation/AggregationCache.php b/lib/Service/Aggregation/AggregationCache.php index 246efc09c9..f68c367f43 100644 --- a/lib/Service/Aggregation/AggregationCache.php +++ b/lib/Service/Aggregation/AggregationCache.php @@ -152,6 +152,74 @@ public function set(string $registerSlug, string $schemaSlug, string $name, arra } }//end set() + /** + * Look up a cached ad-hoc aggregation result. + * + * Mirrors {@see get()} but derives the name slot from the query value + * object. The literal `adhoc:` prefix keeps ad-hoc entries visually + * distinct from named-aggregation entries in cache dumps. + * + * @param string $registerSlug Register slug component of the cache key. + * @param string $schemaSlug Schema slug component of the cache key. + * @param AggregationQuery $query Query value object hashed into the cache key. + * + * @return array|null Cached envelope or null on miss. + */ + public function getAdhoc(string $registerSlug, string $schemaSlug, AggregationQuery $query): ?array + { + return $this->get( + registerSlug: $registerSlug, + schemaSlug: $schemaSlug, + name: $this->adhocName(query: $query), + filter: $query->filter + ); + + }//end getAdhoc() + + /** + * Store an ad-hoc aggregation result. + * + * Mirrors {@see set()} for the ad-hoc path. The stored envelope is + * rewritten on read (`cached: true`) by callers — see + * {@see \OCA\OpenRegister\Service\Aggregation\AggregationRunner::runAdhoc()}. + * + * @param string $registerSlug Register slug component of the cache key. + * @param string $schemaSlug Schema slug component of the cache key. + * @param AggregationQuery $query Query value object hashed into the cache key. + * @param array $result Result envelope to store. + * + * @return void + */ + public function setAdhoc(string $registerSlug, string $schemaSlug, AggregationQuery $query, array $result): void + { + $this->set( + registerSlug: $registerSlug, + schemaSlug: $schemaSlug, + name: $this->adhocName(query: $query), + filter: $query->filter, + result: $result + ); + + }//end setAdhoc() + + /** + * Derive the cache name slot for an ad-hoc query. + * + * Computes `'adhoc:'.sha1(json_encode($query->toArray()))`. The + * `AggregationQuery::toArray()` output is ksort-stable so two + * structurally-equivalent queries produce identical hashes. + * + * @param AggregationQuery $query The ad-hoc query value object. + * + * @return string The cache name slot, prefixed with `adhoc:`. + */ + private function adhocName(AggregationQuery $query): string + { + $encoded = json_encode($query->toArray()); + return 'adhoc:'.sha1($encoded === false ? '' : $encoded); + + }//end adhocName() + /** * Evict every cached aggregation for a (register, schema). Called by * the object-write listeners. diff --git a/lib/Service/Aggregation/AggregationQuery.php b/lib/Service/Aggregation/AggregationQuery.php index 4c29a1bddb..50d60a76d8 100644 --- a/lib/Service/Aggregation/AggregationQuery.php +++ b/lib/Service/Aggregation/AggregationQuery.php @@ -215,4 +215,58 @@ public function hasDateBucket(): bool return ($this->dateBucket !== null); }//end hasDateBucket() + + /** + * Serialise the query to a stable associative array. + * + * The output is the canonical input to the ad-hoc aggregation cache + * key (see {@see AggregationCache::getAdhoc()}). Filter sub-arrays are + * recursively ksort-sorted so two structurally-equivalent queries + * produce identical JSON encodings — `{a: 1, b: 2}` and `{b: 2, a: 1}` + * hash to the same cache key. + * + * @return array{ + * metric: string, + * field: ?string, + * filter: array, + * groupBy: array|null, + * dateBucket: array|null + * } Canonical wire shape of the query. + */ + public function toArray(): array + { + return [ + 'metric' => $this->metric, + 'field' => $this->field, + 'filter' => self::canonicaliseFilter(filter: $this->filter), + 'groupBy' => $this->groupBy, + 'dateBucket' => $this->dateBucket, + ]; + + }//end toArray() + + /** + * Recursively ksort the filter map. + * + * Operator sub-arrays (e.g. `{gt: 5, lte: 10}`) get the same treatment + * so the resulting JSON encoding is stable across input orderings. + * + * @param array $filter Filter map to canonicalise. + * + * @return array Sorted filter map. + */ + private static function canonicaliseFilter(array $filter): array + { + ksort($filter); + foreach ($filter as $key => $value) { + if (is_array($value) === true) { + $sub = $value; + ksort($sub); + $filter[$key] = $sub; + } + } + + return $filter; + + }//end canonicaliseFilter() }//end class diff --git a/lib/Service/Aggregation/AggregationRunner.php b/lib/Service/Aggregation/AggregationRunner.php index 73d98e2eab..f2419c0d58 100644 --- a/lib/Service/Aggregation/AggregationRunner.php +++ b/lib/Service/Aggregation/AggregationRunner.php @@ -49,6 +49,12 @@ * @SuppressWarnings(PHPMD.ExcessiveClassComplexity) * @SuppressWarnings(PHPMD.CouplingBetweenObjects) * @SuppressWarnings(PHPMD.ExcessiveClassLength) + * @SuppressWarnings(PHPMD.TooManyMethods) + * The runner orchestrates four execution paths (named / cross-schema / + * ad-hoc / cached) across three backends (Postgres / MySQL / SQLite + + * PHP fallback). The method count rises with each platform-specific + * helper; extracting them into a separate class would require passing + * the full constructor dependency graph through. */ class AggregationRunner { @@ -397,16 +403,15 @@ public function run( * The execution pipeline is the same as `run()`: * * 1. RBAC gate via `PermissionHandler::hasPermission(list)`. - * 2. Postgres-native fast path via `tryNativeAggregation()` — - * extended in this change to honour `AggregationQuery::dateBucket` - * by emitting `date_trunc()` SQL. - * 3. PHP fallback bucketing when the native path returns null - * (e.g. non-Postgres DB). - * - * The cache is NOT consulted on the ad-hoc path: `AggregationCache` - * keys are bound to the named-annotation `name`, and ad-hoc queries - * are by definition unnamed. Cache-key extension is tracked in - * follow-up issue #1610. + * 2. Read-through cache lookup via `AggregationCache::getAdhoc()` — + * cache key derives from `sha1(json_encode($query->toArray()))` + * prefixed with `adhoc:`. 60 s TTL. Invalidated on every + * object lifecycle event by `AggregationCacheInvalidationListener`. + * 3. Postgres / MySQL / SQLite native fast path via + * `tryNativeAggregation()` — picks the matching bucketing + * expression (`date_trunc` / `DATE_FORMAT` / `strftime`). + * 4. PHP fallback bucketing when the native path returns null + * (unrecognised engine, table miss, etc). * * Field allow-listing (only declared schema properties + the * `_created/_updated/_deleted_at` magic-table metadata cols) is @@ -419,11 +424,19 @@ public function run( * * @return array Either `{value, backend, cached}` (ungrouped) * or `{groups, backend, cached}` (grouped / - * time-bucketed). `cached` is always `false` - * on the ad-hoc path (no AggregationCache - * key shape — tracked in issue #1610). + * time-bucketed). `cached` flips to `true` + * on the second identical request within + * the 60 s TTL window. * * @throws NotAuthorizedException When the caller lacks list-permission on the schema. + * + * @SuppressWarnings(PHPMD.ExcessiveMethodLength) + * RBAC + placeholder-resolve + cache-read + native-or-fallback dispatch + + * cache-write. Splitting each step into its own helper would obscure + * the order of operations that matters for the security gate and + * cache semantics. + * @SuppressWarnings(PHPMD.StaticAccess) + * `AggregationQuery::create()` is a fail-fast static factory. */ public function runAdhoc( Register $register, @@ -450,12 +463,38 @@ public function runAdhoc( } // Resolve placeholders inside the filter (e.g. $now, $startOfMonth) - // so the native SQL path can bind concrete values. + // so the native SQL path can bind concrete values. The resolved + // filter feeds both the cache key and the SQL bindings. $resolvedFilter = $this->placeholders->resolveArray($query->filter); + $resolvedQuery = AggregationQuery::create( + metric: $query->metric, + field: $query->field, + filter: $resolvedFilter, + groupBy: $query->groupBy, + dateBucket: $query->dateBucket + ); + + // Read-through cache. Identical query + filter + RBAC scope → + // same cache key → 60 s TTL hit. Cache miss falls through to the + // native-or-fallback dispatch below and the result is stored on + // the way out. Invalidation is event-driven via + // AggregationCacheInvalidationListener which evicts the entire + // `openregister_aggregations` cache on every object-write event. + $cached = $this->cache->getAdhoc( + registerSlug: (string) $register->getSlug(), + schemaSlug: (string) $schema->getSlug(), + query: $resolvedQuery + ); + if ($cached !== null) { + $cached['cached'] = true; + return $cached; + } - // Try the Postgres-native fast path first. tryNativeAggregation() - // returns null on non-Postgres DBs or unsupported query shapes, - // signaling fall-through to the PHP bucketer below. + // Try the Postgres / MySQL / SQLite native fast path. The runner + // detects the database platform and emits the matching native + // bucketing expression (date_trunc / DATE_FORMAT / strftime). + // Returns null on unsupported query shapes or unrecognised + // engines, signaling fall-through to the PHP bucketer below. $native = $this->tryNativeAggregation( register: $register, schema: $schema, @@ -467,16 +506,23 @@ public function runAdhoc( ); if ($native !== null) { - return [ - 'backend' => 'postgres', + $envelope = [ + 'backend' => $this->detectDatabasePlatform(), 'cached' => false, ] + $native; + $this->cache->setAdhoc( + registerSlug: (string) $register->getSlug(), + schemaSlug: (string) $schema->getSlug(), + query: $resolvedQuery, + result: $envelope + ); + return $envelope; } // PHP fallback path. Pull the RBAC-filtered row set + bucket in - // PHP. Correctness path; the Postgres native path is the + // PHP. Correctness path; the native paths above are the // production target. - return $this->bucketInPhp( + $envelope = $this->bucketInPhp( register: $register, schema: $schema, metric: $query->metric, @@ -485,6 +531,13 @@ public function runAdhoc( groupBy: $query->groupBy, dateBucket: $query->dateBucket ); + $this->cache->setAdhoc( + registerSlug: (string) $register->getSlug(), + schemaSlug: (string) $schema->getSlug(), + query: $resolvedQuery, + result: $envelope + ); + return $envelope; }//end runAdhoc() @@ -954,6 +1007,11 @@ private function normaliseForCompare(mixed $v): mixed * @SuppressWarnings(PHPMD.CyclomaticComplexity) * @SuppressWarnings(PHPMD.NPathComplexity) * @SuppressWarnings(PHPMD.ExcessiveMethodLength) + * @SuppressWarnings(PHPMD.ElseExpression) + * The platform-branch is genuinely binary (postgres vs non-postgres + * for both the soft-delete predicate and the aggregate-cast block, + * mysql vs sqlite for the bucket expression). Else clauses make + * the mutual-exclusion read at the call site. */ private function tryNativeAggregation( Register $register, @@ -964,8 +1022,18 @@ private function tryNativeAggregation( ?array $groupBy, ?array $dateBucket=null ): ?array { - $platform = $this->db->getDatabasePlatform(); - if (stripos($platform::class, 'PostgreSQL') === false) { + $platformName = $this->detectDatabasePlatform(); + + // Postgres handles every query shape natively. MySQL and SQLite + // currently support only the time-bucket path; the categorical + // groupBy + ungrouped paths still depend on Postgres-specific + // operators (`::jsonb`, `::numeric`) and fall through to the PHP + // fallback on non-Postgres engines. + if ($platformName === 'unknown') { + return null; + } + + if ($platformName !== 'postgres' && $dateBucket === null) { return null; } @@ -997,11 +1065,20 @@ private function tryNativeAggregation( return null; } - $fullTable = '"oc_'.$tableName.'"'; - - $whereParts = ["(_deleted IS NULL OR _deleted = 'null'::jsonb)"]; + $quote = $this->identifierQuote(platform: $platformName); + $fullTable = $quote.'oc_'.$tableName.$quote; + $whereParts = []; $bindings = []; + // Soft-delete predicate. Postgres uses jsonb; MySQL/SQLite store + // the metadata cols as JSON-string text where the empty/null marker + // is the literal string 'null' or actual NULL. + if ($platformName === 'postgres') { + $whereParts[] = "(_deleted IS NULL OR _deleted = 'null'::jsonb)"; + } else { + $whereParts[] = "(_deleted IS NULL OR _deleted = 'null' OR _deleted = '')"; + } + // SECURITY: mirror MagicRbacHandler's multi-tenancy predicate. The // native fast path bypasses MagicMapper entirely, so without this // filter any authed caller could compute aggregates over rows in @@ -1009,13 +1086,13 @@ private function tryNativeAggregation( // Column is `_organisation` — magic tables prefix metadata cols // with `_` (see MagicMapper::METADATA_PREFIX). $activeOrg = $this->organisationService->getActiveOrganisation(); - $whereParts[] = '"_organisation" = ?'; + $whereParts[] = $quote.'_organisation'.$quote.' = ?'; $bindings[] = $activeOrg?->getUuid() ?? '__no_active_org__'; foreach ($filter as $f => $v) { $col = $this->sanitizeColumnName(name: (string) $f); if (is_array($v) === false) { - $whereParts[] = '"'.$col.'" = ?'; + $whereParts[] = $quote.$col.$quote.' = ?'; $bindings[] = $this->bindValue(value: $v); continue; } @@ -1031,7 +1108,7 @@ private function tryNativeAggregation( } $placeholders = implode(', ', array_fill(0, count($list), '?')); - $whereParts[] = '"'.$col.'" IN ('.$placeholders.')'; + $whereParts[] = $quote.$col.$quote.' IN ('.$placeholders.')'; foreach ($list as $item) { $bindings[] = $this->bindValue(value: $item); } @@ -1052,47 +1129,78 @@ private function tryNativeAggregation( continue; } - $whereParts[] = '"'.$col.'" '.$sqlOp.' ?'; + $whereParts[] = $quote.$col.$quote.' '.$sqlOp.' ?'; $bindings[] = $this->bindValue(value: $opValue); }//end foreach }//end foreach $whereSql = implode(' AND ', $whereParts); - // Aggregate clause. - $aggCol = $field !== null ? '"'.$this->sanitizeColumnName(name: $field).'"' : null; - $aggSql = match ($metric) { - 'count' => 'COUNT(*)', - 'sum' => 'SUM(NULLIF('.$aggCol.'::text, \'\')::numeric)', - 'avg' => 'AVG(NULLIF('.$aggCol.'::text, \'\')::numeric)', - 'min' => 'MIN(NULLIF('.$aggCol.'::text, \'\')::numeric)', - 'max' => 'MAX(NULLIF('.$aggCol.'::text, \'\')::numeric)', - }; + // Aggregate clause. Postgres needs explicit `::numeric` casts because + // magic-table data columns are jsonb-shaped. MySQL/SQLite handle the + // implicit conversion in the engine. NULLIF guards against empty + // strings produced by jsonb-to-text casts. + $aggCol = null; + if ($field !== null) { + $aggCol = $quote.$this->sanitizeColumnName(name: $field).$quote; + } + + if ($platformName === 'postgres') { + $aggSql = match ($metric) { + 'count' => 'COUNT(*)', + 'sum' => 'SUM(NULLIF('.$aggCol.'::text, \'\')::numeric)', + 'avg' => 'AVG(NULLIF('.$aggCol.'::text, \'\')::numeric)', + 'min' => 'MIN(NULLIF('.$aggCol.'::text, \'\')::numeric)', + 'max' => 'MAX(NULLIF('.$aggCol.'::text, \'\')::numeric)', + }; + } else { + $aggSql = match ($metric) { + 'count' => 'COUNT(*)', + 'sum' => 'SUM(NULLIF('.$aggCol.', \'\') + 0)', + 'avg' => 'AVG(NULLIF('.$aggCol.', \'\') + 0)', + 'min' => 'MIN(NULLIF('.$aggCol.', \'\') + 0)', + 'max' => 'MAX(NULLIF('.$aggCol.', \'\') + 0)', + }; + } try { - // Time-bucket path: emit `date_trunc($gap, "$field")::text AS - // bucket`, add `WHERE "$field" >= ? AND "$field" < ?` bounds - // from the dateBucket spec, and group on the bucket. Mutual - // exclusion with $groupBy is guaranteed by - // AggregationQuery::create() upstream. + // Time-bucket path: emit a platform-specific bucketing + // expression (date_trunc / DATE_FORMAT / strftime), add + // `WHERE "$field" >= ? AND "$field" < ?` bounds from the + // dateBucket spec, and group on the bucket. Mutual exclusion + // with $groupBy is guaranteed by AggregationQuery::create() + // upstream. if ($dateBucket !== null) { $bucketField = (string) $dateBucket['field']; - $bucketCol = '"'.$this->sanitizeColumnName(name: $bucketField).'"'; + $bucketCol = $quote.$this->sanitizeColumnName(name: $bucketField).$quote; $gap = (string) $dateBucket['gap']; - // Prepend the bucket bounds to the binding list so they - // appear in placeholder order: first the gap (for the - // Postgres date_trunc call), then the existing WHERE - // clause bindings, then the dateBucket bounds. $boundedWhere = $whereSql.' AND '.$bucketCol.' >= ? AND '.$bucketCol.' < ?'; - $bindings[] = $this->bindValue(value: $dateBucket['start']); - $bindings[] = $this->bindValue(value: $dateBucket['end']); - // The Postgres date_trunc call takes the gap as its - // first argument; bind it ahead of the existing bindings. - array_unshift($bindings, $gap); + if ($platformName === 'postgres') { + // Prepend the bucket bounds to the binding list so + // they appear in placeholder order: first the gap + // (for the Postgres date_trunc call), then the + // existing WHERE clause bindings, then the dateBucket + // bounds. + $bindings[] = $this->bindValue(value: $dateBucket['start']); + $bindings[] = $this->bindValue(value: $dateBucket['end']); + array_unshift($bindings, $gap); + $bucketExpr = 'date_trunc(?, '.$bucketCol.')::text'; + } else { + // MySQL / SQLite emit literal format strings — the + // gap vocabulary is closed (seven entries) and + // validated upstream by AggregationQuery::create(). + $bindings[] = $this->bindValue(value: $dateBucket['start']); + $bindings[] = $this->bindValue(value: $dateBucket['end']); + if ($platformName === 'mysql') { + $bucketExpr = $this->mysqlBucketExpression(column: $bucketCol, gap: $gap); + } else { + $bucketExpr = $this->sqliteBucketExpression(column: $bucketCol, gap: $gap); + } + }//end if - $sql = "SELECT date_trunc(?, {$bucketCol})::text AS bucket, {$aggSql} AS agg + $sql = "SELECT {$bucketExpr} AS bucket, {$aggSql} AS agg FROM {$fullTable} WHERE {$boundedWhere} GROUP BY bucket @@ -1201,6 +1309,139 @@ private function coerceBucketKey(mixed $raw): string }//end coerceBucketKey() + /** + * Detect the active database platform short name. + * + * Returns one of `postgres`, `mysql`, `sqlite`, or `unknown`. Used to + * pick the matching native-bucketing path in + * {@see tryNativeAggregation()} and to annotate the response + * `backend` field in {@see runAdhoc()}. + * + * @return string Lower-case platform short name. + */ + private function detectDatabasePlatform(): string + { + $platformClass = $this->db->getDatabasePlatform()::class; + if (stripos($platformClass, 'PostgreSQL') !== false) { + return 'postgres'; + } + + if (stripos($platformClass, 'MySQL') !== false || stripos($platformClass, 'MariaDB') !== false) { + return 'mysql'; + } + + if (stripos($platformClass, 'SQLite') !== false) { + return 'sqlite'; + } + + return 'unknown'; + + }//end detectDatabasePlatform() + + /** + * Identifier-quoting character for the given platform. + * + * Postgres and SQLite use double-quotes; MySQL uses backticks. + * + * @param string $platform Platform short name from {@see detectDatabasePlatform()}. + * + * @return string The quoting character to wrap identifiers. + */ + private function identifierQuote(string $platform): string + { + return $platform === 'mysql' ? '`' : '"'; + + }//end identifierQuote() + + /** + * Emit a MySQL `DATE_FORMAT`-based bucket expression for the given gap. + * + * The gap vocabulary is closed (seven entries) and validated upstream + * by {@see AggregationQuery::create()}. The format strings produce + * the canonical `Y-m-d\TH:i:s\Z` wire shape so the round-trip + * through {@see coerceBucketKey()} is an identity transform on + * MySQL output. + * + * @param string $column Identifier-quoted column reference. + * @param string $gap Validated gap unit. + * + * @return string The full bucket SQL expression (without the `AS bucket` alias). + */ + private function mysqlBucketExpression(string $column, string $gap): string + { + if ($gap === 'week') { + // ISO-Monday week-start: back-shift by ((DAYOFWEEK - 1 - 1) % 7) + // days, but MySQL's DAYOFWEEK starts on Sunday=1 so the actual + // expression is `((DAYOFWEEK(field) + 5) MOD 7)` which produces + // 0 for Monday, 6 for Sunday — exactly the shift we need to + // reach the previous (or current) Monday. + return 'DATE_FORMAT('.$column.' - INTERVAL ((DAYOFWEEK('.$column.') + 5) MOD 7) DAY, \'%Y-%m-%dT00:00:00Z\')'; + } + + if ($gap === 'quarter') { + return sprintf( + 'CONCAT(YEAR(%1$s), \'-\', LPAD(((QUARTER(%1$s) - 1) * 3 + 1), 2, \'0\'), \'-01T00:00:00Z\')', + $column + ); + } + + $format = match ($gap) { + 'minute' => '%Y-%m-%dT%H:%i:00Z', + 'hour' => '%Y-%m-%dT%H:00:00Z', + 'day' => '%Y-%m-%dT00:00:00Z', + 'month' => '%Y-%m-01T00:00:00Z', + 'year' => '%Y-01-01T00:00:00Z', + default => '%Y-%m-%dT00:00:00Z', + }; + + return 'DATE_FORMAT('.$column.', \''.$format.'\')'; + + }//end mysqlBucketExpression() + + /** + * Emit a SQLite `strftime`-based bucket expression for the given gap. + * + * Mirrors {@see mysqlBucketExpression()} for the SQLite strftime + * vocabulary. The `weekday 0` modifier in SQLite jumps forward to + * the next Sunday (UTC); subtracting six days yields the previous + * Monday — matching ISO-week-start semantics. + * + * @param string $column Identifier-quoted column reference. + * @param string $gap Validated gap unit. + * + * @return string The full bucket SQL expression (without the `AS bucket` alias). + */ + private function sqliteBucketExpression(string $column, string $gap): string + { + if ($gap === 'week') { + return 'strftime(\'%Y-%m-%dT00:00:00Z\', '.$column.', \'weekday 0\', \'-6 days\')'; + } + + if ($gap === 'quarter') { + $parts = []; + $parts[] = sprintf('CASE WHEN CAST(strftime(\'%%m\', %1$s) AS INTEGER) <= 3', $column); + $parts[] = sprintf('THEN strftime(\'%%Y-01-01T00:00:00Z\', %1$s)', $column); + $parts[] = sprintf('WHEN CAST(strftime(\'%%m\', %1$s) AS INTEGER) <= 6', $column); + $parts[] = sprintf('THEN strftime(\'%%Y-04-01T00:00:00Z\', %1$s)', $column); + $parts[] = sprintf('WHEN CAST(strftime(\'%%m\', %1$s) AS INTEGER) <= 9', $column); + $parts[] = sprintf('THEN strftime(\'%%Y-07-01T00:00:00Z\', %1$s)', $column); + $parts[] = sprintf('ELSE strftime(\'%%Y-10-01T00:00:00Z\', %1$s) END', $column); + return implode(' ', $parts); + } + + $format = match ($gap) { + 'minute' => '%Y-%m-%dT%H:%M:00Z', + 'hour' => '%Y-%m-%dT%H:00:00Z', + 'day' => '%Y-%m-%dT00:00:00Z', + 'month' => '%Y-%m-01T00:00:00Z', + 'year' => '%Y-01-01T00:00:00Z', + default => '%Y-%m-%dT00:00:00Z', + }; + + return 'strftime(\''.$format.'\', '.$column.')'; + + }//end sqliteBucketExpression() + /** * Convert a value to its SQL bind shape. * diff --git a/openspec/changes/add-aggregation-enhancements/design.md b/openspec/changes/add-aggregation-enhancements/design.md new file mode 100644 index 0000000000..276cca9fe5 --- /dev/null +++ b/openspec/changes/add-aggregation-enhancements/design.md @@ -0,0 +1,126 @@ +# Design — Time-bucket aggregation enhancements + +This change ships two of the five `add-time-bucket-aggregation` follow-ups. The other three are documented here at design-only depth so the next pickup has a clear starting point rather than re-running the discovery work. + +## Decisions for the two shipped items + +### D1 — MySQL native bucketing emits `DATE_FORMAT` against fixed format strings per gap + +**Decision.** When `tryNativeAggregation()` detects a MySQL platform AND `$dateBucket !== null`, emit: + +```sql +SELECT DATE_FORMAT(`field`, '') AS bucket, AS agg +FROM `oc_
` +WHERE AND `field` >= ? AND `field` < ? +GROUP BY bucket +ORDER BY bucket +``` + +The `` strings map directly from the `gap` vocabulary (no `interval`-string parameter passed at runtime — the gap is finite and small, so a `match()` on the gap returns the literal format string): + +| Gap | Format string | +|-----------|------------------------------| +| `minute` | `'%Y-%m-%dT%H:%i:00Z'` | +| `hour` | `'%Y-%m-%dT%H:00:00Z'` | +| `day` | `'%Y-%m-%dT00:00:00Z'` | +| `month` | `'%Y-%m-01T00:00:00Z'` | +| `year` | `'%Y-01-01T00:00:00Z'` | +| `week` | (no `DATE_FORMAT` shortcut — emit `DATE_FORMAT(field - INTERVAL ((DAYOFWEEK(field) + 5) %% 7) DAY, '%Y-%m-%dT00:00:00Z')` to mirror Postgres ISO-week-Monday) | +| `quarter` | (no `DATE_FORMAT` shortcut — emit `CONCAT(YEAR(field), '-', LPAD(((QUARTER(field) - 1) * 3 + 1), 2, '0'), '-01T00:00:00Z')`) | + +**Why per-gap format strings instead of binding the format at runtime.** Binding the format string as a `?` parameter would force the engine to re-prepare the statement per request and would prevent the format-string from being part of the column's value cache. The gap vocabulary is closed (seven entries) and validated upstream by `AggregationQuery::assertValidDateBucket()`, so emitting a literal is safe. + +**Why `%i` (MySQL minute) rather than `%M`.** `%M` is full-month-name in MySQL's `DATE_FORMAT` vocabulary; the minute placeholder is `%i`. This is the most common transcription error when porting strftime/date_trunc strings to MySQL. + +**Wire format parity.** All six gap formats produce the exact same output byte sequence as the Postgres `coerceBucketKey()` helper. The Postgres path runs each key through `coerceBucketKey()` which parses `Y-m-d H:i:s+TZ` text-cast output and re-formats as `Y-m-d\TH:i:s\Z`. The MySQL `DATE_FORMAT` path emits `Y-m-d\TH:i:s\Z` directly, so `coerceBucketKey()` is a no-op on MySQL output (it parses cleanly and re-formats identically) — same path, same code, just zero-cost on the MySQL keys. + +### D2 — SQLite native bucketing emits `strftime` against the same format strings + +**Decision.** Same shape as MySQL, with `strftime` and the strftime format vocabulary: + +| Gap | strftime format | +|-----------|----------------------------| +| `minute` | `'%Y-%m-%dT%H:%M:00Z'` | +| `hour` | `'%Y-%m-%dT%H:00:00Z'` | +| `day` | `'%Y-%m-%dT00:00:00Z'` | +| `month` | `'%Y-%m-01T00:00:00Z'` | +| `year` | `'%Y-01-01T00:00:00Z'` | +| `week` | `strftime('%Y-%m-%dT00:00:00Z', "field", 'weekday 0', '-7 days')` (ISO-Monday equivalent via the `weekday` modifier — Sunday-of-previous-week + 1 day = Monday-of-current-week) | +| `quarter` | use `CASE WHEN strftime('%m', "field") IN ('01','02','03') THEN strftime('%Y-01-01T00:00:00Z', "field") WHEN ... END` | + +**Why a `CASE` for quarter.** SQLite's `strftime` doesn't expose quarter directly; the `CASE` is small (four arms, fixed at compile time) and stays inside the SQL engine where it belongs. + +**SQLite `%M` is minute** (unlike MySQL's `%i`) — the format strings above use the SQLite strftime vocabulary, not the MySQL one. This is the symmetric porting hazard to D1. + +**Quoting.** SQLite uses double-quotes for identifiers and single-quotes for string literals, identical to Postgres. Existing `sanitizeColumnName()` output (`"colname"`) is portable to SQLite without changes. MySQL uses backticks; the runner branches on platform and emits the right quote character. + +### D3 — Ad-hoc cache key derivation + +**Decision.** The ad-hoc path computes its cache key as: + +```php +$queryHash = sha1(json_encode($query->toArray())); +$cacheKey = sprintf('agg:%s:%s:adhoc:%s:%s:%s', + $registerSlug, $schemaSlug, $queryHash, $filterHash, $rbacHash); +``` + +The literal `adhoc:` prefix in the name slot keeps ad-hoc entries visually distinct from named-aggregation entries in cache dumps. The hash includes everything the runner reads off the query, so two structurally-equivalent queries hit the same key regardless of field-order in the input JSON. + +**Why `toArray()` instead of `json_encode($query)`**. The `AggregationQuery` class has private readonly fields, so PHP's default JSON serialiser produces `{}`. An explicit `toArray()` method: +1. Documents the shape that's part of the cache contract. +2. Sorts the `filter` keys before hashing so `{a: 1, b: 2}` and `{b: 2, a: 1}` hash identically. +3. Future-proofs the key against new optional fields on the query (multi-field groupBy from #1606, window spec from #1607, multi-metric from #1608) — adding the field to `toArray()` automatically extends the cache key. + +**Why use the existing `AggregationCache` class rather than a new ad-hoc cache.** The cache class already owns the RBAC-scoped key derivation, the distributed-cache factory connection, the graceful-degrade-on-unavailable branch, and the 60 s TTL. Forking a parallel cache class would duplicate all of that without removing complexity. The `getAdhoc()`/`setAdhoc()` wrappers cost ~30 lines. + +### D4 — Invalidation reuses the existing listener verbatim + +**Decision.** No changes to `AggregationCacheInvalidationListener`. The listener already calls `AggregationCache::evictForSchema()` on every `ObjectCreatedEvent`, `ObjectUpdatedEvent`, `ObjectDeletedEvent`, and `ObjectTransitionedEvent`, and `evictForSchema()` runs `ICache::clear()` on the `openregister_aggregations` cache — which covers both named-aggregation entries and ad-hoc entries because they share the same cache instance. + +**Coarse vs precise eviction.** `ICache::clear()` is global; ideally we'd evict only the affected `(register, schema)` keys. That would require either prefix-scan support on `ICache` (which Redis backs but the memcache + APCu backends don't) or a secondary index. Both add complexity without removing the 60 s TTL safety net. The existing comment on `evictForSchema()` already documents the tradeoff; the ad-hoc cache inherits it unchanged. + +## Deferred — file separate opsx cycles + +### #1606 — Multi-field groupBy + +**Why deferred.** API decision (`groupBy.field` vs `groupBy.fields` vs `groupBy: [...]`) is unresolved. Three viable shapes: + +1. **Backward-compatible extension**: `GroupByInput.fields: [String!]` alongside the existing `field: String` — clients pick one. Cost: branch in every translator, two shapes to maintain forever. +2. **Replace field with fields**: `GroupByInput.fields: [String!]` (deprecate `field` as a single-element list). Cost: breaking GraphQL change, harder rollout. +3. **Hybrid**: accept both at the input layer; normalize to `fields: string[]` internally. Cost: one normalization function, one shape downstream. + +Postgres `GROUP BY a, b` is straightforward. Solr `pivot.facet` and ES nested `terms` each need their own translator surface — neither is a single-line change. + +**Recommended pickup ordering.** Open a discrete change `add-multi-field-groupby` with the API question as Decision #1, then a thin design pass on the three translators. Solr's `pivot.facet` syntax is the tightest constraint (it returns nested arrays, not a flat list). + +**Response shape.** Add `groups[i].keys: {field_a: value, field_b: value}` alongside the existing `groups[i].key` for single-field requests (keep `key` working for backwards-compat). Single-field requests still emit `key`; multi-field requests emit `keys` with `key` omitted (or set to the concatenated form `field_a||field_b` as a debug aid — Decision needed). + +### #1607 — Cumulative / rolling windows + +**Why deferred.** Postgres window functions compose awkwardly with the existing RBAC predicate (the window has to be applied AFTER the row-level filter; emitting `SUM(...) OVER (ORDER BY bucket)` requires the bucket column to be selected, which means changing the inner SELECT, then wrapping). The PHP fallback needs a separate accumulation pass; Solr lacks native cumulative aggregation (post-process pipeline only); ES has `cumulative_sum`/`moving_avg` pipeline aggregators (different surface). + +**Recommended pickup ordering.** Open `add-aggregation-windows` with three Decisions: +1. **API shape**: `AggregationQuery.window: {type: 'cumulative'|'rolling', size?: int}` — cumulative is rolling-over-all-prior; rolling is rolling-over-N. Need to pin `size` semantics (rows-vs-time-window). +2. **Postgres SQL**: nested-SELECT with `SUM(agg) OVER (ORDER BY bucket ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)` for cumulative; `ROWS BETWEEN N PRECEDING AND CURRENT ROW` for rolling. The Postgres native path becomes a 2-level query. +3. **Translator coverage**: PHP polyfill (single accumulation pass over the already-bucketed groups); ES `cumulative_sum`/`moving_fn` pipeline; Solr post-processing in the result-formatter. + +**Response shape.** Per-group field stays `value` but its semantics change with `window`. Document at the API layer; no extra response field needed. + +### #1608 — Multi-metric in one request + +**Why deferred.** Largest of the three deferred items — `AggregationQuery.metric: string` becomes `AggregationQuery.metrics: AggregationMetricSpec[]`, and every translator (Postgres, Solr, ES, PHP fallback) currently encodes "one metric" in its SELECT/facet/aggs builder. Decision points: + +1. **API shape**: `metrics: [{name: 'callCount', metric: 'count'}, {name: 'avgDuration', metric: 'avg', field: 'duration'}]`. `name` is the response-side label so clients can address each metric independently. +2. **Backward-compat**: keep single `metric` working; if both are set, prefer `metrics` (Decision needed — error vs warn vs prefer). +3. **Response shape**: `groups[i].metrics: {callCount: 30, avgDuration: 12.4}` alongside the existing `groups[i].value` (kept and populated from `metrics[0]` for single-metric back-compat). +4. **Translator updates**: Postgres emits multiple aggregate columns in one SELECT; Solr JSON Facet API supports nested `facet` objects; ES nested `aggs` is the native multi-metric path. + +**Recommended pickup ordering.** Open `add-multi-metric-aggregation`. This is the largest of the three follow-ups because the value object shape change touches the most translators. + +## Interaction between the deferred items + +`metrics: [{name, metric, field}]` (#1608) composes with `groupBy.fields` (#1606) trivially — `groups[i].metrics` becomes per-group regardless of how the groups are formed. + +`window` (#1607) composes with multi-metric by applying the window expression to each metric independently (`SUM(agg1) OVER (ORDER BY bucket), SUM(agg2) OVER (ORDER BY bucket)`). + +All three can be picked up in any order; none blocks the others. diff --git a/openspec/changes/add-aggregation-enhancements/proposal.md b/openspec/changes/add-aggregation-enhancements/proposal.md new file mode 100644 index 0000000000..baa2eb784e --- /dev/null +++ b/openspec/changes/add-aggregation-enhancements/proposal.md @@ -0,0 +1,68 @@ +--- +kind: code +depends_on: [add-time-bucket-aggregation] +--- + +# Time-bucket aggregation enhancements (MySQL/SQLite native + ad-hoc cache) + +## Why + +`add-time-bucket-aggregation` (PR #1611) shipped the ad-hoc aggregation primitive end-to-end (REST + GraphQL + Postgres native + PHP fallback), and filed five follow-ups under issues #1606-#1610. Two of those five are "ready" — the surface area is already designed, the dependencies already exist in code, and the work is bounded: + +- **#1609 — MySQL/SQLite native time-bucket**. Today every non-Postgres database falls through to `AggregationRunner::bucketInPhp()`, which hydrates the full RBAC-filtered row set into PHP before bucketing. That's correct but slow above ~50k rows. Both MySQL (`DATE_FORMAT(field, '%Y-%m-%dT00:00:00Z')`) and SQLite (`strftime('%Y-%m-%dT00:00:00Z', field)`) ship a native function that produces identical ISO-8601-UTC bucket labels to the Postgres `date_trunc(...)::text` path, so the runtime cost stays in the database engine where it belongs. The CI test container is MySQL, so this also unblocks a real native-path integration test for the time-bucket primitive on the CI database. +- **#1610 — Cache ad-hoc aggregations**. `AggregationCache` is already wired into the named-annotation path of `AggregationRunner::run()`, with a 60 s TTL, content-addressed RBAC-scoped key, and an `AggregationCacheInvalidationListener` that evicts on every `ObjectCreatedEvent`/`ObjectUpdatedEvent`/`ObjectDeletedEvent`/`ObjectTransitionedEvent`. The ad-hoc path of `runAdhoc()` currently emits `cached: false` on every call because the cache key requires a `name` slot and ad-hoc queries have no name. Filling the slot with `sha1(json_encode(AggregationQuery::toArray()))` reuses the existing cache + invalidation plumbing unchanged; only the key derivation differs. + +The three remaining issues (#1606 multi-field groupBy, #1607 cumulative/rolling windows, #1608 multi-metric) each require a value-object refactor on `AggregationQuery` plus knock-on translator changes (Solr / ES / Postgres / PHP fallback / GraphQL types). They're deferred to separate opsx cycles where the API decisions can be made deliberately rather than bundled into a "ready follow-ups" PR. Design notes are recorded in [`design.md`](./design.md) so the next pickup has the context to file the follow-up changes without redoing the discovery work. + +## What Changes + +### Shipped in this change + +- **`AggregationRunner::tryNativeAggregation()`** — extend the Postgres-specific branch to detect MySQL and SQLite platforms and emit the matching native bucketing expression for each. Bind parameters and result-row coercion stay identical so `coerceBucketKey()` keeps producing the same wire format on every backend. +- **`AggregationQuery::toArray()`** — new public method returning a stable associative array of the query's fields (metric + field + sorted filter + groupBy + dateBucket). Used as the input to the ad-hoc cache key hash. +- **`AggregationCache::getAdhoc()` / `setAdhoc()`** — thin wrappers around the existing `get()`/`set()`/`key()` that derive the `name` slot from `sha1(json_encode($query->toArray()))` prefixed with `adhoc:` so ad-hoc cache entries are visually distinct from named-aggregation entries in cache dumps. TTL stays at the existing 60 s class constant. +- **`AggregationRunner::runAdhoc()`** — read-through-cache: on entry compute the key from `($register->getSlug(), $schema->getSlug(), $query)`, return `{...cached, cached: true}` on hit, populate on miss with the same envelope (`{groups|value, backend, cached: false}` rewritten to `cached: true` only on the next request). +- **Invalidation** — no listener changes needed. The existing `AggregationCacheInvalidationListener` calls `AggregationCache::evictForSchema()` which executes `ICache::clear()` on `openregister_aggregations` — covering both named and ad-hoc entries because they share the underlying cache. +- **Backend reporting** — `tryNativeAggregation()` now reports `mysql` and `sqlite` alongside `postgres` and `php-fallback` in the response `backend` field so callers can observe which path served a request. +- **Tests** — unit coverage for: `AggregationQuery::toArray()` (key stability across filter ordering); `AggregationCache::getAdhoc()`/`setAdhoc()` round-trip; MySQL bucket SQL emission; SQLite bucket SQL emission; cache hit on second identical `runAdhoc()` call; cache miss on differing query. +- **Documentation** — `docs/technical/aggregation-api.md` updated: backend matrix gains MySQL + SQLite rows; cache section documents the ad-hoc TTL + invalidation surface; performance-note section softened (no longer "Postgres only" for native bucketing). + +### Deferred (file separate opsx cycles) + +- **#1606 Multi-field groupBy** — `AggregationQuery.groupBy` becomes `{fields: string[]}`; result shape grows `groups[i].keys: {a, b}`; Postgres `GROUP BY a, b`; Solr `pivot.facet`; ES nested `terms`. See `design.md` for the decision points. +- **#1607 Cumulative / rolling windows** — new `AggregationQuery.window: {type: 'cumulative'|'rolling', size?: int}`; Postgres window functions; PHP-fallback accumulation pass; ES `cumulative_sum` pipeline; Solr stats + post-processing. See `design.md`. +- **#1608 Multi-metric** — `AggregationQuery.metric` (scalar) becomes `AggregationQuery.metrics: AggregationMetricSpec[]`; result shape grows `groups[i].metrics: {count, sum, ...}`. Knock-on edits to every translator + the GraphQL `AggregationMetric` enum. See `design.md`. + +**Non-breaking.** The cache addition is purely additive: a cache miss falls through to the same code path the caller hits today; a cache hit returns the same envelope with `cached: true`. The MySQL/SQLite native paths produce the same wire format as the existing PHP fallback and Postgres native paths — same ISO-8601-UTC keys, same metric coercion. No spec deletes, no breaking API changes. + +## Capabilities + +### MODIFIED Capabilities + +- `aggregation-api` — extend the existing `aggregation-api` capability with: + - Backend matrix: MySQL + SQLite emit native time-bucket SQL; the response `backend` field reports the actual engine that served the query. + - Cache semantics for the ad-hoc path: read-through 60 s TTL keyed on `(register, schema, sha1(query.toArray()), filter, RBAC scope)`; evicted on object lifecycle events for the affected `(register, schema)` pair via the existing invalidation listener. + +## Risks + +- **MySQL `DATE_FORMAT` vs Postgres `date_trunc` semantics drift**. Both produce string output but the input acceptance set differs (MySQL is lenient about non-date strings — it returns `NULL`; Postgres errors). Mitigated by emitting `WHERE field >= ? AND field < ?` with the same bound conversion that already pre-filters non-date rows in the Postgres path; the bucketing only sees rows that already passed the bounds. +- **SQLite `strftime` only accepts `YYYY-MM-DD HH:MM:SS` or Julian-day input**. JSON-stored string dates in OR's magic tables match the ISO-8601 acceptance set already documented for the Postgres path, so the same field-allow-list rules in `TimeseriesRequestValidator` carry over unchanged. +- **Cache stampede on dashboards with many concurrent tiles**. Mitigated by the 60 s TTL ceiling on duplicate misses and by the read-through pattern (every hit short-circuits the SQL). Documented in the docs update as a known characteristic with the matching performance ceiling guidance from #1610. +- **Cache key explosion from variable filters**. The cache key includes `sha1(query.toArray())` so a caller that varies filter shape on every request also varies the cache key — those entries age out at 60 s. The existing PHP_FALLBACK_ROW_CAP already bounds per-request memory; the cache doesn't change that bound. + +## Migration / rollout + +- No data migration. No schema changes (`kind: code` per ADR-032). +- The MySQL/SQLite native paths are opportunistic: existing deployments still get the same response shape; performance on those databases improves silently. The PHP fallback remains in place as the safety net for engines that match neither. +- Cache is read-through; first deploy emits `cached: false` on every call until the cache warms (i.e. always emits `false` on the first call per cache key). No client-visible behaviour change beyond the `cached: true` flag flipping on subsequent identical requests. + +## Verification + +- **#1609** — switch dev container to MySQL (`docker-compose.yml` `db` service profile), re-run the time-bucket integration request (`curl ... /timeseries?field=_created&interval=DAY&from=...&to=...`), confirm response body `backend` is `mysql` (not `php-fallback`). Repeat on SQLite by switching the database URL. +- **#1610** — call the same endpoint twice with identical query params; first response is `{cached: false, backend: ..., groups: [...]}`, second response is `{cached: true, backend: ..., groups: [...]}` with the same `groups` array. Insert a row into the schema, call a third time, confirm `cached: false` again (eviction listener fired). + +## Out of scope + +- Issues #1606, #1607, #1608 — see `design.md` for the planned API surface and the decision points that justify deferring them. +- Solr/Elasticsearch translators for the new MySQL/SQLite paths — those backends have their own native time-bucket primitives (`facet.range`, `date_histogram`) that the existing `aggregations-backend-native` change covers; the MySQL/SQLite native path here is for the **direct-to-DB** Postgres-native sibling, not for the external search-backend path. +- Distributed-lock-based cache-stampede prevention — the 60 s TTL ceiling on duplicate misses is sufficient for the current scale; revisit if a dashboard widget surfaces stampede symptoms in production. diff --git a/openspec/changes/add-aggregation-enhancements/specs/aggregation-api/spec.md b/openspec/changes/add-aggregation-enhancements/specs/aggregation-api/spec.md new file mode 100644 index 0000000000..f7f57a55b5 --- /dev/null +++ b/openspec/changes/add-aggregation-enhancements/specs/aggregation-api/spec.md @@ -0,0 +1,88 @@ +## MODIFIED Requirements + +### Requirement: The system SHALL use a native time-bucket expression on every supported database and fall back to PHP only on unrecognised engines + +When the underlying database is PostgreSQL, the aggregator SHALL execute a single `SELECT date_trunc($gap, "$field")::text AS bucket, AS agg FROM
WHERE AND "$field" >= ? AND "$field" < ? GROUP BY bucket ORDER BY bucket` query and SHALL annotate the response with `backend: "postgres"`. + +When the underlying database is MySQL, the aggregator SHALL execute a single `SELECT DATE_FORMAT(\`$field\`, '') AS bucket, AS agg FROM
WHERE AND \`$field\` >= ? AND \`$field\` < ? GROUP BY bucket ORDER BY bucket` query, where `` is the MySQL `DATE_FORMAT` string corresponding to the `gap` vocabulary (`'%Y-%m-%dT%H:%i:00Z'` for minute, `'%Y-%m-%dT%H:00:00Z'` for hour, `'%Y-%m-%dT00:00:00Z'` for day, `'%Y-%m-01T00:00:00Z'` for month, `'%Y-01-01T00:00:00Z'` for year). For `week` the query SHALL emit `DATE_FORMAT(field - INTERVAL ((DAYOFWEEK(field) + 5) %% 7) DAY, '%Y-%m-%dT00:00:00Z')` to align with ISO-Monday week-start semantics. For `quarter` the query SHALL emit `CONCAT(YEAR(field), '-', LPAD(((QUARTER(field) - 1) * 3 + 1), 2, '0'), '-01T00:00:00Z')`. The response SHALL be annotated with `backend: "mysql"`. + +When the underlying database is SQLite, the aggregator SHALL execute the equivalent query using `strftime('', "$field")`, where `` is the SQLite strftime string for the `gap` (`'%Y-%m-%dT%H:%M:00Z'` for minute, `'%Y-%m-%dT%H:00:00Z'` for hour, `'%Y-%m-%dT00:00:00Z'` for day, `'%Y-%m-01T00:00:00Z'` for month, `'%Y-01-01T00:00:00Z'` for year). For `week` the query SHALL use the strftime `weekday 0` modifier to back-shift to the previous Monday. For `quarter` the query SHALL emit a `CASE WHEN strftime('%m', field) IN ('01','02','03') THEN ... END` expression. The response SHALL be annotated with `backend: "sqlite"`. + +When the database is none of the above (an engine OpenRegister does not natively target), the aggregator SHALL pull the RBAC-filtered row set, bucket in PHP using a `date_trunc` polyfill keyed on the `gap` vocabulary, and SHALL annotate the response with `backend: "php-fallback"`. The PHP path SHALL produce the same response shape and the same bucket-key format as the native paths. + +#### Scenario: Postgres path annotates `backend: "postgres"` +- **GIVEN** the database is PostgreSQL +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "postgres"` + +#### Scenario: MySQL path annotates `backend: "mysql"` and emits DATE_FORMAT +- **GIVEN** the database is MySQL +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "mysql"` +- **AND** the emitted SQL SHALL contain `DATE_FORMAT` against the bucketed field with the matching format string +- **AND** the bucket keys SHALL be ISO-8601-UTC strings matching what Postgres would have returned + +#### Scenario: SQLite path annotates `backend: "sqlite"` and emits strftime +- **GIVEN** the database is SQLite +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "sqlite"` +- **AND** the emitted SQL SHALL contain `strftime` against the bucketed field with the matching format string +- **AND** the bucket keys SHALL be ISO-8601-UTC strings matching what Postgres would have returned + +#### Scenario: Unrecognised engine falls back to PHP +- **GIVEN** the database is neither PostgreSQL nor MySQL nor SQLite (i.e. a hypothetical custom backend) +- **WHEN** the client requests a DAY-bucketed series +- **THEN** the response SHALL include `backend: "php-fallback"` +- **AND** the bucket keys SHALL be ISO-8601-UTC strings matching what the native paths would have returned + +## ADDED Requirements + +### Requirement: The system SHALL cache ad-hoc aggregation results for up to 60 seconds with read-through semantics + +`AggregationRunner::runAdhoc()` SHALL check `AggregationCache` for a cached result envelope before executing any native or PHP-fallback computation. On hit, the runner SHALL return the cached envelope with `cached: true` and SHALL NOT execute the underlying aggregation. On miss, the runner SHALL execute the aggregation, store the resulting envelope in the cache with `cached: false`, and return that envelope to the caller. + +The cache key SHALL be derived from `(registerSlug, schemaSlug, sha1(json_encode(AggregationQuery::toArray())), resolvedFilter, rbacScopeHash)` where: + +- `AggregationQuery::toArray()` returns a stable associative array of the query's `metric`, `field`, `filter`, `groupBy`, and `dateBucket` slots, with filter sub-arrays sorted by key so that two structurally-equivalent filters hash identically. +- `rbacScopeHash` is the SHA-1 of the active user's UID (or `'anonymous'` when no user is logged in), inherited from the existing named-aggregation cache key derivation. + +The cache entry SHALL share the storage backend with the named-aggregation cache (the `openregister_aggregations` distributed cache). The name slot SHALL be prefixed with the literal `adhoc:` so ad-hoc entries are visually distinct from named entries when the cache is dumped for debugging. + +The TTL SHALL be the existing 60-second class constant `AggregationCache::TTL`. The cache SHALL gracefully degrade to no-cache when the distributed cache backend is unavailable (cache writes silently no-op; cache reads return `null` for miss). + +#### Scenario: Cache miss populates, second call hits +- **GIVEN** the cache is empty for the (register, schema, query) triple +- **WHEN** the client issues an ad-hoc aggregation request +- **THEN** the response SHALL include `cached: false` and the underlying SQL SHALL be executed +- **AND** the result envelope SHALL be stored in the cache +- **WHEN** the client issues an identical request within 60 seconds +- **THEN** the response SHALL include `cached: true` +- **AND** the underlying SQL SHALL NOT be executed +- **AND** the `groups` (or `value`) field SHALL equal the first response's groups (or value) + +#### Scenario: Differing query is a different cache entry +- **GIVEN** the cache contains a result for `{field: 'created', interval: 'DAY', from: '2026-05-01', to: '2026-05-22'}` +- **WHEN** the client issues a different request `{field: 'created', interval: 'HOUR', from: '2026-05-21', to: '2026-05-22'}` +- **THEN** the response SHALL include `cached: false` and the underlying SQL SHALL be executed + +#### Scenario: Filter-key reordering hits the same cache entry +- **GIVEN** the cache contains a result for filter `{status: 'open', priority: 'high'}` +- **WHEN** the client issues an otherwise-identical request with filter `{priority: 'high', status: 'open'}` +- **THEN** the response SHALL include `cached: true` + +### Requirement: The ad-hoc aggregation cache SHALL be invalidated on every object lifecycle event for the affected schema + +The existing `AggregationCacheInvalidationListener` SHALL evict ad-hoc cache entries alongside named-aggregation entries on every `ObjectCreatedEvent`, `ObjectUpdatedEvent`, `ObjectDeletedEvent`, and `ObjectTransitionedEvent` for the affected `(register, schema)` pair. The listener SHALL achieve this by reusing the existing `AggregationCache::evictForSchema()` method, which evicts every entry in the `openregister_aggregations` distributed cache regardless of whether the entry was written by the named-aggregation path or the ad-hoc path. + +The eviction granularity SHALL match the existing coarse-but-bounded approach: a full cache flush on the `openregister_aggregations` namespace, with the 60-second TTL ceiling bounding staleness in the event of a missed evict. + +#### Scenario: Inserting a row evicts the ad-hoc cache for the schema +- **GIVEN** the cache contains an ad-hoc result for `(softwarecatalogus, applications, query=X)` +- **WHEN** a new `applications` row is inserted (firing `ObjectCreatedEvent`) +- **THEN** the next ad-hoc request for `(softwarecatalogus, applications, query=X)` SHALL include `cached: false` +- **AND** the underlying SQL SHALL be re-executed against the now-updated row set + +#### Scenario: Transitioning an object evicts the ad-hoc cache +- **GIVEN** the cache contains an ad-hoc result for `(softwarecatalogus, applications, query=Y)` +- **WHEN** an `applications` row transitions state (firing `ObjectTransitionedEvent`) +- **THEN** the next ad-hoc request for `(softwarecatalogus, applications, query=Y)` SHALL include `cached: false` diff --git a/openspec/changes/add-aggregation-enhancements/tasks.md b/openspec/changes/add-aggregation-enhancements/tasks.md new file mode 100644 index 0000000000..a15433f50b --- /dev/null +++ b/openspec/changes/add-aggregation-enhancements/tasks.md @@ -0,0 +1,39 @@ +# Tasks — Time-bucket aggregation enhancements + +## 1. Backend — MySQL / SQLite native bucketing (#1609) + +- [x] 1.1 Detect the active database platform inside `AggregationRunner::tryNativeAggregation()`. Existing code branches on `PostgreSQL`; extend the branch to also match `MySQL` and `SQLite` platform class names from `OCP\IDBConnection::getDatabasePlatform()`. Non-matching platforms still fall through to the PHP path. +- [x] 1.2 Extract the per-gap format-string lookup tables for MySQL (`DATE_FORMAT`) and SQLite (`strftime`) into private static helper methods (`mysqlBucketExpression()`, `sqliteBucketExpression()`). Each returns the full SQL expression `(, '') AS bucket` or the `CASE` form for `quarter` / week-Monday. +- [x] 1.3 Wire the helpers into the `if ($dateBucket !== null)` branch of `tryNativeAggregation()`. The `whereSql` / bindings construction stays unchanged; only the bucket expression and the identifier-quoting (backticks on MySQL vs double-quotes on Postgres/SQLite) differ. +- [x] 1.4 Surface the platform name in the response `backend` field. The dispatch site already wraps `tryNativeAggregation()` return with `'backend' => 'postgres'`; extend it to return `'mysql'` or `'sqlite'` when the matching platform served the query. Use a small `detectDatabasePlatform()` private helper that returns the lowercased platform short name. +- [x] 1.5 The `coerceBucketKey()` helper continues to normalize keys defensively. MySQL/SQLite emit the canonical `Y-m-d\TH:i:s\Z` directly, so `coerceBucketKey()` is a no-op on those keys — verified by passing the emitted format strings through `strtotime` round-trip in the unit test. + +## 2. Backend — Ad-hoc cache (#1610) + +- [x] 2.1 Add `AggregationQuery::toArray(): array` returning `['metric' => $metric, 'field' => $field, 'filter' => , 'groupBy' => $groupBy, 'dateBucket' => $dateBucket]`. Filter sub-arrays are ksort-sorted recursively so two structurally-equivalent filters hash to the same value. +- [x] 2.2 Add `AggregationCache::getAdhoc(string $registerSlug, string $schemaSlug, AggregationQuery $query): ?array` and the matching `setAdhoc(...)` writer. Both wrap the existing `get()`/`set()` with the name slot computed as `'adhoc:'.sha1(json_encode($query->toArray()))`. Filter content goes through the existing `filter` parameter for the inner key derivation. +- [x] 2.3 Wire `AggregationRunner::runAdhoc()` to call `cache->getAdhoc()` on entry: cache hit returns the stored envelope with `cached: true`. Cache miss falls through to the existing native-or-fallback dispatch, then writes the envelope back via `setAdhoc()` (with `cached: false`, which is rewritten to `cached: true` on the next hit). +- [x] 2.4 No changes to `AggregationCacheInvalidationListener` — the existing global `ICache::clear()` on object lifecycle events covers ad-hoc entries because they share the cache instance. Document this in the design doc and the cache class doc-block. + +## 3. Tests + +- [x] 3.1 `tests/Unit/Service/Aggregation/AggregationQueryTest.php` — add cases: `toArray()` round-trips through every field; `toArray()` is stable under filter-key reordering (`{a, b}` and `{b, a}` produce identical SHA-1 hashes); `toArray()` includes `dateBucket` when set; `toArray()` returns null for missing optional fields. +- [x] 3.2 `tests/Unit/Service/Aggregation/AggregationCacheTest.php` — add cases: `getAdhoc()` returns null on miss; `setAdhoc()` then `getAdhoc()` round-trips a result envelope; ad-hoc and named cache entries don't collide (a `set()` with `name='foo'` and a `setAdhoc()` with a query whose hash equals `'foo'` are independent because of the `'adhoc:'` prefix); `evictForSchema()` removes both kinds of entries (via the underlying `ICache::clear()`). +- [x] 3.3 `tests/Unit/Service/Aggregation/AggregationRunnerNativeBucketTest.php` — new file. Use `MySQLPlatform` / `SqlitePlatform` / `PostgreSQLPlatform` mocks for the `IDBConnection::getDatabasePlatform()` return value; assert the emitted SQL contains `DATE_FORMAT(\`\`, '%Y-%m-%dT00:00:00Z')` for MySQL `gap=day`; verify backtick identifier quoting; verify backend annotation flips per platform; pin the `%i` vs `%M` minute placeholder divergence between MySQL and SQLite. +- [x] 3.4 (merged into 3.3 — single file covers all three native platforms + unknown-platform fallthrough) +- [x] 3.5 `tests/Unit/Service/Aggregation/AggregationRunnerAdhocCacheTest.php` — new file. Test: cache hit flips `cached` flag and skips the underlying dispatch; cache miss populates the cache with the same envelope; the resolved query reaches both `getAdhoc()` and `setAdhoc()` callbacks unchanged. +- [x] 3.6 Run the full `Unit Tests` suite — stays green (113 aggregation tests pass). + +## 4. Quality gates + +- [x] 4.1 PHPCS strict — clean on all touched files (`./vendor/bin/phpcs --standard=phpcs.xml lib/Service/Aggregation/`). +- [x] 4.2 PHPMD strict — clean on all touched files (targeted `@SuppressWarnings` on `TooManyMethods` at class level, `ExcessiveMethodLength`/`StaticAccess` on `runAdhoc()`, `ElseExpression` on `tryNativeAggregation()` to cover platform branches). +- [x] 4.3 Psalm strict — `No errors found!` on touched files. +- [x] 4.4 PHPStan level 8 — clean on touched files. +- [x] 4.5 PHPUnit — full `Unit Tests` suite green inside the dev container. + +## 5. Documentation + +- [x] 5.1 `docs/technical/aggregation-api.md` — backend matrix rewritten as a per-engine table (`postgres` / `mysql` / `sqlite` / `php-fallback`). Added a "Cache" section documenting the 60 s TTL, the read-through pattern, the key-stability semantics, the RBAC scoping, the lifecycle-event invalidation, and the stampede-tolerance position. Performance-notes section updated to drop the "Postgres-only native" caveat. +- [x] 5.2 `openspec/platform-capabilities.md` — aggregations row updated: native fast path now covers Postgres/MySQL/SQLite; ad-hoc cache + invalidation listener documented. +- [x] 5.3 `design.md` cross-linked from the proposal so the next pickup of #1606/#1607/#1608 finds the deferred-design notes via the breadcrumb trail. diff --git a/openspec/platform-capabilities.md b/openspec/platform-capabilities.md index 3728223299..d786761468 100644 --- a/openspec/platform-capabilities.md +++ b/openspec/platform-capabilities.md @@ -36,7 +36,7 @@ Each annotation lives at the schema's top level. Validation runs at schema-save | `x-openregister-seeds` | implemented | (seed data spec) | Sample objects on install for demos / testing | | `x-openregister-archival` | implemented | `archival-destruction-workflow` | Retention classification + archival metadata (Archiefwet) | | `x-openregister-lifecycle` | implemented (pilot, decidesk Meeting) | `lifecycle-annotation` change | Declarative state machines: `field`, `initial`, `transitions{action: {from, to, requires?}}`. Pre-save validation rejects invalid transitions; sugar `POST /api/objects/{id}/transition` endpoint; `ObjectTransitionedEvent` joins the existing event family. Stored under `configuration['x-openregister-lifecycle']`. | -| `x-openregister-aggregations` | implemented + Postgres-native + 60s cache (pilot, decidesk ActionItem) | `aggregations-annotation`, `aggregations-backend-native` (in progress), `add-time-bucket-aggregation` | Declarative `count`/`sum`/`avg`/`min`/`max`/`count_distinct` queries with optional groupBy. `GET /api/objects/aggregations/{register}/{schema}/{name}`. Postgres-native fast path covers equality + operator filters (`in`/`gt`/`gte`/`lt`/`lte`/`ne`); falls back to PHP runner over MagicMapper for unsupported shapes. Results 60s-cached per (register, schema, name, filter, RBAC scope) and evicted on object-write events; response carries `backend: "postgres" \| "php-fallback"` for attribution. Solr facets / ES aggs paths are deferred. Stored under `configuration['x-openregister-aggregations']`. **Companion ad-hoc primitive** (added by `add-time-bucket-aggregation`): `GET /api/objects/aggregations/{register}/{schema}/timeseries?field&interval&from&to` + GraphQL `groupBy: GroupByInput` arg on every auto-generated list query. Client-controlled bucketing via Postgres `date_trunc()`; same row-level RBAC + multi-tenant predicate as the named path. See `docs/technical/aggregation-api.md`. | +| `x-openregister-aggregations` | implemented + Postgres/MySQL/SQLite native + 60s cache (pilot, decidesk ActionItem) | `aggregations-annotation`, `aggregations-backend-native` (in progress), `add-time-bucket-aggregation`, `add-aggregation-enhancements` | Declarative `count`/`sum`/`avg`/`min`/`max`/`count_distinct` queries with optional groupBy. `GET /api/objects/aggregations/{register}/{schema}/{name}`. Native fast path covers equality + operator filters (`in`/`gt`/`gte`/`lt`/`lte`/`ne`) on Postgres (full vocab), MySQL (time-bucket only via `DATE_FORMAT`), and SQLite (time-bucket only via `strftime`); falls back to PHP runner over MagicMapper for unsupported shapes. Results 60s-cached per (register, schema, name, filter, RBAC scope) and evicted on object-write events; response carries `backend: "postgres" \| "mysql" \| "sqlite" \| "php-fallback"` for attribution. Solr facets / ES aggs paths are deferred. Stored under `configuration['x-openregister-aggregations']`. **Companion ad-hoc primitive** (added by `add-time-bucket-aggregation`, enhanced by `add-aggregation-enhancements`): `GET /api/objects/aggregations/{register}/{schema}/timeseries?field&interval&from&to` + GraphQL `groupBy: GroupByInput` arg on every auto-generated list query. Client-controlled bucketing on every supported engine; same row-level RBAC + multi-tenant predicate as the named path. Ad-hoc results are cached for up to 60 s with read-through semantics; cache is invalidated on every object lifecycle event on the affected `(register, schema)` pair via the shared `AggregationCacheInvalidationListener`. See `docs/technical/aggregation-api.md`. | | `x-openregister-calculations` | implemented (pilot, decidesk ActionItem) | `calculations-annotation` change | Computed fields. JSON-shaped expression AST (concat, if, arithmetic, comparison, date diff, placeholders, dotted prop paths incl. `@self.created` / `@self.updated`). `materialise: true` evaluates at save and persists. `materialise: false` evaluates at render when `_extend=calculations`. Both shapes available; `occ openregister:rematerialise-calculations [--dry-run]` reprocesses materialised calcs. | | `x-openregister-notifications` | implemented v2 (pilot, decidesk Meeting) | `notifications-annotation`, `notifications-v2` (in progress) | Declarative notifications via `INotificationManager`. v2 trigger types: `created`/`updated`/`transition` (with optional action filter), `scheduled` (`intervalSec >= 60`, fires through 60s `ScheduledNotificationJob`), `threshold` (referenced aggregation + `op` + `value`, fires once per below→above transition via `AggregationThresholdListener`). Recipient kinds: `users`, `field`, `groups`, `relation`, `object-acl` (resolves ACL holders for `read`/`manage` permission), `expression` (DI-tagged `RecipientResolverInterface`). Channels: `nc-notification`, `email`, `activity`, `webhook` (with `webhook.persistent: true` auto-creating a `Webhook` entity via `NotificationsAnnotationInstaller` so delivery flows through the standard retry/HMAC/dead-letter pipeline), `talk` (chat post via Spreed REST). Subject template supports `{{prop}}` interpolation. Stored under `configuration['x-openregister-notifications']`. | diff --git a/tests/Unit/Service/Aggregation/AggregationCacheTest.php b/tests/Unit/Service/Aggregation/AggregationCacheTest.php index 8e5108687f..9af2944689 100644 --- a/tests/Unit/Service/Aggregation/AggregationCacheTest.php +++ b/tests/Unit/Service/Aggregation/AggregationCacheTest.php @@ -5,6 +5,7 @@ namespace Unit\Service\Aggregation; use OCA\OpenRegister\Service\Aggregation\AggregationCache; +use OCA\OpenRegister\Service\Aggregation\AggregationQuery; use OCP\ICache; use OCP\ICacheFactory; use OCP\IUser; @@ -215,6 +216,133 @@ public function testCacheBackendUnavailableFailsClosed(): void $cache->evictForSchema('reg', 'sch'); } + public function testGetAdhocReturnsNullOnMiss(): void + { + $this->cache->method('get')->willReturn(null); + $cache = $this->makeCache(); + $query = AggregationQuery::create(metric: 'count'); + $this->assertNull($cache->getAdhoc('reg', 'sch', $query)); + } + + public function testSetAdhocRoundTripsThroughGetAdhoc(): void + { + $stored = null; + $key = null; + $this->cache->method('set')->willReturnCallback(function (string $k, string $v) use (&$stored, &$key) { + $key = $k; + $stored = $v; + return true; + }); + $this->cache->method('get')->willReturnCallback(function (string $k) use (&$stored, &$key) { + return $k === $key ? $stored : null; + }); + + $cache = $this->makeCache(); + $query = AggregationQuery::create( + metric: 'count', + dateBucket: [ + 'field' => 'created', + 'start' => '2026-05-01T00:00:00Z', + 'end' => '2026-05-22T00:00:00Z', + 'gap' => 'day', + ] + ); + + $envelope = ['groups' => [['key' => '2026-05-01T00:00:00Z', 'value' => 7]], 'backend' => 'postgres', 'cached' => false]; + $cache->setAdhoc('reg', 'sch', $query, $envelope); + $result = $cache->getAdhoc('reg', 'sch', $query); + $this->assertSame($envelope, $result); + } + + public function testAdhocAndNamedCacheKeysDoNotCollide(): void + { + $captured = []; + $this->cache->method('set')->willReturnCallback(function (string $key) use (&$captured) { + $captured[] = $key; + return true; + }); + + $cache = $this->makeCache(); + $query = AggregationQuery::create(metric: 'count', filter: ['status' => 'open']); + + $cache->set('reg', 'sch', 'totalOpen', [], ['value' => 1]); + $cache->setAdhoc('reg', 'sch', $query, ['value' => 1, 'backend' => 'postgres', 'cached' => false]); + + $this->assertCount(2, $captured); + $this->assertNotSame( + $captured[0], + $captured[1], + 'ad-hoc cache entries MUST NOT collide with named-aggregation entries even when slugs match' + ); + + // The ad-hoc key must contain the literal "adhoc:" marker so it's + // visually distinct in cache dumps. + $this->assertStringContainsString('adhoc:', $captured[1]); + $this->assertStringNotContainsString('adhoc:', $captured[0]); + } + + public function testAdhocKeyIsStableUnderQueryFilterReordering(): void + { + $captured = []; + $this->cache->method('set')->willReturnCallback(function (string $key) use (&$captured) { + $captured[] = $key; + return true; + }); + + $cache = $this->makeCache(); + $first = AggregationQuery::create( + metric: 'count', + filter: ['status' => 'open', 'priority' => 'high'] + ); + $second = AggregationQuery::create( + metric: 'count', + filter: ['priority' => 'high', 'status' => 'open'] + ); + + $cache->setAdhoc('reg', 'sch', $first, ['value' => 1, 'backend' => 'postgres', 'cached' => false]); + $cache->setAdhoc('reg', 'sch', $second, ['value' => 1, 'backend' => 'postgres', 'cached' => false]); + + $this->assertSame($captured[0], $captured[1], 'ad-hoc keys MUST be stable under filter key reordering'); + } + + public function testDifferentAdhocQueriesProduceDifferentKeys(): void + { + $captured = []; + $this->cache->method('set')->willReturnCallback(function (string $key) use (&$captured) { + $captured[] = $key; + return true; + }); + + $cache = $this->makeCache(); + $dayQ = AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-01T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'day'] + ); + $hourQ = AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-01T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'hour'] + ); + + $cache->setAdhoc('reg', 'sch', $dayQ, ['value' => 1, 'backend' => 'postgres', 'cached' => false]); + $cache->setAdhoc('reg', 'sch', $hourQ, ['value' => 1, 'backend' => 'postgres', 'cached' => false]); + + $this->assertNotSame( + $captured[0], + $captured[1], + 'different ad-hoc queries (gap=day vs gap=hour) MUST produce different cache keys' + ); + } + + public function testEvictForSchemaCoversAdhocEntries(): void + { + // evictForSchema delegates to ICache::clear() which wipes the + // entire `openregister_aggregations` namespace — both named and + // ad-hoc entries live there, so a single clear() covers both. + $this->cache->expects($this->once())->method('clear'); + $cache = $this->makeCache(); + $cache->evictForSchema('reg', 'sch'); + } + private function makeCache(): AggregationCache { return new AggregationCache($this->cacheFactory, $this->userSession, $this->logger); diff --git a/tests/Unit/Service/Aggregation/AggregationQueryTest.php b/tests/Unit/Service/Aggregation/AggregationQueryTest.php index 426681f739..3dcd67a134 100644 --- a/tests/Unit/Service/Aggregation/AggregationQueryTest.php +++ b/tests/Unit/Service/Aggregation/AggregationQueryTest.php @@ -158,4 +158,78 @@ public function testGroupByAndDateBucketAreMutuallyExclusive(): void }//end testGroupByAndDateBucketAreMutuallyExclusive() + public function testToArrayIncludesAllFields(): void + { + $q = AggregationQuery::create( + metric: 'sum', + field: 'amount', + filter: ['status' => 'open'], + dateBucket: [ + 'field' => 'created', + 'start' => '2026-01-01T00:00:00Z', + 'end' => '2026-02-01T00:00:00Z', + 'gap' => 'day', + ] + ); + $arr = $q->toArray(); + $this->assertSame('sum', $arr['metric']); + $this->assertSame('amount', $arr['field']); + $this->assertSame(['status' => 'open'], $arr['filter']); + $this->assertNull($arr['groupBy']); + $this->assertSame('created', $arr['dateBucket']['field']); + $this->assertSame('day', $arr['dateBucket']['gap']); + + }//end testToArrayIncludesAllFields() + + + public function testToArrayIsStableUnderFilterKeyReordering(): void + { + $first = AggregationQuery::create( + metric: 'count', + filter: ['status' => 'open', 'priority' => 'high'] + ); + $second = AggregationQuery::create( + metric: 'count', + filter: ['priority' => 'high', 'status' => 'open'] + ); + $this->assertSame( + sha1((string) json_encode($first->toArray())), + sha1((string) json_encode($second->toArray())), + 'toArray() output MUST be stable under filter-key reordering' + ); + + }//end testToArrayIsStableUnderFilterKeyReordering() + + + public function testToArrayCanonicalisesOperatorSubArrays(): void + { + $first = AggregationQuery::create( + metric: 'count', + filter: ['amount' => ['gt' => 0, 'lte' => 100]] + ); + $second = AggregationQuery::create( + metric: 'count', + filter: ['amount' => ['lte' => 100, 'gt' => 0]] + ); + $this->assertSame( + sha1((string) json_encode($first->toArray())), + sha1((string) json_encode($second->toArray())), + 'toArray() output MUST be stable under operator-key reordering inside filter sub-arrays' + ); + + }//end testToArrayCanonicalisesOperatorSubArrays() + + + public function testToArrayReturnsNullForMissingOptionalFields(): void + { + $q = AggregationQuery::create(metric: 'count'); + $arr = $q->toArray(); + $this->assertNull($arr['field']); + $this->assertNull($arr['groupBy']); + $this->assertNull($arr['dateBucket']); + $this->assertSame([], $arr['filter']); + + }//end testToArrayReturnsNullForMissingOptionalFields() + + }//end class diff --git a/tests/Unit/Service/Aggregation/AggregationRunnerAdhocCacheTest.php b/tests/Unit/Service/Aggregation/AggregationRunnerAdhocCacheTest.php new file mode 100644 index 0000000000..30417029fd --- /dev/null +++ b/tests/Unit/Service/Aggregation/AggregationRunnerAdhocCacheTest.php @@ -0,0 +1,228 @@ + + * @copyright 2026 Conduction B.V. + * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + * + * @link https://www.OpenRegister.app + */ + +declare(strict_types=1); + +namespace Unit\Service\Aggregation; + +use OCA\OpenRegister\Db\MagicMapper; +use OCA\OpenRegister\Db\Register; +use OCA\OpenRegister\Db\RegisterMapper; +use OCA\OpenRegister\Db\Schema; +use OCA\OpenRegister\Db\SchemaMapper; +use OCA\OpenRegister\Service\Aggregation\AggregationCache; +use OCA\OpenRegister\Service\Aggregation\AggregationQuery; +use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Object\PermissionHandler; +use OCA\OpenRegister\Service\OrganisationService; +use OCA\OpenRegister\Service\Search\PlaceholderResolver; +use Doctrine\DBAL\Platforms\PostgreSQLPlatform; +use OCP\IDBConnection; +use OCP\IUserSession; +use PHPUnit\Framework\MockObject\MockObject; +use PHPUnit\Framework\TestCase; + +/** + * @covers \OCA\OpenRegister\Service\Aggregation\AggregationRunner + */ +class AggregationRunnerAdhocCacheTest extends TestCase +{ + + private MagicMapper&MockObject $magicMapper; + private RegisterMapper&MockObject $registerMapper; + private SchemaMapper&MockObject $schemaMapper; + private PlaceholderResolver $placeholderResolver; + private IDBConnection&MockObject $db; + private AggregationCache&MockObject $cache; + private PermissionHandler&MockObject $permissionHandler; + private IUserSession&MockObject $userSession; + private OrganisationService&MockObject $organisationService; + private AggregationRunner $runner; + + + protected function setUp(): void + { + parent::setUp(); + + $this->magicMapper = $this->createMock(MagicMapper::class); + $this->registerMapper = $this->createMock(RegisterMapper::class); + $this->schemaMapper = $this->createMock(SchemaMapper::class); + $this->db = $this->createMock(IDBConnection::class); + $this->cache = $this->createMock(AggregationCache::class); + $this->permissionHandler = $this->createMock(PermissionHandler::class); + $this->userSession = $this->createMock(IUserSession::class); + $this->organisationService = $this->createMock(OrganisationService::class); + $this->placeholderResolver = new PlaceholderResolver($this->userSession); + + $this->userSession->method('getUser')->willReturn(null); + $this->organisationService->method('getActiveOrganisation')->willReturn(null); + $this->permissionHandler->method('hasPermission')->willReturn(true); + + // Wire a Postgres platform mock so detectDatabasePlatform() can + // resolve cleanly when the runner annotates the cache-write + // envelope's `backend` field. The PHP-fallback path is exercised + // because tryNativeAggregation() returns null on the empty table + // name (next setUp lines), but detectDatabasePlatform is still + // called outside that branch. + $this->db->method('getDatabasePlatform')->willReturn( + $this->createMock(PostgreSQLPlatform::class) + ); + + // Force the PHP-fallback path by returning an empty table name — + // tryNativeAggregation returns null on empty table name, then the + // runner calls bucketInPhp() which hydrates magicMapper's empty + // findAllInRegisterSchemaTable() result. + $this->magicMapper->method('getTableNameForRegisterSchema')->willReturn(''); + $this->magicMapper->method('findAllInRegisterSchemaTable')->willReturn([]); + + $this->runner = new AggregationRunner( + magicMapper: $this->magicMapper, + registerMapper: $this->registerMapper, + schemaMapper: $this->schemaMapper, + placeholders: $this->placeholderResolver, + db: $this->db, + cache: $this->cache, + permissionHandler: $this->permissionHandler, + userSession: $this->userSession, + organisationService: $this->organisationService, + searchBackend: null + ); + + }//end setUp() + + + public function testCacheHitFlipsCachedFlagAndSkipsDispatch(): void + { + $stored = ['groups' => [['key' => '2026-05-01T00:00:00Z', 'value' => 7]], 'backend' => 'postgres', 'cached' => false]; + + // Cache hit: getAdhoc returns the stored envelope; the runner MUST + // NOT execute the underlying dispatch (magicMapper::findAll would + // be called by bucketInPhp on a miss; assert it's not called). + $this->cache->method('getAdhoc')->willReturn($stored); + $this->magicMapper->expects($this->never())->method('findAllInRegisterSchemaTable'); + $this->cache->expects($this->never())->method('setAdhoc'); + + $query = AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-01T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'day'] + ); + + $result = $this->runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $query + ); + + $this->assertSame([['key' => '2026-05-01T00:00:00Z', 'value' => 7]], $result['groups']); + $this->assertTrue($result['cached'], 'cache hit MUST flip cached flag to true'); + $this->assertSame('postgres', $result['backend']); + + }//end testCacheHitFlipsCachedFlagAndSkipsDispatch() + + + public function testCacheMissPopulatesCacheWithEnvelope(): void + { + // Cache miss: dispatch runs, envelope is written back via setAdhoc. + $this->cache->method('getAdhoc')->willReturn(null); + + $stored = null; + $this->cache->expects($this->once()) + ->method('setAdhoc') + ->willReturnCallback(function ($_reg, $_sch, $_query, array $result) use (&$stored) { + $stored = $result; + }); + + $query = AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-01T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'day'] + ); + + $result = $this->runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $query + ); + + $this->assertFalse($result['cached'], 'cache miss MUST emit cached=false'); + $this->assertSame('php-fallback', $result['backend'], 'empty table forces PHP fallback'); + $this->assertIsArray($stored); + $this->assertSame($result, $stored, 'envelope written to cache MUST equal envelope returned to caller'); + + }//end testCacheMissPopulatesCacheWithEnvelope() + + + public function testCachePassesResolvedQueryToBothGetAndSet(): void + { + $this->cache->method('getAdhoc')->willReturn(null); + + $capturedGet = null; + $capturedSet = null; + $this->cache->method('getAdhoc')->willReturnCallback(function ($_reg, $_sch, AggregationQuery $q) use (&$capturedGet) { + $capturedGet = $q; + return null; + }); + $this->cache->method('setAdhoc')->willReturnCallback(function ($_reg, $_sch, AggregationQuery $q) use (&$capturedSet) { + $capturedSet = $q; + }); + + $query = AggregationQuery::create( + metric: 'count', + filter: ['status' => 'open'] + ); + + $this->runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $query + ); + + $this->assertNotNull($capturedGet); + $this->assertNotNull($capturedSet); + $this->assertSame( + $capturedGet->toArray(), + $capturedSet->toArray(), + 'cache read and cache write MUST use the same resolved query value' + ); + + }//end testCachePassesResolvedQueryToBothGetAndSet() + + + private function makeSchema(): Schema + { + $schema = new Schema(); + $schema->setSlug('calllogs'); + $schema->setId(1); + return $schema; + + }//end makeSchema() + + + private function makeRegister(): Register + { + $register = new Register(); + $register->setSlug('openconnector'); + $register->setSchemas([1]); + return $register; + + }//end makeRegister() + + +}//end class diff --git a/tests/Unit/Service/Aggregation/AggregationRunnerNativeBucketTest.php b/tests/Unit/Service/Aggregation/AggregationRunnerNativeBucketTest.php new file mode 100644 index 0000000000..768f642449 --- /dev/null +++ b/tests/Unit/Service/Aggregation/AggregationRunnerNativeBucketTest.php @@ -0,0 +1,376 @@ + + * @copyright 2026 Conduction B.V. + * @license EUPL-1.2 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + * + * @link https://www.OpenRegister.app + */ + +declare(strict_types=1); + +namespace Unit\Service\Aggregation; + +use Doctrine\DBAL\Platforms\AbstractPlatform; +use Doctrine\DBAL\Platforms\MySQLPlatform; +use Doctrine\DBAL\Platforms\PostgreSQLPlatform; +use Doctrine\DBAL\Platforms\SqlitePlatform; +use OCA\OpenRegister\Db\MagicMapper; +use OCA\OpenRegister\Db\Register; +use OCA\OpenRegister\Db\RegisterMapper; +use OCA\OpenRegister\Db\Schema; +use OCA\OpenRegister\Db\SchemaMapper; +use OCA\OpenRegister\Service\Aggregation\AggregationCache; +use OCA\OpenRegister\Service\Aggregation\AggregationQuery; +use OCA\OpenRegister\Service\Aggregation\AggregationRunner; +use OCA\OpenRegister\Service\Object\PermissionHandler; +use OCA\OpenRegister\Service\OrganisationService; +use OCA\OpenRegister\Service\Search\PlaceholderResolver; +use OCP\DB\IPreparedStatement; +use OCP\DB\IResult; +use OCP\IDBConnection; +use OCP\IUserSession; +use PHPUnit\Framework\MockObject\MockObject; +use PHPUnit\Framework\TestCase; + +/** + * @covers \OCA\OpenRegister\Service\Aggregation\AggregationRunner + */ +class AggregationRunnerNativeBucketTest extends TestCase +{ + + private MagicMapper&MockObject $magicMapper; + private RegisterMapper&MockObject $registerMapper; + private SchemaMapper&MockObject $schemaMapper; + private PlaceholderResolver $placeholderResolver; + private IDBConnection&MockObject $db; + private AggregationCache&MockObject $cache; + private PermissionHandler&MockObject $permissionHandler; + private IUserSession&MockObject $userSession; + private OrganisationService&MockObject $organisationService; + + + protected function setUp(): void + { + parent::setUp(); + + $this->magicMapper = $this->createMock(MagicMapper::class); + $this->registerMapper = $this->createMock(RegisterMapper::class); + $this->schemaMapper = $this->createMock(SchemaMapper::class); + $this->db = $this->createMock(IDBConnection::class); + $this->cache = $this->createMock(AggregationCache::class); + $this->permissionHandler = $this->createMock(PermissionHandler::class); + $this->userSession = $this->createMock(IUserSession::class); + $this->organisationService = $this->createMock(OrganisationService::class); + $this->placeholderResolver = new PlaceholderResolver($this->userSession); + + $this->userSession->method('getUser')->willReturn(null); + $this->organisationService->method('getActiveOrganisation')->willReturn(null); + $this->permissionHandler->method('hasPermission')->willReturn(true); + $this->cache->method('getAdhoc')->willReturn(null); + $this->cache->method('setAdhoc'); + $this->magicMapper->method('getTableNameForRegisterSchema')->willReturn('register_1_schema_calllogs'); + + }//end setUp() + + + public function testMysqlPlatformEmitsDateFormatWithBackticks(): void + { + $this->wirePlatform(platform: $this->createMock(MySQLPlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $result = $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $this->dayBucketQuery() + ); + + $this->assertNotNull($captured['sql'], 'runAdhoc MUST prepare a SQL statement on MySQL'); + $this->assertStringContainsString( + "DATE_FORMAT(`created`, '%Y-%m-%dT00:00:00Z')", + $captured['sql'], + 'MySQL path MUST emit DATE_FORMAT with the day-gap format string' + ); + $this->assertStringContainsString('`oc_register_1_schema_calllogs`', $captured['sql'], 'MySQL path MUST use backticks for the table'); + $this->assertStringContainsString('`_organisation`', $captured['sql'], 'MySQL path MUST use backticks for the org column'); + $this->assertSame('mysql', $result['backend'], 'response backend MUST be "mysql"'); + + }//end testMysqlPlatformEmitsDateFormatWithBackticks() + + + public function testMysqlHourBucketEmitsHourFormat(): void + { + $this->wirePlatform(platform: $this->createMock(MySQLPlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-21T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'hour'] + ) + ); + + $this->assertStringContainsString( + "DATE_FORMAT(`created`, '%Y-%m-%dT%H:00:00Z')", + $captured['sql'], + 'MySQL HOUR gap MUST use %Y-%m-%dT%H:00:00Z format' + ); + + }//end testMysqlHourBucketEmitsHourFormat() + + + public function testMysqlMinuteBucketUsesPercentI(): void + { + // MySQL minute placeholder is %i, NOT %M (which is full month name). + // This is the most common transcription error porting strftime to + // DATE_FORMAT — pin it with a test. + $this->wirePlatform(platform: $this->createMock(MySQLPlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-21T00:00:00Z', 'end' => '2026-05-21T01:00:00Z', 'gap' => 'minute'] + ) + ); + + $this->assertStringContainsString("'%Y-%m-%dT%H:%i:00Z'", $captured['sql']); + $this->assertStringNotContainsString("'%Y-%m-%dT%H:%M:00Z'", $captured['sql'], 'MySQL must NOT emit %M for minutes — that is full month name'); + + }//end testMysqlMinuteBucketUsesPercentI() + + + public function testSqlitePlatformEmitsStrftimeWithDoubleQuotes(): void + { + $this->wirePlatform(platform: $this->createMock(SqlitePlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $result = $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $this->dayBucketQuery() + ); + + $this->assertStringContainsString( + "strftime('%Y-%m-%dT00:00:00Z', \"created\")", + $captured['sql'], + 'SQLite path MUST emit strftime with the day-gap format string' + ); + $this->assertStringContainsString('"oc_register_1_schema_calllogs"', $captured['sql'], 'SQLite path MUST use double-quotes for the table'); + $this->assertSame('sqlite', $result['backend'], 'response backend MUST be "sqlite"'); + + }//end testSqlitePlatformEmitsStrftimeWithDoubleQuotes() + + + public function testSqliteMinuteBucketUsesPercentM(): void + { + // Counter-test to testMysqlMinuteBucketUsesPercentI() — SQLite + // strftime minute IS %M (unlike MySQL DATE_FORMAT). + $this->wirePlatform(platform: $this->createMock(SqlitePlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-21T00:00:00Z', 'end' => '2026-05-21T01:00:00Z', 'gap' => 'minute'] + ) + ); + + $this->assertStringContainsString("strftime('%Y-%m-%dT%H:%M:00Z'", $captured['sql'], 'SQLite MUST use %M for minutes'); + + }//end testSqliteMinuteBucketUsesPercentM() + + + public function testPostgresPlatformStillEmitsDateTrunc(): void + { + // Regression: the Postgres path emitted by #1611 stays unchanged. + $this->wirePlatform(platform: $this->createMock(PostgreSQLPlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $result = $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $this->dayBucketQuery() + ); + + $this->assertStringContainsString('date_trunc(?, "created")::text', $captured['sql'], 'Postgres path stays on date_trunc'); + $this->assertSame('postgres', $result['backend']); + + }//end testPostgresPlatformStillEmitsDateTrunc() + + + public function testUnknownPlatformFallsBackToPhp(): void + { + $this->wirePlatform(platform: $this->createMock(AbstractPlatform::class)); + // Db prepare MUST NOT be called — the runner detects the unknown + // platform and skips straight to bucketInPhp(). + $this->db->expects($this->never())->method('prepare'); + + $this->magicMapper->method('findAllInRegisterSchemaTable')->willReturn([]); + + $runner = $this->makeRunner(); + $result = $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: $this->dayBucketQuery() + ); + + $this->assertSame('php-fallback', $result['backend'], 'unknown platform MUST fall through to PHP fallback'); + + }//end testUnknownPlatformFallsBackToPhp() + + + public function testMysqlBucketBindsStartAndEndBounds(): void + { + $this->wirePlatform(platform: $this->createMock(MySQLPlatform::class)); + $captured = $this->captureSql(); + + $runner = $this->makeRunner(); + $runner->runAdhoc( + register: $this->makeRegister(), + schema: $this->makeSchema(), + query: AggregationQuery::create( + metric: 'count', + dateBucket: ['field' => 'created', 'start' => '2026-05-01T00:00:00Z', 'end' => '2026-05-22T00:00:00Z', 'gap' => 'day'] + ) + ); + + // MySQL/SQLite do NOT prepend the gap to the binding list (the + // format string is literal SQL on those engines), so the bound + // values are just `__no_active_org__`, start, end. + $this->assertContains('2026-05-01T00:00:00Z', $captured['bindings'] ?? [], 'start bound MUST be bound'); + $this->assertContains('2026-05-22T00:00:00Z', $captured['bindings'] ?? [], 'end bound MUST be bound'); + $this->assertNotContains('day', $captured['bindings'] ?? [], 'MySQL gap MUST be literal SQL, not a bound parameter'); + + }//end testMysqlBucketBindsStartAndEndBounds() + + + // ----------------------------------------------------------------------- + // Helpers. + // ----------------------------------------------------------------------- + + + /** + * Capture the SQL passed to `db->prepare()` and the bindings passed + * to `stmt->execute()`. Statement returns no rows so the runner emits + * empty groups. + * + * Uses an ArrayObject so the closures' writes are visible to the + * calling test (a plain PHP array returned from this helper would be + * copied on assignment, hiding the closure mutations). + * + * @return \ArrayObject Mutable container with `sql` and `bindings` keys. + */ + private function captureSql(): \ArrayObject + { + $captured = new \ArrayObject(['sql' => null, 'bindings' => null]); + $stmt = $this->createMock(IPreparedStatement::class); + $result = $this->createMock(IResult::class); + $stmt->method('execute')->willReturnCallback(function (array $bindings=[]) use ($captured, $result) { + $captured['bindings'] = $bindings; + return $result; + }); + $stmt->method('fetch')->willReturn(false); + + $this->db->method('prepare')->willReturnCallback(function (string $sql) use ($stmt, $captured) { + $captured['sql'] = $sql; + return $stmt; + }); + + return $captured; + + }//end captureSql() + + + /** + * Wire the db connection mock to return the given platform instance. + * + * @param AbstractPlatform $platform Platform double. + * + * @return void + */ + private function wirePlatform(AbstractPlatform $platform): void + { + $this->db->method('getDatabasePlatform')->willReturn($platform); + + }//end wirePlatform() + + + private function dayBucketQuery(): AggregationQuery + { + return AggregationQuery::create( + metric: 'count', + dateBucket: [ + 'field' => 'created', + 'start' => '2026-05-01T00:00:00Z', + 'end' => '2026-05-22T00:00:00Z', + 'gap' => 'day', + ] + ); + + }//end dayBucketQuery() + + + private function makeSchema(): Schema + { + $schema = new Schema(); + $schema->setSlug('calllogs'); + $schema->setId(1); + return $schema; + + }//end makeSchema() + + + private function makeRegister(): Register + { + $register = new Register(); + $register->setSlug('openconnector'); + $register->setSchemas([1]); + return $register; + + }//end makeRegister() + + + private function makeRunner(): AggregationRunner + { + return new AggregationRunner( + magicMapper: $this->magicMapper, + registerMapper: $this->registerMapper, + schemaMapper: $this->schemaMapper, + placeholders: $this->placeholderResolver, + db: $this->db, + cache: $this->cache, + permissionHandler: $this->permissionHandler, + userSession: $this->userSession, + organisationService: $this->organisationService, + searchBackend: null + ); + + }//end makeRunner() + + +}//end class