Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/core/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,10 +368,12 @@ uint8_t ray_obj_attrs(ray_t* v) {

int64_t ray_vec_get_i64(ray_t* vec, int64_t idx) {
if (!vec || idx < 0 || idx >= vec->len) return 0;
if (vec->type == RAY_I64 || vec->type == RAY_DATE || vec->type == RAY_TIME || vec->type == RAY_TIMESTAMP) {
if (vec->type == RAY_I64 || vec->type == RAY_TIMESTAMP) {
return ((const int64_t*)ray_data(vec))[idx];
}
if (vec->type == RAY_I32) return ((const int32_t*)ray_data(vec))[idx];
if (vec->type == RAY_I32 || vec->type == RAY_DATE || vec->type == RAY_TIME) {
return ((const int32_t*)ray_data(vec))[idx];
}
if (vec->type == RAY_I16) return ((const int16_t*)ray_data(vec))[idx];
if (vec->type == RAY_U8 || vec->type == RAY_BOOL) return ((const uint8_t*)ray_data(vec))[idx];
return 0;
Expand Down
103 changes: 103 additions & 0 deletions test/rfl/agg/rowform_maxmin.rfl
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
;; ════════════════════════════════════════════════════════════════════
;; ROWFORM per-group max(x) + min(y) (src/ops/group.c: exec_group_maxmin_rowform)
;;
;; Planner gate (src/ops/query.c:5985) routes
;; (select {a: (max x) b: (min y) by: <single key col> from: T})
;; to OP_GROUP_MAXMIN_ROWFORM when:
;; - exactly 2 aggs, the first OP_MAX and second OP_MIN
;; - exactly 1 key, no where clause, no non-aggregate expressions
;; - key, x, y all simple OP_SCAN
;; - key type in {I64,I32,I16,U8,BOOL,DATE,TIME,TIMESTAMP,SYM}
;; - x, y types in {I64,I32,I16,U8,BOOL} (integer only — F64 falls back)
;;
;; ROWFORM emits one row per group with columns [key, x, y] where x
;; holds per-group max and y per-group min. Group order is partition-
;; induced, so tests use sum / membership rather than positional checks.
;;
;; Parallel threshold: nrows >= 16384 (src/ops/group.c:10482).
;; ════════════════════════════════════════════════════════════════════

;; ─── basic shape: I64 key, I64 x, I64 y ─────────────────────────────
(set T (table [k x y] (list (as 'I64 [0 0 0 1 1 1]) (as 'I64 [3 1 5 2 7 4]) (as 'I64 [50 30 70 20 60 10]))))
;; g=0: x={3,1,5} max=5; y={50,30,70} min=30
;; g=1: x={2,7,4} max=7; y={20,60,10} min=10
(count (select {mx: (max x) mn: (min y) by: k from: T})) -- 2
(sum (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mx)) -- 12
(sum (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mn)) -- 40
;; Output column types match source: I64 throughout.
(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mx)) -- 'I64
(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mn)) -- 'I64
(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'k)) -- 'I64

;; ─── SYM key (gate allows it) ───────────────────────────────────────
(set Ts (table [k x y] (list [A A A B B] (as 'I64 [1 5 3 2 4]) (as 'I64 [9 2 8 7 6]))))
;; g=A: max x=5, min y=2; g=B: max x=4, min y=6
(count (select {mx: (max x) mn: (min y) by: k from: Ts})) -- 2
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'mx)) -- 9
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'mn)) -- 8
(type (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'k)) -- 'SYM

;; ─── narrow integer key + narrow integer values ─────────────────────
(set Ti32 (table [k x y] (list (as 'I32 [0 0 1 1]) (as 'I32 [5 7 11 13]) (as 'I32 [20 10 50 40]))))
;; g=0: max=7, min=10; g=1: max=13, min=40
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mx)) -- 20
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mn)) -- 50
(type (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mx)) -- 'I32
(type (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mn)) -- 'I32

(set Ti16 (table [k x y] (list (as 'I16 [0 0 1 1]) (as 'I16 [10 20 30 40]) (as 'I16 [-1 -2 3 -4]))))
;; g=0: max=20, min=-2; g=1: max=40, min=-4. Sums: 60, -6.
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti16}) 'mx)) -- 60
(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti16}) 'mn)) -- -6

(set Tu8 (table [k x y] (list (as 'U8 [0 0 1 1]) (as 'U8 [10 30 5 7]) (as 'U8 [40 60 20 80]))))
;; g=0: max=30, min=40; g=1: max=7, min=20. Sums: 37, 60.
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tu8}) 'mx)) -- 37
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tu8}) 'mn)) -- 60

;; ─── BOOL x, BOOL y (degenerate but supported) ──────────────────────
(set Tb (table [k x y] (list [A A B B] [false true true false] [true false true true])))
;; g=A: max x = true; min y = false. g=B: max x = true; min y = true.
(count (select {mx: (max x) mn: (min y) by: k from: Tb})) -- 2
;; sum of BOOL coerces to I64: true=1, false=0. mx: 1+1=2, mn: 0+1=1.
(sum (as 'I64 (at (select {mx: (max x) mn: (min y) by: k from: Tb}) 'mx))) -- 2
(sum (as 'I64 (at (select {mx: (max x) mn: (min y) by: k from: Tb}) 'mn))) -- 1

;; ─── single group ───────────────────────────────────────────────────
(set T1 (table [k x y] (list (as 'I64 [0 0 0 0 0]) (as 'I64 [3 1 5 2 7]) (as 'I64 [50 30 70 20 60]))))
(count (select {mx: (max x) mn: (min y) by: k from: T1})) -- 1
;; max x = 7, min y = 20
(at (at (select {mx: (max x) mn: (min y) by: k from: T1}) 'mx) 0) -- 7
(at (at (select {mx: (max x) mn: (min y) by: k from: T1}) 'mn) 0) -- 20

;; ─── many small groups ──────────────────────────────────────────────
(set Tm (table [k x y] (list (as 'I64 [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]) (as 'I64 [10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29]) (as 'I64 [50 51 52 53 54 55 56 57 58 59 40 41 42 43 44 45 46 47 48 49]))))
;; group j has x={j+10, j+20} -> max = j+20; y={j+50, j+40} -> min = j+40.
;; sum of maxes: (0+20)+(1+20)+...+(9+20) = 45+200 = 245.
;; sum of mins: (0+40)+(1+40)+...+(9+40) = 45+400 = 445.
(count (select {mx: (max x) mn: (min y) by: k from: Tm})) -- 10
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tm}) 'mx)) -- 245
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tm}) 'mn)) -- 445

;; ─── parallel path: nrows >= 16384 ──────────────────────────────────
;; 20000 rows. x = i, y = 2N-i. 10 groups (mod 10).
(set N 20000)
(set Tbig (table [k x y] (list (% (til N) 10) (til N) (- (* 2 N) (til N)))))
;; Group j: x = {j, j+10, ..., j+19990}; max x = j+19990.
;; y = {2N-j, 2N-j-10, ..., 2N-j-19990}; min y = 2N-j-19990 = 20010-j.
;; sum of max x: 10*19990 + 45 = 199945
;; sum of min y: 10*20010 - 45 = 200055
(count (select {mx: (max x) mn: (min y) by: k from: Tbig})) -- 10
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tbig}) 'mx)) -- 199945
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tbig}) 'mn)) -- 200055

;; ─── parallel with SYM key (high-cardinality H2O id3 shape) ─────────
(set Tsbig (table [k x y] (list (as 'SYMBOL (% (til N) 100)) (til N) (- (* 2 N) (til N)))))
;; 100 groups. The group for the j-th key (the symbol converted from j,
;; 0 <= j < 100) contains the rows i where i % 100 = j.
;; x values: {j, j+100, ..., j+19900}; max x = j+19900.
;; y = 2N - i where N=20000: y values: {40000-j, ..., 40000-j-19900}; min y = 20100-j.
;; sum of max x: 100*19900 + (0+..+99) = 1990000 + 4950 = 1994950.
;; sum of min y: 100*20100 - (0+..+99) = 2010000 - 4950 = 2005050.
(count (select {mx: (max x) mn: (min y) by: k from: Tsbig})) -- 100
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tsbig}) 'mx)) -- 1994950
(sum (at (select {mx: (max x) mn: (min y) by: k from: Tsbig}) 'mn)) -- 2005050
132 changes: 132 additions & 0 deletions test/rfl/agg/rowform_sum_count.rfl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
;; ════════════════════════════════════════════════════════════════════
;; ROWFORM multi-key per-group sum(v) + count(v)
;; (src/ops/group.c: exec_group_sum_count_rowform)
;;
;; Planner gate (src/ops/query.c:6082) routes
;; (select {tot: (sum v) cnt: (count v) by: [k1 k2 ... kN] from: T})
;; to OP_GROUP_SUM_COUNT_ROWFORM when:
;; - N keys with 3 <= N <= 8 (all simple OP_SCAN)
;; - exactly 2 aggs: (sum v) then (count v), same value column
;; - no where, no non-agg expressions
;; - all keys non-nullable, types in
;; {I64,I32,I16,U8,BOOL,DATE,TIME,TIMESTAMP,SYM}
;; - v non-nullable, type in {I64,I32,I16,U8,BOOL,F64}
;;
;; ROWFORM emits one row per distinct key tuple with columns
;; [k1..kN, sum, count]. Sum is always F64 (executor casts integer
;; v -> double); count is I64. Group order is partition-induced; tests
;; verify via aggregate sums / counts, not positional checks.
;;
;; Aliases avoid colliding with key names (the result schema is
;; [keys..., tot, cnt] and a name collision lets `at` pick the wrong
;; column). We use `tot` and `cnt` throughout.
;;
;; Parallel threshold: nrows >= 16384 (src/ops/group.c:11656).
;; Closes canonical H2O q10.
;; ════════════════════════════════════════════════════════════════════

;; ─── basic 3-key shape: I64 keys, I64 v ─────────────────────────────
(set T (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [10 20 30 40]))))
;; All 4 rows have unique (k1,k2,k3) tuples, so 4 groups each of size 1.
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T})) -- 4
;; Sum across all groups = sum of v = 100; count total = 4.
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'tot)) -- 100.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'cnt)) -- 4
;; Sum column is F64 (executor always emits F64 sum); count is I64.
(type (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'tot)) -- 'F64
(type (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'cnt)) -- 'I64

;; ─── 3 keys with collapses: distinct group count < n_rows ───────────
(set Tg (table [k1 k2 k3 v] (list (as 'I64 [0 0 0 1 1]) (as 'I64 [0 0 1 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [10 20 30 40 50]))))
;; Distinct (k1,k2,k3): (0,0,0)->v={10,20}=30, (0,1,0)->v=30, (1,0,0)->v={40,50}=90.
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg})) -- 3
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 150.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 5
;; Verify max per-group sum is 90 (from (1,0,0)) and min is 30 (shared by
;; (0,0,0), whose rows sum to 10+20, and the singleton (0,1,0)).
(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 90.0
(min (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 30.0
(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 2
(min (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 1

;; ─── 4 keys ─────────────────────────────────────────────────────────
(set T4 (table [k1 k2 k3 k4 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [1 2 3 4]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4}) 'tot)) -- 10.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4}) 'cnt)) -- 4

;; ─── 5 keys ─────────────────────────────────────────────────────────
(set T5 (table [k1 k2 k3 k4 k5 v] (list (as 'I64 [0 0 1 1 0]) (as 'I64 [0 1 0 1 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [10 20 30 40 100]))))
;; (0,0,0,0,0) seen twice (rows 0 and 4) -> sum 110, count 2. Three other singletons.
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'tot)) -- 200.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'cnt)) -- 5
(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'cnt)) -- 2

;; ─── 6 keys ─────────────────────────────────────────────────────────
(set T6 (table [k1 k2 k3 k4 k5 k6 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [11 22 33 44]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6}) 'tot)) -- 110.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6}) 'cnt)) -- 4

;; ─── 7 keys ─────────────────────────────────────────────────────────
(set T7 (table [k1 k2 k3 k4 k5 k6 k7 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [5 6 7 8]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7}) 'tot)) -- 26.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7}) 'cnt)) -- 4

;; ─── 8 keys (gate upper bound) ──────────────────────────────────────
(set T8 (table [k1 k2 k3 k4 k5 k6 k7 k8 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [10 20 30 40]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8}) 'tot)) -- 100.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8}) 'cnt)) -- 4

;; ─── F64 v column ───────────────────────────────────────────────────
(set Tf (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'F64 [1.5 2.5 3.5 4.5]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf}) 'tot)) -- 12.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf}) 'cnt)) -- 4

;; ─── narrow integer v (I32 / I16 / U8) ──────────────────────────────
(set Ti32 (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I32 [10 20 30 40]))))
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ti32}) 'tot)) -- 100.0
(set Tu8 (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'U8 [1 2 3 4]))))
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tu8}) 'tot)) -- 10.0

;; ─── SYM keys (canonical H2O q10 shape) ─────────────────────────────
(set Ts (table [k1 k2 k3 v] (list [A A B B] [X Y X Y] [P P P P] (as 'I64 [10 20 30 40]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts}) 'tot)) -- 100.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts}) 'cnt)) -- 4

;; SYM keys with collapses
(set Tsc (table [k1 k2 k3 v] (list [A A A B] [X X Y Y] [P P P P] (as 'I64 [10 20 30 40]))))
;; Distinct: (A,X,P) -> {10,20}=30, (A,Y,P) -> 30, (B,Y,P) -> 40.
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc})) -- 3
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc}) 'tot)) -- 100.0
(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc}) 'cnt)) -- 2

;; ─── mixed key types: I64 + SYM + I32 ───────────────────────────────
(set Tmix (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) [A B A B] (as 'I32 [0 0 0 0]) (as 'I64 [10 20 30 40]))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tmix})) -- 4
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tmix}) 'tot)) -- 100.0

;; ─── parallel path: nrows >= 16384 ──────────────────────────────────
;; 20000 rows; 3 keys with cardinalities 5, 4 and 3. The moduli are pairwise
;; coprime, so every key combination occurs: 5 * 4 * 3 = 60 groups.
(set N 20000)
(set Tbig (table [k1 k2 k3 v] (list (% (til N) 5) (% (til N) 4) (% (til N) 3) (til N))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig})) -- 60
;; Total sum of v across all groups = N*(N-1)/2 = 199990000.
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig}) 'tot)) -- 199990000.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig}) 'cnt)) -- 20000

;; ─── parallel + SYM key ─────────────────────────────────────────────
(set Tsbig (table [k1 k2 k3 v] (list (as 'SYMBOL (% (til N) 5)) (as 'SYMBOL (% (til N) 4)) (% (til N) 3) (til N))))
(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig})) -- 60
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig}) 'tot)) -- 199990000.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig}) 'cnt)) -- 20000

;; ─── parallel + 6 keys + F64 v ──────────────────────────────────────
(set Tf6 (table [k1 k2 k3 k4 k5 k6 v] (list (% (til N) 5) (% (til N) 4) (% (til N) 3) (% (til N) 2) (% (til N) 2) (% (til N) 2) (as 'F64 (til N)))))
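;; k4..k6 all equal (i % 2), which k2 = (i % 4) already determines, so the
;; group count is not the product of the key cardinalities (the extra keys
;; add no new groups). Total sum/count do not depend on the grouping, so
;; only those are checked.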
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: Tf6}) 'tot)) -- 199990000.0
(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: Tf6}) 'cnt)) -- 20000
Loading
Loading