diff --git a/docs/memory.svg b/docs/memory.svg index d3d08588..eaf70b36 100644 --- a/docs/memory.svg +++ b/docs/memory.svg @@ -104,7 +104,7 @@ - nullmap / slice_parent / ext_nullmap + nullmap / slice_parent / index / link 16 B diff --git a/docs/superpowers/plans/2026-05-04-universal-dag-vm.md b/docs/superpowers/plans/2026-05-04-universal-dag-vm.md index 774e8199..1439c2df 100644 --- a/docs/superpowers/plans/2026-05-04-universal-dag-vm.md +++ b/docs/superpowers/plans/2026-05-04-universal-dag-vm.md @@ -35,9 +35,9 @@ --- -## Phase 1 — Boundary materialisation (Layer B) +## Pass 1 — Boundary materialisation (Layer B) -These tasks make it safe for producers to return lazy. After Phase 1 the codebase still produces no lazy values, so behaviour is unchanged — but the safety net is in place. +These tasks make it safe for producers to return lazy. After Pass 1 the codebase still produces no lazy values, so behaviour is unchanged — but the safety net is in place. ### Task 1: `ray_lazy_materialize` runs `ray_optimize` @@ -180,11 +180,11 @@ computation." - [ ] **Step 2: If any gaps found, add materialise prelude per the same pattern as Tasks 2–3** - Otherwise, no commit — Phase 1 is complete. + Otherwise, no commit — Pass 1 is complete. --- -## Phase 2 — Flip producers to return lazy (Layer A, partial) +## Pass 2 — Flip producers to return lazy (Layer A, partial) Only the `AGG_VEC_VIA_DAG` macro flip in this phase. The single-op leaf cases in `ray_min_fn` / `ray_max_fn` (`agg.c:225, 254`) keep their `wrap+materialize` because they need `recast_i64_to_orig` post-processing that depends on a concrete result. That recast is a separate executor cleanup, deferred. @@ -255,7 +255,7 @@ agg.c that were dormant code until now." --- -## Phase 3 — Lift four ops into the DAG (Layer C) +## Pass 3 — Lift four ops into the DAG (Layer C) Each task is one op and is fully self-contained: opcode + builder + executor + dump entry + lazy-append type rule + `*_fn` refactor. Land in any order. 
@@ -488,7 +488,7 @@ Same shape. `OP_REVERSE = 107`. Refactors `ray_reverse_fn` (`collection.c:1710`) --- -## Phase 4 — Idiom rewrite pass (Layer D) +## Pass 4 — Idiom rewrite pass (Layer D) ### Task 10: Skeleton — `idiom.h` + `idiom.c` with empty table, wired into `ray_optimize` diff --git a/docs/superpowers/specs/2026-05-04-dag-idiom-rewrite-design.md b/docs/superpowers/specs/2026-05-04-dag-idiom-rewrite-design.md index 7f70936b..55961a31 100644 --- a/docs/superpowers/specs/2026-05-04-dag-idiom-rewrite-design.md +++ b/docs/superpowers/specs/2026-05-04-dag-idiom-rewrite-design.md @@ -45,7 +45,7 @@ showed: calls**. They work today only because no producer ever returns lazy. -So the original "Phase 1 = lift four ops; Phase 2 = idiom rewriter" +So the original "Pass 1 = lift four ops; Pass 2 = idiom rewriter" framing was incomplete. The honest framing is one principle with three mechanical consequences. This revision restructures around that. diff --git a/include/rayforce.h b/include/rayforce.h index d87bfdfd..63263331 100644 --- a/include/rayforce.h +++ b/include/rayforce.h @@ -113,22 +113,27 @@ typedef enum { typedef union ray_t { /* Allocated: object header */ struct { - /* Bytes 0-15: nullable bitmask / slice / ext nullmap / index */ + /* Bytes 0-15: slice / sym_dict / str_pool / index / link arm. + * Null state is sentinel-encoded in the payload (see + * src/vec/vec.c); this 16-byte slot carries no bitmap bits. + * The `nullmap` name is retained as the raw-byte view used by + * atoms (nullmap[0]&1), envs (builtin name @ nullmap[2..15]), + * tables / dicts / lists / str-pools (zero-init), and the col + * on-disk header. 
*/ union { uint8_t nullmap[16]; - struct { union ray_t* slice_parent; int64_t slice_offset; }; - struct { union ray_t* ext_nullmap; union ray_t* sym_dict; }; - struct { union ray_t* str_ext_null; union ray_t* str_pool; }; + struct { union ray_t* slice_parent; int64_t slice_offset; }; + struct { uint8_t _aux_sym_lo[8]; union ray_t* sym_dict; }; + struct { uint8_t _aux_str_lo[8]; union ray_t* str_pool; }; /* RAY_ATTR_HAS_INDEX (vectors): ray_t* of type RAY_INDEX - * carrying both the accelerator payload and the saved nullmap - * bytes. _idx_pad is reserved (must be NULL). See ops/idxop.h. */ - struct { union ray_t* index; union ray_t* _idx_pad; }; - /* RAY_ATTR_HAS_LINK (vectors, RAY_I32/RAY_I64 only): bytes 8-15 - * hold an int64 sym ID naming the target table. link_lo[8] - * aliases bytes 0-7 (inline nullmap bits OR ext_nullmap pointer - * OR HAS_INDEX index pointer, depending on the other arm in use). - * See ops/linkop.h. */ - struct { uint8_t link_lo[8]; int64_t link_target; }; + * carrying the accelerator payload and the saved nullmap + * bytes. _idx_pad is reserved (must be NULL). See + * ops/idxop.h. */ + struct { union ray_t* index; union ray_t* _idx_pad; }; + /* RAY_ATTR_HAS_LINK (vectors, RAY_I32/RAY_I64 only): bytes + * 8-15 hold an int64 sym ID naming the target table. + * link_lo[8] aliases bytes 0-7. See ops/linkop.h. */ + struct { uint8_t link_lo[8]; int64_t link_target; }; }; /* Bytes 16-31: metadata + value */ uint8_t mmod; /* 0=heap, 1=file-mmap */ @@ -310,48 +315,56 @@ ray_t* ray_typed_null(int8_t type); * directly (e.g. `x == NULL_I64`, `x != x` for NaN); there are no predicate * macros or aliases. Temporal types (DATE/TIME/TIMESTAMP) reuse NULL_I32 or * NULL_I64 based on their storage width. SYM null = sym ID 0; STR null = - * empty string (length 0); BOOL and U8 are non-nullable. - * - * Phase 1 added the constants and locked BOOL/U8 down as non-nullable. 
- * Phase 2 wired NULL_F64 into the CSV parser, ray_typed_null, and the - * I64→F64 UPDATE cast — null F64 slots now hold NaN alongside the - * nullmap bit. - * Phase 3a generalized this to integer / temporal types (I16, I32, I64, - * DATE, TIME, TIMESTAMP). Producer surface mirrors Phase 2 — CSV - * parser, ray_typed_null, cast_vec_copy_nulls, set_all_null, - * store_typed_elem (lang/internal.h), UPDATE atom broadcast (3 sites), - * UPDATE WHERE numeric-promo cast, group-by key scatter (serial + - * parallel + grpt TOP_N), pivot key scatter, linkop deref. The - * grouped-aggregation consumer (da_accum_row + scalar_accum_row) gained - * per-agg integer-null guards in the SUM/AVG/STDDEV/VAR/PROD/MIN/MAX/ - * FIRST/LAST arms — sentinel-compare (`v != precomputed_sentinel`) - * rather than nullmap consultation for cache-line efficiency; the - * tradeoff (a user-stored INT_MIN in a HAS_NULLS column is dropped) - * is bounded by dual encoding keeping the bitmap as source of truth. - * Phase 3b closed the documented finalization gaps in the - * scalar and direct-array (DA) grouped accumulators: per-(group, agg) - * non-null counts (`nn_count[gid * n_aggs + a]`) drive AVG / VAR / - * STDDEV divisors and gate MIN / MAX / PROD / FIRST / LAST result - * emission — all-null groups now produce a typed null (NULL_F64 / - * NULL_I64 plus the nullmap bit) instead of leaking the accumulator - * seed (DBL_MAX / -DBL_MAX / 0 / product identity). FIRST/LAST also - * gained "skip null rows" semantics: a null prefix no longer advances - * acc->first_row[gid]. The multi-key radix HT (accum_from_entry, - * ~line 2155) still inherits the pre-existing nullable-agg gap noted - * at the sparse-path fallback (~line 5728). - * Through Phase 7 (full cutover) the bitmap bit `nullmap[0] & 1` is - * kept in sync with the sentinel value for atoms ("dual encoding"), so - * legacy bitmap-aware readers and new sentinel-aware readers agree. 
- * After Phase 7 the bitmap arm is reclaimed for inline stats and the - * bit becomes a pure optimization hint. */ + * empty string (length 0); BOOL and U8 are non-nullable. */ #define NULL_I16 ((int16_t)INT16_MIN) #define NULL_I32 ((int32_t)INT32_MIN) #define NULL_I64 ((int64_t)INT64_MIN) +#define NULL_F32 ((float)__builtin_nanf("")) #define NULL_F64 (__builtin_nan("")) -/* Null bitmap check for atoms — bit 0 of nullmap[0] marks typed nulls. - * Also matches RAY_NULL_OBJ (the untyped null singleton). */ -#define RAY_ATOM_IS_NULL(x) (RAY_IS_NULL(x) || ((x)->type < 0 && ((x)->nullmap[0] & 1))) +/* Atom null check. RAY_NULL_OBJ is the untyped null singleton. + * Typed atoms with a defined NULL_* sentinel use payload-compare + * (F64/F32 via NaN self-compare); types with no case below (e.g. + * BOOL/U8) fall back to the nullmap[0]&1 bit written by ray_typed_null. */ +static inline bool ray_atom_is_null_fn(const union ray_t* x) { + if (RAY_IS_NULL(x)) return true; + if (x->type >= 0) return false; + switch (-x->type) { + case RAY_F64: return x->f64 != x->f64; + case RAY_F32: { + /* F32 atoms reuse the f64 union slot — see ray_f32 / atom.c. */ + float f = (float)x->f64; + return f != f; + } + case RAY_I64: + case RAY_TIMESTAMP: return x->i64 == NULL_I64; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: return x->i32 == NULL_I32; + case RAY_I16: return x->i16 == NULL_I16; + case RAY_SYM: return x->i64 == 0; + case RAY_STR: + /* STR atom null = empty string. Atoms use SSO (slen + sdata) + * for len<=7 and a pool pointer (obj) for longer strings; the + * union overlap means a non-zero obj pointer has a low byte + * that ALSO reads as slen via the SSO arm. Only when slen==0 + * AND obj==NULL is the atom genuinely the empty string (see + * is_sso in src/vec/str.c). */ + return x->slen == 0 && x->obj == NULL; + case RAY_GUID: { + /* GUID null = 16 all-zero bytes in obj's U8 buffer.
+ * obj is always populated by ray_guid / ray_typed_null — + * a NULL obj indicates corruption; treat as null + * defensively. */ + if (!x->obj) return true; + const uint8_t* b = (const uint8_t*)((char*)x->obj + sizeof(union ray_t)); + for (int i = 0; i < 16; i++) if (b[i]) return false; + return true; + } + default: return (x->nullmap[0] & 1) != 0; + } +} +#define RAY_ATOM_IS_NULL(x) ray_atom_is_null_fn(x) /* ===== Vector API ===== */ diff --git a/src/core/morsel.c b/src/core/morsel.c index 3184cc3a..ccc5d9c6 100644 --- a/src/core/morsel.c +++ b/src/core/morsel.c @@ -68,37 +68,41 @@ bool ray_morsel_next(ray_morsel_t* m) { m->morsel_len = remaining < RAY_MORSEL_ELEMS ? remaining : RAY_MORSEL_ELEMS; m->morsel_ptr = (uint8_t*)ray_data(m->vec) + (size_t)m->offset * m->elem_size; - /* Null bitmap: only if HAS_NULLS. - * M5: null_bits points to the byte containing bit (m->offset). - * Callers must account for (m->offset % 8) bit offset within the - * first byte of null_bits when testing individual null bits. + /* Null bitmap: synthesized per-morsel from sentinel reads. + * null_bits points at null_bits_buf[0]; the caller indexes + * starting at bit (m->offset & 7) just like the previous + * source-bitmap layout did. The (m->offset / 8) byte offset is + * not mirrored: element m->offset + k is bit (m->offset & 7) + k. * - * HAS_INDEX path: when an accelerator index is attached, the parent's - * 16-byte nullmap union holds the index pointer instead of bitmap data - * (or ext_nullmap pointer). The original bytes are preserved inside - * ix->saved_nullmap. Route through that snapshot here so null-aware - * loops still see the correct bits. */ + * Synthesizing on demand sidesteps the source bitmap entirely: + * sentinel-supporting types (F64 / F32 / integer & temporal / + * STR / GUID) have the source bitmap stripped, so reading it + * directly would give stale zeros. Cost is one O(morsel_len) + * sentinel scan per chunk; cheap given morsel_len <= 1024.
*/ m->null_bits = NULL; if (m->vec->attrs & RAY_ATTR_HAS_NULLS) { - if (m->vec->attrs & RAY_ATTR_HAS_INDEX) { - ray_index_t* ix = ray_index_payload(m->vec->index); - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* ext; - memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); - m->null_bits = (uint8_t*)ray_data(ext) + (m->offset / 8); - } else if (m->offset < 128) { - m->null_bits = ix->saved_nullmap + (m->offset / 8); + int64_t bit0 = m->offset & 7; + int64_t base_byte = m->offset / 8; + int64_t total_bits = bit0 + m->morsel_len; + int64_t nbytes = (total_bits + 7) / 8; + if ((size_t)nbytes > sizeof(m->null_bits_buf)) { + /* Defensive — a full 1024-element morsel (RAY_MORSEL_ELEMS) needs + * 128 bytes when bit0 == 0 and 129 otherwise. NOTE(review): this bail + * leaves null_bits NULL although HAS_NULLS is set; confirm the buffer covers bit0 != 0. */ + return true; + } + memset(m->null_bits_buf, 0, (size_t)nbytes); + for (int64_t k = 0; k < m->morsel_len; k++) { + if (ray_vec_is_null(m->vec, m->offset + k)) { + int64_t b = bit0 + k; + m->null_bits_buf[b >> 3] |= (uint8_t)(1u << (b & 7)); } - } else if (m->vec->attrs & RAY_ATTR_NULLMAP_EXT) { - /* External bitmap: point to correct byte offset */ - ray_t* ext = m->vec->ext_nullmap; - m->null_bits = (uint8_t*)ray_data(ext) + (m->offset / 8); - } else if (m->offset < 128) { - /* Inline bitmap is 16 bytes = 128 bits; vectors with HAS_NULLS - * and >128 elements must use external nullmap (RAY_ATTR_NULLMAP_EXT). - * Returns null_bits=NULL for offset>=128 when using inline bitmap. */ - m->null_bits = m->vec->nullmap + (m->offset / 8); } + /* Mimic the prior contract: pointer addresses the byte that + * holds bit (m->offset). Callers index into it starting at + * bit (m->offset & 7).
*/ + m->null_bits = m->null_bits_buf; + (void)base_byte; } return true; diff --git a/src/io/csv.c b/src/io/csv.c index 212b4fba..f8189ecb 100644 --- a/src/io/csv.c +++ b/src/io/csv.c @@ -725,8 +725,7 @@ static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols, const csv_type_t* col_types, const int8_t* resolved_types, void** col_data, int64_t n_rows, - int64_t* col_max_ids, - uint8_t** col_nullmaps) { + int64_t* col_max_ids) { bool ok = true; /* CSV/TSV import policy for SYM columns: empty fields write the @@ -751,7 +750,6 @@ static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols, csv_strref_t* refs = str_refs[c]; if (!refs) continue; uint32_t* ids = (uint32_t*)col_data[c]; - uint8_t* nm = col_nullmaps ? col_nullmaps[c] : NULL; int64_t max_id = empty_sym_id; /* Pre-grow: upper bound is n_rows unique strings */ @@ -760,14 +758,10 @@ static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols, return false; /* OOM: cannot grow sym table */ for (int64_t r = 0; r < n_rows; r++) { - if (nm && (nm[r >> 3] & (1u << (r & 7)))) { + if (refs[r].ptr == NULL) { /* Empty/missing field → sym 0 (the canonical empty - * symbol). Clear the parse-time null bit so the - * post-pass attr-strip step doesn't leave HAS_NULLS - * set on a SYM column — SYM columns are no-null by - * design and ray_vec_set_null rejects them. */ + * symbol). SYM columns are no-null by design. */ ids[r] = (uint32_t)empty_sym_id; - nm[r >> 3] &= (uint8_t)~(1u << (r & 7)); continue; } int64_t id = ray_sym_intern_no_split_unlocked(refs[r].ptr, refs[r].len); @@ -803,12 +797,10 @@ static void csv_free_escaped_strrefs(csv_strref_t** str_refs, int n_cols, * that ray_str_vec_set would take for a freshly-owned vector. 
*/ static bool csv_fill_str_cols(csv_strref_t** str_refs, int n_cols, const int8_t* resolved_types, - ray_t** col_vecs, int64_t n_rows, - uint8_t** col_nullmaps) { + ray_t** col_vecs, int64_t n_rows) { for (int c = 0; c < n_cols; c++) { if (resolved_types[c] != RAY_STR) continue; csv_strref_t* refs = str_refs[c]; - uint8_t* nm = col_nullmaps ? col_nullmaps[c] : NULL; ray_t* vec = col_vecs[c]; ray_str_t* dst = (ray_str_t*)ray_data(vec); @@ -817,7 +809,7 @@ static bool csv_fill_str_cols(csv_strref_t** str_refs, int n_cols, * wouldn't fit in the u32 offset field. */ uint64_t pool_bytes = 0; for (int64_t r = 0; r < n_rows; r++) { - if (nm && (nm[r >> 3] & (1u << (r & 7)))) continue; + if (refs[r].ptr == NULL) continue; uint32_t l = refs[r].len; if (l > RAY_STR_INLINE_MAX) pool_bytes += l; } @@ -836,7 +828,7 @@ static bool csv_fill_str_cols(csv_strref_t** str_refs, int n_cols, for (int64_t r = 0; r < n_rows; r++) { memset(&dst[r], 0, sizeof(ray_str_t)); - if (nm && (nm[r >> 3] & (1u << (r & 7)))) continue; + if (refs[r].ptr == NULL) continue; const char* p = refs[r].ptr; uint32_t l = refs[r].len; dst[r].len = l; @@ -871,7 +863,6 @@ typedef struct { ray_t** col_vecs; int64_t n_rows; int64_t* sym_max_ids; - uint8_t** col_nullmaps; bool fill_ok; bool intern_ok; } csv_finalize_ctx_t; @@ -882,11 +873,11 @@ static void csv_finalize_task(void* arg, uint32_t worker_id, csv_finalize_ctx_t* ctx = (csv_finalize_ctx_t*)arg; if (start == 0) { ctx->fill_ok = csv_fill_str_cols(ctx->str_refs, ctx->n_cols, - ctx->resolved_types, ctx->col_vecs, ctx->n_rows, ctx->col_nullmaps); + ctx->resolved_types, ctx->col_vecs, ctx->n_rows); } else { ctx->intern_ok = csv_intern_strings(ctx->str_refs, ctx->n_cols, ctx->parse_types, ctx->resolved_types, ctx->col_data, - ctx->n_rows, ctx->sym_max_ids, ctx->col_nullmaps); + ctx->n_rows, ctx->sym_max_ids); } } @@ -905,7 +896,6 @@ typedef struct { const int8_t* resolved_types; void** col_data; /* non-const: workers write parsed values into columns */ 
csv_strref_t** str_refs; /* [n_cols] — strref arrays for string columns, NULL for others */ - uint8_t** col_nullmaps; bool* worker_had_null; /* [n_workers * n_cols] */ } csv_par_ctx_t; @@ -929,9 +919,6 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, switch (ctx->col_types[c]) { case CSV_TYPE_BOOL: ((uint8_t*)ctx->col_data[c])[row] = 0; break; case CSV_TYPE_U8: ((uint8_t*)ctx->col_data[c])[row] = 0; break; - /* Phase 3a dual encoding: integer/temporal nulls - * carry the width-correct INT_MIN sentinel in the - * payload alongside the nullmap bit. */ case CSV_TYPE_I16: ((int16_t*)ctx->col_data[c])[row] = NULL_I16; break; case CSV_TYPE_I32: ((int32_t*)ctx->col_data[c])[row] = NULL_I32; break; case CSV_TYPE_I64: ((int64_t*)ctx->col_data[c])[row] = NULL_I64; break; @@ -949,12 +936,9 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, break; default: break; } - /* BOOL/U8 are non-nullable (Phase 1 lockdown). Empty - * cells store the default 0/false above and skip the - * nullmap mark. */ + /* BOOL/U8 are non-nullable; empty cells store 0/false. */ if (ctx->col_types[c] != CSV_TYPE_BOOL && ctx->col_types[c] != CSV_TYPE_U8) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); my_had_null[c] = true; } } @@ -972,9 +956,8 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, switch (ctx->col_types[c]) { case CSV_TYPE_BOOL: { - /* BOOL is non-nullable (Phase 1). fast_bool returns 0 - * for empty / unparseable input; we store it as-is and - * never mark a nullmap bit. */ + /* BOOL is non-nullable; fast_bool returns 0 for + * empty / unparseable input and we store it as-is. */ bool is_null; uint8_t v = fast_bool(fld, flen, &is_null); ((uint8_t*)ctx->col_data[c])[row] = v; @@ -983,18 +966,13 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, case CSV_TYPE_I64: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I64 whenever nullmap bit is set. 
*/ ((int64_t*)ctx->col_data[c])[row] = is_null ? NULL_I64 : v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_U8: { - /* U8 is non-nullable (Phase 1). fast_i64 returns 0 for - * empty / unparseable input; we store it as-is and - * never mark a nullmap bit. */ + /* U8 is non-nullable; fast_i64 returns 0 for + * empty / unparseable input and we store it as-is. */ bool is_null; int64_t v = fast_i64(fld, flen, &is_null); ((uint8_t*)ctx->col_data[c])[row] = (uint8_t)v; @@ -1003,67 +981,43 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, case CSV_TYPE_I16: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I16 whenever nullmap bit is set. */ ((int16_t*)ctx->col_data[c])[row] = is_null ? NULL_I16 : (int16_t)v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_I32: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. */ ((int32_t*)ctx->col_data[c])[row] = is_null ? NULL_I32 : (int32_t)v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_F64: { bool is_null; double v = fast_f64(fld, flen, &is_null); - /* Phase 2 dual encoding: payload is NaN whenever nullmap bit is set. */ ((double*)ctx->col_data[c])[row] = is_null ? NULL_F64 : v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_DATE: { bool is_null; int32_t v = fast_date(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. 
*/ ((int32_t*)ctx->col_data[c])[row] = is_null ? NULL_I32 : v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_TIME: { bool is_null; int32_t v = fast_time(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. */ ((int32_t*)ctx->col_data[c])[row] = is_null ? NULL_I32 : v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_TIMESTAMP: { bool is_null; int64_t v = fast_timestamp(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I64 whenever nullmap bit is set. */ ((int64_t*)ctx->col_data[c])[row] = is_null ? NULL_I64 : v; - if (is_null) { - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - my_had_null[c] = true; - } + if (is_null) my_had_null[c] = true; break; } case CSV_TYPE_GUID: { @@ -1072,7 +1026,6 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, fast_guid(fld, flen, slot, &is_null); if (is_null) { memset(slot, 0, 16); - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); my_had_null[c] = true; } break; @@ -1081,7 +1034,6 @@ static void csv_parse_fn(void* arg, uint32_t worker_id, if (flen == 0) { ctx->str_refs[c][row].ptr = NULL; ctx->str_refs[c][row].len = 0; - ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); my_had_null[c] = true; } else { /* fld may point into esc_buf (stack) or dyn_esc @@ -1119,7 +1071,7 @@ static void csv_parse_serial(const char* buf, size_t buf_size, const int8_t* resolved_types, void** col_data, csv_strref_t** str_refs, - uint8_t** col_nullmaps, bool* col_had_null) { + bool* col_had_null) { char esc_buf[8192]; const char* buf_end = buf + buf_size; @@ -1136,9 +1088,6 @@ static void csv_parse_serial(const char* buf, size_t buf_size, switch (col_types[c]) { case CSV_TYPE_BOOL: 
((uint8_t*)col_data[c])[row] = 0; break; case CSV_TYPE_U8: ((uint8_t*)col_data[c])[row] = 0; break; - /* Phase 3a dual encoding: integer/temporal nulls - * carry the width-correct INT_MIN sentinel in the - * payload alongside the nullmap bit. */ case CSV_TYPE_I16: ((int16_t*)col_data[c])[row] = NULL_I16; break; case CSV_TYPE_I32: ((int32_t*)col_data[c])[row] = NULL_I32; break; case CSV_TYPE_I64: ((int64_t*)col_data[c])[row] = NULL_I64; break; @@ -1156,10 +1105,9 @@ static void csv_parse_serial(const char* buf, size_t buf_size, break; default: break; } - /* BOOL/U8 are non-nullable (Phase 1 lockdown). */ + /* BOOL/U8 are non-nullable; empty cells store 0/false. */ if (col_types[c] != CSV_TYPE_BOOL && col_types[c] != CSV_TYPE_U8) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); col_had_null[c] = true; } } @@ -1177,7 +1125,7 @@ static void csv_parse_serial(const char* buf, size_t buf_size, switch (col_types[c]) { case CSV_TYPE_BOOL: { - /* BOOL is non-nullable (Phase 1 lockdown). */ + /* BOOL is non-nullable. */ bool is_null; uint8_t v = fast_bool(fld, flen, &is_null); ((uint8_t*)col_data[c])[row] = v; @@ -1186,16 +1134,12 @@ static void csv_parse_serial(const char* buf, size_t buf_size, case CSV_TYPE_I64: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I64 whenever nullmap bit is set. */ ((int64_t*)col_data[c])[row] = is_null ? NULL_I64 : v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_U8: { - /* U8 is non-nullable (Phase 1 lockdown). */ + /* U8 is non-nullable. 
*/ bool is_null; int64_t v = fast_i64(fld, flen, &is_null); ((uint8_t*)col_data[c])[row] = (uint8_t)v; @@ -1204,67 +1148,43 @@ static void csv_parse_serial(const char* buf, size_t buf_size, case CSV_TYPE_I16: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I16 whenever nullmap bit is set. */ ((int16_t*)col_data[c])[row] = is_null ? NULL_I16 : (int16_t)v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_I32: { bool is_null; int64_t v = fast_i64(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. */ ((int32_t*)col_data[c])[row] = is_null ? NULL_I32 : (int32_t)v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_F64: { bool is_null; double v = fast_f64(fld, flen, &is_null); - /* Phase 2 dual encoding: payload is NaN whenever nullmap bit is set. */ ((double*)col_data[c])[row] = is_null ? NULL_F64 : v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_DATE: { bool is_null; int32_t v = fast_date(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. */ ((int32_t*)col_data[c])[row] = is_null ? NULL_I32 : v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_TIME: { bool is_null; int32_t v = fast_time(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I32 whenever nullmap bit is set. */ ((int32_t*)col_data[c])[row] = is_null ? 
NULL_I32 : v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_TIMESTAMP: { bool is_null; int64_t v = fast_timestamp(fld, flen, &is_null); - /* Phase 3a dual encoding: payload is NULL_I64 whenever nullmap bit is set. */ ((int64_t*)col_data[c])[row] = is_null ? NULL_I64 : v; - if (is_null) { - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); - col_had_null[c] = true; - } + if (is_null) col_had_null[c] = true; break; } case CSV_TYPE_GUID: { @@ -1273,7 +1193,6 @@ static void csv_parse_serial(const char* buf, size_t buf_size, fast_guid(fld, flen, slot, &is_null); if (is_null) { memset(slot, 0, 16); - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); col_had_null[c] = true; } break; @@ -1282,7 +1201,6 @@ static void csv_parse_serial(const char* buf, size_t buf_size, if (flen == 0) { str_refs[c][row].ptr = NULL; str_refs[c][row].len = 0; - col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); col_had_null[c] = true; } else { /* fld may point into esc_buf (stack) or dyn_esc @@ -1329,32 +1247,9 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, col_data[c] = ray_data(col_vecs[c]); } - uint8_t* col_nullmaps[CSV_MAX_COLS]; bool col_had_null[CSV_MAX_COLS]; if (ncols > 0) memset(col_had_null, 0, (size_t)ncols * sizeof(bool)); - for (int c = 0; c < ncols; c++) { - ray_t* vec = col_vecs[c]; - bool force_ext = (resolved_types[c] == RAY_STR); - if (n_rows <= 128 && !force_ext) { - vec->attrs |= RAY_ATTR_HAS_NULLS; - memset(vec->nullmap, 0, 16); - col_nullmaps[c] = vec->nullmap; - } else { - size_t bmp_bytes = ((size_t)n_rows + 7) / 8; - ray_t* ext = ray_vec_new(RAY_U8, (int64_t)bmp_bytes); - if (!ext || RAY_IS_ERR(ext)) { - for (int j = 0; j < ncols; j++) ray_release(col_vecs[j]); - return NULL; - } - ext->len = (int64_t)bmp_bytes; - memset(ray_data(ext), 0, bmp_bytes); - vec->ext_nullmap = ext; - vec->attrs |= 
RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT; - col_nullmaps[c] = (uint8_t*)ray_data(ext); - } - } - csv_type_t parse_types[CSV_MAX_COLS]; for (int c = 0; c < ncols; c++) { switch (resolved_types[c]) { @@ -1423,7 +1318,6 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, .resolved_types = resolved_types, .col_data = col_data, .str_refs = str_ref_bufs, - .col_nullmaps = col_nullmaps, .worker_had_null = worker_had_null_buf, }; @@ -1442,7 +1336,7 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, if (!use_parallel) { csv_parse_serial(buf, file_size, row_offsets, n_rows, ncols, delimiter, parse_types, resolved_types, col_data, - str_ref_bufs, col_nullmaps, col_had_null); + str_ref_bufs, col_had_null); } } @@ -1456,7 +1350,6 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, .col_vecs = col_vecs, .n_rows = n_rows, .sym_max_ids = sym_max_ids, - .col_nullmaps = col_nullmaps, .fill_ok = true, .intern_ok = true, }; @@ -1489,14 +1382,10 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, for (int c = 0; c < ncols; c++) { ray_t* vec = col_vecs[c]; - int strip = !col_had_null[c] || vec->type == RAY_SYM; - if (!strip) continue; - if (vec->attrs & RAY_ATTR_NULLMAP_EXT) { - ray_release(vec->ext_nullmap); - vec->ext_nullmap = NULL; - } - vec->attrs &= (uint8_t)~(RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); - if (vec->type != RAY_STR) memset(vec->nullmap, 0, 16); + if (col_had_null[c] && vec->type != RAY_SYM) + vec->attrs |= RAY_ATTR_HAS_NULLS; + else + vec->attrs &= (uint8_t)~RAY_ATTR_HAS_NULLS; } for (int c = 0; c < ncols; c++) { @@ -1515,15 +1404,7 @@ static ray_t* csv_materialize_rows(const char* buf, size_t file_size, uint16_t* d = (uint16_t*)dst; for (int64_t r = 0; r < n_rows; r++) d[r] = (uint16_t)src[r]; } - if (col_vecs[c]->attrs & RAY_ATTR_HAS_NULLS) { - narrow->attrs |= (col_vecs[c]->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT)); - if (col_vecs[c]->attrs & RAY_ATTR_NULLMAP_EXT) { - 
narrow->ext_nullmap = col_vecs[c]->ext_nullmap; - ray_retain(narrow->ext_nullmap); - } else { - memcpy(narrow->nullmap, col_vecs[c]->nullmap, 16); - } - } + narrow->attrs |= (col_vecs[c]->attrs & RAY_ATTR_HAS_NULLS); ray_release(col_vecs[c]); col_vecs[c] = narrow; col_data[c] = dst; @@ -1726,35 +1607,9 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, col_data[c] = ray_data(col_vecs[c]); } - /* ---- 8b. Pre-allocate nullmaps for all columns ---- */ - uint8_t* col_nullmaps[CSV_MAX_COLS]; bool col_had_null[CSV_MAX_COLS]; if (ncols > 0) memset(col_had_null, 0, (size_t)ncols * sizeof(bool)); - for (int c = 0; c < ncols; c++) { - ray_t* vec = col_vecs[c]; - /* RAY_STR aliases bytes 8-15 of the header with str_pool — inline - * nullmap would corrupt the pool pointer, so force external. */ - bool force_ext = (resolved_types[c] == RAY_STR); - if (n_rows <= 128 && !force_ext) { - vec->attrs |= RAY_ATTR_HAS_NULLS; - memset(vec->nullmap, 0, 16); - col_nullmaps[c] = vec->nullmap; - } else { - size_t bmp_bytes = ((size_t)n_rows + 7) / 8; - ray_t* ext = ray_vec_new(RAY_U8, (int64_t)bmp_bytes); - if (!ext || RAY_IS_ERR(ext)) { - for (int j = 0; j <= c; j++) ray_release(col_vecs[j]); - goto fail_offsets; - } - ext->len = (int64_t)bmp_bytes; - memset(ray_data(ext), 0, bmp_bytes); - vec->ext_nullmap = ext; - vec->attrs |= RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT; - col_nullmaps[c] = (uint8_t*)ray_data(ext); - } - } - /* Build csv_type_t array for parse functions (maps td types → csv types) */ csv_type_t parse_types[CSV_MAX_COLS]; for (int c = 0; c < ncols; c++) { @@ -1826,7 +1681,6 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, .resolved_types = resolved_types, .col_data = col_data, .str_refs = str_ref_bufs, - .col_nullmaps = col_nullmaps, .worker_had_null = worker_had_null_buf, }; @@ -1846,7 +1700,7 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, if (!use_parallel) { 
csv_parse_serial(buf, file_size, row_offsets, n_rows, ncols, delimiter, parse_types, resolved_types, col_data, - str_ref_bufs, col_nullmaps, col_had_null); + str_ref_bufs, col_had_null); } } @@ -1865,7 +1719,6 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, .col_vecs = col_vecs, .n_rows = n_rows, .sym_max_ids = sym_max_ids, - .col_nullmaps = col_nullmaps, .fill_ok = true, .intern_ok = true, }; @@ -1897,28 +1750,17 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, csv_free_escaped_strrefs(str_ref_bufs, ncols, parse_types, n_rows, buf, file_size); for (int c = 0; c < ncols; c++) scratch_free(str_ref_hdrs[c]); - /* ---- 9c. Strip nullmaps from all-valid columns ---- + /* ---- 9c. Set HAS_NULLS for columns that saw a null ---- * - * A column qualifies as "no nulls" if either: - * - the parser never saw a null (col_had_null[c] == false), or - * - it's a SYM column. SYM is no-null by design — empty fields - * were already remapped to sym 0 in step 9b, and SYM columns - * never carry HAS_NULLS regardless of what the parse-time - * nullmap looked like. - * - * For non-SYM columns where col_had_null is true, the nullmap - * stays. */ + * Sentinels in the payload carry the null state; HAS_NULLS is the + * vec-level fast-path bit. SYM columns are no-null by design — + * empty fields were already remapped to sym 0 in step 9b. */ for (int c = 0; c < ncols; c++) { ray_t* vec = col_vecs[c]; - int strip = !col_had_null[c] || vec->type == RAY_SYM; - if (!strip) continue; - if (vec->attrs & RAY_ATTR_NULLMAP_EXT) { - ray_release(vec->ext_nullmap); - vec->ext_nullmap = NULL; - } - vec->attrs &= (uint8_t)~(RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); - /* RAY_STR stores str_pool in bytes 8-15 of the header — don't wipe. 
*/ - if (vec->type != RAY_STR) memset(vec->nullmap, 0, 16); + if (col_had_null[c] && vec->type != RAY_SYM) + vec->attrs |= RAY_ATTR_HAS_NULLS; + else + vec->attrs &= (uint8_t)~RAY_ATTR_HAS_NULLS; } /* ---- 10. Narrow sym columns to optimal width ---- */ @@ -1938,16 +1780,7 @@ ray_t* ray_read_csv_named_opts(const char* path, char delimiter, bool header, uint16_t* d = (uint16_t*)dst; for (int64_t r = 0; r < n_rows; r++) d[r] = (uint16_t)src[r]; } - /* Transfer nullmap to narrowed vector */ - if (col_vecs[c]->attrs & RAY_ATTR_HAS_NULLS) { - narrow->attrs |= (col_vecs[c]->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT)); - if (col_vecs[c]->attrs & RAY_ATTR_NULLMAP_EXT) { - narrow->ext_nullmap = col_vecs[c]->ext_nullmap; - ray_retain(narrow->ext_nullmap); - } else { - memcpy(narrow->nullmap, col_vecs[c]->nullmap, 16); - } - } + narrow->attrs |= (col_vecs[c]->attrs & RAY_ATTR_HAS_NULLS); ray_release(col_vecs[c]); col_vecs[c] = narrow; col_data[c] = dst; @@ -1990,16 +1823,12 @@ ray_t* ray_read_csv_opts(const char* path, char delimiter, bool header, typedef struct { FILE* fp; - FILE* null_fp; char path[1024]; char tmp_path[1024]; - char null_tmp_path[1024]; int8_t type; uint8_t attrs; int64_t rows; bool had_nulls; - uint8_t null_acc; - uint8_t null_bits; } csv_splayed_col_writer_t; static ray_err_t csv_splayed_writer_open(csv_splayed_col_writer_t* w, @@ -2023,8 +1852,6 @@ static ray_err_t csv_splayed_writer_open(csv_splayed_col_writer_t* w, if (n < 0 || (size_t)n >= sizeof(w->path)) return RAY_ERR_RANGE; n = snprintf(w->tmp_path, sizeof(w->tmp_path), "%s.tmp", w->path); if (n < 0 || (size_t)n >= sizeof(w->tmp_path)) return RAY_ERR_RANGE; - n = snprintf(w->null_tmp_path, sizeof(w->null_tmp_path), "%s.nulltmp", w->path); - if (n < 0 || (size_t)n >= sizeof(w->null_tmp_path)) return RAY_ERR_RANGE; w->fp = fopen(w->tmp_path, "wb+"); if (!w->fp) return RAY_ERR_IO; @@ -2033,53 +1860,6 @@ static ray_err_t csv_splayed_writer_open(csv_splayed_col_writer_t* w, return RAY_OK; } 
-static ray_err_t csv_splayed_writer_null_bit(csv_splayed_col_writer_t* w, - bool is_null) { - if (!w->null_fp) { - w->null_fp = fopen(w->null_tmp_path, "wb"); - if (!w->null_fp) return RAY_ERR_IO; - } - if (is_null) w->null_acc |= (uint8_t)(1u << w->null_bits); - w->null_bits++; - if (w->null_bits == 8) { - if (fwrite(&w->null_acc, 1, 1, w->null_fp) != 1) return RAY_ERR_IO; - w->null_acc = 0; - w->null_bits = 0; - } - return RAY_OK; -} - -static ray_err_t csv_splayed_writer_zero_nulls(csv_splayed_col_writer_t* w, - int64_t count) { - if (count <= 0) return RAY_OK; - if (!w->null_fp) { - w->null_fp = fopen(w->null_tmp_path, "wb"); - if (!w->null_fp) return RAY_ERR_IO; - } - - while (count > 0 && w->null_bits != 0) { - w->null_bits++; - if (w->null_bits == 8) { - if (fwrite(&w->null_acc, 1, 1, w->null_fp) != 1) return RAY_ERR_IO; - w->null_acc = 0; - w->null_bits = 0; - } - count--; - } - - uint8_t zeros[8192] = {0}; - int64_t bytes = count / 8; - while (bytes > 0) { - size_t chunk = (bytes > (int64_t)sizeof(zeros)) ? 
sizeof(zeros) : (size_t)bytes; - if (fwrite(zeros, 1, chunk, w->null_fp) != chunk) return RAY_ERR_IO; - bytes -= (int64_t)chunk; - } - - w->null_bits = (uint8_t)(count & 7); - w->null_acc = 0; - return RAY_OK; -} - static ray_err_t csv_splayed_writer_append(csv_splayed_col_writer_t* w, ray_t* col) { if (!w->fp || !col || RAY_IS_ERR(col)) return RAY_ERR_TYPE; @@ -2104,20 +1884,7 @@ static ray_err_t csv_splayed_writer_append(csv_splayed_col_writer_t* w, size_t bytes = (size_t)n * (size_t)esz; if (bytes && fwrite(ray_data(col), 1, bytes, w->fp) != bytes) return RAY_ERR_IO; - if (col->attrs & RAY_ATTR_HAS_NULLS) { - if (!w->had_nulls) { - ray_err_t err = csv_splayed_writer_zero_nulls(w, w->rows); - if (err != RAY_OK) return err; - } - w->had_nulls = true; - for (int64_t i = 0; i < n; i++) { - ray_err_t err = csv_splayed_writer_null_bit(w, ray_vec_is_null(col, i)); - if (err != RAY_OK) return err; - } - } else if (w->had_nulls) { - ray_err_t err = csv_splayed_writer_zero_nulls(w, n); - if (err != RAY_OK) return err; - } + if (col->attrs & RAY_ATTR_HAS_NULLS) w->had_nulls = true; } w->rows += n; return RAY_OK; @@ -2126,27 +1893,6 @@ static ray_err_t csv_splayed_writer_append(csv_splayed_col_writer_t* w, static ray_err_t csv_splayed_writer_close(csv_splayed_col_writer_t* w) { if (!w->fp) return RAY_OK; ray_err_t err = RAY_OK; - if (w->null_fp && w->null_bits) { - if (fwrite(&w->null_acc, 1, 1, w->null_fp) != 1) err = RAY_ERR_IO; - w->null_acc = 0; - w->null_bits = 0; - } - if (w->null_fp && fclose(w->null_fp) != 0 && err == RAY_OK) err = RAY_ERR_IO; - w->null_fp = NULL; - - if (err == RAY_OK && w->had_nulls) { - FILE* nf = fopen(w->null_tmp_path, "rb"); - if (!nf) err = RAY_ERR_IO; - else { - char buf[65536]; - size_t nr; - while ((nr = fread(buf, 1, sizeof(buf), nf)) > 0) { - if (fwrite(buf, 1, nr, w->fp) != nr) { err = RAY_ERR_IO; break; } - } - if (ferror(nf) && err == RAY_OK) err = RAY_ERR_IO; - fclose(nf); - } - } if (err == RAY_OK) { ray_t hdr = {0}; @@ -2154,8 
+1900,7 @@ static ray_err_t csv_splayed_writer_close(csv_splayed_col_writer_t* w) { hdr.attrs = w->attrs; hdr.len = w->rows; hdr.rc = (w->type == RAY_SYM) ? ray_sym_count() : 0; - if (w->had_nulls) - hdr.attrs |= RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT; + if (w->had_nulls) hdr.attrs |= RAY_ATTR_HAS_NULLS; if (fseek(w->fp, 0, SEEK_SET) != 0 || fwrite(&hdr, 1, 32, w->fp) != 32) err = RAY_ERR_IO; @@ -2163,7 +1908,6 @@ static ray_err_t csv_splayed_writer_close(csv_splayed_col_writer_t* w) { if (fclose(w->fp) != 0 && err == RAY_OK) err = RAY_ERR_IO; w->fp = NULL; - remove(w->null_tmp_path); if (err == RAY_OK) err = ray_file_rename(w->tmp_path, w->path); if (err != RAY_OK) remove(w->tmp_path); return err; @@ -2171,11 +1915,8 @@ static ray_err_t csv_splayed_writer_close(csv_splayed_col_writer_t* w) { static void csv_splayed_writer_abort(csv_splayed_col_writer_t* w) { if (w->fp) fclose(w->fp); - if (w->null_fp) fclose(w->null_fp); w->fp = NULL; - w->null_fp = NULL; remove(w->tmp_path); - remove(w->null_tmp_path); } ray_err_t ray_csv_save_splayed_named_opts(const char* path, char delimiter, bool header, diff --git a/src/lang/eval.c b/src/lang/eval.c index a076d56f..2c6af584 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -1154,8 +1154,6 @@ ray_t* gather_by_idx(ray_t* vec, int64_t* idx, int64_t n) { const ray_t* dict_owner = (vec->attrs & RAY_ATTR_SLICE) ? vec->slice_parent : vec; if (dict_owner && !(dict_owner->attrs & RAY_ATTR_SLICE) && - (!(dict_owner->attrs & RAY_ATTR_HAS_NULLS) || - (dict_owner->attrs & RAY_ATTR_NULLMAP_EXT)) && dict_owner->sym_dict) { ray_retain(dict_owner->sym_dict); result->sym_dict = dict_owner->sym_dict; diff --git a/src/lang/internal.h b/src/lang/internal.h index 5fdcbf41..13fffe64 100644 --- a/src/lang/internal.h +++ b/src/lang/internal.h @@ -277,8 +277,7 @@ static inline int64_t elem_as_i64(ray_t* elem) { * Returns 0 on success, -1 if the element type doesn't match. 
*/ static inline int store_typed_elem(ray_t* vec, int64_t i, ray_t* elem) { if (RAY_ATOM_IS_NULL(elem)) { - /* Phase 2/3a dual-encoding: payload must carry the width-correct - * sentinel alongside the nullmap bit. */ + /* Payload carries the width-correct sentinel. */ switch (vec->type) { case RAY_F64: ((double*)ray_data(vec))[i] = NULL_F64; break; diff --git a/src/lang/parse.c b/src/lang/parse.c index d95becb9..dae09d97 100644 --- a/src/lang/parse.c +++ b/src/lang/parse.c @@ -636,10 +636,10 @@ static ray_t* parse_vector(ray_parser_t *p) { for (int32_t i = 0; i < count; i++) { if (RAY_ATOM_IS_NULL(elems[i])) { ray_vec_set_null(vec, i, true); - /* Phase 2 dual-encoding: a non-F64 typed null (0Nl/0Ni/0Nh) - * carries i64 = 0, so the cast above wrote 0.0 to the slot. - * Overwrite with NULL_F64 so raw-payload consumers see NaN. - * Null F64 atoms already carry NULL_F64 from ray_typed_null. */ + /* A non-F64 typed null (0Nl/0Ni/0Nh) carries i64 = 0, so + * the cast above wrote 0.0 to the slot. Overwrite with + * NULL_F64 so raw-payload consumers see NaN. Null F64 + * atoms already carry NULL_F64 from ray_typed_null. */ d[i] = NULL_F64; } ray_release(elems[i]); diff --git a/src/mem/heap.c b/src/mem/heap.c index 8af6d506..4896788f 100644 --- a/src/mem/heap.c +++ b/src/mem/heap.c @@ -559,19 +559,15 @@ static void ray_release_owned_refs(ray_t* v) { } /* Vector with attached index: nullmap[0..7] holds an owning ref to - * the index ray_t. The index owns the displaced ext_nullmap/str_pool/ - * sym_dict, so we must NOT also try to release those off the parent — - * they aren't there anymore. Skip the NULLMAP_EXT and STR_pool branches. */ + * the index ray_t. The index owns the displaced str_pool / sym_dict, + * so we must NOT also try to release those off the parent — they + * aren't there anymore. Skip the STR_pool branch. 
*/ if (v->attrs & RAY_ATTR_HAS_INDEX) { if (v->index && !RAY_IS_ERR(v->index)) ray_release(v->index); return; } - if ((v->attrs & RAY_ATTR_NULLMAP_EXT) && - v->ext_nullmap && !RAY_IS_ERR(v->ext_nullmap)) - ray_release(v->ext_nullmap); - if (v->type == RAY_STR && v->str_pool && !RAY_IS_ERR(v->str_pool)) ray_release(v->str_pool); @@ -677,10 +673,6 @@ bool ray_retain_owned_refs(ray_t* v) { return true; } - if ((v->attrs & RAY_ATTR_NULLMAP_EXT) && - v->ext_nullmap && !RAY_IS_ERR(v->ext_nullmap)) - ray_retain(v->ext_nullmap); - if (v->type == RAY_STR && v->str_pool && !RAY_IS_ERR(v->str_pool)) ray_retain(v->str_pool); @@ -779,11 +771,6 @@ static void ray_detach_owned_refs(ray_t* v) { return; } - if (v->attrs & RAY_ATTR_NULLMAP_EXT) { - v->ext_nullmap = NULL; - v->attrs &= (uint8_t)~RAY_ATTR_NULLMAP_EXT; - } - if (v->type == RAY_STR) { v->str_pool = NULL; } @@ -942,8 +929,6 @@ void ray_free(ray_t* v) { pool_len = (size_t)v->str_pool->len; data_size += 32 + pool_len; } - if (v->attrs & RAY_ATTR_NULLMAP_EXT) - data_size += ((size_t)v->len + 7) / 8; size_t mapped_size = (data_size + 4095) & ~(size_t)4095; ray_vm_unmap_file(v, mapped_size); } else { @@ -1289,18 +1274,18 @@ void ray_heap_gc(void) { bool safe = (atomic_load_explicit(&ray_parallel_flag, memory_order_relaxed) == 0); - /* Phase 1: Flush main heap's foreign blocks and slab caches. + /* Pass 1: Flush main heap's foreign blocks and slab caches. * When safe (workers idle), return foreign blocks to their owners * so worker pools become reusable. */ heap_flush_foreign(h, safe); heap_flush_slabs(h); if (safe) { - /* Phase 2: Return foreign blocks absorbed onto our freelists + /* Pass 2: Return foreign blocks absorbed onto our freelists * back to their owning worker heaps. 
*/ heap_return_foreign_freelist(h); - /* Phase 3: Skip worker heaps — we cannot safely touch their + /* Pass 3: Skip worker heaps — we cannot safely touch their * foreign lists or slab caches because workers may still be * between pending-- and sem_wait, calling ray_free which * modifies wh->foreign and wh->slabs. Workers flush their @@ -1308,7 +1293,7 @@ void ray_heap_gc(void) { * TODO: full cross-heap reclamation requires a worker * quiescence barrier. */ - /* Phase 4: Reclaim OVERSIZED empty pools. + /* Pass 4: Reclaim OVERSIZED empty pools. * Standard pools (pool_order == RAY_HEAP_POOL_ORDER) are never * munmapped — physical pages released via madvise (phase 5) * re-fault cheaply on next query. @@ -1318,7 +1303,7 @@ void ray_heap_gc(void) { * Emptiness is computed by walking all heaps' freelists and slab * caches to sum free capacity within the pool. This avoids atomic * live_count operations on the alloc/free hot path. */ - /* Phase 4: Reclaim oversized empty pools. + /* Pass 4: Reclaim oversized empty pools. * * For each candidate pool (owned by heap gh), count free bytes from: * (a) gh's own freelist + slab cache — safe, only gh modifies these @@ -1434,8 +1419,8 @@ void ray_heap_gc(void) { } } - /* Phase 5: Release physical pages from free blocks in every - * idle heap. Phase 2 may have returned blocks to worker-owned + /* Pass 5: Release physical pages from free blocks in every + * idle heap. Pass 2 may have returned blocks to worker-owned * freelists; releasing only the caller heap leaves those worker * pages resident across large query repetitions. 
*/ for (int hid = 0; hid < RAY_HEAP_REGISTRY_SIZE; hid++) { diff --git a/src/mem/heap.h b/src/mem/heap.h index 19c273ee..2f0017a5 100644 --- a/src/mem/heap.h +++ b/src/mem/heap.h @@ -56,19 +56,19 @@ * Bit 0x04 -RAY_I64 atoms: RAY_ATTR_HNSW (HNSW handle in .i64) * Bit 0x08 vectors: RAY_ATTR_HAS_INDEX (index ray_t* in nullmap[0..7]) * Bit 0x10 vectors: RAY_ATTR_SLICE - * Bit 0x20 vectors: RAY_ATTR_NULLMAP_EXT * Bit 0x20 -RAY_SYM: RAY_ATTR_NAME (variable reference) - * Bit 0x40 vectors: RAY_ATTR_HAS_NULLS + * Bit 0x40 vectors: RAY_ATTR_HAS_NULLS (sentinel-encoded; payload is truth) * Bit 0x80 all types: RAY_ATTR_ARENA (arena-allocated, no refcount) * * Overlapping bit values are safe because consumers always check the type tag * before interpreting attrs. + * + * Bit 0x20 on vectors is reserved for future use. */ #ifndef RAY_ATTR_SLICE #define RAY_ATTR_SLICE 0x10 #endif -#define RAY_ATTR_NULLMAP_EXT 0x20 #define RAY_ATTR_HAS_NULLS 0x40 #define RAY_ATTR_ARENA 0x80 @@ -93,32 +93,21 @@ * * Coexists with HAS_INDEX: bytes 0-7 carry the index pointer (or saved * nullmap), bytes 8-15 carry the link sym; both bits can be set on the - * same column. A linked vec with nulls is forced to RAY_ATTR_NULLMAP_EXT - * because the inline 128-bit bitmap would alias the link-target slot. + * same column. * * Same numeric value as RAY_ATTR_HNSW (HNSW handles are -RAY_I64 atoms, * the type tag disambiguates). */ #define RAY_ATTR_HAS_LINK 0x04 /* Vector carries an attached accelerator index in nullmap[0..7] (a ray_t* - * of type RAY_INDEX). The original 16-byte nullmap union content (inline - * bitmap, ext_nullmap, str_ext_null/str_pool, sym_dict) is preserved inside - * the index ray_t and restored on detach. - * - * Attribute-bit invariant when HAS_INDEX is set: - * - HAS_NULLS is *preserved* (not cleared). Many call sites use it as a - * cheap "do I need null-aware logic?" gate; clearing it would silently - * break correctness for nullable columns. The bit is authoritative. 
- * - NULLMAP_EXT is *cleared*. The parent's ext_nullmap field is now the - * index pointer, not a U8 bitmap vec; readers that gate on NULLMAP_EXT - * and dereference ext_nullmap directly would otherwise read garbage. - * The displaced ext-nullmap pointer (if any) lives in - * ix->saved_nullmap[0..7]; ix->saved_attrs records the original - * NULLMAP_EXT bit for restoration on detach. + * of type RAY_INDEX). The original 16-byte nullmap union content + * (slice_offset, str_pool, sym_dict, link_target) is preserved inside the + * index ray_t and restored on detach. * - * Direct nullmap-byte readers (morsel iteration, ray_vec_is_null) MUST - * check HAS_INDEX first and route through ix->saved_nullmap / saved_attrs. - * See src/ops/idxop.h. */ + * HAS_NULLS is preserved on the parent across attach/detach; many call + * sites use it as a cheap "do I need null-aware logic?" gate. Null state + * itself is sentinel-encoded in the payload (see src/vec/vec.c) so the + * index pointer overlay at bytes 0-7 does not affect ray_vec_is_null. */ #define RAY_ATTR_HAS_INDEX 0x08 /* ===== Internal Allocator Variants ===== */ diff --git a/src/ops/agg.c b/src/ops/agg.c index 4b747447..fee02d2e 100644 --- a/src/ops/agg.c +++ b/src/ops/agg.c @@ -481,9 +481,8 @@ static ray_t* vec_to_f64_scratch(ray_t* x, double** out_vals) { } ray_t* ray_med_fn(ray_t* x) { - /* Note: after Phase 1.5 the dispatcher always materialises non-LAZY_AWARE - * fn args, so x is already concrete here. The inline materialise guard - * that was here was unreachable and has been removed. */ + /* The dispatcher always materialises non-LAZY_AWARE fn args, so x + * is already concrete here. */ if (RAY_IS_ERR(x)) return x; /* Scalar: median of single value → f64 */ if (ray_is_atom(x)) { @@ -572,9 +571,8 @@ ray_t* ray_dev_fn(ray_t* x) { return var_stddev_core(x, 0, 1); } * sample=1 -> divide sum-of-squares by (n-1); sample=0 -> divide by n. * take_sqrt=1 -> stddev; take_sqrt=0 -> variance. 
*/ static ray_t* var_stddev_core(ray_t* x, int sample, int take_sqrt) { - /* Note: after Phase 1.5 the dispatcher always materialises non-LAZY_AWARE - * fn args, so x is already concrete here. The inline materialise guard - * that was here was unreachable and has been removed. */ + /* The dispatcher always materialises non-LAZY_AWARE fn args, so x + * is already concrete here. */ if (RAY_IS_ERR(x)) return x; if (ray_is_atom(x)) { if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); diff --git a/src/ops/builtins.c b/src/ops/builtins.c index 130000d9..9eff45d0 100644 --- a/src/ops/builtins.c +++ b/src/ops/builtins.c @@ -744,8 +744,14 @@ static int cast_match(const char* tname, size_t tlen, const char* target) { return 1; } -/* Helper: copy null bitmap from source vec/list to destination vec. */ +/* Helper: copy null state from source vec/list to destination vec. */ static ray_t* cast_vec_copy_nulls(ray_t* vec, ray_t* val) { + /* BOOL / U8 destinations are non-nullable — there is no slot for a + * null marker. Casting a nullable source to one of these types + * silently collapses the null to the type's zero value (already + * written by the cast loop). */ + if (vec->type == RAY_BOOL || vec->type == RAY_U8) return vec; + if (ray_is_vec(val)) { if (ray_vec_copy_nulls(vec, val) != RAY_OK) { ray_release(vec); return ray_error("oom", NULL); } @@ -754,37 +760,42 @@ static ray_t* cast_vec_copy_nulls(ray_t* vec, ray_t* val) { for (int64_t j = 0; j < vec->len; j++) if (le[j] && RAY_ATOM_IS_NULL(le[j])) ray_vec_set_null(vec, j, true); + /* ray_vec_set_null writes the sentinel into the payload, so the + * LIST branch needs no post-fill. */ + return vec; } - /* Phase 2/3a dual encoding: when the destination has nulls, fill each - * null payload slot with the correct-width sentinel so consumers that - * read the raw payload (without consulting the bitmap) honor the null - * contract. 
Narrowing casts (Hazard 3) require writing the dest-width + /* VEC source: ray_vec_copy_nulls bulk-copies the source's HAS_NULLS + * state into the destination, but never touches the payload — the + * cast loop already filled the dest payload with raw cast results. + * For each null source slot, overwrite the dest payload with the + * dest-width sentinel so consumers reading the raw payload honor the + * null contract. Narrowing casts require writing the dest-width * sentinel directly — propagating through the cast macro produces * (int16_t)NULL_I32 = 0 etc., which collides with a legitimate value. */ - if (vec->attrs & RAY_ATTR_HAS_NULLS) { + if (val->attrs & RAY_ATTR_HAS_NULLS) { switch (vec->type) { case RAY_F64: { double* d = (double*)ray_data(vec); for (int64_t j = 0; j < vec->len; j++) - if (ray_vec_is_null(vec, j)) d[j] = NULL_F64; + if (ray_vec_is_null(val, j)) d[j] = NULL_F64; break; } case RAY_I64: case RAY_TIMESTAMP: { int64_t* d = (int64_t*)ray_data(vec); for (int64_t j = 0; j < vec->len; j++) - if (ray_vec_is_null(vec, j)) d[j] = NULL_I64; + if (ray_vec_is_null(val, j)) d[j] = NULL_I64; break; } case RAY_I32: case RAY_DATE: case RAY_TIME: { int32_t* d = (int32_t*)ray_data(vec); for (int64_t j = 0; j < vec->len; j++) - if (ray_vec_is_null(vec, j)) d[j] = NULL_I32; + if (ray_vec_is_null(val, j)) d[j] = NULL_I32; break; } case RAY_I16: { int16_t* d = (int16_t*)ray_data(vec); for (int64_t j = 0; j < vec->len; j++) - if (ray_vec_is_null(vec, j)) d[j] = NULL_I16; + if (ray_vec_is_null(val, j)) d[j] = NULL_I16; break; } default: break; diff --git a/src/ops/collection.c b/src/ops/collection.c index 8696e6db..a473ce2e 100644 --- a/src/ops/collection.c +++ b/src/ops/collection.c @@ -718,8 +718,6 @@ static void propagate_sym_dict(ray_t* dst, const ray_t* src) { const ray_t* owner = (src->attrs & RAY_ATTR_SLICE) ? 
src->slice_parent : src; if (owner && !(owner->attrs & RAY_ATTR_SLICE) && - (!(owner->attrs & RAY_ATTR_HAS_NULLS) || - (owner->attrs & RAY_ATTR_NULLMAP_EXT)) && owner->sym_dict) { ray_retain(owner->sym_dict); dst->sym_dict = owner->sym_dict; diff --git a/src/ops/exec.c b/src/ops/exec.c index a3ec646d..e30ebf97 100644 --- a/src/ops/exec.c +++ b/src/ops/exec.c @@ -262,7 +262,7 @@ void gather_fn(void* raw, uint32_t wid, int64_t start, int64_t end) { #define PG_BSIZE (1 << PG_BSHIFT) /* 16384 */ #define PG_MIN (PG_BSIZE * 8) /* 131072 — below this, routing overhead > benefit */ -/* Phase 1+2 use dispatch_n with explicit task-to-range mapping so that +/* Pass 1+2 use dispatch_n with explicit task-to-range mapping so that * histogram and scatter have consistent per-task assignments regardless * of which worker picks up each task (work-stealing is non-deterministic). */ @@ -326,7 +326,7 @@ static void pg_route_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { } } -/* Phase 3: per-block gather — one task per source block */ +/* Pass 3: per-block gather — one task per source block */ typedef struct { const int32_t* rdest; const int32_t* rsrc; @@ -434,14 +434,14 @@ void partitioned_gather(ray_pool_t* pool, const int64_t* idx, int64_t n, return; } - /* Phase 1: parallel histogram (dispatch_n for deterministic task→range) */ + /* Pass 1: parallel histogram (dispatch_n for deterministic task→range) */ pg_hist_ctx_t hctx = { .idx = idx, .hist = hist, .n_parts = n_parts, .n = n, .n_tasks = nw, }; ray_pool_dispatch_n(pool, pg_hist_fn, &hctx, nw); - /* Phase 2: prefix sum → per-task scatter offsets + partition boundaries */ + /* Pass 2: prefix sum → per-task scatter offsets + partition boundaries */ int64_t running = 0; for (int64_t p = 0; p < n_parts; p++) { part_off[p] = running; @@ -452,7 +452,7 @@ void partitioned_gather(ray_pool_t* pool, const int64_t* idx, int64_t n, } part_off[n_parts] = running; - /* Phase 3: parallel route (same task→range mapping as histogram) */ 
+ /* Pass 3: parallel route (same task→range mapping as histogram) */ pg_route_ctx_t rctx = { .idx = idx, .rdest = rdest, .rsrc = rsrc, .offsets = offsets, .n_parts = n_parts, @@ -460,7 +460,7 @@ void partitioned_gather(ray_pool_t* pool, const int64_t* idx, int64_t n, }; ray_pool_dispatch_n(pool, pg_route_fn, &rctx, nw); - /* Phase 4: parallel per-block gather */ + /* Pass 4: parallel per-block gather */ pg_block_ctx_t bctx = { .rdest = rdest, .rsrc = rsrc, .part_off = part_off, .srcs = srcs, .dsts = dsts, .esz = esz, .ncols = ncols, diff --git a/src/ops/expr.c b/src/ops/expr.c index bb913a64..30b65302 100644 --- a/src/ops/expr.c +++ b/src/ops/expr.c @@ -295,10 +295,10 @@ bool try_linear_sumavg_input_i64(ray_graph_t* g, ray_t* tbl, ray_op_t* input_op, for (uint8_t i = 0; i < lin.n_terms; i++) { ray_t* col = ray_table_get_col(tbl, lin.syms[i]); if (!col || !type_is_linear_i64_col(col->type)) return false; - /* Phase 3a: scalar_sum_linear_i64_fn reads slots raw via - * scalar_i64_at; any nullable term would poison the sum with - * NULL_I{16,32,64} sentinels. Refuse the fast plan and let - * the caller fall back to the generic masked path. */ + /* scalar_sum_linear_i64_fn reads slots raw via scalar_i64_at; + * any nullable term would poison the sum with NULL_I{16,32,64} + * sentinels. Refuse the fast plan and let the caller fall back + * to the generic masked path. 
*/ if (col->attrs & RAY_ATTR_HAS_NULLS) return false; out_plan->term_ptrs[i] = ray_data(col); out_plan->term_types[i] = col->type; @@ -467,7 +467,7 @@ bool expr_compile(ray_graph_t* g, ray_t* tbl, ray_op_t* root, ray_expr_t* out) { if (!col) return false; if (col->type == RAY_MAPCOMMON) return false; if (col->type == RAY_STR) return false; /* RAY_STR needs string comparison path */ - if (col->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) return false; /* nullable cols need bitmap-aware path */ + if (col->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) return false; /* nullable cols need the null-aware path */ out->regs[r].kind = REG_SCAN; if (RAY_IS_PARTED(col->type)) { int8_t base = (int8_t)RAY_PARTED_BASETYPE(col->type); @@ -488,7 +488,7 @@ bool expr_compile(ray_graph_t* g, ray_t* tbl, ray_op_t* root, ray_expr_t* out) { } else if (node->opcode == OP_CONST) { ray_op_ext_t* ext = find_ext(g, node->id); if (!ext || !ext->literal) return false; - if (RAY_ATOM_IS_NULL(ext->literal)) return false; /* null constants need bitmap-aware path */ + if (RAY_ATOM_IS_NULL(ext->literal)) return false; /* null constants need the null-aware path */ double cf; int64_t ci; bool is_f64; if (!atom_to_numeric(ext->literal, &cf, &ci, &is_f64)) { /* Try resolving string constant to symbol intern ID — @@ -692,6 +692,8 @@ static void expr_exec_binary(uint8_t opcode, int8_t dt, void* dp, } break; case OP_MIN2: for (int64_t j = 0; j < n; j++) d[j] = a[j] < b[j] ? a[j] : b[j]; break; case OP_MAX2: for (int64_t j = 0; j < n; j++) d[j] = a[j] > b[j] ? a[j] : b[j]; break; + case OP_AND: for (int64_t j = 0; j < n; j++) d[j] = (a[j] && b[j]) ? 1 : 0; break; + case OP_OR: for (int64_t j = 0; j < n; j++) d[j] = (a[j] || b[j]) ? 
1 : 0; break; default: break; } } else if (dt == RAY_I32 || dt == RAY_DATE || dt == RAY_TIME) { @@ -778,6 +780,10 @@ static void expr_exec_binary(uint8_t opcode, int8_t dt, void* dp, case OP_LE: for (int64_t j = 0; j < n; j++) d[j] = a[j]<=b[j]; break; case OP_GT: for (int64_t j = 0; j < n; j++) d[j] = a[j]>b[j]; break; case OP_GE: for (int64_t j = 0; j < n; j++) d[j] = a[j]>=b[j]; break; + /* BOOL cols are loaded as I64 abstract via expr_load_i64; + * AND/OR on such inputs lands here with dt=BOOL t1=t2=I64. */ + case OP_AND: for (int64_t j = 0; j < n; j++) d[j] = (a[j] && b[j]) ? 1 : 0; break; + case OP_OR: for (int64_t j = 0; j < n; j++) d[j] = (a[j] || b[j]) ? 1 : 0; break; default: break; } } else { /* both bool */ @@ -808,6 +814,8 @@ static void expr_exec_unary(uint8_t opcode, int8_t dt, void* dp, case OP_CEIL: for (int64_t j = 0; j < n; j++) d[j] = ceil(a[j]); break; case OP_FLOOR: for (int64_t j = 0; j < n; j++) d[j] = floor(a[j]); break; case OP_ROUND: for (int64_t j = 0; j < n; j++) d[j] = round(a[j]); break; + /* OP_CAST F64→F64: same-buffer-issue as I64→I64 (see below). */ + case OP_CAST: memcpy(d, a, (size_t)n * sizeof(double)); break; default: break; } } else { /* CAST i64→f64 */ @@ -822,8 +830,19 @@ static void expr_exec_unary(uint8_t opcode, int8_t dt, void* dp, /* Unsigned negation avoids UB on INT64_MIN */ case OP_NEG: for (int64_t j = 0; j < n; j++) d[j] = (int64_t)(-(uint64_t)a[j]); break; case OP_ABS: for (int64_t j = 0; j < n; j++) d[j] = a[j] < 0 ? (int64_t)(-(uint64_t)a[j]) : a[j]; break; + /* OP_CAST I64→I64 is logically a no-op, but src and dst are + * separate scratch buffers: the dst slot must still receive + * the data. SCAN U8/BOOL/I16/I32 columns get loaded into + * the I64 abstract via expr_load_i64; any subsequent + * `(as 'I64 col)` lands in this branch and would otherwise + * leave dst un-initialised. 
*/ + case OP_CAST: memcpy(d, a, (size_t)n * sizeof(int64_t)); break; default: break; } + } else if (t1 == RAY_BOOL) { + /* CAST bool→i64 — BOOL scratch is 1 byte per elem (0/1). */ + const uint8_t* a = (const uint8_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = a[j]; } else { /* CAST f64→i64 — clamp to avoid out-of-range UB */ const double* a = (const double*)ap; for (int64_t j = 0; j < n; j++) @@ -941,11 +960,10 @@ static void expr_full_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t e /* Post-pass for the fused unary path: |INT64_MIN| and -INT64_MIN don't fit in * i64 (signed-overflow; k/q convention surfaces this as typed null). The * element-wise loop uses unsigned wrap, so any overflow position lands as - * INT64_MIN in data. Post Phase 3a-1, INT64_MIN IS the canonical NULL_I64 - * sentinel — the dual-encoding contract requires the payload to *remain* - * INT64_MIN while the null bit is set. So we only need to flip the bitmap - * bit; the payload is already correct. Caller must invoke single-threaded - * — after pool dispatch joins. */ + * INT64_MIN in data. Since INT64_MIN IS the canonical NULL_I64 sentinel, + * the payload is already correct — we just flip HAS_NULLS via + * ray_vec_set_null. Caller must invoke single-threaded (after pool + * dispatch joins). */ static void mark_i64_overflow_as_null(ray_t* result, int64_t off, int64_t len) { int64_t* d = (int64_t*)ray_data(result) + off; for (int64_t i = 0; i < len; i++) { @@ -1067,61 +1085,12 @@ ray_t* expr_eval_full(const ray_expr_t* expr, int64_t nrows) { * Null bitmap propagation for element-wise ops * ============================================================================ */ -/* Resolve the raw null bitmap pointer and bit offset for a vector. - * Returns NULL if the vector has no null bits, or if the inline nullmap - * cannot cover the requested range (prevents overread). 
*/ -static const uint8_t* nullmap_bits(ray_t* v, int64_t* bit_offset, int64_t len) { - ray_t* target = v; - int64_t off = 0; - if (v->attrs & RAY_ATTR_SLICE) { - target = v->slice_parent; - off = v->slice_offset; - } - if (!(target->attrs & RAY_ATTR_HAS_NULLS)) return NULL; - int64_t resolved_off = 0, len_bits = 0; - const uint8_t* bits = ray_vec_nullmap_bytes(target, &resolved_off, &len_bits); - if (!bits) return NULL; - *bit_offset = off + resolved_off; - /* Caller assumes inline buffer means 128-bit coverage; reject ranges - * that would overrun it just like the original guard. */ - if (len_bits == 128 && off + len > 128) return NULL; - return bits; -} - -/* Writable null bitmap pointer for freshly allocated (non-slice) dst vector. - * Returns NULL if inline nullmap cannot cover dst->len (prevents overflow). */ -static uint8_t* nullmap_bits_mut(ray_t* dst) { - if (dst->attrs & RAY_ATTR_NULLMAP_EXT) - return (uint8_t*)ray_data(dst->ext_nullmap); - if (dst->type == RAY_STR) return NULL; - if (dst->len > 128) return NULL; /* inline can only cover 128 bits */ - return dst->nullmap; -} - -/* OR-merge null bitmap from src into dst. Fast byte-level path when possible, - * element-level fallback for misaligned slices or RAY_STR without ext nullmap. */ +/* Propagate nulls from src into dst element-wise. ray_vec_set_null + * writes the type-correct sentinel, and ray_vec_is_null reads it back — + * the per-element walk is required since there is no per-row bitmap to + * bulk-OR. 
*/ static void propagate_nulls(ray_t* src, ray_t* dst, int64_t len) { - int64_t src_off = 0; - const uint8_t* sbits = nullmap_bits(src, &src_off, len); - if (!sbits) goto slow; /* no accessible bitmap — use element path */ - - /* Ensure dst has ext nullmap for large vectors */ - if (len > 128 && !(dst->attrs & RAY_ATTR_NULLMAP_EXT)) - ray_vec_set_null(dst, len - 1, false); /* force ext alloc */ - uint8_t* dbits = nullmap_bits_mut(dst); - if (!dbits) goto slow; /* ext alloc failed or RAY_STR */ - - /* Bulk OR — both bitmaps are byte-accessible and src is byte-aligned */ - if ((src_off % 8) == 0) { - int64_t byte_start = src_off / 8; - int64_t nbytes = (len + 7) / 8; - for (int64_t b = 0; b < nbytes; b++) - dbits[b] |= sbits[byte_start + b]; - dst->attrs |= RAY_ATTR_HAS_NULLS; - return; - } - -slow: + if (!(src->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE))) return; for (int64_t i = 0; i < len; i++) { if (ray_vec_is_null(src, i)) ray_vec_set_null(dst, i, true); @@ -1170,38 +1139,20 @@ static void fix_null_comparisons(ray_t* lhs, ray_t* rhs, ray_t* result, bool r_has = !r_scalar && vec_may_have_nulls(rhs); if (!ln_s && !rn_s && !l_has && !r_has) return; - /* Fast path: only one side has nulls (the common shape — vec col vs - * non-null scalar) and no scalar is null. Walk the nullmap byte-by- - * byte; skip any 8-row chunk where the byte is 0. Drops Q11's - * `(!= MobilePhoneModel "")` from ~14 ms to <1 ms when the column - * has HAS_NULLS set but few actual nulls. */ + /* One-sided null fast path: only one side has nulls (the common + * shape — vec col vs non-null scalar) and no scalar is null. Scan + * src elements via ray_vec_is_null (sentinel-based), set the + * comparison's fill value per null cell. */ if (!ln_s && !rn_s && (l_has ^ r_has)) { ray_t* src = l_has ? 
lhs : rhs; bool src_left = l_has; - int64_t src_off = 0; - const uint8_t* nbits = nullmap_bits(src, &src_off, len); - if (nbits && (src_off % 8) == 0) { - int64_t byte0 = src_off / 8; - int64_t i = 0; - uint8_t left_bits = (opcode == OP_LT || opcode == OP_LE || opcode == OP_NE); - uint8_t right_bits = (opcode == OP_GT || opcode == OP_GE || opcode == OP_NE); - uint8_t fill = src_left ? left_bits : right_bits; - while (i + 8 <= len) { - uint8_t b = nbits[byte0 + (i >> 3)]; - if (b) { - /* Only set the bits where src is null. */ - for (int64_t k = 0; k < 8; k++) - if ((b >> k) & 1) dst[i + k] = fill; - } - i += 8; - } - for (; i < len; i++) { - if ((nbits[byte0 + (i >> 3)] >> (i & 7)) & 1) - dst[i] = fill; - } - return; + uint8_t left_bits = (opcode == OP_LT || opcode == OP_LE || opcode == OP_NE); + uint8_t right_bits = (opcode == OP_GT || opcode == OP_GE || opcode == OP_NE); + uint8_t fill = src_left ? left_bits : right_bits; + for (int64_t i = 0; i < len; i++) { + if (ray_vec_is_null(src, i)) dst[i] = fill; } - /* Fall through to slow path on misaligned slice / no bitmap. */ + return; } for (int64_t i = 0; i < len; i++) { @@ -1223,20 +1174,12 @@ static void fix_null_comparisons(ray_t* lhs, ray_t* rhs, ray_t* result, } } -/* Set all elements in result as null (scalar null broadcast). */ +/* Set all elements in result as null (scalar null broadcast). + * Writes the type-correct sentinel into every payload slot and sets + * HAS_NULLS. 
*/ static void set_all_null(ray_t* result, int64_t len) { - if (len > 128 && !(result->attrs & RAY_ATTR_NULLMAP_EXT)) - ray_vec_set_null(result, len - 1, false); /* force ext alloc */ - uint8_t* dbits = nullmap_bits_mut(result); - if (dbits) { - memset(dbits, 0xFF, (size_t)((len + 7) / 8)); - result->attrs |= RAY_ATTR_HAS_NULLS; - } else { - for (int64_t i = 0; i < len; i++) ray_vec_set_null(result, i, true); - } - /* Phase 2/3a dual-encoding: results must also carry the matching - * width sentinel in every payload slot so raw-payload consumers see - * the null marker without consulting the bitmap. */ + result->attrs |= RAY_ATTR_HAS_NULLS; + /* Sentinel payload fill — the sole source of truth. */ switch (result->type) { case RAY_F64: { double* d = (double*)ray_data(result); @@ -1248,6 +1191,11 @@ static void set_all_null(ray_t* result, int64_t len) { for (int64_t i = 0; i < len; i++) d[i] = NULL_I64; break; } + case RAY_F32: { + float* d = (float*)ray_data(result); + for (int64_t i = 0; i < len; i++) d[i] = NULL_F32; + break; + } case RAY_I32: case RAY_DATE: case RAY_TIME: { int32_t* d = (int32_t*)ray_data(result); for (int64_t i = 0; i < len; i++) d[i] = NULL_I32; @@ -1258,6 +1206,14 @@ static void set_all_null(ray_t* result, int64_t len) { for (int64_t i = 0; i < len; i++) d[i] = NULL_I16; break; } + case RAY_STR: { + ray_str_t* s = (ray_str_t*)ray_data(result); + memset(s, 0, (size_t)len * sizeof(ray_str_t)); + break; + } + case RAY_GUID: + memset(ray_data(result), 0, (size_t)len * 16); + break; default: break; } } diff --git a/src/ops/fused_group.c b/src/ops/fused_group.c index c8fc9100..f7e2a5af 100644 --- a/src/ops/fused_group.c +++ b/src/ops/fused_group.c @@ -308,7 +308,7 @@ int ray_fused_group_supported(ray_t* expr, ray_t* tbl) { /* ───────────────────────────────────────────────────────────────────────── * Per-morsel predicate evaluator * - * Phase 1 only handles a single comparison `(== col const)` / `(!= col const)` + * Pass 1 only handles a single 
comparison `(== col const)` / `(!= col const)` * against an SYM or numeric column. The compiled state is built once at * exec entry (column resolution + constant decode) and reused for every * morsel. fp_eval_cmp writes 0/1 into bits[0..n) for the corresponding @@ -707,7 +707,7 @@ static int fp_compile_cmp(ray_graph_t* g, ray_op_t* pred_op, ray_t* tbl, } /* Walk the predicate DAG (an OP_AND tree of leaf comparisons) and collect - * leaves into `out->children`. Phase 3: balanced binary OP_AND emitted + * leaves into `out->children`. Pass 3: balanced binary OP_AND emitted * by compile_expr_dag means we recurse on both inputs whenever we see an * OP_AND node. Returns 0 on success, -1 if a leaf can't be compiled or * the fan-in exceeds FP_PRED_MAX_CHILDREN. */ @@ -3130,7 +3130,7 @@ ray_t* exec_filtered_group(ray_graph_t* g, ray_op_t* op) { if (!ext) return ray_error("nyi", NULL); /* count1 fast path: single key, single OP_COUNT. Unchanged from - * Phase 3 — guarantees zero regression on Q8/Q37/Q38/Q43. + * Pass 3 — guarantees zero regression on Q8/Q37/Q38/Q43. * If the fused exec rejects the shape (planner / executor gate * divergence), fall back to the unfused FILTER + GROUP subgraph. */ ray_t* res; diff --git a/src/ops/fused_group.h b/src/ops/fused_group.h index dfb4735b..a3955162 100644 --- a/src/ops/fused_group.h +++ b/src/ops/fused_group.h @@ -46,8 +46,8 @@ ray_op_t* ray_filtered_group(ray_graph_t* g, * fused op. Returns 1 if `expr` (a Rayfall expression, not a DAG node) * can be evaluated by the per-morsel predicate evaluator against `tbl`. * - * Phase 1 accepted single (== col const) / (!= col const) on flat - * SYM/integer columns. Phase 3 adds (and pred1 pred2 …) of those, plus + * Pass 1 accepted single (== col const) / (!= col const) on flat + * SYM/integer columns. Pass 3 adds (and pred1 pred2 …) of those, plus * ordering comparisons (<, <=, >, >=) on numeric (non-SYM) columns. 
*/ int ray_fused_group_supported(ray_t* expr, ray_t* tbl); diff --git a/src/ops/group.c b/src/ops/group.c index e19c410d..aa7c1cf2 100644 --- a/src/ops/group.c +++ b/src/ops/group.c @@ -48,16 +48,22 @@ static void reduce_acc_init(reduce_acc_t* acc) { /* Integer reduction loop — reads native type T, accumulates as i64. * HAS_NULLS and HAS_IDX must be integer literal constants (0 or 1) so the * compiler dead-code-eliminates the corresponding branches in every - * specialisation. reduce_range dispatches to the right combination before - * calling this macro so the hot path (no nulls, no idx) contains zero - * per-element runtime branches. */ -#define REDUCE_LOOP_I(T, base, start, end, acc, HAS_NULLS, null_bm, HAS_IDX, idx) \ + * specialisation. reduce_range dispatches to the right combination + * before calling this macro so the hot path (no nulls, no idx) contains + * zero per-element runtime branches. + * + * NULL_SENT is the type-correct NULL_* sentinel value for T (NULL_I16, + * NULL_I32, NULL_I64). For BOOL/U8 the sentinel slot is unused + * (those types are non-nullable; dispatcher pins HAS_NULLS=0) so any + * value works; we pass 0 for compileability. */ +#define REDUCE_LOOP_I(T, NULL_SENT, base, start, end, acc, HAS_NULLS, HAS_IDX, idx) \ do { \ const T* d = (const T*)(base); \ for (int64_t i = start; i < end; i++) { \ int64_t row = (HAS_IDX) ? (idx)[i] : i; \ - if ((HAS_NULLS) && (null_bm[row/8] >> (row%8)) & 1) { (acc)->null_count++; continue; } \ - int64_t v = (int64_t)d[row]; \ + T raw = d[row]; \ + if ((HAS_NULLS) && raw == (T)(NULL_SENT)) { (acc)->null_count++; continue; } \ + int64_t v = (int64_t)raw; \ /* sum/sum_sq may overflow on signed arithmetic — use defined \ * unsigned wrap (same semantic, no UBSan whine). */ \ (acc)->sum_i = (int64_t)((uint64_t)(acc)->sum_i + (uint64_t)v); \ @@ -70,14 +76,15 @@ static void reduce_acc_init(reduce_acc_t* acc) { } \ } while (0) -/* Float reduction loop — see REDUCE_LOOP_I for HAS_NULLS/HAS_IDX semantics. 
*/ -#define REDUCE_LOOP_F(base, start, end, acc, HAS_NULLS, null_bm, HAS_IDX, idx) \ +/* Float reduction loop — see REDUCE_LOOP_I for HAS_NULLS/HAS_IDX semantics. + * F64 null = NaN (NULL_F64); detect via v != v (only NaN fails self-equality). */ +#define REDUCE_LOOP_F(base, start, end, acc, HAS_NULLS, HAS_IDX, idx) \ do { \ const double* d = (const double*)(base); \ for (int64_t i = start; i < end; i++) { \ int64_t row = (HAS_IDX) ? (idx)[i] : i; \ - if ((HAS_NULLS) && (null_bm[row/8] >> (row%8)) & 1) { (acc)->null_count++; continue; } \ double v = d[row]; \ + if ((HAS_NULLS) && v != v) { (acc)->null_count++; continue; } \ (acc)->sum_f += v; (acc)->sum_sq_f += v * v; (acc)->prod_f *= v; \ if (v < (acc)->min_f) (acc)->min_f = v; \ if (v > (acc)->max_f) (acc)->max_f = v; \ @@ -89,48 +96,66 @@ static void reduce_acc_init(reduce_acc_t* acc) { /* Dispatch helper: expand REDUCE_LOOP_I/F with compile-time 0/1 constants for * HAS_NULLS and HAS_IDX based on the runtime pointers so the compiler can * dead-code-eliminate the branches inside each specialisation. 
*/ -#define DISPATCH_I(T, base, start, end, acc, has_nulls, null_bm, idx) \ +#define DISPATCH_I(T, NULL_SENT, base, start, end, acc, has_nulls, idx) \ do { \ if (!(has_nulls) && !(idx)) \ - REDUCE_LOOP_I(T, base, start, end, acc, 0, null_bm, 0, idx); \ + REDUCE_LOOP_I(T, NULL_SENT, base, start, end, acc, 0, 0, idx); \ else if (!(has_nulls)) \ - REDUCE_LOOP_I(T, base, start, end, acc, 0, null_bm, 1, idx); \ + REDUCE_LOOP_I(T, NULL_SENT, base, start, end, acc, 0, 1, idx); \ else if (!(idx)) \ - REDUCE_LOOP_I(T, base, start, end, acc, 1, null_bm, 0, idx); \ + REDUCE_LOOP_I(T, NULL_SENT, base, start, end, acc, 1, 0, idx); \ else \ - REDUCE_LOOP_I(T, base, start, end, acc, 1, null_bm, 1, idx); \ + REDUCE_LOOP_I(T, NULL_SENT, base, start, end, acc, 1, 1, idx); \ } while (0) -#define DISPATCH_F(base, start, end, acc, has_nulls, null_bm, idx) \ +#define DISPATCH_F(base, start, end, acc, has_nulls, idx) \ do { \ if (!(has_nulls) && !(idx)) \ - REDUCE_LOOP_F(base, start, end, acc, 0, null_bm, 0, idx); \ + REDUCE_LOOP_F(base, start, end, acc, 0, 0, idx); \ else if (!(has_nulls)) \ - REDUCE_LOOP_F(base, start, end, acc, 0, null_bm, 1, idx); \ + REDUCE_LOOP_F(base, start, end, acc, 0, 1, idx); \ else if (!(idx)) \ - REDUCE_LOOP_F(base, start, end, acc, 1, null_bm, 0, idx); \ + REDUCE_LOOP_F(base, start, end, acc, 1, 0, idx); \ else \ - REDUCE_LOOP_F(base, start, end, acc, 1, null_bm, 1, idx); \ + REDUCE_LOOP_F(base, start, end, acc, 1, 1, idx); \ } while (0) static void reduce_range(ray_t* input, int64_t start, int64_t end, reduce_acc_t* acc, bool has_nulls, - const uint8_t* null_bm, const int64_t* idx) { + const int64_t* idx) { void* base = ray_data(input); switch (input->type) { - case RAY_BOOL: case RAY_U8: - DISPATCH_I(uint8_t, base, start, end, acc, has_nulls, null_bm, idx); break; + case RAY_BOOL: case RAY_U8: { + /* BOOL/U8 are non-nullable; has_nulls is always false here, + * so the per-element null check is dead code in practice. 
*/ + const uint8_t* d = (const uint8_t*)base; + for (int64_t i = start; i < end; i++) { + int64_t row = idx ? idx[i] : i; + if (has_nulls && ray_vec_is_null(input, row)) { acc->null_count++; continue; } + int64_t v = (int64_t)d[row]; + acc->sum_i = (int64_t)((uint64_t)acc->sum_i + (uint64_t)v); + acc->sum_sq_i = (int64_t)((uint64_t)acc->sum_sq_i + (uint64_t)v * (uint64_t)v); + acc->prod_i = (int64_t)((uint64_t)acc->prod_i * (uint64_t)v); + if (v < acc->min_i) acc->min_i = v; + if (v > acc->max_i) acc->max_i = v; + if (!acc->has_first) { acc->first_i = v; acc->has_first = true; } + acc->last_i = v; acc->cnt++; + } + break; + } case RAY_I16: - DISPATCH_I(int16_t, base, start, end, acc, has_nulls, null_bm, idx); break; + DISPATCH_I(int16_t, NULL_I16, base, start, end, acc, has_nulls, idx); break; case RAY_I32: case RAY_DATE: case RAY_TIME: - DISPATCH_I(int32_t, base, start, end, acc, has_nulls, null_bm, idx); break; + DISPATCH_I(int32_t, NULL_I32, base, start, end, acc, has_nulls, idx); break; case RAY_I64: case RAY_TIMESTAMP: - DISPATCH_I(int64_t, base, start, end, acc, has_nulls, null_bm, idx); break; + DISPATCH_I(int64_t, NULL_I64, base, start, end, acc, has_nulls, idx); break; case RAY_F64: - DISPATCH_F(base, start, end, acc, has_nulls, null_bm, idx); break; + DISPATCH_F(base, start, end, acc, has_nulls, idx); break; case RAY_SYM: { - /* Adaptive-width SYM columns — use read_col_i64. Same 4-way dispatch - * to eliminate the per-element null/idx branches. */ + /* Adaptive-width SYM columns — read_col_i64 produces the i64 + * sym id; id 0 is the canonical null sym (interned empty string + * reserved at ray_sym_init). Same 4-way dispatch to eliminate + * the per-element null/idx branches. 
*/ if (!has_nulls && !idx) { for (int64_t i = start; i < end; i++) { int64_t v = read_col_i64(base, i, input->type, input->attrs); @@ -154,8 +179,8 @@ static void reduce_range(ray_t* input, int64_t start, int64_t end, } } else if (!idx) { for (int64_t i = start; i < end; i++) { - if ((null_bm[i/8] >> (i%8)) & 1) { acc->null_count++; continue; } int64_t v = read_col_i64(base, i, input->type, input->attrs); + if (v == 0) { acc->null_count++; continue; } acc->sum_i += v; acc->sum_sq_i += v * v; acc->prod_i = (int64_t)((uint64_t)acc->prod_i * (uint64_t)v); if (v < acc->min_i) acc->min_i = v; @@ -166,8 +191,8 @@ static void reduce_range(ray_t* input, int64_t start, int64_t end, } else { for (int64_t i = start; i < end; i++) { int64_t row = idx[i]; - if ((null_bm[row/8] >> (row%8)) & 1) { acc->null_count++; continue; } int64_t v = read_col_i64(base, row, input->type, input->attrs); + if (v == 0) { acc->null_count++; continue; } acc->sum_i += v; acc->sum_sq_i += v * v; acc->prod_i = (int64_t)((uint64_t)acc->prod_i * (uint64_t)v); if (v < acc->min_i) acc->min_i = v; @@ -187,14 +212,13 @@ typedef struct { ray_t* input; reduce_acc_t* accs; /* one per worker */ bool has_nulls; - const uint8_t* null_bm; const int64_t* idx; /* NULL = no selection; else int64[total_pass] */ } par_reduce_ctx_t; static void par_reduce_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { par_reduce_ctx_t* c = (par_reduce_ctx_t*)ctx; reduce_range(c->input, start, end, &c->accs[worker_id], - c->has_nulls, c->null_bm, c->idx); + c->has_nulls, c->idx); } static void reduce_merge(reduce_acc_t* dst, const reduce_acc_t* src, int8_t in_type) { @@ -743,7 +767,6 @@ typedef struct { int64_t n_rows; int64_t n_groups; bool has_nulls; - const uint8_t* null_bm; uint64_t p_mask; /* P - 1, P = number of partitions */ /* Pass 1 outputs / pass 2 inputs. 
Per-task counters: each worker * writes to its own slice of hist[task_id * P] / cursor[task_id * P] @@ -766,6 +789,32 @@ typedef struct { int64_t* odata; /* n_groups, atomic per-group distinct count */ } cdpg_ctx_t; +/* Type-correct null check for the column row r. Mirrors sentinel_is_null + * but specialised for cdpg's pre-resolved (base, in_type, esz) ctx so the + * hot loop avoids the ray_t pointer indirection. */ +static inline bool cdpg_is_null(const void* base, int64_t r, + int8_t in_type, uint8_t esz) { + switch (in_type) { + case RAY_F64: { double f = ((const double*)base)[r]; return f != f; } + case RAY_F32: { float f = ((const float*) base)[r]; return f != f; } + case RAY_I64: case RAY_TIMESTAMP: + return ((const int64_t*)base)[r] == NULL_I64; + case RAY_I32: case RAY_DATE: case RAY_TIME: + return ((const int32_t*)base)[r] == NULL_I32; + case RAY_I16: + return ((const int16_t*)base)[r] == NULL_I16; + case RAY_SYM: + switch (esz) { + case 1: return ((const uint8_t*) base)[r] == 0; + case 2: return ((const uint16_t*)base)[r] == 0; + case 4: return ((const uint32_t*)base)[r] == 0; + default: return ((const int64_t*) base)[r] == 0; + } + default: /* BOOL / U8 — non-nullable */ + return false; + } +} + /* Read column row r as int64. Width-typed fast path; F64 bitcasts. */ static inline int64_t cdpg_read(const void* base, int64_t r, int8_t in_type, uint8_t esz) { @@ -799,8 +848,7 @@ static void cdpg_hist_fn(void* ctx_, uint32_t worker_id, for (int64_t r = start; r < end; r++) { int64_t gid = x->row_gid[r]; if (gid < 0 || gid >= x->n_groups) continue; - if (x->has_nulls && x->null_bm && - ((x->null_bm[r/8] >> (r%8)) & 1)) continue; + if (x->has_nulls && cdpg_is_null(x->base, r, x->in_type, esz)) continue; /* Partition by gid (not gid×val) so the dedup pass can write to * odata[gid] without atomics. 
*/ uint64_t h = CDPG_PART_HASH(gid + 1); @@ -823,8 +871,7 @@ static void cdpg_scat_fn(void* ctx_, uint32_t worker_id, for (int64_t r = start; r < end; r++) { int64_t gid = x->row_gid[r]; if (gid < 0 || gid >= x->n_groups) continue; - if (x->has_nulls && x->null_bm && - ((x->null_bm[r/8] >> (r%8)) & 1)) continue; + if (x->has_nulls && cdpg_is_null(x->base, r, x->in_type, esz)) continue; int64_t val = cdpg_read(x->base, r, x->in_type, esz); int64_t gid_p1 = gid + 1; uint64_t h = CDPG_PART_HASH(gid_p1); @@ -919,12 +966,9 @@ static ray_t* count_distinct_per_group_parallel( .n_rows = n_rows, .n_groups = n_groups, .has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0, - .null_bm = NULL, .p_mask = p_mask, .odata = (int64_t*)ray_data(out), }; - if (ctx.has_nulls) - ctx.null_bm = ray_vec_nullmap_bytes(src, NULL, NULL); if (P > 256) return NULL; @@ -1082,8 +1126,6 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid, void* base = ray_data(src); bool has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0; - const uint8_t* null_bm = has_nulls ? ray_vec_nullmap_bytes(src, NULL, NULL) - : NULL; /* Per-type read width — hoist the type dispatch out of the hot loop. * read_col_i64 was branching on `in_type` every iteration plus paying @@ -1167,7 +1209,7 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid, for (int64_t r = 0; r < n_rows; r++) { int64_t gid = row_gid[r]; if (gid < 0 || gid >= n_groups) continue; - if (null_bm && ((null_bm[r/8] >> (r%8)) & 1)) continue; + if (cdpg_is_null(base, r, in_type, esz)) continue; /* Use a different name from the macro's inner `val` so * clang doesn't see an `int64_t val = (val);` self-init * after macro expansion. 
*/ @@ -1231,7 +1273,6 @@ typedef struct { const void* base; /* ray_data(src) */ int8_t src_type; bool has_nulls; - const uint8_t* null_bm; const int64_t* idx_buf; const int64_t* offsets; const int64_t* grp_cnt; @@ -1251,6 +1292,20 @@ static inline double med_read_as_f64(const void* base, int8_t t, int64_t row) { } } +/* Type-correct sentinel null check for the med_par paths. U8 is + * non-nullable; med only accepts the listed types so SYM/STR/GUID/F32 + * never reach here. */ +static inline bool med_is_null(const void* base, int8_t t, int64_t row) { + switch (t) { + case RAY_F64: { double v; memcpy(&v, (const char*)base + (size_t)row * 8, 8); return v != v; } + case RAY_I64: return ((const int64_t*)base)[row] == NULL_I64; + case RAY_I32: return ((const int32_t*)base)[row] == NULL_I32; + case RAY_I16: return ((const int16_t*)base)[row] == NULL_I16; + case RAY_U8: return false; /* non-nullable */ + default: return false; + } +} + static void med_per_group_fn(void* ctx_v, uint32_t worker_id, int64_t start, int64_t end) { (void)worker_id; @@ -1260,10 +1315,10 @@ static void med_per_group_fn(void* ctx_v, uint32_t worker_id, int64_t off = c->offsets[g]; double* slice = c->scratch_pool + off; int64_t actual = 0; - if (c->has_nulls && c->null_bm) { + if (c->has_nulls) { for (int64_t i = 0; i < cnt; i++) { int64_t row = c->idx_buf[off + i]; - if ((c->null_bm[row >> 3] >> (row & 7)) & 1) continue; + if (med_is_null(c->base, c->src_type, row)) continue; slice[actual++] = med_read_as_f64(c->base, c->src_type, row); } } else { @@ -1310,8 +1365,6 @@ ray_t* ray_median_per_group_buf(ray_t* src, .base = ray_data(src), .src_type = t, .has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0, - .null_bm = (src->attrs & RAY_ATTR_HAS_NULLS) - ? 
ray_vec_nullmap_bytes(src, NULL, NULL) : NULL, .idx_buf = idx_buf, .offsets = offsets, .grp_cnt = grp_cnt, @@ -1365,7 +1418,6 @@ typedef struct { const void* base; int8_t src_type; bool has_nulls; - const uint8_t* null_bm; int64_t k; uint8_t desc; const int64_t* idx_buf; @@ -1473,8 +1525,7 @@ static void topk_per_group_fn(void* ctx_v, uint32_t worker_id, for (int64_t i = 0; i < cnt && kept < K; i++) { int64_t row = idxs[i]; init_end = i + 1; - if (c->has_nulls && c->null_bm && - ((c->null_bm[row >> 3] >> (row & 7)) & 1)) continue; + if (c->has_nulls && med_is_null(c->base, c->src_type, row)) continue; dst[kept++] = topk_read_f64(c->base, row); } if (kept == K) { @@ -1482,8 +1533,7 @@ static void topk_per_group_fn(void* ctx_v, uint32_t worker_id, topk_sift_down_dbl(dst, K, j, max_heap); for (int64_t i = init_end; i < cnt; i++) { int64_t row = idxs[i]; - if (c->has_nulls && c->null_bm && - ((c->null_bm[row >> 3] >> (row & 7)) & 1)) continue; + if (c->has_nulls && med_is_null(c->base, c->src_type, row)) continue; double v = topk_read_f64(c->base, row); if (desc ? (v > dst[0]) : (v < dst[0])) { dst[0] = v; @@ -1518,8 +1568,7 @@ static void topk_per_group_fn(void* ctx_v, uint32_t worker_id, for (int64_t i = 0; i < cnt && kept < K; i++) { int64_t row = idxs[i]; init_end = i + 1; - if (c->has_nulls && c->null_bm && - ((c->null_bm[row >> 3] >> (row & 7)) & 1)) continue; + if (c->has_nulls && med_is_null(c->base, c->src_type, row)) continue; heap[kept++] = topk_read_i64(c->base, t, row); } if (kept == K) { @@ -1527,8 +1576,7 @@ static void topk_per_group_fn(void* ctx_v, uint32_t worker_id, topk_sift_down_i64(heap, K, j, max_heap); for (int64_t i = init_end; i < cnt; i++) { int64_t row = idxs[i]; - if (c->has_nulls && c->null_bm && - ((c->null_bm[row >> 3] >> (row & 7)) & 1)) continue; + if (c->has_nulls && med_is_null(c->base, c->src_type, row)) continue; int64_t v = topk_read_i64(c->base, t, row); if (desc ? 
(v > heap[0]) : (v < heap[0])) { heap[0] = v; @@ -1595,8 +1643,6 @@ ray_t* ray_topk_per_group_buf(ray_t* src, .base = ray_data(src), .src_type = t, .has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0, - .null_bm = (src->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(src, NULL, NULL) : NULL, .k = k, .desc = desc, .idx_buf = idx_buf, @@ -1666,11 +1712,10 @@ ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input) { int8_t in_type = input->type; int64_t len = input->len; - /* Resolve null bitmap once before dispatching. ray_vec_nullmap_bytes - * handles slice / ext / inline / HAS_INDEX uniformly so this works on - * vectors that carry an attached accelerator index. */ + /* Sentinel-based per-element null detection happens inside + * REDUCE_LOOP_I/F via the type-correct NULL_* constant; the + * has_nulls attribute below is the vec-level fast-path gate. */ bool has_nulls = (input->attrs & RAY_ATTR_HAS_NULLS) != 0; - const uint8_t* null_bm = ray_vec_nullmap_bytes(input, NULL, NULL); /* Selection-aware reduction: when a lazy WHERE filter has installed * g->selection on the graph and the column we're reducing matches @@ -1710,12 +1755,12 @@ ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input) { if (op->opcode == OP_FIRST) { for (int64_t i = 0; i < scan_n; i++) { int64_t r = sel_idx ? sel_idx[i] : i; - if (!has_nulls || !((null_bm[r/8] >> (r%8)) & 1)) { row = r; break; } + if (!has_nulls || !ray_vec_is_null(input, r)) { row = r; break; } } } else { for (int64_t i = scan_n - 1; i >= 0; i--) { int64_t r = sel_idx ? 
sel_idx[i] : i; - if (!has_nulls || !((null_bm[r/8] >> (r%8)) & 1)) { row = r; break; } + if (!has_nulls || !ray_vec_is_null(input, r)) { row = r; break; } } } if (sel_idx_block) ray_release(sel_idx_block); @@ -1735,8 +1780,7 @@ ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input) { for (uint32_t i = 0; i < nw; i++) reduce_acc_init(&accs[i]); par_reduce_ctx_t ctx = { .input = input, .accs = accs, - .has_nulls = has_nulls, .null_bm = null_bm, - .idx = sel_idx }; + .has_nulls = has_nulls, .idx = sel_idx }; ray_pool_dispatch(pool, par_reduce_fn, &ctx, scan_n); /* Merge: worker 0 is the base, merge the rest in order */ @@ -1800,7 +1844,7 @@ ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input) { reduce_acc_t acc; reduce_acc_init(&acc); - reduce_range(input, 0, scan_n, &acc, has_nulls, null_bm, sel_idx); + reduce_range(input, 0, scan_n, &acc, has_nulls, sel_idx); if (sel_idx_block) ray_release(sel_idx_block); switch (op->opcode) { @@ -2566,12 +2610,12 @@ static void group_rows_range_existing(group_ht_t* ht, void** key_data, /* ============================================================================ * Radix-partitioned parallel group-by * - * Phase 1 (parallel): Each worker reads keys+agg values from original columns, + * Pass 1 (parallel): Each worker reads keys+agg values from original columns, * packs into fat entries (hash, keys, agg_vals), scatters into * thread-local per-partition buffers. - * Phase 2 (parallel): Each partition is aggregated independently using + * Pass 2 (parallel): Each partition is aggregated independently using * inline data — no original column access needed. - * Phase 3: Build result columns from inline group rows. + * Pass 3: Build result columns from inline group rows. 
* ============================================================================ */ #define RADIX_BITS 8 @@ -2624,7 +2668,7 @@ typedef struct { uint8_t nullable_mask; /* bit k = key k column may contain nulls */ ray_t** agg_vecs; /* Second input column per agg; NULL when no binary aggs in this - * OP_GROUP. Phase 1 reads agg_vecs2[a] alongside agg_vecs[a] and + * OP_GROUP. Pass 1 reads agg_vecs2[a] alongside agg_vecs[a] and * packs (x, y) consecutively into the entry agg_vals area for any * agg whose layout bit agg_is_binary is set. */ ray_t** agg_vecs2; @@ -2754,7 +2798,7 @@ static void group_rows_indirect(group_ht_t* ht, const int8_t* key_types, } } -/* Phase 3: build result columns from inline group rows */ +/* Pass 3: build result columns from inline group rows */ typedef struct { int8_t out_type; bool src_f64; @@ -2816,7 +2860,7 @@ static void radix_phase3_fn(void* ctx, uint32_t worker_id, int64_t start, int64_ if (null_mask & (int64_t)(1u << k)) { if (c->key_cols && c->key_cols[k]) grp_set_null(c->key_cols[k], di); - /* Phase 2/3a dual encoding: fill correct-width sentinel. */ + /* Fill the correct-width sentinel. */ char* dst = c->key_dsts[k]; uint8_t esz = c->key_esizes[k]; size_t off = (size_t)di * esz; @@ -2955,7 +2999,7 @@ static void radix_phase3_fn(void* ctx, uint32_t worker_id, int64_t start, int64_ } } -/* Phase 2: aggregate each partition independently using inline data */ +/* Pass 2: aggregate each partition independently using inline data */ typedef struct { int8_t* key_types; uint8_t n_keys; @@ -3719,7 +3763,7 @@ typedef struct { /* per-worker accumulators (1 slot each) */ da_accum_t* accums; uint32_t n_accums; - /* Phase 3a: per-agg integer-null sentinel + mask (mirrors da_ctx_t). */ + /* Per-agg integer-null sentinel + mask (mirrors da_ctx_t). 
*/ uint32_t agg_int_null_mask; int64_t* agg_int_null_sentinel; } scalar_ctx_t; @@ -3807,13 +3851,13 @@ static inline void scalar_accum_row(scalar_ctx_t* c, da_accum_t* acc, int64_t r) } uint16_t op = c->agg_ops[a]; bool is_f = (c->agg_types[a] == RAY_F64); - /* Phase 3a dual encoding: NULL_I* sentinel = null. */ + /* NULL_I* sentinel = null. */ bool int_null = !is_f && (c->agg_int_null_mask & (1u << a)) && iv == c->agg_int_null_sentinel[a]; bool is_null = is_f ? !(fv == fv) : int_null; if (op == OP_SUM || op == OP_AVG || op == OP_STDDEV || op == OP_STDDEV_POP || op == OP_VAR || op == OP_VAR_POP) { if (is_f) { - /* Phase 2 dual encoding: NaN payload = null, skip from sum/sumsq. */ + /* NaN payload = null, skip from sum/sumsq. */ if (RAY_LIKELY(fv == fv)) { acc->sum[a].f += fv; if (acc->sumsq_f64) acc->sumsq_f64[a] += fv * fv; @@ -3904,19 +3948,15 @@ static inline void da_accum_row(da_ctx_t* c, da_accum_t* acc, int32_t gid, int64 acc->sum[idx].i += group_strlen_at(c->agg_cols[a], r); if (nn) nn[idx]++; } else if (f64m & (1u << a)) { - /* Phase 2 dual encoding: NaN payload = null, skip from sum. */ + /* NaN payload = null, skip from sum. */ double v = ((const double*)c->agg_ptrs[a])[r]; if (RAY_LIKELY(v == v)) { acc->sum[idx].f += v; if (nn) nn[idx]++; } } else { - /* Phase 3a dual encoding: NULL_I* sentinel = null, skip from sum. - * Only paid when the source column actually advertises nulls. - * - * Phase 3a hazard: this sentinel-compare drops user-stored INT_MIN - * values in HAS_NULLS columns. The plan accepted this tradeoff for - * the cache-line cost of nullmap consultation — dual encoding keeps - * the bitmap as source of truth, so the corruption is bounded to the - * narrow window where HAS_NULLS is set AND a non-null cell holds the - * sentinel value. */ + /* NULL_I* sentinel = null, skip from sum. Only paid when + * the source column actually advertises nulls. 
A user-stored + * INT_MIN value in a HAS_NULLS column is indistinguishable + * from a null and is dropped — this is the standard cost of + * sentinel-based null encoding for integers. */ int64_t v = read_col_i64(c->agg_ptrs[a], r, c->agg_types[a], 0); if (RAY_LIKELY(!((inm >> a) & 1) || v != c->agg_int_null_sentinel[a])) { acc->sum[idx].i += v; @@ -3938,10 +3978,9 @@ static inline void da_accum_row(da_ctx_t* c, da_accum_t* acc, int32_t gid, int64 * with disjoint null patterns can race — whichever non-null lands * first stakes first_row and the other agg never gets a chance. * The result for the "loser" agg is a typed null (nn[idx] stays 0), - * which is strictly safer than the previous behaviour (leaked the - * 0 calloc seed) but still not the true first-non-null value. Fix - * would require per-(group, agg) first_row arrays. Out of scope for - * this phase; documented for future work. */ + * which is strictly safer than leaking the 0 calloc seed but still + * not the true first-non-null value. Fix would require per-(group, + * agg) first_row arrays — documented for future work. */ bool fl_take_first = (acc->first_row && r < acc->first_row[gid]); bool fl_take_last = (acc->last_row && r > acc->last_row[gid]); bool first_advanced = false, last_advanced = false; @@ -3959,15 +3998,15 @@ static inline void da_accum_row(da_ctx_t* c, da_accum_t* acc, int32_t gid, int64 } uint16_t op = c->agg_ops[a]; bool is_f = (c->agg_types[a] == RAY_F64); - /* Phase 3a dual encoding: NULL_I* sentinel = null. Bit set in - * agg_int_null_mask AND value equal to per-agg sentinel means - * this row is null for an integer aggregation column. */ + /* NULL_I* sentinel = null. Bit set in agg_int_null_mask AND + * value equal to per-agg sentinel means this row is null for + * an integer aggregation column. */ bool int_null = (c->agg_int_null_mask & (1u << a)) && iv == c->agg_int_null_sentinel[a]; bool is_null = is_f ? 
!(fv == fv) : int_null; if (op == OP_SUM || op == OP_AVG || op == OP_STDDEV || op == OP_STDDEV_POP || op == OP_VAR || op == OP_VAR_POP) { if (is_f) { - /* Phase 2 dual encoding: NaN payload = null, skip from sum/sumsq. */ + /* NaN payload = null, skip from sum/sumsq. */ if (RAY_LIKELY(fv == fv)) { acc->sum[idx].f += fv; if (acc->sumsq_f64) acc->sumsq_f64[idx] += fv * fv; @@ -4012,8 +4051,8 @@ static inline void da_accum_row(da_ctx_t* c, da_accum_t* acc, int32_t gid, int64 } } else if (op == OP_MIN) { if (is_f) { - /* Phase 2 dual encoding: NaN comparisons are always false, but - * make the skip explicit. */ + /* NaN comparisons are always false, but make the skip + * explicit. */ if (fv == fv && fv < acc->min_val[idx].f) acc->min_val[idx].f = fv; } else if (!int_null) { if (iv < acc->min_val[idx].i) acc->min_val[idx].i = iv; @@ -5148,12 +5187,12 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, * The specialized scalar_sum_*_fn variants don't honour * match_idx — they read data[r] directly — so they're only * safe when no selection is in flight. They also read the - * slot raw, so they require null-free input: Phase 3a stores - * NULL_I{16,32,64} sentinels in null slots which would poison - * the sum. Fall back to the generic masked path when the - * source vector advertises nulls. (try_linear_sumavg_input_i64 - * already refuses to build a linear plan when any term column - * has nulls, so agg_linear[0].enabled implies null-free.) */ + * slot raw, so they require null-free input: NULL_I{16,32,64} + * sentinels in null slots would poison the sum. Fall back to + * the generic masked path when the source vector advertises + * nulls. (try_linear_sumavg_input_i64 already refuses to build + * a linear plan when any term column has nulls, so + * agg_linear[0].enabled implies null-free.) 
*/ typedef void (*scalar_fn_t)(void*, uint32_t, int64_t, int64_t); scalar_fn_t sc_fn = scalar_accum_fn; bool agg0_has_nulls = (sc_int_null_mask & 1u) != 0 || @@ -5428,10 +5467,10 @@ da_path:; int64_t da_int_null_sentinel[vla_aggs]; uint32_t agg_f64_mask = 0; uint32_t da_int_null_mask = 0; - /* Phase 3 follow-up: track whether any agg column can produce - * a null so we can allocate per-(group, agg) non-null counts - * only when required. F64 with HAS_NULLS uses NaN-skip; sentinel- - * typed integers with HAS_NULLS use sentinel-skip. */ + /* Track whether any agg column can produce a null so we can + * allocate per-(group, agg) non-null counts only when required. + * F64 with HAS_NULLS uses NaN-skip; sentinel-typed integers + * with HAS_NULLS use sentinel-skip. */ bool da_any_nullable = false; for (uint8_t a = 0; a < n_aggs; a++) { if (agg_vecs[a]) { @@ -5472,8 +5511,8 @@ da_path:; if (need_flags & DA_NEED_MIN) arrays_per_agg += 2; if (need_flags & DA_NEED_MAX) arrays_per_agg += 2; if (need_flags & DA_NEED_SUMSQ) arrays_per_agg += 1; - /* Phase 3 follow-up: nullable aggs add a per-(group, agg) - * non-null count array. ~8 bytes per (group, agg). */ + /* Nullable aggs add a per-(group, agg) non-null count array. + * ~8 bytes per (group, agg). */ if (da_any_nullable) arrays_per_agg += 1; uint64_t per_worker_bytes = (uint64_t)n_slots * (arrays_per_agg * n_aggs + 1u) * 8u; if ((uint64_t)da_n_workers * per_worker_bytes > DA_MEM_BUDGET) @@ -5883,15 +5922,12 @@ da_path:; if (op != OP_SUM && op != OP_AVG) sp_eligible = false; else { - /* Phase 3a: the single-key sparse aggregation path reads agg - * slots raw via read_col_i64 / direct double load; nullable - * input columns would poison the sum with NULL_I* or NULL_F64 - * sentinels. Fall back to slower paths that mask nulls - * properly. 
Scope note: this gate covers the scalar - * dispatcher and this single-key sparse path only; the - * multi-key radix HT (accum_from_entry, ~line 2155) inherits - * Phase 2's pre-existing nullable-agg gap and is out of scope - * for this commit. */ + /* The single-key sparse aggregation path reads agg slots + * raw via read_col_i64 / direct double load; nullable + * input columns would poison the sum with NULL_I* or + * NULL_F64 sentinels. Fall back to slower paths that + * mask nulls properly. (The multi-key radix HT at + * accum_from_entry inherits the same nullable-agg gap.) */ if (agg_vecs[a] && (agg_vecs[a]->attrs & RAY_ATTR_HAS_NULLS)) sp_eligible = false; else @@ -7187,7 +7223,7 @@ ht_path:; p1_nullable |= (uint8_t)(1u << k); } - /* Phase 1: parallel hash + copy keys/agg values into fat entries */ + /* Pass 1: parallel hash + copy keys/agg values into fat entries */ radix_phase1_ctx_t p1ctx = { .key_data = key_data, .key_types = key_types, @@ -7220,7 +7256,7 @@ ht_path:; } } - /* Phase 2: parallel per-partition aggregation (no column access) */ + /* Pass 2: parallel per-partition aggregation (no column access) */ part_hts = (group_ht_t*)scratch_calloc(&part_hts_hdr, RADIX_P * sizeof(group_ht_t)); if (!part_hts) { @@ -7339,7 +7375,7 @@ ht_path:; for (uint8_t k = 0; k < n_keys; k++) if (key_cols[k]) grp_prepare_nullmap(key_cols[k]); - /* Phase 3: parallel key gather + agg result building from inline rows */ + /* Pass 3: parallel key gather + agg result building from inline rows */ { radix_phase3_ctx_t p3ctx = { .part_hts = part_hts, @@ -7683,7 +7719,7 @@ sequential_fallback:; int64_t null_mask = rkeys[n_keys]; if (null_mask & (int64_t)(1u << k)) { ray_vec_set_null(new_col, (int64_t)gi, true); - /* Phase 2/3a dual encoding: fill correct-width sentinel. */ + /* Fill the correct-width sentinel. 
*/ switch (kt) { case RAY_F64: ((double*)ray_data(new_col))[gi] = NULL_F64; break; @@ -8186,9 +8222,9 @@ exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext, /* ---- Batched incremental merge ---- * Process partitions in batches of MERGE_BATCH. After each batch: - * Phase 1: exec_group each partition in batch → batch_partials[] - * Phase 2: concat (running + batch_partials + MAPCOMMON) → merge_tbl - * Phase 3: merge GROUP BY → new running + * Pass 1: exec_group each partition in batch → batch_partials[] + * Pass 2: concat (running + batch_partials + MAPCOMMON) → merge_tbl + * Pass 3: merge GROUP BY → new running * Bounds peak memory to O(MERGE_BATCH × groups_per_partition). */ #define MERGE_BATCH 8 @@ -8206,7 +8242,7 @@ exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext, if (batch_end > n_parts) batch_end = n_parts; int32_t batch_n = batch_end - batch_start; - /* Phase 1: exec_group each partition in this batch */ + /* Pass 1: exec_group each partition in this batch */ ray_t* bp[MERGE_BATCH]; memset(bp, 0, sizeof(bp)); @@ -8290,7 +8326,7 @@ exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext, } } - /* Phase 2: concat (running + batch_partials + MAPCOMMON) */ + /* Pass 2: concat (running + batch_partials + MAPCOMMON) */ int64_t mrows = running ? ray_table_nrows(running) : 0; for (int32_t i = 0; i < batch_n; i++) mrows += ray_table_nrows(bp[i]); @@ -8404,7 +8440,7 @@ exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext, bp[i] = NULL; } - /* Phase 3: merge GROUP BY */ + /* Pass 3: merge GROUP BY */ ray_graph_t* mg = ray_graph_new(merge_tbl); if (!mg) goto batch_fail; @@ -8806,19 +8842,19 @@ void pivot_ingest_free(pivot_ingest_t* out) { * * Three-phase parallel design. * - * Phase 1 (parallel rows): each worker scatters fat entries + * Pass 1 (parallel rows): each worker scatters fat entries * (hash:8, key_bits:8, val_bits:8) into per-(worker, partition) buffers * using the same 8-bit radix the OP_GROUP path uses (RADIX_P=256). 
No * hashmap in this phase — pure streaming write. Per-partition data fits * in L2 by construction. * - * Phase 2 (parallel partitions): RADIX_P tasks. Each partition iterates + * Pass 2 (parallel partitions): RADIX_P tasks. Each partition iterates * all worker buffers for its partition slot, probing a partition-local * open-addressing hashmap. Entries hold a bounded K-slot heap (min-heap * for top, max-heap for bot — root = worst-of-kept). No cross-partition * contention. * - * Phase 3 (parallel partitions): each partition heapsort-drains its heap + * Pass 3 (parallel partitions): each partition heapsort-drains its heap * entries into the pre-allocated output columns at its row range. Row * ranges come from a prefix-sum over per-partition kept-counts. * @@ -8829,8 +8865,8 @@ void pivot_ingest_free(pivot_ingest_t* out) { * explode in user code. * ============================================================================ */ -/* Scatter entry: 3 × 8 bytes = 24 bytes per row. Phase 1 writes these - * sequentially into per-partition buffers; Phase 2 reads them linearly. +/* Scatter entry: 3 × 8 bytes = 24 bytes per row. Pass 1 writes these + * sequentially into per-partition buffers; Pass 2 reads them linearly. * word 0: hash (used for HT probe and salt extraction) * word 1: key bits (canonical int64 — reinterp to double for F64) * word 2: val bits (canonical int64 — reinterp to double for F64) */ @@ -9043,7 +9079,7 @@ static inline void grpt_heap_push_i64(int64_t* heap, uint8_t* kept_p, } } -/* ─── Phase 1 ────────────────────────────────────────────────────────── +/* ─── Pass 1 ────────────────────────────────────────────────────────── * Per-worker scan: read (key, val) per row, dispatch into per-worker * hashmap. Specialized inner loops for (key_type, val_type) so the * branch out of `topk_read_*` lifts out of the hot loop. 
The dominant @@ -9055,8 +9091,10 @@ typedef struct { const void* val_data; int8_t key_type; int8_t val_type; - const uint8_t* key_null_bm; - const uint8_t* val_null_bm; + uint8_t key_attrs; /* for SYM width via ray_sym_elem_size */ + uint8_t val_attrs; + bool key_has_nulls; + bool val_has_nulls; int val_is_f64; /* outputs: per-worker × per-partition scatter buffers */ grpt_scat_buf_t* bufs; /* [n_workers * RADIX_P] */ @@ -9098,8 +9136,30 @@ static inline uint64_t grpt_key_hash(int64_t bits, int8_t t) { return ray_hash_i64(bits); } -static inline bool grpt_is_null(const uint8_t* nbm, int64_t row) { - return (nbm[row >> 3] >> (row & 7)) & 1; +/* Type-correct sentinel null check for the grpt paths. Uses the same + * type dispatch as cdpg_is_null; duplicated locally to keep the helper + * inline at hot-loop scope. */ +static inline bool grpt_is_null(const void* base, int8_t t, uint8_t attrs, + int64_t row) { + switch (t) { + case RAY_F64: { double f; memcpy(&f, (const char*)base + (size_t)row*8, 8); return f != f; } + case RAY_F32: { float f; memcpy(&f, (const char*)base + (size_t)row*4, 4); return f != f; } + case RAY_I64: case RAY_TIMESTAMP: + return ((const int64_t*)base)[row] == NULL_I64; + case RAY_I32: case RAY_DATE: case RAY_TIME: + return ((const int32_t*)base)[row] == NULL_I32; + case RAY_I16: + return ((const int16_t*)base)[row] == NULL_I16; + case RAY_SYM: + switch (ray_sym_elem_size(t, attrs)) { + case 1: return ((const uint8_t*) base)[row] == 0; + case 2: return ((const uint16_t*)base)[row] == 0; + case 4: return ((const uint32_t*)base)[row] == 0; + default: return ((const int64_t*) base)[row] == 0; + } + default: /* BOOL/U8 non-nullable */ + return false; + } } static inline int64_t grpt_val_read(const void* base, int8_t t, int64_t row, @@ -9149,13 +9209,15 @@ static void grpt_phase1_fn(void* ctx_v, uint32_t worker_id, int val_is_f64 = c->val_is_f64; const void* kbase = c->key_data; const void* vbase = c->val_data; - const uint8_t* knbm = c->key_null_bm; 
- const uint8_t* vnbm = c->val_null_bm; + uint8_t kattrs = c->key_attrs; + uint8_t vattrs = c->val_attrs; + bool knulls = c->key_has_nulls; + bool vnulls = c->val_has_nulls; for (int64_t r = start; r < end; r++) { /* Skip null value rows (match standalone `top` and DuckDB WHERE * v IS NOT NULL). */ - if (vnbm && grpt_is_null(vnbm, r)) continue; + if (vnulls && grpt_is_null(vbase, vt, vattrs, r)) continue; /* Skip null keys too: matches the OP_TOP_N path's effective * behaviour and DuckDB's groupby semantics where NULL keys form * a discarded group (we mirror DuckDB which drops null-key rows @@ -9163,7 +9225,7 @@ static void grpt_phase1_fn(void* ctx_v, uint32_t worker_id, * correctness impact on the bench path; small-data fixtures with * null id6 are routed away by the type-restriction in the * planner (no SYM keys). */ - if (knbm && grpt_is_null(knbm, r)) continue; + if (knulls && grpt_is_null(kbase, kt, kattrs, r)) continue; int64_t key_bits = grpt_key_read(kbase, kt, r); uint64_t h = grpt_key_hash(key_bits, kt); int64_t val_bits = grpt_val_read(vbase, vt, r, val_is_f64); @@ -9172,7 +9234,7 @@ static void grpt_phase1_fn(void* ctx_v, uint32_t worker_id, } } -/* ─── Phase 2 ────────────────────────────────────────────────────────── +/* ─── Pass 2 ────────────────────────────────────────────────────────── * Per-partition aggregation. RADIX_P tasks. Each task iterates all * per-worker scatter buffers for its partition slot, probes a * partition-local hashmap, and applies bounded-heap insert. HT size @@ -9257,7 +9319,7 @@ static void grpt_phase2_fn(void* ctx_v, uint32_t worker_id, } } -/* ─── Phase 3 ────────────────────────────────────────────────────────── +/* ─── Pass 3 ────────────────────────────────────────────────────────── * Per-partition emit. Walk merged hashmap, sort each heap in-place * (heapsort: swap root with tail, sift, repeat), then write rows. 
*/ @@ -9329,18 +9391,14 @@ static void grpt_phase3_fn(void* ctx_v, uint32_t worker_id, /* Key write — replicate same key across kept rows. */ if (e->has_null_key) { /* Write width-correct sentinel then mark null on the - * output column. Phase 2/3a dual encoding: payload - * must hold INT_MIN/NaN per type, not 0. - * ray_vec_set_null is not threadsafe across workers - * for the same word; but each partition writes a - * contiguous row range so two partitions never touch - * the same nullmap word — unless a row range - * straddles an 8-row boundary that another - * partition's range also touches. In practice the - * null-key case at most produces K rows and - * partitions are large; we serialise null-key - * writes by routing the null-key entry into the - * sequential final-pass below. */ + * output column. Payload must hold INT_MIN/NaN per + * type, not 0. ray_vec_set_null is not threadsafe + * across workers for the same HAS_NULLS write; each + * partition writes a contiguous row range so two + * partitions normally don't collide, but the null-key + * case (at most K rows, partitions large) is routed + * into the sequential final-pass below to serialise + * its null write. */ int64_t null_bits = 0; switch (c->key_type) { case RAY_F64: { @@ -9355,7 +9413,7 @@ static void grpt_phase3_fn(void* ctx_v, uint32_t worker_id, case RAY_I16: null_bits = (int64_t)NULL_I16; break; default: - /* BOOL/U8 — non-nullable per Phase 1, keep 0. */ + /* BOOL/U8 — non-nullable, keep 0. */ null_bits = 0; break; } grpt_write_key(c->key_out, row + j, null_bits, kesz); @@ -9467,10 +9525,10 @@ ray_t* exec_group_topk_rowform(ray_graph_t* g, ray_op_t* op) { .val_data = ray_data(val_vec), .key_type = kt, .val_type = vt, - .key_null_bm = (key_vec->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(key_vec, NULL, NULL) : NULL, - .val_null_bm = (val_vec->attrs & RAY_ATTR_HAS_NULLS) - ? 
ray_vec_nullmap_bytes(val_vec, NULL, NULL) : NULL, + .key_attrs = key_vec->attrs, + .val_attrs = val_vec->attrs, + .key_has_nulls = (key_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, + .val_has_nulls = (val_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, .val_is_f64 = (vt == RAY_F64) ? 1 : 0, .bufs = bufs, .n_workers = n_workers, @@ -9492,7 +9550,7 @@ ray_t* exec_group_topk_rowform(ray_graph_t* g, ray_op_t* op) { } } - /* Phase 2: per-partition HT build. */ + /* Pass 2: per-partition HT build. */ ray_t* phts_hdr = NULL; grpt_ht_t* part_hts = (grpt_ht_t*)scratch_calloc(&phts_hdr, (size_t)RADIX_P * sizeof(grpt_ht_t)); @@ -9614,14 +9672,14 @@ ray_t* exec_group_topk_rowform(ray_graph_t* g, ray_op_t* op) { * exec_group_topk_rowform. * * Algorithm: - * Phase 1: morsel-parallel scan reads (k0[,k1], x, y) per row, + * Pass 1: morsel-parallel scan reads (k0[,k1], x, y) per row, * composes hash from key(s), scatters fat entries into * per-(worker, partition) buffers — no contention. - * Phase 2: RADIX_P parallel tasks build a per-partition HT. Each + * Pass 2: RADIX_P parallel tasks build a per-partition HT. Each * entry holds the fixed Pearson state (Σx, Σy, Σx², Σy², * Σxy, cnt). Each scatter entry probes/inserts and * accumulates in-place. - * Phase 3: walk all partition HTs, compute r² from state, emit + * Pass 3: walk all partition HTs, compute r² from state, emit * (key0[, key1], r²) row form. * * Per-row scatter stride: 40 B (hash + 2×key + 2×val). 1-key shape @@ -9768,7 +9826,7 @@ grpc_ht_get(grpc_ht_t* ht, uint64_t hash, int64_t k0, int64_t k1) { } } -/* ─── Phase 1 ────────────────────────────────────────────────────────── +/* ─── Pass 1 ────────────────────────────────────────────────────────── * Per-worker scan: read (k0[, k1], x, y) per row, hash, scatter into * partition buckets. Skips rows with null x, y, or any key. 
*/ @@ -9783,10 +9841,10 @@ typedef struct { int8_t y_type; uint8_t k0_attrs; uint8_t k1_attrs; - const uint8_t* k0_null_bm; - const uint8_t* k1_null_bm; - const uint8_t* x_null_bm; - const uint8_t* y_null_bm; + bool k0_has_nulls; + bool k1_has_nulls; + bool x_has_nulls; + bool y_has_nulls; uint8_t n_keys; uint8_t x_is_f64; uint8_t y_is_f64; @@ -9794,8 +9852,28 @@ typedef struct { uint32_t n_workers; } grpc_phase1_ctx_t; -static inline bool grpc_is_null(const uint8_t* nbm, int64_t row) { - return (nbm[row >> 3] >> (row & 7)) & 1; +/* Type-correct sentinel null check for grpc paths. Identical shape to + * grpt_is_null; duplicated here to keep the hot loop inline-local. */ +static inline bool grpc_is_null(const void* base, int8_t t, uint8_t attrs, + int64_t row) { + switch (t) { + case RAY_F64: { double f; memcpy(&f, (const char*)base + (size_t)row*8, 8); return f != f; } + case RAY_F32: { float f; memcpy(&f, (const char*)base + (size_t)row*4, 4); return f != f; } + case RAY_I64: case RAY_TIMESTAMP: + return ((const int64_t*)base)[row] == NULL_I64; + case RAY_I32: case RAY_DATE: case RAY_TIME: + return ((const int32_t*)base)[row] == NULL_I32; + case RAY_I16: + return ((const int16_t*)base)[row] == NULL_I16; + case RAY_SYM: + switch (ray_sym_elem_size(t, attrs)) { + case 1: return ((const uint8_t*) base)[row] == 0; + case 2: return ((const uint16_t*)base)[row] == 0; + case 4: return ((const uint32_t*)base)[row] == 0; + default: return ((const int64_t*) base)[row] == 0; + } + default: return false; + } } static inline double grpc_val_read_dbl(const void* base, int8_t t, int64_t row, @@ -9835,11 +9913,11 @@ static void grpc_phase1_fn(void* ctx_v, uint32_t worker_id, grpc_scat_buf_t* my_bufs = &c->bufs[(size_t)worker_id * RADIX_P]; for (int64_t r = start; r < end; r++) { - if (c->x_null_bm && grpc_is_null(c->x_null_bm, r)) continue; - if (c->y_null_bm && grpc_is_null(c->y_null_bm, r)) continue; - if (c->k0_null_bm && grpc_is_null(c->k0_null_bm, r)) continue; - if 
(c->n_keys == 2 && c->k1_null_bm && grpc_is_null(c->k1_null_bm, r)) - continue; + if (c->x_has_nulls && grpc_is_null(c->x_data, c->x_type, 0, r)) continue; + if (c->y_has_nulls && grpc_is_null(c->y_data, c->y_type, 0, r)) continue; + if (c->k0_has_nulls && grpc_is_null(c->k0_data, c->k0_type, c->k0_attrs, r)) continue; + if (c->n_keys == 2 && c->k1_has_nulls && + grpc_is_null(c->k1_data, c->k1_type, c->k1_attrs, r)) continue; int64_t k0 = read_col_i64(c->k0_data, r, c->k0_type, c->k0_attrs); int64_t k1 = 0; uint64_t h = ray_hash_i64(k0); @@ -9856,7 +9934,7 @@ static void grpc_phase1_fn(void* ctx_v, uint32_t worker_id, } } -/* ─── Phase 2 ────────────────────────────────────────────────────────── +/* ─── Pass 2 ────────────────────────────────────────────────────────── * RADIX_P tasks. Each builds a partition HT and accumulates Pearson * state from the scatter entries in its partition. */ @@ -10029,14 +10107,10 @@ ray_t* exec_group_pearson_rowform(ray_graph_t* g, ray_op_t* op) { .y_type = yt, .k0_attrs = k_attrs[0], .k1_attrs = k_attrs[1], - .k0_null_bm = (k_vecs[0]->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(k_vecs[0], NULL, NULL) : NULL, - .k1_null_bm = (ext->n_keys == 2 && (k_vecs[1]->attrs & RAY_ATTR_HAS_NULLS)) - ? ray_vec_nullmap_bytes(k_vecs[1], NULL, NULL) : NULL, - .x_null_bm = (x_vec->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(x_vec, NULL, NULL) : NULL, - .y_null_bm = (y_vec->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(y_vec, NULL, NULL) : NULL, + .k0_has_nulls = (k_vecs[0]->attrs & RAY_ATTR_HAS_NULLS) != 0, + .k1_has_nulls = (ext->n_keys == 2 && (k_vecs[1]->attrs & RAY_ATTR_HAS_NULLS)) != 0, + .x_has_nulls = (x_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, + .y_has_nulls = (y_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, .n_keys = ext->n_keys, .x_is_f64 = (xt == RAY_F64) ? 1 : 0, .y_is_f64 = (yt == RAY_F64) ? 1 : 0, @@ -10059,7 +10133,7 @@ ray_t* exec_group_pearson_rowform(ray_graph_t* g, ray_op_t* op) { } } - /* Phase 2. */ + /* Pass 2. 
*/ ray_t* phts_hdr = NULL; grpc_ht_t* part_hts = (grpc_ht_t*)scratch_calloc(&phts_hdr, (size_t)RADIX_P * sizeof(grpc_ht_t)); @@ -10097,7 +10171,7 @@ ray_t* exec_group_pearson_rowform(ray_graph_t* g, ray_op_t* op) { } } - /* Phase 3 — emit row form. Allocate output columns sized to total + /* Pass 3 — emit row form. Allocate output columns sized to total * entries, fill sequentially by walking partitions in order. */ int64_t total_rows = 0; for (uint32_t p = 0; p < RADIX_P; p++) total_rows += part_emit_rows[p]; @@ -10328,9 +10402,9 @@ typedef struct { int8_t x_type; int8_t y_type; uint8_t k_attrs; - const uint8_t* k_null_bm; - const uint8_t* x_null_bm; - const uint8_t* y_null_bm; + bool k_has_nulls; + bool x_has_nulls; + bool y_has_nulls; grpmm_scat_buf_t* bufs; uint32_t n_workers; } grpmm_phase1_ctx_t; @@ -10360,9 +10434,9 @@ static void grpmm_phase1_fn(void* ctx_v, uint32_t worker_id, grpmm_scat_buf_t* my_bufs = &c->bufs[(size_t)worker_id * RADIX_P]; for (int64_t r = start; r < end; r++) { - if (c->x_null_bm && (c->x_null_bm[r >> 3] >> (r & 7)) & 1) continue; - if (c->y_null_bm && (c->y_null_bm[r >> 3] >> (r & 7)) & 1) continue; - if (c->k_null_bm && (c->k_null_bm[r >> 3] >> (r & 7)) & 1) continue; + if (c->x_has_nulls && grpc_is_null(c->x_data, c->x_type, 0, r)) continue; + if (c->y_has_nulls && grpc_is_null(c->y_data, c->y_type, 0, r)) continue; + if (c->k_has_nulls && grpc_is_null(c->k_data, c->k_type, c->k_attrs, r)) continue; int64_t k = read_col_i64(c->k_data, r, c->k_type, c->k_attrs); int64_t x = read_col_i64(c->x_data, r, c->x_type, 0); int64_t y = read_col_i64(c->y_data, r, c->y_type, 0); @@ -10506,12 +10580,9 @@ ray_t* exec_group_maxmin_rowform(ray_graph_t* g, ray_op_t* op) { .y_data = ray_data(y_vec), .k_type = kt, .x_type = xt, .y_type = yt, .k_attrs = k_vec->attrs, - .k_null_bm = (k_vec->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(k_vec, NULL, NULL) : NULL, - .x_null_bm = (x_vec->attrs & RAY_ATTR_HAS_NULLS) - ? 
ray_vec_nullmap_bytes(x_vec, NULL, NULL) : NULL, - .y_null_bm = (y_vec->attrs & RAY_ATTR_HAS_NULLS) - ? ray_vec_nullmap_bytes(y_vec, NULL, NULL) : NULL, + .k_has_nulls = (k_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, + .x_has_nulls = (x_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, + .y_has_nulls = (y_vec->attrs & RAY_ATTR_HAS_NULLS) != 0, .bufs = bufs, .n_workers = n_workers, }; @@ -10627,16 +10698,16 @@ ray_t* exec_group_maxmin_rowform(ray_graph_t* g, ray_op_t* op) { * Bypasses the shared OP_GROUP path's two-stage holistic fill (reprobe + * histogram + scatter) by computing both aggregates from a single radix * pipeline: - * Phase 1 (parallel): scatter rows into per-(worker,partition) bufs + * Pass 1 (parallel): scatter rows into per-(worker,partition) bufs * as (hash, key0, key1, v3) fat entries. - * Phase 2 (parallel per partition): + * Pass 2 (parallel per partition): * Pass 1 — probe HT, accumulate {cnt, sum, sumsq} per group. * Cumsum cnt → per-group offsets into the partition's v_buf. * Pass 2 — re-walk entries, scatter v3 into v_buf at the * bucketed position for each group. * Result: per-partition v_buf is group-contiguous, ready for * a per-group quickselect (no cross-partition scatter). - * Phase 3 (parallel per partition): + * Pass 3 (parallel per partition): * For each group, run ray_median_dbl_inplace on its slice and * emit median + std(sample) into the output columns. * ════════════════════════════════════════════════════════════════════════ */ @@ -10658,7 +10729,7 @@ typedef struct { double sum; double sumsq; uint32_t val_off; /* offset into ph->v_buf for this group's slice */ - uint32_t val_pos; /* cursor during Phase 2 Pass 2 (scatter v3) */ + uint32_t val_pos; /* cursor during Pass 2 Pass 2 (scatter v3) */ } grpms_entry_t; typedef struct { @@ -11126,7 +11197,7 @@ ray_t* exec_group_median_stddev_rowform(ray_graph_t* g, ray_op_t* op) { } } - /* Phase 2. */ + /* Pass 2. 
*/ ray_t* phts_hdr = NULL; grpms_ht_t* part_hts = (grpms_ht_t*)scratch_calloc(&phts_hdr, (size_t)RADIX_P * sizeof(grpms_ht_t)); @@ -11164,7 +11235,7 @@ ray_t* exec_group_median_stddev_rowform(ray_graph_t* g, ray_op_t* op) { } } - /* Scatter bufs no longer needed — release before Phase 3 to lower peak RSS. */ + /* Scatter bufs no longer needed — release before Pass 3 to lower peak RSS. */ for (size_t j = 0; j < n_bufs; j++) if (bufs[j]._hdr) { scratch_free(bufs[j]._hdr); bufs[j]._hdr = NULL; } scratch_free(bufs_hdr); bufs_hdr = NULL; bufs = NULL; @@ -11216,7 +11287,7 @@ ray_t* exec_group_median_stddev_rowform(ray_graph_t* g, ray_op_t* op) { std_out->len = total_rows; if (cnt_out) cnt_out->len = total_rows; - /* Phase 3: per partition, emit keys + median + stddev. */ + /* Pass 3: per partition, emit keys + median + stddev. */ grpms_phase3_ctx_t p3 = { .part_hts = part_hts, .part_offsets = part_offsets, diff --git a/src/ops/idxop.c b/src/ops/idxop.c index b3817a60..3f74476b 100644 --- a/src/ops/idxop.c +++ b/src/ops/idxop.c @@ -111,70 +111,24 @@ static ray_t* ray_index_alloc(ray_idx_kind_t kind, int8_t parent_type, int64_t p return idx; } -/* Reading saved-nullmap pointers: typed views into the 16-byte snapshot. 
*/ -static inline ray_t* saved_lo_ptr(ray_index_t* ix) { - ray_t* p; memcpy(&p, &ix->saved_nullmap[0], sizeof(p)); return p; -} -static inline ray_t* saved_hi_ptr(ray_index_t* ix) { - ray_t* p; memcpy(&p, &ix->saved_nullmap[8], sizeof(p)); return p; -} -static inline void saved_lo_clear(ray_index_t* ix) { - memset(&ix->saved_nullmap[0], 0, 8); -} -static inline void saved_hi_clear(ray_index_t* ix) { - memset(&ix->saved_nullmap[8], 0, 8); -} - /* -------------------------------------------------------------------------- * Saved-nullmap retain / release * - * The saved 16 bytes hold pointers iff (parent_type, saved_attrs) say so: - * - saved_attrs & NULLMAP_EXT => low 8 bytes are an owning ray_t* (ext nullmap) - * *except* RAY_STR uses the same slot for - * str_ext_null (also an owning ref) — same - * semantics, same ownership. - * - parent_type == RAY_STR => high 8 bytes are str_pool (owning ref) - * - parent_type == RAY_SYM and saved_attrs & NULLMAP_EXT - * => high 8 bytes are sym_dict (owning ref) - * - * For all other type/attr combos the bytes are inline bitmap data, not - * pointers, and we leave them alone. - * -------------------------------------------------------------------------- */ + * The 16 byte snapshot preserves the parent's original nullmap-union bytes + * across attach/detach. Since index attach is restricted to numeric + * types (see prepare_attach), the snapshot contains either: + * - all-zero bytes (no link, no nulls), or + * - bytes 8-15 hold an int64 link_target (HAS_LINK on I32/I64 cols). + * Neither case carries an owning ray_t* reference, so retain/release + * are no-ops. The functions remain to preserve the heap.c / vec.c + * call sites symmetric with the pre-migration layout. 
*/ void ray_index_release_saved(ray_index_t* ix) { - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* lo = saved_lo_ptr(ix); - if (lo && !RAY_IS_ERR(lo)) ray_release(lo); - saved_lo_clear(ix); - } - if (ix->parent_type == RAY_STR) { - ray_t* hi = saved_hi_ptr(ix); - if (hi && !RAY_IS_ERR(hi)) ray_release(hi); - saved_hi_clear(ix); - } else if (ix->parent_type == RAY_SYM && - (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT)) { - /* RAY_SYM stores sym_dict at high 8 bytes only when an ext nullmap - * is present (otherwise the inline bitmap occupies both halves and - * sym_dict isn't materialized in the union slot). */ - ray_t* hi = saved_hi_ptr(ix); - if (hi && !RAY_IS_ERR(hi)) ray_release(hi); - saved_hi_clear(ix); - } + (void)ix; } void ray_index_retain_saved(ray_index_t* ix) { - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* lo = saved_lo_ptr(ix); - if (lo && !RAY_IS_ERR(lo)) ray_retain(lo); - } - if (ix->parent_type == RAY_STR) { - ray_t* hi = saved_hi_ptr(ix); - if (hi && !RAY_IS_ERR(hi)) ray_retain(hi); - } else if (ix->parent_type == RAY_SYM && - (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT)) { - ray_t* hi = saved_hi_ptr(ix); - if (hi && !RAY_IS_ERR(hi)) ray_retain(hi); - } + (void)ix; } /* -------------------------------------------------------------------------- @@ -311,39 +265,32 @@ static ray_err_t zone_scan(ray_t* v, ray_index_t* ix) { /* -------------------------------------------------------------------------- * Attach * - * The 16-byte snapshot must be taken AFTER the scan (so the scan reads the - * parent's normal nullmap) but BEFORE we overwrite parent->nullmap with the - * index pointer. Ownership transfer: pointers in the snapshot (ext_nullmap, - * str_pool, sym_dict) move from parent to ix. We do NOT retain them here — - * the existing refs simply move. Symmetrically, when we install the index - * pointer in parent->nullmap, we transfer that single ref to the parent - * (no extra retain). 
+ * The 16-byte snapshot preserves the parent's nullmap-union bytes across + * the attachment so detach can restore them byte-for-byte. For numeric + * vectors (the only types that may attach) bytes 0-7 are unused and + * bytes 8-15 carry link_target when HAS_LINK is set — no owned pointers + * either way. We do NOT retain anything here; the index pointer install + * at bytes 0-7 transfers a single ref to the parent (no extra retain). * -------------------------------------------------------------------------- */ static ray_t* attach_finalize(ray_t* parent, ray_t* idx) { ray_index_t* ix = ray_index_payload(idx); /* Snapshot the parent's 16 raw bytes verbatim. */ memcpy(ix->saved_nullmap, parent->nullmap, 16); - ix->saved_attrs = parent->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); + ix->saved_attrs = parent->attrs & RAY_ATTR_HAS_NULLS; /* Install the index pointer — overwrites bytes 0-7 with the index ptr. * Bytes 8-15 carry link_target when HAS_LINK is set; preserve them. - * Otherwise zero _idx_pad as a tidy default. */ - parent->index = idx; - if (!(parent->attrs & RAY_ATTR_HAS_LINK)) parent->_idx_pad = NULL; - parent->attrs |= RAY_ATTR_HAS_INDEX; - /* Clear NULLMAP_EXT on the parent: vec->ext_nullmap is now the index - * pointer, not a U8 nullmap vec, so naive readers that gate on - * NULLMAP_EXT and dereference ext_nullmap would read garbage. The - * displaced ext-nullmap pointer is preserved inside ix->saved_nullmap[0..7] - * and accessed via the HAS_INDEX-aware helpers in vec.c / morsel.c. + * Otherwise zero _idx_pad as a tidy default. * * IMPORTANT: HAS_NULLS is *preserved* on the parent so the many call * sites that use it as a cheap "do I need null logic at all?" gate - * continue to give correct answers. The actual null bits are read - * via ray_vec_is_null / ray_morsel_next, both of which check - * HAS_INDEX first and route through the saved snapshot. */ - parent->attrs &= (uint8_t)~RAY_ATTR_NULLMAP_EXT; + * continue to give correct answers. 
The actual null state is read + * via ray_vec_is_null (sentinel-based), which is unaffected by the + * index pointer overlay at bytes 0-7. */ + parent->index = idx; + if (!(parent->attrs & RAY_ATTR_HAS_LINK)) parent->_idx_pad = NULL; + parent->attrs |= RAY_ATTR_HAS_INDEX; return parent; } @@ -565,10 +512,8 @@ ray_t* ray_index_drop(ray_t** vp) { /* Shared-index case: another vec may share this RAY_INDEX block via * ray_alloc_copy (rc>1). Don't clobber the snapshot in that case — - * the other holder still reads it. Copy our own retained refs to - * the saved-pointer slots so the bytes we move into v->nullmap are - * owned by v. See vec_drop_index_inplace for the same pattern. */ - uint8_t saved = ix->saved_attrs; + * the other holder still reads it. See vec_drop_index_inplace for + * the same pattern. */ bool shared = ray_atomic_load(&idx->rc) > 1; if (shared) { ray_index_retain_saved(ix); @@ -579,11 +524,9 @@ ray_t* ray_index_drop(ray_t** vp) { ix->saved_attrs = 0; } - /* Restore parent attrs. HAS_NULLS was preserved through the attachment - * so we don't need to OR it back in; only NULLMAP_EXT (which we cleared - * at attach time) needs to be reinstated from saved_attrs. */ + /* Restore parent attrs. HAS_NULLS was preserved through the + * attachment so it needs no restoration. */ v->attrs &= (uint8_t)~RAY_ATTR_HAS_INDEX; - if (saved & RAY_ATTR_NULLMAP_EXT) v->attrs |= RAY_ATTR_NULLMAP_EXT; /* Release the index. Per-kind children are released by the RAY_INDEX * branch of ray_release_owned_refs (added in heap.c). */ diff --git a/src/ops/idxop.h b/src/ops/idxop.h index 5dcc4c34..46d294bc 100644 --- a/src/ops/idxop.h +++ b/src/ops/idxop.h @@ -43,7 +43,7 @@ */ #include -#include "mem/heap.h" /* RAY_ATTR_HAS_INDEX, RAY_ATTR_NULLMAP_EXT */ +#include "mem/heap.h" /* RAY_ATTR_HAS_INDEX */ /* Index kinds. Stored in ray_index_t.kind. */ typedef enum { @@ -57,16 +57,16 @@ typedef enum { /* The payload stored inside data[] of a RAY_INDEX ray_t. 
*/ typedef struct { uint8_t kind; /* ray_idx_kind_t */ - uint8_t saved_attrs; /* parent attrs & (HAS_NULLS|NULLMAP_EXT) at attach */ - int8_t parent_type; /* parent->type (for restore-time pointer interp) */ + uint8_t saved_attrs; /* parent attrs & HAS_NULLS at attach */ + int8_t parent_type; /* parent->type (recorded for diagnostics) */ uint8_t reserved; int64_t built_for_len; /* parent->len at attach (mismatch -> stale) */ - /* Raw 16-byte snapshot of parent->nullmap union at attach time. - * Restored verbatim on detach. When this contains pointers - * (ext_nullmap, str_pool, sym_dict, str_ext_null) they are owned - * by THIS ray_t for the duration of the attachment; release-side - * of RAY_INDEX walks these based on (parent_type, saved_attrs). */ + /* Raw 16-byte snapshot of parent->nullmap union at attach time, + * restored verbatim on detach. For the numeric vector types that + * may attach an index (see prepare_attach) this snapshot holds no + * owned ray_t* refs: bytes 0-7 are unused and bytes 8-15 carry the + * link_target int64 when HAS_LINK is set. */ uint8_t saved_nullmap[16]; /* Kind-specific payload. All ray_t* fields are owning refs. */ diff --git a/src/ops/internal.h b/src/ops/internal.h index 6badf146..318ab119 100644 --- a/src/ops/internal.h +++ b/src/ops/internal.h @@ -964,7 +964,7 @@ void ray_group_emit_filter_set(ray_group_emit_filter_t filter); * When match_idx is NULL, `row = i` — iterating directly over source * column rows (no selection). */ /* agg_vecs2 is the optional y-side input column per agg (NULL when no - * binary aggs). Phase 1 packs (x, y) consecutively for binary aggs. */ + * binary aggs). Pass 1 packs (x, y) consecutively for binary aggs. 
*/ void group_rows_range(group_ht_t* ht, void** key_data, int8_t* key_types, uint8_t* key_attrs, ray_t** key_vecs, ray_t** agg_vecs, ray_t** agg_vecs2, @@ -1070,60 +1070,76 @@ ray_t* exec_node(ray_graph_t* g, ray_op_t* op); * Thread-safe null bitmap helpers (parallel group/window) * ══════════════════════════════════════════ */ -/* Atomically set a null bit. For idx >= 128 without ext nullmap, falls back - * to ray_vec_set_null (lazy alloc). Safe because OOM forces sequential path. */ +/* Parallel-safe null marker. Writes the type-correct NULL_* sentinel + * into payload[idx] and atomically ORs HAS_NULLS into vec->attrs. + * Payload write needs no synchronisation — different threads call this + * with different idx, so each per-slot store is uncontended. attrs OR + * is atomic so the read-modify-write on the shared attrs byte is safe. + * + * BOOL/U8/SYM are non-nullable (rejected at the producer surface) and + * are no-ops here. STR/GUID don't appear in parallel aggregation/window + * output columns and likewise no-op. 
*/ static inline void par_set_null(ray_t* vec, int64_t idx) { - if (!(vec->attrs & RAY_ATTR_NULLMAP_EXT)) { - if (idx >= 128) { - ray_vec_set_null(vec, idx, true); - return; - } - int byte_idx = (int)(idx / 8); - int bit_idx = (int)(idx % 8); - __atomic_fetch_or(&vec->nullmap[byte_idx], - (uint8_t)(1u << bit_idx), __ATOMIC_RELAXED); - return; + void* p = ray_data(vec); + switch (vec->type) { + case RAY_F64: ((double*)p)[idx] = NULL_F64; break; + case RAY_F32: ((float*)p)[idx] = NULL_F32; break; + case RAY_I64: case RAY_TIMESTAMP: ((int64_t*)p)[idx] = NULL_I64; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: ((int32_t*)p)[idx] = NULL_I32; break; + case RAY_I16: ((int16_t*)p)[idx] = NULL_I16; break; + default: return; } - ray_t* ext = vec->ext_nullmap; - uint8_t* bits = (uint8_t*)ray_data(ext); - int byte_idx = (int)(idx / 8); - int bit_idx = (int)(idx % 8); - __atomic_fetch_or(&bits[byte_idx], - (uint8_t)(1u << bit_idx), __ATOMIC_RELAXED); + __atomic_fetch_or(&vec->attrs, (uint8_t)RAY_ATTR_HAS_NULLS, + __ATOMIC_RELAXED); } -/* Pre-allocate external nullmap so parallel threads can set bits safely. - * - * Probe at idx>=128 (not idx=0): ray_vec_set_null_checked(vec, 0, true) - * stays in the inline-nullmap path because the inline 16-byte bitmap - * fits idx<128 — so it never promotes to ext_nullmap. par_set_null - * for idx>=128 would then race-crash on lazy ext alloc. Probing at - * len-1 forces the promotion path. */ +/* No-op kept for symmetry with the historical bitmap-promotion helper. + * Sentinel writes are unconditional and need no pre-allocation. 
*/ static inline ray_err_t par_prepare_nullmap(ray_t* vec) { - if (vec->len <= 128) return RAY_OK; - int64_t probe = vec->len - 1; /* >= 128, forces ext promotion */ - ray_err_t err = ray_vec_set_null_checked(vec, probe, true); - if (err != RAY_OK) return err; - ray_vec_set_null_checked(vec, probe, false); - vec->attrs &= (uint8_t)~RAY_ATTR_HAS_NULLS; + (void)vec; return RAY_OK; } -/* Scan nullmap after parallel execution; set RAY_ATTR_HAS_NULLS if any bit set. */ +/* Scan payload after parallel execution and set RAY_ATTR_HAS_NULLS if + * any element carries the type-correct NULL_* sentinel. This catches + * the case where par_set_null's atomic OR raced with another thread's + * load before it took effect — the scan is the post-hoc authoritative + * check. No-op for non-sentinel types. */ static inline void par_finalize_nulls(ray_t* vec) { - if (vec->attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* ext = vec->ext_nullmap; - uint8_t* bits = (uint8_t*)ray_data(ext); - int64_t nbytes = (vec->len + 7) / 8; - for (int64_t i = 0; i < nbytes; i++) { - if (bits[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + int64_t n = vec->len; + const void* p = ray_data(vec); + switch (vec->type) { + case RAY_F64: { + const double* d = (const double*)p; + for (int64_t i = 0; i < n; i++) + if (d[i] != d[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + return; } - } else { - int64_t nbytes = (vec->len + 7) / 8; - if (nbytes > 16) nbytes = 16; - for (int64_t i = 0; i < nbytes; i++) { - if (vec->nullmap[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + case RAY_F32: { + const float* d = (const float*)p; + for (int64_t i = 0; i < n; i++) + if (d[i] != d[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + return; + } + case RAY_I64: case RAY_TIMESTAMP: { + const int64_t* d = (const int64_t*)p; + for (int64_t i = 0; i < n; i++) + if (d[i] == NULL_I64) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + return; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: { + const int32_t* d = (const 
int32_t*)p; + for (int64_t i = 0; i < n; i++) + if (d[i] == NULL_I32) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + return; + } + case RAY_I16: { + const int16_t* d = (const int16_t*)p; + for (int64_t i = 0; i < n; i++) + if (d[i] == NULL_I16) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + return; } + default: return; } } diff --git a/src/ops/join.c b/src/ops/join.c index 21baa4a8..7dccd525 100644 --- a/src/ops/join.c +++ b/src/ops/join.c @@ -47,10 +47,10 @@ static uint64_t hash_row_keys(ray_t** key_vecs, uint8_t n_keys, int64_t row) { * Radix-partitioned hash join * * Four-phase pipeline: - * Phase 1: Partition both sides by radix bits of hash (parallel) - * Phase 2: Per-partition build + probe with open-addressing HT (parallel) - * Phase 3: Gather output columns from matched pairs (parallel) - * Phase 4: Fallback to chained HT for small joins (< RAY_PARALLEL_THRESHOLD) + * Pass 1: Partition both sides by radix bits of hash (parallel) + * Pass 2: Per-partition build + probe with open-addressing HT (parallel) + * Pass 3: Gather output columns from matched pairs (parallel) + * Pass 4: Fallback to chained HT for small joins (< RAY_PARALLEL_THRESHOLD) * ============================================================================ */ /* Partition entry: row index + cached hash */ @@ -360,9 +360,9 @@ static join_radix_part_t* join_radix_partition(ray_pool_t* pool, int64_t nrows, * Join execution (parallel hash join) * * Three-phase pipeline: - * Phase 1 (sequential): Build chained hash table on right side - * Phase 2 (parallel): Two-pass probe — count matches, prefix-sum, fill - * Phase 3 (parallel): Column gather — assemble result columns + * Pass 1 (sequential): Build chained hash table on right side + * Pass 2 (parallel): Two-pass probe — count matches, prefix-sum, fill + * Pass 3 (parallel): Column gather — assemble result columns * ============================================================================ */ /* Key equality helper — shared by count + fill 
phases */ @@ -646,7 +646,7 @@ typedef struct { int64_t sjoin_key_max; } join_probe_ctx_t; -/* Phase 2a: count matches per morsel */ +/* Pass 2a: count matches per morsel */ static void join_count_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) { (void)wid; (void)task_end; join_probe_ctx_t* c = (join_probe_ctx_t*)raw; @@ -688,7 +688,7 @@ static void join_count_fn(void* raw, uint32_t wid, int64_t task_start, int64_t t c->morsel_counts[tid] = count; } -/* Phase 2b: fill match pairs using pre-computed offsets */ +/* Pass 2b: fill match pairs using pre-computed offsets */ static void join_fill_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) { (void)wid; (void)task_end; join_probe_ctx_t* c = (join_probe_ctx_t*)raw; @@ -1087,7 +1087,7 @@ chained_ht_fallback:; } CHECK_CANCEL_GOTO(pool, join_cleanup); - /* Phase 1.5: S-Join semijoin filter extraction. + /* Pass 1.5: S-Join semijoin filter extraction. * Build a RAY_SEL bitmap of all distinct right-side key values that * appear in the hash table. This can be used to skip left-side rows * whose key cannot match any right-side row. @@ -1116,7 +1116,7 @@ chained_ht_fallback:; } } - /* Phase 2: Parallel probe (two-pass: count → prefix-sum → fill) */ + /* Pass 2: Parallel probe (two-pass: count → prefix-sum → fill) */ uint32_t n_tasks = (uint32_t)((left_rows + JOIN_MORSEL - 1) / JOIN_MORSEL); if (n_tasks == 0) n_tasks = 1; @@ -1233,7 +1233,7 @@ chained_ht_fallback:; } join_gather:; - /* Phase 3: Build result table with parallel column gather. + /* Pass 3: Build result table with parallel column gather. * Use multi_gather for batched column access when possible (non-nullable * indices), falling back to per-column gather for nullable RIGHT columns. 
*/ int64_t left_ncols = ray_table_ncols(left_table); diff --git a/src/ops/linkop.c b/src/ops/linkop.c index 895e8853..d920399a 100644 --- a/src/ops/linkop.c +++ b/src/ops/linkop.c @@ -33,36 +33,6 @@ #include "lang/env.h" #include -/* -------------------------------------------------------------------------- - * Promote inline nullmap to ext-nullmap before attaching a link. - * - * A linked column places its int64 target sym at nullmap-union bytes 8-15. - * If the column has inline nulls and >64 elements, those bytes hold real - * bitmap bits that would be clobbered. Promote up front to keep nulls - * intact. Mirrors the promotion logic in ray_vec_set_null_checked. */ -static ray_err_t promote_inline_to_ext(ray_t* vec) { - if (!(vec->attrs & RAY_ATTR_HAS_NULLS)) return RAY_OK; - if (vec->attrs & RAY_ATTR_NULLMAP_EXT) return RAY_OK; - - int64_t bitmap_len = (vec->len + 7) / 8; - if (bitmap_len < 1) bitmap_len = 1; - ray_t* ext = ray_vec_new(RAY_U8, bitmap_len); - if (!ext || RAY_IS_ERR(ext)) return RAY_ERR_OOM; - ext->len = bitmap_len; - - /* Copy existing inline bits (16 bytes max) into ext. */ - int64_t copy = bitmap_len < 16 ? bitmap_len : 16; - memcpy(ray_data(ext), vec->nullmap, (size_t)copy); - if (bitmap_len > 16) { - memset((char*)ray_data(ext) + 16, 0, (size_t)(bitmap_len - 16)); - } - /* Now overwrite bytes 0-7 with the ext_nullmap pointer. Bytes 8-15 - * become don't-care — caller is about to write link_target there. */ - vec->ext_nullmap = ext; - vec->attrs |= RAY_ATTR_NULLMAP_EXT; - return RAY_OK; -} - /* -------------------------------------------------------------------------- * ray_link_attach * -------------------------------------------------------------------------- */ @@ -107,11 +77,9 @@ ray_t* ray_link_attach(ray_t** vp, int64_t target_sym_id) { if (!v || RAY_IS_ERR(v)) return v; *vp = v; - /* Promote nulls to ext if necessary so bytes 8-15 are free. 
*/ - ray_err_t err = promote_inline_to_ext(v); - if (err != RAY_OK) return ray_error(ray_err_code_str(err), "link: oom"); - - /* Replace any existing link (idempotent re-attach with new target). */ + /* Nulls live as sentinels in the payload — bytes 0-15 of the union + * carry no per-element data, so we can write link_target into + * bytes 8-15 unconditionally. */ v->link_target = target_sym_id; v->attrs |= RAY_ATTR_HAS_LINK; @@ -266,8 +234,8 @@ ray_t* ray_link_deref(ray_t* v, int64_t sym_id) { } } - /* Phase 2/3a dual encoding: fill correct-width sentinel into null - * payload slots so consumers reading raw payload honor the contract. */ + /* Fill correct-width sentinel into null payload slots so consumers + * reading raw payload honor the contract. */ switch (out_type) { case RAY_F64: { double* d = (double*)ray_data(result); @@ -299,17 +267,13 @@ ray_t* ray_link_deref(ray_t* v, int64_t sym_id) { /* Type-specific metadata propagation. * RAY_STR: share the source pool so ray_str_t pool_offs are valid. * RAY_SYM: if the source column carries a local sym_dict, share it. - * - * sym_dict aliases bytes 8-15 of the nullmap union. It is only a - * real pointer when the column doesn't have inline nulls clobbering - * those bytes, i.e. either no nulls or NULLMAP_EXT. Mirrors the - * guard pattern in src/ops/sort.c:3307 and src/ops/rerank.c:182. */ + * sym_dict aliases bytes 8-15 of the nullmap union and is safe + * to read on any non-slice SYM vec — sentinel-encoded nulls + * don't consume those bytes. 
*/ if (out_type == RAY_STR) { col_propagate_str_pool(result, target_col); } else if (out_type == RAY_SYM) { if (col_owner && !(col_owner->attrs & RAY_ATTR_SLICE) && - (!(col_owner->attrs & RAY_ATTR_HAS_NULLS) || - (col_owner->attrs & RAY_ATTR_NULLMAP_EXT)) && col_owner->sym_dict) { ray_retain(col_owner->sym_dict); result->sym_dict = col_owner->sym_dict; diff --git a/src/ops/ops.h b/src/ops/ops.h index 86a4aba9..b2178c33 100644 --- a/src/ops/ops.h +++ b/src/ops/ops.h @@ -223,8 +223,8 @@ void ray_cancel(void); #define OP_GROUP_MAXMIN_ROWFORM 112 /* Dedicated single-pass per-group MEDIAN(v)+STDDEV(v) with row-form * emission for canonical shape `(select (median v) (std v) from t by - * k0 k1)`. Phase 2 builds per-partition HT + group-contiguous F64 - * v_buf in two passes; Phase 3 runs ray_median_dbl_inplace per group. + * k0 k1)`. Pass 2 builds per-partition HT + group-contiguous F64 + * v_buf in two passes; Pass 3 runs ray_median_dbl_inplace per group. * Bypasses the shared OP_GROUP path's reprobe-and-histogram holistic * fill. Closes H2O canonical q6. 2 keys, both aggs on the same * column, non-nullable inputs. */ @@ -452,7 +452,12 @@ typedef struct { uint32_t elem_size; /* bytes per element */ int64_t morsel_len; /* elements in current morsel (<=RAY_MORSEL_ELEMS) */ void* morsel_ptr; /* pointer to current morsel data */ - uint8_t* null_bits; /* current morsel null bitmap (or NULL) */ + uint8_t* null_bits; /* current morsel null bitmap (or NULL). + * Points into null_bits_buf below when the + * source uses sentinels (synthesized per + * morsel) or into the source's bitmap for + * BOOL/U8 legacy path. 
*/ + uint8_t null_bits_buf[RAY_MORSEL_ELEMS / 8]; /* synthesis scratch */ } ray_morsel_t; /* ===== Selection Bitmap (RAY_SEL) ===== */ diff --git a/src/ops/pivot.c b/src/ops/pivot.c index ac5745a9..2d5a5596 100644 --- a/src/ops/pivot.c +++ b/src/ops/pivot.c @@ -313,7 +313,7 @@ ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { uint32_t grp_count = pg.total_grps; if (grp_count == 0) { pivot_ingest_free(&pg); return ray_table_new(0); } - /* Phase 2: Collect distinct pivot values and distinct index keys. + /* Pass 2: Collect distinct pivot values and distinct index keys. * Each group row layout: [hash:8][key0:8]...[keyN-1:8][null_mask:8][accum...] * where the keys region holds n_idx index keys + 1 pivot key, * followed by the key-null bitmap written by group_rows_range. */ @@ -492,7 +492,7 @@ ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { } } - /* Phase 3: Build output table */ + /* Pass 3: Build output table */ ray_progress_update("pivot", "scatter", 0, (uint64_t)pv_count); bool val_is_f64 = vcol->type == RAY_F64; int8_t out_agg_type; @@ -522,7 +522,7 @@ ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { memcpy(&ent_nmask, ix_entry_p + 8 + (size_t)n_idx * 8, 8); if (ent_nmask & (int64_t)(1u << k)) { ray_vec_set_null(new_col, (int64_t)r, true); - /* Phase 2/3a dual encoding: fill correct-width sentinel. */ + /* Fill the correct-width sentinel. */ switch (kt) { case RAY_F64: ((double*)ray_data(new_col))[r] = NULL_F64; break; diff --git a/src/ops/query.c b/src/ops/query.c index deb347ea..0c899d7a 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -2311,7 +2311,6 @@ typedef struct { uint8_t in_attrs; const void* base; bool has_nulls; - const uint8_t* null_bm; uint8_t esz; /* 1/2/4/8 */ bool is_f64; const int64_t* idx_buf; @@ -2331,23 +2330,23 @@ typedef struct { * int64 read on the hot path. Hits high-cardinality count_distinct * grouped queries where the per-group HT churn was thrashing L2. 
*/ #define CDPG_BUF_INSERT(VAL_EXPR) do { \ - int64_t v = (int64_t)(VAL_EXPR); \ - if (RAY_UNLIKELY(v == 0)) { \ + int64_t _ins_v = (int64_t)(VAL_EXPR); \ + if (RAY_UNLIKELY(_ins_v == 0)) { \ if (!saw_zero) { saw_zero = 1; distinct++; } \ break; \ } \ - uint64_t h = (uint64_t)v * CDPG_BUF_HASH_K1; \ - h ^= h >> 33; \ - uint64_t slot = h & mask; \ + uint64_t _ins_h = (uint64_t)_ins_v * CDPG_BUF_HASH_K1; \ + _ins_h ^= _ins_h >> 33; \ + uint64_t _ins_slot = _ins_h & mask; \ for (;;) { \ - int64_t cur = set[slot]; \ - if (cur == 0) { \ - set[slot] = v; \ + int64_t _ins_cur = set[_ins_slot]; \ + if (_ins_cur == 0) { \ + set[_ins_slot] = _ins_v; \ distinct++; \ break; \ } \ - if (cur == v) break; \ - slot = (slot + 1) & mask; \ + if (_ins_cur == _ins_v) break; \ + _ins_slot = (_ins_slot + 1) & mask; \ } \ } while (0) @@ -2383,28 +2382,26 @@ static void cdpg_buf_par_fn(void* vctx, uint32_t worker_id, int64_t distinct = 0; int saw_zero = 0; - const uint8_t* null_bm = ctx->null_bm; bool has_nulls = ctx->has_nulls; if (ctx->is_f64) { const double* d = (const double*)ctx->base; for (int64_t i = 0; i < cnt; i++) { int64_t r = idxs[i]; - if (has_nulls && null_bm && ((null_bm[r/8] >> (r%8)) & 1)) continue; double fv = d[r]; - if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + if (has_nulls && fv != fv) continue; + if (fv == 0.0) fv = 0.0; int64_t vbits = 0; memcpy(&vbits, &fv, sizeof(int64_t)); CDPG_BUF_INSERT(vbits); } } else if (ctx->esz == 8) { const int64_t* d = (const int64_t*)ctx->base; - if (has_nulls && null_bm) { + if (has_nulls) { for (int64_t i = 0; i < cnt; i++) { - int64_t r = idxs[i]; - if ((null_bm[r/8] >> (r%8)) & 1) continue; - CDPG_BUF_INSERT(d[r]); + int64_t v = d[idxs[i]]; + if (v == NULL_I64) continue; + CDPG_BUF_INSERT(v); } } else { for (int64_t i = 0; i < cnt; i++) { @@ -2413,11 +2410,11 @@ static void cdpg_buf_par_fn(void* vctx, uint32_t worker_id, } } else if (ctx->esz == 4) { const int32_t* d = (const int32_t*)ctx->base; - if (has_nulls 
&& null_bm) { + if (has_nulls) { for (int64_t i = 0; i < cnt; i++) { - int64_t r = idxs[i]; - if ((null_bm[r/8] >> (r%8)) & 1) continue; - CDPG_BUF_INSERT((int64_t)d[r]); + int32_t v = d[idxs[i]]; + if (v == NULL_I32) continue; + CDPG_BUF_INSERT((int64_t)v); } } else { for (int64_t i = 0; i < cnt; i++) { @@ -2427,16 +2424,14 @@ static void cdpg_buf_par_fn(void* vctx, uint32_t worker_id, } else if (ctx->esz == 2) { const int16_t* d = (const int16_t*)ctx->base; for (int64_t i = 0; i < cnt; i++) { - int64_t r = idxs[i]; - if (has_nulls && null_bm && ((null_bm[r/8] >> (r%8)) & 1)) continue; - CDPG_BUF_INSERT((int64_t)d[r]); + int16_t v = d[idxs[i]]; + if (has_nulls && v == NULL_I16) continue; + CDPG_BUF_INSERT((int64_t)v); } - } else { /* esz == 1 */ + } else { /* esz == 1 — BOOL/U8 are non-nullable */ const uint8_t* d = (const uint8_t*)ctx->base; for (int64_t i = 0; i < cnt; i++) { - int64_t r = idxs[i]; - if (has_nulls && null_bm && ((null_bm[r/8] >> (r%8)) & 1)) continue; - CDPG_BUF_INSERT((int64_t)d[r]); + CDPG_BUF_INSERT((int64_t)d[idxs[i]]); } } @@ -2597,7 +2592,6 @@ static ray_t* count_distinct_per_group_buf(ray_t* inner_expr, ray_t* tbl, .in_attrs = src->attrs, .base = ray_data(src), .has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0, - .null_bm = NULL, .esz = ray_sym_elem_size(st, src->attrs), .is_f64 = (st == RAY_F64), .idx_buf = idx_buf, @@ -2606,8 +2600,6 @@ static ray_t* count_distinct_per_group_buf(ray_t* inner_expr, ray_t* tbl, .odata = odata, .oom = 0, }; - if (pctx.has_nulls) - pctx.null_bm = ray_vec_nullmap_bytes(src, NULL, NULL); ray_pool_dispatch_n(pool, cdpg_buf_par_fn, &pctx, (uint32_t)n_groups); if (!atomic_load_explicit(&pctx.oom, memory_order_relaxed)) { ray_release(src); @@ -5862,7 +5854,7 @@ ray_t* ray_select(ray_t** args, int64_t n) { && !has_binary_agg && !has_agg_k) { /* exec_filtered_group dispatches: count1 (single key, - * single COUNT) → Phase 3 fast path; everything else → + * single COUNT) → Pass 3 fast path; everything else → * 
multi path with packed composite key. Skipped when * any agg is binary (filtered-group fusion only knows * about unary aggs) or holistic with a K param. */ @@ -8024,32 +8016,12 @@ ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket) { xbar_par_fn(&ctx, 0, 0, n); } - /* Propagate null bitmap if present. Walk the source nullmap - * byte-by-byte and only clobber positions where the source is - * null — same trick as fix_null_comparisons. Cheap for the - * common case of HAS_NULLS attr set with mostly-empty bitmap. */ + /* Propagate nulls if present. Walk per-element via + * ray_vec_is_null (sentinel-based). */ if (col->attrs & RAY_ATTR_HAS_NULLS) { - int64_t off_bits = 0, len_bits = 0; - const uint8_t* nbits = ray_vec_nullmap_bytes(col, &off_bits, &len_bits); - if (nbits && (off_bits % 8) == 0) { - int64_t byte0 = off_bits / 8; - for (int64_t i = 0; i + 8 <= n; i += 8) { - uint8_t bb = nbits[byte0 + (i >> 3)]; - if (bb) { - for (int64_t k = 0; k < 8; k++) - if ((bb >> k) & 1) - ray_vec_set_null(out, i + k, true); - } - } - for (int64_t i = (n & ~7); i < n; i++) { - if ((nbits[byte0 + (i >> 3)] >> (i & 7)) & 1) - ray_vec_set_null(out, i, true); - } - } else { - for (int64_t i = 0; i < n; i++) - if (ray_vec_is_null(col, i)) - ray_vec_set_null(out, i, true); - } + for (int64_t i = 0; i < n; i++) + if (ray_vec_is_null(col, i)) + ray_vec_set_null(out, i, true); } return out; } @@ -8455,13 +8427,13 @@ ray_t* ray_update(ray_t** args, int64_t n) { else if (ct == RAY_F64 && expr_type == RAY_I64) ((double*)ray_data(new_col))[r] = (double)((int64_t*)ray_data(expr_vec))[r]; } - /* Null-bit propagation: memcpy above only copies values, - * not the nullmap. Carry over orig_col's nulls for the - * untouched rows, and pull expr_vec's nulls in for the - * masked rows. Phase 3a dual encoding: also overwrite the - * destination payload with the dest-width sentinel — casting - * a NaN/INT_MIN sentinel produces implementation-defined - * garbage that wouldn't match the dual-encoding contract. 
*/ + /* Null propagation: the memcpy above only copies values, + * so re-flag null rows here — orig_col's nulls for the + * untouched rows, expr_vec's nulls for the masked rows. + * Also overwrite the destination payload with the + * dest-width sentinel: casting a NaN/INT_MIN sentinel + * across widths produces implementation-defined garbage + * that wouldn't match the typed null encoding. */ for (int64_t r = 0; r < nrows; r++) { ray_t* src = mask[r] ? expr_vec : orig_col; if (ray_vec_is_null(src, r)) { @@ -8538,13 +8510,12 @@ ray_t* ray_update(ray_t** args, int64_t n) { /* Preserve typed-null markers across broadcast. Without * this, (update {a: 0N from: t}) silently writes plain * zeros into the I64 column — the value bits get copied - * but the null bitmap doesn't, so (nil? a) reports false + * but HAS_NULLS is not set, so (nil? a) reports false * on what should be null cells. */ if (RAY_ATOM_IS_NULL(expr_vec)) { for (int64_t r = 0; r < nrows; r++) ray_vec_set_null(bcast, r, true); - /* Phase 2/3a dual encoding: fill correct-width - * sentinel into payload. */ + /* Fill the correct-width sentinel into the payload. */ switch (ct) { case RAY_F64: { double* d = (double*)ray_data(bcast); @@ -8584,8 +8555,8 @@ ray_t* ray_update(ray_t** args, int64_t n) { promoted = ray_vec_append(promoted, &v); if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return promoted; } } - /* Carry the nullmap across the I64→F64 promotion; - * Phase 2 dual encoding: also overwrite the slot with NaN. */ + /* Carry nulls across the I64→F64 promotion and overwrite + * the slot with NULL_F64 (NaN) so the payload encodes null. 
*/ double* dst = (double*)ray_data(promoted); for (int64_t r = 0; r < nr; r++) { if (ray_vec_is_null(expr_vec, r)) { @@ -8771,8 +8742,7 @@ ray_t* ray_update(ray_t** args, int64_t n) { if (RAY_ATOM_IS_NULL(expr_vec)) { for (int64_t r = 0; r < nrows; r++) ray_vec_set_null(bcast, r, true); - /* Phase 2/3a dual encoding: fill correct-width - * sentinel into payload. */ + /* Fill the correct-width sentinel into the payload. */ switch (ct) { case RAY_F64: { double* d = (double*)ray_data(bcast); @@ -8812,8 +8782,8 @@ ray_t* ray_update(ray_t** args, int64_t n) { promoted = ray_vec_append(promoted, &v); if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return promoted; } } - /* Carry the nullmap across the I64→F64 promotion; - * Phase 2 dual encoding: also overwrite the slot with NaN. */ + /* Carry nulls across the I64→F64 promotion and overwrite + * the slot with NULL_F64 (NaN) so the payload encodes null. */ double* dst = (double*)ray_data(promoted); for (int64_t r = 0; r < nr; r++) { if (ray_vec_is_null(expr_vec, r)) { @@ -8893,8 +8863,7 @@ ray_t* ray_update(ray_t** args, int64_t n) { if (RAY_ATOM_IS_NULL(expr_vec)) { for (int64_t r = 0; r < nrows; r++) ray_vec_set_null(bcast, r, true); - /* Phase 2/3a dual encoding: fill correct-width - * sentinel into payload. */ + /* Fill the correct-width sentinel into the payload. */ switch (ct) { case RAY_F64: { double* d = (double*)ray_data(bcast); diff --git a/src/ops/rerank.c b/src/ops/rerank.c index a35b94ba..c08ea210 100644 --- a/src/ops/rerank.c +++ b/src/ops/rerank.c @@ -174,15 +174,11 @@ static ray_t* gather_rows_with_dist(ray_t* tbl, /* RAY_SYM: propagate the per-vector sym_dict so narrow-width * local indices resolve against the same dictionary. For * sliced SYM columns the sym_dict lives on the slice_parent - * (the slice's own union slot holds slice_parent/offset). - * Guards against the inline-nullmap aliasing mirror sort.c:3307. 
*/ + * (the slice's own union slot holds slice_parent/offset). */ if (ct == RAY_SYM) { const ray_t* dict_owner = (src_col->attrs & RAY_ATTR_SLICE) ? src_col->slice_parent : src_col; - if (dict_owner && - (!(dict_owner->attrs & RAY_ATTR_HAS_NULLS) || - (dict_owner->attrs & RAY_ATTR_NULLMAP_EXT)) && - dict_owner->sym_dict) { + if (dict_owner && dict_owner->sym_dict) { ray_retain(dict_owner->sym_dict); new_col->sym_dict = dict_owner->sym_dict; } diff --git a/src/ops/rowsel.c b/src/ops/rowsel.c index aa83b2d0..88dc0fc3 100644 --- a/src/ops/rowsel.c +++ b/src/ops/rowsel.c @@ -332,7 +332,7 @@ ray_t* ray_rowsel_to_indices(ray_t* sel) { /* refine: walk `existing`'s surviving rows, test pred at each, emit a * new selection. Sequential — chained filters are typically applied * to already-shrunk row sets where parallelism doesn't pay back the - * dispatch overhead. Phase 2 will revisit if measurement says + * dispatch overhead. Pass 2 will revisit if measurement says * otherwise. */ ray_t* ray_rowsel_refine(ray_t* existing, ray_t* pred) { if (!existing) return ray_rowsel_from_pred(pred); diff --git a/src/ops/sort.c b/src/ops/sort.c index b05afc95..4fc8f144 100644 --- a/src/ops/sort.c +++ b/src/ops/sort.c @@ -336,7 +336,7 @@ uint8_t compute_key_nbytes(ray_pool_t* pool, const uint64_t* keys, /* radix_pass_ctx_t defined in exec_internal.h */ -/* Phase 1: histogram — each task counts byte values in its fixed range */ +/* Pass 1: histogram — each task counts byte values in its fixed range */ static void radix_hist_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { (void)wid; (void)end; radix_pass_ctx_t* c = (radix_pass_ctx_t*)arg; @@ -359,7 +359,7 @@ static void radix_hist_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { h[(keys[i] >> shift) & 0xFF]++; } -/* Phase 3: scatter with software write-combining (SWC). +/* Pass 3: scatter with software write-combining (SWC). 
* Buffers entries per bucket before flushing, converting random writes * into sequential bursts that are friendlier to the cache hierarchy. */ #define SWC_N 8 /* entries per bucket buffer; 8*8=64B per bucket = 32KB total */ @@ -453,7 +453,7 @@ int64_t* radix_sort_run(ray_pool_t* pool, .hist = hist, .offsets = offsets, }; - /* Phase 1: parallel histogram */ + /* Pass 1: parallel histogram */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks); else @@ -469,7 +469,7 @@ int64_t* radix_sort_run(ray_pool_t* pool, } if (uniform) continue; /* all same byte — skip this pass */ - /* Phase 2: prefix sum → per-task scatter offsets */ + /* Pass 2: prefix sum → per-task scatter offsets */ int64_t running = 0; for (int b = 0; b < 256; b++) { for (uint32_t t = 0; t < n_tasks; t++) { @@ -478,7 +478,7 @@ int64_t* radix_sort_run(ray_pool_t* pool, } } - /* Phase 3: parallel scatter */ + /* Pass 3: parallel scatter */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, radix_scatter_fn, &ctx, n_tasks); else @@ -589,7 +589,7 @@ uint64_t* packed_radix_sort_run(ray_pool_t* pool, .hist = hist, .offsets = offsets, }; - /* Phase 1: parallel histogram (reuses existing radix_hist_fn) */ + /* Pass 1: parallel histogram (reuses existing radix_hist_fn) */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks); else @@ -605,7 +605,7 @@ uint64_t* packed_radix_sort_run(ray_pool_t* pool, } if (uniform) continue; - /* Phase 2: prefix sum */ + /* Pass 2: prefix sum */ int64_t running = 0; for (int b = 0; b < 256; b++) { for (uint32_t t = 0; t < n_tasks; t++) { @@ -614,7 +614,7 @@ uint64_t* packed_radix_sort_run(ray_pool_t* pool, } } - /* Phase 3: packed scatter (half the traffic of dual-array scatter) */ + /* Pass 3: packed scatter (half the traffic of dual-array scatter) */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, packed_scatter_fn, &ctx, n_tasks); else @@ -838,7 +838,7 @@ int64_t* msd_radix_sort_run(ray_pool_t* pool, .hist = hist, 
.offsets = offsets, }; - /* Phase 1: parallel histogram */ + /* Pass 1: parallel histogram */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks); else @@ -860,7 +860,7 @@ int64_t* msd_radix_sort_run(ray_pool_t* pool, n, n_bytes - 1, sorted_keys_out); } - /* Phase 2: prefix sum → per-task scatter offsets + bucket boundaries */ + /* Pass 2: prefix sum → per-task scatter offsets + bucket boundaries */ int64_t bucket_offsets[257]; { int64_t running = 0; @@ -874,7 +874,7 @@ int64_t* msd_radix_sort_run(ray_pool_t* pool, bucket_offsets[256] = running; } - /* Phase 3: parallel scatter with SWC */ + /* Pass 3: parallel scatter with SWC */ if (pool && n_tasks > 1) ray_pool_dispatch_n(pool, radix_scatter_fn, &ctx, n_tasks); else @@ -1775,11 +1775,11 @@ static bool sort_str_msd_inplace(int64_t* sorted_idx, int64_t nrows, .n_tasks = n_tasks, .hist = hist, .offsets = off, }; - /* Phase 1: parallel histogram. */ + /* Pass 1: parallel histogram. */ ray_pool_dispatch_n(pool_p, strsort_top_hist_fn, &tctx, n_tasks); - /* Phase 2: sequential prefix-sum. For each bucket + /* Pass 2: sequential prefix-sum. For each bucket * b, the starting offset is the sum of all counts * in earlier buckets plus all counts in earlier * tasks for this bucket. */ @@ -1797,7 +1797,7 @@ static bool sort_str_msd_inplace(int64_t* sorted_idx, int64_t nrows, sum += bc; } - /* Phase 3: parallel scatter into tmp. */ + /* Pass 3: parallel scatter into tmp. */ ray_pool_dispatch_n(pool_p, strsort_top_scatter_fn, &tctx, n_tasks); @@ -1805,7 +1805,7 @@ static bool sort_str_msd_inplace(int64_t* sorted_idx, int64_t nrows, scratch_free(hist_hdr); scratch_free(off_hdr); - /* Phase 4: parallel per-bucket recursive sort. */ + /* Pass 4: parallel per-bucket recursive sort. 
*/ strsort_bucket_ctx_t bctx = { .keys = tmp, .starts = bucket_starts, @@ -3761,14 +3761,10 @@ ray_t* exec_sort(ray_graph_t* g, ray_op_t* op, ray_t* tbl, int64_t limit) { if (!col) continue; col_propagate_str_pool(new_cols[c], col); /* sym_dict lives in bytes 8-15 of the header union, which also - * hold inline-nullmap bits and slice_offset. Only read it when - * the header layout actually exposes the sym_dict/ext_nullmap - * interpretation: no slice, and either no nulls or external - * nullmap. Otherwise those bytes are bitmap payload / slice - * metadata and dereferencing them hands ray_retain garbage. */ + * hold slice_offset for slices. Skip slices to avoid reading + * the offset as a pointer. */ if (col->type == RAY_SYM && !(col->attrs & RAY_ATTR_SLICE) && - (!(col->attrs & RAY_ATTR_HAS_NULLS) || (col->attrs & RAY_ATTR_NULLMAP_EXT)) && col->sym_dict) { ray_retain(col->sym_dict); new_cols[c]->sym_dict = col->sym_dict; @@ -4092,14 +4088,10 @@ ray_t* sort_table_by_keys(ray_t* tbl, ray_t* keys, uint8_t descending) { if (!col) continue; col_propagate_str_pool(new_cols[c], col); /* sym_dict lives in bytes 8-15 of the header union, which also - * hold inline-nullmap bits and slice_offset. Only read it when - * the header layout actually exposes the sym_dict/ext_nullmap - * interpretation: no slice, and either no nulls or external - * nullmap. Otherwise those bytes are bitmap payload / slice - * metadata and dereferencing them hands ray_retain garbage. */ + * hold slice_offset for slices. Skip slices to avoid reading + * the offset as a pointer. 
*/ if (col->type == RAY_SYM && !(col->attrs & RAY_ATTR_SLICE) && - (!(col->attrs & RAY_ATTR_HAS_NULLS) || (col->attrs & RAY_ATTR_NULLMAP_EXT)) && col->sym_dict) { ray_retain(col->sym_dict); new_cols[c]->sym_dict = col->sym_dict; diff --git a/src/ops/string.c b/src/ops/string.c index 7c9512a4..4f0c4e23 100644 --- a/src/ops/string.c +++ b/src/ops/string.c @@ -619,7 +619,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { int sym_w = (int)(input->attrs & RAY_SYM_W_MASK); ray_pool_t* pool = ray_pool_get(); - /* Phase 1: mark used sym_ids. Parallelised because for + /* Pass 1: mark used sym_ids. Parallelised because for * high-cardinality text columns the seen- * mark scan was a 5 ms-class serial pass. Multiple workers * may write 1 to the same byte concurrently — the value is @@ -648,7 +648,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { like_seen_fn(&sctx, 0, 0, len); } - /* Phase 2: parallel pattern resolve over the dict range. */ + /* Pass 2: parallel pattern resolve over the dict range. */ like_resolve_ctx_t rctx = { .sym_strings = sym_strings, .seen = seen, .lut = lut, .pc = &pc, .use_simple = use_simple, @@ -660,7 +660,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { like_resolve_fn(&rctx, 0, 0, (int64_t)dict_n); } - /* Phase 3: row projection — gather lut[sid] into the per-row + /* Pass 3: row projection — gather lut[sid] into the per-row * bool dst. Parallelised because it's a 5 M-row pass (~5 ms * serial on a W64 SYM column). Width-specialised in the * worker fn so the inner load is a typed pointer dereference. 
*/ diff --git a/src/ops/traverse.c b/src/ops/traverse.c index c30acc37..c2015608 100644 --- a/src/ops/traverse.c +++ b/src/ops/traverse.c @@ -143,7 +143,7 @@ ray_t* exec_expand(ray_graph_t* g, ray_op_t* op, ray_t* src_vec) { /* Helper to expand one CSR direction */ #define EXPAND_DIR(csr_ptr) do { \ ray_csr_t* csr = (csr_ptr); \ - /* Phase 1: count total output pairs */ \ + /* Pass 1: count total output pairs */ \ int64_t total = 0; \ for (int64_t i = 0; i < n_src; i++) { \ int64_t node = src_data[i]; \ @@ -153,7 +153,7 @@ ray_t* exec_expand(ray_graph_t* g, ray_op_t* op, ray_t* src_vec) { if (node >= 0 && node < csr->n_nodes) \ total += ray_csr_degree(csr, node); \ } \ - /* Phase 2: fill */ \ + /* Pass 2: fill */ \ ray_t* d_src = ray_vec_new(RAY_I64, total > 0 ? total : 1); \ ray_t* d_dst = ray_vec_new(RAY_I64, total > 0 ? total : 1); \ if (!d_src || RAY_IS_ERR(d_src) || !d_dst || RAY_IS_ERR(d_dst)) { \ @@ -1163,7 +1163,7 @@ ray_t* exec_wco_join(ray_graph_t* g, ray_op_t* op) { /* -------------------------------------------------------------------------- * exec_louvain: community detection via Louvain modularity optimization. - * Phase 1 only (no graph contraction). + * Pass 1 only (no graph contraction). * Maximizes modularity Q = (1/2m) * SUM[(A_ij - k_i*k_j/2m) * delta(c_i, c_j)] * Treats graph as undirected. Uses forward+reverse CSR. * -------------------------------------------------------------------------- */ diff --git a/src/ops/window.c b/src/ops/window.c index d0619de8..a4019184 100644 --- a/src/ops/window.c +++ b/src/ops/window.c @@ -572,7 +572,7 @@ static void win_par_fn(void* arg, uint32_t worker_id, } /* Parallel gather of partition key values into contiguous array. - * Eliminates random-access reads during Phase 2 boundary detection. */ + * Eliminates random-access reads during Pass 2 boundary detection. 
*/ typedef struct { const int64_t* sorted_idx; uint64_t* pkey_sorted; @@ -707,7 +707,7 @@ ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { } } - /* --- Phase 1: Sort by (partition_keys ++ order_keys) --- */ + /* --- Pass 1: Sort by (partition_keys ++ order_keys) --- */ ray_t* radix_itmp_hdr = NULL; ray_t* win_enum_rank_hdrs[n_sort > 0 ? n_sort : 1]; memset(win_enum_rank_hdrs, 0, sizeof(win_enum_rank_hdrs)); @@ -1015,7 +1015,7 @@ ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { } } - /* --- Phase 2: Find partition boundaries --- */ + /* --- Pass 2: Find partition boundaries --- */ /* Overallocate part_offsets to worst case (single-pass, no counting pass) */ ray_t* poff_hdr = NULL; int64_t* part_offsets = (int64_t*)scratch_alloc(&poff_hdr, @@ -1103,7 +1103,7 @@ ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { } } - /* --- Phase 3: Allocate result vectors and compute per-partition --- */ + /* --- Pass 3: Allocate result vectors and compute per-partition --- */ for (uint8_t f = 0; f < n_funcs; f++) { uint8_t kind = ext->window.func_kinds[f]; ray_t* fvec = func_vecs[f]; @@ -1130,9 +1130,8 @@ ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { /* Pre-stamp every slot with the width-correct null sentinel. The * per-partition compute loops below write valid values into * "active" slots and call win_set_null on null-producing slots - * without re-writing the payload — so the only way to honor the - * dual-encoding contract for those bitmap-only nulls is to make - * the payload already match the sentinel up front. */ + * without re-writing the payload — pre-stamping ensures every + * null slot already holds the correct sentinel. 
*/ if (is_f64[f]) { double* d = (double*)ray_data(result_vecs[f]); for (int64_t i = 0; i < nrows; i++) d[i] = NULL_F64; @@ -1183,7 +1182,7 @@ ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { win_finalize_nulls(result_vecs[f]); } - /* --- Phase 4: Build result table --- */ + /* --- Pass 4: Build result table --- */ ray_t* result = ray_table_new(ncols + n_funcs); if (!result || RAY_IS_ERR(result)) { for (uint8_t f = 0; f < n_funcs; f++) ray_release(result_vecs[f]); diff --git a/src/store/col.c b/src/store/col.c index 3275c668..848762fa 100644 --- a/src/store/col.c +++ b/src/store/col.c @@ -91,12 +91,13 @@ static size_t col_str_pool_payload_len(const ray_t* vec); /* -------------------------------------------------------------------------- * Column file format: - * Bytes 0-15: nullmap (inline) or zeroed (ext_nullmap / no nulls) + * Bytes 0-15: nullmap union arm (atom flags / HAS_INDEX saved bytes) * Bytes 16-31: mmod=0, order=0, type, attrs, rc=0, len * Bytes 32+: raw element data - * (if RAY_ATTR_NULLMAP_EXT): appended (len+7)/8 bitmap bytes * * On-disk format IS the in-memory format (zero deserialization on load). + * Null state lives in the payload as a type-correct sentinel + * (NULL_F64/NULL_I64/...). There is no separate bitmap region. * -------------------------------------------------------------------------- */ /* Explicit allowlist of types that are safe to serialize as raw bytes. @@ -557,24 +558,12 @@ static ray_err_t col_save_impl(ray_t* vec, const char* path, bool durable) { /* HAS_INDEX rebase: an attached accelerator index displaces the * 16-byte nullmap union with an index pointer. Persist the - * pre-attach state instead — strip HAS_INDEX, restore the saved - * NULLMAP_EXT bit, and copy the saved bitmap bytes back into the - * on-disk header. ext_for_append captures the saved ext-nullmap - * pointer so the bitmap append at end-of-write reads from the - * right place. */ - ray_t* ext_for_append = (vec->attrs & RAY_ATTR_NULLMAP_EXT) - ? 
vec->ext_nullmap : NULL; + * pre-attach state — strip HAS_INDEX and copy the saved bytes + * back into the on-disk header. Sentinels in the payload + * carry the null state, so there is no bitmap to append. */ if (vec->attrs & RAY_ATTR_HAS_INDEX) { ray_index_t* ix = ray_index_payload(vec->index); header.attrs &= ~RAY_ATTR_HAS_INDEX; - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - header.attrs |= RAY_ATTR_NULLMAP_EXT; - memcpy(&ext_for_append, &ix->saved_nullmap[0], - sizeof(ext_for_append)); - } else { - header.attrs &= ~RAY_ATTR_NULLMAP_EXT; - ext_for_append = NULL; - } memcpy(header.nullmap, ix->saved_nullmap, 16); } @@ -588,15 +577,10 @@ static ray_err_t col_save_impl(ray_t* vec, const char* path, bool durable) { memset(header.nullmap + 8, 0, 8); } - /* Clear slice field; preserve ext_nullmap flag for bitmap append */ - header.attrs &= ~RAY_ATTR_SLICE; - if (!(header.attrs & RAY_ATTR_HAS_NULLS)) { + /* Clear slice flag — slices are materialized on save. */ + header.attrs &= (uint8_t)~RAY_ATTR_SLICE; + if (!(header.attrs & RAY_ATTR_HAS_NULLS)) memset(header.nullmap, 0, 16); - header.attrs &= ~RAY_ATTR_NULLMAP_EXT; - } else if (header.attrs & RAY_ATTR_NULLMAP_EXT) { - /* Ext bitmap appended after data; zero pointer bytes in header */ - memset(header.nullmap, 0, 16); - } size_t written = fwrite(&header, 1, 32, f); if (written != 32) { fclose(f); remove(tmp_path); return RAY_ERR_IO; } @@ -659,17 +643,6 @@ static ray_err_t col_save_impl(ray_t* vec, const char* path, bool durable) { } } - /* Append external nullmap bitmap after data. Use header.attrs - * (rebased above for HAS_INDEX) and ext_for_append (the - * effective ext_nullmap pointer, possibly extracted from the - * index's saved snapshot). 
*/ - if ((vec->attrs & RAY_ATTR_HAS_NULLS) && - (header.attrs & RAY_ATTR_NULLMAP_EXT) && ext_for_append) { - size_t bitmap_len = ((size_t)vec->len + 7) / 8; - written = fwrite(ray_data(ext_for_append), 1, bitmap_len, f); - if (written != bitmap_len) { fclose(f); remove(tmp_path); return RAY_ERR_IO; } - } - fclose(f); } @@ -755,9 +728,7 @@ typedef struct { bool has_str_pool; size_t str_pool_offset; size_t str_pool_size; - size_t bitmap_offset; - bool has_ext_nullmap; - size_t bitmap_len; + size_t tail_offset; /* end of payload — file size must match */ uint32_t saved_sym_count; } col_mapped_t; @@ -817,7 +788,7 @@ static ray_err_t col_validate_str_region(ray_t* hdr, const void* ptr, out->has_str_pool = true; out->str_pool_offset = offset; out->str_pool_size = pool_size; - out->bitmap_offset = offset + 32 + pool_size; + out->tail_offset = offset + 32 + pool_size; return RAY_OK; } @@ -880,29 +851,17 @@ static ray_t* col_validate_mapped(const char* path, col_mapped_t* out) { } out->data_size = data_size; - size_t bitmap_offset = 32 + data_size; if (hdr->type == RAY_STR) { ray_err_t se = col_validate_str_region(hdr, ptr, mapped_size, out); if (se != RAY_OK) { ray_vm_unmap_file(ptr, mapped_size); return ray_error(ray_err_code_str(se), NULL); } - bitmap_offset = out->bitmap_offset; } else { out->has_str_pool = false; out->str_pool_offset = 0; out->str_pool_size = 0; - out->bitmap_offset = bitmap_offset; - } - - /* Check for appended ext_nullmap bitmap */ - bool has_ext_nullmap = (hdr->attrs & RAY_ATTR_HAS_NULLS) && - (hdr->attrs & RAY_ATTR_NULLMAP_EXT); - size_t bitmap_len = has_ext_nullmap ? ((size_t)hdr->len + 7) / 8 : 0; - if (has_ext_nullmap && (bitmap_offset > mapped_size || - bitmap_len > mapped_size - bitmap_offset)) { - ray_vm_unmap_file(ptr, mapped_size); - return ray_error("corrupt", NULL); + out->tail_offset = 32 + data_size; } /* RAY_SYM: fast-reject via sym count in header rc field. 
@@ -923,25 +882,6 @@ static ray_t* col_validate_mapped(const char* path, col_mapped_t* out) { out->header = hdr; out->esz = esz; out->data_size = data_size; - out->bitmap_offset = bitmap_offset; - out->has_ext_nullmap = has_ext_nullmap; - out->bitmap_len = bitmap_len; - return NULL; /* success */ -} - -/* -------------------------------------------------------------------------- - * col_restore_ext_nullmap -- allocate buddy-backed copy of ext nullmap - * - * Shared by ray_col_load and ray_col_mmap. On success, sets vec->ext_nullmap. - * Returns NULL on success, or an error string on failure. - * -------------------------------------------------------------------------- */ - -static ray_t* col_restore_ext_nullmap(ray_t* vec, const col_mapped_t* cm) { - ray_t* ext = ray_vec_new(RAY_U8, (int64_t)cm->bitmap_len); - if (!ext || RAY_IS_ERR(ext)) return ray_error("oom", NULL); - ext->len = (int64_t)cm->bitmap_len; - memcpy(ray_data(ext), (char*)cm->mapped + cm->bitmap_offset, cm->bitmap_len); - vec->ext_nullmap = ext; return NULL; /* success */ } @@ -1007,24 +947,12 @@ ray_t* ray_col_load(const char* path) { vec->str_pool = pool; } - /* Restore external nullmap if present */ - if (cm.has_ext_nullmap) { - ray_t* ext_err = col_restore_ext_nullmap(vec, &cm); - if (ext_err) { - ray_vm_unmap_file(cm.mapped, cm.mapped_size); - ray_free(vec); - return ext_err; - } - } - ray_vm_unmap_file(cm.mapped, cm.mapped_size); /* Fix up header for buddy-allocated block */ vec->mmod = 0; vec->order = saved_order; vec->attrs &= ~RAY_ATTR_SLICE; - if (!cm.has_ext_nullmap) - vec->attrs &= ~RAY_ATTR_NULLMAP_EXT; ray_atomic_store(&vec->rc, 1); /* RAY_SYM: validate sym count footer + bounds check */ @@ -1060,8 +988,7 @@ static ray_t* col_mmap_impl(const char* path, bool trust_splayed_sym_count) { /* Validate that file size matches expected layout exactly. * ray_free() reconstructs the munmap size using the same formula. 
*/ - size_t expected = cm.bitmap_offset + cm.bitmap_len; - if (expected != cm.mapped_size) { + if (cm.tail_offset != cm.mapped_size) { ray_vm_unmap_file(cm.mapped, cm.mapped_size); return ray_error("io", NULL); } @@ -1092,22 +1019,10 @@ static ray_t* col_mmap_impl(const char* path, bool trust_splayed_sym_count) { } } - /* Restore external nullmap: allocate buddy-backed copy - * (ext_nullmap must be a proper ray_t for ref counting) */ - if (cm.has_ext_nullmap) { - ray_t* ext_err = col_restore_ext_nullmap(vec, &cm); - if (ext_err) { - ray_vm_unmap_file(cm.mapped, cm.mapped_size); - return ext_err; - } - } - /* Patch header -- MAP_PRIVATE COW: only the header page gets copied */ vec->mmod = 1; vec->order = 0; vec->attrs &= ~RAY_ATTR_SLICE; - if (!cm.has_ext_nullmap) - vec->attrs &= ~RAY_ATTR_NULLMAP_EXT; ray_atomic_store(&vec->rc, 1); if (vec->type == RAY_STR) { diff --git a/src/store/hnsw.c b/src/store/hnsw.c index dc939a4b..c348e8a1 100644 --- a/src/store/hnsw.c +++ b/src/store/hnsw.c @@ -519,13 +519,13 @@ ray_hnsw_t* ray_hnsw_build(const float* vectors, int64_t n_nodes, int32_t dim, const float* vec = vectors + i * dim; int32_t node_level = idx->node_level[i]; - /* Phase 1: Greedy descent from top layer to node_level+1 */ + /* Pass 1: Greedy descent from top layer to node_level+1 */ int64_t ep = idx->entry_point; for (int32_t l = idx->n_layers - 1; l > node_level; l--) { ep = hnsw_greedy_closest(idx, vec, ep, l); } - /* Phase 2: Insert into layers [node_level ... 0] */ + /* Pass 2: Insert into layers [node_level ... 
0] */ for (int32_t l = node_level; l >= 0; l--) { ray_hnsw_layer_t* layer = &idx->layers[l]; int64_t M_max_l = layer->M_max; @@ -667,13 +667,13 @@ int64_t ray_hnsw_search(const ray_hnsw_t* idx, if (ef_search < k) ef_search = (int32_t)k; if (idx->n_nodes == 0) return 0; - /* Phase 1: Greedy descent from top layer to layer 1 */ + /* Pass 1: Greedy descent from top layer to layer 1 */ int64_t ep = idx->entry_point; for (int32_t l = idx->n_layers - 1; l >= 1; l--) { ep = hnsw_greedy_closest(idx, query, ep, l); } - /* Phase 2: Beam search on layer 0 with ef_search width */ + /* Pass 2: Beam search on layer 0 with ef_search width */ hnsw_cand_t* results = (hnsw_cand_t*)ray_sys_alloc( (size_t)ef_search * sizeof(hnsw_cand_t)); if (!results) return -1; /* OOM — caller must propagate error. */ diff --git a/src/store/serde.c b/src/store/serde.c index 336bb678..763ad52c 100644 --- a/src/store/serde.c +++ b/src/store/serde.c @@ -79,12 +79,6 @@ static size_t safe_strlen(const uint8_t* buf, int64_t max) { return (size_t)max; } -/* Null bitmap size for a vector (0 if no nulls) */ -static int64_t null_bitmap_size(ray_t* v) { - if (!(v->attrs & RAY_ATTR_HAS_NULLS)) return 0; - return (v->len + 7) / 8; -} - static int64_t schema_names_serde_size(ray_t* schema) { if (!schema || schema->type != RAY_I64) return 0; int64_t size = 1 + 1 + 8; @@ -115,51 +109,6 @@ static int64_t ser_schema_names(uint8_t* buf, ray_t* schema) { return c; } -/* Write null bitmap bytes into buf. Returns bytes written. - * Uses ray_vec_nullmap_bytes so HAS_INDEX, slice, ext, and inline storage - * forms all serialize the correct bits. bit_offset is non-zero only for - * slices, which (per pre-existing serde behaviour) are saved as if they - * had no nulls — null_bitmap_size returns 0 since the slice's own attrs - * lack HAS_NULLS — so we never reach this with off>0. 
*/ -static int64_t ser_null_bitmap(uint8_t* buf, ray_t* v) { - int64_t bsz = null_bitmap_size(v); - if (bsz <= 0) return 0; - - int64_t bit_off = 0, len_bits = 0; - const uint8_t* bits = ray_vec_nullmap_bytes(v, &bit_off, &len_bits); - if (!bits || bit_off != 0) { - memset(buf, 0, (size_t)bsz); - return bsz; - } - int64_t avail_bytes = (len_bits + 7) / 8; - int64_t copy = bsz < avail_bytes ? bsz : avail_bytes; - memcpy(buf, bits, (size_t)copy); - if (copy < bsz) memset(buf + copy, 0, (size_t)(bsz - copy)); - return bsz; -} - -/* Restore null bitmap from buf into vector. Returns bytes consumed. */ -static int64_t de_null_bitmap(const uint8_t* buf, int64_t avail, ray_t* v) { - int64_t bsz = (v->len + 7) / 8; - if (avail < bsz) return -1; - - v->attrs |= RAY_ATTR_HAS_NULLS; - - if (v->type == RAY_STR || v->len > 128) { - /* Must use external nullmap (STR always, others when > 128 elements) */ - ray_t* ext = ray_vec_new(RAY_U8, bsz); - if (!ext || RAY_IS_ERR(ext)) return -1; - ext->len = bsz; - memcpy(ray_data(ext), buf, (size_t)bsz); - v->attrs |= RAY_ATTR_NULLMAP_EXT; - v->ext_nullmap = ext; - } else { - /* Inline nullmap */ - memcpy(v->nullmap, buf, (size_t)bsz); - } - return bsz; -} - /* -------------------------------------------------------------------------- * ray_serde_size — calculate serialized size (excluding IPC header) * -------------------------------------------------------------------------- */ @@ -202,24 +151,24 @@ int64_t ray_serde_size(ray_t* obj) { /* NULL object: type=LIST with len=0, but we check for actual NULL semantics */ - /* Vectors — format: type(1) + attrs(1) + len(8) + data + nullmap */ - int64_t nbm = null_bitmap_size(obj); + /* Vectors — format: type(1) + attrs(1) + len(8) + data. + * Null state is sentinel-encoded in the payload — no bitmap region. 
*/ /* Overflow guard: worst case is GUID at 16 bytes/elem */ if (obj->len > (INT64_MAX - 32) / 16) return -1; switch (type) { case RAY_BOOL: - case RAY_U8: return 1 + 1 + 8 + obj->len + nbm; - case RAY_I16: return 1 + 1 + 8 + obj->len * 2 + nbm; + case RAY_U8: return 1 + 1 + 8 + obj->len; + case RAY_I16: return 1 + 1 + 8 + obj->len * 2; case RAY_I32: case RAY_DATE: case RAY_TIME: - case RAY_F32: return 1 + 1 + 8 + obj->len * 4 + nbm; + case RAY_F32: return 1 + 1 + 8 + obj->len * 4; case RAY_I64: case RAY_TIMESTAMP: - case RAY_F64: return 1 + 1 + 8 + obj->len * 8 + nbm; - case RAY_GUID: return 1 + 1 + 8 + obj->len * 16 + nbm; + case RAY_F64: return 1 + 1 + 8 + obj->len * 8; + case RAY_GUID: return 1 + 1 + 8 + obj->len * 16; case RAY_SYM: { int64_t size = 1 + 1 + 8; int64_t* ids = (int64_t*)ray_data(obj); @@ -227,14 +176,14 @@ int64_t ray_serde_size(ray_t* obj) { ray_t* s = ray_sym_str(ids[i]); size += (s ? (int64_t)ray_str_len(s) : 0) + 1; } - return size + nbm; + return size; } case RAY_STR: { int64_t size = 1 + 1 + 8; ray_str_t* elems = (ray_str_t*)ray_data(obj); for (int64_t i = 0; i < obj->len; i++) size += 8 + elems[i].len; /* i64 length + raw bytes */ - return size + nbm; + return size; } case RAY_LIST: { int64_t size = 1 + 1 + 8; @@ -376,7 +325,7 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { /* Vectors and compound types */ int64_t c; - /* Attrs byte: preserve HAS_NULLS, clear SLICE/NULLMAP_EXT/ARENA (internal flags) */ + /* Attrs byte: preserve HAS_NULLS; clear SLICE / ARENA (internal flags). 
*/ uint8_t wire_attrs = obj->attrs & (RAY_ATTR_HAS_NULLS); switch (type) { @@ -386,7 +335,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { memcpy(buf, &obj->len, 8); buf += 8; memcpy(buf, ray_data(obj), obj->len); c = 1 + 1 + 8 + obj->len; - c += ser_null_bitmap(buf + obj->len, obj); return c; } case RAY_I16: { @@ -395,7 +343,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { int64_t dsz = obj->len * 2; memcpy(buf, ray_data(obj), dsz); c = 1 + 1 + 8 + dsz; - c += ser_null_bitmap(buf + dsz, obj); return c; } case RAY_I32: @@ -407,7 +354,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { int64_t dsz = obj->len * 4; memcpy(buf, ray_data(obj), dsz); c = 1 + 1 + 8 + dsz; - c += ser_null_bitmap(buf + dsz, obj); return c; } case RAY_I64: @@ -418,7 +364,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { int64_t dsz = obj->len * 8; memcpy(buf, ray_data(obj), dsz); c = 1 + 1 + 8 + dsz; - c += ser_null_bitmap(buf + dsz, obj); return c; } case RAY_GUID: { @@ -427,7 +372,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { int64_t dsz = obj->len * 16; memcpy(buf, ray_data(obj), dsz); c = 1 + 1 + 8 + dsz; - c += ser_null_bitmap(buf + dsz, obj); return c; } case RAY_SYM: { @@ -445,7 +389,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { buf[c] = '\0'; c++; } - c += ser_null_bitmap(buf + c, obj); return 1 + 1 + 8 + c; } @@ -463,7 +406,6 @@ int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { memcpy(buf + c, p, (size_t)slen); c += slen; } - c += ser_null_bitmap(buf + c, obj); return 1 + 1 + 8 + c; } @@ -656,13 +598,7 @@ ray_t* ray_de_raw(uint8_t* buf, int64_t* len) { buf += data_bytes; *len -= data_bytes; - /* Restore null bitmap if present */ - if (attrs & RAY_ATTR_HAS_NULLS) { - int64_t consumed = de_null_bitmap(buf, *len, vec); - if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } - buf += consumed; - *len -= consumed; - } + if (attrs & RAY_ATTR_HAS_NULLS) vec->attrs |= RAY_ATTR_HAS_NULLS; return vec; } @@ -692,12 +628,7 @@ ray_t* 
ray_de_raw(uint8_t* buf, int64_t* len) { *len -= (int64_t)slen + 1; } - if (attrs & RAY_ATTR_HAS_NULLS) { - int64_t consumed = de_null_bitmap(buf, *len, vec); - if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } - buf += consumed; - *len -= consumed; - } + if (attrs & RAY_ATTR_HAS_NULLS) vec->attrs |= RAY_ATTR_HAS_NULLS; return vec; } @@ -727,12 +658,7 @@ ray_t* ray_de_raw(uint8_t* buf, int64_t* len) { *len -= slen; } - if (attrs & RAY_ATTR_HAS_NULLS) { - int64_t consumed = de_null_bitmap(buf, *len, vec); - if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } - buf += consumed; - *len -= consumed; - } + if (attrs & RAY_ATTR_HAS_NULLS) vec->attrs |= RAY_ATTR_HAS_NULLS; return vec; } diff --git a/src/vec/atom.c b/src/vec/atom.c index 20eaeaf1..fc046538 100644 --- a/src/vec/atom.c +++ b/src/vec/atom.c @@ -177,11 +177,25 @@ ray_t* ray_timestamp(int64_t val) { ray_t* ray_typed_null(int8_t type) { if (type >= 0) return ray_error("type", NULL); + /* GUID null is the canonical all-zero 16-byte value: allocate the + * U8 payload buffer up front (same shape as ray_guid) so consumers + * can deref obj without a NULL check. Other types use the payload + * union — the sentinel write below is the source of truth; the + * nullmap[0] bit is retained for atom types without a sentinel + * (BOOL/U8/F32). 
*/ + if (type == -RAY_GUID) { + static const uint8_t NULL_GUID_BYTES[16] = {0}; + ray_t* v = ray_guid(NULL_GUID_BYTES); + if (RAY_IS_ERR(v)) return v; + v->nullmap[0] |= 1; + return v; + } ray_t* v = ray_alloc(0); if (RAY_IS_ERR(v)) return v; v->type = type; switch (type) { case -RAY_F64: v->f64 = NULL_F64; break; + case -RAY_F32: v->f64 = (double)NULL_F32; break; case -RAY_I64: case -RAY_TIMESTAMP: v->i64 = NULL_I64; break; case -RAY_I32: case -RAY_DATE: case -RAY_TIME: v->i32 = NULL_I32; break; case -RAY_I16: v->i16 = NULL_I16; break; diff --git a/src/vec/vec.c b/src/vec/vec.c index 16491f73..809c3c0c 100644 --- a/src/vec/vec.c +++ b/src/vec/vec.c @@ -41,71 +41,50 @@ static int pair_cmp_idx_then_k(const void* a, const void* b) { return (pa[1] > pb[1]) - (pa[1] < pb[1]); } -/* Public bitmap accessor — handles slice / ext / inline / HAS_INDEX - * uniformly. See vec.h for the contract. */ -const uint8_t* ray_vec_nullmap_bytes(const ray_t* v, - int64_t* bit_offset_out, - int64_t* len_bits_out) { - if (bit_offset_out) *bit_offset_out = 0; - if (len_bits_out) *len_bits_out = 0; - if (!v) return NULL; - - /* Slice: HAS_NULLS / HAS_INDEX live on the parent — redirect first, - * THEN test for nulls. Reading v->attrs & HAS_NULLS here would - * incorrectly drop a sliced view of a nullable column. */ - const ray_t* target = v; - int64_t off = 0; - if (v->attrs & RAY_ATTR_SLICE) { - target = v->slice_parent; - off = v->slice_offset; - if (!target) return NULL; - } - if (!(target->attrs & RAY_ATTR_HAS_NULLS)) return NULL; - - if (bit_offset_out) *bit_offset_out = off; - - if (target->attrs & RAY_ATTR_HAS_INDEX) { - const ray_index_t* ix = ray_index_payload(target->index); - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* ext; - memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); - if (len_bits_out) *len_bits_out = ext->len * 8; - return (const uint8_t*)ray_data(ext); +/* Sentinel-based per-element null test. 
Caller guarantees v is a + * non-slice vector (type > 0) and idx is in range. Returns true iff + * payload[idx] equals the type-correct NULL_* sentinel. F64/F32 use + * (x != x) to detect any NaN bit pattern. BOOL/U8 are non-nullable + * and return false. */ +static inline bool sentinel_is_null(const ray_t* v, int64_t idx) { + const void* p = ray_data((ray_t*)v); + switch (v->type) { + case RAY_F64: { + double x = ((const double*)p)[idx]; + return x != x; } - if (len_bits_out) *len_bits_out = 128; - return ix->saved_nullmap; - } - if (target->attrs & RAY_ATTR_NULLMAP_EXT) { - if (len_bits_out) *len_bits_out = target->ext_nullmap->len * 8; - return (const uint8_t*)ray_data(target->ext_nullmap); - } - /* Inline path: RAY_STR's bytes 0-15 hold str_pool/str_ext_null, not - * bits — so RAY_STR with HAS_NULLS must always have NULLMAP_EXT. */ - if (target->type == RAY_STR) return NULL; - if (len_bits_out) *len_bits_out = 128; - return target->nullmap; -} - -/* Internal compatibility wrapper for the older two-out-param form used - * inside vec.c. Returns the inline pointer (16-byte buffer) when nulls - * live inline, or NULL when they live in *ext_out. 
*/ -static inline const uint8_t* vec_inline_nullmap(const ray_t* v, ray_t** ext_nullmap_ref) { - *ext_nullmap_ref = NULL; - if (v->attrs & RAY_ATTR_HAS_INDEX) { - const ray_index_t* ix = ray_index_payload(v->index); - if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { - ray_t* ext; - memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); - *ext_nullmap_ref = ext; - return NULL; + case RAY_F32: { + float x = ((const float*)p)[idx]; + return x != x; } - return ix->saved_nullmap; - } - if (v->attrs & RAY_ATTR_NULLMAP_EXT) { - *ext_nullmap_ref = v->ext_nullmap; - return NULL; + case RAY_I64: + case RAY_TIMESTAMP: + return ((const int64_t*)p)[idx] == NULL_I64; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + return ((const int32_t*)p)[idx] == NULL_I32; + case RAY_I16: + return ((const int16_t*)p)[idx] == NULL_I16; + case RAY_SYM: + switch (v->attrs & 0x3) { + case RAY_SYM_W8: return ((const uint8_t*)p)[idx] == 0; + case RAY_SYM_W16: return ((const uint16_t*)p)[idx] == 0; + case RAY_SYM_W32: return ((const uint32_t*)p)[idx] == 0; + default: return ((const int64_t*)p)[idx] == 0; + } + case RAY_STR: + return ((const ray_str_t*)p)[idx].len == 0; + case RAY_GUID: { + /* GUID null = 16 all-zero bytes (canonical convention). */ + static const uint8_t Z[16] = {0}; + return memcmp((const uint8_t*)p + idx * 16, Z, 16) == 0; + } + case RAY_BOOL: + case RAY_U8: + default: + return false; } - return v->nullmap; } /* True if v has any nulls. HAS_NULLS is preserved on the parent across @@ -118,8 +97,7 @@ static inline bool vec_any_nulls(const ray_t* v) { /* In-place drop of attached index — caller must hold a unique ref (rc==1) * on `v` itself. Used by mutation paths to invalidate the (now stale) * index before writing. HAS_NULLS was preserved through the attachment - * so it needs no restoration; only NULLMAP_EXT (cleared at attach time) - * is reinstated from saved_attrs. + * so it needs no restoration. * * Shared-index case: `v` may share its index ray_t with another vec * (e.g. 
after ray_cow followed by ray_retain_owned_refs, both copies @@ -131,14 +109,13 @@ static inline void vec_drop_index_inplace(ray_t* v) { if (!(v->attrs & RAY_ATTR_HAS_INDEX)) return; ray_t* idx = v->index; ray_index_t* ix = ray_index_payload(idx); - uint8_t saved = ix->saved_attrs; bool shared = ray_atomic_load(&idx->rc) > 1; if (shared) { /* Take our own retained references to the saved-pointer slots - * (ext_nullmap / str_pool / sym_dict etc.) so the bytes we copy - * into v->nullmap are validly owned by v. Leave the index's - * snapshot intact for the other holder. */ + * (str_pool / sym_dict etc.) so the bytes we copy into v->nullmap + * are validly owned by v. Leave the index's snapshot intact for + * the other holder. */ ray_index_retain_saved(ix); } memcpy(v->nullmap, ix->saved_nullmap, 16); @@ -150,7 +127,6 @@ static inline void vec_drop_index_inplace(ray_t* v) { ix->saved_attrs = 0; } v->attrs &= (uint8_t)~RAY_ATTR_HAS_INDEX; - if (saved & RAY_ATTR_NULLMAP_EXT) v->attrs |= RAY_ATTR_NULLMAP_EXT; ray_release(idx); } @@ -848,10 +824,13 @@ ray_t* ray_vec_from_raw(int8_t type, const void* data, int64_t count) { } /* -------------------------------------------------------------------------- - * Null bitmap operations + * Null state operations * - * Inline: for vectors with <=128 elements, bits stored in nullmap[16] (128 bits). - * External: for >128 elements, allocate a U8 vector bitmap via ext_nullmap. + * Null state is encoded in-band via the type-correct NULL_* sentinel in + * the payload (F64/F32 NaN, NULL_I64 / NULL_I32 / NULL_I16, ray_str_t{0,0}, + * 16 zero bytes for GUID). A vec-level RAY_ATTR_HAS_NULLS flag is a + * cheap fast-path gate; ray_vec_is_null reads the payload as source of + * truth. BOOL/U8/SYM are non-nullable. 
* -------------------------------------------------------------------------- */ ray_err_t ray_vec_set_null_checked(ray_t* vec, int64_t idx, bool is_null) { @@ -859,87 +838,45 @@ ray_err_t ray_vec_set_null_checked(ray_t* vec, int64_t idx, bool is_null) { if (vec->attrs & RAY_ATTR_SLICE) return RAY_ERR_TYPE; /* cannot set null on slice — COW first */ if (idx < 0 || idx >= vec->len) return RAY_ERR_RANGE; - /* SYM columns are no-null by design — sym ID 0 (the interned - * empty string, reserved by ray_sym_init) is the canonical - * "missing" / "empty" / "absent" value, and every SYM cell - * holds some valid ID. Reject set-null on SYM so callers that - * mean "this row is missing" write 0 explicitly instead. */ - if (vec->type == RAY_SYM) return RAY_ERR_TYPE; + /* Types that don't accept set-null: + * - SYM: sym ID 0 (interned empty string, reserved by + * ray_sym_init) is the canonical "missing" value; callers + * write 0 directly. + * - BOOL / U8: non-nullable; they have nowhere to store a + * null, so reject to keep the producer surface clean. */ + if (vec->type == RAY_SYM || + vec->type == RAY_BOOL || + vec->type == RAY_U8) return RAY_ERR_TYPE; /* Mutation invalidates any attached accelerator index — drop it inline. * Caller must already hold a unique ref (set-null on a shared vec is a * bug regardless of indexing). */ vec_drop_index_inplace(vec); - /* Mark HAS_NULLS if setting a null (defer for RAY_STR until ext alloc succeeds) */ - if (is_null && vec->type != RAY_STR) vec->attrs |= RAY_ATTR_HAS_NULLS; - - if (!(vec->attrs & RAY_ATTR_NULLMAP_EXT)) { - /* RAY_STR uses bytes 8-15 for str_pool, HAS_LINK uses bytes 8-15 for - * link_target — both must skip the inline-128 path to avoid - * aliasing corruption. Otherwise <=128 elements go inline. 
*/ - bool can_inline = (vec->type != RAY_STR) && idx < 128 && - !(vec->attrs & RAY_ATTR_HAS_LINK); - if (can_inline) { - /* Inline nullmap path (<=128 elements, non-STR, non-linked) */ - int byte_idx = (int)(idx / 8); - int bit_idx = (int)(idx % 8); - if (is_null) - vec->nullmap[byte_idx] |= (uint8_t)(1u << bit_idx); - else - vec->nullmap[byte_idx] &= (uint8_t)~(1u << bit_idx); - return RAY_OK; - } - /* Need to promote to external nullmap */ - int64_t bitmap_len = (vec->len + 7) / 8; - ray_t* ext = ray_vec_new(RAY_U8, bitmap_len); - if (!ext || RAY_IS_ERR(ext)) return RAY_ERR_OOM; - ext->len = bitmap_len; - if (vec->type == RAY_STR || (vec->attrs & RAY_ATTR_HAS_LINK)) { - /* Bytes 0-15 contain pointers/sym, not bits — start ext zeroed. - * (Linked vecs reach here only when adding their first null, - * since promote_inline_to_ext in linkop.c covers the - * pre-existing-nulls case at attach time.) */ - memset(ray_data(ext), 0, (size_t)bitmap_len); - } else { - /* Copy existing inline bits */ - memcpy(ray_data(ext), vec->nullmap, 16); - /* Zero remaining bytes */ - if (bitmap_len > 16) - memset((char*)ray_data(ext) + 16, 0, (size_t)(bitmap_len - 16)); + /* Every remaining vec type uses a sentinel: F64/F32/I64/TIMESTAMP/ + * I32/DATE/TIME/I16/STR/GUID. Write the type-correct NULL_* into + * the payload and set HAS_NULLS. ray_vec_is_null (the sole reader) + * recovers null state from the payload. Caller owns the payload on + * is_null=false (we have no way to know the prior real value); the + * clear path is a no-op. 
*/ + if (is_null) { + void* p = ray_data(vec); + switch (vec->type) { + case RAY_F64: ((double*)p)[idx] = NULL_F64; break; + case RAY_F32: ((float*)p)[idx] = NULL_F32; break; + case RAY_I64: case RAY_TIMESTAMP: ((int64_t*)p)[idx] = NULL_I64; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: ((int32_t*)p)[idx] = NULL_I32; break; + case RAY_I16: ((int16_t*)p)[idx] = NULL_I16; break; + case RAY_STR: + memset(&((ray_str_t*)p)[idx], 0, sizeof(ray_str_t)); + break; + case RAY_GUID: + memset((uint8_t*)p + idx * 16, 0, 16); + break; + default: return RAY_ERR_TYPE; } - vec->attrs |= RAY_ATTR_NULLMAP_EXT; - if (is_null) vec->attrs |= RAY_ATTR_HAS_NULLS; - vec->ext_nullmap = ext; + vec->attrs |= RAY_ATTR_HAS_NULLS; } - - /* External nullmap path */ - ray_t* ext = vec->ext_nullmap; - /* Grow external bitmap if needed */ - int64_t needed_bytes = (idx / 8) + 1; - if (needed_bytes > ext->len) { - int64_t new_len = (vec->len + 7) / 8; - if (new_len < needed_bytes) new_len = needed_bytes; - size_t new_data_size = (size_t)new_len; - int64_t old_len = ext->len; - ray_t* new_ext = ray_scratch_realloc(ext, new_data_size); - if (!new_ext || RAY_IS_ERR(new_ext)) return RAY_ERR_OOM; - /* Zero new bytes */ - if (new_len > old_len) - memset((char*)ray_data(new_ext) + old_len, 0, - (size_t)(new_len - old_len)); - new_ext->len = new_len; - vec->ext_nullmap = new_ext; - ext = new_ext; - } - - uint8_t* bits = (uint8_t*)ray_data(ext); - int byte_idx = (int)(idx / 8); - int bit_idx = (int)(idx % 8); - if (is_null) - bits[byte_idx] |= (uint8_t)(1u << bit_idx); - else - bits[byte_idx] &= (uint8_t)~(1u << bit_idx); return RAY_OK; } @@ -1310,9 +1247,9 @@ bool ray_vec_is_null(ray_t* vec, int64_t idx) { if (idx < 0 || idx >= vec->len) return false; /* SYM columns are no-null by design — see ray_vec_set_null_checked - * for the rationale. Short-circuit before slice/nullmap dispatch - * so any leftover HAS_NULLS attr from pre-policy code paths - * doesn't surface a phantom null. 
*/ + * for the rationale. Sentinel check is bypassed here; consumers + * that need sym-null detection (e.g. dict.c key handling) test the + * sym id directly. */ if (vec->type == RAY_SYM) return false; /* Slice: delegate to parent with adjusted index */ @@ -1322,40 +1259,37 @@ bool ray_vec_is_null(ray_t* vec, int64_t idx) { return ray_vec_is_null(parent, pidx); } + /* Vec-level fast-path gate: HAS_NULLS clear means no null anywhere. */ if (!vec_any_nulls(vec)) return false; - ray_t* ext = NULL; - const uint8_t* inline_bits = vec_inline_nullmap(vec, &ext); - if (ext) { - int64_t byte_idx = idx / 8; - if (byte_idx >= ext->len) return false; - const uint8_t* bits = (const uint8_t*)ray_data(ext); - return (bits[byte_idx] >> (idx % 8)) & 1; + /* Sentinels are the sole source of truth. BOOL/U8 are non-nullable + * (rejected at the producer) so they can never reach here with + * HAS_NULLS set; the default arm is unreachable in practice. */ + switch (vec->type) { + case RAY_F64: + case RAY_F32: + case RAY_I64: case RAY_TIMESTAMP: + case RAY_I32: case RAY_DATE: case RAY_TIME: + case RAY_I16: + case RAY_STR: + case RAY_GUID: + return sentinel_is_null(vec, idx); + default: + return false; } - - /* Inline nullmap path. RAY_STR's inline 16 bytes hold str_pool/str_ext_null - * (or, when an index is attached, were the same and are now in the index - * snapshot). Either way, RAY_STR uses ext nullmap exclusively for its - * null bits, which is handled above; if the inline path is taken for - * RAY_STR, no nulls are present. */ - if (vec->type == RAY_STR) return false; - if (idx >= 128) return false; - int byte_idx = (int)(idx / 8); - int bit_idx = (int)(idx % 8); - return (inline_bits[byte_idx] >> bit_idx) & 1; } /* -------------------------------------------------------------------------- - * ray_vec_copy_nulls — bulk-copy null bitmap from src to dst + * ray_vec_copy_nulls — copy null state from src to dst * * dst must have the same len as src (or at least as many elements). 
- * Handles inline, external, and slice source bitmaps. + * Handles direct and slice sources. * -------------------------------------------------------------------------- */ ray_err_t ray_vec_copy_nulls(ray_t* dst, const ray_t* src) { if (!dst || !src) return RAY_ERR_TYPE; - /* Use ray_vec_is_null which handles slices, inline, and external bitmaps + /* Use ray_vec_is_null which handles slices and sentinel reads * transparently. For non-null sources this returns immediately. */ bool has_any = false; if (src->attrs & RAY_ATTR_SLICE) { diff --git a/src/vec/vec.h b/src/vec/vec.h index 15d670ea..97368d4c 100644 --- a/src/vec/vec.h +++ b/src/vec/vec.h @@ -28,31 +28,14 @@ * vec.h -- Vector operations. * * Vectors are ray_t blocks with positive type tags. Data follows the 32-byte - * header. Supports append, get, set, slice (zero-copy), concat, and nullable - * bitmap (inline for <=128 elements, external for >128). + * header. Supports append, get, set, slice (zero-copy), concat. Null state + * is encoded in-band via type-correct NULL_* sentinels (see vec.c). */ #include -/* Copy null bitmap from src to dst (handles slices, inline, external). - * dst and src must have the same length. Internal helper. */ +/* Copy null bits from src to dst (sentinel-based). dst and src must have + * the same length. Internal helper. */ ray_err_t ray_vec_copy_nulls(ray_t* dst, const ray_t* src); -/* Return a pointer to the effective null bitmap bytes for `v`, accounting - * for slice / external / inline / HAS_INDEX storage forms. Returns NULL - * when `v` has no nulls (caller should gate on `v->attrs & RAY_ATTR_HAS_NULLS` - * before calling for the cheap fast-path). - * - * On return: - * *bit_offset_out (if non-NULL): bit-offset within the returned buffer - * that corresponds to v's row 0. Non-zero only for slices. - * *len_bits_out (if non-NULL): total bits addressable in the buffer. - * For inline, this is 128. For external, it's the ext->len * 8. 
- * - * The returned pointer is valid as long as `v` (and its ext_nullmap / - * attached index ray_t, if any) are not released or mutated. */ -const uint8_t* ray_vec_nullmap_bytes(const ray_t* v, - int64_t* bit_offset_out, - int64_t* len_bits_out); - #endif /* RAY_VEC_H */ diff --git a/test/rfl/agg/pearson_corr.rfl b/test/rfl/agg/pearson_corr.rfl index f10b641f..0b504a4d 100644 --- a/test/rfl/agg/pearson_corr.rfl +++ b/test/rfl/agg/pearson_corr.rfl @@ -19,15 +19,17 @@ (pearson_corr (as 'I16 [1 2 3 4 5]) (as 'I16 [5 4 3 2 1])) -- -1.0 (pearson_corr (as 'U8 [1 2 3 4]) (as 'U8 [4 3 2 1])) -- -1.0 -;; ─── undefined cases → NaN ──────────────────────────────────────── -;; n < 2 → NaN (single-row variance undefined). -(!= (pearson_corr [1.0] [2.0]) (pearson_corr [1.0] [2.0])) -- true -;; Constant left column → variance 0 → NaN. +;; ─── undefined cases → NaN (= F64 null sentinel) ──────────────── +;; n < 2 → NaN (single-row variance undefined). NaN IS NULL_F64, so +;; detect via nil? rather than IEEE NaN != NaN (which collapses to +;; "both nulls are equal" in cmp.c null-handling). +(nil? (pearson_corr [1.0] [2.0])) -- true +;; Constant left column → variance 0 → NaN/null. (set Rc1 (pearson_corr [1.0 1.0 1.0] [2.0 4.0 6.0])) -(!= Rc1 Rc1) -- true -;; Constant right column → variance 0 → NaN. +(nil? Rc1) -- true +;; Constant right column → variance 0 → NaN/null. (set Rc2 (pearson_corr [1.0 2.0 3.0] [5.0 5.0 5.0])) -(!= Rc2 Rc2) -- true +(nil? Rc2) -- true ;; ─── algebraic invariants ───────────────────────────────────────── ;; Symmetry: r(x,y) == r(y,x). diff --git a/test/rfl/arith/sqrt.rfl b/test/rfl/arith/sqrt.rfl index 5b22013c..0b003a40 100644 --- a/test/rfl/arith/sqrt.rfl +++ b/test/rfl/arith/sqrt.rfl @@ -7,11 +7,14 @@ (sqrt 9.0) -- 3.0 (sqrt 25.0) -- 5.0 -;; sqrt of a negative produces IEEE NaN (still f64, not nil) — NaN is -;; the only float that is not equal to itself. +;; sqrt of a negative produces IEEE NaN. 
NaN IS the F64 null sentinel +;; (NULL_F64 = __builtin_nan("")), so the result is recognised as null. +;; NaN remains its own type — type stays 'f64. IEEE-NaN != NaN does +;; not leak through cmp.c because two null atoms compare as equal under +;; null-handling at cmp.c:188-189. (type (sqrt -1.0)) -- 'f64 -(nil? (sqrt -1.0)) -- false -(!= (sqrt -1.0) (sqrt -1.0)) -- true +(nil? (sqrt -1.0)) -- true +(!= (sqrt -1.0) (sqrt -1.0)) -- false ;; roundtrip: (sqrt x)^2 ≈ x for x >= 0 (set A (as 'F64 (rand 256 1000))) diff --git a/test/rfl/collection/distinct.rfl b/test/rfl/collection/distinct.rfl index e1764bf9..c2ae1f4c 100644 --- a/test/rfl/collection/distinct.rfl +++ b/test/rfl/collection/distinct.rfl @@ -38,14 +38,15 @@ (nil? (at (concat [1 0Nl 3] [0Nl 5 6]) 3)) -- true (nil? (at (concat [1 0Nl 3] [0Nl 5 6]) 0)) -- false (nil? (at (concat [1 0Nl 3] [0Nl 5 6]) 4)) -- false -;; cast preserves null bitmaps +;; cast preserves null state (nil? (at (as 'F64 [1 0Nl 3]) 1)) -- true (nil? (at (as 'I32 [1 0Nl 3]) 1)) -- true (at (as 'F64 [1 0Nl 3]) 0) -- 1.0 (at (as 'F64 [1 0Nl 3]) 2) -- 3.0 -;; cast to I16/U8/BOOL preserves nulls +;; cast to I16 preserves nulls; U8/BOOL are non-nullable so the null +;; collapses to u8-zero (no NULL_U8 sentinel). (nil? (at (as 'I16 [1 0Nl 3]) 1)) -- true -(nil? (at (as 'U8 [1 0Nl 3]) 1)) -- true +(nil? 
(at (as 'U8 [1 0Nl 3]) 1)) -- false ;; cast non-null values survive (at (as 'I32 [10 0Nl 30]) 0) -- 10i (at (as 'I32 [10 0Nl 30]) 2) -- 30i diff --git a/test/rfl/integration/fused_group_parity.rfl b/test/rfl/integration/fused_group_parity.rfl index 31aebe65..65ec2081 100644 --- a/test/rfl/integration/fused_group_parity.rfl +++ b/test/rfl/integration/fused_group_parity.rfl @@ -119,8 +119,9 @@ ;; I16 SUM with full range: -32768 + -1 + 0 + 1 + 32767 = -1 (sum (at (select {s: (sum v) from: Ti16 where: (>= g 0) by: g}) 's)) -- -1 -;; MIN, MAX preserve full range -(min (at (select {m: (min v) from: Ti16 where: (>= g 0) by: g}) 'm)) -- -32768 +;; MIN, MAX: INT16_MIN (-32768) IS NULL_I16, so a user-stored -32768 +;; round-trips as 0Nh (null). +(min (at (select {m: (min v) from: Ti16 where: (>= g 0) by: g}) 'm)) -- 0Nh (max (at (select {m: (max v) from: Ti16 where: (>= g 0) by: g}) 'm)) -- 32767 ;; I32 boundaries — same pattern. INT32_MAX = 2147483647. diff --git a/test/rfl/integration/null.rfl b/test/rfl/integration/null.rfl index ed918065..11dd3d75 100644 --- a/test/rfl/integration/null.rfl +++ b/test/rfl/integration/null.rfl @@ -5,7 +5,8 @@ (nil? 0Nl) -- true (nil? 0) -- false (nil? 1) -- false -(nil? "") -- false +;; STR null = empty string (len 0). +(nil? "") -- true ;; nil? distinguishes typed nulls from zero-valued atoms across types (nil? 0Ni) -- true (nil? 0Nf) -- true diff --git a/test/rfl/lazy/chains.rfl b/test/rfl/lazy/chains.rfl index 2f707a8a..1fcb3edb 100644 --- a/test/rfl/lazy/chains.rfl +++ b/test/rfl/lazy/chains.rfl @@ -12,7 +12,7 @@ (last V) -- 5 ;; Compose lazy producer with non-lazy-aware consumer — the dispatcher -;; (Phase 1.5) must materialise (sum U) and (sum V) before passing to +. +;; must materialise (sum U) and (sum V) before passing to +. ;; Regression test for the bug that originally blocked Plan Task 5. 
(set U [10 20 30]) (+ (sum U) (sum V)) -- 75 diff --git a/test/rfl/mem/heap_coverage.rfl b/test/rfl/mem/heap_coverage.rfl index 5bafbaea..c1ae7bd1 100644 --- a/test/rfl/mem/heap_coverage.rfl +++ b/test/rfl/mem/heap_coverage.rfl @@ -233,13 +233,12 @@ FV1 -- [1.5 2.5 3.5] (.sys.timeit 0) -- 0 ;; ════════════════════════════════════════════════════════════════════════════ -;; 7. NULLMAP_EXT release/retain (heap.c:560-562, 658-660, 753-755). -;; A vector with > 128 elements where any are null spills the inline -;; nullmap into an external bitmap vec. +;; 7. Large nullable vec release/retain. +;; A vector with > 128 elements where any are null exercises the +;; sentinel-encoded null path (no external bitmap child). ;; ════════════════════════════════════════════════════════════════════════════ -;; 200-element vec with nulls scattered — exceeds 128-bit inline cap -;; so ext_nullmap is allocated. +;; 203-element vec with sentinel-encoded nulls scattered. (set NV (concat (til 100) (concat [0Nl 0Nl 0Nl] (til 100)))) (count NV) -- 203 (set NV2 NV) diff --git a/test/rfl/null/bool_u8_lockdown.rfl b/test/rfl/null/bool_u8_lockdown.rfl deleted file mode 100644 index 01431e80..00000000 --- a/test/rfl/null/bool_u8_lockdown.rfl +++ /dev/null @@ -1,22 +0,0 @@ -;; Phase 1: BOOL and U8 are non-nullable. -;; -;; Empty cells in CSV ingest must materialize as false / 0 with no null mark. -;; All other nullable types still produce typed nulls as before. - -;; Sanity: typed nulls for the nullable types still parse and report null. -(nil? 0Nh) -- true -(nil? 0Ni) -- true -(nil? 0Nl) -- true -(nil? 0Nf) -- true - -;; CSV ingest: empty BOOL / U8 cells coerce to false / 0, not null. 
-(.sys.exec "rm -f /tmp/rfl_phase1_bool_u8_unique_path.csv") -(.sys.exec "printf 'b,u\\ntrue,1\\n,\\nfalse,3\\n' > /tmp/rfl_phase1_bool_u8_unique_path.csv") -(set P1Lockdown (.csv.read [B8 U8] "/tmp/rfl_phase1_bool_u8_unique_path.csv")) -(count P1Lockdown) -- 3 -(at P1Lockdown 'b) -- [true false false] -(at P1Lockdown 'u) -- [0x01 0x00 0x03] -(map nil? (at P1Lockdown 'b)) -- [false false false] -(map nil? (at P1Lockdown 'u)) -- [false false false] -(sum (map nil? (at P1Lockdown 'b))) -- 0 -(sum (map nil? (at P1Lockdown 'u))) -- 0 diff --git a/test/rfl/null/bool_u8_non_nullable.rfl b/test/rfl/null/bool_u8_non_nullable.rfl new file mode 100644 index 00000000..65be2bb9 --- /dev/null +++ b/test/rfl/null/bool_u8_non_nullable.rfl @@ -0,0 +1,22 @@ +;; BOOL and U8 are non-nullable. +;; +;; Empty cells in CSV ingest must materialize as false / 0 with no null mark. +;; All other nullable types still produce typed nulls. + +;; Sanity: typed nulls for the nullable types still parse and report null. +(nil? 0Nh) -- true +(nil? 0Ni) -- true +(nil? 0Nl) -- true +(nil? 0Nf) -- true + +;; CSV ingest: empty BOOL / U8 cells coerce to false / 0, not null. +(.sys.exec "rm -f /tmp/rfl_bool_u8_non_nullable.csv") +(.sys.exec "printf 'b,u\\ntrue,1\\n,\\nfalse,3\\n' > /tmp/rfl_bool_u8_non_nullable.csv") +(set BU (.csv.read [B8 U8] "/tmp/rfl_bool_u8_non_nullable.csv")) +(count BU) -- 3 +(at BU 'b) -- [true false false] +(at BU 'u) -- [0x01 0x00 0x03] +(map nil? (at BU 'b)) -- [false false false] +(map nil? (at BU 'u)) -- [false false false] +(sum (map nil? (at BU 'b))) -- 0 +(sum (map nil? (at BU 'u))) -- 0 diff --git a/test/rfl/null/cast.rfl b/test/rfl/null/cast.rfl index 4d9402e3..663c0f49 100644 --- a/test/rfl/null/cast.rfl +++ b/test/rfl/null/cast.rfl @@ -3,7 +3,10 @@ (sum (map nil? (as 'F64 [1 0N 3]))) -- 1 (sum (map nil? (as 'I32 [1 0N 3]))) -- 1 (sum (map nil? (as 'I16 [1 0N 3]))) -- 1 -(sum (map nil? (as 'B8 [1 0N 3]))) -- 1 +;; Post-Phase-1: B8 / U8 are non-nullable. 
Casting an integer null to +;; B8 collapses the null to b8-zero (no NULL_B8 sentinel exists), so +;; the post-cast nil? scan reports 0 nulls. +(sum (map nil? (as 'B8 [1 0N 3]))) -- 0 ;; Non-null values survive cast (at (as 'F64 [1 0N 3]) 0) -- 1.0 diff --git a/test/rfl/null/f64_dual_encoding.rfl b/test/rfl/null/f64_nan_encoding.rfl similarity index 67% rename from test/rfl/null/f64_dual_encoding.rfl rename to test/rfl/null/f64_nan_encoding.rfl index 1ccdd0da..184dfef7 100644 --- a/test/rfl/null/f64_dual_encoding.rfl +++ b/test/rfl/null/f64_nan_encoding.rfl @@ -1,5 +1,5 @@ -;; Phase 2 dual-encoding contract: F64 nulls are NaN in the payload AND -;; have the nullmap bit set. Every consumer must agree on null-ness. +;; F64 null encoding: nulls are NaN in the payload. Every consumer +;; must agree on null-ness via the NaN sentinel. ;; ----- 1. Atom construction ----- @@ -7,20 +7,20 @@ ;; ----- 2. CSV ingest ----- -(.sys.exec "rm -f /tmp/rfl_phase2_f64_dual.csv") -(.sys.exec "printf 'x\\n1.5\\n\\n3.5\\n' > /tmp/rfl_phase2_f64_dual.csv") -(set P2F (.csv.read [F64] "/tmp/rfl_phase2_f64_dual.csv")) -(count P2F) -- 3 -(nil? (at (at P2F 'x) 1)) -- true -(at (at P2F 'x) 0) -- 1.5 -(at (at P2F 'x) 2) -- 3.5 +(.sys.exec "rm -f /tmp/rfl_f64_nan.csv") +(.sys.exec "printf 'x\\n1.5\\n\\n3.5\\n' > /tmp/rfl_f64_nan.csv") +(set Pf (.csv.read [F64] "/tmp/rfl_f64_nan.csv")) +(count Pf) -- 3 +(nil? (at (at Pf 'x) 1)) -- true +(at (at Pf 'x) 0) -- 1.5 +(at (at Pf 'x) 2) -- 3.5 ;; ----- 3. Aggregations exclude nulls ----- -(sum (at P2F 'x)) -- 5.0 -(avg (at P2F 'x)) -- 2.5 -(min (at P2F 'x)) -- 1.5 -(max (at P2F 'x)) -- 3.5 +(sum (at Pf 'x)) -- 5.0 +(avg (at Pf 'x)) -- 2.5 +(min (at Pf 'x)) -- 1.5 +(max (at Pf 'x)) -- 3.5 ;; ----- 4. 
Sort places nulls per policy ----- diff --git a/test/rfl/null/grouped_agg_null_correctness.rfl b/test/rfl/null/grouped_agg_null_correctness.rfl index a35f839c..9c1388f8 100644 --- a/test/rfl/null/grouped_agg_null_correctness.rfl +++ b/test/rfl/null/grouped_agg_null_correctness.rfl @@ -1,8 +1,8 @@ -;; Phase 3 follow-up: per-(group, agg) non-null counts drive AVG/VAR/ -;; STDDEV divisors, and result-side null finalization replaces -;; accumulator seeds (DBL_MAX / -DBL_MAX / 0 / NaN product) for -;; MIN/MAX/PROD/FIRST/LAST on all-null groups. See -;; include/rayforce.h NULL_* paragraph. +;; Grouped aggregates exclude nulls correctly: per-(group, agg) non-null +;; counts drive AVG/VAR/STDDEV divisors, and result-side null finalization +;; produces a typed null (rather than leaking the accumulator seed — +;; DBL_MAX / -DBL_MAX / 0 / NaN product) for MIN/MAX/PROD/FIRST/LAST on +;; all-null groups. ;; ----- AVG divisor excludes nulls ----- ;; Group g=0 has v in [1, 2, 0N, 4] — non-null sum = 7, non-null count = 3. diff --git a/test/rfl/null/integer_dual_encoding.rfl b/test/rfl/null/integer_sentinel_encoding.rfl similarity index 67% rename from test/rfl/null/integer_dual_encoding.rfl rename to test/rfl/null/integer_sentinel_encoding.rfl index 7cc330f3..31cf6a01 100644 --- a/test/rfl/null/integer_dual_encoding.rfl +++ b/test/rfl/null/integer_sentinel_encoding.rfl @@ -1,5 +1,5 @@ -;; Phase 3a dual-encoding contract: integer/temporal nulls hold the -;; INT_MIN sentinel in the payload AND have the nullmap bit set. +;; Integer / temporal null encoding: nulls hold the type-correct INT_MIN +;; sentinel (NULL_I16 / NULL_I32 / NULL_I64) in the payload. ;; ----- 1. Atom construction ----- @@ -9,19 +9,19 @@ ;; ----- 2. CSV ingest (I64) ----- -(.sys.exec "rm -f /tmp/rfl_phase3a_int_dual.csv") -(.sys.exec "printf 'x\\n10\\n\\n30\\n' > /tmp/rfl_phase3a_int_dual.csv") -(set P3I (.csv.read [I64] "/tmp/rfl_phase3a_int_dual.csv")) -(count P3I) -- 3 -(nil? 
(at (at P3I 'x) 1)) -- true -(at (at P3I 'x) 0) -- 10 -(at (at P3I 'x) 2) -- 30 +(.sys.exec "rm -f /tmp/rfl_int_sentinel.csv") +(.sys.exec "printf 'x\\n10\\n\\n30\\n' > /tmp/rfl_int_sentinel.csv") +(set Pi (.csv.read [I64] "/tmp/rfl_int_sentinel.csv")) +(count Pi) -- 3 +(nil? (at (at Pi 'x) 1)) -- true +(at (at Pi 'x) 0) -- 10 +(at (at Pi 'x) 2) -- 30 ;; ----- 3. Aggregations exclude nulls ----- -(sum (at P3I 'x)) -- 40 -(min (at P3I 'x)) -- 10 -(max (at P3I 'x)) -- 30 +(sum (at Pi 'x)) -- 40 +(min (at Pi 'x)) -- 10 +(max (at Pi 'x)) -- 30 ;; ----- 4. Sort places nulls per policy ----- @@ -34,7 +34,7 @@ (nil? (at (distinct [0N 0N 0N]) 0)) -- true (count (distinct [0N 0N 0N])) -- 1 -;; ----- 6. Group-by SUM on nullable I64 (consumer NaN/sentinel-skip) ----- +;; ----- 6. Group-by SUM on nullable I64 (consumer sentinel-skip) ----- (set Tn (table [v g] (list [1 2 0N 4 5] [0 0 1 1 1]))) (sum (at (select {s: (sum v) from: Tn where: (>= g 0) by: g}) 's)) -- 12 diff --git a/test/rfl/null/sentinel_only_baseline.rfl b/test/rfl/null/sentinel_only_baseline.rfl new file mode 100644 index 00000000..9bd242cd --- /dev/null +++ b/test/rfl/null/sentinel_only_baseline.rfl @@ -0,0 +1,72 @@ +;; Sentinel-only baseline. +;; +;; Pins the null contract: for every nullable numeric/temporal type, the +;; NULL_* sentinel value in a vec payload is the SOLE truth that consumers +;; (count, sum/avg, format, sort, distinct) need. +;; +;; Every check builds a real vec containing a sentinel (via `as` cast or +;; CSV ingest) and exercises a consumer — never just `(nil? 0Nl)` on a +;; literal, which would only test the parser. + +;; ----- 1. F64 (NaN-encoded null) ----- +(set Vf (as 'F64 [1.0 0N 3.0 0N 5.0])) +(nil? (at Vf 1)) -- true +(- (count Vf) (sum (map nil? Vf))) -- 3 +(sum Vf) -- 9.0 +(avg Vf) -- 3.0 +(format "%" (at Vf 1)) -- "0Nf" +(at (asc Vf) 0) -- 0Nf +(at (desc Vf) 4) -- 0Nf +(count (distinct Vf)) -- 4 + +;; ----- 2. 
I16 (INT16_MIN sentinel) ----- +(set V16 (as 'I16 [1 0N 2 0N 3])) +(nil? (at V16 1)) -- true +(- (count V16) (sum (map nil? V16))) -- 3 +(sum V16) -- 6 +(format "%" (at V16 1)) -- "0Nh" +(at (asc V16) 0) -- 0Nh +(at (desc V16) 4) -- 0Nh +(count (distinct V16)) -- 4 + +;; ----- 3. I32 (INT32_MIN sentinel) ----- +(set V32 (as 'I32 [10 0N 20 0N 30])) +(nil? (at V32 1)) -- true +(- (count V32) (sum (map nil? V32))) -- 3 +(sum V32) -- 60 +(format "%" (at V32 1)) -- "0Ni" +(at (asc V32) 0) -- 0Ni +(count (distinct V32)) -- 4 + +;; ----- 4. I64 (INT64_MIN sentinel) via CSV ingest ----- +;; CSV reader writes the sentinel into the payload; consumers must read it. +(.sys.exec "rm -f /tmp/rfl_sentinel_baseline_i64.csv") +(.sys.exec "printf 'x\\n100\\n\\n300\\n\\n500\\n' > /tmp/rfl_sentinel_baseline_i64.csv") +(set Ti (.csv.read [I64] "/tmp/rfl_sentinel_baseline_i64.csv")) +(set Vi (at Ti 'x)) +(count Vi) -- 5 +(nil? (at Vi 1)) -- true +(nil? (at Vi 3)) -- true +(- (count Vi) (sum (map nil? Vi))) -- 3 +(sum Vi) -- 900 +(avg Vi) -- 300.0 +(format "%" (at Vi 1)) -- "0Nl" +(at (asc Vi) 0) -- 0Nl +(at (desc Vi) 4) -- 0Nl +(count (distinct Vi)) -- 4 + +;; ----- 5. DATE temporal (NULL_DATE sentinel) ----- +(set Vd (as 'DATE [7305 0N 7306 0N 7307])) +(nil? (at Vd 1)) -- true +(- (count Vd) (sum (map nil? Vd))) -- 3 +(format "%" (at Vd 1)) -- "0Nd" +(at (asc Vd) 0) -- 0Nd +(count (distinct Vd)) -- 4 + +;; ----- 6. TIMESTAMP temporal (NULL_TIMESTAMP sentinel) ----- +(set Vp (as 'TIMESTAMP [1000 0N 2000 0N 3000])) +(nil? (at Vp 1)) -- true +(- (count Vp) (sum (map nil? Vp))) -- 3 +(format "%" (at Vp 1)) -- "0Np" +(at (asc Vp) 0) -- 0Np +(count (distinct Vp)) -- 4 diff --git a/test/rfl/ops/exec_advanced.rfl b/test/rfl/ops/exec_advanced.rfl index c2a27a09..d52a2f60 100644 --- a/test/rfl/ops/exec_advanced.rfl +++ b/test/rfl/ops/exec_advanced.rfl @@ -8,8 +8,8 @@ ;; Hit (with rationale): ;; - partitioned_gather phases (exec.c:275-473): single-key sort over ;; >= PG_MIN (=131072) rows. 
No existing test crosses this size for -;; the OP_SORT path. Phase 1 (pg_hist_fn), phase 2 (pg_route_fn), -;; phase 3 (pg_block_fn) all run; pg_block_fn covers e==4 and e==8 +;; the OP_SORT path. Pass 1 (pg_hist_fn), pass 2 (pg_route_fn), +;; pass 3 (pg_block_fn) all run; pg_block_fn covers e==4 and e==8 ;; element-size arms when the table mixes I32 and I64 columns. ;; - OP_TRIM in select projection (exec.c:1548): no rfl fixture ;; currently invokes (trim col) — string.c's exec_string_unary @@ -69,7 +69,7 @@ ;; ==================================================================== ;; OP_SORT — partitioned_gather (exec.c:275-473). Single-key sort over ;; >= PG_MIN (=131072) rows triggers the partitioned routing path. -;; Phase 1 (pg_hist_fn @275), phase 2 (pg_route_fn @304), phase 3 +;; Pass 1 (pg_hist_fn @275), pass 2 (pg_route_fn @304), pass 3 ;; (pg_block_fn @338) all run. Mixing I64 and I32 columns drives ;; pg_block_fn's e==8 (line 358) and e==4 (line 363) element-size arms. ;; ==================================================================== diff --git a/test/rfl/ops/internal_coverage.rfl b/test/rfl/ops/internal_coverage.rfl index b242a645..cf02f6fe 100644 --- a/test/rfl/ops/internal_coverage.rfl +++ b/test/rfl/ops/internal_coverage.rfl @@ -334,15 +334,12 @@ (at (at (select {s: (sum v) from: BN by: k asc: k}) 's) 199) -- 198 ;; ── 9. Large parallel GROUP BY with STDDEV + singleton groups ────────────── -;; Covers par_set_null (lines 954-956): parallel radix GROUP BY (nrows >= 65536), -;; > 128 output groups (200 groups), singleton groups at indices >= 128 (keys -;; 128.0..199.0 have 1 row each). STDDEV of 1 row → cnt=1 → insuf=true → null. -;; F64 keys are NOT eligible for the DA path → radix HT path is used. -;; 1. par_prepare_nullmap: vec->len=200>128 → inline bit-0 set+clear (no EXT yet) -;; 2. 
radix_phase3: singleton group at di>=128 → par_set_null(di>=128) -;; → !(NULLMAP_EXT) && idx>=128 → ray_vec_set_null promotes inline→EXT -;; → lines 954-956 covered -;; 3. par_finalize_nulls: vec now has EXT → lines 983-989 (EXT scan) covered +;; Covers par_set_null on a parallel radix GROUP BY (nrows >= 65536) with +;; > 128 output groups (200 groups) and singleton groups at indices >= 128 +;; (keys 128.0..199.0 have 1 row each). STDDEV of 1 row → cnt=1 → insuf=true +;; → null, which goes through par_set_null on output rows past the legacy +;; 128-inline boundary. F64 keys are NOT eligible for the DA path → radix +;; HT path is used. ;; Keys 0.0..127.0 each have 512 rows (65536 total), 128.0..199.0 have 1 row each. ;; Total = 65608 rows ≥ RAY_PARALLEL_THRESHOLD (65536) → parallel radix path. (set PN_keys (concat (as 'F64 (% (til 65536) 128)) (as 'F64 (+ 128 (til 72))))) diff --git a/test/rfl/strop/split.rfl b/test/rfl/strop/split.rfl index 64a9eb66..0b0addb7 100644 --- a/test/rfl/strop/split.rfl +++ b/test/rfl/strop/split.rfl @@ -1,7 +1,11 @@ ;; Invariants for `split`. (split "a,b,c" ",") -- ["a" "b" "c"] -(split "" ",") -- [""] +;; Empty string IS the STR null, so split-of-"" yields a one-element +;; vector whose only element is null. Assert via nil? rather than a +;; [0Nc] literal (parser doesn't accept that form). +(count (split "" ",")) -- 1 +(nil? (at (split "" ",") 0)) -- true (split "abc" ",") -- ["abc"] ;; joining splits back equals input (when separator present) diff --git a/test/rfl/system/read_csv.rfl b/test/rfl/system/read_csv.rfl index 9e1c8e52..77955e95 100644 --- a/test/rfl/system/read_csv.rfl +++ b/test/rfl/system/read_csv.rfl @@ -67,7 +67,11 @@ (.sys.exec "printf 'name\\nalice\\n\\nbob\\n\\ncarol\\n' > rf_test_empty.csv") -- 0 (set _t (.csv.read [SYMBOL] "rf_test_empty.csv")) (count _t) -- 5 -;; Three rows have a value, two are empty — neither side counts as null. 
-(count (select {x: name from: _t where: (!= name "")})) -- 3 -(count (select {x: name from: _t where: (== name "")})) -- 2 +;; Empty string IS a null STR atom and empty SYM cell IS null (sym +;; id 0). The SYM vec vs null STR atom comparison short-circuits null: +;; every cell passes `!= ""` and none passes `== ""`. Documented +;; tension; revisit if SQL-style null-aware filtering on SYM columns +;; becomes a requirement. +(count (select {x: name from: _t where: (!= name "")})) -- 5 +(count (select {x: name from: _t where: (== name "")})) -- 0 (.sys.exec "rm -f rf_test_empty.csv") -- 0 diff --git a/test/rfl/type/as.rfl b/test/rfl/type/as.rfl index 4e8f7b81..be007f49 100644 --- a/test/rfl/type/as.rfl +++ b/test/rfl/type/as.rfl @@ -374,9 +374,12 @@ ;; INT16/INT32 boundary parses — negative-extreme literals can't be written ;; (parser tokenises positive then negates), so verify via i64 round-trip. -(as 'i64 (as 'i16 "-32768")) -- -32768 +;; INT16_MIN / INT32_MIN / INT64_MIN are the respective NULL_* +;; sentinels. Casting these boundary literals round-trips as the +;; typed null of the wider type. +(as 'i64 (as 'i16 "-32768")) -- 0Nl (as 'i64 (as 'i16 "32767")) -- 32767 -(as 'i64 (as 'i32 "-2147483648")) -- -2147483648 +(as 'i64 (as 'i32 "-2147483648")) -- 0Nl (as 'i64 (as 'i32 "2147483647")) -- 2147483647 ;; ========== NULL PRESERVATION ACROSS CASTS ========== diff --git a/test/test_atom.c b/test/test_atom.c index 34b382c7..0af64acd 100644 --- a/test/test_atom.c +++ b/test/test_atom.c @@ -457,9 +457,8 @@ static test_result_t test_atom_eq_list_sym_atoms(void) { } static test_result_t test_atom_typed_null_f64(void) { - /* Phase 2 dual-encoding: ray_typed_null(-RAY_F64) must store NaN in - * the f64 payload AND set nullmap[0]&1. Downstream kernels that - * read the slot raw (without consulting the bitmap) then see NaN. */ + /* ray_typed_null(-RAY_F64) stores NaN in the f64 payload AND sets + * nullmap[0]&1. Downstream kernels reading the slot raw see NaN. 
*/ ray_t* v = ray_typed_null(-RAY_F64); TEST_ASSERT_NOT_NULL(v); TEST_ASSERT_FALSE(RAY_IS_ERR(v)); @@ -472,7 +471,7 @@ static test_result_t test_atom_typed_null_f64(void) { } static test_result_t test_atom_typed_null_i64(void) { - /* Phase 3a: integer typed nulls now use INT_MIN sentinel + bitmap bit. */ + /* Integer typed nulls use the INT_MIN sentinel and set nullmap[0]&1. */ ray_t* v = ray_typed_null(-RAY_I64); TEST_ASSERT_NOT_NULL(v); TEST_ASSERT_FALSE(RAY_IS_ERR(v)); diff --git a/test/test_compile.c b/test/test_compile.c index fc61f051..ac0298ae 100644 --- a/test/test_compile.c +++ b/test/test_compile.c @@ -258,19 +258,18 @@ static test_result_t test_compile_vector_literal(void) { } /* ════════════════════════════════════════════════════════════════════ - * Phase 2e: F64 dual-encoding regression tests. + * F64 / integer null-slot regression tests. * - * Each consumer of an F64 vector with a null bit MUST see NULL_F64 - * (= NaN) in the raw `double` payload as well — kernels are allowed to - * read the slot without consulting the bitmap. These tests assert the - * payload, not the bitmap, by reading `((double*)ray_data(v))[idx]` and - * checking `x != x` (NaN's defining property). + * Each consumer of a HAS_NULLS vector MUST see the width-correct + * sentinel (NULL_F64 = NaN, NULL_I{16,32,64} = INT_MIN) in the raw + * payload — kernels read the slot directly. These tests assert the + * payload value at the null index, not just HAS_NULLS. * ════════════════════════════════════════════════════════════════════ */ static test_result_t test_compile_f64_mixed_literal_null_slot_is_nan(void) { /* Mixed numeric literal [1.0 0N 3.0] promotes to F64 in parse.c. - * The integer null 0N (typed I64 null with i64=0) used to write 0.0 - * into the f64 slot, breaking the dual-encoding contract. */ + * The integer null 0N (typed I64 null with i64=0) must not write + * 0.0 into the f64 slot — it must land as NULL_F64 (NaN). 
*/ ray_t* r = ray_eval_str("[1.0 0N 3.0]"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on mixed F64 literal"); } @@ -287,9 +286,8 @@ static test_result_t test_compile_f64_mixed_literal_null_slot_is_nan(void) { static test_result_t test_compile_f64_cast_i64_null_slot_is_nan(void) { /* (as 'F64 [1 0N 3]) — cast an I64 vector with a null slot to F64. - * The cast loop writes (double)src[i] regardless of null status, - * which used to leave 0.0 in the null F64 slot. Phase 2e routes - * the post-cast nullmap copy through a per-slot NULL_F64 fill. */ + * The cast loop writes (double)src[i] regardless of null status, so + * the post-cast pass must overwrite the null slot with NULL_F64. */ ray_t* r = ray_eval_str("(as 'F64 [1 0N 3])"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on cast"); } @@ -305,10 +303,9 @@ static test_result_t test_compile_f64_cast_i64_null_slot_is_nan(void) { } static test_result_t test_compile_i32_cast_i64_null_slot_is_sentinel(void) { - /* Phase 3a: (as 'I32 [1 0N 3]) — narrowing I64→I32 cast over a vector - * with a null slot must leave NULL_I32 (INT32_MIN) in the payload, not - * the cast result (int32_t)NULL_I64 = 0. Mirror of the Phase 2e F64 - * post-cast NaN fill for integer destinations. */ + /* (as 'I32 [1 0N 3]) — narrowing I64→I32 cast over a vector with a + * null slot must leave NULL_I32 (INT32_MIN) in the payload, not the + * cast result (int32_t)NULL_I64 = 0. */ ray_t* r = ray_eval_str("(as 'I32 [1 0N 3])"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on cast"); } @@ -325,9 +322,9 @@ static test_result_t test_compile_i32_cast_i64_null_slot_is_sentinel(void) { } static test_result_t test_compile_i16_cast_i32_null_slot_is_sentinel(void) { - /* Phase 3a Hazard 3: chained narrowing I64→I32→I16 cast over a vector - * with a null slot must leave NULL_I16 (INT16_MIN) in the I16 payload, - * NOT (int16_t)NULL_I32 = 0. 
The destination-width sentinel must be + /* Chained narrowing I64→I32→I16 cast over a vector with a null slot + * must leave NULL_I16 (INT16_MIN) in the I16 payload, NOT + * (int16_t)NULL_I32 = 0. The destination-width sentinel must be * written post-cast directly — propagating through the cast macro * truncates the sentinel. */ ray_t* r = ray_eval_str("(as 'I16 (as 'I32 [1 0N 3]))"); @@ -346,9 +343,9 @@ static test_result_t test_compile_i16_cast_i32_null_slot_is_sentinel(void) { } static test_result_t test_compile_i64_cast_i32_null_slot_is_sentinel(void) { - /* Phase 3a: widening I32→I64 cast must still fill NULL_I64 in the - * null payload slot — the cast macro would write (int64_t)NULL_I32 - * = -2147483648, which collides with a legitimate I64 value. */ + /* Widening I32→I64 cast must still fill NULL_I64 in the null payload + * slot — the cast macro would write (int64_t)NULL_I32 = -2147483648, + * which collides with a legitimate I64 value. */ ray_t* r = ray_eval_str("(as 'I64 (as 'I32 [1 0N 3]))"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on cast"); } @@ -365,10 +362,10 @@ static test_result_t test_compile_i64_cast_i32_null_slot_is_sentinel(void) { } static test_result_t test_compile_i64_scalar_null_propagation_slot_is_sentinel(void) { - /* Phase 3a-4: a binary op with a scalar-null I64 operand should fill the - * I64 result payload with NULL_I64, not leave it as the kernel's output. + /* A binary op with a scalar-null I64 operand should fill the I64 + * result payload with NULL_I64, not leave it as the kernel's output. * `(+ 0Nl [1 2 3])` — scalar-null left operand triggers set_all_null - * with an I64 result vector. Mirror of the Phase 2e F64 NaN-fill. */ + * with an I64 result vector. 
*/ ray_t* r = ray_eval_str("(+ 0Nl [1 2 3])"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on scalar-null add"); } @@ -387,8 +384,8 @@ static test_result_t test_compile_i64_scalar_null_propagation_slot_is_sentinel(v } static test_result_t test_compile_update_promo_f64_to_i64_null_slot_is_sentinel(void) { - /* Phase 3a-5: UPDATE-WHERE that promotes an F64 expression with nulls into - * an I64 column must fill NULL_I64 in the destination payload, not the + /* UPDATE-WHERE that promotes an F64 expression with nulls into an + * I64 column must fill NULL_I64 in the destination payload, not the * implementation-defined garbage from (int64_t)NaN. */ ray_t* r = ray_eval_str( "(do " @@ -408,8 +405,8 @@ static test_result_t test_compile_update_promo_f64_to_i64_null_slot_is_sentinel( } static test_result_t test_compile_update_promo_i64_to_f64_null_slot_is_sentinel(void) { - /* Phase 3a-5: UPDATE-WHERE that promotes an I64 expression with nulls into - * an F64 column must fill NULL_F64 in the destination payload, not + /* UPDATE-WHERE that promotes an I64 expression with nulls into an + * F64 column must fill NULL_F64 in the destination payload, not * (double)NULL_I64 (a large finite value). */ ray_t* r = ray_eval_str( "(do " @@ -429,8 +426,8 @@ static test_result_t test_compile_update_promo_i64_to_f64_null_slot_is_sentinel( } static test_result_t test_compile_update_atom_broadcast_i64_null_slot_is_sentinel(void) { - /* Phase 3a-6: UPDATE that broadcasts an I64 typed-null atom into an - * I64 column should fill NULL_I64 into the destination payload, not 0. */ + /* UPDATE that broadcasts an I64 typed-null atom into an I64 column + * should fill NULL_I64 into the destination payload, not 0. 
*/ ray_t* r = ray_eval_str( "(do (set t (table [a] (list [10 20 30])))" " (set u (update {a: 0Nl from: t}))" @@ -450,8 +447,8 @@ static test_result_t test_compile_update_atom_broadcast_i64_null_slot_is_sentine } static test_result_t test_compile_update_atom_broadcast_where_i64_null_slot_is_sentinel(void) { - /* Phase 3a-6: UPDATE-WHERE that broadcasts an I64 typed-null atom into - * an I64 column should fill NULL_I64 into masked slots only. */ + /* UPDATE-WHERE that broadcasts an I64 typed-null atom into an I64 + * column should fill NULL_I64 into masked slots only. */ ray_t* r = ray_eval_str( "(do (set t (table [a b] (list [10 20 30] [1 2 3])))" " (set u (update {a: 0Nl where: (> b 1) from: t}))" @@ -472,8 +469,8 @@ static test_result_t test_compile_update_atom_broadcast_where_i64_null_slot_is_s } static test_result_t test_compile_group_by_i64_null_key_slot_is_sentinel(void) { - /* Phase 3a-7: group-by on a nullable I64 column with a null row must - * write NULL_I64 into the result column's null slot, not 0. */ + /* Group-by on a nullable I64 column with a null row must write + * NULL_I64 into the result column's null slot, not 0. */ ray_t* r = ray_eval_str( "(do (set t (table [k v] (list [1 0Nl 2 0Nl 3] [10 20 30 40 50])))" " (set r (select {c: (count v) from: t by: k}))" @@ -497,8 +494,8 @@ static test_result_t test_compile_group_by_i64_null_key_slot_is_sentinel(void) { } static test_result_t test_compile_pivot_i64_null_key_slot_is_sentinel(void) { - /* Phase 3a-8: pivot on a nullable I64 key column with null rows must - * fill NULL_I64 into the result index-column's null slot, not 0. */ + /* Pivot on a nullable I64 key column with null rows must fill + * NULL_I64 into the result index-column's null slot, not 0. 
*/ ray_t* r = ray_eval_str( "(do (set t (table [k v c] (list [1 0Nl 2 0Nl 3] [10 20 30 40 50] ['a 'b 'a 'b 'c])))" " (set p (pivot t 'k 'c 'v sum))" @@ -522,19 +519,15 @@ static test_result_t test_compile_pivot_i64_null_key_slot_is_sentinel(void) { } /* ════════════════════════════════════════════════════════════════════ - * Phase 3a-13 regressions — producer-side dual-encoding gaps that - * surfaced from the cross-cut integration review (temporal extract, - * strlen, mark_i64_overflow_as_null, median_per_group). - * Each previously wrote 0 / 0.0 to the payload while flipping the null - * bitmap bit — bitmap-only nulls that violate the dual-encoding - * contract. After the fix the slot must carry the width-correct - * sentinel (NULL_I64 / NULL_F64) in addition to the bitmap bit. + * Producer-side null-slot regressions: temporal extract, strlen, + * mark_i64_overflow_as_null, and median_per_group must each write the + * width-correct sentinel (NULL_I64 / NULL_F64) into the payload at null + * positions — leaving 0 / 0.0 there would let sentinel-aware readers + * mistake the null for a legitimate value. * ════════════════════════════════════════════════════════════════════ */ static test_result_t test_compile_temporal_extract_null_slot_is_sentinel(void) { - /* Phase 3a-13 (C1): extract over a nullable TIMESTAMP column must - * fill NULL_I64 in the result I64 payload — the kernel previously - * wrote 0 with a bitmap bit, which sentinel-aware readers see as a - * legitimate zero. */ + /* Extract (yyyy ...) over a nullable TIMESTAMP column must fill + * NULL_I64 in the result I64 payload, not 0. 
*/ ray_t* r = ray_eval_str( "(do (set __t13 (as 'TIMESTAMP (list 1000000000 0Np 2000000000)))" " (yyyy __t13))"); @@ -553,10 +546,10 @@ static test_result_t test_compile_temporal_extract_null_slot_is_sentinel(void) { } static test_result_t test_compile_strlen_null_slot_is_sentinel(void) { - /* Phase 3a-13 (C2): strlen over a nullable STR vector must fill - * NULL_I64 in the I64 payload, not 0. Mixed string-vec literal - * `[\"hello\" 0N \"x\"]` parses as a LIST; cast to STR to get a - * proper typed nullable STR vector. */ + /* strlen over a nullable STR vector must fill NULL_I64 in the I64 + * payload, not 0. Mixed string-vec literal `[\"hello\" 0N \"x\"]` + * parses as a LIST; cast to STR to get a proper typed nullable STR + * vector. */ ray_t* r = ray_eval_str("(strlen (as 'STR (concat \"hello\" (concat 0N \"x\"))))"); TEST_ASSERT_NOT_NULL(r); if (RAY_IS_ERR(r)) { ray_error_free(r); FAIL("eval error on strlen null"); } @@ -573,10 +566,10 @@ static test_result_t test_compile_strlen_null_slot_is_sentinel(void) { } static test_result_t test_compile_overflow_neg_int64_min_slot_is_null_i64(void) { - /* Phase 3a-13 (C3): negating INT64_MIN over an i64 column produces - * INT64_MIN (k/q convention surfaces this as typed null). After - * Phase 3a-1 INT64_MIN IS NULL_I64 — mark_i64_overflow_as_null must - * leave the sentinel in place, not overwrite with 0. */ + /* Negating INT64_MIN over an i64 column produces INT64_MIN (k/q + * convention surfaces this as typed null). Since INT64_MIN IS + * NULL_I64, mark_i64_overflow_as_null must leave the sentinel in + * place, not overwrite with 0. 
*/ ray_t* r = ray_eval_str( "(do (set Vneg (concat -9223372036854775808 (concat -5 (concat 5 0))))" " (set Tneg (table [v] (list Vneg)))" @@ -594,9 +587,8 @@ static test_result_t test_compile_overflow_neg_int64_min_slot_is_null_i64(void) } static test_result_t test_compile_median_per_group_all_null_slot_is_nan(void) { - /* Phase 3a-13 (C4 — closes Phase 2 gap): median over a per-group - * all-null F64 input must fill NULL_F64 in the result slot, not - * leave it as 0.0. */ + /* Median over a per-group all-null F64 input must fill NULL_F64 in + * the result slot, not leave it as 0.0. */ ray_t* r = ray_eval_str( "(do (set __tm13 (table [k v] (list [1 1 2 2] [0Nf 0Nf 1.0 2.0])))" " (set __rm13 (select {m: (med v) by: k from: __tm13}))" diff --git a/test/test_csv.c b/test/test_csv.c index 511b4f2f..5ec1f708 100644 --- a/test/test_csv.c +++ b/test/test_csv.c @@ -191,7 +191,7 @@ static test_result_t test_csv_null_i64(void) { TEST_ASSERT_FALSE(ray_vec_is_null(col, 0)); TEST_ASSERT_EQ_I(((int64_t*)ray_data(col))[0], 10); - /* Phase 3a: empty I64 cell must be both bitmap-null AND NULL_I64-in-slot. */ + /* Empty I64 cell must report null and carry NULL_I64 in the slot. */ TEST_ASSERT_TRUE(ray_vec_is_null(col, 1)); TEST_ASSERT_EQ_I(((int64_t*)ray_data(col))[1], NULL_I64); @@ -220,7 +220,7 @@ static test_result_t test_csv_null_i64_unparseable(void) { TEST_ASSERT_FALSE(ray_vec_is_null(col, 0)); TEST_ASSERT_EQ_I(((int64_t*)ray_data(col))[0], 10); - /* Phase 3a: unparseable I64 cell must be both bitmap-null AND NULL_I64-in-slot. */ + /* Unparseable I64 cell must report null and carry NULL_I64 in the slot. */ TEST_ASSERT_TRUE(ray_vec_is_null(col, 1)); TEST_ASSERT_EQ_I(((int64_t*)ray_data(col))[1], NULL_I64); @@ -249,7 +249,7 @@ static test_result_t test_csv_null_f64(void) { TEST_ASSERT_FALSE(ray_vec_is_null(col, 0)); TEST_ASSERT_EQ_F(((double*)ray_data(col))[0], 1.5, 1e-6); - /* Phase 2: empty F64 cell must be both bitmap-null AND NaN-in-slot. 
*/ + /* Empty F64 cell must report null and carry NaN in the slot. */ TEST_ASSERT_TRUE(ray_vec_is_null(col, 1)); double slot1 = ((double*)ray_data(col))[1]; TEST_ASSERT_TRUE(slot1 != slot1); /* NaN check */ @@ -264,7 +264,7 @@ static test_result_t test_csv_null_f64(void) { PASS(); } -/* Phase 3a: empty I16 cell must be both bitmap-null AND NULL_I16-in-slot. */ +/* Empty I16 cell must report null and carry NULL_I16 in the slot. */ static test_result_t test_csv_null_i16(void) { ray_heap_init(); (void)ray_sym_init(); @@ -295,7 +295,7 @@ static test_result_t test_csv_null_i16(void) { PASS(); } -/* Phase 3a: empty I32 cell must be both bitmap-null AND NULL_I32-in-slot. */ +/* Empty I32 cell must report null and carry NULL_I32 in the slot. */ static test_result_t test_csv_null_i32(void) { ray_heap_init(); (void)ray_sym_init(); @@ -326,7 +326,7 @@ static test_result_t test_csv_null_i32(void) { PASS(); } -/* Phase 3a: empty DATE cell must be both bitmap-null AND NULL_I32-in-slot. */ +/* Empty DATE cell must report null and carry NULL_I32 in the slot. */ static test_result_t test_csv_null_date(void) { ray_heap_init(); (void)ray_sym_init(); @@ -355,7 +355,7 @@ static test_result_t test_csv_null_date(void) { PASS(); } -/* Phase 3a: empty TIME cell must be both bitmap-null AND NULL_I32-in-slot. */ +/* Empty TIME cell must report null and carry NULL_I32 in the slot. */ static test_result_t test_csv_null_time(void) { ray_heap_init(); (void)ray_sym_init(); @@ -384,7 +384,7 @@ static test_result_t test_csv_null_time(void) { PASS(); } -/* Phase 3a: empty TIMESTAMP cell must be both bitmap-null AND NULL_I64-in-slot. */ +/* Empty TIMESTAMP cell must report null and carry NULL_I64 in the slot. */ static test_result_t test_csv_null_timestamp(void) { ray_heap_init(); (void)ray_sym_init(); @@ -414,9 +414,8 @@ static test_result_t test_csv_null_timestamp(void) { } static test_result_t test_csv_null_bool(void) { - /* v4 contract (Phase 1 lockdown): BOOL is non-nullable. 
Empty cells - * materialize as `false`, not as a null bit — the BOOL column has - * neither HAS_NULLS nor any set bitmap bits. */ + /* BOOL is non-nullable. Empty cells materialize as `false`, not + * as a null — the BOOL column has no HAS_NULLS attribute. */ ray_heap_init(); (void)ray_sym_init(); @@ -1418,10 +1417,10 @@ static test_result_t test_csv_explicit_i32_schema(void) { } static test_result_t test_csv_explicit_u8_schema_serial(void) { - /* v4 contract (Phase 1 lockdown): U8 is non-nullable. Truncated rows - * still fill defaults (0), but no null bit is set and HAS_NULLS is - * stripped post-parse. Exercises the serial parse path - * (n_rows ≤ 8192) plus the past-row-boundary fill branch. */ + /* U8 is non-nullable. Truncated rows still fill defaults (0), but + * no null is set and HAS_NULLS is stripped post-parse. Exercises + * the serial parse path (n_rows ≤ 8192) plus the past-row-boundary + * fill branch. */ ray_heap_init(); (void)ray_sym_init(); diff --git a/test/test_dict.c b/test/test_dict.c index a3562ae3..b710efa0 100644 --- a/test/test_dict.c +++ b/test/test_dict.c @@ -1174,9 +1174,11 @@ static test_result_t test_dict_find_idx_str_with_nulls(void) { TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), 2); ray_release(ka); - /* An empty-string lookup must skip the null slot. */ + /* Empty string IS a null STR atom. An empty-string lookup is + * therefore a null lookup and resolves to the first null slot + * (index 1) — STR null = empty string is a deliberate conflation. */ ka = ray_str("", 0); - TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), -1); + TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), 1); ray_release(ka); ray_release(d); @@ -1207,9 +1209,11 @@ static test_result_t test_dict_find_idx_guid_with_nulls(void) { TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), 2); ray_release(ka); - /* All-zero query: would match slot 1 if not null-aware. */ + /* NULL_GUID = 16 all-zero bytes. 
An all-zero GUID lookup IS a null + * lookup and resolves to the first null slot (index 1) — same + * conflation as STR null = empty string. */ ka = ray_guid(g1); - TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), -1); + TEST_ASSERT_EQ_I(ray_dict_find_idx(d, ka), 1); ray_release(ka); ray_release(d); diff --git a/test/test_embedding.c b/test/test_embedding.c index 8398184d..800fb039 100644 --- a/test/test_embedding.c +++ b/test/test_embedding.c @@ -445,7 +445,7 @@ static test_result_t test_hnsw_handle_cow(void) { PASS(); } -/* ============ select ... nearest ... take — Phase 2 integration ============ */ +/* ============ select ... nearest ... take — Pass 2 integration ============ */ /* Helper: build a 5-row test table with id / score / emb columns. Runs * in the Rayfall env, then returns nothing. The subsequent eval_* calls diff --git a/test/test_exec.c b/test/test_exec.c index ca68d425..34b02467 100644 --- a/test/test_exec.c +++ b/test/test_exec.c @@ -1686,11 +1686,13 @@ static test_result_t test_exec_asof_left_join(void) { TEST_ASSERT_FALSE(RAY_IS_ERR(result)); /* Left outer: all 3 left rows preserved */ TEST_ASSERT_EQ_I(ray_table_nrows(result), 3); - /* Verify: time=50 has no match (before any right row), bid should be 0 (NULL fill) */ + /* Verify: time=50 has no match (before any right row), bid is null. + * Check via ray_vec_is_null, not raw payload == 0.0 — post-sentinel- + * migration the null fill is NULL_F64 (NaN), not 0.0. 
*/ ray_t* bid_col = ray_table_get_col(result, n_bid); TEST_ASSERT_NOT_NULL(bid_col); double* bid_data = (double*)ray_data(bid_col); - TEST_ASSERT((bid_data[0]) == (0.0), "double == failed"); /* t=50: no match */ + TEST_ASSERT(ray_vec_is_null(bid_col, 0), "slot 0 should be null (no match)"); TEST_ASSERT((bid_data[1]) == (0.8), "double == failed"); /* t=100: right t=80 */ TEST_ASSERT((bid_data[2]) == (1.5), "double == failed"); /* t=200: right t=150 */ @@ -5085,10 +5087,12 @@ static test_result_t test_expr_unary_cast_narrow_nullable(void) { ray_release(tbl); ray_sym_destroy(); - /* U8 nullable → I64 */ + /* U8 → I64. U8 is non-nullable; set_null is rejected by + * ray_vec_set_null_checked (the void wrapper discards the error), + * so the cell stays at its raw value. Sum becomes 1+2+3 = 6. */ uint8_t raw8[] = {1, 2, 3}; ray_t* v8 = ray_vec_from_raw(RAY_U8, raw8, 3); - ray_vec_set_null(v8, 1, true); + ray_vec_set_null(v8, 1, true); /* no-op for non-nullable U8 */ (void)ray_sym_init(); int64_t n8 = ray_sym_intern("c8", 2); tbl = ray_table_new(1); @@ -5101,18 +5105,18 @@ static test_result_t test_expr_unary_cast_narrow_nullable(void) { s = ray_sum(g, c); result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - TEST_ASSERT_EQ_I(result->i64, 4); /* 1+3=4, pos1=null */ + TEST_ASSERT_EQ_I(result->i64, 6); ray_release(result); ray_graph_free(g); - /* BOOL nullable → I64 */ - g = ray_graph_new(tbl); /* reuse tbl - actually we need BOOL */ + /* BOOL → I64. BOOL is non-nullable, same as U8. Sum = 1+0+1 = 2. 
*/ + g = ray_graph_new(tbl); ray_release(tbl); ray_sym_destroy(); uint8_t rawb[] = {1, 0, 1}; ray_t* vbool = ray_vec_from_raw(RAY_BOOL, rawb, 3); - ray_vec_set_null(vbool, 2, true); + ray_vec_set_null(vbool, 2, true); /* no-op for non-nullable BOOL */ (void)ray_sym_init(); int64_t nb = ray_sym_intern("cb", 2); tbl = ray_table_new(1); @@ -5125,7 +5129,7 @@ static test_result_t test_expr_unary_cast_narrow_nullable(void) { s = ray_sum(g, c); result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - TEST_ASSERT_EQ_I(result->i64, 1); /* 1+0=1, pos2=null */ + TEST_ASSERT_EQ_I(result->i64, 2); ray_release(result); ray_graph_free(g); @@ -5662,7 +5666,11 @@ static test_result_t test_expr_binary_i16_nullable(void) { PASS(); } -/* ---- binary_range: U8 nullable — covers MIN2/MAX2/DIV/MOD ---- */ +/* ---- binary_range: U8 — covers MIN2/MAX2/MOD ---- + * U8 is non-nullable, so set_null on it has no effect; the va[3] null + * mark that used to force the non-fused path has been removed. The + * computations still exercise binary_range U8 kernels; only the + * expected sums change (no null masks).
*/ static test_result_t test_expr_binary_u8_nullable(void) { ray_heap_init(); (void)ray_sym_init(); @@ -5671,8 +5679,6 @@ static test_result_t test_expr_binary_u8_nullable(void) { uint8_t rawb[] = {15, 5, 25, 8}; ray_t* va = ray_vec_from_raw(RAY_U8, rawa, 4); ray_t* vb = ray_vec_from_raw(RAY_U8, rawb, 4); - /* Make nullable to force non-fused path */ - ray_vec_set_null(va, 3, true); int64_t na = ray_sym_intern("a", 1); int64_t nb = ray_sym_intern("b", 1); ray_t* tbl = ray_table_new(2); @@ -5680,7 +5686,7 @@ static test_result_t test_expr_binary_u8_nullable(void) { tbl = ray_table_add_col(tbl, nb, vb); ray_release(va); ray_release(vb); - /* MIN2 — exercises binary_range U8 MIN2 */ + /* MIN2 */ ray_graph_t* g = ray_graph_new(tbl); ray_op_t* a_op = ray_scan(g, "a"); ray_op_t* b_op = ray_scan(g, "b"); @@ -5688,12 +5694,12 @@ static test_result_t test_expr_binary_u8_nullable(void) { ray_op_t* s = ray_sum(g, mn); ray_t* result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - /* min(10,15)+min(20,5)+min(30,25)+null = 10+5+25=40 */ - TEST_ASSERT_EQ_I(result->i64, 40); + /* min(10,15)+min(20,5)+min(30,25)+min(40,8) = 10+5+25+8 = 48 */ + TEST_ASSERT_EQ_I(result->i64, 48); ray_release(result); ray_graph_free(g); - /* MAX2 — exercises binary_range U8 MAX2 */ + /* MAX2 */ g = ray_graph_new(tbl); a_op = ray_scan(g, "a"); b_op = ray_scan(g, "b"); @@ -5701,12 +5707,12 @@ static test_result_t test_expr_binary_u8_nullable(void) { s = ray_sum(g, mx); result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - /* max(10,15)+max(20,5)+max(30,25)+null = 15+20+30=65 */ - TEST_ASSERT_EQ_I(result->i64, 65); + /* max(10,15)+max(20,5)+max(30,25)+max(40,8) = 15+20+30+40 = 105 */ + TEST_ASSERT_EQ_I(result->i64, 105); ray_release(result); ray_graph_free(g); - /* MOD — exercises binary_range U8 MOD */ + /* MOD */ g = ray_graph_new(tbl); a_op = ray_scan(g, "a"); b_op = ray_scan(g, "b"); @@ -5714,7 +5720,7 @@ static test_result_t test_expr_binary_u8_nullable(void) { s = 
ray_sum(g, md); result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - /* 10%15=10, 20%5=0, 30%25=5, null: sum=15 */ + /* 10%15=10, 20%5=0, 30%25=5, 40%8=0 -> sum = 15 */ TEST_ASSERT_EQ_I(result->i64, 15); ray_release(result); ray_graph_free(g); @@ -5800,7 +5806,9 @@ static test_result_t test_expr_group_linear_mul(void) { PASS(); } -/* ---- binary_range BOOL AND/OR: nullable BOOL columns (non-fused path) ---- */ +/* ---- binary_range BOOL AND/OR: non-fused path coverage ---- + * BOOL is non-nullable: set_null on BOOL is rejected (RAY_ERR_TYPE) and + * leaves the cell unchanged, so the AND / OR sums cover all five rows. */ static test_result_t test_expr_binary_bool_nullable(void) { ray_heap_init(); (void)ray_sym_init(); @@ -5809,8 +5817,6 @@ static test_result_t test_expr_binary_bool_nullable(void) { uint8_t rawb[] = {1, 1, 0, 0, 1}; ray_t* va = ray_vec_from_raw(RAY_BOOL, rawa, 5); ray_t* vb = ray_vec_from_raw(RAY_BOOL, rawb, 5); - /* Make nullable to force non-fused path */ - ray_vec_set_null(va, 4, true); int64_t na = ray_sym_intern("p", 1); int64_t nb = ray_sym_intern("q", 1); ray_t* tbl = ray_table_new(2); @@ -5818,21 +5824,20 @@ static test_result_t test_expr_binary_bool_nullable(void) { tbl = ray_table_add_col(tbl, nb, vb); ray_release(va); ray_release(vb); - /* AND — exercises binary_range BOOL AND (src_is_i64=0, F64 path) */ + /* AND */ ray_graph_t* g = ray_graph_new(tbl); ray_op_t* p = ray_scan(g, "p"); ray_op_t* q = ray_scan(g, "q"); ray_op_t* an = ray_and(g, p, q); - /* Count true values */ ray_op_t* s = ray_sum(g, ray_cast(g, an, RAY_I64)); ray_t* result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - /* AND: 1&&1=1, 0&&1=0, 1&&0=0, 0&&0=0, null: only pos0=1, sum=1 */ - TEST_ASSERT_EQ_I(result->i64, 1); + /* AND: 1&&1=1, 0&&1=0, 1&&0=0, 0&&0=0, 1&&1=1 -> sum = 2 */ + TEST_ASSERT_EQ_I(result->i64, 2); ray_release(result); ray_graph_free(g); - /* OR — exercises binary_range BOOL OR */ + /* OR */ g = ray_graph_new(tbl); p = ray_scan(g, "p");
q = ray_scan(g, "q"); @@ -5840,8 +5845,8 @@ static test_result_t test_expr_binary_bool_nullable(void) { s = ray_sum(g, ray_cast(g, or_op, RAY_I64)); result = ray_execute(g, s); TEST_ASSERT_FALSE(RAY_IS_ERR(result)); - /* OR: 1||1=1, 0||1=1, 1||0=1, 0||0=0, null: 3 non-null true, sum=3 */ - TEST_ASSERT_EQ_I(result->i64, 3); + /* OR: 1||1=1, 0||1=1, 1||0=1, 0||0=0, 1||1=1 -> sum = 4 */ + TEST_ASSERT_EQ_I(result->i64, 4); ray_release(result); ray_graph_free(g); diff --git a/test/test_heap.c b/test/test_heap.c index daa2f9b0..75658e16 100644 --- a/test/test_heap.c +++ b/test/test_heap.c @@ -28,7 +28,7 @@ * over multiple types, scratch-arena bump allocator, ray_heap_release_pages, * GC under both serial and parallel flags, ray_heap_merge with rich source * heaps, and the owned-ref retain/release fan-out for compound types - * (LIST / TABLE / DICT / parted / NULLMAP_EXT / SLICE / STR with str_pool). + * (LIST / TABLE / DICT / parted / SLICE / STR with str_pool). */ /* MAP_ANONYMOUS is a Linux/glibc extension; needs _GNU_SOURCE before @@ -522,28 +522,31 @@ static test_result_t test_str_pool_owned_ref(void) { PASS(); } -/* ---- Owned-ref: NULLMAP_EXT child -------------------------------------- * +/* ---- Sentinel-encoded null release ------------------------------------- * * - * A vec with RAY_ATTR_NULLMAP_EXT carries an owning ref to ext_nullmap. - * ray_release_owned_refs must release that child. Construct one - * manually and free it. */ - -static test_result_t test_nullmap_ext_owned_ref(void) { - ray_t* vec = ray_alloc(8 * sizeof(int64_t)); + * A nullable vec carries no auxiliary bitmap child; null state lives + * entirely in the payload via the type-correct NULL_* sentinel. This + * test exercises release of a >128-element nullable vec and verifies + * the heap remains sane afterwards. 
*/ + +static test_result_t test_sentinel_null_release(void) { + int64_t n = 200; + ray_t* vec = ray_vec_new(RAY_I64, n); TEST_ASSERT_NOT_NULL(vec); - vec->type = RAY_I64; - vec->len = 8; - - ray_t* nm = ray_alloc(8); - TEST_ASSERT_NOT_NULL(nm); - nm->type = RAY_U8; - nm->len = 8; + for (int64_t i = 0; i < n; i++) { + vec = ray_vec_append(vec, &i); + TEST_ASSERT_NOT_NULL(vec); + } - /* Attach extended nullmap. vec now owns nm. */ - vec->ext_nullmap = nm; - vec->attrs |= RAY_ATTR_NULLMAP_EXT; + /* Mark a few rows null — sentinel writes into payload only. */ + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 5, true), RAY_OK); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 150, true), RAY_OK); + TEST_ASSERT_TRUE(vec->attrs & RAY_ATTR_HAS_NULLS); + TEST_ASSERT_TRUE(ray_vec_is_null(vec, 5)); + TEST_ASSERT_TRUE(ray_vec_is_null(vec, 150)); + TEST_ASSERT_FALSE(ray_vec_is_null(vec, 0)); - /* Drop vec — nm must be released as well via the NULLMAP_EXT branch. */ + /* Drop vec — no external bitmap child to release, just the payload. */ ray_release(vec); /* Heap remains sane. */ @@ -1368,42 +1371,35 @@ static test_result_t test_scratch_realloc_slice(void) { PASS(); } -/* ---- ray_scratch_realloc with NULLMAP_EXT -------------------------------- +/* ---- ray_scratch_realloc preserves sentinel-encoded nulls ---------------- * - * A block with RAY_ATTR_NULLMAP_EXT causes ray_detach_owned_refs to clear - * ext_nullmap (lines 782-785) before freeing the old block. This also - * covers the ray_detach_owned_refs NULLMAP_EXT branch. */ - -static test_result_t test_scratch_realloc_nullmap_ext(void) { - ray_t* vec = ray_alloc(4 * sizeof(int64_t)); + * ray_scratch_realloc copies the header bytes into the new block and runs + * ray_detach_owned_refs on the old one. Null state lives in the payload, + * so a HAS_NULLS vec realloced this way must keep its HAS_NULLS bit and + * its sentinel-encoded null rows. 
*/ + +static test_result_t test_scratch_realloc_sentinel_nulls(void) { + int64_t n = 200; + ray_t* vec = ray_vec_new(RAY_I64, n); TEST_ASSERT_NOT_NULL(vec); - vec->type = RAY_I64; - vec->len = 4; - - ray_t* nm = ray_alloc(1); - TEST_ASSERT_NOT_NULL(nm); - nm->type = RAY_U8; - nm->len = 1; - - vec->ext_nullmap = nm; - vec->attrs |= RAY_ATTR_NULLMAP_EXT; - - /* ray_scratch_realloc transfers ownership via memcpy then calls - * ray_detach_owned_refs(old) which just nulls pointers (no release). - * So nm->rc stays at 1 and the ref is now owned by vec2. */ - uint32_t nm_rc = nm->rc; /* should be 1 */ + for (int64_t i = 0; i < n; i++) { + vec = ray_vec_append(vec, &i); + TEST_ASSERT_NOT_NULL(vec); + } + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 42, true), RAY_OK); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 175, true), RAY_OK); + TEST_ASSERT_TRUE(vec->attrs & RAY_ATTR_HAS_NULLS); - /* Realloc: exercises NULLMAP_EXT branch of ray_detach_owned_refs. */ - ray_t* vec2 = ray_scratch_realloc(vec, 4 * sizeof(int64_t)); + /* Realloc to a slightly larger payload — exercises the + * ray_detach_owned_refs path on the old block. */ + ray_t* vec2 = ray_scratch_realloc(vec, (size_t)(n + 4) * sizeof(int64_t)); TEST_ASSERT_NOT_NULL(vec2); - /* Ownership transferred; rc unchanged. */ - TEST_ASSERT_EQ_U(nm->rc, nm_rc); - TEST_ASSERT_TRUE(vec2->attrs & RAY_ATTR_NULLMAP_EXT); - TEST_ASSERT_EQ_PTR(vec2->ext_nullmap, nm); + TEST_ASSERT_TRUE(vec2->attrs & RAY_ATTR_HAS_NULLS); + TEST_ASSERT_TRUE(ray_vec_is_null(vec2, 42)); + TEST_ASSERT_TRUE(ray_vec_is_null(vec2, 175)); + TEST_ASSERT_FALSE(ray_vec_is_null(vec2, 0)); - /* Release vec2 — release_owned_refs drops nm ref. */ ray_release(vec2); - /* nm should now have rc = 0 and be freed. Don't touch nm after this. 
*/ PASS(); } @@ -1538,7 +1534,7 @@ const test_entry_t heap_entries[] = { { "heap/flush_foreign_parallel", test_flush_foreign_during_parallel, heap_setup, heap_teardown }, { "heap/alloc_copy_list", test_alloc_copy_list_retains, heap_setup, heap_teardown }, { "heap/str_pool_owned_ref", test_str_pool_owned_ref, heap_setup, heap_teardown }, - { "heap/nullmap_ext_owned_ref", test_nullmap_ext_owned_ref, heap_setup, heap_teardown }, + { "heap/sentinel_null_release", test_sentinel_null_release, heap_setup, heap_teardown }, { "heap/slice_owned_ref", test_slice_owned_ref, heap_setup, heap_teardown }, { "heap/parted_owned_ref", test_parted_owned_ref, heap_setup, heap_teardown }, { "heap/mapcommon_owned_ref", test_mapcommon_owned_ref, heap_setup, heap_teardown }, @@ -1561,7 +1557,7 @@ const test_entry_t heap_entries[] = { { "heap/free_mmod1_atom", test_free_mmod1_atom, heap_setup, heap_teardown }, { "heap/order_for_size_pow2", test_order_for_size_pow2, heap_setup, heap_teardown }, { "heap/scratch_realloc_slice", test_scratch_realloc_slice, heap_setup, heap_teardown }, - { "heap/scratch_realloc_nullmap", test_scratch_realloc_nullmap_ext, heap_setup, heap_teardown }, + { "heap/scratch_realloc_sentinel_nulls", test_scratch_realloc_sentinel_nulls, heap_setup, heap_teardown }, { "heap/scratch_realloc_parted", test_scratch_realloc_parted, heap_setup, heap_teardown }, { "heap/merge_foreign_fallback", test_merge_foreign_pool_fallback, heap_setup, heap_teardown }, { NULL, NULL, NULL, NULL }, diff --git a/test/test_index.c b/test/test_index.c index aa4c726e..2b8837b8 100644 --- a/test/test_index.c +++ b/test/test_index.c @@ -53,13 +53,13 @@ static ray_t* make_f64_vec(const double* xs, int64_t n) { /* Snapshot the 16-byte nullmap union and attrs bits we care about. 
*/ typedef struct { uint8_t bytes[16]; - uint8_t attrs; /* HAS_NULLS | NULLMAP_EXT */ + uint8_t attrs; /* HAS_NULLS */ } nullmap_snap_t; static nullmap_snap_t snap_take(const ray_t* v) { nullmap_snap_t s; memcpy(s.bytes, v->nullmap, 16); - s.attrs = v->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); + s.attrs = v->attrs & RAY_ATTR_HAS_NULLS; return s; } @@ -143,35 +143,34 @@ static test_result_t test_index_attach_drop_with_inline_nulls(void) { PASS(); } -static test_result_t test_index_attach_drop_with_ext_nullmap(void) { +static test_result_t test_index_attach_drop_large_sentinel_nulls(void) { + /* Attach + drop on a vec with sentinel-encoded nulls past the + * 128-element boundary. Verifies null state survives the round-trip + * via ray_vec_is_null. */ ray_heap_init(); - int64_t n = 200; /* > 128 forces external nullmap */ + int64_t n = 200; ray_t* v = ray_vec_new(RAY_I32, n); int32_t z = 0; for (int64_t i = 0; i < n; i++) v = ray_vec_append(v, &z); - /* Set a few nulls past the 128-element inline boundary. */ TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 130, true), RAY_OK); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 199, true), RAY_OK); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); - - nullmap_snap_t before = snap_take(v); + TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); ray_t* w = v; ray_t* r = ray_index_attach_zone(&w); TEST_ASSERT_FALSE(RAY_IS_ERR(r)); TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_INDEX); - TEST_ASSERT_FALSE(w->attrs & RAY_ATTR_NULLMAP_EXT); /* moved into index */ - /* is_null still returns true for the marked rows. */ + /* is_null still returns true for the marked rows under HAS_INDEX. 
*/ TEST_ASSERT_TRUE (ray_vec_is_null(w, 130)); TEST_ASSERT_TRUE (ray_vec_is_null(w, 199)); TEST_ASSERT_FALSE(ray_vec_is_null(w, 0)); ray_index_drop(&w); - nullmap_snap_t after = snap_take(w); - TEST_ASSERT_TRUE(snap_eq(&before, &after)); - TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_NULLMAP_EXT); + TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_NULLS); TEST_ASSERT_TRUE (ray_vec_is_null(w, 130)); + TEST_ASSERT_TRUE (ray_vec_is_null(w, 199)); + TEST_ASSERT_FALSE(ray_vec_is_null(w, 0)); ray_release(w); ray_heap_destroy(); @@ -459,7 +458,7 @@ static test_result_t test_index_drop_under_shared_cow(void) { static test_result_t test_index_persistence_roundtrip(void) { ray_heap_init(); - /* 200 elements forces ext_nullmap. */ + /* 200 elements is past the legacy 128-inline-bitmap boundary. */ int64_t n = 200; ray_t* v = ray_vec_new(RAY_I64, n); for (int64_t i = 0; i < n; i++) { @@ -468,7 +467,7 @@ static test_result_t test_index_persistence_roundtrip(void) { } TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 7, true), RAY_OK); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 150, true), RAY_OK); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); + TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); ray_t* w = v; TEST_ASSERT_FALSE(RAY_IS_ERR(ray_index_attach_zone(&w))); @@ -511,37 +510,23 @@ static test_result_t test_index_persistence_roundtrip(void) { PASS(); } -/* ─── Slice handling in ray_vec_nullmap_bytes ─────────────────────── */ +/* ─── Slice null detection on indexed/parent vec ───────────────────── */ static test_result_t test_index_nullmap_helper_slice(void) { + /* Slice-relative null detection via ray_vec_is_null delegates to + * the parent's sentinel payload at the translated index. */ ray_heap_init(); - /* Build a parent with nulls at row 1 and row 4. 
*/ int64_t xs[] = { 100, 200, 300, 400, 500, 600 }; ray_t* v = make_i64_vec(xs, 6); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 1, true), RAY_OK); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 4, true), RAY_OK); TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); - /* Slice [2..6) — rows 2,3,4,5 in the parent, with row 4 (parent - * index) being null — slice-local index 2. */ ray_t* s = ray_vec_slice(v, 2, 4); TEST_ASSERT_FALSE(RAY_IS_ERR(s)); TEST_ASSERT_TRUE(s->attrs & RAY_ATTR_SLICE); - /* Slice itself does NOT carry HAS_NULLS — that's the codebase invariant. */ TEST_ASSERT_FALSE(s->attrs & RAY_ATTR_HAS_NULLS); - /* The helper must still resolve to the parent's bitmap and return - * the correct bit_offset (= slice_offset = 2). */ - int64_t off = -1, lb = -1; - const uint8_t* bits = ray_vec_nullmap_bytes(s, &off, &lb); - TEST_ASSERT_NOT_NULL(bits); - TEST_ASSERT_EQ_I(off, 2); - TEST_ASSERT_TRUE(lb >= 8); - /* Parent bit 4 must be set in the buffer (bit 4 = byte 0 bit 4). */ - TEST_ASSERT_TRUE((bits[(off + 2) / 8] >> ((off + 2) % 8)) & 1); - /* And parent bit 1 must also be set (parent has it). */ - TEST_ASSERT_TRUE((bits[1 / 8] >> (1 % 8)) & 1); - /* ray_vec_is_null still works correctly on the slice. */ TEST_ASSERT_FALSE(ray_vec_is_null(s, 0)); /* parent row 2 — not null */ TEST_ASSERT_FALSE(ray_vec_is_null(s, 1)); /* parent row 3 — not null */ @@ -584,35 +569,25 @@ static test_result_t test_index_insert_at_drops_index(void) { /* ─── Null-aware reader correctness on indexed vec ─────────────────── */ static test_result_t test_index_null_readers_through_helper(void) { + /* Verify the sentinel-based null reader invariant: ray_vec_is_null + * returns the same answer before and after an index attach, even + * though w->nullmap[0..7] holds the index pointer after attach. */ ray_heap_init(); - /* 5-element vec with one null in the middle. 
*/ int64_t xs[] = { 100, 200, 300, 400, 500 }; ray_t* v = make_i64_vec(xs, 5); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 2, true), RAY_OK); - /* Snapshot the bitmap pointer/contents before attach. */ - int64_t pre_off = -1, pre_len = -1; - const uint8_t* pre = ray_vec_nullmap_bytes(v, &pre_off, &pre_len); - TEST_ASSERT_NOT_NULL(pre); - TEST_ASSERT_EQ_I(pre_off, 0); - TEST_ASSERT_TRUE(pre_len >= 8); - /* Bit 2 must be set in the pre-snapshot. */ - TEST_ASSERT_TRUE((pre[0] >> 2) & 1); + TEST_ASSERT_TRUE (ray_vec_is_null(v, 2)); + TEST_ASSERT_FALSE(ray_vec_is_null(v, 0)); ray_t* w = v; TEST_ASSERT_FALSE(RAY_IS_ERR(ray_index_attach_zone(&w))); - /* After attach, the helper must still report bit 2 as set, even - * though w->nullmap[] is now the index pointer. */ - int64_t post_off = -1, post_len = -1; - const uint8_t* post = ray_vec_nullmap_bytes(w, &post_off, &post_len); - TEST_ASSERT_NOT_NULL(post); - TEST_ASSERT_EQ_I(post_off, 0); - TEST_ASSERT_TRUE((post[0] >> 2) & 1); - - /* The helper must NOT return the parent's now-clobbered nullmap[] - * (which holds an index pointer in its first 8 bytes). */ - TEST_ASSERT_TRUE(post != w->nullmap); + /* After attach the index pointer overlays bytes 0-7 of the union; + * sentinel-based readers must still see the null at row 2. */ + TEST_ASSERT_TRUE (ray_vec_is_null(w, 2)); + TEST_ASSERT_FALSE(ray_vec_is_null(w, 0)); + TEST_ASSERT_FALSE(ray_vec_is_null(w, 4)); ray_release(w); ray_heap_destroy(); @@ -1105,121 +1080,71 @@ static test_result_t test_index_retain_payload_direct(void) { PASS(); } -/* ─── ray_index_release_saved with RAY_STR/RAY_SYM (covers saved_hi paths) ── */ - -static test_result_t test_index_release_saved_str_sym(void) { +/* ─── ray_index_release_saved / retain_saved are no-ops ────────────── * + * + * Index attachment is restricted to numeric vector types (see + * prepare_attach), so saved_nullmap never carries owned ray_t* refs. + * The functions are kept for call-site symmetry but do nothing. 
These + * tests verify the no-op contract: calling them on a fully populated + * ix struct must not touch refcounts on whatever pointers happen to + * sit in the saved bytes. */ + +static test_result_t test_index_release_saved_noop(void) { ray_heap_init(); - /* Test RAY_STR parent_type in ray_index_release_saved. - * This covers the `if (ix->parent_type == RAY_STR)` true branch (lines 150-153) - * and saved_hi_ptr/saved_hi_clear. */ - { - ray_index_t ix; - memset(&ix, 0, sizeof(ix)); - ix.kind = RAY_IDX_ZONE; - ix.parent_type = RAY_STR; - ix.saved_attrs = 0; /* no NULLMAP_EXT, so saved_lo_ptr not called */ - /* saved_nullmap[8..15] = 0 (NULL pointer), so saved_hi_ptr returns NULL, - * and `if (hi && ...)` is false - safe to release. */ - ray_index_release_saved(&ix); - } + int64_t dummy[] = { 1 }; + ray_t* victim = make_i64_vec(dummy, 1); + uint32_t rc_before = victim->rc; - /* Test RAY_STR with non-null hi pointer (retained). */ - { - /* Build a dummy ray_t to use as a fake "str_pool" saved pointer. */ - int64_t dummy[] = { 1 }; - ray_t* fake_pool = make_i64_vec(dummy, 1); - ray_retain(fake_pool); /* bump to rc=2 so release brings it to 1 */ - - ray_index_t ix; - memset(&ix, 0, sizeof(ix)); - ix.kind = RAY_IDX_ZONE; - ix.parent_type = RAY_STR; - ix.saved_attrs = 0; - /* Store fake_pool into saved_nullmap[8..15]. */ - memcpy(&ix.saved_nullmap[8], &fake_pool, sizeof(fake_pool)); - /* This calls saved_hi_ptr which reads the pointer and releases it. */ - ray_index_release_saved(&ix); - /* fake_pool rc is now 1 again (was 2, released by release_saved). */ - ray_release(fake_pool); - } + ray_index_t ix; + memset(&ix, 0, sizeof(ix)); + ix.kind = RAY_IDX_ZONE; + ix.parent_type = RAY_I64; + ix.saved_attrs = 0; + /* Put a real pointer into saved_nullmap[8..15] — if the function + * were not a no-op it would try to release it and drop the rc. 
*/ + memcpy(&ix.saved_nullmap[8], &victim, sizeof(victim)); - /* Test RAY_SYM with NULLMAP_EXT — covers the SYM+ext branch (lines 154-162). */ - { - int64_t dummy[] = { 1 }; - ray_t* fake_dict = make_i64_vec(dummy, 1); - ray_retain(fake_dict); /* rc=2 */ - - ray_index_t ix; - memset(&ix, 0, sizeof(ix)); - ix.kind = RAY_IDX_ZONE; - ix.parent_type = RAY_SYM; - ix.saved_attrs = RAY_ATTR_NULLMAP_EXT; - /* lo (saved_nullmap[0..7]) = NULL — so lo release is skipped. */ - /* hi (saved_nullmap[8..15]) = fake_dict pointer. */ - memcpy(&ix.saved_nullmap[8], &fake_dict, sizeof(fake_dict)); - ray_index_release_saved(&ix); - /* fake_dict rc back to 1. */ - ray_release(fake_dict); - } + ray_index_release_saved(&ix); + TEST_ASSERT_EQ_U(victim->rc, rc_before); + ray_release(victim); ray_heap_destroy(); PASS(); } -/* ─── ray_index_retain_saved with RAY_STR/RAY_SYM ───────────────────────── */ - -static test_result_t test_index_retain_saved_str_sym(void) { +static test_result_t test_index_retain_saved_noop(void) { ray_heap_init(); - /* RAY_STR parent_type — covers `if (ix->parent_type == RAY_STR)` true branch - * in ray_index_retain_saved (lines 170-172). */ - { - int64_t dummy[] = { 1 }; - ray_t* fake_pool = make_i64_vec(dummy, 1); - /* rc=1 initially; retain_saved will bump to rc=2. */ - - ray_index_t ix; - memset(&ix, 0, sizeof(ix)); - ix.kind = RAY_IDX_ZONE; - ix.parent_type = RAY_STR; - ix.saved_attrs = 0; /* no NULLMAP_EXT */ - memcpy(&ix.saved_nullmap[8], &fake_pool, sizeof(fake_pool)); - ray_index_retain_saved(&ix); - /* rc is now 2 — release twice. */ - ray_release(fake_pool); - ray_release(fake_pool); - } + int64_t dummy[] = { 1 }; + ray_t* victim = make_i64_vec(dummy, 1); + uint32_t rc_before = victim->rc; - /* RAY_SYM with NULLMAP_EXT — covers the SYM+ext branch in retain_saved - * (lines 173-177). */ - { - int64_t dummy[] = { 1 }; - ray_t* fake_dict = make_i64_vec(dummy, 1); - /* rc=1. 
*/ - - ray_index_t ix; - memset(&ix, 0, sizeof(ix)); - ix.kind = RAY_IDX_ZONE; - ix.parent_type = RAY_SYM; - ix.saved_attrs = RAY_ATTR_NULLMAP_EXT; - /* lo (saved_nullmap[0..7]) = NULL so lo retain is skipped. */ - memcpy(&ix.saved_nullmap[8], &fake_dict, sizeof(fake_dict)); - ray_index_retain_saved(&ix); - /* rc is now 2 — release twice. */ - ray_release(fake_dict); - ray_release(fake_dict); - } + ray_index_t ix; + memset(&ix, 0, sizeof(ix)); + ix.kind = RAY_IDX_ZONE; + ix.parent_type = RAY_I64; + ix.saved_attrs = 0; + memcpy(&ix.saved_nullmap[8], &victim, sizeof(victim)); + ray_index_retain_saved(&ix); + TEST_ASSERT_EQ_U(victim->rc, rc_before); + + ray_release(victim); ray_heap_destroy(); PASS(); } -/* ─── ray_index_retain_saved with ext-nullmap (covers saved_lo branch) ───── */ +/* ─── Shared-index drop preserves sentinel nulls across COW ─────────────── * + * + * When a vec with HAS_INDEX is shared (rc > 1) and then dropped, the + * drop path takes the shared branch (ray_index_retain_saved + memcpy of + * saved bytes). This test verifies the round-trip on a >128-element + * vec with sentinel-encoded nulls — both copies must still see the nulls + * via ray_vec_is_null after the drop. */ -static test_result_t test_index_retain_saved_ext_nullmap(void) { +static test_result_t test_index_drop_shared_with_large_nulls(void) { ray_heap_init(); - /* Build a vector with ext-nullmap (>128 elements). 
*/ int64_t n = 150; ray_t* v = ray_vec_new(RAY_I64, n); for (int64_t i = 0; i < n; i++) { @@ -1227,27 +1152,28 @@ static test_result_t test_index_retain_saved_ext_nullmap(void) { v = ray_vec_append(v, &x); } TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 140, true), RAY_OK); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); + TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); ray_t* w = v; ray_t* r = ray_index_attach_zone(&w); TEST_ASSERT_FALSE(RAY_IS_ERR(r)); TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_INDEX); - /* Share the index (rc >= 2) so ray_index_drop triggers retain_saved. */ + /* Share the index (rc >= 2) so ray_index_drop hits the shared branch. */ ray_retain(w); ray_retain(w); ray_t* b = ray_cow(w); TEST_ASSERT_TRUE(b != w); TEST_ASSERT_TRUE(b->index == w->index); - /* Drop from w - shared path calls ray_index_retain_saved. */ + /* Drop from w — shared path. */ ray_t* w2 = w; ray_index_drop(&w2); TEST_ASSERT_FALSE(w2->attrs & RAY_ATTR_HAS_INDEX); TEST_ASSERT_TRUE(b->attrs & RAY_ATTR_HAS_INDEX); - /* b still reads nulls correctly. */ + /* Both copies still see the null via the payload sentinel. 
*/ + TEST_ASSERT_TRUE(ray_vec_is_null(w2, 140)); TEST_ASSERT_TRUE(ray_vec_is_null(b, 140)); ray_release(w2); @@ -1395,7 +1321,7 @@ static test_result_t test_index_builtin_fns(void) { const test_entry_t index_entries[] = { { "index/attach_drop_no_nulls", test_index_attach_drop_no_nulls, NULL, NULL }, { "index/attach_drop_with_inline_nulls", test_index_attach_drop_with_inline_nulls, NULL, NULL }, - { "index/attach_drop_with_ext_nullmap", test_index_attach_drop_with_ext_nullmap, NULL, NULL }, + { "index/attach_drop_large_sentinel_nulls", test_index_attach_drop_large_sentinel_nulls, NULL, NULL }, { "index/replace_existing", test_index_replace_existing, NULL, NULL }, { "index/mutation_drops", test_index_mutation_drops, NULL, NULL }, { "index/float_zone", test_index_float_zone, NULL, NULL }, @@ -1422,9 +1348,9 @@ const test_entry_t index_entries[] = { { "index/hash_f64_nan", test_index_hash_f64_nan, NULL, NULL }, { "index/attach_slice_error", test_index_attach_slice_error, NULL, NULL }, { "index/retain_payload_direct", test_index_retain_payload_direct, NULL, NULL }, - { "index/release_saved_str_sym", test_index_release_saved_str_sym, NULL, NULL }, - { "index/retain_saved_str_sym", test_index_retain_saved_str_sym, NULL, NULL }, - { "index/retain_saved_ext_nullmap", test_index_retain_saved_ext_nullmap, NULL, NULL }, + { "index/release_saved_noop", test_index_release_saved_noop, NULL, NULL }, + { "index/retain_saved_noop", test_index_retain_saved_noop, NULL, NULL }, + { "index/drop_shared_with_large_nulls", test_index_drop_shared_with_large_nulls, NULL, NULL }, { "index/info_no_index", test_index_info_no_index, NULL, NULL }, { "index/bloom_with_nulls", test_index_bloom_with_nulls, NULL, NULL }, { "index/guid_unsupported", test_index_guid_unsupported, NULL, NULL }, diff --git a/test/test_lang.c b/test/test_lang.c index bb0fd685..1784a8a1 100644 --- a/test/test_lang.c +++ b/test/test_lang.c @@ -2483,9 +2483,15 @@ static test_result_t test_eval_insert_guid(void) { ray_t* 
null_atom = ray_typed_null(-RAY_GUID); TEST_ASSERT_FALSE(RAY_IS_ERR(null_atom)); - TEST_ASSERT_EQ_PTR(null_atom->obj, NULL); - - g = ray_vec_insert_at(g, 1, null_atom->obj ? ray_data(null_atom->obj) : (const void*)"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"); + /* NULL_GUID = 16 all-zero bytes in obj's U8 buffer. ray_typed_null + * allocates that buffer rather than leaving obj as NULL, so + * consumers can ray_data(obj) unconditionally. */ + TEST_ASSERT_NOT_NULL(null_atom->obj); + const uint8_t* nb = (const uint8_t*)ray_data(null_atom->obj); + for (int i = 0; i < 16; i++) + TEST_ASSERT_EQ_I(nb[i], 0); + + g = ray_vec_insert_at(g, 1, ray_data(null_atom->obj)); TEST_ASSERT_FALSE(RAY_IS_ERR(g)); TEST_ASSERT_EQ_I(g->len, 3); ray_release(null_atom); diff --git a/test/test_link.c b/test/test_link.c index 4f2908e3..b0516e39 100644 --- a/test/test_link.c +++ b/test/test_link.c @@ -99,7 +99,7 @@ static ray_t* build_target_table(const char* name) { return tab; } -/* ─── Phase 1: storage round-trip ──────────────────────────────────── */ +/* ─── Pass 1: storage round-trip ──────────────────────────────────── */ static test_result_t test_link_attach_basic(void) { int64_t rids[] = { 0, 1, 2, 1, 0 }; @@ -168,7 +168,6 @@ static test_result_t test_link_with_inline_nulls_promotes(void) { ray_t* v = make_i64_vec(rids, 5); TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 1, true), RAY_OK); TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); - TEST_ASSERT_FALSE(v->attrs & RAY_ATTR_NULLMAP_EXT); /* inline initially */ ray_t* target = build_target_table("custs"); int64_t custs_sym = ray_sym_intern("custs", 5); @@ -178,11 +177,13 @@ static test_result_t test_link_with_inline_nulls_promotes(void) { ray_t* w = v; ray_t* r = ray_link_attach(&w, custs_sym); TEST_ASSERT_FALSE(RAY_IS_ERR(r)); - /* Inline nulls must have been promoted to ext to free up bytes 8-15. 
*/ - TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_NULLMAP_EXT); + /* Nulls live as NULL_I64 in the payload and don't consume the + * union arm, so link_attach is unconditional and the column stays + * nullable. */ TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_LINK); - /* Null bit at row 1 is still readable. */ + TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_NULLS); TEST_ASSERT_TRUE(ray_vec_is_null(w, 1)); + TEST_ASSERT_EQ_I(w->link_target, custs_sym); ray_release(w); PASS(); @@ -212,7 +213,7 @@ static test_result_t test_link_mutation_preserves_link(void) { PASS(); } -/* ─── Phase 2: deref ──────────────────────────────────────────────── */ +/* ─── Pass 2: deref ──────────────────────────────────────────────── */ static test_result_t test_link_deref_basic(void) { int64_t rids[] = { 2, 0, 1, 2 }; @@ -296,7 +297,7 @@ static test_result_t test_link_deref_oob_yields_null(void) { PASS(); } -/* ─── Phase 3: persistence round-trip ─────────────────────────────── */ +/* ─── Pass 3: persistence round-trip ─────────────────────────────── */ static test_result_t test_link_persistence_roundtrip(void) { int64_t rids[] = { 0, 1, 2, 1, 0 }; @@ -814,7 +815,7 @@ static test_result_t test_link_deref_sym_slice_w8(void) { PASS(); } -/* ─── Phase 4: coexistence with HAS_INDEX ─────────────────────────── */ +/* ─── Pass 4: coexistence with HAS_INDEX ─────────────────────────── */ static test_result_t test_link_coexists_with_index(void) { int64_t rids[] = { 0, 1, 2, 1, 0 }; @@ -857,7 +858,7 @@ static test_result_t test_link_coexists_with_index(void) { PASS(); } -/* ─── Phase 5: parted-table interaction ────────────────────────────── */ +/* ─── Pass 5: parted-table interaction ────────────────────────────── */ #define TMP_LINK_PART_DB "/tmp/rayforce_test_link_parted_db" #define TMP_LINK_PART_TBL "facts" diff --git a/test/test_morsel.c b/test/test_morsel.c index c639486b..2e30f05c 100644 --- a/test/test_morsel.c +++ b/test/test_morsel.c @@ -335,8 +335,9 @@ static test_result_t 
test_morsel_init_range_multi(void) { PASS(); } -/* Inline-nullmap path in ray_morsel_next: vec with HAS_NULLS, offset<128, - * no NULLMAP_EXT. Drives line 96-100 (the inline-bitmap branch). */ +/* ray_morsel_next exposes null_bits for a HAS_NULLS vec — verify the + * morsel iteration surfaces a non-NULL bitmap pointer derived from the + * sentinel-encoded payload. */ static test_result_t test_morsel_nulls_inline(void) { int64_t raw[32]; for (int i = 0; i < 32; i++) raw[i] = (int64_t)i; @@ -353,8 +354,8 @@ static test_result_t test_morsel_nulls_inline(void) { PASS(); } -/* External-nullmap path: vec with >128 elements + HAS_NULLS forces - * RAY_ATTR_NULLMAP_EXT, exercising line 92-95 of morsel.c. */ +/* >128-element nullable vec: morsel iteration must surface null_bits + * derived from the sentinel-encoded payload at every step. */ static test_result_t test_morsel_nulls_external(void) { ray_t* v = ray_vec_new(RAY_I64, 200); int64_t* raw = (int64_t*)ray_data(v); @@ -463,28 +464,25 @@ static test_result_t test_morsel_has_index_ext_nulls(void) { } TEST_ASSERT_EQ_I(v->len, n); - /* null at 150 -> forces NULLMAP_EXT */ + /* Null state lives in the payload via the I64 sentinel and is + * surfaced on the morsel via null_bits_buf (filled by ray_morsel_next + * from sentinel reads). Verify the HAS_INDEX + >128-element path. 
*/ TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 150, true), RAY_OK); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); + TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); ray_t* w = v; ray_t* r = ray_index_attach_zone(&w); TEST_ASSERT_FALSE(RAY_IS_ERR(r)); TEST_ASSERT_TRUE(w->attrs & RAY_ATTR_HAS_INDEX); - /* NULLMAP_EXT cleared in parent; stored in ix->saved_attrs */ - TEST_ASSERT_FALSE(w->attrs & RAY_ATTR_NULLMAP_EXT); - - ray_index_t* ix = ray_index_payload(w->index); - TEST_ASSERT_TRUE(ix->saved_attrs & RAY_ATTR_NULLMAP_EXT); ray_morsel_t m; ray_morsel_init(&m, w); - /* First morsel: hits HAS_INDEX + saved_attrs NULLMAP_EXT (lines 85-88) */ TEST_ASSERT_TRUE(ray_morsel_next(&m)); TEST_ASSERT_NOT_NULL(m.null_bits); - /* Bit 150 should be set */ + /* Bit 150 should be set (morsel-local index == source index for + * the first morsel at offset 0). */ int bit150 = (m.null_bits[150 / 8] >> (150 % 8)) & 1; TEST_ASSERT_EQ_I(bit150, 1); diff --git a/test/test_runtime.c b/test/test_runtime.c index 8dd3c6df..44857774 100644 --- a/test/test_runtime.c +++ b/test/test_runtime.c @@ -162,14 +162,14 @@ static test_result_t test_create_with_sym_load_preserves_user_ids(void) { char path[256]; snprintf(path, sizeof(path), "%s/ids.sym", dir); - /* Phase 1: intern a name then persist the sym table. */ + /* Pass 1: intern a name then persist the sym table. */ ray_runtime_t* rt1 = ray_runtime_create(0, NULL); TEST_ASSERT_NOT_NULL(rt1); int64_t id_before = ray_sym_intern("rayforce-user-marker", 20); TEST_ASSERT_EQ_I((int)ray_sym_save(path), (int)RAY_OK); ray_runtime_destroy(rt1); - /* Phase 2: bring up a fresh runtime via the _with_sym variant so the + /* Pass 2: bring up a fresh runtime via the _with_sym variant so the * persisted table is loaded before builtins register. 
*/ ray_err_t err = RAY_ERR_OOM; ray_runtime_t* rt2 = ray_runtime_create_with_sym_err(path, &err); diff --git a/test/test_sort.c b/test/test_sort.c index 67296717..f563d896 100644 --- a/test/test_sort.c +++ b/test/test_sort.c @@ -982,35 +982,16 @@ static test_result_t test_sort_i16_nulls_first_desc(void) { } static test_result_t test_sort_u8_nulls_last_asc(void) { - /* U8 ASC × nulls-last: with the bug, nulls follow the underlying - * byte data (zeroed) and would group with the smallest values - * instead of trailing the result. */ + /* Post-Phase-1: U8 is non-nullable; ray_vec_set_null returns + * RAY_ERR_TYPE. Sort still works on non-null U8 columns. */ ray_heap_init(); ray_sym_init(); - enum { N = 100 }; - uint8_t data[N]; - for (int i = 0; i < N; i++) data[i] = (uint8_t)(i + 1); /* 1..100, no zeros */ - ray_t* vec = ray_vec_from_raw(RAY_U8, data, N); - int64_t null_pos[] = {2, 33, 77}; - for (int i = 0; i < 3; i++) ray_vec_set_null(vec, null_pos[i], true); + ray_t* vec = ray_vec_new(RAY_U8, 4); + uint8_t z = 0; + for (int i = 0; i < 4; i++) vec = ray_vec_append(vec, &z); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 1, true), RAY_ERR_TYPE); - uint8_t desc = 0, nf = 0; /* ASC, nulls LAST */ - ray_t* idx = ray_sort_indices(&vec, &desc, &nf, 1, N); - TEST_ASSERT_FALSE(RAY_IS_ERR(idx)); - const int64_t* idxd = (const int64_t*)ray_data(idx); - - /* Last three must be nulls */ - for (int i = 0; i < 3; i++) - TEST_ASSERT_FMT(ray_vec_is_null(vec, idxd[N - 1 - i]), - "u8 nulls-last: pos %d from end is not null", i); - /* Leading prefix non-decreasing */ - for (int64_t i = 1; i < N - 3; i++) { - TEST_ASSERT_FALSE(ray_vec_is_null(vec, idxd[i])); - TEST_ASSERT_TRUE(data[idxd[i]] >= data[idxd[i-1]]); - } - - ray_release(idx); ray_release(vec); ray_sym_destroy(); ray_heap_destroy(); @@ -1018,33 +999,14 @@ static test_result_t test_sort_u8_nulls_last_asc(void) { } static test_result_t test_sort_u8_nulls_first_desc(void) { - /* DESC × nulls-first: encoded null = ~0 = 
UINT64_MAX, sorts before - * even 0xFF. Underlying data here intentionally contains 0xFF so - * the bug's natural-byte-order behavior cannot mimic the fix. */ ray_heap_init(); ray_sym_init(); - enum { N = 100 }; - uint8_t data[N]; - for (int i = 0; i < N; i++) data[i] = (uint8_t)(150 + i % 50); /* 150..199 */ - ray_t* vec = ray_vec_from_raw(RAY_U8, data, N); - int64_t null_pos[] = {10, 50, 90}; - for (int i = 0; i < 3; i++) ray_vec_set_null(vec, null_pos[i], true); - - uint8_t desc = 1, nf = 1; - ray_t* idx = ray_sort_indices(&vec, &desc, &nf, 1, N); - TEST_ASSERT_FALSE(RAY_IS_ERR(idx)); - const int64_t* idxd = (const int64_t*)ray_data(idx); + ray_t* vec = ray_vec_new(RAY_U8, 4); + uint8_t z = 0; + for (int i = 0; i < 4; i++) vec = ray_vec_append(vec, &z); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 0, true), RAY_ERR_TYPE); - for (int i = 0; i < 3; i++) - TEST_ASSERT_TRUE(ray_vec_is_null(vec, idxd[i])); - /* Tail non-increasing */ - for (int64_t i = 4; i < N; i++) { - TEST_ASSERT_FALSE(ray_vec_is_null(vec, idxd[i])); - TEST_ASSERT_TRUE(data[idxd[i]] <= data[idxd[i-1]]); - } - - ray_release(idx); ray_release(vec); ray_sym_destroy(); ray_heap_destroy(); @@ -1052,38 +1014,15 @@ static test_result_t test_sort_u8_nulls_first_desc(void) { } static test_result_t test_sort_bool_nulls_first(void) { - /* BOOL shares the U8 encode path; nulls must still respect the - * requested boundary and not get folded into the false bucket. */ + /* See test_sort_u8_nulls_last_asc — BOOL is non-nullable. 
*/ ray_heap_init(); ray_sym_init(); - enum { N = 100 }; - uint8_t data[N]; - for (int i = 0; i < N; i++) data[i] = (uint8_t)(i & 1); - ray_t* vec = ray_vec_from_raw(RAY_BOOL, data, N); - int64_t null_pos[] = {0, 25, 50, 99}; - for (int i = 0; i < 4; i++) ray_vec_set_null(vec, null_pos[i], true); - - uint8_t desc = 0, nf = 1; - ray_t* idx = ray_sort_indices(&vec, &desc, &nf, 1, N); - TEST_ASSERT_FALSE(RAY_IS_ERR(idx)); - const int64_t* idxd = (const int64_t*)ray_data(idx); - - for (int i = 0; i < 4; i++) - TEST_ASSERT_FMT(ray_vec_is_null(vec, idxd[i]), - "bool nulls-first: pos %d is not null", i); - - /* Among non-nulls, all 0s come before all 1s. */ - int saw_one = 0; - for (int64_t i = 4; i < N; i++) { - TEST_ASSERT_FALSE(ray_vec_is_null(vec, idxd[i])); - if (data[idxd[i]] == 1) saw_one = 1; - else TEST_ASSERT_FMT(saw_one == 0, - "bool asc: a 0 appears after a 1 at %lld", - (long long)i); - } + ray_t* vec = ray_vec_new(RAY_BOOL, 4); + uint8_t b = 1; + for (int i = 0; i < 4; i++) vec = ray_vec_append(vec, &b); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(vec, 0, true), RAY_ERR_TYPE); - ray_release(idx); ray_release(vec); ray_sym_destroy(); ray_heap_destroy(); diff --git a/test/test_store.c b/test/test_store.c index 62760870..e0308530 100644 --- a/test/test_store.c +++ b/test/test_store.c @@ -791,19 +791,21 @@ static test_result_t test_group_parted(void) { PASS(); } -/* ---- test_col_ext_nullmap_roundtrip ------------------------------------- */ +/* ---- test_col_large_nullable_roundtrip ---------------------------------- */ -#define EXT_NM_LEN 256 /* >128 to trigger ext_nullmap */ +#define LARGE_NULL_LEN 256 /* >128 — past the legacy inline-bitmap boundary */ -static test_result_t test_col_ext_nullmap_roundtrip(void) { - /* Create a 256-element I64 vector with nulls at various positions */ - ray_t* vec = ray_vec_new(RAY_I64, EXT_NM_LEN); +static test_result_t test_col_large_nullable_roundtrip(void) { + /* Create a 256-element I64 vector with sentinel-encoded nulls 
at + * various positions and round-trip through ray_col_save + + * ray_col_load / ray_col_mmap. */ + ray_t* vec = ray_vec_new(RAY_I64, LARGE_NULL_LEN); TEST_ASSERT_NOT_NULL(vec); TEST_ASSERT_FALSE(RAY_IS_ERR(vec)); - vec->len = EXT_NM_LEN; + vec->len = LARGE_NULL_LEN; int64_t* data = (int64_t*)ray_data(vec); - for (int i = 0; i < EXT_NM_LEN; i++) data[i] = i * 10; + for (int i = 0; i < LARGE_NULL_LEN; i++) data[i] = i * 10; /* Set nulls at positions: 0, 5, 127, 128, 200, 255 */ int null_positions[] = { 0, 5, 127, 128, 200, 255 }; @@ -811,10 +813,7 @@ static test_result_t test_col_ext_nullmap_roundtrip(void) { for (int i = 0; i < n_nulls; i++) ray_vec_set_null(vec, null_positions[i], true); - /* Verify ext_nullmap was created (>128 elements forces external) */ TEST_ASSERT_TRUE((vec->attrs & RAY_ATTR_HAS_NULLS) != 0); - TEST_ASSERT_TRUE((vec->attrs & RAY_ATTR_NULLMAP_EXT) != 0); - TEST_ASSERT_NOT_NULL(vec->ext_nullmap); /* --- Round-trip via ray_col_load --- */ ray_err_t err = ray_col_save(vec, TMP_COL_PATH); @@ -825,10 +824,8 @@ static test_result_t test_col_ext_nullmap_roundtrip(void) { TEST_ASSERT_FALSE(RAY_IS_ERR(loaded)); TEST_ASSERT_EQ_I(loaded->type, RAY_I64); - TEST_ASSERT_EQ_I(loaded->len, EXT_NM_LEN); + TEST_ASSERT_EQ_I(loaded->len, LARGE_NULL_LEN); TEST_ASSERT_TRUE((loaded->attrs & RAY_ATTR_HAS_NULLS) != 0); - TEST_ASSERT_TRUE((loaded->attrs & RAY_ATTR_NULLMAP_EXT) != 0); - TEST_ASSERT_NOT_NULL(loaded->ext_nullmap); /* Verify null positions preserved */ for (int i = 0; i < n_nulls; i++) @@ -854,10 +851,8 @@ static test_result_t test_col_ext_nullmap_roundtrip(void) { TEST_ASSERT_EQ_U(mapped->mmod, 1); TEST_ASSERT_EQ_I(mapped->type, RAY_I64); - TEST_ASSERT_EQ_I(mapped->len, EXT_NM_LEN); + TEST_ASSERT_EQ_I(mapped->len, LARGE_NULL_LEN); TEST_ASSERT_TRUE((mapped->attrs & RAY_ATTR_HAS_NULLS) != 0); - TEST_ASSERT_TRUE((mapped->attrs & RAY_ATTR_NULLMAP_EXT) != 0); - TEST_ASSERT_NOT_NULL(mapped->ext_nullmap); /* Verify null positions preserved in mmap path */ for 
(int i = 0; i < n_nulls; i++) @@ -2180,23 +2175,25 @@ static test_result_t test_serde_obj_save_error(void) { * covering lines 586-656 (the RAY_BOOL/U8/I16/I32/DATE/TIME/F32 vector * deserialization with HAS_NULLS). */ static test_result_t test_serde_vec_null_bitmaps(void) { - /* BOOL vector with null at index 1 */ + /* BOOL is non-nullable — set_null rejects. Round-trip a non-null + * BOOL vec to keep the serde path covered. */ { ray_t* v = ray_vec_new(RAY_BOOL, 3); TEST_ASSERT_NOT_NULL(v); TEST_ASSERT_FALSE(RAY_IS_ERR(v)); v->len = 3; uint8_t* d = (uint8_t*)ray_data(v); d[0] = 1; d[1] = 0; d[2] = 1; - ray_vec_set_null(v, 1, true); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(v, 1, true), RAY_ERR_TYPE); ray_t* w = ray_ser(v); TEST_ASSERT_NOT_NULL(w); TEST_ASSERT_FALSE(RAY_IS_ERR(w)); ray_t* b = ray_de(w); TEST_ASSERT_NOT_NULL(b); TEST_ASSERT_FALSE(RAY_IS_ERR(b)); TEST_ASSERT_EQ_I(b->type, RAY_BOOL); - TEST_ASSERT_TRUE(b->attrs & RAY_ATTR_HAS_NULLS); - TEST_ASSERT_TRUE(ray_vec_is_null(b, 1)); - TEST_ASSERT_FALSE(ray_vec_is_null(b, 0)); + uint8_t* bd = (uint8_t*)ray_data(b); + TEST_ASSERT_EQ_I(bd[0], 1); + TEST_ASSERT_EQ_I(bd[1], 0); + TEST_ASSERT_EQ_I(bd[2], 1); ray_release(b); ray_release(w); ray_release(v); } /* I32 vector with null at index 0 */ @@ -2424,11 +2421,10 @@ static test_result_t test_serde_error_roundtrip(void) { PASS(); } -/* ---- serde coverage: large null vector (>128 elems, ext nullmap path) ---- */ +/* ---- serde coverage: large null vector (>128 elems) --------------------- */ -/* When a vector has more than 128 elements and HAS_NULLS, de_null_bitmap - * allocates an external nullmap (RAY_ATTR_NULLMAP_EXT). This covers - * lines 117-122 in serde.c. */ +/* Round-trip a >128-element nullable vec through ser/de — verifies the + * sentinel-encoded null state survives. 
*/ static test_result_t test_serde_large_null_vec(void) { int64_t n = 200; ray_t* v = ray_vec_new(RAY_I64, n); @@ -3925,40 +3921,6 @@ static test_result_t test_col_recursive_sym_in_list(void) { PASS(); } -/* ---- test_col_validate_mapped_bitmap_truncated --------------------------- */ -/* Covers col_validate_mapped: ext_nullmap bitmap extends beyond file => corrupt. */ -static test_result_t test_col_validate_mapped_bitmap_truncated(void) { - /* Write a valid-looking I64 header claiming HAS_NULLS + NULLMAP_EXT, - * with len=16 (bitmap = 2 bytes needed) but only write 1 byte of bitmap. */ - FILE* f = fopen(TMP_COL_PATH, "wb"); - TEST_ASSERT_NOT_NULL(f); - - uint8_t hdr[32]; - memset(hdr, 0, 32); - hdr[18] = RAY_I64; /* type */ - hdr[19] = RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT; /* attrs */ - hdr[20] = 1; /* rc = 1 */ - int64_t len = 16; - memcpy(hdr + 24, &len, 8); - - /* Write header + data (16 * 8 = 128 bytes) + 1 byte bitmap (need 2) */ - fwrite(hdr, 1, 32, f); - uint8_t data[128]; - memset(data, 0, 128); - fwrite(data, 1, 128, f); - uint8_t bitmap_byte = 0xFF; - fwrite(&bitmap_byte, 1, 1, f); /* write only 1 of the 2 needed bitmap bytes */ - fclose(f); - - ray_t* result = ray_col_mmap(TMP_COL_PATH); - TEST_ASSERT_TRUE(RAY_IS_ERR(result)); - TEST_ASSERT_STR_EQ(ray_err_code(result), "corrupt"); - ray_release(result); - - unlink(TMP_COL_PATH); - PASS(); -} - /* ---- test_col_sym_w64_negative_index ------------------------------------- */ /* Covers validate_sym_bounds W64 negative-index branch (p[i] < 0). 
*/ static test_result_t test_col_sym_w64_negative_index(void) { @@ -4024,7 +3986,7 @@ const test_entry_t store_entries[] = { { "store/parted_release", test_parted_release, store_setup, store_teardown }, { "store/part_open", test_part_open, store_setup, store_teardown }, { "store/group_parted", test_group_parted, store_setup, store_teardown }, - { "store/col_ext_nullmap_roundtrip", test_col_ext_nullmap_roundtrip, store_setup, store_teardown }, + { "store/col_large_nullable_roundtrip", test_col_large_nullable_roundtrip, store_setup, store_teardown }, { "store/col_save_load_str", test_col_save_load_str, store_setup, store_teardown }, { "store/col_save_load_list", test_col_save_load_list, store_setup, store_teardown }, { "store/col_save_load_table", test_col_save_load_table, store_setup, store_teardown }, @@ -4038,7 +4000,6 @@ const test_entry_t store_entries[] = { { "store/col_mmap_size_mismatch", test_col_mmap_size_mismatch, store_setup, store_teardown }, { "store/col_recursive_atoms", test_col_recursive_atoms, store_setup, store_teardown }, { "store/col_recursive_sym_in_list", test_col_recursive_sym_in_list, store_setup, store_teardown }, - { "store/col_validate_bitmap_trunc", test_col_validate_mapped_bitmap_truncated, store_setup, store_teardown }, { "store/col_sym_w64_neg_index", test_col_sym_w64_negative_index, store_setup, store_teardown }, { "store/file_open_close", test_file_open_close, store_setup, store_teardown }, { "store/file_lock_unlock", test_file_lock_unlock, store_setup, store_teardown }, diff --git a/test/test_vec.c b/test/test_vec.c index 42b2ed35..43fed95b 100644 --- a/test/test_vec.c +++ b/test/test_vec.c @@ -244,7 +244,10 @@ static test_result_t test_vec_null_inline(void) { TEST_ASSERT_FALSE(ray_vec_is_null(v, 0)); TEST_ASSERT_FALSE(ray_vec_is_null(v, 4)); - /* Clear a null */ + /* Clear a null. 
The caller must restore a real payload value + * before clearing HAS_NULLS — the stale NULL_I64 sentinel from the + * prior set-null would otherwise still read back as null. */ + ((int64_t*)ray_data(v))[3] = 30; /* restore vals[3] = 3 * 10 */ ray_vec_set_null(v, 3, false); TEST_ASSERT_FALSE(ray_vec_is_null(v, 3)); @@ -255,25 +258,30 @@ static test_result_t test_vec_null_inline(void) { /* ---- null_external (>128 elements) ------------------------------------- */ static test_result_t test_vec_null_external(void) { - ray_t* v = ray_vec_new(RAY_U8, 200); + /* >128-element nullable vec. U8 is non-nullable so the test uses + * I16, whose null state lives as NULL_I16 in the payload. */ + ray_t* v = ray_vec_new(RAY_I16, 200); - /* Append 200 elements */ for (int i = 0; i < 200; i++) { - uint8_t val = (uint8_t)(i & 0xFF); + int16_t val = (int16_t)i; v = ray_vec_append(v, &val); TEST_ASSERT_FALSE(RAY_IS_ERR(v)); } TEST_ASSERT_EQ_I(v->len, 200); - /* Set null at index 150 (forces external nullmap) */ ray_vec_set_null(v, 150, true); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); TEST_ASSERT_TRUE(ray_vec_is_null(v, 150)); TEST_ASSERT_FALSE(ray_vec_is_null(v, 0)); TEST_ASSERT_FALSE(ray_vec_is_null(v, 149)); - /* External nullmap is owned by the vector and released with it. */ + /* U8 set-null is rejected (U8 is non-nullable). 
*/ + ray_t* u = ray_vec_new(RAY_U8, 4); + uint8_t z = 0; + for (int i = 0; i < 4; i++) u = ray_vec_append(u, &z); + TEST_ASSERT_EQ_I(ray_vec_set_null_checked(u, 1, true), RAY_ERR_TYPE); + ray_release(u); + ray_release(v); PASS(); } @@ -301,30 +309,25 @@ static test_result_t test_vec_slice_release_parent_ref(void) { PASS(); } -/* ---- null_external_release_ext_ref -------------------------------------- */ +/* ---- null_large_release ------------------------------------------------- */ -static test_result_t test_vec_null_external_release_ext_ref(void) { - ray_t* v = ray_vec_new(RAY_U8, 200); +static test_result_t test_vec_null_large_release(void) { + /* Release-without-leak smoke test on a large nullable vec. ASAN + * is the gate. */ + ray_t* v = ray_vec_new(RAY_I16, 200); TEST_ASSERT_NOT_NULL(v); for (int i = 0; i < 200; i++) { - uint8_t val = (uint8_t)(i & 0xFF); + int16_t val = (int16_t)i; v = ray_vec_append(v, &val); TEST_ASSERT_FALSE(RAY_IS_ERR(v)); } ray_vec_set_null(v, 150, true); - TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_NULLMAP_EXT); - ray_t* ext = v->ext_nullmap; - TEST_ASSERT_NOT_NULL(ext); - - ray_retain(ext); /* guard ref */ - TEST_ASSERT_EQ_U(ext->rc, 2); + TEST_ASSERT_TRUE(v->attrs & RAY_ATTR_HAS_NULLS); + TEST_ASSERT_TRUE(ray_vec_is_null(v, 150)); ray_release(v); - TEST_ASSERT_EQ_U(ext->rc, 1); - - ray_release(ext); PASS(); } @@ -558,7 +561,7 @@ const test_entry_t vec_entries[] = { { "vec/null_inline", test_vec_null_inline, vec_setup, vec_teardown }, { "vec/null_external", test_vec_null_external, vec_setup, vec_teardown }, { "vec/slice_release_parent_ref", test_vec_slice_release_parent_ref, vec_setup, vec_teardown }, - { "vec/null_external_release_ext_ref", test_vec_null_external_release_ext_ref, vec_setup, vec_teardown }, + { "vec/null_large_release", test_vec_null_large_release, vec_setup, vec_teardown }, { "vec/append_grow", test_vec_append_grow, vec_setup, vec_teardown }, { "vec/type_correctness", test_vec_type_correctness, vec_setup, vec_teardown }, 
{ "vec/empty", test_vec_empty, vec_setup, vec_teardown },