diff --git a/.github/workflows/cibuildwheels.yml b/.github/workflows/cibuildwheels.yml index ed69f764..f7e2bef0 100644 --- a/.github/workflows/cibuildwheels.yml +++ b/.github/workflows/cibuildwheels.yml @@ -26,6 +26,7 @@ env: jobs: build_wheels: + if: ${{ github.ref_name != 'ctable3' && github.head_ref != 'ctable3' }} name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} runs-on: ${{ matrix.runs-on || matrix.os }} permissions: @@ -128,10 +129,9 @@ jobs: upload_pypi: + if: ${{ (github.ref_name != 'ctable3' && github.head_ref != 'ctable3') && startsWith(github.event.ref, 'refs/tags') }} needs: [ build_wheels] runs-on: ubuntu-latest - # Only upload wheels when tagging (typically a release) - if: startsWith(github.event.ref, 'refs/tags') steps: - uses: actions/download-artifact@v8 with: diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index f54afad5..3d293274 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -14,6 +14,7 @@ env: jobs: build_wheels_wasm: + if: ${{ github.ref_name != 'ctable3' && github.head_ref != 'ctable3' }} name: Build and test wheels for WASM on ${{ matrix.os }} for ${{ matrix.p_ver }} runs-on: ubuntu-latest permissions: diff --git a/bench/ctable/Prueba_iter.py b/bench/ctable/Prueba_iter.py new file mode 100644 index 00000000..51203ba4 --- /dev/null +++ b/bench/ctable/Prueba_iter.py @@ -0,0 +1,97 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass +from time import time + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100)) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000 # start small, increase when confident + +data = [(i, float(i % 100), i % 2 == 0) for i in range(N)] +tabla = CTable(Row, new_data=data) + +print(f"Table created with {len(tabla)} rows\n") + +# ------------------------------------------------------------------- +# Test 1: iterate without accessing any column (minimum cost) +# ------------------------------------------------------------------- +t0 = time() +for _row in tabla: + pass +t1 = time() +print(f"[Test 1] Iter without accessing columns: {(t1 - t0)*1000:.3f} ms") + +# ------------------------------------------------------------------- +# Test 2: iterate accessing a single column (real_pos cached once) +# ------------------------------------------------------------------- +t0 = time() +for row in tabla: + _ = row["id"] +t1 = time() +print(f"[Test 2] Iter accessing 'id': {(t1 - t0)*1000:.3f} ms") + +# ------------------------------------------------------------------- +# Test 3: iterate accessing all columns (real_pos cached once per row) +# ------------------------------------------------------------------- +t0 = time() +for row in tabla: + _ = row["id"] + _ = row["score"] + _ = row["active"] +t1 = time() +print(f"[Test 3] Iter accessing 3 columns: {(t1 - t0)*1000:.3f} ms") + +# ------------------------------------------------------------------- +# Test 4: correctness — values match expected +# ------------------------------------------------------------------- +errors = 0 +for row in tabla: + if row["id"] != row._nrow: + errors += 1 + if row["score"] != float(row._nrow % 100): + errors += 
1 + if row["active"] != (row._nrow % 2 == 0): + errors += 1 + +print(f"\n[Test 4] Correctness errors: {errors} (expected: 0)") + +# ------------------------------------------------------------------- +# Test 5: with holes (deleted rows) +# ------------------------------------------------------------------- +tabla2 = CTable(Row, new_data=data) +tabla2.delete(list(range(0, N, 2))) # delete even rows, keep odd ones + +print(f"\nTable with holes: {len(tabla2)} rows (expected: {N // 2})") + +t0 = time() +ids = [] +for row in tabla2: + ids.append(row["id"]) +t1 = time() + +expected_ids = [i for i in range(N) if i % 2 != 0] +ok = ids == expected_ids +print(f"[Test 5] Iter with holes ({N//2} rows): {(t1 - t0)*1000:.3f} ms | correctness: {ok}") + +# ------------------------------------------------------------------- +# Test 6: real_pos is cached correctly (not recomputed) +# ------------------------------------------------------------------- +row0 = next(iter(tabla)) +assert row0._real_pos is None, "real_pos should be None before first access" +_ = row0["id"] +assert row0._real_pos is not None, "real_pos should be cached after first access" +print(f"\n[Test 6] real_pos caching: OK (real_pos={row0._real_pos})") diff --git a/bench/ctable/bench_append_regression.py b/bench/ctable/bench_append_regression.py new file mode 100644 index 00000000..03f875db --- /dev/null +++ b/bench/ctable/bench_append_regression.py @@ -0,0 +1,117 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark: append() overhead introduced by the new schema pipeline +# +# The new append() path routes every row through: +# _normalize_row_input → validate_row (Pydantic) → _coerce_row_to_storage +# +# This benchmark isolates how much each step costs, and shows the +# total overhead vs the raw NDArray write speed. + +from dataclasses import dataclass +from time import perf_counter + +import numpy as np + +import blosc2 +from blosc2.schema_compiler import compile_schema +from blosc2.schema_validation import build_validator_model, validate_row + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 5_000 +rng = np.random.default_rng(42) +data = [ + (int(i), float(rng.uniform(0, 100)), bool(i % 2)) + for i in range(N) +] +schema = compile_schema(Row) +# Warm up the Pydantic model cache +build_validator_model(schema) + +print(f"append() pipeline cost breakdown | N = {N:,} rows") +print("=" * 60) + +# ── 1. Raw NDArray writes (no CTable overhead at all) ──────────────────────── +ids = np.zeros(N, dtype=np.int64) +scores = np.zeros(N, dtype=np.float64) +flags = np.zeros(N, dtype=np.bool_) +mask = np.zeros(N, dtype=np.bool_) + +t0 = perf_counter() +for i, (id_, score, active) in enumerate(data): + ids[i] = id_ + scores[i] = score + flags[i] = active + mask[i] = True +t_raw = perf_counter() - t0 +print(f"{'Raw NumPy writes (baseline)':<40} {t_raw:.4f} s") + +# ── 2. 
_normalize_row_input only ───────────────────────────────────────────── +t_obj = blosc2.CTable(Row, expected_size=N, validate=False) +t0 = perf_counter() +for row in data: + _ = t_obj._normalize_row_input(row) +t_normalize = perf_counter() - t0 +print(f"{'_normalize_row_input only':<40} {t_normalize:.4f} s ({t_normalize/t_raw:.1f}x baseline)") + +# ── 3. Pydantic validate_row only ──────────────────────────────────────────── +row_dicts = [t_obj._normalize_row_input(row) for row in data] +t0 = perf_counter() +for rd in row_dicts: + _ = validate_row(schema, rd) +t_validate = perf_counter() - t0 +print(f"{'validate_row (Pydantic) only':<40} {t_validate:.4f} s ({t_validate/t_raw:.1f}x baseline)") + +# ── 4. _coerce_row_to_storage only ─────────────────────────────────────────── +t0 = perf_counter() +for rd in row_dicts: + _ = t_obj._coerce_row_to_storage(rd) +t_coerce = perf_counter() - t0 +print(f"{'_coerce_row_to_storage only':<40} {t_coerce:.4f} s ({t_coerce/t_raw:.1f}x baseline)") + +# ── 5. Full append(), validate=False (3 runs, take minimum) ───────────────── +RUNS = 3 +best_off = float("inf") +for _ in range(RUNS): + t_obj2 = blosc2.CTable(Row, expected_size=N, validate=False) + t0 = perf_counter() + for row in data: + t_obj2.append(row) + best_off = min(best_off, perf_counter() - t0) +t_append_off = best_off +print(f"{'Full append(), validate=False':<40} {t_append_off:.4f} s ({t_append_off/t_raw:.1f}x baseline)") + +# ── 6. 
Full append(), validate=True (3 runs, take minimum) ────────────────── +best_on = float("inf") +for _ in range(RUNS): + t_obj3 = blosc2.CTable(Row, expected_size=N, validate=True) + t0 = perf_counter() + for row in data: + t_obj3.append(row) + best_on = min(best_on, perf_counter() - t0) +t_append_on = best_on +print(f"{'Full append(), validate=True':<40} {t_append_on:.4f} s ({t_append_on/t_raw:.1f}x baseline)") + +print() +print("=" * 60) +pydantic_cost = max(t_append_on - t_append_off, 0.0) +print(f"{'Pydantic overhead in append()':<40} {pydantic_cost:.4f} s") +if t_append_on > 0: + print(f"{'Validation fraction of total':<40} {pydantic_cost/t_append_on*100:.1f}%") +print(f"{'Per-row Pydantic cost (isolated)':<40} {(t_validate/N)*1e6:.2f} µs/row") +print() +print(f"Note: append() is dominated by blosc2 I/O ({t_append_off/t_raw:.0f}x raw numpy),") +print(" not by the validation pipeline.") +print(" The main bottleneck is the last_true_pos backward scan per row.") diff --git a/bench/ctable/bench_pandas_roundtrip.py b/bench/ctable/bench_pandas_roundtrip.py new file mode 100644 index 00000000..03a09ffa --- /dev/null +++ b/bench/ctable/bench_pandas_roundtrip.py @@ -0,0 +1,209 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark: pandas ↔ CTable round-trip (with on-disk persistence) +# +# Pipeline measured in four isolated steps: +# +# 1. pandas → CTable : pa.Table.from_pandas() + CTable.from_arrow() +# 2. CTable.save() : write in-memory CTable to disk +# 3. CTable.load() : read disk table back into RAM +# 4. CTable → pandas : CTable.to_arrow().to_pandas() +# +# Plus the combined full round-trip (steps 1-4) is shown at the end. +# +# Each measurement is the minimum of NRUNS repetitions to reduce noise. 
+# Schema: id (int64), score (float64), active (bool), label (string ≤16). + +import os +import shutil +from time import perf_counter + +import numpy as np +import pandas as pd +import pyarrow as pa + +from blosc2 import CTable + +NRUNS = 3 +TABLE_DIR = "saved_ctable/bench_pandas" +SIZES = [1_000, 10_000, 100_000, 1_000_000] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def sep(title: str) -> None: + print(f"\n{'─' * 60}") + print(f" {title}") + print(f"{'─' * 60}") + + +def tmin(fn, n: int = NRUNS) -> float: + """Minimum elapsed time (s) over *n* calls of *fn*.""" + best = float("inf") + for _ in range(n): + t0 = perf_counter() + fn() + best = min(best, perf_counter() - t0) + return best + + +def clean(path: str = TABLE_DIR) -> None: + if os.path.exists(path): + shutil.rmtree(path) + os.makedirs(path, exist_ok=True) + + +def make_dataframe(n: int) -> pd.DataFrame: + rng = np.random.default_rng(42) + return pd.DataFrame({ + "id": np.arange(n, dtype=np.int64), + "score": rng.uniform(0, 100, n).astype(np.float64), + "active": rng.integers(0, 2, n, dtype=bool), + "label": [f"r{i % 10000:05d}" for i in range(n)], + }) + + +# --------------------------------------------------------------------------- +# Section 1: pandas → CTable (in-memory) +# --------------------------------------------------------------------------- + +sep("1. 
pandas → CTable (from_arrow, in-memory)") +print(f"{'rows':>12} {'pandas→arrow (s)':>18} {'arrow→ctable (s)':>18} {'total (s)':>12}") +print(f"{'----':>12} {'----------------':>18} {'----------------':>18} {'---------':>12}") + +ctables: dict[int, CTable] = {} # keep for steps 2 & 4 + +for N in SIZES: + df = make_dataframe(N) + + def bench_to_arrow(df=df): + return pa.Table.from_pandas(df, preserve_index=False) + + def bench_from_arrow(df=df): + at = pa.Table.from_pandas(df, preserve_index=False) + return CTable.from_arrow(at) + + t_pa = tmin(bench_to_arrow) + t_ct = tmin(bench_from_arrow) - t_pa # from_arrow only + t_tot = t_pa + t_ct + + # Keep one CTable for later steps + at = pa.Table.from_pandas(df, preserve_index=False) + ctables[N] = CTable.from_arrow(at) + + print(f"{N:>12,} {t_pa:>18.4f} {t_ct:>18.4f} {t_tot:>12.4f}") + + +# --------------------------------------------------------------------------- +# Section 2: CTable.save() (in-memory → disk) +# --------------------------------------------------------------------------- + +sep("2. 
CTable.save() (in-memory → disk)") +print(f"{'rows':>12} {'save (s)':>14} {'compressed':>12} {'ratio':>8}") +print(f"{'----':>12} {'--------':>14} {'----------':>12} {'-----':>8}") + +for N in SIZES: + t = ctables[N] + path = os.path.join(TABLE_DIR, f"ct_{N}") + + def bench_save(t=t, path=path): + if os.path.exists(path): + shutil.rmtree(path) + t.save(path, overwrite=True) + + elapsed = tmin(bench_save) + # Final state for size info + t.save(path, overwrite=True) + cbytes = t.cbytes + nbytes = t.nbytes + ratio = nbytes / cbytes if cbytes > 0 else float("nan") + + def _fmt(n): + if n < 1024**2: + return f"{n / 1024:.1f} KB" + return f"{n / 1024**2:.1f} MB" + + print(f"{N:>12,} {elapsed:>14.4f} {_fmt(cbytes):>12} {ratio:>7.2f}x") + + +# --------------------------------------------------------------------------- +# Section 3: CTable.load() (disk → in-memory) +# --------------------------------------------------------------------------- + +sep("3. CTable.load() (disk → in-memory)") +print(f"{'rows':>12} {'load (s)':>14}") +print(f"{'----':>12} {'--------':>14}") + +for N in SIZES: + path = os.path.join(TABLE_DIR, f"ct_{N}") + + def bench_load(path=path): + return CTable.load(path) + + elapsed = tmin(bench_load) + print(f"{N:>12,} {elapsed:>14.4f}") + + +# --------------------------------------------------------------------------- +# Section 4: CTable → pandas (to_arrow → to_pandas) +# --------------------------------------------------------------------------- + +sep("4. 
CTable → pandas (to_arrow + to_pandas)") +print(f"{'rows':>12} {'ctable→arrow (s)':>18} {'arrow→pandas (s)':>18} {'total (s)':>12}") +print(f"{'----':>12} {'----------------':>18} {'----------------':>18} {'---------':>12}") + +for N in SIZES: + t = ctables[N] + at_cache = t.to_arrow() # pre-convert once so we can time each step cleanly + + def bench_to_arrow_ct(t=t): + return t.to_arrow() + + def bench_to_pandas(at=at_cache): + return at.to_pandas() + + t_arr = tmin(bench_to_arrow_ct) + t_pd = tmin(bench_to_pandas) + t_tot = t_arr + t_pd + + print(f"{N:>12,} {t_arr:>18.4f} {t_pd:>18.4f} {t_tot:>12.4f}") + + +# --------------------------------------------------------------------------- +# Section 5: Full round-trip (pandas → CTable → disk → load → pandas) +# --------------------------------------------------------------------------- + +sep("5. Full round-trip (pandas → CTable → save → load → pandas)") +print(f"{'rows':>12} {'round-trip (s)':>16}") +print(f"{'----':>12} {'---------------':>16}") + +for N in SIZES: + df = make_dataframe(N) + path = os.path.join(TABLE_DIR, f"rt_{N}") + + def bench_roundtrip(df=df, path=path): + # pandas → CTable + at = pa.Table.from_pandas(df, preserve_index=False) + t = CTable.from_arrow(at) + # save to disk + t.save(path, overwrite=True) + # load back + t2 = CTable.load(path) + # CTable → pandas + return t2.to_arrow().to_pandas() + + elapsed = tmin(bench_roundtrip) + print(f"{N:>12,} {elapsed:>16.4f}") + + +# Cleanup +clean() +print() diff --git a/bench/ctable/bench_persistency.py b/bench/ctable/bench_persistency.py new file mode 100644 index 00000000..2cc449bc --- /dev/null +++ b/bench/ctable/bench_persistency.py @@ -0,0 +1,189 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark: persistent vs in-memory CTable +# +# Sections: +# 1. extend() — bulk creation: in-memory vs file-backed +# 2. open() — time to reopen an existing persistent table +# 3. append() — single-row append: in-memory vs file-backed (after reopen) +# 4. column read — materialising a full column: in-memory vs file-backed +# +# Each measurement is the minimum of NRUNS repetitions to reduce noise. + +import os +import shutil +from dataclasses import dataclass +from time import perf_counter + +import blosc2 + +NRUNS = 3 +TABLE_DIR = "saved_ctable/bench" + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +def sep(title: str) -> None: + print(f"\n{'─' * 60}") + print(f" {title}") + print(f"{'─' * 60}") + + +def tmin(fn, n: int = NRUNS) -> float: + """Return the minimum elapsed time (seconds) over *n* calls of *fn*.""" + best = float("inf") + for _ in range(n): + t0 = perf_counter() + fn() + best = min(best, perf_counter() - t0) + return best + + +def clean() -> None: + if os.path.exists(TABLE_DIR): + shutil.rmtree(TABLE_DIR) + os.makedirs(TABLE_DIR, exist_ok=True) + + +# --------------------------------------------------------------------------- +# Section 1: bulk creation — extend() +# --------------------------------------------------------------------------- + +sep("1. 
extend() — bulk insert: in-memory vs file-backed") + +SIZES = [1_000, 10_000, 100_000, 1_000_000] + +print(f"{'rows':>12} {'in-memory (s)':>16} {'file-backed (s)':>16} {'overhead':>10}") +print(f"{'----':>12} {'-------------':>16} {'---------------':>16} {'--------':>10}") + +for N in SIZES: + data = [(i, float(i % 100), i % 2 == 0) for i in range(N)] + + def bench_mem(N=N, data=data): + t = blosc2.CTable(Row, expected_size=N) + t.extend(data, validate=False) + + def bench_file(N=N, data=data): + clean() + t = blosc2.CTable(Row, urlpath=TABLE_DIR + "/ext", mode="w", expected_size=N) + t.extend(data, validate=False) + + t_mem = tmin(bench_mem) + t_file = tmin(bench_file) + overhead = t_file / t_mem if t_mem > 0 else float("nan") + print(f"{N:>12,} {t_mem:>16.4f} {t_file:>16.4f} {overhead:>9.2f}x") + +# --------------------------------------------------------------------------- +# Section 2: open() — reopen an existing table +# --------------------------------------------------------------------------- + +sep("2. open() — time to reopen a persistent table") + +print(f"{'rows':>12} {'CTable.open() (s)':>20} {'CTable(..., mode=a) (s)':>24}") +print(f"{'----':>12} {'------------------':>20} {'------------------------':>24}") + +for N in SIZES: + data = [(i, float(i % 100), i % 2 == 0) for i in range(N)] + clean() + path = TABLE_DIR + "/reopen" + t = blosc2.CTable(Row, urlpath=path, mode="w", expected_size=N) + t.extend(data, validate=False) + del t + + def bench_open(path=path): + t2 = blosc2.CTable.open(path, mode="r") + _ = len(t2) + + def bench_ctor(path=path): + t2 = blosc2.CTable(Row, urlpath=path, mode="a") + _ = len(t2) + + t_open = tmin(bench_open) + t_ctor = tmin(bench_ctor) + print(f"{N:>12,} {t_open:>20.4f} {t_ctor:>24.4f}") + +# --------------------------------------------------------------------------- +# Section 3: append() — single-row inserts after reopen +# --------------------------------------------------------------------------- + +sep("3. 
append() — 1 000 single-row inserts: in-memory vs file-backed") + +APPEND_N = 1_000 +PREALLOCATE = 10_000 # avoid resize noise + +print(f"{'backend':>14} {'total (s)':>12} {'µs / row':>12}") +print(f"{'-------':>14} {'---------':>12} {'--------':>12}") + + +def bench_append_mem(): + t = blosc2.CTable(Row, expected_size=PREALLOCATE, validate=False) + for i in range(APPEND_N): + t.append((i, float(i % 100), True)) + + +clean() +path = TABLE_DIR + "/apath" +blosc2.CTable(Row, urlpath=path, mode="w", expected_size=PREALLOCATE) + + +def bench_append_file(): + t = blosc2.CTable(Row, urlpath=path, mode="a", validate=False) + for i in range(APPEND_N): + t.append((i, float(i % 100), True)) + + +for label, fn in [("in-memory", bench_append_mem), ("file-backed", bench_append_file)]: + # Reset the file table once before timing; tmin() then calls fn NRUNS times on that same table + if label == "file-backed": + clean() + blosc2.CTable(Row, urlpath=path, mode="w", expected_size=PREALLOCATE) + elapsed = tmin(fn) + us_per_row = elapsed / APPEND_N * 1e6 + print(f"{label:>14} {elapsed:>12.4f} {us_per_row:>12.1f}") + +# --------------------------------------------------------------------------- +# Section 4: column read — to_numpy() after reopen +# --------------------------------------------------------------------------- + +sep("4. 
column read — to_numpy() on 'id': in-memory vs file-backed") + +print(f"{'rows':>12} {'in-memory (s)':>16} {'file-backed (s)':>16} {'ratio':>8}") +print(f"{'----':>12} {'-------------':>16} {'---------------':>16} {'-----':>8}") + +for N in SIZES: + data = [(i, float(i % 100), i % 2 == 0) for i in range(N)] + + t_mem_table = blosc2.CTable(Row, expected_size=N, validate=False) + t_mem_table.extend(data, validate=False) + + clean() + path = TABLE_DIR + "/read" + t_file_table = blosc2.CTable(Row, urlpath=path, mode="w", expected_size=N) + t_file_table.extend(data, validate=False) + # Reopen read-only (simulates a real read workload) + t_ro = blosc2.CTable.open(path, mode="r") + + def bench_read_mem(t=t_mem_table): + _ = t["id"].to_numpy() + + def bench_read_file(t=t_ro): + _ = t["id"].to_numpy() + + t_m = tmin(bench_read_mem) + t_f = tmin(bench_read_file) + ratio = t_f / t_m if t_m > 0 else float("nan") + print(f"{N:>12,} {t_m:>16.4f} {t_f:>16.4f} {ratio:>7.2f}x") + +# Cleanup +clean() +print() diff --git a/bench/ctable/bench_validation.py b/bench/ctable/bench_validation.py new file mode 100644 index 00000000..7329a3ce --- /dev/null +++ b/bench/ctable/bench_validation.py @@ -0,0 +1,129 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark: cost of constraint validation +# +# Measures the overhead of validate=True vs validate=False for: +# 1. append() — row-by-row, Pydantic path +# 2. extend() — bulk insert, vectorized NumPy path +# +# at increasing batch sizes to show how validation cost scales. 
+ +from dataclasses import dataclass +from time import perf_counter + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +def make_data(n: int): + rng = np.random.default_rng(42) + ids = np.arange(n, dtype=np.int64) + scores = rng.uniform(0, 100, n) + flags = rng.integers(0, 2, n, dtype=np.bool_) + return list(zip(ids.tolist(), scores.tolist(), flags.tolist(), strict=False)) + + +SIZES = [100, 1_000, 10_000, 100_000, 1_000_000] +APPEND_SIZES = [100, 1_000] # append row-by-row is slow at large N + +# ───────────────────────────────────────────────────────────────────────────── +# 1. append() — validate=True vs validate=False +# ───────────────────────────────────────────────────────────────────────────── +print("=" * 65) +print("1. append() — row-by-row (Pydantic validation per row)") +print("=" * 65) +print(f"{'N':>10} {'validate=True':>14} {'validate=False':>15} {'overhead':>10}") +print("-" * 65) + +for n in APPEND_SIZES: + data = make_data(n) + + t = blosc2.CTable(Row, expected_size=n, validate=True) + t0 = perf_counter() + for row in data: + t.append(row) + t_on = perf_counter() - t0 + + t = blosc2.CTable(Row, expected_size=n, validate=False) + t0 = perf_counter() + for row in data: + t.append(row) + t_off = perf_counter() - t0 + + overhead = (t_on / t_off) if t_off > 0 else float("inf") + print(f"{n:>10,} {t_on:>13.4f}s {t_off:>14.4f}s {overhead:>9.2f}x") + +# ───────────────────────────────────────────────────────────────────────────── +# 2. extend() — validate=True vs validate=False +# ───────────────────────────────────────────────────────────────────────────── +print() +print("=" * 65) +print("2. 
extend() — bulk insert (vectorized NumPy validation)") +print("=" * 65) +print(f"{'N':>10} {'validate=True':>14} {'validate=False':>15} {'overhead':>10}") +print("-" * 65) + +for n in SIZES: + data = make_data(n) + + t = blosc2.CTable(Row, expected_size=n, validate=True) + t0 = perf_counter() + t.extend(data) + t_on = perf_counter() - t0 + + t = blosc2.CTable(Row, expected_size=n, validate=False) + t0 = perf_counter() + t.extend(data) + t_off = perf_counter() - t0 + + overhead = (t_on / t_off) if t_off > 0 else float("inf") + print(f"{n:>10,} {t_on:>13.4f}s {t_off:>14.4f}s {overhead:>9.2f}x") + +# ───────────────────────────────────────────────────────────────────────────── +# 3. extend() — validate=True vs validate=False with structured NumPy array +# ───────────────────────────────────────────────────────────────────────────── +print() +print("=" * 65) +print("3. extend() with structured NumPy array") +print("=" * 65) +print(f"{'N':>10} {'validate=True':>14} {'validate=False':>15} {'overhead':>10}") +print("-" * 65) + +np_dtype = np.dtype([("id", np.int64), ("score", np.float64), ("active", np.bool_)]) + +for n in SIZES: + rng = np.random.default_rng(42) + arr = np.empty(n, dtype=np_dtype) + arr["id"] = np.arange(n, dtype=np.int64) + arr["score"] = rng.uniform(0, 100, n) + arr["active"] = rng.integers(0, 2, n, dtype=np.bool_) + + t = blosc2.CTable(Row, expected_size=n, validate=True) + t0 = perf_counter() + t.extend(arr) + t_on = perf_counter() - t0 + + t = blosc2.CTable(Row, expected_size=n, validate=False) + t0 = perf_counter() + t.extend(arr) + t_off = perf_counter() - t0 + + overhead = (t_on / t_off) if t_off > 0 else float("inf") + print(f"{n:>10,} {t_on:>13.4f}s {t_off:>14.4f}s {overhead:>9.2f}x") + +print() +print("Note: 'overhead' = validate=True time / validate=False time.") +print(" 1.00x means validation is free; 2.00x means it doubles the time.") diff --git a/bench/ctable/compact.py b/bench/ctable/compact.py new file mode 100644 index 
00000000..a4817b0a --- /dev/null +++ b/bench/ctable/compact.py @@ -0,0 +1,75 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring compact() time and memory gain after deletions +# of varying fractions of the table. + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 + +print(f"compact() benchmark | N = {N:,}\n") + +# Build base data once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +delete_fractions = [0.1, 0.25, 0.5, 0.75, 0.9] + +print("=" * 75) +print(f"{'DELETED':>10} {'ROWS LEFT':>10} {'TIME (s)':>12} {'CBYTES BEFORE':>15} {'CBYTES AFTER':>14}") +print("-" * 75) + +for frac in delete_fractions: + ct = blosc2.CTable(Row, expected_size=N) + ct.extend(DATA) + + n_delete = int(N * frac) + ct.delete(list(range(n_delete))) + + cbytes_before = sum(col.cbytes for col in ct._cols.values()) + ct._valid_rows.cbytes + + t0 = time() + ct.compact() + t_compact = time() - t0 + + cbytes_after = sum(col.cbytes for col in ct._cols.values()) + ct._valid_rows.cbytes + + print( + f"{frac*100:>9.0f}%" + f" {N - n_delete:>10,}" + f" {t_compact:>12.4f}" + f" {cbytes_before / 1024**2:>13.2f} MB" + f" {cbytes_after / 1024**2:>12.2f} MB" + ) + +print("-" * 75) diff --git 
a/bench/ctable/ctable_v_panda.py b/bench/ctable/ctable_v_panda.py new file mode 100644 index 00000000..3b7a6d52 --- /dev/null +++ b/bench/ctable/ctable_v_panda.py @@ -0,0 +1,121 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark comparing CTable vs pandas DataFrame for: +# 1. Creation from a NumPy structured array +# 2. Column access (full column) +# 3. Filtering (where/query) +# 4. Row iteration + +from dataclasses import dataclass +from time import time + +import numpy as np +import pandas as pd + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +rng = np.random.default_rng(42) + +print(f"CTable vs pandas benchmark | N = {N:,}\n") + +# Build base data once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.empty(N, dtype=np_dtype) +DATA["id"] = np.arange(N, dtype=np.int64) +DATA["c_val"] = rng.standard_normal(N) + 1j * rng.standard_normal(N) +DATA["score"] = rng.uniform(0, 100, N) +DATA["active"] = rng.integers(0, 2, N, dtype=np.bool_) + +print("=" * 65) +print(f"{'OPERATION':<30} {'CTable':>12} {'pandas':>12} {'SPEEDUP':>10}") +print("-" * 65) + +# 1. Creation +t0 = time() +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) +t_ct_create = time() - t0 + +t0 = time() +df = pd.DataFrame(DATA) +t_pd_create = time() - t0 + +print(f"{'Creation':<30} {t_ct_create:>12.4f} {t_pd_create:>12.4f} {t_pd_create/t_ct_create:>9.2f}x") + +# 2. 
Column access (full column) +t0 = time() +arr = ct["score"] +t_ct_col = time() - t0 + +t0 = time() +arr = df["score"] +t_pd_col = time() - t0 + +print(f"{'Column access (full)':<30} {t_ct_col:>12.4f} {t_pd_col:>12.4f} {t_pd_col/t_ct_col:>9.2f}x") + +# 2.5 Column access materialised with to_numpy() (full column) +t0 = time() +arr = ct["score"].to_numpy() +t_ct_col = time() - t0 + +t0 = time() +arr = df["score"].to_numpy() +t_pd_col = time() - t0 + +print(f"{'Column access to numpy (full)':<30} {t_ct_col:>12.4f} {t_pd_col:>12.4f} {t_pd_col/t_ct_col:>9.3f}x") + +# 3. Filtering +t0 = time() +result_ct = ct.where((ct["id"] > 250_000) & (ct["id"] < 750_000)) +t_ct_filter = time() - t0 + +t0 = time() +result_pd = df.query("250000 < id < 750000") +t_pd_filter = time() - t0 + +print(f"{'Filter (id 250k-750k)':<30} {t_ct_filter:>12.4f} {t_pd_filter:>12.4f} {t_pd_filter/t_ct_filter:>9.2f}x") + +# 4. Row iteration (value-by-value over the "score" column) +t0 = time() +for _val in ct["score"]: + pass +t_ct_iter = time() - t0 + +t0 = time() +for _val in df["score"]: + pass +t_pd_iter = time() - t0 + +print(f"{'Row iteration':<30} {t_ct_iter:>12.4f} {t_pd_iter:>12.4f} {t_pd_iter/t_ct_iter:>9.2f}x") + +print("-" * 65) + +# Memory +ct_cbytes = sum(col.cbytes for col in ct._cols.values()) + ct._valid_rows.cbytes +ct_nbytes = sum(col.nbytes for col in ct._cols.values()) + ct._valid_rows.nbytes +pd_nbytes = df.memory_usage(deep=True).sum() + +print(f"\nMemory — CTable compressed: {ct_cbytes / 1024**2:.2f} MB") +print(f"Memory — CTable uncompressed: {ct_nbytes / 1024**2:.2f} MB") +print(f"Memory — pandas: {pd_nbytes / 1024**2:.2f} MB") +print(f"Compression ratio CTable: {ct_nbytes / ct_cbytes:.2f}x") diff --git a/bench/ctable/delete.py b/bench/ctable/delete.py new file mode 100644 index 00000000..79f59580 --- /dev/null +++ b/bench/ctable/delete.py @@ -0,0 +1,76 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring delete() performance with different index types: +# int, slice, and list — with varying sizes. + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 + +print(f"delete() benchmark | N = {N:,}\n") + +# Build base data once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +delete_cases = [ + ("int", 0), + ("slice small", slice(0, 100)), + ("slice large", slice(0, 100_000)), + ("slice full", slice(0, N)), + ("list small", list(range(100))), + ("list large", list(range(100_000))), + ("list full", list(range(N))), +] + +print("=" * 60) +print(f"{'CASE':<20} {'ROWS DELETED':>14} {'TIME (s)':>12}") +print("-" * 60) + +for label, key in delete_cases: + ct = blosc2.CTable(Row, expected_size=N) + ct.extend(DATA) + + if isinstance(key, int): + n_deleted = 1 + elif isinstance(key, slice): + n_deleted = len(range(*key.indices(N))) + else: + n_deleted = len(key) + + t0 = time() + ct.delete(key) + t_delete = time() - t0 + print(f"{label:<20} {n_deleted:>14,} {t_delete:>12.6f}") + +print("-" * 60) diff --git a/bench/ctable/expected_size.py b/bench/ctable/expected_size.py new file mode 100644 index 00000000..e199d589 --- /dev/null +++ b/bench/ctable/expected_size.py @@ -0,0 +1,69 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc 
Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring the overhead of resize() when expected_size +# is too small (M rows) vs correctly sized (N rows) during extend(). + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + + +M = 779 +N = 62_500 +MAX_N = 1_000_000 +print(f"expected_size benchmark | wrong expected_size = {M}") + +# Pre-generate full dataset once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(MAX_N) + ], + dtype=np_dtype, +) + +while N <= MAX_N: + print("-" * 80) + print(f"N = {N:,} rows") + + # 1. extend() with correct expected_size = N + ct_correct = blosc2.CTable(Row, expected_size=N) + t0 = time() + ct_correct.extend(DATA[:N]) + t_correct = time() - t0 + print(f"extend() expected_size=N ({N:>8,}): {t_correct:.4f} s rows: {len(ct_correct):,}") + + # 2. 
extend() with wrong expected_size = M (forces resize) + ct_wrong = blosc2.CTable(Row, expected_size=M) + t0 = time() + ct_wrong.extend(DATA[:N]) + t_wrong = time() - t0 + print(f"extend() expected_size=M ({M:>8,}): {t_wrong:.4f} s rows: {len(ct_wrong):,}") + + # Summary + print(f" Slowdown from wrong expected_size: {t_wrong / t_correct:.2f}x") + + N *= 2 diff --git a/bench/ctable/extend.py b/bench/ctable/extend.py new file mode 100644 index 00000000..5e1090ba --- /dev/null +++ b/bench/ctable/extend.py @@ -0,0 +1,105 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring CTable creation time from three different sources: +# 1. Python list of lists (1M rows) +# 2. NumPy structured array (1M rows) — list of named tuples +# 3. An existing CTable (previously created from Python lists, 1M rows) + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +print(f"CTable creation benchmark with {N:,} rows\n") + +# --------------------------------------------------------------------------- +# Base data generation (not part of the benchmark timing) +# --------------------------------------------------------------------------- +print("Generating base data...") + +t0 = time() +data_list = [ + [i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0] + for i in range(N) +] +t_gen_list = time() - t0 +print(f" Python list generated in: {t_gen_list:.4f} s") + +t0 = time() +np_dtype = np.dtype([ + ("id", 
np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +data_np = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) +t_gen_np = time() - t0 +print(f" NumPy structured array generated: {t_gen_np:.4f} s\n") + +# --------------------------------------------------------------------------- +# 1. Creation from a Python list of lists +# --------------------------------------------------------------------------- +print("CTable from Python list of lists") +t0 = time() +ct_from_list = blosc2.CTable(Row, expected_size=N) +ct_from_list.extend(data_list) +t_from_list = time() - t0 +print(f" extend() time (Python list): {t_from_list:.4f} s") +print(f" Rows: {len(ct_from_list):,}") + +# --------------------------------------------------------------------------- +# 2. Creation from a NumPy structured array (list of named tuples) +# --------------------------------------------------------------------------- +print("CTable from NumPy structured array") +t0 = time() +ct_from_np = blosc2.CTable(Row, expected_size=N) +ct_from_np.extend(data_np) +t_from_np = time() - t0 +print(f" extend() time (NumPy struct): {t_from_np:.4f} s") +print(f" Rows: {len(ct_from_np):,}") + + +# --------------------------------------------------------------------------- +# 3. 
Creation from an existing CTable (ct_from_list, already built above) +# --------------------------------------------------------------------------- +print("CTable from an existing CTable") +t0 = time() +ct_from_ctable = blosc2.CTable(Row, expected_size=N) +ct_from_ctable.extend(ct_from_list) +t_from_ctable = time() - t0 +print(f" extend() time (CTable): {t_from_ctable:.4f} s") +print(f" Rows: {len(ct_from_ctable):,}") + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- +print("\n") +print("=" * 60) +print(f"{'SOURCE':<30} {'TIME (s)':>12} {'SPEEDUP vs list':>18}") +print("-" * 60) +print(f"{'Python list of lists':<30} {t_from_list:>12.4f} {'1.00x':>18}") +print(f"{'NumPy structured array':<30} {t_from_np:>12.4f} {t_from_list / t_from_np:>17.2f}x") +print(f"{'Existing CTable':<30} {t_from_ctable:>12.4f} {t_from_list / t_from_ctable:>17.2f}x") diff --git a/bench/ctable/extend_vs_apend.py b/bench/ctable/extend_vs_apend.py new file mode 100644 index 00000000..db63206b --- /dev/null +++ b/bench/ctable/extend_vs_apend.py @@ -0,0 +1,76 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for comparing append() (row by row) vs extend() (bulk), +# to find the crossover point where extend() becomes worth it. 
+ +from dataclasses import dataclass +from time import time + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +# Parameter — change N to test different crossover points +N = 2 +print("append() vs extend() benchmark") +for i in range(6): + print("\n") + print("%" * 100) + + + # Base data generation + data_list = [ + [i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0] for i in range(N) + ] + + # 1. N individual append() calls + print(f"{N} individual append() calls") + ct_append = blosc2.CTable(Row, expected_size=N) + t0 = time() + for row in data_list: + ct_append.append(row) + t_append = time() - t0 + print(f" Time: {t_append:.6f} s") + print(f" Rows: {len(ct_append):,}") + + # 2. N individual extend() calls (one row at a time) + print(f"{N} individual extend() calls (one row at a time)") + ct_extend_one = blosc2.CTable(Row, expected_size=N) + t0 = time() + for row in data_list: + ct_extend_one.extend([row]) + t_extend_one = time() - t0 + print(f" Time: {t_extend_one:.6f} s") + print(f" Rows: {len(ct_extend_one):,}") + + # 3. 
Single extend() call with all N rows at once + print(f"Single extend() call with all {N} rows at once") + ct_extend_bulk = blosc2.CTable(Row, expected_size=N) + t0 = time() + ct_extend_bulk.extend(data_list) + t_extend_bulk = time() - t0 + print(f" Time: {t_extend_bulk:.6f} s") + print(f" Rows: {len(ct_extend_bulk):,}") + + # Summary + print("=" * 70) + print(f"{'METHOD':<35} {'TIME (s)':>12} {'SPEEDUP vs append':>20}") + print("-" * 70) + print(f"{'append() x N':<35} {t_append:>12.6f} {'1.00x':>20}") + print(f"{'extend() x N (one row each)':<35} {t_extend_one:>12.6f} {t_append / t_extend_one:>19.2f}x") + print(f"{'extend() x 1 (all at once)':<35} {t_extend_bulk:>12.6f} {t_append / t_extend_bulk:>19.2f}x") + print("-" * 70) + + N=N*2 diff --git a/bench/ctable/index.py b/bench/ctable/index.py new file mode 100644 index 00000000..3298bde2 --- /dev/null +++ b/bench/ctable/index.py @@ -0,0 +1,63 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring Column[int] access (single row by logical index), +# which exercises _find_physical_index() traversal over chunk metadata. 
+ +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +indices = [0, N // 4, N // 2, (3 * N) // 4, N - 1] + +print(f"Column[int] access benchmark | N = {N:,}\n") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 60) +print(f"{'INDEX':<15} {'POSITION':>12} {'TIME (s)':>12}") +print("-" * 60) + +col = ct["score"] +for idx in indices: + t0 = time() + val = col[idx] + t_access = time() - t0 + position = f"{idx / N * 100:.0f}% into array" + print(f"{idx:<15,} {position:>12} {t_access:.6f}") + +print("-" * 60) diff --git a/bench/ctable/iteration_column.py b/bench/ctable/iteration_column.py new file mode 100644 index 00000000..b1ac3703 --- /dev/null +++ b/bench/ctable/iteration_column.py @@ -0,0 +1,79 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for comparing full column iteration strategies: +# 1. for val in ct["score"] — Python iterator via __iter__ +# 2. np.array(list(ct["score"])) — materialize via list then convert +# 3. 
ct["score"][0:N].to_array() — slice view + to_array() + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 + +print(f"Column iteration benchmark | N = {N:,}\n") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 60) + +col = ct["score"] + +# 1. Python iterator +t0 = time() +for _val in col: + pass +t_iter = time() - t0 +print(f"for val in col: {t_iter:.4f} s") + +# 2. list() + np.array() +t0 = time() +arr = np.array(list(col)) +t_list = time() - t0 +print(f"np.array(list(col)): {t_list:.4f} s") + +# 3. slice view + to_array() +t0 = time() +arr = col[0:N].to_numpy() +for _val in arr: + pass +t_toarray = time() - t0 +print(f"col[0:N].to_array(): {t_toarray:.4f} s") + +print("=" * 60) +print(f"Speedup to_array vs iter: {t_iter / t_toarray:.2f}x") +print(f"Speedup to_array vs list: {t_list / t_toarray:.2f}x") diff --git a/bench/ctable/print.py b/bench/ctable/print.py new file mode 100644 index 00000000..6efb80bf --- /dev/null +++ b/bench/ctable/print.py @@ -0,0 +1,108 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#######################################################################
+
+# Benchmark: iterative ingestion comparison — Pandas vs CTable
+# Data source: randomly generated numpy structured array
+
+import time
+from dataclasses import dataclass
+
+import numpy as np
+import pandas as pd
+
+import blosc2
+
+
+@dataclass
+class Row:
+    id: int = blosc2.field(blosc2.int64())
+    name: str = blosc2.field(blosc2.string(max_length=9), default="")
+    score: float = blosc2.field(blosc2.float64(ge=0), default=0.0)
+
+
+NAMES = ["benchmark", "alpha", "beta", "gamma", "delta",
+         "epsilon", "zeta", "eta", "theta", "iota"]
+
+N = 100_000
+rng = np.random.default_rng(42)
+
+np_dtype = np.dtype([("id", np.int64), ("name", "<U9"), ("score", np.float64)])
+
+
+def make_data(n: int) -> np.ndarray:
+    """Return n synthetic rows (sequential id, random name from NAMES, score)."""
+    arr = np.empty(n, dtype=np_dtype)
+    arr["id"] = np.arange(n, dtype=np.int64)
+    arr["name"] = np.array([rng.choice(NAMES) for _ in range(n)], dtype="<U9")
+    arr["score"] = rng.uniform(0, 100, n)  # NOTE(review): reconstructed from a garbled span; confirm range
+    return arr
+
+
+# ─────────────────────────────────────────────────────────────
+# 1. PANDAS
+# ─────────────────────────────────────────────────────────────
+print("=" * 60)  # NOTE(review): banner reconstructed to mirror section 2; confirm
+print("--- 1. PANDAS (structured array -> DataFrame) ---")
+data = make_data(N)
+
+t0 = time.perf_counter()
+df = pd.DataFrame(data)
+t_pandas = time.perf_counter() - t0
+
+mem_pandas = df.memory_usage(deep=True).sum() / (1024 ** 2)
+print(f"Total time: {t_pandas:.4f} s")
+print(f"Memory (RAM): {mem_pandas:.2f} MB")
+
+print("\n--- PANDAS: First 10 rows ---")
+t0_print = time.perf_counter()
+print(df.head(10).to_string())
+t_print_pandas = time.perf_counter() - t0_print
+print(f"\nPrint time: {t_print_pandas:.6f} s")
+
+# ─────────────────────────────────────────────────────────────
+# 2. BLOSC2 CTable
+# ─────────────────────────────────────────────────────────────
+print("\n" + "=" * 60)
+print("--- 2. BLOSC2 CTable (structured array -> extend) ---")
+data = make_data(N)
+
+t0 = time.perf_counter()
+ct = blosc2.CTable(Row, expected_size=N)
+ct.extend(data)
+t_blosc = time.perf_counter() - t0
+
+fields = ct.col_names
+mem_blosc_c = (sum(col.cbytes for col in ct._cols.values()) + ct._valid_rows.cbytes) / (1024 ** 2)
+mem_blosc_uc = (sum(col.nbytes for col in ct._cols.values()) + ct._valid_rows.nbytes) / (1024 ** 2)
+
+print(f"Total time: {t_blosc:.4f} s")
+print(f"Memory (uncompressed): {mem_blosc_uc:.2f} MB")
+print(f"Memory (compressed): {mem_blosc_c:.2f} MB")
+
+print("\n--- BLOSC2: First 10 rows ---")
+t0_print = time.perf_counter()
+print(ct.head(10))
+t_print_blosc = time.perf_counter() - t0_print
+print(f"\nPrint time: {t_print_blosc:.6f} s")
+
+# ─────────────────────────────────────────────────────────────
+# SUMMARY
+# ─────────────────────────────────────────────────────────────
+print("\n" + "=" * 60)
+print("--- SUMMARY ---")
+speedup = t_pandas / t_blosc
+direction = "faster" if t_blosc < t_pandas else "slower"
+
+print(f"{'METRIC':<30} {'Pandas':>12} {'Blosc2':>12}")
+print("-" * 55)
+print(f"{'Ingestion time (s)':<30} {t_pandas:>12.4f} {t_blosc:>12.4f}")
+print(f"{'Memory (MB)':<30} {mem_pandas:>12.2f} {mem_blosc_c:>12.2f}")
+print(f"{'Print time (s)':<30} {t_print_pandas:>12.6f} {t_print_blosc:>12.6f}")
+print("-" * 55)
+print(f"\nSpeedup: {speedup:.2f}x {direction}")
+print(f"Compression ratio: {mem_blosc_uc / mem_blosc_c:.2f}x")
+print(f"Blosc2 vs Pandas size: {mem_blosc_c / mem_pandas * 100:.1f}%")
diff --git a/bench/ctable/row_acces.py b/bench/ctable/row_acces.py
new file mode 100644
index 00000000..050d0309
--- /dev/null
+++ b/bench/ctable/row_acces.py
@@ -0,0 +1,62 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team
+# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring row[int] access (full row via _RowIndexer), +# testing access at different positions across the array. + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +indices = [0, N // 4, N // 2, (3 * N) // 4, N - 1] + +print(f"row[int] access benchmark | N = {N:,}\n") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 60) +print(f"{'INDEX':<15} {'POSITION':>12} {'TIME (s)':>12}") +print("-" * 60) + +for idx in indices: + t0 = time() + row = ct.row[idx] + t_access = time() - t0 + position = f"{idx / N * 100:.0f}% into array" + print(f"{idx:<15,} {position:>12} {t_access:.6f}") + +print("-" * 60) diff --git a/bench/ctable/slice.py b/bench/ctable/slice.py new file mode 100644 index 00000000..a41c50a6 --- /dev/null +++ b/bench/ctable/slice.py @@ -0,0 +1,71 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring Column[slice] access with slices of different +# sizes and positions: small, large, and middle of the array. + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +slices = [ + ("small — start", slice(0, 100)), + ("small — middle", slice(N // 2, N // 2 + 100)), + ("small — end", slice(N - 100, N)), + ("large — start", slice(0, 100_000)), + ("large — middle", slice(N // 2 - 50_000, N // 2 + 50_000)), + ("large — end", slice(N - 100_000, N)), + ("full — all", slice(0, N)), +] + +print(f"Column[slice] access benchmark | N = {N:,}\n") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 65) +print(f"{'SLICE':<25} {'ROWS':>8} {'TIME (s)':>12}") +print("-" * 65) + +col = ct["score"] +for label, s in slices: + t0 = time() + val = col[s] + t_access = time() - t0 + n_rows = s.stop - s.start + print(f"{label:<25} {n_rows:>8,} {t_access:>12.6f}") + +print("-" * 65) diff --git a/bench/ctable/slice_steps.py b/bench/ctable/slice_steps.py new file mode 100644 index 00000000..0a3fb358 --- /dev/null +++ b/bench/ctable/slice_steps.py @@ -0,0 +1,61 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc 
Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring Column[::step].to_array() with varying step sizes. + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +steps = [1, 2, 4, 8, 16, 100, 1000] + +print(f"Column[::step].to_array() benchmark | N = {N:,}\n") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 60) +print(f"{'STEP':<10} {'ROWS RETURNED':>15} {'TIME (s)':>12}") +print("-" * 60) + +col = ct["score"] +for step in steps: + t0 = time() + arr = col[::step].to_numpy() + t_total = time() - t0 + print(f"::{ step:<8} {len(arr):>15,} {t_total:>12.6f}") + +print("-" * 60) diff --git a/bench/ctable/slice_to_array.py b/bench/ctable/slice_to_array.py new file mode 100644 index 00000000..7c58080e --- /dev/null +++ b/bench/ctable/slice_to_array.py @@ -0,0 +1,71 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#######################################################################
+
+# Benchmark for measuring Column[slice] + to_numpy() with slices of
+# different sizes and positions: small, large, and middle of the array.
+
+from dataclasses import dataclass
+from time import time
+
+import numpy as np
+
+import blosc2
+
+
+@dataclass
+class Row:
+    id: int = blosc2.field(blosc2.int64(ge=0))
+    c_val: complex = blosc2.field(blosc2.complex128(), default=0j)
+    score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0)
+    active: bool = blosc2.field(blosc2.bool(), default=True)
+
+
+N = 1_000_000
+slices = [
+    ("small — start", slice(0, 100)),
+    ("small — middle", slice(N // 2, N // 2 + 100)),
+    ("small — end", slice(N - 100, N)),
+    ("large — start", slice(0, 100_000)),
+    ("large — middle", slice(N // 2 - 50_000, N // 2 + 50_000)),
+    ("large — end", slice(N - 100_000, N)),
+    ("full — all", slice(0, N)),
+]
+
+print(f"Column[slice].to_numpy() benchmark | N = {N:,}\n")
+
+# Build CTable once
+np_dtype = np.dtype([
+    ("id", np.int64),
+    ("c_val", np.complex128),
+    ("score", np.float64),
+    ("active", np.bool_),
+])
+DATA = np.array(
+    [
+        (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0)
+        for i in range(N)
+    ],
+    dtype=np_dtype,
+)
+
+ct = blosc2.CTable(Row, expected_size=N)
+ct.extend(DATA)
+
+print(f"CTable built with {len(ct):,} rows\n")
+print("=" * 65)
+print(f"{'SLICE':<25} {'ROWS':>8} {'TIME (s)':>12}")
+print("-" * 65)
+
+col = ct["score"]
+for label, s in slices:
+    t0 = time()
+    arr = col[s].to_numpy()
+    t_total = time() - t0
+    n_rows = s.stop - s.start  # every slice above has explicit bounds and step 1
+    print(f"{label:<25} {n_rows:>8,} {t_total:>12.6f}")
+
+print("-" * 65)
diff --git a/bench/ctable/speed_iter.py b/bench/ctable/speed_iter.py
new file mode 100644
index 00000000..10afdc36
--- /dev/null
+++ b/bench/ctable/speed_iter.py
@@ -0,0 +1,40 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#######################################################################
+
+from dataclasses import dataclass
+from time import time
+
+import blosc2
+from blosc2 import CTable
+
+
+@dataclass
+class Row:
+    id: int = blosc2.field(blosc2.int64(ge=0))
+    score: float = blosc2.field(blosc2.float64(ge=0, le=100))
+    active: bool = blosc2.field(blosc2.bool(), default=True)
+
+
+N = 1_000_000  # full-size run; lower this for a quick smoke test
+
+data = [(i, float(i % 100), i % 2 == 0) for i in range(N)]
+tabla = CTable(Row, new_data=data)
+
+print(f"Table created with {len(tabla)} rows\n")
+
+# -------------------------------------------------------------------
+# Test 1: iterate all rows, reading 'score' only on every 10,000th row
+# -------------------------------------------------------------------
+i = 0
+t0 = time()
+for row in tabla:
+    i = (i + 1) % 10000  # wraps to 0 once every 10,000 rows
+    if i == 0:
+        _ = row["score"]
+
+t1 = time()
+print(f"[Test 1] Iter accessing 'score' every 10,000th row: {(t1 - t0):.3f} s")
diff --git a/bench/ctable/where_chain.py b/bench/ctable/where_chain.py
new file mode 100644
index 00000000..d2a6092d
--- /dev/null
+++ b/bench/ctable/where_chain.py
@@ -0,0 +1,73 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#######################################################################
+
+# Benchmark for comparing chained where() calls vs a single combined filter.
+# Filters: 250k < id < 750k, active == False, 25.0 < score < 75.0
+
+from dataclasses import dataclass
+from time import time
+
+import numpy as np
+
+import blosc2
+
+
+@dataclass
+class Row:
+    id: int = blosc2.field(blosc2.int64(ge=0))
+    c_val: complex = blosc2.field(blosc2.complex128(), default=0j)
+    score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0)
+    active: bool = blosc2.field(blosc2.bool(), default=True)
+
+
+N = 1_000_000
+
+print(f"where() chained vs combined benchmark | N = {N:,}")
+
+# Build CTable once
+np_dtype = np.dtype([
+    ("id", np.int64),
+    ("c_val", np.complex128),
+    ("score", np.float64),
+    ("active", np.bool_),
+])
+DATA = np.array(
+    [
+        (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0)
+        for i in range(N)
+    ],
+    dtype=np_dtype,
+)
+
+ct = blosc2.CTable(Row, expected_size=N)
+ct.extend(DATA)
+
+print(f"CTable built with {len(ct):,} rows\n")
+print("=" * 70)
+
+# 1. Five chained where() calls (one per predicate)
+t0 = time()
+r1 = ct.where(ct["id"] > 250_000)
+r2 = r1.where(ct["id"] < 750_000)
+r3 = r2.where(ct["score"] > 25.0)
+r4 = r3.where(ct["score"] < 75.0)
+r5 = r4.where(ct["active"] == False)  # noqa: E712 - elementwise; `not col` collapses to one bool
+t_chained = time() - t0
+print(f"Chained where() (5 calls): {t_chained:.6f} s rows: {len(r5):,}")
+
+# 2. Single combined where() call
+t0 = time()
+result = ct.where(
+    (ct["id"] > 250_000) & (ct["id"] < 750_000) &
+    (ct["active"] == False) &  # noqa: E712 - elementwise compare, same predicate as r5
+    (ct["score"] > 25.0) & (ct["score"] < 75.0)
+)
+t_combined = time() - t0
+print(f"Combined where() (1 call): {t_combined:.6f} s rows: {len(result):,}")
+
+print("=" * 70)
+print(f"Speedup combined vs chained: {t_chained / t_combined:.2f}x")
diff --git a/bench/ctable/where_selective.py b/bench/ctable/where_selective.py
new file mode 100644
index 00000000..c0ba6f78
--- /dev/null
+++ b/bench/ctable/where_selective.py
@@ -0,0 +1,62 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team
+# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Benchmark for measuring where() performance with varying selectivity. +# Filter: id < threshold, with thresholds covering 1%, 10%, 50%, 90%, 100% + +from dataclasses import dataclass +from time import time + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +N = 1_000_000 +thresholds = [10,10_000, 100_000,250_000, 500_000,750_000 ,900_000, 999_990, 1_000_000] + +print(f"where() selectivity benchmark | N = {N:,}") + +# Build CTable once +np_dtype = np.dtype([ + ("id", np.int64), + ("c_val", np.complex128), + ("score", np.float64), + ("active", np.bool_), +]) +DATA = np.array( + [ + (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0) + for i in range(N) + ], + dtype=np_dtype, +) + +ct = blosc2.CTable(Row, expected_size=N) +ct.extend(DATA) + +print(f"CTable built with {len(ct):,} rows\n") +print("=" * 70) +print(f"{'THRESHOLD':<15} {'ROWS RETURNED':>15} {'SELECTIVITY':>13} {'TIME (s)':>12}") +print("-" * 70) + +for threshold in thresholds: + t0 = time() + result = ct.where(ct["id"] < threshold) + t_where = time() - t0 + selectivity = threshold / N * 100 + print(f"id < {threshold:<10,} {len(result):>15,} {selectivity:>12.1f}% {t_where:>12.6f}") + +print("-" * 70) diff --git a/examples/ctable/aggregates.py b/examples/ctable/aggregates.py new file mode 100644 index 00000000..bdda5080 --- /dev/null +++ b/examples/ctable/aggregates.py @@ -0,0 +1,68 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Column aggregates: sum, min, max, mean, std, unique, value_counts, +# describe, and covariance matrix. + +from dataclasses import dataclass + +import numpy as np + +import blosc2 + + +@dataclass +class Reading: + sensor_id: int = blosc2.field(blosc2.int32(ge=0, le=9)) + temperature: float = blosc2.field(blosc2.float64(ge=-50.0, le=60.0), default=20.0) + humidity: float = blosc2.field(blosc2.float64(ge=0.0, le=100.0), default=50.0) + alert: bool = blosc2.field(blosc2.bool(), default=False) + + +rng = np.random.default_rng(42) +N = 500 + +station_ids = rng.integers(0, 10, size=N).astype(np.int32) +temperatures = rng.normal(20.0, 8.0, size=N).clip(-50, 60).astype(np.float64) +humidities = rng.uniform(30.0, 90.0, size=N).astype(np.float64) +alerts = rng.random(N) < 0.05 + +data = list( + zip(station_ids.tolist(), temperatures.tolist(), humidities.tolist(), alerts.tolist(), strict=False) +) + +t = blosc2.CTable(Reading, new_data=data) +print(f"Table: {len(t)} rows\n") + +# -- per-column aggregates -------------------------------------------------- +temp = t["temperature"] +print(f"temperature sum : {temp.sum():.2f}") +print(f"temperature mean : {temp.mean():.2f}") +print(f"temperature std : {temp.std():.2f}") +print(f"temperature min : {temp.min():.2f}") +print(f"temperature max : {temp.max():.2f}") + +print(f"\nalert any : {t['alert'].any()}") +print(f"alert all : {t['alert'].all()}") + +# -- unique / value_counts -------------------------------------------------- +print(f"\nsensor_id unique values : {t['sensor_id'].unique()}") +print(f"sensor_id value_counts : {t['sensor_id'].value_counts()}") + +# -- describe(): per-column summary printed to stdout ----------------------- +print() +t.describe() + +# -- cov(): covariance matrix of numeric columns ---------------------------- +numeric = t.select(["sensor_id", "temperature", "humidity"]) +cov = 
numeric.cov() +labels = ["sensor_id", "temperature", "humidity"] +col_w = 14 +print("\nCovariance matrix:") +print(" " * 14 + "".join(f"{lbl:>{col_w}}" for lbl in labels)) +for i, row_label in enumerate(labels): + print(f"{row_label:<14}" + "".join(f"{cov[i, j]:>{col_w}.4f}" for j in range(3))) diff --git a/examples/ctable/arrow_interop.py b/examples/ctable/arrow_interop.py new file mode 100644 index 00000000..0139e0ef --- /dev/null +++ b/examples/ctable/arrow_interop.py @@ -0,0 +1,82 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Arrow interop: CTable ↔ pyarrow.Table, and pandas round-trip via Arrow. +# +# Requires: pip install pyarrow pandas + +from dataclasses import dataclass + +import pyarrow as pa + +import blosc2 + + +@dataclass +class Stock: + ticker: str = blosc2.field(blosc2.string(max_length=8), default="") + open: float = blosc2.field(blosc2.float64(ge=0), default=0.0) + close: float = blosc2.field(blosc2.float64(ge=0), default=0.0) + volume: int = blosc2.field(blosc2.int64(ge=0), default=0) + + +data = [ + ("AAPL", 182.5, 184.2, 58_000_000), + ("GOOG", 141.3, 140.8, 21_000_000), + ("MSFT", 378.9, 380.1, 19_000_000), + ("AMZN", 185.6, 187.3, 35_000_000), + ("NVDA", 875.4, 902.1, 42_000_000), +] + +t = blosc2.CTable(Stock, new_data=data) +print("CTable:") +print(t) + +# -- to_arrow() ------------------------------------------------------------- +at = t.to_arrow() +print(f"Arrow table: {len(at)} rows, schema={at.schema}\n") + +# -- from_arrow(): schema is inferred from Arrow types --------------------- +at2 = pa.table( + { + "x": pa.array([1.0, 2.0, 3.0], type=pa.float32()), + "y": pa.array([10, 20, 30], type=pa.int32()), + "label": pa.array(["a", "bb", "ccc"], type=pa.string()), + } +) +t2 = blosc2.CTable.from_arrow(at2) 
+print("CTable from Arrow (inferred schema):") +print(t2) +print(f" label dtype: {t2['label'].dtype} (max_length inferred from data)") + +# -- pandas round-trip ------------------------------------------------------ +try: + import pandas as pd + + df_original = pd.DataFrame( + { + "ticker": ["TSLA", "META", "AMD"], + "open": [245.1, 502.3, 168.7], + "close": [248.5, 498.1, 171.2], + "volume": [80_000_000, 15_000_000, 28_000_000], + } + ) + print("\nOriginal DataFrame:") + print(df_original) + + # pandas → Arrow → CTable + t_from_pd = blosc2.CTable.from_arrow(pa.Table.from_pandas(df_original, preserve_index=False)) + print("\nCTable from pandas:") + print(t_from_pd) + + # CTable → Arrow → pandas + df_back = t_from_pd.to_arrow().to_pandas() + print("\nDataFrame round-tripped through CTable:") + print(df_back) + +except ImportError: + print("pandas not installed — skipping pandas round-trip demo.") diff --git a/examples/ctable/basics.py b/examples/ctable/basics.py new file mode 100644 index 00000000..0c402cb1 --- /dev/null +++ b/examples/ctable/basics.py @@ -0,0 +1,61 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# CTable basics: creation, append, extend, head/tail, len. 
+ +from dataclasses import dataclass + +import numpy as np + +import blosc2 + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + price: float = blosc2.field(blosc2.float64(ge=0), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +# -- Create an empty table -------------------------------------------------- +t = blosc2.CTable(Row) +print(f"Empty table: {len(t)} rows") + +# -- append(): one row at a time -------------------------------------------- +t.append(Row(id=0, price=1.5, active=True)) +t.append(Row(id=1, price=2.3, active=False)) +print(f"After 2 appends: {len(t)} rows") + +# -- extend(): bulk load from a list of tuples ------------------------------ +bulk = [(i, float(i) * 0.5, i % 2 == 0) for i in range(2, 10)] +t.extend(bulk) +print(f"After extend: {len(t)} rows") + +# -- extend() from a structured numpy array --------------------------------- +arr = np.zeros(5, dtype=[("id", np.int64), ("price", np.float64), ("active", np.bool_)]) +arr["id"] = np.arange(10, 15) +arr["price"] = np.linspace(10.0, 14.0, 5) +arr["active"] = [True, False, True, False, True] +t.extend(arr) +print(f"After numpy extend: {len(t)} rows\n") + +# -- display: head / tail / full table -------------------------------------- +print("head(3):") +print(t.head(3)) + +print("tail(3):") +print(t.tail(3)) + +print("Full table:") +print(t) + +# -- basic properties ------------------------------------------------------- +print(f"nrows : {t.nrows}") +print(f"ncols : {t.ncols}") +print(f"columns: {t.col_names}") +print(f"cbytes : {t.cbytes:,} B (compressed)") +print(f"nbytes : {t.nbytes:,} B (uncompressed)") diff --git a/examples/ctable/csv_interop.py b/examples/ctable/csv_interop.py new file mode 100644 index 00000000..41a76fd9 --- /dev/null +++ b/examples/ctable/csv_interop.py @@ -0,0 +1,82 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# CSV interop: generate a weather CSV, load it into a CTable, write it back. + +import csv +import shutil +import tempfile +from dataclasses import dataclass +from pathlib import Path + +import numpy as np + +import blosc2 + + +@dataclass +class WeatherReading: + station_id: int = blosc2.field(blosc2.int32(ge=0, le=9999)) + temperature: float = blosc2.field(blosc2.float32(ge=-80.0, le=60.0), default=20.0) + humidity: float = blosc2.field(blosc2.float32(ge=0.0, le=100.0), default=50.0) + wind_speed: float = blosc2.field(blosc2.float32(ge=0.0, le=200.0), default=0.0) + pressure: float = blosc2.field(blosc2.float32(ge=800.0, le=1100.0), default=1013.0) + day_of_year: int = blosc2.field(blosc2.int16(ge=1, le=365), default=1) + + +# -- Generate a weather CSV ------------------------------------------------- +rng = np.random.default_rng(42) +N = 1_000 + +station_ids = rng.integers(0, 100, size=N).tolist() +temperatures = [round(v, 2) for v in rng.normal(15.0, 12.0, N).clip(-80, 60).tolist()] +humidities = [round(v, 2) for v in rng.uniform(20.0, 95.0, N).tolist()] +wind_speeds = [round(v, 2) for v in rng.exponential(10.0, N).clip(0, 200).tolist()] +pressures = [round(v, 2) for v in rng.normal(1013.0, 8.0, N).clip(800, 1100).tolist()] +days = rng.integers(1, 366, size=N).tolist() + +rows = list(zip(station_ids, temperatures, humidities, wind_speeds, pressures, days, strict=False)) + +tmpdir = Path(tempfile.mkdtemp(prefix="blosc2_csv_")) +csv_in = tmpdir / "weather.csv" +csv_out = tmpdir / "weather_out.csv" + +# Write the CSV manually so the example is self-contained +with open(csv_in, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["station_id", "temperature", "humidity", "wind_speed", "pressure", "day_of_year"]) + writer.writerows(rows) + +print(f"Generated {N} rows → {csv_in}") + +# -- from_csv(): load into CTable 
------------------------------------------- +t = blosc2.CTable.from_csv(str(csv_in), WeatherReading) +print(f"Loaded into CTable: {len(t)} rows") +print(t.head()) + +# -- apply a filter before exporting ---------------------------------------- +cold_days = t.where(t["temperature"] < 0) +print(f"\nCold days (temp < 0°C): {len(cold_days)} rows") +print(cold_days.head()) + +# -- to_csv(): write back to CSV -------------------------------------------- +t.to_csv(str(csv_out)) +print(f"\nFull table written to {csv_out}") + +# Verify round-trip row count +with open(csv_out) as f: + lines = f.readlines() +assert len(lines) == N + 1 # header + data rows +print(f"Round-trip verified: {len(lines) - 1} data rows in output CSV.") + +# -- TSV variant ------------------------------------------------------------ +tsv_out = tmpdir / "weather.tsv" +t.to_csv(str(tsv_out), sep="\t") +print(f"TSV variant written to {tsv_out}") + +shutil.rmtree(tmpdir) +print("Temporary files removed.") diff --git a/examples/ctable/ctable_tutorial.ipynb b/examples/ctable/ctable_tutorial.ipynb new file mode 100644 index 00000000..fbb8f230 --- /dev/null +++ b/examples/ctable/ctable_tutorial.ipynb @@ -0,0 +1,1644 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a8bf6f00", + "metadata": {}, + "source": [ + "# CTable Tutorial\n", + "\n", + "**CTable** is a columnar compressed table built on top of `blosc2.NDArray`. \n", + "It stores each column independently as a compressed array, giving you:\n", + "\n", + "- **Compression** — data lives compressed in RAM and on disk.\n", + "- **Schema** — every column has a declared type and optional constraints.\n", + "- **Speed** — bulk operations stay in NumPy; no row-by-row Python overhead.\n", + "- **Persistence** — tables can be saved to and loaded from disk transparently.\n", + "\n", + "This notebook walks through the full API, starting from the very basics and finishing with a real-world analysis of climate data across ten world cities." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a4073a3e", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:13.708034Z", + "iopub.status.busy": "2026-04-07T12:06:13.707898Z", + "iopub.status.idle": "2026-04-07T12:06:14.162620Z", + "shell.execute_reply": "2026-04-07T12:06:14.161981Z" + } + }, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "import blosc2\n", + "from blosc2 import CTable" + ] + }, + { + "cell_type": "markdown", + "id": "1637a7b2", + "metadata": {}, + "source": [ + "---\n", + "## Part 1 — The Basics\n", + "\n", + "### 1.1 Defining a schema\n", + "\n", + "Every CTable is typed. You define the schema with a plain Python `@dataclass`.\n", + "Each field gets a **spec** — a blosc2 type that carries the NumPy dtype and optional constraints." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c97f9123", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.164585Z", + "iopub.status.busy": "2026-04-07T12:06:14.164404Z", + "iopub.status.idle": "2026-04-07T12:06:14.168886Z", + "shell.execute_reply": "2026-04-07T12:06:14.168381Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty table: 0 rows, columns: ['id', 'location', 'temperature', 'active']\n" + ] + } + ], + "source": [ + "@dataclass\n", + "class Sensor:\n", + " id: int = blosc2.field(blosc2.int32(ge=0))\n", + " location: str = blosc2.field(blosc2.string(max_length=16), default=\"\")\n", + " temperature: float = blosc2.field(blosc2.float64(ge=-80, le=60), default=20.0)\n", + " active: bool = blosc2.field(blosc2.bool(), default=True)\n", + "\n", + "\n", + "# Create an empty in-memory table\n", + "t = CTable(Sensor, expected_size=50)\n", + "print(f\"Empty table: {len(t)} rows, columns: {t.col_names}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c27913d6", + 
"metadata": {}, + "source": [ + "### 1.2 Appending rows\n", + "\n", + "`append()` adds one row at a time. The row is validated against the schema before writing." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fdc64a5b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.170432Z", + "iopub.status.busy": "2026-04-07T12:06:14.170315Z", + "iopub.status.idle": "2026-04-07T12:06:14.231985Z", + "shell.execute_reply": "2026-04-07T12:06:14.231362Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id location temperature active \n", + " int32 35)\n", + "print(f\"Days above 35 °C: {len(very_hot)} ({len(very_hot) / len(climate) * 100:.1f}% of all readings)\")\n", + "print(very_hot.head(8))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ba2d719b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.342416Z", + "iopub.status.busy": "2026-04-07T12:06:14.342298Z", + "iopub.status.idle": "2026-04-07T12:06:14.358545Z", + "shell.execute_reply": "2026-04-07T12:06:14.357991Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moscow below freezing: 148 days out of 365\n", + " city day temperature humidity wind_speed pressure \n", + " 10} {'Min':>7} {'Max':>7} {'Std':>7}\")\n", + "print(\"-\" * 50)\n", + "for city in CITY_PROFILES:\n", + " v = climate.where(climate[\"city\"] == city)\n", + " col = v[\"temperature\"]\n", + " print(f\"{city:<12} {col.mean():>9.1f}° {col.min():>6.1f}° {col.max():>6.1f}° {col.std():>6.1f}°\")" + ] + }, + { + "cell_type": "markdown", + "id": "49dcbad7", + "metadata": {}, + "source": [ + "### 4.2 `describe()` — full summary in one call" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7254f3b1", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.520839Z", + "iopub.status.busy": "2026-04-07T12:06:14.520722Z", + "iopub.status.idle": 
"2026-04-07T12:06:14.542317Z", + "shell.execute_reply": "2026-04-07T12:06:14.541649Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CTable 3,650 rows × 4 cols\n", + "\n", + " temperature [float32]\n", + " count : 3,650\n", + " mean : 16.04\n", + " std : 10.72\n", + " min : -17.54\n", + " max : 39.75\n", + "\n", + " humidity [float32]\n", + " count : 3,650\n", + " mean : 63.48\n", + " std : 16.02\n", + " min : 8.894\n", + " max : 99.81\n", + "\n", + " wind_speed [float32]\n", + " count : 3,650\n", + " mean : 15.63\n", + " std : 4.874\n", + " min : 8.005\n", + " max : 47.48\n", + "\n", + " pressure [float32]\n", + " count : 3,650\n", + " mean : 1013\n", + " std : 5.328\n", + " min : 991.1\n", + " max : 1036\n", + "\n" + ] + } + ], + "source": [ + "# describe() on a select() view — only numeric columns\n", + "climate.select([\"temperature\", \"humidity\", \"wind_speed\", \"pressure\"]).describe()" + ] + }, + { + "cell_type": "markdown", + "id": "817dbc1f", + "metadata": {}, + "source": [ + "### 4.3 Covariance matrix\n", + "\n", + "`cov()` requires all columns to be numeric (int, float, or bool). \n", + "It returns a standard `numpy.ndarray`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6d0dd2c1", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.543869Z", + "iopub.status.busy": "2026-04-07T12:06:14.543748Z", + "iopub.status.idle": "2026-04-07T12:06:14.559277Z", + "shell.execute_reply": "2026-04-07T12:06:14.558718Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Covariance matrix (all cities, full year):\n", + " temp humidity wind pressure\n", + "temp 114.963 0.018 -3.523 -0.207\n", + "humidity 0.018 256.861 10.773 6.652\n", + "wind -3.523 10.773 23.760 -2.650\n", + "pressure -0.207 6.652 -2.650 28.394\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Correlation matrix:\n", + " temp humidity wind pressure\n", + "temp 1.000 0.000 -0.067 -0.004\n", + "humidity 0.000 1.000 0.138 0.078\n", + "wind -0.067 0.138 1.000 -0.102\n", + "pressure -0.004 0.078 -0.102 1.000\n" + ] + } + ], + "source": [ + "numeric = climate.select([\"temperature\", \"humidity\", \"wind_speed\", \"pressure\"])\n", + "cov = numeric.cov()\n", + "\n", + "labels = [\"temp\", \"humidity\", \"wind\", \"pressure\"]\n", + "col_w = 12\n", + "print(\"Covariance matrix (all cities, full year):\")\n", + "print(\" \" * 10 + \"\".join(f\"{lbl:>{col_w}}\" for lbl in labels))\n", + "for i, lbl in enumerate(labels):\n", + " print(f\"{lbl:<10}\" + \"\".join(f\"{cov[i, j]:>{col_w}.3f}\" for j in range(4)))\n", + "\n", + "# And the correlation matrix for easier interpretation\n", + "corr = np.corrcoef(\n", + " np.stack([numeric[c].to_numpy() for c in [\"temperature\", \"humidity\", \"wind_speed\", \"pressure\"]])\n", + ")\n", + "print(\"\\nCorrelation matrix:\")\n", + "print(\" \" * 10 + \"\".join(f\"{lbl:>{col_w}}\" for lbl in labels))\n", + "for i, lbl in enumerate(labels):\n", + " print(f\"{lbl:<10}\" + \"\".join(f\"{corr[i, j]:>{col_w}.3f}\" for j in range(4)))" + ] + }, + { + "cell_type": "markdown", + "id": 
"c10a694e", + "metadata": {}, + "source": [ + "---\n", + "## Part 5 — Analysis: Summer in Madrid\n", + "\n", + "Summer in the northern hemisphere runs roughly from the **summer solstice (day 172, June 21)** \n", + "to just before the **autumnal equinox (day 264, September 21)**.\n", + "\n", + "Let's zoom in on Madrid during those months and compare it with a few other cities." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "89e89177", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.560797Z", + "iopub.status.busy": "2026-04-07T12:06:14.560666Z", + "iopub.status.idle": "2026-04-07T12:06:14.576880Z", + "shell.execute_reply": "2026-04-07T12:06:14.576245Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Madrid summer readings : 93 days\n", + " mean temperature : 25.8 °C\n", + " max temperature : 31.4 °C\n", + " mean humidity : 43.8 %\n", + " mean wind speed : 15.8 km/h\n" + ] + } + ], + "source": [ + "SUMMER_START = 172 # June 21\n", + "SUMMER_END = 264 # September 21\n", + "\n", + "madrid = climate.where(climate[\"city\"] == \"Madrid\")\n", + "madrid_summer = madrid.where((madrid[\"day\"] >= SUMMER_START) & (madrid[\"day\"] <= SUMMER_END))\n", + "\n", + "print(f\"Madrid summer readings : {len(madrid_summer)} days\")\n", + "print(f\" mean temperature : {madrid_summer['temperature'].mean():.1f} °C\")\n", + "print(f\" max temperature : {madrid_summer['temperature'].max():.1f} °C\")\n", + "print(f\" mean humidity : {madrid_summer['humidity'].mean():.1f} %\")\n", + "print(f\" mean wind speed : {madrid_summer['wind_speed'].mean():.1f} km/h\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a439fecd", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.578621Z", + "iopub.status.busy": "2026-04-07T12:06:14.578475Z", + "iopub.status.idle": "2026-04-07T12:06:14.693971Z", + "shell.execute_reply": "2026-04-07T12:06:14.693318Z" + } + }, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "City Summer mean Summer max Summer humidity\n", + "----------------------------------------------------------\n", + "Madrid 25.8°C 31.4°C 43.8% \n", + "London 16.5°C 22.7°C 74.6% \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cairo 33.5°C 39.7°C 34.4% \n", + "Moscow 20.1°C 26.3°C 69.3% \n", + "Tokyo 25.1°C 31.0°C 73.0% \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sydney 24.6°C 30.9°C 63.8% (S. summer)\n" + ] + } + ], + "source": [ + "# Compare summer stats across several cities\n", + "compare_cities = [\"Madrid\", \"London\", \"Cairo\", \"Moscow\", \"Tokyo\", \"Sydney\"]\n", + "\n", + "print(f\"{'City':<12} {'Summer mean':>12} {'Summer max':>11} {'Summer humidity':>16}\")\n", + "print(\"-\" * 58)\n", + "for city in compare_cities:\n", + " v = climate.where(climate[\"city\"] == city)\n", + " # For Sydney (S. hemisphere) 'summer' is roughly Dec 21-Mar 21, i.e. days 1-80 or 355-365\n", + " if city == \"Sydney\":\n", + " s = v.where((v[\"day\"] <= 80) | (v[\"day\"] >= 355))\n", + " label = \"(S. 
summer)\"\n", + " else:\n", + " s = v.where((v[\"day\"] >= SUMMER_START) & (v[\"day\"] <= SUMMER_END))\n", + " label = \"\"\n", + " mean_t = s[\"temperature\"].mean()\n", + " max_t = s[\"temperature\"].max()\n", + " mean_h = s[\"humidity\"].mean()\n", + " print(f\"{city:<12} {mean_t:>10.1f}°C {max_t:>9.1f}°C {mean_h:>14.1f}% {label}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "4e2161ee", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:14.696603Z", + "iopub.status.busy": "2026-04-07T12:06:14.695965Z", + "iopub.status.idle": "2026-04-07T12:06:14.752771Z", + "shell.execute_reply": "2026-04-07T12:06:14.751433Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 hottest days in Madrid:\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " city day temperature humidity \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_cities = {\n", + " \"Madrid\": \"#e63946\",\n", + " \"London\": \"#457b9d\",\n", + " \"Moscow\": \"#2d6a4f\",\n", + " \"Cairo\": \"#f4a261\",\n", + " \"Sydney\": \"#a8dadc\",\n", + "}\n", + "\n", + "fig, ax = plt.subplots(figsize=(12, 5))\n", + "\n", + "for city, color in plot_cities.items():\n", + " v = climate.where(climate[\"city\"] == city)\n", + " d = v[\"day\"].to_numpy().astype(int)\n", + " t = v[\"temperature\"].to_numpy()\n", + " order = np.argsort(d)\n", + " ax.plot(d[order], t[order], label=city, color=color, linewidth=1.5, alpha=0.85)\n", + "\n", + "ax.axvspan(SUMMER_START, SUMMER_END, alpha=0.10, color=\"gold\", label=\"N. 
summer\")\n", + "ax.set_xlabel(\"Day of year\")\n", + "ax.set_ylabel(\"Temperature (°C)\")\n", + "ax.set_title(\"Daily temperature — selected cities\")\n", + "ax.legend(loc=\"upper left\")\n", + "ax.grid(True, linestyle=\"--\", alpha=0.4)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4fbb0c53", + "metadata": {}, + "source": [ + "### 5.2 Summer temperature distribution — Madrid vs London\n", + "\n", + "A simple histogram comparison of how often each city exceeds different temperature thresholds." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "94e141a4", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:15.038502Z", + "iopub.status.busy": "2026-04-07T12:06:15.038229Z", + "iopub.status.idle": "2026-04-07T12:06:15.564804Z", + "shell.execute_reply": "2026-04-07T12:06:15.563854Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3kAAAGGCAYAAADGq0gwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAiyVJREFUeJzt3XecE9XaB/DfmexuNtsrW2B36b0p7WIBRKSKIHBVbIBYrgIKCCoqHUUQvVgQvFcpKoh6BdFXBAEpohRBKQrSpCx1YSvbN5nz/rFm2GyysH0m4ff9fBaSk8nkOcmTTJ6cmTNCSilBREREREREHkHROwAiIiIiIiKqPCzyiIiIiIiIPAiLPCIiIiIiIg/CIo+IiIiIiMiDsMgjIiIiIiLyICzyiIiIiIiIPAiLPCIiIiIiIg/CIo+IiIiIiMiDsMgjIiIiIiLyICzyiIjIrdWuXRtDhw7Vrm/atAlCCGzatKnKH3vKlCkQQji0CSEwcuTIKn9sAFi8eDGEEDhx4kS1PJ4nGzp0KGrXrl2qZU+cOAEhBBYvXlyp671eMG+Jqh6LPCIPsX//fgwaNAgJCQnw9fVFzZo1cccdd+Cdd97ROzTDWb16NaZMmaJ3GIb03nvvleqLqyd69dVX8dVXX+kdhktGjq2yCSEghMCjjz7q8vaXXnpJW+bSpUvVHJ37ql27Nu688069wyCiasIij8gD/Pzzz2jbti327t2Lxx57DO+++y4effRRKIqCt956S+/wDGf16tWYOnWq3mEYkicUeZ06dUJOTg46depUpvuVp5B6+eWXkZOTU6b7lEdJsT300EPIyclBQkJClcdQnXx9ffHll18iPz/f6bZPP/0Uvr6+OkR1RUJCAnJycvDQQw/pGgcRUUm89A6AiCrulVdeQXBwMH755ReEhIQ43JaUlKRPUDrLzs6Gn5+f3mHoSkqJ3NxcWCyW6yoORVGqvAjIysqCv78/vLy84OWl36bUZDLBZDLp9vhVpWfPnvj666/x3XffoV+/flr7zz//jOPHj2PgwIH48ssvqz0uq9UKVVXh4+Oje6
FJRHQ1HMkj8gDHjh1Ds2bNnAo8AKhRo4Z2+WrHkQghHHZhtB9rdPjwYTz44IMIDg5GZGQkJk6cCCklEhMT0a9fPwQFBSE6OhpvvPGGw/rsx0V9/vnnmDp1KmrWrInAwEAMGjQI6enpyMvLw+jRo1GjRg0EBARg2LBhyMvLc4rrk08+QZs2bWCxWBAWFob77rsPiYmJDst06dIFzZs3x+7du9GpUyf4+fnhxRdfdPlcDR06FPPmzdP6bP+zU1UVc+fORbNmzeDr64uoqCg88cQTSE1NdViPfdenTZs2oW3btrBYLGjRooV2HNiKFSvQokUL+Pr6ok2bNvjtt9+c4ggICMBff/2FHj16wN/fH7GxsZg2bRqklA7LljWmtWvXajG9//77AIBFixaha9euqFGjBsxmM5o2bYr58+c73f+PP/7A5s2bteelS5cuAFwfewa4PrbmanGkpaVh9OjRiIuLg9lsRv369TFr1iyoqury9SpKSokZM2agVq1a8PPzw2233YY//vjDaTlXx+QdOXIEAwcORHR0NHx9fVGrVi3cd999SE9PB1CYC1lZWViyZInWd/txfva+HzhwAPfffz9CQ0Nxyy23XPV5AYClS5eiUaNGWg5s2bLF4faSjtUqvs6rxVbSsU3vvfcemjVrBrPZjNjYWIwYMQJpaWkOy9jfNwcOHMBtt90GPz8/1KxZE7Nnz3bZn+pUs2ZNdOrUCcuWLXNoX7p0KVq0aIHmzZs73efHH3/EP//5T8THx8NsNiMuLg5jxoxxOdL61VdfoXnz5vD19UXz5s2xcuVKp2Xsn5dz5szB3LlzUa9ePZjNZhw4cKDEz9LSrNeVO++8E3Xr1nV5W8eOHdG2bVvt+rp163DLLbcgJCQEAQEBaNSoUYmfd2VltVoxffp0ra+1a9fGiy++6PTZbH+Pb926Fe3bt4evry/q1q2Ljz76yGmdf/zxB7p27QqLxYJatWphxowZJb7f3T1viYyEI3lEHiAhIQHbtm3D77//7vLLT0Xce++9aNKkCV577TV8++23mDFjBsLCwvD++++ja9eumDVrFpYuXYpx48ahXbt2TrvIzZw5ExaLBS+88AKOHj2Kd955B97e3lAUBampqZgyZQq2b9+OxYsXo06dOpg0aZJ231deeQUTJ07EPffcg0cffRQXL17EO++8g06dOuG3335zKGqTk5PRq1cv3HfffXjwwQcRFRXlsj9PPPEEzp49i3Xr1uHjjz92efvixYsxbNgwPP300zh+/Djeffdd/Pbbb/jpp5/g7e2tLXv06FHcf//9eOKJJ/Dggw9izpw56Nu3LxYsWIAXX3wRTz31lPYc3HPPPTh06BAU5cpvazabDT179sQ//vEPzJ49G2vWrMHkyZNhtVoxbdq0csV06NAhDB48GE888QQee+wxNGrUCAAwf/58NGvWDHfddRe8vLzwzTff4KmnnoKqqhgxYgQAYO7cuRg1ahQCAgLw0ksvAUCJz+O1uIojOzsbnTt3xpkzZ/DEE08gPj4eP//8MyZMmIBz585h7ty5V13npEmTMGPGDPTu3Ru9e/fGr7/+iu7du7vcpa+o/Px89OjRA3l5eRg1ahSio6Nx5swZ/N///R/S0tIQHByMjz/+GI8++ijat2+Pxx9/HABQr149h/X885//RIMGDfDqq686FeLFbd68GZ999hmefvppmM1mvPfee+jZsyd27txZ5vdoaWIrasqUKZg6dSq6deuGJ598EocOHcL8+fPxyy+/OOVLamoqevbsiQEDBuCee+7B//73Pzz//PNo0aIFevXqVaY4K9v999+PZ555BpmZmQgICIDVasUXX3yBsWPHIjc312n5L774AtnZ2XjyyScRHh6OnTt34p133sHp06fxxRdfaMt9//33GDhwIJo2bYqZM2ciOTkZw4YNQ61atVzGsWjRIuTm5uLxxx+H2WxGWFiYyyKlrOst6t5778XDDz+MX375Be3atdPaT548ie3bt+
P1118HUFgw3XnnnWjZsiWmTZsGs9mMo0eP4qeffrrmY5TGo48+iiVLlmDQoEF49tlnsWPHDsycORMHDx50KliPHj2KQYMGYfjw4RgyZAgWLlyIoUOHok2bNmjWrBkA4Pz587jttttgtVrxwgsvwN/fH//5z39cjup7St4SGYYkIrf3/fffS5PJJE0mk+zYsaN87rnn5Nq1a2V+fr7DcsePH5cA5KJFi5zWAUBOnjxZuz558mQJQD7++ONam9VqlbVq1ZJCCPnaa69p7ampqdJiscghQ4ZobRs3bpQAZPPmzR3iGDx4sBRCyF69ejk8fseOHWVCQoJ2/cSJE9JkMslXXnnFYbn9+/dLLy8vh/bOnTtLAHLBggVXfZ7sRowYIV19/P34448SgFy6dKlD+5o1a5zaExISJAD5888/a21r166VAKTFYpEnT57U2t9//30JQG7cuFFrGzJkiAQgR40apbWpqir79OkjfXx85MWLF8sd05o1a5z6lp2d7dTWo0cPWbduXYe2Zs2ayc6dOzsta8+H4hYtWiQByOPHj18zjunTp0t/f395+PBhh/YXXnhBmkwmeerUKaf12yUlJUkfHx/Zp08fqaqq1v7iiy9KAC5zz/58//bbbxKA/OKLL0pcv5RS+vv7O6zHzt73wYMHl3hbUQAkALlr1y6t7eTJk9LX11fefffdWtuQIUMccv5q6ywptuLPv/156t69u7TZbNpy7777rgQgFy5cqLXZ3zcfffSR1paXlyejo6PlwIEDnR6rugCQI0aMkCkpKdLHx0d+/PHHUkopv/32WymEkCdOnNCeI/v7RErXOT5z5kwphHB4P7Zu3VrGxMTItLQ0re3777+XABxeD/vnZVBQkExKSnJYr6vP0tKu15X09HRpNpvls88+69A+e/Zsh/j//e9/O/W7tBISEmSfPn1KvH3Pnj0SgHz00Ucd2seNGycByB9++MFhXQDkli1btLakpCSnPowePVoCkDt27HBYLjg42OPylshouLsmkQe44447sG3bNtx1113Yu3cvZs+ejR49eqBmzZr4+uuvK7TuojPcmUwmtG3bFlJKDB8+XGsPCQlBo0aN8Ndffznd/+GHH3b4BbZDhw6QUuKRRx5xWK5Dhw5ITEyE1WoFULi7o6qquOeee3Dp0iXtLzo6Gg0aNMDGjRsd7m82mzFs2LAK9fWLL75AcHAw7rjjDofHbNOmDQICApwes2nTpujYsaNDHwCga9euiI+Pd2p39fwUnWrfPvV+fn4+1q9fX66Y6tSpgx49ejg9TtFfztPT03Hp0iV07twZf/31l7bLYmVyFccXX3yBW2+9FaGhoQ596datG2w2m9PujEWtX78e+fn5GDVqlMOujKNHj75mLMHBwQCAtWvXIjs7u3wdAvCvf/2r1Mt27NgRbdq00a7Hx8ejX79+WLt2LWw2W7ljuBb78zR69GiHUePHHnsMQUFB+Pbbbx2WDwgIwIMPPqhd9/HxQfv27V3manULDQ1Fz5498emnnwIAli1bhptuuqnESWaK5nhWVhYuXbqEm266CVJKbXfpc+fOYc+ePRgyZIiWF0DhZ2jTpk1drnfgwIGIjIy8aqzlWW9RQUFB6NWrFz7//HOHUeLPPvsM//jHP7TPE/veC6tWrSrVLs5lsXr1agDA2LFjHdqfffZZAHDKnaZNm+LWW2/VrkdGRjptB1avXo1//OMfaN++vcNyDzzwgMO6PClviYyCRR6Rh2jXrh1WrFiB1NRU7Ny5ExMmTMDly5cxaNAgHDhwoNzrLVqsAIVfmH19fREREeHUXvwYsZLuDwBxcXFO7aqqagXHkSNHIKVEgwYNEBkZ6fB38OBBpwllatasCR8fn/J18m9HjhxBeno6atSo4fSYmZmZTo9Zlr4BcHp+FEVxOg6nYcOGAKAdY1XWmOrUqeOybz/99BO6desGf39/hISEIDIyUjuOp6qKvOKOHDmCNWvWOPWjW7duAK4+SdDJkycBAA
0aNHBoj4yMRGho6DVjGTt2LD744ANERESgR48emDdvXpn7XdJz60rxOIHC1zY7OxsXL14s0+OWhf15su+ma+fj44O6detqt9vVqlXL6ZjC0NBQl+/lolJSUnD+/Ply/V1r99qi7r//fqxbtw6nTp3CV199hfvvv7/EZU+dOoWhQ4ciLCwMAQEBiIyMROfOnQFcyfGS8ghwfs7sSvO6l2e9xd17771ITEzEtm3bABQea717927ce++9DsvcfPPNePTRRxEVFYX77rsPn3/+eaUUfCdPnoSiKKhfv75De3R0NEJCQpxyp/jnH+CcOydPnizVc1JdeUt0PeExeUQexsfHB+3atUO7du3QsGFDDBs2DF988QUmT55c4gQRVxtZcDVzX0mz+RX9Bfpay15rHaqqQgiB7777zuWyAQEBDtcrY+ZGVVVRo0YNLF261OXtxX/NL2/fqjImV8/DsWPHcPvtt6Nx48Z48803ERcXBx8fH6xevRr//ve/S/UFsay54yoOVVVxxx134LnnnnN5H3uBWxXeeOMNDB06FKtWrcL333+Pp59+GjNnzsT27dtLdcwUUDk5VlR53o+Vrby5OmDAAGzevLlcj7lx40ZtQp9rueuuu2A2mzFkyBDk5eXhnnvucbmczWbDHXfcgZSUFDz//PNo3Lgx/P39cebMGQwdOrRCRVB1zQrbt29f+Pn54fPPP8dNN92Ezz//HIqi4J///KdDLFu2bMHGjRvx7bffYs2aNfjss8/QtWtXfP/995Uy02pJeVlcZX7OlZWej03kLljkEXkw+4xs586dAwBtxKP4bGXFfyU1gnr16kFKiTp16lT6l/+SvsTUq1cP69evx80331wtX+xUVcVff/3l0L/Dhw8DgDbrYmXE9M033yAvLw9ff/21w6/vxXf1BEp+bormTtEJb8qSO/Xq1UNmZqY2clcW9l30jhw54jD6efHixVL/et+iRQu0aNECL7/8Mn7++WfcfPPNWLBgAWbMmAGg9F9uS+PIkSNObYcPH4afn59WmIeGhjq9FwHXz2lpY7M/T4cOHXJ4nvLz83H8+PFyPfeuvPHGG+UeNWnVqlWpl7VYLOjfvz8++eQT9OrVy2kPArv9+/fj8OHDWLJkCR5++GGtfd26dQ7LFc2j4g4dOlTquIqrjPX6+/vjzjvvxBdffIE333wTn332GW699VbExsY6LKcoCm6//XbcfvvtePPNN/Hqq6/ipZdewsaNGyv0+iYkJEBVVRw5cgRNmjTR2i9cuIC0tLRynYsxISGhVM9JdeUt0fWEu2sSeYCNGze6/AXTfoyFfReYoKAgREREOB379N5771V9kGU0YMAAmEwmTJ061alvUkokJyeXe93+/v4AnIvde+65BzabDdOnT3e6j9VqdfmFvKLeffdd7bKUEu+++y68vb1x++23V1pM9l+9iz6P6enpWLRokdOy/v7+Ltdpn82xaO7Yp/UvrXvuuQfbtm3D2rVrnW5LS0vTjsd0pVu3bvD29sY777zj0I9rzcgJABkZGU7rbtGiBRRFcZgavqS+l8e2bdvw66+/atcTExOxatUqdO/eXXs96tWrh/T0dOzbt09b7ty5cy6n3S9tbN26dYOPjw/efvtth+fpww8/RHp6Ovr06VOBXl3Rpk0bdOvWrVx/19q9trhx48Zh8uTJmDhxYonLuMpxKSXeeusth+ViYmLQunVrLFmyxGF33XXr1lVot/bKWu+9996Ls2fP4oMPPsDevXsddtUECneTLa5169YA4PIUNGXRu3dvAM7vqTfffBMAypU7vXv3xvbt27Fz506t7eLFi057JlRX3hJdTziSR+QBRo0ahezsbNx9991o3Lgx8vPz8fPPP+Ozzz5D7dq1HSYkefTRR/Haa6/h0UcfRdu2bbFlyxZt9MhI6tWrhxkzZmDChAk4ceIE+vfvj8DAQBw/fhwrV67E448/jnHjxpVr3fYJMZ5++mn06NEDJpMJ9913Hzp37ownnn
gCM2fOxJ49e9C9e3d4e3vjyJEj+OKLL/DWW29h0KBBldZHX19frFmzBkOGDEGHDh3w3Xff4dtvv8WLL76ojfZURkzdu3eHj48P+vbtiyeeeAKZmZn473//ixo1amijvEWfm/nz52PGjBmoX78+atSoga5du6J79+6Ij4/H8OHDMX78eJhMJixcuBCRkZE4depUqfo7fvx4fP3117jzzju1qdazsrKwf/9+/O9//8OJEydKHKmJjIzEuHHjMHPmTNx5553o3bs3fvvtN3z33Xcl3sfuhx9+wMiRI/HPf/4TDRs2hNVqxccffwyTyYSBAwc69H39+vV48803ERsbizp16miT5pRV8+bN0aNHD4dTKADA1KlTtWXuu+8+PP/887j77rvx9NNPIzs7G/Pnz0fDhg0dCsSyxBYZGYkJEyZg6tSp6NmzJ+666y4cOnQI7733Htq1a+cwWYW7aNWq1TVH/xo3box69eph3LhxOHPmDIKCgvDll1+6HG2cOXMm+vTpg1tuuQWPPPIIUlJS8M4776BZs2bIzMwsd5yVsd7evXsjMDAQ48aNc8pPAJg2bRq2bNmCPn36ICEhAUlJSXjvvfdQq1Yt7dyNV3P06FFt5LqoG264AX369MGQIUPwn//8B2lpaejcuTN27tyJJUuWoH///rjttttK90QU8dxzz+Hjjz9Gz5498cwzz2inUEhISHD4ccMT85ZId9U5lScRVY3vvvtOPvLII7Jx48YyICBA+vj4yPr168tRo0bJCxcuOCybnZ0thw8fLoODg2VgYKC85557ZFJSUomnUCg+VfeQIUOkv7+/UwydO3eWzZo1067bp7EvPm29fcr3X375xaG9pMf78ssv5S233CL9/f2lv7+/bNy4sRwxYoQ8dOhQiY99LVarVY4aNUpGRkZKIYTTdPX/+c9/ZJs2baTFYpGBgYGyRYsW8rnnnpNnz57VlilpOnL8Pf17Ufbp1l9//XWtzf48Hjt2THbv3l36+fnJqKgoOXnyZIcpxCsjJiml/Prrr2XLli2lr6+vrF27tpw1a5ZcuHCh0+kPzp8/L/v06SMDAwMlAIfTKezevVt26NBB+vj4yPj4ePnmm2+WeAqFkuK4fPmynDBhgqxfv7708fGRERER8qabbpJz5sxxOuVHcTabTU6dOlXGxMRIi8Uiu3TpIn///XeZkJBw1VMo/PXXX/KRRx6R9erVk76+vjIsLEzedtttcv369Q7r//PPP2WnTp2kxWJxOC1DSblZ9Lai7DnwySefyAYNGkiz2SxvuOEGh1No2H3//feyefPm0sfHRzZq1Eh+8sknLtdZUmyunn8pC6eeb9y4sfT29pZRUVHyySeflKmpqQ7LlPS+KenUDtXF1XuoOFevyYEDB2S3bt1kQECAjIiIkI899pjcu3evy9PGfPnll7JJkybSbDbLpk2byhUrVjj129X7tvht5VnvtTzwwAMSgOzWrZvTbRs2bJD9+vWTsbGx0sfHR8bGxsrBgwc7nZbEFftpD1z9DR8+XEopZUFBgZw6daqsU6eO9Pb2lnFxcXLChAkyNzfXaV2u3uOdO3d2OgXLvn37ZOfOnaWvr6+sWbOmnD59uvzwww89Lm+JjEZIyaNUiYiq29ChQ/G///2vQiMHRERERK7wmDwiIiIiIiIPwiKPiIiIiIjIg7DIIyIiIiIi8iA8Jo+IiIiIiMiDcCSPiIiIiIjIg7DIIyIiIiIi8iAefzJ0VVVx9uxZBAYGQgihdzhERERERETlIqXE5cuXERsbC0UpebzO44u8s2fPIi4uTu8wiIiIiIiIKkViYiJq1apV4u0eX+QFBgYCKHwigoKCdI6mkKqqOH36NGrVqnXVCpyoujE33UdOfgEGz1oOAPj0+ftg8fHWOaKqw7zUj5qTi6S+gwEANb75FIrFV+eIjIN5SUbEvPR8GRkZiIuL02qcknh8kWffRTMoKMhQRV5AQACCgoL4BiRDYW66D+/8AniZLQAKP9
88vchjXupD9fZBjlfhV4WgoCAWeUUwL8mImJfXj2sdhsZXn4iIiIiIyIOwyCMiIiIiIvIgLPJ0IIRAbGwsZ/skw2FukhExL8mImJdkRMxLsvP4Y/KMSAgBLy8vvgHJcJib7kNAICokQLvsyZiX+hECMEVHaZfpCuZl1bDZbCgoKNA7DLemqipUVdU7DConb29vmEymCq9HSCllJcRjWBkZGQgODkZ6erqhJl45deoU4uPjeVAsGQpzk4yIeUlGxLysXFJKnD9/HmlpaXqH4taklLDZbDCZTPwBwo2FhIQgOjra5WtY2tqGI3lEREREpCt7gVejRg34+fmxQCknKSUKCgrg7e3N59ANSSmRnZ2NpKQkAEBMTEy518Uij4iIiIh0Y7PZtAIvPDxc73DcmpQSiqLAx8eHRZ6bslgKT4+UlJSEGjVqlHvXTV33L5g5cybatWuHwMBA1KhRA/3798ehQ4cclunSpQuEEA5///rXv3SKmIjIGPIKrBg5/2uMnP818gqseodDHkrm5eHSI6Nw6ZFRkHl5eodDHsp+DJ6fn5/OkRAZg/29UJHjU3Udydu8eTNGjBiBdu3awWq14sUXX0T37t1x4MAB+Pv7a8s99thjmDZtmnbd3T8EFEXhPvxkSMxN96FKicNnLmmXPRnzUj9SlSj487B2meMCVzAvKx9HnipOCMFRPA9QGa+frkXemjVrHK4vXrwYNWrUwO7du9GpUyet3c/PD9HR0dUdXpWRUsJqtXJ/aTIc5iYZEfOSjIh5SUYkpYR9TkXm5fXNUD8/paenAwDCwsIc2pcuXYqIiAg0b94cEyZMQHZ2th7hVRopJc6ePQsPn9iU3BBzk4yIeUlGxLyk6nDixAkIIbBnz56rLjdlyhS0bt0aQMm7+A0dOhT9+/ev3ADJsAwz8Yqqqhg9ejRuvvlmNG/eXGu///77kZCQgNjYWOzbtw/PP/88Dh06hBUrVrhcT15eHvKKHDeQkZGhrb/oOUMURXE6h4j9mL+qalcUBVJKLRb77fb24huK8rTr1afKiJ190r9Pqqpqlz2lTxVpN3KfZJH7Shefb+7Yp5La7eu0f356Qp+u1W6UPhVVNM/cuU+V1Q4456S790nv16n4KJSUEqnPTUZ1Cnt9msvX2h5PccOGDcOSJUvw+OOPY8GCBQ7LjhgxAvPnz8eQIUOwaNGiEtdRmnZXz01Rzz77LEaNGuXQVtLyV1tPSe0Vid2o7UaKpXh78fdD0fdNac+BaJgib8SIEfj999+xdetWh/bHH39cu9yiRQvExMTg9ttvx7Fjx1CvXj2n9cycORNTp051ak9MTERgYCAAICAgABEREUhJSUFmZqa2TEhICEJCQnDx4kXk5ORo7eHh4QgMDMS5c+ccfh2JioqCxWJBYmKiw4sVGxsLLy8vnDp1yiGG+Ph4WK1WnDlzBqmpqRBCQFEUJCQkIDc3FxcuXNCW9fb2Rs2aNZGZmYnk5GSt3WKxICoqCunp6Q7nktG7T2fPntXahBDsk5v2SUqpjah7Sp8Az3udAEAtcnRUYuJpmL1NJfZp1faDyFUFMm0mBJhs8FWuxJJtU5CtKgj2ssFbXGnPtCnIVRWEellhKrLHT7pVQYFUEO5tdTg+K7XAhEkPdKuS18lsNhc+dnq69sMd4B6vk7vnXlyNKO16YmIi4Gt2+z5V1usUFhaGrKwsJCYmQgjhEX3S63UqKCjQToJuP8bRbDaX+MNOVRauQOFIWNH12HfJzc/Pd1je29sbAFCrVi189tlnmDVrFiwWC3x8fJCTk4NPP/0UcXFxsNlsyM/P1/pU9PkSovAYOlVVYbVemURLURR4e3vDZrNpMeXn52vtVqtV+2HWZrPBYrEgMDAQ+fn5Duvx8vKCyWTSnmNVVZGfn3/NPhVv9/HxcYodQLn7ZO9X0XZ7n+xMJhO8vLyc2ov2qSyvkzv1yf565eXlOb2fLl++jNIwxMnQR44ciVWrVmHLli2oU6fOVZfNys
pCQEAA1qxZgx49ejjd7mokLy4uDqmpqQ4nDNTz1yqbzYbExETExcVBURSP+QWuqtrZp+odyTt9+jTi4+Nd/tLkjn2qSLuR+5SbX4D+M5YCAL56+QH4+niXuPyUpevx9++2ABwnz7hWu4BjLFdrn/5Q9yobyTtz5gxq1aqlfQkruryRX6fythulT8jLx4Xb+wEAaqxbCWHxdfs+VeZI3qlTp1CrVi2tMHH3Pun1OuXk5OD48eOoU6cOfH19teWldI+RvLS0NBw7dgzPP/88HnjgAQghsHTpUsyePRt16tRBSEgIFi1ahLVr12LGjBn4/fffYTKZ0LFjR8ydOxf169fX1r1z507861//wsGDB9G8eXO8+OKLGDhwIH799Ve0bt0amzZtQteuXfHtt99i4sSJ2L9/P9auXYtNmzZh1apV+O2335Cfnw+TyYTx48dj0aJFMJlMeOSRR5CUlIT09HSsXLnyqn1yt1Gv8rYbKZbi7bm5udp7wmKxOLxvMjIyEBoaauyToUspMWrUKKxcuRKbNm26ZoEHQNsnuaSTA5rNZu1X36LsxVTxNleqsl0IAS8vL6e+2j/8XC1flna9+lQZsbNP+vdJURTUrl3b4bbSrseofapIu5H7JBQFwX6+2uXi6yq6vGP5Vrw8u3p7SfMpltReFa+TfY+Hkhj5dSpvu1H6pAJQQoILby+WZ+7ap8psL/p5Wd4Yy9ruybnn9DnnIr7qUNLjXi2eRx55BIsXL8aDDz4IAFi0aBGGDRuGTZs2affNysrC2LFj0bJlS2RmZmLSpEkYMGAA9uzZA0VRkJmZib59++KOO+7AJ598guPHj+OZZ57R7l/0+ZkwYQLmzJmDunXrIjQ0FJs3b9aWM5vNmD17NpYsWYKFCxeiSZMmeOONN7By5Up07dq1VM9xSTlTGc+XkdqNFEvR9uKvd9H3TUnvreJ0LfJGjBiBZcuWYdWqVQgMDMT58+cBAMHBwbBYLDh27BiWLVuG3r17Izw8HPv27cOYMWPQqVMntGzZUs/QK0RKidzcXPj6+ur2AUbkCnPTfVh8vPHFi/frHUa1YF7qR7H4Imr153qHYUjMSyrqwQcfxIQJE3Dy5EkAwE8//YTly5drRR4ADBw40OE+CxcuRGRkJA4cOIDmzZtj2bJlUFUVH374IXx9fdGsWTOcPn0aTz75pNPjTZs2DXfccYdTu32Edu7cuZgwYQIGDBgAAFiwYAHWrl1biT0mo9N1ds358+cjPT0dXbp0QUxMjPb32WefASjcV3b9+vXo3r07GjdujGeffRYDBw7EN998o2fYFSalxIULF1wO1RLpiblJRsS8JCNiXlJRkZGR6NOnDxYvXoxFixahT58+iIiIcFjmyJEjGDx4MOrWrYugoCBtJNh+nOLBgwfRsmVLbZdVAOjYsaPLx2vbtm2JsVy6dAnnzp1Dhw4dtDYvL6+r3oc8j+67a15NXFycNvxMRERERGRUjzzyCEaOHAkAmDdvntPtffv2RUJCAv773/8iNjYWqqqiefPmThOClIa/v3+F4yXPZqjz5BERUenkFVgx7oPVGPfBauQVWK99B6JykHl5SB4xHskjxkMWmdSMiJz17NkT+fn5KCgocJocMDk5GYcOHcLLL7+M22+/HU2aNEFqaqrDMk2aNMG+ffuQm5urtW3fvr3McQQHByMmJgY7duzQ2qxWK3bv3l3mdZH7MswpFK439ulciYyGuekeVCmx78R57bKnY17qQ6oS+b/t0y7zyDNHzEsqymQy4eDBg9rlokJDQxEeHo7//Oc/iImJwalTp/DCCy84LHP//ffjpZdewmOPPYYJEybgxIkTmDNnTpnjEELg6aefxmuvvYYGDRqgcePGePPNNx1OmUGejyN5OlAUBTVr1iz17DhE1YW5SUbEvCQjYl6SK0FBQS6ntVcUBcuXL8fu3bvRvHlzjBkzBq+//rrDMgEBAfjmm2+wf/9+3HDDDXjppZcwa9asMj2+EIXncx
s3bhweeughDBkyBB07dkRgYCDuvvvuCvWN3IshzpNXlTIyMhAcHHzNc0lUJyklMjMzERAQwBm5yFCYm+4jJ78A/aZ9DABYNekhWHxKHlGY+PG6aolp+kPOM71VBualftScXO08eVEbVkGx+F7jHtcP5mXlKXpOsKKTjlDZSVl4AnlFUZiXbuxq74nS1jb8+UkHUkokJydzRi4yHOYmGRHzkoyIeUlGZbXyOG1ikUdERERERORRWOQRERERERF5EM6uqROLxaJ3CEQuMTfdh9n7+vkIZ17qR/ia9Q7BsJiXZEScDIgAFnm6UBQFUVFReodB5IS56T4sPt74ZvLDeodRLZiX+lEsvoj+4Wu9wzAk5iUZkRCCp/YgANxdUxdSSqSlpfFgbTIc5iYZEfOSjIh5SUYkpYTVamVeEos8PXDDQEbF3CQjYl6SETEvyahsNpveIZABcHdNIiI3lF9gxbRPfwAATBrcFT7X0fF5VH1kXj5SX5wOAAh9dSKE2UfniIiIqDT4rYCIyA3ZpMTOw6e1y0RVQaoq8rbt1C7z1MpERO6Bu2vqJCAgQO8QiFxibpIRMS/JiJiXpAchBL766qsSb+fsmgRwJE8XiqIgIiJC7zCInDA3yYiYl2REzMvqMfHjddX6eNMfuqNMyw8dOhRpaWlXLbqqE2fXJDuW+jpQVRWXLl2Cqqp6h0LkgLlJRsS8JCNiXpIRSSlRUFDACYGIRZ5eMjMz9Q6ByCXmJhkR85KMiHlJV7N582a0b98eZrMZMTExeOGFF2C1WrXbu3TpgqeffhrPPfccwsLCEB0djSlTpjis48iRI+jUqRN8fX3RtGlTrFvnPLK5f/9+dO3aFRaLBREREfjXv/7lkJtDhw5F//79MWfOHMTExCA8PBwjRoxAQUFBlfWd9Mcij4iIiIioEp05cwa9e/dGu3btsHfvXsyfPx8ffvghZsyY4bDckiVL4O/vjx07dmD27NmYNm2aVsipqooBAwbAx8cHO3bswIIFC/D888873D8rKws9evRAaGgofvnlF3z++ef44YcfMGrUKIflNm7ciGPHjmHjxo1YsmQJFi9ejMWLF1fpc0D6YpFHRERERFSJ3nvvPcTFxeHdd99F48aN0b9/f0ydOhVvvPGGwy6+LVu2xOTJk9GgQQM8/PDDaNu2LTZs2AAAWL9+Pf7880989NFHaNWqFTp16oRXX33V4XGWLVuG3NxcfPTRR2jevDm6du2KuXPn4uOPP8aFCxe05UJDQ7VY7rzzTvTp00d7HPJMnHhFB0IIhISEQAhORk3Gwtx0HxYfb3w/4xG9w6gWzEv9KBZfxPy8Vu8wDIl5SVdz8OBBdOzY0SE/br75ZmRmZuL06dOIj48HUFjkFRUTE4OkpCRtHXFxcYiNjdVu79ixo9PjtGrVCv7+/lrbLbfcAlVVcejQIURFRQEAmjVrBpPJ5PA4+/fvr6TekhGxyNOBfcNAZDTMTTIi5iUZEfOSKkPxmTCFEBWazEcIAS8v56/3lf04ZHzcXVMHqqriwoULfHOR4TA3yYiYl2REzEu6miZNmmDbtm0Os1z+9NNPCAwMRK1atUq9jsTERJw7d05r2759u9Mye/fuRVZWFoDC2TU3b94MRVHQqFGjSugJuSsWeTrJycnROwQil5ib7iG/wIrpn/6A6Z/+gPwC67Xv4OaYl/qQeflIfWkGUl+aAZmXr3c4hsO8JABIT0/Hnj17HP4ef/xxJCYmYtSoUfjzzz+xatUqTJ48GWPHji31ycq7deuGhg0bYsiQIdi7dy9+/PFHvPTSSw7LPPDAA/D19cWQIUPw+++/Y+PGjRg9ejQeeughbVdNuj5xd00iIjdkkxI//nECADBu4K36BkMeS6oqcjf+WHj55XHg0WdEzjZt2oQbbrjBoW348OFYvXo1xo8fj1atWiEsLAzDhw/Hyy+/XOr1KoqClStXYvjw4Wjfvj1q166Nt99+Gz179tSW8fPzw9q1a/HMM8+gXb
t28PPzQ//+/TF37tzK6h65KRZ5RERERGRI0x+6Q+8QrupapyLYuXNnibdt2rTJqe2rr75yuN6wYUP8+OOPDm3FT3TeokUL/PDDD9pt+fn58PHxcYixOBaBno+7a+pACIHw8HDOyEWGw9wkI2JekhExL8moXE28QtcfZoEOhBAIDAzUOwwiJ8xNMiLmJRkR85KMSAjhcKoEun5xJE8HqqrizJkznJGLDIe5SUbEvCQjYl6SEdl31yy+Syddf1jk6aSgoEDvEIhcYm6SETEvyYiYl2RELPAI4O6aREREVElSxk+qlscJe31atTwOEZG7YpFHROSGfL29sGrSQ9ploqogfM2I2rBKu0xERO6B3wx0IIRAVFQUZ+Qiw2Fuug8hBCw+3nqHUS2Yl/oRQkBYfPUOw5CYl2RU3t7Xx7aBro5Fng6EELBYLHqHQeSEuUlGxLwkI2JekhEJIfjDAwHgxCu6UFUVJ0+e5IxcZDjMTfeRb7Xh9S+34PUvtyDfatM7nCrFvNSPzM9H2ow5SJsxBzI/X+9wDIV5SUYkpUReXh4nXyEWeXrhm4+MirnpHmyqinW/HcW6347Cdh18yWRe6kPaVOSsXoec1esgbZ6fZ2XFvKTqcOLECQghsGfPnqsuN2XKFNxwww1XXWbo0KHo379/5QVHhsUij4iIiIioHIYOHQohBP71r3853TZixAgIITB06NBqiWXcuHFYv359tTyWUfz3v//FrbfeitDQUISGhqJbt27YuXOnwzL216joX8+ePa+6XpvNhokTJ6JOnTqwWCyoV68epk+f7vDDzvnz59GrVy/ExsZi5MiRTqP6R48exbBhw1CrVi2YzWbUqVMHgwcPxq5duyrvCbgKFnlEREREROUUFxeH5cuXIycnR2vLzc3FsmXLEB8fX+WPL6WE1WpFQEAAwsPDq/zxjGTTpk0YPHgwNm7ciG3btiEuLg7du3fHmTNnHJbr2bMnzp07p/19+umnV13vrFmzMH/+fLz77rs4ePAgZs2ahdmzZ+Odd97Rlpk4cSLatm2L7777Dn/99ReWL1+u3bZr1y60adMGhw8fxvvvv48DBw5g5cqVaNy4MZ599tnKfRJKwCJPB0IIxMbG8sBYMhzmJhkR85KMiHlJdjfeeCPi4uKwYsUKrW3FihWIj4932n1yzZo1uOWWWxASEoLw8HDceeedOHbsmMMyO3fuxA033ABfX1+0bdsWv/32m8PtmzZtghAC3333Hdq0aQOz2YytW7dqu2vaZ9e02WwYO3as9ljPPffcNXcxXrx4MUJCQvB///d/aNSoEfz8/DBo0CBkZ2djyZIlqF27NkJDQ/H000/DZrtyPHheXh7GjRuHmjVrwt/fHx06dMCmTZu025OTkzF48GDUrFkTfn5+aNGihVOh1aVLFzz99NN47rnnEBYWhujoaEyZMuWq8S5duhRPPfUUWrdujcaNG+ODDz6AqqrYsGGDw3JmsxnR0dHaX2ho6FXX+/PPP6Nfv37o06cPateujUGDBqF79+4Oo4Spqalo0aIFWrRogbp16yItLQ1AYdE9dOhQNGjQAD/++CP69OmDevXqoXXr1pg8eTJWrVp11ceuLCzydCCEgJeXFzcMZDjMTTIi5iUZEfOyeqg5uSX+ybz8MiybV6ply+uRRx7BokWLtOsLFy7EsGHDnJbLysrC2LFjsWvXLmzYsAGKouDuu+/WdvXLzMzEnXfeiaZNm2L37t2YMmUKxo0b5/IxX3jhBbz22ms4ePAgWrZsqbXbd0l84403sHjxYixcuBBbt25FSkoKVq5cec2+ZGdn4+2338by5cuxZs0abNq0CXfffTdWr16N1atX4+OPP8b777+P//3vf9p9Ro4ciW3btmH58uXYt28f/vnPf6Jnz544cuQIgMKRzTZt2uDbb7/F77//jscffxwPPfSQ066VS5Ysgb+/P3bs2IHZs2dj2rRpWLdu3TVjLhp7QUEBwsLCHNo3bdqEGjVqoFGjRn
jyySeRnJx81fXcdNNN2LBhAw4fPgwA2Lt3L7Zu3YpevXppy7zwwgsYNWoUzGYzfv31Vzz88MMAgD179uCPP/7As88+C0VxLrVCQkJK3Z+K4CkUdKCqKk6dOoX4+HiXLz6RXpibZETMSzIi5mX1uHB7vxJvM3dsj7A3pmvXk/rcA5mb53JZnxtaInze69r1iwMfhpqW7rRczM9ryxXngw8+iAkTJuDkyZMAgJ9++gnLly93GM0CgIEDBzpcX7hwISIjI3HgwAE0b94cy5Ytg6qq+PDDD+Hr64tmzZrh9OnTePLJJ50ec9q0abjjjjuc2vPz8+Hj44O5c+diwoQJGDBgAABgwYIFWLv22v0rKCjA/PnzUa9ePQDAoEGD8PHHH+PChQsICAhA06ZNcdttt2Hjxo249957cerUKSxatAinTp1CbGwsgMLjA9esWYNFixbh1VdfRc2aNR2K1VGjRmHt2rX4/PPP0b59e629ZcuWmDx5MgCgQYMGePfdd7FhwwaX/XTl+eefR2xsLLp166a19ezZEwMGDECdOnVw7NgxvPjii+jVqxe2bdsGk8nkcj0vvPACMjIy0LhxY5hMJthsNrzyyit44IEHtGXatm2LM2fO4NKlS4iOjtba7YVt48aNSxVzVWGRR0Skk4kfl/7XyeKKzqg5/dMfYOKXTCIi3URGRqJPnz5YvHgxpJTo06cPIiIinJY7cuQIJk2ahB07duDSpUvaCN6pU6fQvHlzbVTO19dXu0/Hjh1dPmbbtm1LjCc9PR3nzp1Dhw4dtDYvLy+0bdv2mrts+vn5aQUeAERFRaF27doICAhwaEtKSgIA7N+/HzabDQ0bNnRYT15ennaMoM1mw6uvvorPP/8cZ86cQX5+PvLy8uDn5+dwn6IjkgAQExOjPc61vPbaa1phXfT5u++++7TLLVq0QMuWLVGvXj1s2rQJt99+u8t1ff7551i6dCmWLVuGZs2aYc+ePRg9ejRiY2MxZMgQbTkvLy+HAg8wzqy7LPKIiNyQIgRurBerXSaqCsLXjBrffqZdJqpuURtKPn5JFPtxq8a3n19lWcfPycgvP6pYYC488sgjGDlyJABg3rx5Lpfp27cvEhIS8N///hexsbFQVRXNmzdHfjnOQ+nv71+heEtiP6bPTgjhsq3oLqYmkwm7d+92GhmzF4avv/463nrrLcydOxctWrSAv78/Ro8e7dTvqz3O1cyZMwevvfYa1q9f71QoFle3bl1ERETg6NGjJRZ548ePxwsvvKAViC1atMDJkycxc+ZMhyLPFXux++eff17zlBZViUUeEZEbEkLA28v1biZElUUIAVNoiN5h0HVMsfhee6EqXra0evbsifz8fAgh0KNHD6fbk5OTcejQIW3afwDYunWrwzJNmjTBxx9/jNzcXG00avv27WWOJTg4GDExMdixYwc6deoEALBardi9ezduvPHGMq/vam644QbYbDYkJSVp/Srup59+Qr9+/fDggw8CKNzd+fDhw2jatGmFH3/27Nl45ZVXsHbt2quObtqdPn0aycnJiImJKXGZ7Oxsp92wTSZTqQrO1q1bo2nTpnjjjTdw7733Oq0nLS2tWo7L4/49OlAUhfvwkyExN8mImJdkRMxLKs5kMuHgwYM4cOCAy2O9QkNDER4ejv/85z84evQofvjhB4wdO9Zhmfvvvx9CCDz22GM4cOAAVq9ejTlz5pQpDh8fHwgh8Mwzz+C1117DV199hT///BNPPfWUNgNkZWrYsCEeeOABPPzww1ixYgWOHz+OnTt3YubMmfj2228BFB5ft27dOvz88884ePAgnnjiCVy4cKHCjz1r1ixMnDgRCxcuRO3atXH+/HmcP38emZmZAApHGcePH4/t27fjxIkT2LBhA/r164f69es7FOK333473n33Xe1637598corr+Dbb7/FiRMnsHLlSrz55pu4++67rxmTEAKLFi3C4cOHceutt2L16tX466+/sG/fPrzyyivo16
/k40wrEz+ZdGA/n4lR9tklsmNuug9VlTh+IRXHL6RCVT379WJe6kfm5yN9zrtIn/MuZDl2J/NkzEtyJSgoCEFBQS5vUxQFy5cvx+7du9G8eXOMGTMGr7/+usMyAQEB+Oabb7B//37ccMMNeOmllzBr1qwyxSClhJQSzz77LB566CEMGTIEHTt2RGBgYKmKlPJYtGgRHn74YTz77LNo1KgR+vfvj19++UU7T+DLL7+MG2+8ET169ECXLl0QHR2N/v37V/hx58+fj/z8fAwaNAgxMTHan70wNplM2LdvH+666y40bNgQw4cPR5s2bfDjjz/CbL6yC/qxY8dw6dIl7fo777yDQYMG4amnnkKTJk0wbtw4PPHEE5g+fbpTDK60b98eu3btQv369fHYY4+hSZMmuOuuu/DHH39g7ty5Fe53aQjp4Z9OGRkZCA4ORnp6eolvuurGGbnIqJib1auiE6/sOlJ4ste2DWoaYuKV6Q+VbvazsmJe6kfNydVmN4zasOqau7iljJ9UHWEh7PVp1fI4V8O8rDy5ubk4fvw46tSp4zBhBpWdlFKbXZOn93BfV3tPlLa24acSERERERGRB9G1yJs5cybatWuHwMBA1KhRA/3798ehQ4cclsnNzcWIESMQHh6OgIAADBw4sFL24SUiIiIiIvJEuhZ5mzdvxogRI7B9+3asW7cOBQUF6N69O7KysrRlxowZg2+++QZffPEFNm/ejLNnz2ondXRnHEIno2JukhExL8mImJdEZFS6nkJhzZo1DtcXL16MGjVqYPfu3ejUqRPS09Px4YcfYtmyZejatSuAwgM7mzRpgu3bt+Mf//iHHmFXmKIoSEhI0DsMIifMTTIi5iUZEfOSjEgI4TChCF2/DHVMXnp6OgAgLCwMALB7924UFBSgW7du2jKNGzdGfHw8tm3bpkuMlUFKiZycHM7IRYbD3CQjYl6SETEvyYiklFBVlXlJxjkZuqqqGD16NG6++WY0b94cAHD+/Hn4+Pg4nTAwKioK58+fd7mevLw85OXladczMjK09Rc9gaGiKE4nNBRCQAhRZe2KokBKCZvNhnPnzmkzctnbi78hy9OuV58qI3b2Sf8+qaqK8+fPa79Oe0KfKtJe1X0ScHxMCaH9e632ovcVkNp1+XdL8eXL2+4c49XaUSWvk6qquHDhAuLi4px2j2PuVW2fipJFtqNXW14CkEVeJyHtGVy2dlUAgKt2x/j1ep2Awu8pcXFx2uyazL2K9aloTEIIl8+5O7TrGQsAFBQUwMfHp1KfSyM9v5XVbqRYircXfz8Ufd+U5oTsgIGKvBEjRuD333/H1q1bK7SemTNnYurUqU7tiYmJCAwMBFB4DpKIiAikpKRoJ0sEgJCQEISEhODixYvIycnR2sPDwxEYGIhz586hoKBAa4+KioLFYkFiYqLDixUbGwsvLy+cOnXKIYb4+HhYrVacOXMGqampEEJou3vk5uY6TCjj7e2NmjVrIjMzE8nJyVq7xWJBVFQU0tPTHU5oqXefzp49q7UJIdgnN+2TlFIbUfeUPgHGfZ3CvW0OfUouMEEBEFqkXQJILvCCt5AI9rrywW5VJVrXjYFZqAj3USFE4foLpEC61QQ/RcLPdGX5XFUg02ZCgEmFr3IllmybgmxVINhLhbe40p5pU5CrCoR42WAqUlelWxUUSIEwb5tDUZhaYNKmlC+qMl4n+65H6enp2g93AHOvOvoUHxeHwKXv49KlS0i8cB5QlKv2yRtApr8fMgP8tHa/nFwEZ2QiIygA2UVOwRCQmY3ArGykhgYhz8dHaw/OuAy/nDwkh4XA6nXla0pYajrM+QVIigxDZpH49XqdwsLCkJWVhcTERO3LNXOvfH0qKCiAzWZDQUGBVjCbzWZIKR3WLYSAj48PVFWF1WrV2pW/89Jms8Fmszm1W61Why/GJpMJXl5eTu1eXl4wmUwoKChwiN3b2xtCCOQXO1ekt7c3ADi12wusorFXV58KCgoc1u
MJffLE1+lafbK/J/Ly8pzeT5cvX0ZpGOI8eSNHjsSqVauwZcsW1KlTR2v/4YcfcPvttyM1NdVhNC8hIQGjR4/GmDFjnNblaiQvLi4OqampDueS0Hskr+i5dTzlF7iqamefqnckLzExEQkJCS5/aXLHPlWkvar7NOnj7x3ayzKSV1K7niN50x/qXmUjeadPn+ZInhv0Ke35KdUykhf22uRq69PVRvJOnjzJkbxK6FNOTo7TOcGMMJpS3na9Y7GfJ8+T+lQV7UaKpXh70fPkWSwWh/dNRkYGQkNDr3mePF1H8qSUGDVqFFauXIlNmzY5FHgA0KZNG3h7e2PDhg0YOHAgAODQoUM4deoUOnbs6HKdZrPZ5QGn9mKqeJsrVdkuROHondlsdojJ/uHnavmytOvVp8qInX0yRp/sGwZP6lN526u6T85lGwCn8knfdtcxltxeVa+T/RdaV+tn7hmsTygsyCraXjjg7KpdljoPqvJ1UlUVPj4+1f79wpNzr3hMruJzl3a9Yil6W2U/l0Z6fiur3UixFG0v/n4o+r4p6b1VnK5F3ogRI7Bs2TKsWrUKgYGB2nF2wcHBsFgsCA4OxvDhwzF27FiEhYUhKCgIo0aNQseOHd12Zk2g8MWpWbOm3mEQOWFuug9VSpy+WLhrba3IYChX2eC7O+alfmRBAS6/vxgAEPjEUIi/d3si5iXpSwiBlStXon///k7tRUfx6Pql6+ya8+fPR3p6Orp06YKYmBjt77PPPtOW+fe//40777wTAwcORKdOnRAdHY0VK1boGHXFSSlx+fJll0O1RHpibroPKSXOpV7GuVTPf72Yl/qRVhuylv0PWcv+B2m1XfsO1xHmJQHA0KFDnQotPdkPC7oe83LLli3o27cvYmNjIYTAV1995XK5gwcP4q677kJwcDD8/f3Rrl07p2NFi1q8eLHTaLN9t2K78+fPo1evXoiNjcXIkSOddks+evQohg0bhlq1asFsNqNOnToYPHgwdu3aVeF+l0TXIs++P3jxv6FDh2rL+Pr6Yt68eUhJSUFWVhZWrFiB6Oho/YKuBFJKJCcnX5dvQDI25iYZEfOSjIh5SUZVdGKQ60lWVhZatWqFefPmlbjMsWPHcMstt6Bx48bYtGkT9u3bh4kTJzoVbcUFBQXh3Llz2t/Jkycdbp84cSLatm2L7777Dn/99ReWL1+u3bZr1y60adMGhw8fxvvvv48DBw5g5cqVaNy4MZ599tmKdfoqDHWePCIiIiIiT7F582a0b98eZrMZMTExeOGFFxyKsC5duuDpp5/Gc889h7CwMERHR2PKlCkO6zhy5Ag6deoEX19fNG3aFOvWrXN6nP3796Nr167w8/NDbGwsHn/8cYdZUu0jjnPmzEFMTAzCw8MxYsQIp5kli5oyZQpat26NhQsXIj4+HgEBAXjqqadgs9kwe/ZsREdHo0aNGnjllVcc7peWloZHH30UkZGRCAoKQteuXbF3717t9mPHjqFfv36IiopCQEAA2rVrh/Xr1zuso3bt2nj11VfxyCOPIDAwEPHx8fjPf/5z1ee6V69emDFjBu6+++4Sl3nppZfQu3dvzJ49GzfccAPq1auHu+66CzVq1LjquoUQiI6O1v6ioqIcbk9NTUWLFi3QokUL1K1bV5u51j541aBBA/z444/o06cP6tWrh9atW2Py5MlYtWrVVR+3IljkEREREZEh5eQXlPiXX2At9bJ5pVy2Mp05cwa9e/dGu3btsHfvXsyfPx8ffvghZsyY4bDckiVL4O/vjx07dmD27NmYNm2aVsipqooBAwbAx8cHO3bswIIFC/D888873D8rKws9evRAaGgodu7ciaVLl2LDhg0YOXKkw3IbN27EsWPHsHHjRixZsgSLFy/G4sWLr9qHY8eO4bvvvsOaNWvw6aef4sMPP0SfPn1w+vRpbN68GbNmzcLLL7+MHTt2aPf55z//iaSkJHz33XfYvXs3brzxRtx+++1ISUkBUHiKpt
69e2PDhg347bff0LNnT/Tt29dpl8k33ngDbdu2xW+//YannnoKTz75JA4dOlSm16AoVVXx7bffomHDhujRowdq1KiBDh06lLhbZ1GZmZlISEhAXFwc+vXrhz/++MPh9hdeeAGjRo2C2WzGr7/+iocffhgAsGfPHvzxxx949tlnXU6YUvxc4JXJMOfJu95YLBa9QyByiblJRsS8JCNiXla9ftM+LvG29g1rYcbD3bXr98z81KmYs2tZOxpzHu2tXX94zhdIz851Wu77GY9UIFpH7733HuLi4vDuu+9CCIHGjRvj7NmzeP755zFp0iTtS3/Lli0xeXLhaUEaNGiAd999Fxs2bMAdd9yB9evX488//8TatWsRGxsLAHj11VfRq1cv7XGWLVuG3NxcfPTRR/Dz80Pjxo3xzjvv4K677sKsWbO0UafQ0FC8++67MJlMaNy4Mfr06YMNGzbgscceK7EPqqpi4cKFCAwMRNOmTXHbbbfh0KFDWL16NRRFQaNGjTBr1ixs3LgRHTp0wNatW7Fz504kJSVps93PmTMHX331Ff73v//h8ccfR6tWrdCqVSvtMaZPn46VK1fi66+/dihMe/fujaeeegoA8Pzzz+Pf//43Nm7ciEaNGpXr9UhKSkJmZiZee+01zJgxA7NmzcKaNWswYMAAbNy4EZ07d3Z5v0aNGmHhwoVo2bIl0tPTMWfOHNx00034448/UKtWLQBA27ZtcebMGVy6dMnhsLIjR44AABo3blyumCuCRZ4OFEVxGuYlMgLmJhkR85KMiHlJ13Lw4EF07NjRYbr8m2++GZmZmTh9+jTi4+MBFBZ5RcXExCApKUlbR1xcnFbgAXA6jdjBgwfRqlUr+Pv7Ayg85cwtt9wCVVVx6NAhLU+bNWsGk8nk8Dj79++/ah9q166NwMBA7XpUVBRMJpPDqFRUVJQW7969e5GZmYnw8HCH9eTk5ODYsWMACkfFpkyZgm+//Rbnzp2D1WpFTk6O00he0efFvruk/XHKwz4ZSr9+/bRzbbdu3Ro///wzFixYUGKR17FjR4fn/KabbkKTJk3w/vvvY/r06Vq7l5eX07wheh6zyyJPB1JKpKenIzg4+KrnOSGqbsxNMiLmJRkR87J6rJr0UIm3mYo9759PGFzissVPM/PRuH9WLLBK5F3s1CRCOJ80vrSuNrtmeR7H1X2utp7MzEzExMRg06ZNTuuy75o4btw4rFu3DnPmzEH9+vVhsVgwaNAg5OfnVzjeq4mIiICXlxeaNm3q0N6kSRNs3bq11Ovx9vbGDTfcgKNHj15z2YYNGwIA/vzzT9xwww1lC7iCWOTpQEqJtLQ0BAUFccNAhsLcdB+KEGhRO1q77MmYl/oRZh9EfPK+dpmuYF5WD4tP6c/NWFXLlleTJk3w5ZdfQkqp5chPP/2EwMBAbTe/0qwjMTER586dQ0xMDABg+/btTsssXrwYWVlZ8PPzg81mw08//aTtTlmdbrzxRpw/fx5eXl6oXbu2y2V++uknDB06VJsgJTMzEydOnKjy2Hx8fNCuXTun4/oOHz6MhISEUq/HZrNh//796N279zWXbd26NZo2bYo33ngD9957r9NxeWlpaVV2XB4nXiEickNCCPiZveFn9uYXTKoyQlHgXbc2vOvWhnAxaQARAenp6dizZ4/DX2JiIp566ikkJiZi1KhR+PPPP7Fq1SpMnjwZY8eOdTkJhyvdunVDw4YNMWTIEOzduxc//vgjXnrpJYdlHnjgAfj6+mLIkCH4/fffsWnTJjz99NN46KGHqn2X4m7duqFjx47o378/vv/+e5w4cQI///wzXnrpJe2ccA0aNMCKFSuwZ88e7N27F/fff3+FRujsMjMztecfAI4fP449e/Y47AY6fvx4fPbZZ/jvf/+Lo0eP4t1338U333yjHfsHAA8//DAmTJigXZ82bRq+//57/PXXX/j111/x4IMP4uTJk3j00UevGZMQAosWLcLhw4dx6623YvXq1f
jrr7+wb98+vPLKK+jXr1+F+10SjuQREREREZXTpk2bnHbFGz58OD744AOsXr0a48ePR6tWrRAWFobhw4fj5ZdfLvW6FUXBypUrMXz4cLRv3x61a9fG22+/jZ49e2rL+Pn5Ye3atXjmmWfQvn17+Pn5YcCAAfj3v/9daX0sLSEEVq9ejZdeegnDhg3DxYsXER0djU6dOmkF55tvvolHHnkEN910EyIiIvD8888jIyOjwo+9a9cu3Hbbbdr1sWPHAgCGDBmizSJ69913Y8GCBZg5cyaefvppNGrUCF9++SVuueUW7X6nTp1yKMJTU1Px2GOP4fz58wgNDUWbNm3w888/O+32WZL27dtj165deOWVV/DYY4/h0qVLiImJwU033YS5c+dWuN8lEdLDz+KZkZGB4OBgpKenIygoSO9wABQe+JmSkoKwsLBS/5JDVB2Ym9Vr4sfO5zoqLVVKnE0u3CjGhgcZYpfN6Q/dUSXrZV7qRxYUIHNJ4Ul9A4bcB+F99V3cUsZPqo6wEPb6tGp5nKthXlae3NxcHD9+HHXq1LnmSanp6qSUsFqt8PLy4l4ebuxq74nS1jYcydOBoiiIiIjQOwwiJ8xN9yGlxJm/i7yYsEDAgzfmzEv9SKsNmQs/AQD4P/DPaxZ51xPmJRmRq4lR6PrEn550oKoqLl26VCn7HxNVJuYmGRHzkoyIeUlGJKVEQUGBrlP3kzGwyNNJZmam3iEQucTcJCNiXpIRMS/JiPjDAwEs8oiIiIiIiDwKizwiIiIi0h13MSQqVBnvBRZ5OhBCICQkhLMekeEwN8mImJdkRMzLymOfKCQ7O1vnSDyDyWTSOwSqIPt7oSKT6HB2TR3YNwxERsPcJCNiXpIRMS8rj8lkQkhICJKSkgAUnveNxXPFWK1WvUOgcpBSIjs7G0lJSQgJCalQwc4iTweqquLixYuIjIzkuXXIUJib7kMRAs3ia2iXPRnzUj/CxxvhH7ytXaYrmJeVKzo6GgC0Qo/KR0oJVVWhKAoLZTcWEhKivSfKi0WeTnJycvQOgcgl5mahipyovDoIIRBgMesdRrVhXupDmEzwadpI7zAMi3lZeYQQiImJQY0aNVBQUKB3OG5LVVWcO3cOMTEx/PHBTXl7e1fKLrcs8oiIiIjIEEwmE48pqwBVVWEymeDr68si7zrHIo+IyA2pUuJ86mUAQHRooMfvskn6kAUFyPr8KwCA/z39ISowCQAREVUfFnk6EEIgPDyc+0qT4TA33YeUEokX0wEAUSEBgAe/ZsxL/UirDZfnfQAA8BvQl0VeEcxLMiLmJdmxyNOBEAKBgYF6h0HkhLlJRsS8JCNiXpIRMS/Jjjvr6kBVVZw5cwaqquodCpED5iYZEfOSjIh5SUbEvCQ7Fnk64cxRZFTMTTIi5iUZEfOSjIh5SQCLPCIiIiIiIo/CIo+IiIiIiMiDsMjTgRACUVFRnPmIDIe5SUbEvCQjYl6SETEvyY6za+pACAGLxaJ3GEROmJvuQxECTeIitcuejHmpH+HjjbB3Z2uX6QrmJRkR85LsOJKnA1VVcfLkSc58RIbD3HQfQggE+fkiyM/X43+xZV7qR5hMMN/YCuYbW0GYTHqHYyjMSzIi5iXZscjTiZRS7xCIXGJukhExL8mImJdkRMxLAri7JhGRW1KlRFJaJgCgRkiAx++ySfqQViuyV60GAPj16w3hxa8NRETugJ/WRERuSEqJk0lpAIDIYH+ARR5VAVlgRcYb8wAAlt7dWeQREbkJ7q6pAyEEYmNjPf44GnI/zE0yIuYlGRHzkoyIeUl2LPJ0IISAl5cX34BkOMxNMiLmJRkR85KMiHlJdizydKCqKk6dOsWZj8hwmJtkRMxLMiLmJRkR85LsWOQRERERERF5EBZ5REREREREHoRFHhERERERkQfhXMg6UBQF8fHxUBTW2GQszE33oQiBhjUjtMuejHmpH+HtjdDXp2mX6QrmJRkR85LsWOTpQEoJq9UKb29vzn
5EhsLcdB9CCIQGWPQOo1owL/UjvEzwvbmD3mEYEvOSjIh5SXYs83UgpcTZs2chpdQ7FCIHzE0yIuYlGRHzkoyIeUl2HMkjInJDqpRIzsgGAIQH+Xn8LpukD2m1ImftDwAAS4+uEF782kBE5A74aU1E5IaklPjrfAoAICzQArDIoyogC6xIf+UNAIBv104s8oiI3AR319QJ95Mmo2JukhExL8mImJdkRMxLAjiSpwtFUZCQkKB3GEROmJtkRMxLMiLmJRkR85LsOJKnAyklcnJyeFAsGQ5zk4yIeUlGxLwkI2Jekh2LPB1IKXHhwgW+AclwmJtkRMxLMiLmJRkR85LsdC3ytmzZgr59+yI2NhZCCHz11VcOtw8dOhRCCIe/nj176hMsERERERGRG9C1yMvKykKrVq0wb968Epfp2bMnzp07p/19+umn1RghERERERGRe9F14pVevXqhV69eV13GbDYjOjq6miKqPt7e3nqHQOQSc9M9KEKgfmy4dtnTMS/1Iby9ETLjJe0yOWJekhExLwlwg9k1N23ahBo1aiA0NBRdu3bFjBkzEB4erndYFaIoCmrWrKl3GEROmJvuQwiB8EA/vcOoFsxL/QgvEyxdO+kdhiExL8mImJdkZ+gir2fPnhgwYADq1KmDY8eO4cUXX0SvXr2wbds2mEwml/fJy8tDXl6edj0jIwMAoKoqVFXV2hVFcbgOQDvur6raFUWBlBKqqiIzMxMBAQEQQmjtxQ+SLU+7Xn2qjNjZJ/37JKVEZmYmgoKCtOvu3qfytheSEA7XAECU2C7guI6ytzuvu6zt14qxavuEKnmdpJTIzs6Gv78/ivPE3HPnPgGFuSCLjC4Lac/UsrWrAgBctTvGr9frJIRARkaGti2/WixGe508MffYp8JYbDabw3dMT+iTJ75OFWkvvkxJDF3k3XfffdrlFi1aoGXLlqhXrx42bdqE22+/3eV9Zs6cialTpzq1JyYmIjAwEAAQEBCAiIgIpKSkIDMzU1smJCQEISEhuHjxInJycrT28PBwBAYG4ty5cygoKNDao6KiYLFYkJiY6PDCxsbGwsvLC6dOnXKIIT4+HlarFWfOnEFKSgrCwsK085nk5ubiwoUL2rLe3t6oWbMmMjMzkZycrLVbLBZERUUhPT0daWlpWrvefTp79qzWJoRgn9y0T1JKpKeno2XLlsjKyvKIPgHle50AIMCkwle5Eku2TUG2KhDspcJbXGnPtCnIVQVCvGwwFamU0q0KCqRAmLfNoYBKLTBBhUS4t82hT8kFJigAQou0SwDJBV7wFhLBXlc+2AtUib/SCuAjJBJCfLRdNgukQLrVBD9Fws90ZflcVSDTZqraPqlqlbxOZrMZeXl5KCgo0H64Azw394zUp/iatZCxfiPS09KhtmsNmExX7ZM3gEx/P2QGXBll9svJRXBGJjKCApBt8b3S18xsBGZlIzU0CHk+Plp7cMZl+OXkITksBFavK19TwlLTYc4vQFJkGDKLxK/X6xQWFobExET4+/trRR5zj33Su09JSUk4e/YswsLCCvf48IA+eeLrVJE+Xb58GaUhpEHmWBVCYOXKlejfv/9Vl4uMjMSMGTPwxBNPuLzd1UheXFwcUlNTtdEJQN9fDGw2G06dOoX4+HgoimL4XwxK0yd3+xWEfXLdrqoqEhMTkZCQACGER/SpvO2Tl26AkUfybKqKX44UbpjaNYiFSVFKFWNV9mn6Q92r5HVSVRWnT59GXFyc9mW66PKelntG6hPy8nHh9n4AgBrrVkL8XaSVtHza81OqZSQv7LXJ5e5TZY5anjx5EnFxcVD+fv8x99gnvftktVodvmN6Qp888XWqSHtGRgZCQ0ORnp7uUNsUZ+iRvOJOnz6N5ORkxMTElLiM2WyG2Wx2arcXU8XbXKnKdiGEFkvRmOwvoqvly9KuV58qI3b2yRh9sl/2pD6Vtx1OpczV253Ls/K0l+4xi95XQrhYV9lir6w+VfXr5Gr9np
h7RulT0a8both2tMQ+obAgq2h74YCzq3ZZ6jyoytdJVVWHbfq1Yqms9usl9yqz/XrrU/HvmGWNvaR2vk7G6FNJyxSna5GXmZmJo0ePatePHz+OPXv2ICwsDGFhYZg6dSoGDhyI6OhoHDt2DM899xzq16+PHj166Bh15bBYLHqHQOQSc5OMiHlJRsS8JCNiXhKgc5G3a9cu3Hbbbdr1sWPHAgCGDBmC+fPnY9++fViyZAnS0tIQGxuL7t27Y/r06S5H6tyJoiiIiorSOwwiJ8xNMiLmJRkR85KMiHlJdroWeV26dHG5j7vd2rVrqzGa6mOf3CI4OLiEXcSI9MHcJCNiXpIRMS/JiJiXZFe6nTqpUkkpkZaWdtUCl0gPzE0yIuYlGRHzkoyIeUl2lVbkFZ02lIiIiIiIiPRRriJv1qxZ+Oyzz7Tr99xzD8LDw1GzZk3s3bu30oIjIiLXhBCoGx2GutFh3CWHqozw9kLwS88i+KVnIbzdakJuIqLrWrmKvAULFiAuLg4AsG7dOqxbtw7fffcdevXqhfHjx1dqgJ7KfrJlIqNhbroHRQhEBvsjMthfOxG6J2Ne6kN4ecGvT3f49ekO4cUirzjmJRkR85KAck68cv78ea3I+7//+z/cc8896N69O2rXro0OHTpUaoCeSFEURERE6B0GkRPmJhkR85KMiHlJRsS8JLtyFXmhoaFITExEXFwc1qxZgxkzZgAoPNjTZrNVaoCeSFVVpKSkICwsrNQnNCSqDsxN9yGlRFpWLgAgxN/Xo3fZZF5WXMr4SeW6n5QSamoaAEAJDfHoPCsr5iUZEfOS7Mr16g8YMAD3338/7rjjDiQnJ6NXr14AgN9++w3169ev1AA9VWZmpt4hELnE3HQPqpQ4fOYSDp+5BPU6mEWNeakTVYX14GFYDx4GVFXvaAyHeUlGxLwkoJwjef/+979Ru3ZtJCYmYvbs2dq+v+fOncNTTz1VqQESERERERFR6ZWryMvPz8e4ceOc2seMGVPhgIiIiIiIiKj8yrW7ZlRUFB555BFs3bq1suO5LgghEBLCYxvIeJibZETMSzIi5iUZEfOS7MpV5H3yySdISUlB165d0bBhQ7z22ms4e/ZsZcfmsfgGJKNibpIRMS/JiJiXZETMS7IrV5HXv39/fPXVVzhz5gz+9a9/YdmyZUhISMCdd96JFStWwGq1VnacHkVVVVy4cAEqD2Ing2FukhExL8mImJdkRMxLsqvQ3KqRkZEYO3Ys9u3bhzfffBPr16/HoEGDEBsbi0mTJiE7O7uy4vQ4OTk5eodA5BJzk4yIeUlGxLwkI2JeElDOiVfsLly4gCVLlmDx4sU4efIkBg0ahOHDh+P06dOYNWsWtm/fju+//76yYiUior8JIZBQI0S7TFQlhIBX3QTtMhERuYdyFXkrVqzAokWLsHbtWjRt2hRPPfUUHnzwQYSEhGjL3HTTTWjSpEllxUlEREUoQiA6NFDvMMjDCUWBKSZa7zCIiKiMylXkDRs2DPfddx9++ukntGvXzuUysbGxeOmllyoUnKcSQiA8PJy/vpPhMDfJiJiXZETMSzIi5iXZlavIO3fuHPz8/K66jMViweTJk8sVlKcTQiAwkL/Ak/EwN92HlBKXc/IAAIEWs0dv0JmX+pFSQmZcBgCIoECPzrOyYl6SETEvya5cRV7RAi83Nxf5+fkOtwcFBVUsKg+nqirOnTuHmJgYKEqF5r4hqlTMTfehSomDiRcBAG0b1ITJg798My91pKoo+P0gAMDnH20Bk0nngAqljJ9ULY8T9vq0Em9jXpIRMS/JrlyvflZWFkaOHIkaNWrA398foaGhDn90bQUFBXqHQOQSc5OMiHlJRsS8JCNiXhJQziLvueeeww8//ID58+fDbDbjgw8+wNSpUxEbG4uPPvqosmMkIiIiIiKiUirX7prffPMNPvroI3Tp0gXDhg3Drbfeivr16yMhIQFLly7FAw88UNlxEhERERERUSmUay
QvJSUFdevWBVB4/F1KSgoA4JZbbsGWLVsqLzoPJYRAVFQUD2Anw2FukhExL8mImJdkRMxLsitXkVe3bl0cP34cANC4cWN8/vnnAApH+IqeK49cE0LAYrHwDUiGw9wkI2JekhExL8mImJdkV64ib9iwYdi7dy8A4IUXXsC8efPg6+uLMWPGYPz48ZUaoCdSVRUnT56Eqqp6h0LkgLlJRsS8JCNiXpIRMS/JrlzH5I0ZM0a73K1bN/z555/YvXs36tevj5YtW1ZacJ5MSql3CEQuMTfdgxACcZHB2mVPx7zUiRAwJcRpl8kR85KMiHlJQDmKPFVVsXjxYqxYsQInTpyAEAJ16tTBoEGD0KJFi6qIkYiIilGEQGwYz0lKVUsoCrxqxeodBhERlVGZdteUUuKuu+7Co48+ijNnzqBFixZo1qwZTp48iaFDh+Luu++uqjiJiIiIiIioFMo0krd48WJs2bIFGzZswG233eZw2w8//ID+/fvjo48+wsMPP1ypQXoaIQRiY2Ovi12syL0wN92HlBJZufkAAH9fH49+zZiX+pFSQmZmAQBEgD9fgyKYl2REzEuyK9NI3qeffooXX3zRqcADgK5du+KFF17A0qVLKy04TyWEgJeXF9+AZDjMTfehSok/TiXhj1NJUD38+AvmpY5UFQX7/kDBvj8ATuTggHlJRsS8JLsyFXn79u1Dz549S7y9V69e2qybVDJVVXHq1CnOfESGw9wkI2JekhExL8mImJdkV6YiLyUlBVFRUSXeHhUVhdTU1AoHRUREREREROVTpiLPZrPBy6vkw/hMJhOsVmuFgyIiIiIiIqLyKdPEK1JKDB06FGaz2eXteXl5lRIUERERERERlU+ZirwhQ4ZccxnOrHltiqIgPj4eilKmgVSiKsfcJCNiXpIRMS/JiJiXZFemIm/RokVVFcd1RUoJq9UKb29vzn5EhsLcJCNiXpIRMS/JiJiXZMcyXwdSSpw9exbSw6c9J/fD3HQfQgjUDA9CzfAgj9+QMy91JARMcTVhiqsJeHielRXzkoyIeUl2ZRrJIyIiY1CEQK2IYL3DIA8nFAVe8bX0DoOIiMqII3lEREREREQehCN5OvH03avIfTE33YOUEjn5haessfh4efzr5un9MyopJWRODgBAWCx8HYrh80FGxLwkgCN5ulAUBQkJCZz5iAyHuek+VCmx/8R57D9xHqqHH3vBvNSRqqLgt/0o+G0/oKp6R2MozEsyIuYl2XEkTwdSSuTm5sLX15e/tpChMDfJiIrnZcr4SdXyuGGvT6vyx6iuvlDl4+clGRHzkuxY5utASokLFy5w5iMyHOYmGRHzkoyIeUlGxLwkOxZ5REREREREHoRFHhERERERkQdhkacTb29vvUMgcom5SUbEvCQjYl6SETEvCeDEK7pQFAU1a9bUOwwiJ8xNMiLmJRkR85KMiHlJdhzJ04GUEpcvX+ZBsWQ4zE33IYRATGggYkIDPX4GNealjoSAKTYGptgYwMPzrKyYl2REzEuy07XI27JlC/r27YvY2FgIIfDVV1853C6lxKRJkxATEwOLxYJu3brhyJEj+gRbiaSUSE5O5huQDIe56T4UIRBfIwTxNUKgePiXb+alfoSiwKtOPLzqxEPwvFsOmJdkRMxLstP1EzsrKwutWrXCvHnzXN4+e/ZsvP3221iwYAF27NgBf39/9OjRA7m5udUcKRERERERkXvQ9Zi8Xr16oVevXi5vk1Ji7ty5ePnll9GvXz8AwEcffYSoqCh89dVXuO+++6ozVCIiQ5FSIt9qAwD4eJk8fpdN0oeUEsjLL7xi9mGeERG5CcPue3H8+HGcP38e3bp109qCg4PRoUMHbNu2TcfIKofFYtE7BCKXmJvuQZUSe/46hz1/nYN6HeyWw7zUiaoif/ce5O/eA6iq3tEYDvOSjIh5SYCBZ9c8f/48ACAqKsqhPSoqSrvNlby8POTl5WnXMzIyAACqqkItsoFSFMXhOlA4kY
EQosraFUXR9pGOjIzU4rK3F99/ujztevWpMmJnn4zRp8jISI/rU3naC0kIh2sAIEpsF3BcR9nbndddUnvR+wpI7fq1YqzaPqHKXqeoqChIKQs/y/8eTRLS/swAssgI07XaVWHvc/F2x2deSln1uVfG2Cvap5LaFSmd1u24xsL7FI7kSSiy7DEapU+u21336Vrb6KLbcsD9P/c88bP8eusT4Pgd0xP65ImvU0Xaiy9TEsMWeeU1c+ZMTJ061ak9MTERgYGBAICAgABEREQgJSUFmZmZ2jIhISEICQnBxYsXkZOTo7WHh4cjMDAQ586dQ0FBgdYeFRUFi8WCxMREhxc2NjYWXl5eOHXqlEMM8fHxsFqtOHPmDHJycmCxWKAoChISEpCbm4sLFy5oy3p7e6NmzZrIzMxEcnKy1m6xWBAVFYX09HSkpaVp7Xr36ezZs1qbEIJ9ctM+SSlhtVpRv359j+kTUL7XCQACTCp8lSuxZNsUZKsCwV4qvMWV9kybglxVIMTLBlOR743pVgUFUiDM2+bwZTm1wAQVEuHeNoc+JReYoAAILdIuASQXeMFbSAR7Xflgz7NduRzmbYPX33EWSIF0qwl+ioSf6coyuapAps1UtX1S1Sp5nXx9feHr6wspJdLT05FVIxwA4JeTi+CMTGQEBSDb4qstH5CZjcCsbKSGBiHPx0drD864DL+cPCSHhcDqdWXzF5aaDnN+AZIiwxy+4AcVFFR57mX6+yEzwE9rr+o+RVxKhUlVceHv59AuKikZNkXBpYjQK32SElHnkrTrSZFhgJcXvKxWRCanIcdiRnpQoHa7OT8fYakZhu9TdFIy8n28kRIarLWX1CfbxYslfkaEh4fj1KlT2o8BgPt/7nniZ/n11qekpCSkpKTAYrFACOERffLE16kifbp8+TJKQ0iDTL8jhMDKlSvRv39/AMBff/2FevXq4bfffkPr1q215Tp37ozWrVvjrbfecrkeVyN5cXFxSE1NRVBQkNau5y8GNpsNp06dQnx8PBRFMfwvBqXpk7v9CsI+uW5XVRWJiYlISEiAEMIj+lTe9slLN8DII3k2VcUvRwo3TO0axML098yHeo7kTX+oe5W8Tqqq4vTp04iLi4MQAikvFP6QV9UjROGzplR57iWPn2ToUS9htSJ/+y4AgFfHdhAmE66nkbyw1yZfdbT/5MmTiIuLg/L3+8/dP/c88bP8euuT1Wp1+I7pCX3yxNepIu0ZGRkIDQ1Fenq6Q21TnGFH8urUqYPo6Ghs2LBBK/IyMjKwY8cOPPnkkyXez2w2w2w2O7Xbi6niba5UZbsQQoulaExFh9mLL1+Wdr36VBmxs0/G6JP9sif1qbztcCplrt7uXJ6Vp710j1n0vvLvHTbLs55rtZe1T1X9OimKAqXYhlSg8Iu+0/IltBcOZLpqL7bev2Oo0j6VMfaK9ulq7a7WXfSaIqXD7ZUVe3X3qSztV9tG23eFq+7vF9fz9qm87ddbn4p/xyxr7CW183UyRp9KWqY4XYu8zMxMHD16VLt+/Phx7NmzB2FhYYiPj8fo0aMxY8YMNGjQAHXq1MHEiRMRGxurjfYRERERERGRI12LvF27duG2227Tro8dOxYAMGTIECxevBjPPfccsrKy8PjjjyMtLQ233HIL1qxZA19f35JW6Tbsx/wQGQ1zk4yIeUlGxLwkI2JeEqBzkdelSxeX+7jbCSEwbdo0TJs2rRqjqnqKoiAiIkLvMIicMDfdh4BAjZAA7bInY17qSAgo0TW0y3QF85KMiHlJdoY9T54nU1UVly5dKvUUqETVhbnpPhRFoE5UKOpEhUJRPPvLN/NSP0JR4F2vDrzr1YEo5XEg1wvmJRkR85Ls+Imtk6LTqRIZCXOTjIh5SUbEvCQjYl4SYODZNYmIqGRSSlj/Pleel0kpYXZQooqRUgJWa+EVLy/mGRGRm+BIHhGRG1KlxK/HzuLXY2ehXuXYZqIKUVXk7/wV+Tt/Bbj7FxGR22
CRpwMhBEJCQviLKBkOc5OMiHlJRsS8JCNiXpIdd9fUgf0NSGQ07pCbEz9ep3cIVILqem3GVMujEF2dO3xe0vWHeUl2HMnTgaqquHDhAmc+IsNhbpIxSQR72QBwt1QyDn5ekhExL8mORZ5OcnJy9A6ByCXmJhmNAOAtpIefDZDcET8vyYiYlwSwyCMiIiIiIvIoLPKIiIiIiIg8CCde0YEQAuHh4Zz5iAyHuek+BAQigvy0y55MAsi0KTwiTw9CQKkRoV2mK/h5SUbEvCQ7Fnk6EEIgMDBQ7zCInDA33YeiCNSLCdc7jGoikKvyC4sehKLAu0E9vcMwJH5ekhExL8mOu2vqQFVVnDlzhjMfkeEwN8mIBCRCvawQHMsjA+HnJRkR85LsOJKnk4KCAr1DIHKJuekepJRQZWHRowjh8bvmmDy7e4YlpQTsXxYVxePzrKz4eUlGxLwkgCN5RERuSZUSu46cwa4jZ7Rij6jSqSryt+9C/vZdV4o9IiIyPBZ5REREREREHoRFng6EEIiKiuJuL2Q4zE0yIgkg3crZNclY+HlJRsS8JDsek6cDIQQsFoveYRA5YW6SMQkUSH5hoeqXMn7SNZfJqeBjhL0+rYJrILqC23Gy40ieDlRVxcmTJznzERkOc5OMSEAi3Juza5KxqELgfI1wqBwxIQPhdpzsWOTpRHKiBDIo5iYZEb9GkxFJFnhkQNyOE8Aij4iIiIiIyKPwmDwiIjckIBAWYNEuE1UJIaCEh2mXiYjIPbDI04EQArGxsZz5iAyHuek+FEWgQc0IvcOoFhJAaoGJR+TpQCgKvBs30DsMQxJSIuJSKgR3jSMD4Xac7Li7pg6EEPDy8uIbkAyHuUnGJKD+/T+RUQgAJlVlVpKhcDtOdizydKCqKk6dOsWZj8hwmJtkRIWza9o4uyYZiioELnB2TTIYbsfJjrtrEhG5IZuqYteRMwCAtg1qwqTwNzuqfNJmQ/72XQAAn3+0hTCZdI6IiIhKg98KiIiIiIiIPAiLPCIiIiIiIg/CIk8HiqIgPj4eCnevIoNhbpIRSQgkF5ggOcUFGYgiJaKSkqFwdk0yEG7HyY7H5OlASgmr1Qpvb2/OfkSGwtwkY5JQANggUZ0zbKaMn1Rtj0XuRwKwKQqEzcafH8gwuB0nO5b5OpBS4uzZs5D89Y8MhrlJRiQAhHrzizQZixQClyJCIflFmgyE23GyY5FHRERERETkQbi7JhGRGxIQCPH31S4TVQkhoISGaJeJiMg9sMjTCfeTJqNibroHRRFoVCtS7zCqDXc80odQFHg3baR3GIYluEscGRC34wSwyNOFoihISEjQOwwiJ8xNMqLC2TW5uSJjUaREdFKy3mEQOeB2nOx4TJ4OpJTIycnhQbFkOMxNMiYJb6GC43lkJBJAno83s5IMhdtxsmORpwMpJS5cuMA3IBkOc9N92FQVvxw+jV8On4ZNVfUOp0oJAMFeKo881IG02ZC37RfkbfsF0mbTOxxDkUIgJTSYs2uSoXA7Tnbc/4WIyE2p3IhTdfDwHxGIiDwRR/KIiIiIiIg8CIs8nXh7e+sdApFLzE0yIhsHLclwJLysVvBYUTIabscJ4O6aulAUBTVr1tQ7DCInzE0yIgmBVCs3V2QsigQik9P0DoPIAbfjZMeRPB1IKXH58mUeFEuGw9wkY5LwVTi7JhmLBJBtMTMryVC4HSc7Fnk6kFIiOTmZb0AyHOYmGZEAEGDi7JpkLFIIpAcFcnZNMhRux8mO+78QEbkhASDQYtYuE1UNAREUqF0mIiL3wCKPiMgNKYqCpvE19A6DPJwwKfBp0VTvMIiIqIy4u6ZOLBaL3iEQucTcJKORAAqk4LFPZChCSpjz8yG4WxwZDLfjBHAkTxeKoiAqKkrvMIicMDfJmATSrSa9gyByIACEpWboHQaRA27Hyc7QI3lTpkyBEMLhr3HjxnqHVWFSSqSlpfGgWDIc5qb7sKkqdh89g91Hz8CmqnqHU8Uk/D
i7pi6kzYa8HbuRt2M3pM2mdziGIgFc9vdjVpKhcDtOdoYfyWvWrBnWr1+vXffyMnzI12R/AwYFBUFwVi4yEOame7HaPL24KyQA+JlU5KgmfqHWg9WqdwSGJIVAZoAf/LNzuMsmGQa342Rn+IrJy8sL0dHReodBRERERETkFgy9uyYAHDlyBLGxsahbty4eeOABnDp1Su+QiIiIiIiIDMvQI3kdOnTA4sWL0ahRI5w7dw5Tp07Frbfeit9//x2BgYEu75OXl4e8vDztekZG4UHRqqpCLXLciqIoDtcBaMf9VVW7oiiQUkJVVfj5+Wm329uL7z9dnna9+lQZsbNP+vfJnpsADNsn+xyLEkL7t6iytMu/11hZ7cXnfyx7e+ljL3pfAVnkeXHfPpXULiGRqwrIv9vVv3dBEtK+NBxOSH2tdlXY+1y83TGaktoVKZ3WXXK7hCLLHqNR+lT0mvr3e9Hd+1Rpr5OUsOTkQqL0OemqTwC4fWKfKrVPRb9jekqfqqLdXftUfJmSGLrI69Wrl3a5ZcuW6NChAxISEvD5559j+PDhLu8zc+ZMTJ061ak9MTFRKwwDAgIQERGBlJQUZGZmasuEhIQgJCQEFy9eRE5OjtYeHh6OwMBAnDt3DgUFBVp7VFQULBYLEhMTHV7Y2NhYeHl5OY06xsfHw2q14uzZswCA7OxsCCGQkJCA3NxcXLhwQVvW29sbNWvWRGZmJpKTk7V2i8WCqKgopKenIy0tTWs3Sp8AsE8e0CdFUXD58mVD9incu3Dyh+QCExQAod5XJoOQAJILvOAtJIK9rnwI2iSQavWCryIRYLrSXiALZ230UyT8irTnqgKZNhMCTCp8lSuxZNsUZKsCwV4qvMWV9kybglxVIMTLBlOR743pVgUFUiDM2+bwZTm1wAQVUuuLXVn6lFfkeLwwbxu8/o7Tnft07ddJhZ/Jho9iagMAcnKsuHw5D4GBZlgsVzZnWVn5yMoqQEiIL3x8rszKmZGRh9xcK8LCLPDyurIjS1paLvLzbYiM9EfR7/0PXDgFk6riQo1whz5FJSXDpii4FBGqtQkpEZ2UjHwfb6SEBmvtXlYrIpPTkGMxIz3oyo+T5vx8hKVmINPfD5kBflq7X04ugjMykREUgGyLr9YekJmNwKxspIYGIc/HR2sPzrgMv5w8JIeFwFrkmPWw1HSY8wuQFBnmUGxEXEotU5+iziVp15MiwwAvL7fvU2W+TgJAUpHHLU+fAHD7xD5VWp+Sk5ORk5OD7Oxsj+mTJ75OFenT5cuXURpCutn0O+3atUO3bt0wc+ZMl7e7GsmLi4tDamoqgoKCtHY9fzGw2WxISUlBWFgYFEUx/C8GpemTu/0Kwj65bldVFSkpKYiIiIAQwpB9mrK0cCKm630kz6aq+OVI4YapXYNYmBTF7ftUUruERIBJItMmICBQcPBwYXuRhypanF2rvdjATont4wqSOJJntSJ/+y4AgFfHdhAmk9v3qbJeJwBIDwpA4OUsLcry9Cn89WncPrFPldYnq9Xq8B3TE/rkia9TRdozMjIQGhqK9PR0h9qmOEOP5BWXmZmJY8eO4aGHHipxGbPZDLPZ7NRuL6aKt7lSle1CCCiKguzsbERERGjL2F9EV8uXpV2vPlVG7OyTMfpk//XPqH0qeadF6N7uXLKUp720jyng7+td5Lbi63LHPrluFwB8FRVZNgUSAtLFnUr6ubIiy9p7ori4g8CVL/pGaC8cnHXV7rqzpe2ThIAI8Nceo+jt7tqnympXhUCOxRdBl7OcHrvMfeL2iX2qxD4V/45Z1thLaufrZIw+lbRMcYYu8saNG4e+ffsiISEBZ8+exeTJk2EymTB48GC9QyMi0pWiKGiewJmHqWoJkwKfVs31DoOIiMrI0EXe6dOnMXjwYCQnJyMyMhK33HILtm/fjsjISL1DIyIiIiIiMiRDF3nLly/XO4QqIYRASEiIy2FdIj0xN8mIJAoniHGrA8jJ4wkpEZCZzR
Ohk6FwO052hi7yPJX9DUhkNMxN92FTVew7fh4A0LJOtDbximcSyFb5hUUP0mZD/m/7AAA+N7T8e+IVAgqPuwvMytY7DCIH3I6TnSd/KzAsVVVx4cKFUp/ngqi6MDfdS77Vhnyr7doLuj2JYC8bXE1YQdUgL7/wjxxIACmhQcxKMhRux8mORZ5Oip4bg8hImJtkNAKAtyhpPk8ifUghkOfj43SKBiK9cTtOAIs8IiIiIiIij8Iij4iIiIiIyINw4hUdCCEQHh7OmY/IcCqSmxM/XlcFEREVHvuUydk1yWCElAjOuMzZNclQ+B2T7Fjk6UAIgcDAQL3DIHLC3CRjEsjl7JpkMAKAX06e3mEQOeB2nOy4u6YOVFXFmTNnOPMRGQ5z071YfLxg8fH83+oEJEK9rBAcy9OFsFggLBa9wzAcVQAXw0PA3x/ISLgdJzvP/3ZgUAUFBXqHQOQSc9M9mBQFLevE6B1GtTHxi7QuhMkEnxtb6h2GQQlYvbxQOKbHHyDIOLgdJ4AjeURERERERB6FRR4REREREZEH4e6aOhBCICoqijMfkeEwN92HTVXxx8kLAIBmCVEwKZ77m50EkG7l7Jp6kDYbCvb+AQDwbtUMwmTSOSLjEFIiLDWds2uSoXA7TnYs8nQghICFB7GTATE33UtOvlXvEKqJQIHkFxa9yJwcvUMwJAHAnM9jn8hYuB0nO8/96dfAVFXFyZMnOfMRGQ5zk4xIQCLcm7NrkrGoQuB8jXCoHDEhA+F2nOxY5OlEcvcOMijmJhkRv0aTEUkWeGRA3I4TwCKPiIiIiIjIo7DIIyIiIiIi8iCceEUHQgjExsZy5iMyHOYmVUTBgUNVtu4kk4DNxl2QyDiElIi4lFrh2TVTxk+qpIiuLuz1adXyOKQvbsfJjkWeDoQQ8PLy4huQDIe56V58vK6f6exVlQWebsw+ekdgSAKASVV5vCgZCrfjZMfdNXWgqipOnTrFmY/IcJib7sOkKLihXixuqBfr0efIAwAhgMhIf/A7S/UTJhPMbW+Aue0NPEdeMaoQuMDZNclguB0nO8/+ZkBERERERHSdYZFHRERERETkQXhMHhGRG1JVFQcSkwAATeNqQPHwXTZJH9KmouD3AwAA7+ZNIUzMMyIid8AiTweKoiA+Pp5fyshwmJvuQwLIyi3QLnsyKYGLF7PA8/vqQUJmZmmX6QpFSkQlJUNhYpKBcDtOdswAHUgpYbVaIblhIINhbpJRKQontyBjkQBsisLSlwyF23GyY5GnAyklzp49yzcgGQ5zk4xICCA83I+za5KhSCFwKSIUkolJBsLtONmxyCMiIiIiIvIgLPKIiIiIiIg8CIs8nQju3kEGxdwkI+KeR2REgolJBsTtOAGcXVMXiqIgISFB7zCInDA33YvXdTKdvX12zeo226tGlT/Gc9akKn+MCvPiVwVXFCkRnZSsdxhEDrgdJzt+cutASonc3Fz4+vry1xYyFOam+zApCtrUr6l3GNXGx8eE/Hyb3mFcd4TJBHOHNnqHYUgSQL6PN3zyC8BPSzIKbsfJ7vr4GdhgpJS4cOECZz4iw2FukhEJAYSE+HJ2TTIUKQRSQoM5uyYZCrfjZMcij4iIiIiIyINwd00iIjekqir+PH0JANC4VgQUhb/ZUeWTNhUFB/4EAHg3bQxxnRwHSkTk7ljk6cTb21vvEIhcYm66Bwngck6edtnTWa2q3iFcpyRkxmXtMhUl4WW1gs8LGQ234wSwyNOFoiioWfP6mTCB3Adzk4xISiAlJUfvMIgcKBKITE7TOwwiB9yOkx33u9CBlBKXL1/mQbFkOMxNMipfX/4mScYiAWRbzBzHI0PhdpzsWOTpQEqJ5ORkvgHJcJibZERCAEFBZs6uSYYihUB6UCBn1yRD4Xac7FjkEREREREReRAWeURERERERB6EBznoxGKx6B0CkUvMTfehlHI3sYIDh6o4kqolJZCfbwP3PtIJT8/hkpAS5vx8CCYmGQy34w
SwyNOFoiiIiorSOwwiJ8xN92FSFLRrWEvvMKpNWlqu3iFcl4TJBHPHdnqHYUgCQFhqht5hEDngdpzs+POcDqSUSEtL40GxZDjMTTIqf3+e94mMRQK47O/H2TXJULgdJzsWeTrgG5CMirlJRiQE4O/vw9k1yVCkEMgM8OPsmmQo3I6THXfXJCJyQ6oqceTsJQBAg9gIKAq/aFLlk6oK659HAABejRtA8Pg8IiK3wCKPiMgNSUikZeVqlwuPECKqZFJCTU3TLhMRkXvgT3I6CQgI0DsEIpeYm2Q0UgI5OVbWGGQoQkr45eRydk0yHG7HCXCTIm/evHmoXbs2fH190aFDB+zcuVPvkCpEURRERERA4W4vZDDMTTKqy5fz9A6ByIEAEJyRyTF0MhRux8nO8Bnw2WefYezYsZg8eTJ+/fVXtGrVCj169EBSUpLeoZWbqqq4dOkSVFXVOxQiB8xNMqrAQLPeIRA5kADSgwI4uyYZCrfjZGf4Iu/NN9/EY489hmHDhqFp06ZYsGAB/Pz8sHDhQr1Dq5DMzEy9QyByiblJRiMEYLF4cXZNMhQpBLItvpxdkwyH23ECDF7k5efnY/fu3ejWrZvWpigKunXrhm3btukYGRERERERkTEZenbNS5cuwWazISoqyqE9KioKf/75p8v75OXlIS/vyrEb6enpAIC0tDSHoWtFUZyGsoUQEEJUWbuiKJBSwmazIT09HWlpaVAURWsvfk6T8rTr1afKiJ190r9PqqoiIyMDGRkZEEKUqU95OY7HphQuIQBIl+2i2E5OZW93XndZ268VY1nbq7NPNlWFNS8HAJCfkwnT38dfuIqxID/HYdKSogMP12ovPkihV3turoK8v/tb2thLajdCn+xtGdY8h5GgopN4lKVdCqDoDKtX2h2DKXO71Yp8qxUA4JWfD2EyAZAQsuwxGqZPLtvL3icpBC7n5sI3Px/K38sZuU9KWpp2iztunyrSfj31yWq1OnzH9IQ+eeLrVJH2jIwMALjmuRANXeSVx8yZMzF16lSn9oSEBB2iISKqehv1DoDKba7eAZTFLz/oHQFVxNuz9I6AiCrR5cuXERwcXOLthi7yIiIiYDKZcOHCBYf2CxcuIDo62uV9JkyYgLFjx2rXVVVFSkoKwsPDIQyy33xGRgbi4uKQmJiIoKAgvcMh0jA3yYiYl2REzEsyIual55NS4vLly4iNjb3qcoYu8nx8fNCmTRts2LAB/fv3B1BYtG3YsAEjR450eR+z2Qyz2XEWtpCQkCqOtHyCgoL4BiRDYm6SETEvyYiYl2REzEvPdrURPDtDF3kAMHbsWAwZMgRt27ZF+/btMXfuXGRlZWHYsGF6h0ZERERERGQ4hi/y7r33Xly8eBGTJk3C+fPn0bp1a6xZs8ZpMhYiIiIiIiJygyIPAEaOHFni7pnuyGw2Y/LkyU67lRLpjblJRsS8JCNiXpIRMS/JTshrzb9JREREREREbsPQJ0MnIiIiIiKismGRR0RERERE5EFY5BEREREREXkQFnk6mDdvHmrXrg1fX1906NABO3fu1Dskuo5s2bIFffv2RWxsLIQQ+Oqrrxxul1Ji0qRJiImJgcViQbdu3XDkyBF9gqXrxsyZM9GuXTsEBgaiRo0a6N+/Pw4dOuSwTG5uLkaMGIHw8HAEBARg4MCBuHDhgk4R0/Vg/vz5aNmypXbOsY4dO+K7777TbmdOkhG89tprEEJg9OjRWhtzk1jkVbPPPvsMY8eOxeTJk/Hrr7+iVatW6NGjB5KSkvQOja4TWVlZaNWqFebNm+fy9tmzZ+Ptt9/GggULsGPHDvj7+6NHjx7Izc2t5kjperJ582aMGDEC27dvx7p161BQUIDu3bsjKytLW2bMmDH45ptv8MUXX2Dz5s04e/YsBgwYoGPU5Olq1aqF1157Dbt378auXbvQtWtX9OvXD3/88QcA5iTp75dffsH777+Pli1bOrQzNwmSqlX79u3liBEjtOs2m03Gxs
bKmTNn6hgVXa8AyJUrV2rXVVWV0dHR8vXXX9fa0tLSpNlslp9++qkOEdL1KikpSQKQmzdvllIW5qG3t7f84osvtGUOHjwoAcht27bpFSZdh0JDQ+UHH3zAnCTdXb58WTZo0ECuW7dOdu7cWT7zzDNSSn5eUiGO5FWj/Px87N69G926ddPaFEVBt27dsG3bNh0jIyp0/PhxnD9/3iFHg4OD0aFDB+YoVav09HQAQFhYGABg9+7dKCgocMjNxo0bIz4+nrlJ1cJms2H58uXIyspCx44dmZOkuxEjRqBPnz4OOQjw85IKucXJ0D3FpUuXYLPZEBUV5dAeFRWFP//8U6eoiK44f/48ALjMUfttRFVNVVWMHj0aN998M5o3bw6gMDd9fHwQEhLisCxzk6ra/v370bFjR+Tm5iIgIAArV65E06ZNsWfPHuYk6Wb58uX49ddf8csvvzjdxs9LAljkERGRwYwYMQK///47tm7dqncoRGjUqBH27NmD9PR0/O9//8OQIUOwefNmvcOi61hiYiKeeeYZrFu3Dr6+vnqHQwbF3TWrUUREBEwmk9PsRhcuXEB0dLROURFdYc9D5ijpZeTIkfi///s/bNy4EbVq1dLao6OjkZ+fj7S0NIflmZtU1Xx8fFC/fn20adMGM2fORKtWrfDWW28xJ0k3u3fvRlJSEm688UZ4eXnBy8sLmzdvxttvvw0vLy9ERUUxN4lFXnXy8fFBmzZtsGHDBq1NVVVs2LABHTt21DEyokJ16tRBdHS0Q45mZGRgx44dzFGqUlJKjBw5EitXrsQPP/yAOnXqONzepk0beHt7O+TmoUOHcOrUKeYmVStVVZGXl8ecJN3cfvvt2L9/P/bs2aP9tW3bFg888IB2mblJ3F2zmo0dOxZDhgxB27Zt0b59e8ydOxdZWVkYNmyY3qHRdSIzMxNHjx7Vrh8/fhx79uxBWFgY4uPjMXr0aMyYMQMNGjRAnTp1MHHiRMTGxqJ///76BU0eb8SIEVi2bBlWrVqFwMBA7biR4OBgWCwWBAcHY/jw4Rg7dizCwsIQFBSEUaNGoWPHjvjHP/6hc/TkqSZMmIBevXohPj4ely9fxrJly7Bp0yasXbuWOUm6CQwM1I5XtvP390d4eLjWztwkFnnV7N5778XFixcxadIknD9/Hq1bt8aaNWucJrogqiq7du3Cbbfdpl0fO3YsAGDIkCFYvHgxnnvuOWRlZeHxxx9HWloabrnlFqxZs4b7/VOVmj9/PgCgS5cuDu2LFi3C0KFDAQD//ve/oSgKBg4ciLy8PPTo0QPvvfdeNUdK15OkpCQ8/PDDOHfuHIKDg9GyZUusXbsWd9xxBwDmJBkXc5OElFLqHQQRERERERFVDh6TR0RERERE5EFY5BEREREREXkQFnlEREREREQehEUeERERERGRB2GRR0RERERE5EFY5BEREREREXkQFnlEREREREQehEUeERERERGRB2GRR0RE5GEmTpyIxx9/vNLWl5+fj9q1a2PXrl2Vtk4iIqo6LPKIiKhKCCGu+jdlyhS9Q6x0tWvXxty5c3WN4fz583jrrbfw0ksvaW1ZWVm47777EBMTg8GDByM7O9vpPqNGjULdunVhNpsRFxeHvn37YsOGDQAAHx8fjBs3Ds8//3y19oWIiMqHRR4REVWJc+fOaX9z585FUFCQQ9u4ceP0DrFUpJSwWq3V+pj5+fnlvu8HH3yAm266CQkJCVrb3LlzERAQgO+//x4Wi8WhED1x4gTatGmDH374Aa+//jr279+PNWvW4LbbbsOIESO05R544AFs3boVf/zxR7ljIyKi6sEij4iIqkR0dLT2FxwcDCGEQ9vy5cvRpEkT+Pr6onHjxnjvvfe0+544cQJCCHz++ee49dZbYbFY0K5dOxw+fBi//PIL2rZti4CAAPTq1QsXL17U7jd06FD0798fU6dORWRkJIKCgvCvf/3LoWhSVRUzZ85EnTp1YLFY0KpVK/zvf//Tbt
+0aROEEPjuu+/Qpk0bmM1mbN26FceOHUO/fv0QFRWFgIAAtGvXDuvXr9fu16VLF5w8eRJjxozRRisBYMqUKWjdurXDczN37lzUrl3bKe5XXnkFsbGxaNSoEQAgMTER99xzD0JCQhAWFoZ+/frhxIkTV33ely9fjr59+zq0paamomHDhmjRogUaN26MtLQ07bannnoKQgjs3LkTAwcORMOGDdGsWTOMHTsW27dv15YLDQ3FzTffjOXLl1/18YmISH8s8oiIqNotXboUkyZNwiuvvIKDBw/i1VdfxcSJE7FkyRKH5SZPnoyXX34Zv/76K7y8vHD//ffjueeew1tvvYUff/wRR48exaRJkxzus2HDBhw8eBCbNm3Cp59+ihUrVmDq1Kna7TNnzsRHH32EBQsW4I8//sCYMWPw4IMPYvPmzQ7reeGFF/Daa6/h4MGDaNmyJTIzM9G7d29s2LABv/32G3r27Im+ffvi1KlTAIAVK1agVq1amDZtmjZaWRYbNmzAoUOHsG7dOvzf//0fCgoK0KNHDwQGBuLHH3/ETz/9hICAAPTs2bPEkb6UlBQcOHAAbdu2dWgfOXIk3n//fXh7e2PRokV45plntOXXrFmDESNGwN/f32l9ISEhDtfbt2+PH3/8sUz9IiKi6ueldwBERHT9mTx5Mt544w0MGDAAAFCnTh0cOHAA77//PoYMGaItN27cOPTo0QMA8Mwzz2Dw4MHYsGEDbr75ZgDA8OHDsXjxYod1+/j4YOHChfDz80OzZs0wbdo0jB8/HtOnT0dBQQFeffVVrF+/Hh07dgQA1K1bF1u3bsX777+Pzp07a+uZNm0a7rjjDu16WFgYWrVqpV2fPn06Vq5cia+//hojR45EWFgYTCYTAgMDER0dXebnxN/fHx988AF8fHwAAJ988glUVcUHH3ygjQouWrQIISEh2LRpE7p37+60jlOnTkFKidjYWIf22rVr48iRI0hKSkJUVJS2vqNHj0JKicaNG5cqxtjYWJw8ebLMfSMiourFIo+IiKpVVlYWjh07huHDh+Oxxx7T2q1WK4KDgx2WbdmypXY5KioKANCiRQuHtqSkJIf7tGrVCn5+ftr1jh07IjMzE4mJicjMzER2drZD8QYUHgN3ww03OLQVHw3LzMzElClT8O233+LcuXOwWq3IycnRRvIqqkWLFlqBBwB79+7F0aNHERgY6LBcbm4ujh075nIdOTk5AABfX1+n2xRFcSo+pZRlitFisThN2kJERMbDIo+IiKpVZmYmAOC///0vOnTo4HCbyWRyuO7t7a1dto8+FW9TVbXMj/3tt9+iZs2aDreZzWaH68V3Xxw3bhzWrVuHOXPmoH79+rBYLBg0aNA1J0lRFMWpmCooKHBarvjjZWZmok2bNli6dKnTspGRkS4fKyIiAkDhMXglLVNUgwYNIITAn3/+ec1lgcLdO0uzXiIi0heLPCIiqlZRUVGIjY3FX3/9hQceeKDS1793717k5OTAYrEAALZv346AgADExcUhLCwMZrMZp06dctg1szR++uknDB06FHfffTeAwiKs+CQoPj4+sNlsDm2RkZE4f/48pJRaobpnz55rPt6NN96Izz77DDVq1EBQUFCpYqxXrx6CgoJw4MABNGzY8JrLh4WFoUePHpg3bx6efvppp0IzLS3N4bi833//3WnEk4iIjIcTrxARUbWbOnUqZs6cibfffhuHDx/G/v37sWjRIrz55psVXnd+fj6GDx+OAwcOYPXq1Zg8eTJGjhwJRVEQGBiIcePGYcyYMViyZAmOHTuGX3/9Fe+8847TpC/FNWjQACtWrMCePXuwd+9e3H///U6jiLVr18aWLVtw5swZXLp0CUDhrJsXL17E7NmzcezYMcybNw/ffffdNfvxwAMPICIiAv369cOPP/6I48ePY9OmTXj66adx+vRpl/dRFAXdunXD1q1bS/lsAfPmzYPNZkP79u3x5Zdf4siRIzh48CDefvtt7bhFux
9//NHlsYBERGQsLPKIiKjaPfroo/jggw+waNEitGjRAp07d8bixYtRp06dCq/79ttvR4MGDdCpUyfce++9uOuuuxxOvD59+nRMnDgRM2fORJMmTdCzZ098++2313zsN998E6GhobjpppvQt29f9OjRAzfeeKPDMtOmTcOJEydQr149bbfGJk2a4L333sO8efPQqlUr7Ny5s1TnCPTz88OWLVsQHx+PAQMGoEmTJhg+fDhyc3OvOrL36KOPYvny5aXejbVu3br49ddfcdttt+HZZ59F8+bNcccdd2DDhg2YP3++tty2bduQnp6OQYMGlWq9RESkHyHLetQ1ERGRQQ0dOhRpaWn46quv9A5FN1JKdOjQAWPGjMHgwYMrbb333nsvWrVqhRdffLHS1klERFWDI3lEREQeRAiB//znP7BarZW2zvz8fLRo0QJjxoyptHUSEVHV4UgeERF5DI7kERERscgjIiIiIiLyKNxdk4iIiIiIyIOwyCMiIiIiIvIgLPKIiIiIiIg8CIs8IiIiIiIiD8Iij4iIiIiIyIOwyCMiIiIiIvIgLPKIiIiIiIg8CIs8IiIiIiIiD8Iij4iIiIiIyIP8PygOK17ad2CkAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "madrid_s = climate.where(\n", + " (climate[\"city\"] == \"Madrid\") & (climate[\"day\"] >= SUMMER_START) & (climate[\"day\"] <= SUMMER_END)\n", + ")[\"temperature\"].to_numpy()\n", + "\n", + "london_s = climate.where(\n", + " (climate[\"city\"] == \"London\") & (climate[\"day\"] >= SUMMER_START) & (climate[\"day\"] <= SUMMER_END)\n", + ")[\"temperature\"].to_numpy()\n", + "\n", + "fig, ax = plt.subplots(figsize=(9, 4))\n", + "bins = np.linspace(0, 45, 30)\n", + "ax.hist(madrid_s, bins=bins, alpha=0.7, color=\"#e63946\", label=\"Madrid\")\n", + "ax.hist(london_s, bins=bins, alpha=0.7, color=\"#457b9d\", label=\"London\")\n", + "ax.axvline(\n", + " madrid_s.mean(),\n", + " color=\"#e63946\",\n", + " linestyle=\"--\",\n", + " linewidth=1.5,\n", + " label=f\"Madrid mean {madrid_s.mean():.1f}°C\",\n", + ")\n", + "ax.axvline(\n", + " london_s.mean(),\n", + " color=\"#457b9d\",\n", + " linestyle=\"--\",\n", + " linewidth=1.5,\n", + " label=f\"London mean {london_s.mean():.1f}°C\",\n", + ")\n", + "ax.set_xlabel(\"Temperature (°C)\")\n", + "ax.set_ylabel(\"Days\")\n", + "ax.set_title(\"Summer temperature distribution — Madrid vs London\")\n", + "ax.legend()\n", + "ax.grid(True, linestyle=\"--\", alpha=0.4)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8a690193", + "metadata": {}, + "source": [ + "### 5.3 Mean summer temperature — all cities ranked" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "334a833a", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:15.573465Z", + "iopub.status.busy": "2026-04-07T12:06:15.573114Z", + "iopub.status.idle": "2026-04-07T12:06:16.558429Z", + "shell.execute_reply": "2026-04-07T12:06:16.555709Z" + } + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA90AAAGGCAYAAABmGOKbAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnZ5JREFUeJzs3XdcleX/P/DXfQABGQcREFBwIyqiODJFEUfinp/cAy3NRM00TRpqw3CmlqYtQVPDzJFaYS7cVGLkQhzJyIGpDEFl3ffvD3/c304ogpwBl6/n43Eeee5x3e9zXofTue5x3ZKiKAqIiIiIiIiISO80pi6AiIiIiIiISFTsdBMREREREREZCDvdRERERERERAbCTjcRERERERGRgbDTTURERERERGQg7HQTERERERERGQg73UREREREREQGwk43ERERERERkYGw001ERERERERkIOx0ExFRuVGrVi0EBweXaNnAwEAEBgYatB5Tmjt3LiRJwq1btwy2jcTEREiShIiIiGKXi4iIgCRJOHHihMFqoZILDg6Gra2tqcsgIqISYqebiIgM7vLly3jllVdQp04dWFlZwd7eHv7+/li+fDnu37//2PXOnTuHuXPnIjEx0XjFEtFjbdy4EcuWLTN1GUREFYq5qQsgIiKx/fjjj3jxxRdhaWmJUaNGwcfHB7m5uThy5AhmzJiBs2fP4osvvgAAJCQkQKP5v/3B586dw3vvvYfAwEDUqlVLp91ffvnFmC+DiPCw033mzBlMnTrV1KUQEVUY7HQTEZHBXLlyBUOGDEHNmjWxf/9+uLm5qfNCQkJw6dIl/Pjjj+o0S0vLErddqVIlvdb6NGRZRm5uLqysrExdCgnm3r17qFy5sqnLICIiPeDp5UREZDALFy5EVlYWvv76a50Od6F69erhtddeU5//+5ruiIgIvPjiiwCAjh07QpIkSJKE6OhoAI++pjsnJwdz5sxBvXr1YGlpCQ8PD8ycORM5OTk6y+3Zswft2rWDg4MDbG1t0aBBA7z11ltPfD2SJGHSpEnYsGEDGjduDEtLS0RFRQEAFi9ejLZt26Jq1aqwtrZGixYt8P333z+2je3bt8PHxweWlpZo3Lix2k5xkpKSUK9ePfj4+CA1NRUAkJ6ejqlTp8LDwwOWlpaoV68eFixYAFmWddZNT09HcHAwtFotHBwcMHr0aKSnpz9xm/927949vPLKK6hatSrs7e0xatQopKWlqfNHjx4NJycn5OXlFVm3a9euaNCgQbHtX7x4EQMHDoSrqyusrKxQo0YNDBkyBBkZGQCKvwZdkiTMnTtXfV54TfyFCxcwYsQIaLVaODs7491334WiKEhJSUHfvn1hb28PV1dXLFmyRKe96OhoSJKE7777Du+99x6qV68OOzs7/O9//0NGRgZycnIwdepUuLi4wNbWFmPGjCnyOQOA9evXo0WLFrC2toajoyOGDBmClJQUnWUCAwPh4+OD2NhYBAQEoHLlyiX6PF69ehX9+vWDra0tnJ2d8cYbb6CgoEBnGVmWsWzZMjRu3BhWVlaoVq0aXnnlFZ3cAOCHH35Az5494e7uDktLS9StWxcffPCBTnuBgYH48ccfkZSUpP49/vcMFCIiKopHuomIyGB27tyJOnXqoG3btqVeNyAgAFOmTMEnn3yCt956Cw0bNgQA9b//Jcsy+vTpgyNHjmD8+PFo2LAhTp8+jaVLl+LChQvYvn07AODs2bPo1asXfH198f7778PS0hKXLl3C0aNHS1TX/v378d1332HSpElwcnJSOx3Lly9Hnz59MHz4cOTm5iIyMhIvvvgidu3ahZ49e+q0ceTIEWzduhUTJ06EnZ0dPvnkEwwcOBDJycmoWrXqI7d7+fJldOrUCY6OjtizZw+cnJxw7949dOjQAVevXsUrr7wCT09PHDt2DKGhobh+/bp67a2iKOjbty+OHDmCCRMmoGHDhti2bRtGjx5dotdcaNKkSXBwcMDcuXORkJC
AVatWISkpSe2gjhw5EuvWrcPu3bvRq1cvdb0bN25g//79mDNnzmPbzs3NRVBQEHJycjB58mS4urri6tWr2LVrF9LT06HVaktVa6HBgwejYcOGmD9/Pn788Ud8+OGHcHR0xOeff45OnTphwYIF2LBhA9544w20atUKAQEBOuuHhYXB2toas2bNwqVLl/Dpp5/CwsICGo0GaWlpmDt3LmJiYhAREYHatWtj9uzZ6rrz5s3Du+++i0GDBuHll1/GP//8g08//RQBAQH4448/4ODgoC57+/ZtdO/eHUOGDMGIESNQrVq1Yl9XQUEBgoKC0Lp1ayxevBh79+7FkiVLULduXbz66qvqcq+88goiIiIwZswYTJkyBVeuXMGKFSvwxx9/4OjRo7CwsADwcCeXra0tpk2bBltbW+zfvx+zZ89GZmYmFi1aBAB4++23kZGRgb///htLly4FAA7oRkRUEgoREZEBZGRkKACUvn37lnidmjVrKqNHj1afb968WQGgHDhwoMiyHTp0UDp06KA+/+abbxSNRqMcPnxYZ7nVq1crAJSjR48qiqIoS5cuVQAo//zzT2lejqIoigJA0Wg0ytmzZ4vMu3fvns7z3NxcxcfHR+nUqVORNipVqqRcunRJnfbnn38qAJRPP/1UnTZnzhy1zvj4eMXd3V1p1aqVcufOHXWZDz74QLGxsVEuXLigs41Zs2YpZmZmSnJysqIoirJ9+3YFgLJw4UJ1mfz8fKV9+/YKACU8PLzY1x0eHq4AUFq0aKHk5uaq0xcuXKgAUH744QdFURSloKBAqVGjhjJ48GCd9T/++GNFkiTlr7/+euw2/vjjDwWAsnnz5scuc+XKlcfWC0CZM2eO+rzw/Rs/frzOa65Ro4YiSZIyf/58dXpaWppibW2t89k7cOCAAkDx8fHRec1Dhw5VJElSunfvrrP9Nm3aKDVr1lSfJyYmKmZmZsq8efN0ljt9+rRibm6uM71Dhw4KAGX16tWPfe3/Nnr0aAWA8v777+tM9/PzU1q0aKE+P3z4sAJA2bBhg85yUVFRRab/9/OrKIryyiuvKJUrV1YePHigTuvZs6fO6yQioifj6eVERGQQmZmZAAA7OzujbG/z5s1o2LAhvL29cevWLfXRqVMnAMCBAwcAQD26+MMPPxQ5BbskOnTogEaNGhWZbm1trf47LS0NGRkZaN++PU6ePFlk2S5duqBu3brqc19fX9jb2+Ovv/4qsuyZM2fQoUMH1KpVC3v37kWVKlV0XnP79u1RpUoVndfcpUsXFBQU4NChQwCAn376Cebm5jpHQM3MzDB58uRSvfbx48erR0YB4NVXX4W5uTl++uknAIBGo8Hw4cOxY8cO3L17V11uw4YNaNu2LWrXrv3YtguPZO/evRv37t0rVV3Fefnll9V/m5mZoWXLllAUBS+99JI63cHBAQ0aNHjk+z9q1Cid19y6dWsoioKxY8fqLNe6dWukpKQgPz8fALB161bIsoxBgwbpZOPq6or69eurn8dClpaWGDNmTKle24QJE3Set2/fXuc1bN68GVqtFi+88IJODS1atICtra1ODf/+/N69exe3bt1C+/btce/ePZw/f75UdRERkS52uomIyCDs7e0BQKfzZUgXL17E2bNn4ezsrPPw8vICANy8eRPAw9ON/f398fLLL6NatWoYMmQIvvvuuxJ3wB/Xcdy1axeef/55WFlZwdHREc7Ozli1apV6PfK/eXp6FplWpUqVItfZAkDv3r1hZ2eH3bt3q+/pv19zVFRUkdfcpUsXndeclJQENze3IqcCP+ka6/+qX7++znNbW1u4ubnp3NJt1KhRuH//PrZt2wbg4Yj0sbGxGDlyZLFt165dG9OmTcNXX30FJycnBAUFYeXKlY98/0rjv++1VquFlZUVnJycikx/1Pv/qPUBwMPDo8h0WZbVei9evAhFUVC/fv0i+cTHx6vZFKpevXqpBge0srKCs7OzzrT/foYuXryIjIwMuLi
4FKkhKytLp4azZ8+if//+0Gq1sLe3h7OzM0aMGAEAZc6AiOhZx2u6iYjIIOzt7eHu7o4zZ84YZXuyLKNJkyb4+OOPHzm/sJNkbW2NQ4cO4cCBA/jxxx8RFRWFTZs2oVOnTvjll19gZmZW7Hb+fUSw0OHDh9GnTx8EBATgs88+g5ubGywsLBAeHo6NGzcWWf5x21AUpci0gQMHYu3atdiwYQNeeeWVIq/5hRdewMyZMx/ZXuEOB2Nq1KgRWrRogfXr12PUqFFYv349KlWqhEGDBj1x3SVLliA4OBg//PADfvnlF0yZMgVhYWGIiYlBjRo1IEnSI9f77+Bh//ao97o07//jln1SG7IsQ5Ik/Pzzz49c9r87QB71uSrOkz6nhTW4uLhgw4YNj5xf2GlPT09Hhw4dYG9vj/fffx9169aFlZUVTp48iTfffPOpzgghIqL/w043EREZTK9evfDFF1/g+PHjaNOmTanXf1wn61Hq1q2LP//8E507d37iehqNBp07d0bnzp3x8ccf46OPPsLbb7+NAwcOqEeJS2PLli2wsrLC7t27dW57Fh4eXuq2/mvRokUwNzdXB10bNmyYOq9u3brIysp6Ys01a9bEvn37kJWVpdPZS0hIKFUtFy9eRMeOHdXnWVlZuH79Onr06KGz3KhRozBt2jRcv34dGzduRM+ePXVOiy9OkyZN0KRJE7zzzjs4duwY/P39sXr1anz44YdqG/8ddT0pKalUr8MY6tatC0VRULt2bZPs/CisYe/evfD39y+2Ux8dHY3bt29j69atOgPJXblypciypfmbJCKih3h6ORERGczMmTNhY2ODl19+Wb3F1b9dvnwZy5cvf+z6NjY2AIp2sh5l0KBBuHr1Kr788ssi8+7fv4/s7GwAwJ07d4rMb9asGQA88pZPJWFmZgZJknSOuCYmJqojppeFJEn44osv8L///Q+jR4/Gjh071HmDBg3C8ePHsXv37iLrpaenq9cX9+jRA/n5+Vi1apU6v6CgAJ9++mmpavniiy90bge2atUq5Ofno3v37jrLDR06FJIk4bXXXsNff/2lnqZcnMzMTLXeQk2aNIFGo1Fzsbe3h5OTk3qteqHPPvusVK/DGAYMGAAzMzO89957RY6gK4qC27dvG7yGQYMGoaCgAB988EGRefn5+erfVeFR83/XmZub+8j31cbGhqebExGVEo90ExGRwdStWxcbN25Ub9s0atQo+Pj4IDc3F8eOHcPmzZvV+3I/SrNmzWBmZoYFCxYgIyMDlpaW6NSpE1xcXIosO3LkSHz33XeYMGECDhw4AH9/fxQUFOD8+fP47rvvsHv3brRs2RLvv/8+Dh06hJ49e6JmzZq4efMmPvvsM9SoUQPt2rV7qtfZs2dPfPzxx+jWrRuGDRuGmzdvYuXKlahXrx5OnTr1VG3+m0ajwfr169GvXz8MGjQIP/30Ezp16oQZM2Zgx44d6NWrF4KDg9GiRQtkZ2fj9OnT+P7775GYmAgnJyf07t0b/v7+mDVrFhITE9GoUSNs3bq11J2n3NxcdO7cGYMGDUJCQgI+++wztGvXDn369NFZztnZGd26dcPmzZvh4OBQ5JZpj7J//35MmjQJL774Iry8vJCfn49vvvkGZmZmGDhwoLrcyy+/jPnz5+Pll19Gy5YtcejQIVy4cKFUr8MY6tatiw8//BChoaFITExEv379YGdnhytXrmDbtm0YP3483njjDYPW0KFDB7zyyisICwtDXFwcunbtCgsLC1y8eBGbN2/G8uXL8b///Q9t27ZFlSpVMHr0aEyZMgWSJOGbb7555On2LVq0wKZNmzBt2jS0atUKtra26N27t0FfBxFRRcdONxERGVSfPn1w6tQpLFq0CD/88ANWrVoFS0tL+Pr6YsmSJRg3btxj13V1dcXq1asRFhaGl156CQUFBThw4MAjO90ajQbbt2/H0qVLsW7dOmzbtg2VK1dGnTp18Nprr6mn+Pbp0weJiYlYs2Y
Nbt26BScnJ3To0AHvvffeU98LulOnTvj6668xf/58TJ06FbVr18aCBQuQmJiol043AFhYWOD7779H9+7d0bdvX+zduxetW7fGwYMH8dFHH2Hz5s1Yt24d7O3t4eXlpfN6NBoNduzYgalTp2L9+vWQJAl9+vTBkiVL4OfnV+IaVqxYgQ0bNmD27NnIy8vD0KFD8cknnzzylONRo0Zh165dGDRokM4p94/TtGlTBAUFYefOnbh69SoqV66Mpk2b4ueff8bzzz+vLjd79mz8888/+P777/Hdd9+he/fu+Pnnnx/5mTC1WbNmwcvLC0uXLsV7770H4OHYAl27di2yo8JQVq9ejRYtWuDzzz/HW2+9BXNzc9SqVQsjRoyAv78/AKBq1arYtWsXpk+fjnfeeQdVqlTBiBEj0LlzZwQFBem0N3HiRMTFxSE8PBxLly5FzZo12ekmInoCSXnUbkwiIiKiMvjhhx/Qr18/HDp0CO3btzd1OURERCbDTjcRERHpXa9evRAfH49Lly5x8C0iInqm8fRyIiIi0pvIyEicOnUKP/74I5YvX84ONxERPfN4pJuIiIj0RpIk2NraYvDgwVi9ejXMzbl/n4iInm38PyERERHpDfflExER6eJ9uomIiIiIiIgMhJ1uIiIiIiIiIgPh6eUGIssyrl27Bjs7Ow4iQ0REREREJBhFUXD37l24u7tDo3n88Wx2ug3k2rVr8PDwMHUZREREREREZEApKSmoUaPGY+ez020gdnZ2AICkpCQ4ODiYthgqM1mW8ffff6NGjRrF7sWiioF5ioV5ioNZioV5ioV5ioV56kdmZiY8PDzUvt/jsNNtIIWnlNvb28Pe3t7E1VBZybIMW1tb2Nvb84tJAMxTLMxTHMxSLMxTLMxTLMxTv550OTHfYSIiIiIiIiIDYaebiIiIiIiIyEDY6TYwjlwuBkmS4O7uzjwFwTzFwjzFwSzFwjzFwjzFwjyNi51uA+MHWQySJMHc3Jx5CoJ5ioV5ioNZioV5ioV5ioV5Ghc73QYmy7KpSyA9kGUZycnJzFMQzFMszFMczFIszFMszFMszNO42OkmIiIiIiIiMhB2uomIiIiIiIgMhJ1uIiIiIiIiIgORFEVRTF2EiDIzM6HVapGRkQF7e3tTl0N6IMsyNBrupxIF8xQL8xQHsxQL8xQL8xQL8yy7kvb5+C4bGPdpiEFRFOTn5zNPQTBPsTBPcTBLsTBPsTBPsTBP42Kn28D4QRaDoii4du0a8xQE8xQL8xQHsxQL8xQL8xQL8zQudrqJiIj0qIC3XyEiIqJ/MTd1AaJb9sMR3MwqMHUZVEYSgOo2wNXsP8H9gRUf8xRLecrT00WLWS8GmrgKIiIiKk/Y6Tawq//cRVL6fVOXQWWkkQCzqpa4fDsHsql/1VOZMU+xME+xSJJk6hJIj5inWJinWJin8bDTbWD8/ScGWQHO38oxdRmkJ8xTLMxTHBqNBjVr1jR1GaQnzFMszFMszNO4eE03UQnZWPDPRSTMUyzMUwyKouD+/fsc2EcQzFMszFMszNO4+CvFwHjShhg0ElDTwQIaBioE5imWipLnoUOH0Lt3b7i7u0OSJGzfvv2Ry8XHx6NPnz7QarWwsbFBq1atkJyc/Nh2IyIiIEmSzsPKyuqJ9bz33nuoUaMG2rVrhwsXLujMy83NxcKFC9G0aVNUrlwZTk5O8Pf3R3h4OPLy8kr1uktDURSkpqbyR6AgmKdYmKdYmKdxPROd7uJ+3BARERlDdnY2mjZtipUrVz52mcuXL6Ndu3bw9vZGdHQ0Tp06hXffffeJnWh7e3tcv35dfSQlJRW7/NGjR/Hjjz/ihx9+wLBhwzBp0iR1Xm5uLoKCgjB//nyMHz8ex44dw2+//YaQkBB8+umnOHv2bOleOBER0TPO6Nd0BwcHIz09nZ1gIiJ6pnTv3h3du3cvdpm3334bPXr0wMKFC9VpdevWfWLbkiTB1dW1xLWkpaXB3d0dvr6+yM/
PR0REhDpv2bJlOHToEE6cOAE/Pz91ep06dfDiiy8iNze3xNshIiKiZ+RIN5E+5OTz9BuRME+xiJCnLMv48ccf4eXlhaCgILi4uKB169Yl2kmdlZWFmjVrwsPDA3379n3i0eigoCA8ePAAlStXRrdu3RAWFqbO27BhA7p06aLT4S5kYWEBGxubUr+20rCwsDBo+2RczFMszFMszNN4ylWn++DBg3juuedgaWkJNzc3zJo1C/n5+er8wMBATJkyBTNnzoSjoyNcXV0xd+5cnTYuXryIgIAAWFlZoVGjRtizZ0+R7Zw+fRqdOnWCtbU1qlativHjxyMrK0udHxwcjH79+mHx4sVwc3ND1apVERIS8lTXsVX8n4EEPBwd+XJaLm9HJAjmKRZR8rx58yaysrIwf/58dOvWDb/88gv69++PAQMG4ODBg49dr0GDBlizZg1++OEHrF+/HrIso23btvj7778fu46FhQWioqJw9epVpKamonPnzuq8ixcvwtvbW6+vraQ0Gg2qV68OjaZc/Tyhp8Q8xcI8xcI8javcvMtXr15Fjx490KpVK/z5559YtWoVvv76a3z44Yc6y61duxY2Njb49ddfsXDhQrz//vtqx1qWZQwYMACVKlXCr7/+itWrV+PNN9/UWT87OxtBQUGoUqUKfv/9d2zevBl79+7VuZ4NAA4cOIDLly/jwIEDWLt2LSIiInROv/uvnJwcZGZm6jxILA5W5ebPhfSAeYpFhDxlWQYA9O3bF6+//jqaNWuGWbNmoVevXli9evVj12vTpg1GjRqFZs2aoUOHDti6dSucnZ3x+eefP3GbLi4uqFSpks40Uw6qoygK7t69y4F9BME8xcI8xcI8javc/Er57LPP4OHhgRUrVsDb2xv9+vXDe++9hyVLlqg/RADA19cXc+bMQf369TFq1Ci0bNkS+/btAwDs3bsX58+fx7p169C0aVMEBATgo48+0tnOxo0b8eDBA6xbtw4+Pj7o1KkTVqxYgW+++QapqanqclWqVFFr6dWrF3r27Klu51HCwsKg1WrVh4eHBwCOXi4KjQS425X/0ZGpZJinWETJ08nJCebm5mjUqJHO9IYNGxY7evl/WVhYwM/PD5cuXXqqOry8vHD+/PmnWresFEXB7du3+SNQEMxTLMxTLMzTuMpNpzs+Ph5t2rSBJP3fryZ/f39kZWXpnCLn6+urs56bmxtu3ryptuHh4QF3d3d1fps2bYpsp2nTpjrXpPn7+0OWZSQkJKjTGjduDDMzs0du51FCQ0ORkZGhPlJSUkr60omIiFCpUiW0atVK5/9FAHDhwgXUrFmzxO0UFBTg9OnTcHNze6o6hg0bhr179+KPP/4oMi8vLw/Z2dlP1S4REdGzyuijl5fVfy/4lyRJ50i4qbZjaWkJS0tLvddBRERiyMrK0jn6fOXKFcTFxcHR0RGenp4AgBkzZmDw4MEICAhAx44dERUVhZ07dyI6Olpdb9SoUahevbo6+Nn777+P559/HvXq1UN6ejoWLVqEpKQkvPzyy09V59SpU/Hjjz+ic+fO+OCDD9CuXTvY2dnhxIkTWLBgAb7++ms0a9bsqd8HIiKiZ0256XQ3bNgQW7ZsgaIo6tHuo0ePws7ODjVq1ChxGykpKbh+/bq6hz8mJqbIMhEREcjOzlaPdh89ehQajQYNGjTQ4ysikSgKkJ0rg2fgiIF5iqWi5HnixAl07NhRfT5t2jQAwOjRo9UxQ/r374/Vq1cjLCwMU6ZMQYMGDbBlyxa0a9dOXS85OVln4Ju0tDSMGzcON27cQJUqVdCiRQscO3asyGnqJWVpaYk9e/Zg6dKl+Pzzz/HGG2+gcuXKaNiwIaZMmQIfH5+narekrK2tDdo+GRfzFAvzFAvzNB6TdLozMjIQFxenM238+PFYtmwZJk+ejEmTJiEhIQFz5szBtGnTSjyqXpcuXeDl5YXRo0dj0aJFyMzMxNtvv62zzPDhwzFnzhyMHj0ac+fOxT/
//IPJkydj5MiRqFatmr5eoqqc/wakElIAJGWUfvR6Kp+Yp1gqSp6BgYElunZu7NixGDt27GPn//uoNwAsXboUS5cuLWt5OiwtLTFr1izMmjVLr+0+iUajMcj/i8k0mKdYmKdYmKdxmeSa7ujoaPj5+ek8PvjgA/z000/47bff0LRpU0yYMAEvvfQS3nnnnRK3q9FosG3bNty/fx/PPfccXn75ZcybN09nmcqVK2P37t24c+cOWrVqhf/973/o3LkzVqxYoe+XSQKRADhXNuPAeIJgnmJhnuJQFAXp6ekc2EcQzFMszFMszNO4JIXvtEFkZmZCq9Xi5UUbkZR+39TlUBlpJMDbyRLnb+VU+HsBE/MUTXnKs55bVXwW0te0RVRgsiwjOTkZnp6evHesAJinWJinWJinfhT2+TIyMmBvb//Y5fgOExERERERERkIO91EREREREREBsJON1EJKAqQdr+g3I+OTCXDPMXCPMVia2tr6hJIj5inWJinWJin8ZSbW4aJir8BxaAAuJ6Vb+oySE+Yp1iYpzg0Gg2cnJxMXQbpCfMUC/MUC/M0Lna6DayGsx0qWVc2dRlURhIAh0pAei53pIiAeYqlPOXp6aI1cQUVmyzLuHPnDhwdHTmwjwCYp1iYp1iYp3Gx021gU/u2g4ODg6nLoDLiCI9iYZ5iKW95FsgyzMpBHRVVVlYWHB0dTV0G6QnzFAvzFAvzNB7+KiAiItIjdriJiIjo3/jLgIiIiIiIiMhA2Ok2MEmSTF0C6YEkSXBwcGCegmCeYmGe4mCWYmGeYmGeYmGexiUpCm+yYgiZmZnQarXIyMiAvb29qcshIiIiIiIiPSppn49Hug1MlmVTl0B6IMsyUlNTmacgmKdYmKc4/p2lwjwrPP5tioV5ioV5GhdHLzewBzFrcb8gzdRlUBnJioRMxRG20h1oJJ4cUtExT7EwT3EUZmnnYInK7cebuhzSg/v375u6BNIj5ikW5mk87HQbmJJ5A3JOqqnLoDKSIUG2AOS8ZJj+TsBUVsxTLMxTHIVZKrzEkIiIBMLTy4mIiIiIiIgMhJ1uA+POejFIUFCl4DYkHkUTAvMUC/MUB7MUiyRJqFq1KkdHFgTzFAvzNC52ug2Mn2MxSABs5CzuRBEE8xQL8xSHmiXDFIIkSbCzs+OPekEwT7EwT+Nip9vAZO6sF4IMCanm7pD5s14IzFMszFMcapbF/L8zLCwMrVq1gp2dHVxcXNCvXz8kJCToLPPgwQOEhISgatWqsLW1xcCBA5GaWvz4Klu3bkXXrl3VIz9xcXFPrLegoAATJ06Em5sbevTogZs3b+rMz8zMxNtvvw1vb29YWVnB1dUVXbp0wdatW/Es3LFVlmVcvXqVoyMLgnmKhXkal8E73cHBwZAkCRMmTCgyLyQkBJIkITg42NBlEJVZnmRh6hJIj5inWJinOB5m+fgdKAcPHkRISAhiYmKwZ88e5OXloWvXrsjOzlaXef3117Fz505s3rwZBw8exLVr1zBgwIBit5udnY127dphwYIFJa41MjISycnJ2L17N5o3b4533nlHnZeeno62bdti3bp1CA0NxcmTJ3Ho0CEMHjwYM2fOREZGRom3U5Hl5eWZugTSI+YpFuZpPEYZvdzDwwORkZFYunQprK2tATzcC71x40Z4enoaowQiIiISQFRUlM7ziIgIuLi4IDY2FgEBAcjIyMDXX3+NjRs3olOnTgCA8PBwNGzYEDExMXj++ecf2e7IkSMBAImJiSWuJS0tDbVq1YKPjw/i4+OxZcsWdd5bb72FxMREXLhwAe7u7up0Ly8vDB06FFZWViXeDhERVWxGOb28efPm8PDwwNatW9VpW7duhaenJ/z8/NRpOTk5mDJlClxcXGBlZYV27drh999/V+enpaVh+PDhcHZ2hrW1NerXr4/w8HB1/t9//42hQ4fC0dERNjY2aNmyJX799Vd1/qpVq1C3bl1UqlQJDRo0wDfffKPOe+ONN9C
rVy/1+bJlyyBJks7/3OvVq4evvvpKf28MERERlUnhEWNHR0cAQGxsLPLy8tClSxd1GW9vb3h6euL48eN63faIESNw/PhxWFpaYvr06eqRblmWERkZieHDh+t0uAvZ2trC3Jx3bSUielYY7ZrusWPH6nSQ16xZgzFjxugsM3PmTGzZsgVr167FyZMnUa9ePQQFBeHOnTsAgHfffRfnzp3Dzz//jPj4eKxatQpOTk4AgKysLHTo0AFXr17Fjh078Oeff2LmzJnqdQrbtm3Da6+9hunTp+PMmTN45ZVXMGbMGBw4cAAA0KFDBxw5cgQFBQUAHp6+5uTkhOjoaADA1atXcfnyZQQGBpbqdfMKQzFIUOCUn8oRdQXBPMXCPMVR2ixlWcbUqVPh7+8PHx8fAMCNGzdQqVIlODg46CxbrVo13LhxQ6/1Ojg4IDY2FikpKUhKSoKvry8A4NatW0hLS4O3t7det1fRSJKEatWqcaAmQTBPsTBP4zLabtYRI0YgNDQUSUlJAICjR48iMjJS7dRmZ2dj1apViIiIQPfu3QEAX375Jfbs2YOvv/4aM2bMQHJyMvz8/NCyZUsAQK1atdT2N27ciH/++Qe///67ure7Xr166vzFixcjODgYEydOBABMmzYNMTExWLx4MTp27Ij27dvj7t27+OOPP9CiRQscOnQIM2bMwPbt2wEA0dHRqF69uk6b/5aTk4OcnBz1eWZmJgCOXi4KCYCV8sDUZZCeME+xME9xFGZZ0v93hoSE4MyZMzhy5IhB63oSV1dXnefPwiBpJSFJknpZIVV8zFMszNO4jHak29nZGT179kRERATCw8PRs2dP9Sg1AFy+fBl5eXnw9/dXp1lYWOC5555DfHw8AODVV19FZGQkmjVrhpkzZ+LYsWPqsnFxcfDz81M73P8VHx+v0zYA+Pv7q207ODigadOmiI6OxunTp1GpUiWMHz8ef/zxB7KysnDw4EF06NDhsa8vLCwMWq1WfXh4eADg6OWikCHhmoUnR0cWBPMUC/MUh5ql8uQsJ02ahF27duHAgQOoUaOGOt3V1RW5ublIT0/XWT41NbVI59hQnJ2d4eDggPPnzxtle+WVLMtISkri6MiCYJ5iYZ7GZdRbho0dOxYRERFYu3Ytxo4dW+r1u3fvjqSkJLz++uu4du0aOnfujDfeeAMA9LKnJjAwENHR0WoH29HREQ0bNsSRI0ee2OkODQ1FRkaG+khJSSlzPVS+8Ae9WJinWJinOJ6UpaIomDRpErZt24b9+/ejdu3aOvNbtGgBCwsL7Nu3T52WkJCA5ORktGnTxiA1/5dGo8GQIUOwYcMGXLt2rcj8rKws5OfnG6UWU+NRf7EwT7EwT+Mxaqe7W7duyM3NRV5eHoKCgnTmFQ5wdvToUXVaXl4efv/9dzRq1Eid5uzsjNGjR2P9+vVYtmwZvvjiCwCAr68v4uLi1Ou//6thw4Y6bQMPT3H/d9uF13Xv27dPvXY7MDAQ3377LS5cuFDs9dyWlpawt7fXeRAREZF+hYSEYP369di4cSPs7Oxw48YN3LhxA/fv3wcAaLVavPTSS5g2bRoOHDiA2NhYjBkzBm3atNEZudzb2xvbtm1Tn9+5cwdxcXE4d+4cgIcd9bi4uKe+DnzevHnw8PBA69atsW7dOpw7dw4XL17EmjVr4Ofnh6ysrDK8C0REVJEYdehMMzMz9XRuMzMznXk2NjZ49dVXMWPGDDg6OsLT0xMLFy7EvXv38NJLLwEAZs+ejRYtWqBx48bIycnBrl270LBhQwDA0KFD8dFHH6Ffv34ICwuDm5sb/vjjD7i7u6NNmzaYMWMGBg0aBD8/P3Tp0gU7d+7E1q1bsXfvXrWGgIAA3L17F7t27cL8+fMBPOx0/+9//4Obmxu8vLyM8TYRERHRY6xatQoAiuwIDw8PR3BwMABg6dKl0Gg0GDhwIHJychAUFITPPvtMZ/mEhASde2Xv2LFDZ4DXIUOGAADmzJmDuXP
nlrpOR0dHxMTEYP78+fjwww+RlJSEKlWqoEmTJli0aBG0Wm2p2yQioorJ6PerKO4I8Pz58yHLMkaOHIm7d++iZcuW2L17N6pUqQIAqFSpEkJDQ5GYmAhra2u0b98ekZGR6rxffvkF06dPR48ePZCfn49GjRph5cqVAIB+/fph+fLlWLx4MV577TXUrl0b4eHhOv/TLvyfYWpqqjriaEBAAGRZLvbU8uLwhEcxSFBQLe8aR0cWBPMUC/MUx/9l6fbYZUpyOqSVlRVWrlyp/gYoSTvBwcFqp11ftFotwsLCEBYWptd2KwpJkuDu7s7RkQXBPMXCPI1LUngyv0FkZmZCq9XixnezYPsg1dTlkB7IkKDhj3phME+xME9xyJBg7ugB655zTV0K6YEsy9BojHo1IxkQ8xQL8yy7wj5fRkZGsQeX+S4bGEcvFwNHRxYL8xQL8xRHaUYvp/JPlmUkJydzdGRBME+xME/jYqebiIiIiIiIyEDY6SYiIiIiIiIyEHa6iYiIiIiIiAzE6KOXP2vMtK7Q5FuaugwqIw2AGgqgkTxMXQrpAfMUC/MUR2GWZg6PH72cKg6NRgNPT08O1CQI5ikW5mlc7HQbmOXzwbDmvTgrPEVRkJeXBwsLC95aQQDMUyzMUxz/zlKRZUj8MVihKYqC/Px8/m0KgnmKhXkaF/9vZmC8I5sYFEXBtWvXmKcgmKdYmKc4/p0lO9wVH/82xcI8xcI8jYv/RyMiIiIiIiIyEHa6iYiIiIiIiAyEnW6iEuL1LmJhnmJhnuJglmJhnmJhnmJhnsYjKTyR3yAyMzOh1WqRkZEBe3t7U5dDREREREREelTSPh+PdBsY92mIQVEU3L9/n3kKgnmKhXmKg1lWHIosP3kZ5ikU5ikW5mlcvGWYgd0/HoFKBWmmLoPKSFYkXFMc4S7dgUbil1NFxzzFwjzFwSwrBknrDqt245+4nKIoSE1NhaenJ09jFQDzFAvzNC52ug1MybwBOSfV1GVQGcmQIFsAcl4yAP4QrOiYp1iYpziYZcXA0ySJiEqH35tEREREREREBsJON1EJWSh5pi6B9Ih5ioV5ioNZisXCwsLUJZAeMU+xME/jEaLTHRgYiKlTp5q6jEfS8BIJIWigoFr+NWh4uqMQmKdYmKc4mKUYwsLC0KpVK2i1Wvj5+WHAgAFISEh45LKKoqB79+6QJAnbt29/Ytvx8fHo06cPtFotbGxs0KpVKyQnJz92+YKCAkycOBFubm7o0aMHbt68qTM/MzMTb7/9Nry9vWFlZQVXV1d06dIFW7du5QBT/6HRaFC9enVoNEJ0H555zNO4jP4u//PPP3j11Vfh6ekJS0tLuLq6IigoCEePHjV2KUbB72sxKACyNbb8GSgI5ikW5ikOZimGgwcPIiQkBMePH8f27duRl5eHrl27Ijs7u8iyy5YtK/EgTpcvX0a7du3g7e2N6OhonDp1Cu+++y6srKweu05kZCSSk5Oxe/duNG/eHO+88446Lz09HW3btsW6desQGhqKkydP4tChQxg8eDBmzpyJjIyM0r94gSmKgrt373JnhCCYp3EZfSC1gQMHIjc3F2vXrkWdOnWQmpqKffv24fbt28YuxSj4MRaDAglpZlVhLWdDYqoVHvMUC/MUB7MUQ1RUFABAlmUkJydjzZo1cHV1RWxsLAICAtTl4uLisGTJEpw4cQJubm5PbPftt99Gjx49sHDhQnVa3bp1i10nLS0NtWrVgo+PD+Lj47FlyxZ13ltvvYXExERcuHAB7u7u6nQvLy8MHTq02M78s0hRFNy+fRs2NjYc7VoAzNO4jHqkOz09HYcPH8aCBQvQsWNH1KxZE8899xxCQ0PRp08fjB07Fr169dJZJy8vDy4uLvj6668BANnZ2Rg1ahRsbW3h5uaGJUuWFNlOrVq18NFHH2Hs2LGws7ODp6cnvvjiC51lUlJSMGjQIDg4OMDR0RF9+/ZFYmIiAODQoUOwsLDAjRs3dNaZOnU
q2rdvr8d3hIiIiEhshUeMHR0d1Wn37t3DsGHDsHLlSri6uj6xDVmW8eOPP8LLywtBQUFwcXFB69atn3hK+ogRI3D8+HFYWlpi+vTp6pFuWZYRGRmJ4cOH63S4C9na2sLcnDf5ISL9MGqn29bWFra2tti+fTtycnKKzH/55ZcRFRWF69evq9N27dqFe/fuYfDgwQCAGTNm4ODBg/jhhx/wyy+/IDo6GidPnizS1pIlS9CyZUv88ccfmDhxIl599VX1eqK8vDwEBQXBzs4Ohw8fxtGjR2Fra4tu3bohNzcXAQEBqFOnDr755hu1vby8PGzYsAFjx47V99tCREREJCRZlvH666/D398fPj4+6vTXX38dbdu2Rd++fUvUzs2bN5GVlYX58+ejW7du+OWXX9C/f38MGDAABw8efOx6Dg4OiI2NRUpKCpKSkuDr6wsAuHXrFtLS0uDt7V22F0hEVAJG7XSbm5sjIiICa9euhYODA/z9/fHWW2/h1KlTAIC2bduiQYMGOp3d8PBwvPjii7C1tUVWVha+/vprLF68GJ07d0aTJk2wdu1a5OfnF9lWjx49MHHiRNSrVw9vvvkmnJyccODAAQDApk2bIMsyvvrqKzRp0gQNGzZEeHg4kpOTER0dDQB46aWXEB4erra3c+dOPHjwAIMGDXrka8vJyUFmZqbOg8Ripdw3dQmkR8xTLMxTHMxSLO+99x7Onj2LyMhIddqOHTuwf/9+LFu2rMTtyLIMAOjbty9ef/11NGvWDLNmzUKvXr2wevXqJ67v6uoKMzMz9TmvY3061tbWpi6B9Ih5Go/RB1IbOHAgrl27hh07dqBbt26Ijo5G8+bNERERAeDh0e7Czm5qaip+/vln9ejy5cuXkZubi9atW6vtOTo6okGDBkW2U7gnEwAkSYKrq6s6YuWff/6JS5cuwc7OTj367ujoiAcPHuDy5csAgODgYFy6dAkxMTEAgIiICAwaNAg2NjaPfF1hYWHQarXqw8PDAwBHLxeFBgqc8m9yRF1BME+xME9xMEuxTJkyBfv378eBAwdQo0YNdfr+/ftx+fJlODg4wNzcXD2Ne+DAgQgMDHxkW05OTjA3N0ejRo10pjds2LDY0csfx9nZGQ4ODjh//nyp131WaTQaVKtWjaNdC4J5GpdJ3mUrKyu88MILePfdd3Hs2DEEBwdjzpw5AIBRo0bhr7/+wvHjx7F+/XrUrl37qa6j/u995yRJUveSZmVloUWLFoiLi9N5XLhwAcOGDQMAuLi4oHfv3ggPDy/S+X+U0NBQZGRkqI+UlBQAHL1cFAqATI2WPwMFwTzFwjzFwSzFoCgKJk2ahG3btmH79u2oVauWzvxZs2bh1KlTOr/BAGDp0qU6Zxn+W6VKldCqVasitx67cOECatasWeoaNRoNhgwZgg0bNuDatWtF5mdlZT3yTMpnmaIoSE9P51kCgmCexlUuRoho1KiROhBG1apV0a9fP4SHh+P48eMYM2aMulzdunVhYWGBX3/9FZ6engAejkp54cIFdOjQocTba968OTZt2gQXFxfY29s/drmXX34ZQ4cORY0aNVC3bl34+/s/dllLS0tYWloWmc6PsRgUSMg0c4CtnMkRdQXAPMXCPMXBLMUQEhKCjRs3Ytu2bZBlGdevX4dGo4FWq4W1tTVcXV0fOXiap6cnateurT739vZGWFgY+vfvD+DhuD6DBw9GQEAAOnbsiKioKOzcuVO9NLC05s2bh+joaLRu3Rrz5s1Dy5YtYWFhgcOHDyMsLAy///47HBwcnqptERV20uzt7TnatQCYp3EZ9Uj37du30alTJ6xfvx6nTp3ClStXsHnzZixcuFBnII2XX34Za9euRXx8PEaPHq1Ot7W1xUsvvYQZM2Zg//79OHPmDIKDg0t9WsTw4cPh5OSEvn374vDhw7hy5Qqio6MxZcoU/P333+pyQUFBsLe3x4cffqjT+SciIiKiR1u1ahUyMjLQqVMnPPfcc6hevTr
c3NywadOmUrWTkJCgc6/s/v37Y/Xq1Vi4cCGaNGmCr776Clu2bEG7du2eqk5HR0fExMRgxIgR+PDDD+Hn54f27dvj22+/xaJFi6DVap+qXSKi/zLqkW5bW1u0bt0aS5cuxeXLl5GXlwcPDw+MGzcOb731lrpcly5d4ObmhsaNGxe5jcOiRYuQlZWF3r17w87ODtOnT9f5Qi6JypUr49ChQ3jzzTcxYMAA3L17F9WrV0fnzp11jnxrNBoEBwfjo48+wqhRo8r24omIiIieAYWnqxbep9vT0/OJB0gedYrro6aNHTtWr3eS0Wq1CAsLQ1hYmN7aJCL6L0kphyfyZ2VloXr16ggPD8eAAQNMWstLL72Ef/75Bzt27CjVepmZmdBqtbi2aRbsc1INVB0ZiwwJGWaO0Bbc4QA/AmCeYmGe4mCWFYPG0RPWPec+cTlZlnHnzh04OjpysCYBME+xME/9KOzzZWRkFHvZcrm4pruQLMu4desWlixZAgcHB/Tp08dktWRkZOD06dPYuHFjqTvc/8bRy8WggYIqBbdNXQbpCfMUC/MUB7MUi0ajgZOTk6nLID1hnmJhnsZVrnZrJCcno1q1ati4cSPWrFmj3kLCFPr27YuuXbtiwoQJeOGFF566HZk76oUgQ0KaWVXI4F4UETBPsTBPcTBLsRQeTCm8ewxVbMxTLMzTuMrVke5atWqVm2Hrn3YkTBJXtsYW2oI7pi6D9IR5ioV5ioNZiiUrKwuOjo6mLoP0hHmKhXkaT7k60k1EREREREQkknJ1pFtEkr0rNAVF799NFYwiQaM4QiMBGql8nI1BZcA8xcI8xcEsKwRJ6/7khYiISMVOt4FZtwmGNe/zWOEpigLnjAxU1mohSbzWsKJjnmJhnuJglhWHIsuQnjDisSRJcHBwYJaCYJ5iYZ7GVS5vGSaCkg4fT0RERERERBVPSft8vKbbwDgioBhkWUZqairzFATzFAvzFAezFAvzFAvzFAvzNC52uolK6P79+6YugfSIeYqFeYqDWYqFeYqFeYqFeRoPO91EREREREREBsJONxEREREREZGBsNNtYBwRUAySJKFq1arMUxDMUyzMUxzMsmJRnnAtKPMUC/MUC/M0Lt4yzMByYiJwPz/N1GWQHpgDeGDqIkhvmKdYmKc4mGXFIGndYdVufPHLSBLs7OyMVBEZGvMUC/M0Lna6Dawg4wbknFRTl0FlJEPCP+ZucM6/Dg14l72KjnmKhXmKg1lWHCU5VVKWZVy/fh1ubm7QPOGe3lT+MU+xME/j4jtMVEJ5koWpSyA9Yp5iYZ7iYJZiycvLM3UJpEfMUyzM03jY6SYiIiIiIiIyEHa6iYiIiEhvwsLC0KpVK2i1WrRs2RL9+/dHQkLCI5dVFAXdu3eHJEnYvn37E9uOj49Hnz59oNVqYWNjg1atWiE5OfmxyxcUFGDixIlwc3NDjx49cPPmTZ35mZmZePvtt+Ht7Q0rKyu4urqiS5cu2Lp1KxSFlzgQkX6w0/0YERERcHBwKHM7HA9QDBIUOOWnQuI1hkJgnmJhnuJglmI4ePAgQkJCcPz4cfz444/Iz89H165dkZ2dXWTZZcuWlXj05MuXL6Ndu3bw9vZGdHQ0Tp06hXfffRdWVlaPXScyMhLJycnYvXs3mjdvjnfeeUedl56ejrZt22LdunUIDQ3FyZMncejQIQwePBgzZ85ERkZG6V+8wCRJQrVq1TjatSCYp3GVqdMdHBwMSZIwf/58nenbt283WoAXLlxA5cqVsXHjRp3psiyjbdu2+N///meUOh6Hn2MxSACslAfciSII5ikW5ikOZimGqKgoBAcHw8fHB61bt0ZERASSk5MRGxurs1xcXByWLFmCNWvWlKjdt99+Gz169MDChQvh5+eHunXrok+fPnBxcXnsOmlpaahVqxZ8fHzQpEkTpKenq/PeeustJCYm4tdff8Xo0aPRqFEjeHl5Ydy4cYiLi4Otre1TvX5RSZIEa2trdtI
EwTyNq8xHuq2srLBgwQKkpZnmtlheXl6YP38+Jk+ejOvXr6vTlyxZgr/++gurV68udZv6HFRA5s56IciQcM3CEzJ/CgqBeYqFeYqDWYpFlmUkJSWpvxEdHR3Veffu3cOwYcOwcuVKuLq6lqitH3/8EV5eXggKCoKLiwtat279xFPSR4wYgePHj8PS0hLTp09Xj3TLsozIyEgMHz4c7u7uRdaztbWFuTlv8vNvhXnKT7g/O1UMzNO4ytzp7tKlC1xdXREWFlbsckeOHEH79u1hbW0NDw8PTJkyRT3NaMWKFfDx8VGXLTxS/u8Oc5cuXXROCfq3yZMno2nTphg3bhwA4Pz585g9eza++OILODo64v3330eNGjVgaWmJZs2aISoqSl03MTERkiRh06ZN6NChA6ysrLBhw4Yi2/jnn3/U65JycnJK/gaRMPgjUCzMUyzMUxzMUiwFBQV4/fXX4e/vr/Nb7/XXX0fbtm3Rt2/fErVz8+ZNZGVlYf78+ejWrRt++eUX9O/fHwMGDMDBgwcfu56DgwNiY2ORkpKCpKQk+Pr6AgBu3bqFtLQ0eHt7l+0FPmN4nbtYmKfxlLnTbWZmho8++giffvop/v7770cuc/nyZXTr1g0DBw7EqVOnsGnTJhw5cgSTJk0CAHTo0AHnzp3DP//8A+DhtUBOTk6Ijo4G8PDI8/HjxxEYGPjI9iVJQnh4OA4fPowvv/wSwcHBGDJkCPr06YPly5djyZIlWLx4MU6dOoWgoCD06dMHFy9e1Glj1qxZeO211xAfH4+goCCdeSkpKWjfvj18fHzw/fffw9LSskgNOTk5yMzM1HkQERERPctmz56Ns2fPIjIyUp22Y8cO7N+/H8uWLStxO4VH4/r27YvXX38dzZo1w6xZs9CrV68SndXo6uoKMzMz9Tk7G0RkTHoZSK1///5o1qwZ5syZ88j5YWFhGD58OKZOnYr69eujbdu2+OSTT7Bu3To8ePAAPj4+cHR0VPdURkdHY/r06erz3377DXl5eWjbtu1ja6hZsyaWLVuGCRMm4Pr161i+fDkAYPHixXjzzTcxZMgQNGjQAAsWLECzZs2KfNFPnToVAwYMQO3ateHm5qZOT0hIgL+/P4KCghAeHq7zhf3f16jVatWHh4dHid8/IiIiItFMnjwZ+/fvx759+1CjRg11+v79+3H58mU4ODjA3NxcPY174MCBjz3A4uTkBHNzczRq1EhnesOGDYsdvfxxnJ2d4eDggPPnz5d6XSKi0tLb6OULFizA2rVrER8fX2Ten3/+iYiICNja2qqPoKAgyLKMK1euQJIkBAQEIDo6Gunp6Th37hwmTpyInJwcnD9/HgcPHkSrVq1QuXLlYmsYM2YM3NzcMHnyZNjb2yMzMxPXrl2Dv7+/znL+/v5F6mzZsmWR9u7fv4/27dtjwIABWL58ebEDDYSGhiIjI0N9pKSkAODo5aKQoKBa3jWOqCsI5ikW5ikOZikGRVEwadIkbN++HXv27EGdOnV05s+aNQunTp1CXFyc+gCApUuXIjw8/JFtVqpUCa1atSpy67ELFy6gZs2apa5Ro9FgyJAh2LBhA65du1ZkflZWFvLz80vdrsgkSYK7uzsH3hIE8zQuvXW6AwICEBQUhNDQ0CLzsrKy8Morr+h8uf7555+4ePEi6tatCwAIDAxEdHQ0Dh8+DD8/P9jb26sd8YMHD6JDhw4lquPfe0xLw8bGpsg0S0tLdOnSBbt27cLVq1eLXd/S0hL29vY6D4Cjl4tCAmCGfO5EEQTzFAvzFAezFENISAjWr1+PjRs3okqVKkhNTcWNGzdw//59AA9P9fbx8dF5AICnpydq166ttuPt7Y1t27apz2fMmIFNmzbhyy+/xKVLl7BixQrs3LkTEydOfKo6582bBw8PD7Ru3Rrr1q3DuXPncPHiRaxZswZ+fn7Iysoqw7sgHkmSYG5uzk6aIJincen1Pt3z58/Hzp07cfz4cZ3pzZs3x7lz51C
vXr0ij0qVKgH4v+u6N2/erJ5aFBgYiL179+Lo0aOPPd2oOPb29nB3d8fRo0d1ph89erTI6UmPotFo8M0336BFixbo2LHjI/eEPglHLxcDR9QVC/MUC/MUB7MUw6pVq5CRkYHAwEBUr14dbm5ucHNzw6ZNm0rVTkJCgs69svv374/Vq1dj4cKFaNKkCb766its2bIF7dq1e6o6HR0dERMTgxEjRuDDDz+En58f2rdvj2+//RaLFi2CVqt9qnZFJcsykpOTOdq1IJincen1XghNmjTB8OHD8cknn+hMf/PNN/H8889j0qRJePnll2FjY4Nz585hz549WLFiBQDA19cXVapUwcaNG7Fr1y4ADzvdb7zxBiRJKnKKeEnNmDEDc+bMQd26ddGsWTOEh4cjLi7ukSOUP4qZmRk2bNiAoUOHolOnToiOji7RrS2IiIiInkWFg5QV/qj39PSERlP8cZ5HDWz2qGljx47F2LFj9VMoAK1Wi7CwsCfehYeIqCz0eqQbAN5///0ie0x8fX1x8OBBXLhwAe3bt4efnx9mz56tc19ESZLQvn17SJKk7rH09fWFvb09WrZs+cjTv0tiypQpmDZtGqZPn44mTZogKioKO3bsQP369Uvchrm5Ob799ls0btwYnTp1ws2bN5+qFiIiIiIiInq2SArvmWAQmZmZ0Gq1uLZpFuxzUk1dDpVR4SmP7nnJ0HCAnwqPeYqFeYqDWVYcGkdPWPecW+wypTnSTeUf8xQL89SPwj5fRkaGOqbXo/AdNjANL0sTggYKfwQKhHmKhXmKg1mKRaPR8Ae9QJinWJincfFdNjCeRyAGBUABzPkzUBDMUyzMUxzMUiyKoiA/P/+R12ZTxcM8xcI8jYudbgPjx1gMCiSkWrhD4Yi6QmCeYmGe4mCWYlEUBdeuXeOPekEwT7EwT+PS6+jlVJRk7wpNgaWpy6CyUiRoFEdoJEAj8cupwmOeYmGe4mCWFYakdX/yQkREBICdboOzen40rB0cTF0GlZEsy7BIToY1r30RAvMUC/MUB7OsWBRZhsSciIieiN+URCUkSTzdUSTMUyzMUxzMsuIoSYebeYqFeYqFeRoPbxlmICUdPp6IiIiIiIgqHt4yrJzgPg0xKIqC+/fvM09BME+xME9xMEuxME+xME+xME/jYqfbwPhBFoOiKEhNTWWegmCeYmGe4mCWYmGeYmGeYmGexsVONxEREREREZGBsNNNRERERAalyLKpSyAiMhneMszAHsSsxf2CNFOXQWUkK4CiOOD+mXRoONBjhcc8xcI8xcEsxVKY54OUyqjcfrypyyE9sLCwMHUJpEfM03jY6TYw6e4NyA9STV0G6YELkgEA3FcvBuYpFuYpDmYpFhckQ9J4mroM0gONRoPq1aubugzSE+ZpXDy93MA4NoEYFADZGlswTjEwT7EwT3EwS7GoeTJQISiKgrt373LgLUEwT+Nip9vA+DEWgwIJaWZVoYDnO4qAeYqFeYqDWYqFeYpFURTcvn2bnTRBME/jYqebiIiIiEwiLCwMrVq1gp2dHVxcXNCvXz8kJCToLBMYGAhJknQeEyZMKLbdrVu3omvXrqhatSokSUJcXNwTaykoKMDEiRPh5uaGHj164ObNmzrzMzMz8fbbb8Pb2xtWVlZwdXVFly5dsHXrVnZciKhYwna6ExMTS/wlS0RERETGd/DgQYSEhCAmJgZ79uxBXl4eunbtiuzsbJ3lxo0bh+vXr6uPhQsXFttudnY22rVrhwULFpS4lsjISCQnJ2P37t1o3rw53nnnHXVeeno62rZti3Xr1iE0NBQnT57EoUOHMHjwYMycORMZGRmle+FE9Ewp1wOpSVLxpyPNmTMHc+fONU4x9MyzUu6bugTSI+YpFuYpDmYplod5Wj12flRUlM7ziIgIuLi4IDY2FgEBAer0ypUrw9XVtcTbHTlyJICHB2FKKi0tDbVq1YKPjw/i4+OxZcsWdd5bb72FxMREXLhwAe7u7up
0Ly8vDB06FFZWj3+NIrG2tjZ1CaRHzNN4ynWn+/r16+q/N23ahNmzZ+uccmRra2uKskqFtzwRgwYKnPJvPnlBqhCYp1iYpziYpVgK89RIJR+9vPCIsaOjo870DRs2YP369XB1dUXv3r3x7rvvonLlynqtd8SIEejcuTMsLS1RrVo1/PTTTwAAWZYRGRmJ4cOH63S4C1WE36P6oNFoUK1aNVOXQXrCPI2rXJ9e7urqqj60Wi0kSVKfu7i44OOPP0aNGjVgaWmJZs2aFdlb+m8FBQUYO3YsvL29cejQIWg0Gpw4cUJnmWXLlqFmzZqQ5Yc3Kjl48CCee+45WFpaws3NDbNmzUJ+fn6pXgMv8RGDAiBTo+XAeIJgnmJhnuJglmJR8yxhoLIsY+rUqfD394ePj486fdiwYVi/fj0OHDiA0NBQfPPNNxgxYoTe63VwcEBsbCxSUlKQlJQEX19fAMCtW7eQlpYGb29vvW+zIlEUBenp6bx+XRDM07jK9ZHu4ixfvhxLlizB559/Dj8/P6xZswZ9+vTB2bNnUb9+fZ1lc3JyMHToUCQmJuLw4cNwdnZGly5dEB4ejpYtW6rLhYeHIzg4GBqNBlevXkWPHj0QHByMdevW4fz58xg3bhysrKweeUp7Tk4OcnJy1OeZmZkAOHq5KBRIyDRzgK2cCYmpVnjMUyzMUxzMUiyFedqXcPmQkBCcOXMGR44c0Zk+fvx49d9NmjSBm5sbOnfujMuXL6Nu3bp6rPih/57Gzk7JQ4WdNHt7+ydeAkrlH/M0rnJ9pLs4ixcvxptvvokhQ4agQYMGWLBgAZo1a4Zly5bpLJeVlYWePXvin3/+wYEDB+Ds7AwAePnll/Htt9+qHeWTJ0/i9OnTGDNmDADgs88+g4eHB1asWAFvb2/069cP7733HpYsWaIeCf+3sLAwaLVa9eHh4WHYN4CIiIhIEJMmTcKuXbtw4MAB1KhRo9hlW7duDQC4dOmSMUqDs7MzHBwccP78eaNsj4jEUyE73ZmZmbh27Rr8/f11pvv7+yM+Pl5n2tChQ5GdnY1ffvkFWq1Wnd6vXz+YmZlh27ZtAB4O3NGxY0fUqlULABAfH482bdro7Pnx9/dHVlYW/v777yI1hYaGIiMjQ32kpKTo6+USERERCUlRFEyaNAnbtm3D/v37Ubt27SeuU3hnGjc3NwNX95BGo8GQIUOwYcMGXLt2rcj8rKysUl9+SETPlgrZ6S6NHj164NSpUzh+/LjO9EqVKmHUqFEIDw9Hbm4uNm7ciLFjxz71diwtLWFvb6/zILHYyFmmLoH0iHmKhXmKg1mK5WGejz89OyQkBOvXr8fGjRthZ2eHGzdu4MaNG7h//+Eo9pcvX8YHH3yA2NhYJCYmYseOHRg1ahQCAgLUa64BwNvbWz2QAgB37txBXFwczp07BwBISEhAXFwcbty48VSvY968efDw8EDr1q2xbt06nDt3DhcvXsSaNWvg5+eHrKxn43P7rAwa96xgnsZTITvd9vb2cHd3x9GjR3WmHz16FI0aNdKZ9uqrr2L+/Pno06cPDh48qDPv5Zdfxt69e/HZZ58hPz8fAwYMUOc1bNgQx48f17mO5+jRo7Czs3viaU//xtHLxaCBgioFt6HhNYZCYJ5iYZ7iYJZiUfMs5rfQqlWrkJGRgcDAQLi5uamPTZs2AXh4kGTv3r3o2rUrvL29MX36dAwcOBA7d+7UaSchIUHnXtk7duyAn58fevbsCQAYMmQI/Pz8sHr16qd6LY6OjoiJicGIESPw4Ycfws/PD+3bt8e3336LRYsW6ZxNKSqNRgMnJydoNBWy+0D/wTyNq8IOpDZjxgzMmTMHdevWRbNmzRAeHo64uDhs2LChyLKTJ09GQUEBevXqhZ9//hnt2rUD8LBj/fzzz+PNN9/E2LFjde5VN3HiRCxbtgyTJ0/GpEmTkJCQgDlz5mDatGml+nDK/N0gBBkSMswcoS24wx+DAmCeYmGe4mCWYin
Ms0oxUT5pkDIPD48iB01K0k5wcDCCg4NLUmaJabVahIWFISwsTK/tVhSyLOPOnTtwdHRkR00AzNO4Kmyne8qUKcjIyMD06dNx8+ZNNGrUCDt27CgycnmhqVOnQpZl9OjRA1FRUWjbti0A4KWXXsKxY8eKnFpevXp1/PTTT5gxYwaaNm0KR0dHvPTSS3jnnXcM/tqofMrW2EJbcMfUZZCeME+xME9xMEuxZGtsUcXURZDeZGVlFbmHOlVczNN4JOUZvw/CBx98gM2bN+PUqVN6bTczMxNarRbXNs2CfU6qXtsm45Mh4ZqFJ9zzknn0RQDMUyzMUxzMUiyFedawBWx6zTF1OVRGsiwjOTkZnp6ePDIqAOapH4V9voyMjGLH9Hpm3+GsrCycOXMGK1aswOTJk01dDhEREREREQnome10T5o0CS1atEBgYGCZRi1/Eo6jJgYJCuwL0iHxyIsQmKdYmKc4mKVYmKdYJEmCg4ODzu10qeJinsb1zJ9ebiiFpxrc+G4WbB/w9HIiIiJ6NmkcPWHdc66pyyAi0jueXl5OcPRyMciQcMvcBTLPXRAC8xQL8xQHsxSLmid/CwlBlmWkpqZClmVTl0J6wDyNq8KOXl5RSPau0BRYmroMKitFQq7iCI1kDY3EXw8VHvMUC/MUB7MUy//PU9Lyd5Ao7t+/b+oSSI+Yp/Gw021gVs+PhrWDg6nLoDKSZRkWycmw5giPQmCeYmGe4mCWYinM08rTE4osQ2KmRPSM4rcfERERERkUO9xE9CzjN6CBcURAMUiShKpVqzJPQTBPsTBPcTBLsTBPsTBPsTBP4+Lp5QbGD7IYJEmCnZ2dqcsgPWGeYmGe4mCWYmGeYmGeYmGexsUj3QbGEQHFIMsyrl69yjwFwTzFwjzFwSzFwjzFwjzFwjyNi51uohLKy8szdQmkR8xTLMxTHMxSLCXJU+GP/gqDf59iYZ7Gw9PLDexBzFrcL0gzdRlURrIiIU9xxP0zd3gbGwEwT7EwT3EwS7GUJE9J6w6rduONXBkRkXGx021gSuYNyDmppi6DykiGBNkCkPOSAfCHYEXHPMXCPMXBLMVSkjx5yiURPQv4XWdgHEZNDBIUOOWnQuKPQCEwT7EwT3EwS7EwT7FIkoRq1apxkGBBME/j4pFuA+PnWAwSACvlganLID1hnmJhnuJglmJhnmKRJAnW1tamLoP0hHkaF490G5jMnbtCkCHhmoUnZJ67IATmKRbmKQ5mKRbmKRZZlpGUlMTRrgXBPI1LiE53cHAw+vXr99jngYGBmDp1qtHrIrHwR4NYmKdYmKc4mKVYypJnWFgYWrVqBTs7O7i4uKBfv35ISEjQWSYwMBCSJOk8JkyYUOJtTJgwAZIkYdmyZcUul52djSFDhsDNzQ1Dhw7FvXv3dObfuHEDkydPRp06dWBpaQkPDw/07t0b+/btK3EtFYGi8GiSSJin8Zi80x0cHKzzRVm1alV069YNp06dKnEby5cvR0RExGOfb926FR988IEeqyYiIiIiQzp48CBCQkIQExODPXv2IC8vD127dkV2drbOcuPGjcP169fVx8KFC0vU/rZt2xATEwN3d/cnLrts2TLY2tril19+gbW1tU4nPTExES1atMD+/fuxaNEinD59GlFRUejYsSNCQkJK9ZqJSEzl4prubt26ITw8HMDDPYXvvPMOevXqheTk5BKtr9Vqi33u6Oion0KJiIiIyCiioqJ0nkdERMDFxQWxsbEICAhQp1euXBmurq6lavvq1auYPHkydu/ejZ49ez5x+bS0NHh5eaFJkybw9vbGrVu31HkTJ06EJEn47bffYGNjo05v3Lgxxo4dW6q6iEhMJj/SDQCWlpZwdXWFq6srmjVrhlmzZiElJQX//PMPACAlJQWDBg2Cg4MDHB0d0bdvXyQmJqrrl/b08lq1auGjjz7C2LFjYWdnB09PT3zxxRc6NR07dgzNmjWDlZUVWrZsie3bt0OSJMTFxZXqtfE
kOTFIUFAt7xpHYBUE8xQL8xQHsxSLvvPMyMgAUPRgyoYNG+Dk5AQfHx+EhoYWOfX7v2RZxsiRIzFjxgw0bty4RNueNGkSPv/8c1hYWCA8PByvvfYaAODOnTuIiopCSEiIToe7kIODQ4narwgkSYK7uztHuxYE8zSuctHp/resrCysX78e9erVQ9WqVZGXl4egoCDY2dnh8OHDOHr0KGxtbdGtWzfk5uY+9XaWLFmCli1b4o8//sDEiRPx6quvqtcJZWZmonfv3mjSpAlOnjyJDz74AG+++eZTbYefYzFIAMyQz50ogmCeYmGe4mCWYtFnnrIsY+rUqfD394ePj486fdiwYVi/fj0OHDiA0NBQfPPNNxgxYkSxbS1YsADm5uaYMmVKibdfq1YtXLx4ESkpKTh37hyqV68OALh06RIURYG3t/fTvbAKRJIkmJubs5MmCOZpXOXi9PJdu3bB1tYWwMOBKtzc3LBr1y5oNBps3LgRsizjq6++Uj8U4eHhcHBwQHR0NLp27fpU2+zRowcmTpwIAHjzzTexdOlSHDhwAA0aNMDGjRshSRK+/PJLWFlZoVGjRrh69SrGjRv32PZycnKQk5OjPs/MzATA0ctFUTgCq3teMjQ8AlPhMU+xME9xMEux6DPPkJAQnDlzBkeOHNGZPn78ePXfTZo0gZubGzp37ozLly+jbt26RdqJjY3F8uXLcfLkyVJ3NjQaTZHT2J+lgahkWUZycjI8PT2h0ZS743ZUSszTuMrFO9yxY0fExcUhLi4Ov/32G4KCgtC9e3ckJSXhzz//xKVLl2BnZwdbW1vY2trC0dERDx48wOXLl596m76+vuq/JUmCq6srbt68CQBISEiAr68vrKys1GWee+65YtsLCwuDVqtVHx4eHk9dGxERERE9NGnSJOzatQsHDhxAjRo1il22devWAB4egX6Uw4cP4+bNm/D09IS5uTnMzc2RlJSE6dOno1atWqWurX79+pAkCefPny/1ukT07CgXR7ptbGxQr1499flXX30FrVaLL7/8EllZWWjRogU2bNhQZD1nZ+en3qaFhYXOc0mSynSfutDQUEybNk19npmZyY43ERER0VNSFAWTJ0/Gtm3bEB0djdq1az9xncKxd9zc3B45f+TIkejSpYvOtKCgIIwcORJjxowpdY2Ojo4ICgrCypUrMWXKlCLXdaenpwt1XTcRPZ1y0en+L0mSoNFocP/+fTRv3hybNm2Ci4sL7O3tjbL9Bg0aYP369cjJyYGlpSUA4Pfffy92HUtLS3VZIiIiIiqbkJAQbNy4ET/88APs7Oxw48YNAA/vUmNtbY3Lly9j48aN6NGjB6pWrYpTp07h9ddfR0BAgM4Zjd7e3ggLC0P//v1RtWpVVK1aVWc7FhYWcHV1RYMGDZ6qzpUrV8Lf3x/PPfcc3n//ffj6+iI/Px979uzBqlWrEB8f//RvAhEJoVycXp6Tk4MbN27gxo0biI+Px+TJk5GVlYXevXtj+PDhcHJyQt++fXH48GFcuXIF0dHRmDJlCv7++2+D1DNs2DDIsozx48cjPj4eu3fvxuLFiwGg9Nf/cGwCIWig8BpDgTBPsTBPcTBLsZQ1z1WrViEjIwOBgYFwc3NTH5s2bQIAVKpUCXv37kXXrl3h7e2N6dOnY+DAgdi5c6dOOwkJCerI54ZQp04dnDx5Eh07dsT06dPh4+ODF154Afv27cOqVasMtl1j02g0vP5XIMzTuMrFke6oqCj1NCA7Ozt4e3tj8+bNCAwMBAAcOnQIb775JgYMGIC7d++ievXq6Ny5s8GOfNvb22Pnzp149dVX0axZMzRp0gSzZ8/GsGHDdK7zLolnaHwNoSkACmAOCXkcVVcAzFMszFMczFIsZc3zSYOUeXh44ODBg2Vu59+3oX1abm5uWLFiBVasWFHmtsorRVGQn58PCwsLjngtAOZpXJIi4LCLQ4cOhZmZGdavX6+3Njds2IAxY8YgIyMD1tbWT1w+MzM
TWq0W1zbNgn1Oqt7qINPgiLpiYZ5iYZ7iYJZiKUmeGkdPWPeca9zC6KlwtGuxME/9KOzzZWRkFHtAuFwc6daX/Px8XLhwAcePH8crr7xSprbWrVuHOnXqoHr16vjzzz/x5ptvYtCgQSXqcBMREREREREBgnW6z5w5g7Zt26Jjx46YMGFCmdq6ceMGZs+ejRs3bsDNzQ0vvvgi5s2bp6dKiYiIiIiI6FkgVKe7WbNmuHfvnl7amjlzJmbOnKmXtkgMPNVRLMxTLMxTHMxSLMxTLLz2VyzM03iE6nSXRxy9XAyFI7CSGJinWJinOJilWJinWDQaDWrWrGnqMkhPmKdxsdNtaHau0FTm/bsrOkUBcmABS+SBOwUrPuYpFuYpDmYplpLkKWndjVsUPTVFUfDgwQNYWVnxCKkAmKdxsdNtYJbPj4a1g4Opy6AykmUZNznCozCYp1iYpziYpVhKmqciy5CYd7mnKApSU1Ph6enJTpoAmKdx8RuOiIiIiEyGHW4iEh2/5YiIiIiIiIgMhJ1uohKysLAwdQmkR8xTLMxTHMxSLMxTLMxTLMzTeCRFUXgvBwPIzMyEVqtFRkYG7O3tTV0OERERERER6VFJ+3w80m1g3KchBkVRcPfuXeYpCOYpFuYpDmYpFuYpFuYpFuZpXOx0Gxg/yGJQFAW3b99mnoJgnmJhnuJglmLRZ56KLOuhIioL/n2KhXkaF28ZZmAPYtbifkGaqcugMpIVCXmKI+6fuQONxC+nio55ioV5ioNZikVfeUpad1i1G6/HyoiIjIudbgNTMm9Azkk1dRlURjIkyBaAnJcMgD8EKzrmKRbmKQ5mKRZ95cnTMomoouP3GFEJWSn3TV0C6RHzFAvzFAezFAvzFIu1tbWpSyA9Yp7GwyPdBqaRTF0B6YMGCpzyb5q6DNIT5ikW5ikOZikW5ikWjUaDatWqmboM0hPmaVxCH+lOTEyEJEmIi4srdrm5c+eiWbNmxS4THByMfv36lboGjk0gBgVApkbLkx0FwTzFwjzFwSzFYow8w8LC0KpVK9jZ2cHFxQX9+vVDQkKCzjKBgYGQJEnnMWHChGLbzcrKwqRJk1CjRg1YW1ujUaNGWL16dbHrFBQUYOLEiXBzc0OPHj1w86buDofMzEy8/fbb8Pb2hpWVFVxdXdGlSxds3bq1QgxmpSgK0tPTK0St9GTM07hM2ukODg5+7BdfSEgIJElCcHCwwet44403sG/fPoO0zY+xGBRIyDRzgAKeuiAC5ikW5ikOZikWY+R58OBBhISEICYmBnv27EFeXh66du2K7OxsneXGjRuH69evq4+FCxcW2+60adMQFRWF9evXIz4+HlOnTsWkSZOwY8eOx64TGRmJ5ORk7N69G82bN8c777yjzktPT0fbtm2xbt06hIaG4uTJkzh06BAGDx6MmTNnIiMjo2xvhBGwkyYW5mlcJj+93MPDA5GRkVi6dKl6XcGDBw+wceNGeHp6GnTbiqKgoKAAtra2sLW1Nei2iIiIiEi/oqKidJ5HRETAxcUFsbGxCAgIUKdXrlwZrq6uJW732LFjGD16NAIDAwEA48ePx+eff47ffvsNffr0eeQ6aWlpqFWrFnx8fBAfH48tW7ao89566y0kJibiwoULcHd3V6d7eXlh6NChsLKyKnFtRFTxmPz08ubNm8PDwwNbt25Vp23duhWenp7w8/NTp0VFRaFdu3ZwcHBA1apV0atXL1y+fFmnrd9++w1+fn6wsrJCy5Yt8ccff+jMj46OhiRJ+Pnnn9GiRQtYWlriyJEjRU4vLygowLRp09RtzZw5k3uBiIiIiMq5wiPGjo6OOtM3bNgAJycn+Pj4IDQ0FPfu3Su2nbZt22LHjh24evUqFEXBgQMHcOHCBXTt2vWx64wYMQLHjx+HpaUlpk+frh7plmUZkZGRGD58uE6Hu5CtrS3MzU1+HIyIDMjknW4AGDt2LMLDw9Xna9aswZgxY3SWyc7OxrRp03DixAn
s27cPGo0G/fv3hyzLAB5ee9OrVy80atQIsbGxmDt3Lt54441Hbm/WrFmYP38+4uPj4evrW2T+kiVLEBERgTVr1uDIkSO4c+cOtm3bpsdXTBWRjZxl6hJIj5inWJinOJilWIyZpyzLmDp1Kvz9/eHj46NOHzZsGNavX48DBw4gNDQU33zzDUaMGFFsW59++ikaNWqEGjVqoFKlSujWrRtWrlypc/T8vxwcHBAbG4uUlBQkJSWpvzFv3bqFtLQ0eHt76+eFmhDPDBUL8zSecrFbbcSIEQgNDUVSUhIA4OjRo4iMjER0dLS6zMCBA3XWWbNmDZydnXHu3Dn4+Phg48aNkGUZX3/9NaysrNC4cWP8/fffePXVV4ts7/3338cLL7zw2HqWLVuG0NBQDBgwAACwevVq7N69u9jXkJOTg5ycHPV5ZmYmAI5eLgoNFFQpuG3qMkhPmKdYmKc4mKVYjJ1nSEgIzpw5gyNHjuhMHz9+vPrvJk2awM3NDZ07d8bly5dRt27dR7b16aefIiYmBjt27EDNmjVx6NAhhISEwN3dHV26dCm2jv+exi7K2ZIajQZOTk6mLoP0hHkaV7k40u3s7IyePXsiIiIC4eHh6NmzZ5EPwcWLFzF06FDUqVMH9vb2qFWrFgAgOTkZANSj1v++JqZNmzaP3F7Lli0fW0tGRgauX7+O1q1bq9PMzc2LXQd4OHqmVqtVHx4eHgAAWYzv2WeeDAlpZlUhc3AfITBPsTBPcTBLsRgzz0mTJmHXrl04cOAAatSoUeyyhb/xLl269Mj59+/fx1tvvYWPP/4YvXv3hq+vLyZNmoTBgwdj8eLFpa7N2dkZDg4OOH/+fKnXLU9kWcatW7fUs0ypYmOexlUuOt3Aw1PMIyIisHbtWowdO7bI/N69e+POnTv48ssv8euvv+LXX38FAOTm5pZ6WzY2NmWu979CQ0ORkZGhPlJSUvS+DTKtbA1PwREJ8xQL8xQHsxSLofNUFAWTJk3Ctm3bsH//ftSuXfuJ6xTeStbNze2R8/Py8pCXlweNRvdnspmZ2VN1UDQaDYYMGYINGzbg2rVrReZnZWUhPz+/1O2aQlYWL/8QCfM0nnLT6e7WrRtyc3ORl5eHoKAgnXm3b99GQkIC3nnnHXTu3BkNGzZEWlqazjINGzbEqVOn8ODBA3VaTExMqevQarVwc3NTO/UAkJ+fj9jY2GLXs7S0hL29vc6DiIiIiAwnJCQE69evx8aNG2FnZ4cbN27gxo0buH//PgDg8uXL+OCDDxAbG4vExETs2LEDo0aNQkBAgM64Pt7e3ur4Pfb29ujQoQNmzJiB6OhoXLlyBREREVi3bh369+//VHXOmzcPHh4eaN26NdatW4dz587h4sWLWLNmDfz8/Nj5IRJcubimG3i49zA+Pl79979VqVIFVatWxRdffAE3NzckJydj1qxZOssMGzYMb7/9NsaNG4fQ0FAkJiY+1SlAAPDaa69h/vz5qF+/Pry9vfHxxx8jPT39qdoiIiIiIsNYtWoVAKi39ioUHh6O4OBgVKpUCXv37sWyZcuQnZ0NDw8PDBw4UOce2gCQkJCgc6/syMhIhIaGYvjw4bhz5w5q1qyJefPmYcKECU9Vp6OjI2JiYjB//nx8+OGHSEpKQpUqVdCkSRMsWrQIWq32qdolooqh3HS6ATz26LBGo0FkZCSmTJkCHx8fNGjQAJ988onOF6ytrS127tyJCRMmwM/PD40aNcKCBQuKDMBWEtOnT8f169cxevRoaDQajB07Fv3799f5Mi4pXpUmBgkK7AvSIYEX6YuAeYqFeYqDWYrFGHk+aZAyDw8PHDx4sNTtuLq66txZRx+0Wi3CwsIQFham13aNRZIkODg4QJL461YEzNO4JEWUIRXLmczMTGi1Wtz4bhZsH6SauhwiIiKiCknj6AnrnnNNXQYRURGFfb6MjIxiLy8uN9d0i4qjl4tBhoRb5i4cUVcQzFMszFMczFIszFMssiwjNTW
Vo10LgnkaFzvdRCX0QLI2dQmkR8xTLMxTHMxSLMxTLIUD1JEYmKfxsNNNREREREREZCDsdBMREREREREZSLkavVxEGntXaAosTV0GlZGkAFVRCWbwAAd5rPiYp1iYpziYpVj0laekdddfUfTUJElC1apVOdq1IJincbHTbWDWbcfAupiR7KjiqGzqAkivmKdYmKc4mKVY9JWnIsuQNDxB05QkSYKdnZ2pyyA9YZ7GxW8vA+OIgGKQZRlXr15lnoJgnmJhnuJglmLRZ57scJse/z7FwjyNi99gRCWUl5dn6hJIj5inWJinOJilWJinWJinWJin8bDTTURERERERGQg7HQTERERERERGQg73QbGEQHFIEkSqlWrxjwFwTzFwjzFwSzFwjzFwjzFwjyNi6OXGxg/yGKQJAnW1tamLoP0hHmKhXmKg1mKxZh5cnRzw+Pfp1iYp3Gx021g946Fw6IgzdRlUBnJioTrShW4SWnQSIqpy6EyYp5iYZ7iYJZiMVaektYdVu3GG6x9ekiWZaSkpMDDwwMa7uCo8JincbHTbWBK5g3IOammLoPKSIaEAgtAzksGwB+CFR3zFAvzFAezFIux8mR3wXgUhX+XImGexsPvKSIiIiIiIiIDYaebiIiIiIiIyEDY6TYwDqMmBgkKquVdg8TTHYXAPMXCPMXBLMVSXvIMCwtDq1atYGdnBxcXF/Tr1w8JCQlFljt+/Dg6deoEGxsb2NvbIyAgAPfv339su4cOHULv3r3h7u4OSZKwffv2EtXz3nvvoUaNGmjXrh0uXLigMy83NxcLFy5E06ZNUblyZTg5OcHf3x/h4eHIy8sr1evWN0mS1NdKFR/zNC52uv+/iIgIODg46L1dfo7FIAEwQz53ogiCeYqFeYqDWYqlvOR58OBBhISEICYmBnv27EFeXh66du2K7OxsdZnjx4+jW7du6Nq1K3777Tf8/vvvmDRpUrEDTGVnZ6Np06ZYuXJliWs5evQofvzxR/zwww8YNmwYJk2apM7Lzc1FUFAQ5s+fj/Hjx+PYsWP47bffEBISgk8//RRnz559ujdATyRJgrm5OTtpgmCexlWqgdT++ecfzJ49Gz/++CNSU1NRpUoVNG3aFLNnz4a/v7+hakRERATGjBkD4P/2yrzwwgtYsGABXFxcDLZdfZC5s14IMiRcs/CEe14yNDwCU+ExT7EwT3EwS7GUlzyjoqJ0nkdERMDFxQWxsbEICAgAALz++uuYMmUKZs2apS7XoEGDYtvt3r07unfvXqpa0tLS4O7uDl9fX+Tn5yMiIkKdt2zZMhw6dAgnTpyAn5+fOr1OnTp48cUXkZubW6pt6Zssy0hOToanpydHuxYA8zSuUr3DAwcOxB9//IG1a9fiwoUL2LFjBwIDA3H79m1D1aeyt7fH9evX8ffff+PLL7/Ezz//jJEjRxp8u0REREQkjoyMDACAo6MjAODmzZv49ddf4eLigrZt26JatWro0KEDjhw5ovdtBwUF4cGDB6hcuTK6deuGsLAwdd6GDRvQpUsXnQ53IQsLC9jY2Oi9HiIyjhJ3utPT03H48GEsWLAAHTt2RM2aNfHcc88hNDQUffr0UZf7+OOP0aRJE9jY2MDDwwMTJ05EVlaWTltbtmxB48aNYWlpiVq1amHJkiVP3L4kSXB1dYW7uzu6d++OKVOmYO/evbh//z6ioqLQrl07ODg4oGrVqujVqxcuX76srhsdHQ1JkpCenq5Oi4uLgyRJSExMfOw2V61ahbp166JSpUpo0KABvvnmm5K+XURERERUzsiyjKlTp8Lf3x8+Pj4AgL/++gsAMHfuXIwbNw5RUVFo3rw5OnfujIsXL+p1+xYWFoiKisLVq1eRmpqKzp07q/MuXrwIb29vvW6PiMqHEne6bW1tYWtri+3btyMnJ+fxDWo0+OSTT3D27FmsXbsW+/fvx8yZM9X5sbGxGDRoEIYMGYLTp09j7ty5ePfdd3VOrykJa2tryLKM/Px
8ZGdnY9q0aThx4gT27dsHjUaD/v37Q5blUrX5b9u2bcNrr72G6dOn48yZM3jllVcwZswYHDhw4JHL5+TkIDMzU+dBREREROVHSEgIzpw5g8jISHVa4e/Fwt96fn5+WLp0KRo0aIA1a9YYpA4XFxdUqlRJZxrvmUwkrhJf021ubo6IiAiMGzcOq1evRvPmzdGhQwcMGTIEvr6+6nJTp05V/12rVi18+OGHmDBhAj777DMAD4+Ed+7cGe+++y4AwMvLC+fOncOiRYsQHBxcolouXryI1atXo2XLlrCzs8PAgQN15q9ZswbOzs44d+6cuheztBYvXozg4GBMnDgRADBt2jTExMRg8eLF6NixY5Hlw8LC8N577xWZruHYBELQQDH5NWmkP8xTLMxTHMxSLOUtz0mTJmHXrl04dOgQatSooU53c3MDADRq1Ehn+YYNGyI5Odlo9Xl5eeH8+fNG215paTQaXv8rEOZpXKW+pvvatWvYsWMHunXrhujoaDRv3lznKPXevXvRuXNnVK9eHXZ2dhg5ciRu376Ne/fuAQDi4+OLDLrm7++PixcvoqCg4LHbzsjIgK2tLSpXrowGDRqgWrVq2LBhA4CHnfChQ4eiTp06sLe3R61atQCgTF+Uj6szPj7+kcuHhoYiIyNDfaSkpAAAuNNSDAqAApiXk58NVFbMUyzMUxzMUizlJU9FUTBp0iRs27YN+/fvR+3atXXm16pVC+7u7kVuI3bhwgXUrFnTaHUOGzYMe/fuxR9//FFkXl5ens5o66agKAry8/N5RF4QzNO4Sr1rw8rKCi+88ALeffddHDt2DMHBwZgzZw4AIDExEb169YKvry+2bNmC2NhY9TYKZR1x0c7ODnFxcThz5gyys7Nx6NAheHl5AQB69+6NO3fu4Msvv8Svv/6KX3/9VWebhXtw/v2h0ve9Di0tLWFvb6/zAGDy/9GQfiiQkGrhDsXkNz4hfWCeYmGe4mCWYikveYaEhGD9+vXYuHEj7OzscOPGDdy4cUO9B7ckSZgxYwY++eQTfP/997h06RLeffddnD9/Hi+99JLaTufOnbFixQr1eVZWFuLi4hAXFwcAuHLlCuLi4p76oE/hteadO3fGypUr8eeff+Kvv/7Cd999h+eff17v15eXlqIouHbtGjtpgmCexlWqW4Y9SqNGjbB9+3YAD6/XlmUZS5YsUTu63333nc7yDRs2xNGjR3WmHT16FF5eXjAzM3vsdjQaDerVq1dk+u3bt5GQkIAvv/wS7du3B4Aio006OzsDAK5fv44qVaoAgPoF+TiFdY4ePVqnzv+eekRERERE5deqVasAAIGBgTrTw8PD1Usbp06digcPHuD111/HnTt30LRpU+zZswd169ZVl798+TJu3bqlPj9x4oTOJYfTpk0DAIwePbrUYxUBDw/g7NmzB0uXLsXnn3+ON954A5UrV0bDhg0xZcqUp75kkohMr8Sd7tu3b+PFF1/E2LFj4evrCzs7O5w4cQILFy5E3759AQD16tVDXl4ePv30U/Tu3RtHjx7F6tWrddqZPn06WrVqhQ8++ACDBw/G8ePHsWLFCvWa79KqUqUKqlatii+++AJubm5ITk7WucdiYV0eHh6YO3cu5s2bhwsXLjxxxPQZM2Zg0KBB8PPzQ5cuXbBz505s3boVe/fufao6iYiIiMj4Snokb9asWUV+Q/7bf+94ExgYqPejhJaWlk+sg4gqnlKNXt66dWssXboUAQEB8PHxwbvvvotx48app9o0bdoUH3/8MRYsWAAfHx9s2LBB5/6DANC8eXN89913iIyMhI+PD2bPno3333+/xIOoFXkBGg0iIyMRGxsLHx8fvP7661i0aJHOMhYWFvj2229x/vx5+Pr6YsGCBfjwww+Lbbdfv35Yvnw5Fi9ejMaNG+Pzzz9HeHh4kb2k9OwoLwPBkH4wT7EwT3EwS7EwT7FIEi/9EAnzNB5J4Yn8BpGZmQmtVosb382C7YNUU5dDRER
EJCSNoyese841dRlE9Awq7PNlZGSoY3o9CseINzDu0hCDAuCBZMX99YJgnmJhnuJglmJhnmJRFAX379/nwFuCYJ7GxU63gfFjLAYFEm6ZVzP5CKykH8xTLMxTHMxSLMxTLIqiIDU1lZ00QTBP42Knm4iIiIiIiMhAynzLMCqeZO8KTYGlqcugslIkaBRHaCRAI3GPYIXHPMXCPMXBLMVipDwlrbvB2iYi0gd2ug3M6vnRsHZwMHUZVEayLKPy9euwdnNT70FPFRfzFAvzFAezFIsx81RkGRI/MwZnYWFh6hJIj5in8XD0cgMp6Uh2REREREREVPFw9PJygvs0xKAoCu7evcs8BcE8xcI8xcEsxcI8xcI8xcI8jYudbgPjB1kMiqLg9u3bzFMQzFMszFMczFIszFMszFMszNO42OkmIiIiIiIiMhB2uomIiIiIiIgMhJ1uohKytrY2dQmkR8xTLMxTHMxSLMxTLOUpT6WgwNQlVHjlKU/R8ZZhBpaxYBnyrv9j6jJIDzQAbpu6CNIb5ikW5ikOZikW5imW8pKneS0PVJk7y9RlVGgajQbVqlUzdRnPDHa6DSw/6SryE5NNXQaVkSJJuOfihMo3b0HigBMVHvMUC/MUB7MUC/MUC/MUi6IoyMjIgFarhSRJpi5HeDy93MAUfoiFoEgSsl2dmacgmKdYmKc4mKVYmKdYmKdYFEVBeno6Ry83Ena6iYiIiIiIiAyEnW4iIiIiIhLGqlWr4OvrC3t7e9jb26NNmzb4+eefdZb54osvEBgYCHt7e0iShPT09Ce2O3fuXEiSpPPw9vYudp2CggJMnDgRbm5u6NGjB27evKkzPzMzE2+//Ta8vb1hZWUFV1dXdOnSBVu3buVRaIE8E53uuXPnolmzZmVuR5IkbN++vXTrlHmrVB5IigKr22m8hkkQzFMszFMczFIszFMsFSnPGjVqYP78+YiNjcWJEyfQqVMn9O3bF2fPnlWXuXfvHrp164a33nqrVG03btwY169fVx9HjhwpdvnIyEgkJydj9+7daN68Od555x11Xnp6Otq2bYt169YhNDQUJ0+exKFDhzB48GDMnDkTGRkZpXvhpWRra2vQ9un/GLzTHRwcDEmSMGHChCLzQkJCIEkSgoODDV2GXly/fh3du3cv1ToV4YuJnkxSFNj/fZ15CoJ5ioV5ioNZioV5iqUi5dm7d2/06NED9evXh5eXF+bNmwdbW1vExMSoy0ydOhWzZs3C888/X6q2zc3N4erqqj6cnJyKXT4tLQ21atWCj48PmjRponNE/a233kJiYiJ+/fVXjB49Go0aNYKXlxfGjRuHuLg4g3aKNRoNnJycoNE8E8dgTc4o77KHhwciIyNx//59ddqDBw+wceNGeHp6GqMEvXB1dYWlpWWp1uFgE2JQJAmZNdyYpyCYp1iYpziYpViYp1gqap4FBQWIjIxEdnY22rRpU+b2Ll68CHd3d9SpUwfDhw9HcnLxdykaMWIEjh8/DktLS0yfPl090i3LMiIjIzF8+HC4u7sXWc/W1hbm5oa70ZQsy7h16xZkWTbYNuj/GKXT3bx5c3h4eGDr1q3qtK1bt8LT0xN+fn7qtFq1amHZsmU66zZr1gxz585Vn0uShM8//xy9evVC5cqV0bBhQxw/fhyXLl1CYGAgbGxs0LZtW1y+fLlIHZ9//jk8PDxQuXJlDBo0SOeUjd9//x0vvPACnJycoNVq0aFDB5w8eVJn/ac5vbz87wukklAkCQ+qVqlw/6OhR2OeYmGe4mCWYmGeYqloeZ4+fRq2trawtLTEhAkTsG3bNjRq1KhMbbZu3RoRERGIiorCqlWrcOXKFbRv3x5379597DoODg6IjY1FSkoKkpKS4OvrCwC4desW0tLSnnhNuCFlZWWZbNvPGqOdTzB27FiEh4erz9esWYMxY8Y8VVsffPABRo0ahbi4OHh7e2PYsGF45ZVXEBoaihMnTkBRFEyaNElnnUuXLuG7777
Dzp07ERUVhT/++AMTJ05U59+9exejR4/GkSNHEBMTg/r166NHjx7F/hH9W05ODjIzM3UeRERERERkfA0aNEBcXBx+/fVXvPrqqxg9ejTOnTtXpja7d++OF198Eb6+vggKCsJPP/2E9PR0fPfdd09c19XVFWZmZupzDpL2bDFap3vEiBE4cuQIkpKSkJSUhKNHj2LEiBFP1daYMWMwaNAgeHl54c0330RiYiKGDx+OoKAgNGzYEK+99hqio6N11nnw4AHWrVuHZs2aISAgAJ9++ikiIyNx48YNAECnTp0wYsQIeHt7o2HDhvjiiy9w7949HDx4sEQ1hYWFQavVqg8PD4+nem1ERERERFQ2lSpVQr169dCiRQuEhYWhadOmWL58uV634eDgAC8vL1y6dKnU6zo7O8PBwQHnz5/Xa01UPhmt0+3s7IyePXsiIiIC4eHh6Nmz5xMHHnicwtMyAKBatWoAgCZNmuhMe/Dggc7RZk9PT1SvXl193qZNG8iyjISEBABAamoqxo0bh/r160Or1cLe3h5ZWVlPvE6jUGhoKDIyMtRHSkoKAA6kJgpJUWBz4x/mKQjmKRbmKQ5mKRbmKZaKnqcsy8jJydFrm1lZWbh8+TLc3NxKva5Go8GQIUOwYcMGXLt27ZFt5+fn66PMR5IkCQ4ODpAqyOUCFZ3hrs5/hLFjx6qnfa9cubLIfI1GU+RUi7y8vCLLWVhYqP8u/KA8alppBgYYPXo0bt++jeXLl6NmzZqwtLREmzZtkJubW6L1LS0tHznIGj/GYpAUBTap/5i6DNIT5ikW5ikOZikW5imWipRnaGgounfvDk9PT9y9excbN25EdHQ0du/erS5z48YN3LhxQz1Kffr0adjZ2cHT0xOOjo4AgM6dO6N///5q/+WNN95A7969UbNmTVy7dg1z5syBmZkZhg4d+lR1zps3D9HR0WjdujXmzZuHli1bwsLCAocPH0ZYWBh+//13ODg4lO3NeIzCTjcZh1HHiO/WrRtyc3ORl5eHoKCgIvOdnZ1x/fp19XlmZiauXLmil20nJyfr7EWKiYmBRqNBgwYNAABHjx7FlClT0KNHDzRu3BiWlpa4detWmbdbUQaboOIpkoT0Op7MUxDMUyzMUxzMUizMUywVKc+bN29i1KhRaNCgATp37ozff/8du3fvxgsvvKAus3r1avj5+WHcuHEAgICAAPj5+WHHjh3qMpcvX9bpD/z9998YOnQoGjRogEGDBqFq1aqIiYmBs7PzU9Xp6OiImJgYjBgxAh9++CH8/PzQvn17fPvtt1i0aBG0Wu1TvgNPJssyUlNTOXq5kRj1SLeZmRni4+PVf/9Xp06dEBERgd69e8PBwQGzZ89+5HJPw8rKCqNHj8bixYuRmZmJKVOmYNCgQXB1dQUA1K9fH9988w1atmyJzMxMzJgxA9bW1mXebsU8AYf+S5Ek5NrZQpGkCntaFf0f5ikW5ikOZikW5imWipTn119//cRl5s6dq3OHpEdJTEzUeR4ZGVmGqh5Nq9UiLCwMYWFhem/7Sf59O2cyLKPfDd3e3h729vaPnBcaGooOHTqgV69e6NmzJ/r164e6devqZbv16tXDgAED0KNHD3Tt2hW+vr747LPP1Plff/010tLS0Lx5c4wcORJTpkyBi4uLXrZNREREREREzyZJ4Xj1BpGZmQmtVouLw8bB5kqSqcuhMpI1Gtxq4g2n0+eh4Wk4FR7zFAvzFAezFAvzFEt5ytPcqx6cI4qOD0UlJ8sykpOT4enpCY3G6MdhhVHY58vIyHjsgWXABEe6nzXl/fQbKhlJlmGXcg0SfzQIgXmKhXmKg1mKhXmKhXmKRZIkVK1alaOXG4lRr+l+FvFjLAYJgPWddFOXQXrCPMXCPMXBLMXCPMXCPMUiSRLs7OxMXcYzg0e6DUzm3iMhyBoN7jSoC5mn3wiBeYqFeYqDWYqFeYqFeYpFlmVcvXqVo5cbCf9qiEoo36rofdip4mKeYmG
e4mCWYmGeYmGeYsnLyzN1Cc8Mnl5uYOae7jC3sDB1GVRGsiRBU80Z5g/yoOF1+hUe8xQL8xQHsxQL8xRLecrTvJaHSbdPVFrsdBuYw5tT4eDgYOoyqIxkWcb95GQ4c4RHITBPsTBPcTBLsTBPsZS3PJWCAkhmZqYug6hETP8XIziOCCgGSZJQrVo15ikI5ikW5ikOZikW5imW8pYnO9xlU97yFB2PdBsYP8hikCQJ1tbWpi6D9IR5ioV5ioNZioV5ioV5ioV5GhePdBsYRwQUgyzLSEpKYp6CYJ5iYZ7iYJZiYZ5iYZ5iYZ7GxU43UQkpHARGKMxTLMxTHMxSLMxTLMxTLMzTeNjpJiIiIiIiMhKloMDUJZCR8ZpuA0tfsAx51/8xdRlURrIk4W5Nd/yTdM3kt8mgsmOeYmGe4mCWYmGeYmGe+mFeywNV5s4ydRlkZOx0G1hB0lXkJyabugwqIwWAQ2IyCnJywCtfKj7mKRbmKQ5mKRbmKRbmKRZJkuDu7s5Bn42Ep5cbGD/GYpAAaPLymKcgmKdYmKc4mKVYmKdYmKdYJEmCubk5O91Gwk63gcn8IAtB1mhwq4k3ZA3/ZETAPMXCPMXBLMXCPMXCPMUiyzKSk5M5ermR8K+GiIiIiIiIyECeuU53REQEHBwcTF0GERERERGRatWqVfD19YW9vT3s7e3Rpk0b/PzzzzrLvPLKK6hbty6sra3h7OyMvn374vz588W2GxwcDEmSdB7du3cvdp3s7GwMGTIEbm5uGDp0KO7du6cz/8aNG5g8eTLq1KkDS0tLeHh4oHfv3ti3b9/TvXjBVbhOd1kDHjx4MC5cuGDgKomIiIiIiEquRo0amD9/PmJjY3HixAl06tQJffv2xdmzZ9VlWrRogfDwcMTHx2P37t1QFAVdu3ZFwRNuQ9atWzdcv35dfWzcuLHY5ZctWwZbW1v88ssvsLa2xrJly9R5iYmJaNGiBfbv349Fixbh9OnTiIqKQseOHRESElKm90BUFWr08sTERPj7+8PBwQGLFi1CkyZNkJeXh927dyMkJOSJe3kAwNraGtbW1o+dn5ubi0qVKumtZt5SQQwaWYbT6fPQ8LoXITBPsTBPcTBLsTBPsTBPw+vdu7fO83nz5mHVqlWIiYlB48aNAQDjx49X59eqVQsffvghmjZtisTERNStW/exbVtaWsLV1VVnWpUqVaB5zDX6aWlp8PLyQpMmTeDt7Y1bt26p8yZOnAhJkvDbb7/BxsZGnd64cWOMHTu25C/4GVKhjnT/O+CBAwfCy8sLjRs3xrRp0xATEwMA+Pjjj9GkSRPY2NjAw8MDEydORFZWltrGf08vnzt3Lpo1a4avvvoKtWvXhpWVFQAgOTkZffv2ha2tLezt7TFo0CCkpqaWumZ2ucWgAJAtLJinIJinWJinOJilWJinWJincRUUFCAyMhLZ2dlo06bNI5fJzs5GeHg4ateuDQ8Pj2Lbi46OhouLCxo0aIBXX30Vt27dQn5+PpTHHCCcNGkSPv/8c1hYWCA8PByvvfYaAODOnTuIiopCSEiIToe7EC/jfbQK0+kuacAajQaffPIJzp49i7Vr12L//v2YOXNmsW1funQJW7ZswdatWxEXFwdZltG3b1/cuXMHBw8exJ49e/DXX39h8ODBpa5b4ejlQlA0GtzxrguFI3YKgXmKhXmKg1mKhXmKhXkax+nTp2FrawtLS0tMmDAB27ZtQ6NGjXSW+eyzz2BrawtbW1v8/PPP2LNnT7Fn6nbr1g3r1q3Dvn37sGDBAhw8eBA9evRASkrKYzvdtWrVwsWLF5GSkoJz586hevXqAB72mxRFgbe3t/5e9DOgwpxeXtKAp06dqv678JSLCRMm4LPPPnvsOrm5uVi3bh2cnZ0BAHv27MHp06dx5coVda/RunXr0LhxY/z+++9o1apVkTZycnKQk5OjPs/MzCzNyyMiIiIiomdcgwYNEBcXh4yMDHz//fcYPXo0Dh48qNP
xHj58OF544QVcv34dixcvxqBBg3D06FH1jN3/GjJkiPrvJk2awNfXF3Xr1kVMTAxq16792Fo0Gk2RU9If10mn4lWYXVUlDXjv3r3o3LkzqlevDjs7O4wcORK3b98uMuLev9WsWVPtcANAfHw8PDw8dE7TaNSoERwcHBAfH//INsLCwqDVatXHk07xICIiIiIi+rdKlSqhXr16aNGiBcLCwtC0aVMsX75cZxmtVov69esjICAA33//Pc6fP49t27aVeBt16tSBk5MTkpKSSl1f/fr1IUlSicbSov9TYTrdJQk4MTERvXr1gq+vL7Zs2YLY2FisXLkSwMOj2Y/zqNPVSys0NBQZGRnqIyUlpcxtUvkiceAQoTBPsTBPcTBLsTBPsTBP45NlWeds2v9SFAWKohS7zH/9/fffuH37NlxcXEpdj6OjI4KCgrBy5UpkZ2cXmZ+enl7qNp8FFabTXZKAY2NjIcsylixZgueffx5eXl64du1aqbfVsGFDpKSk6HScz507h/T09CLXVBSytLRU76lX+AA4erkoNLIMZ47YKQzmKRbmKQ5mKRbmKRbmaXihoaE4dOgQEhMTcfr0aYSGhiI6OhrDhw8HAPz1118ICwtDbGwskpOTcezYMbz44ouwtrZGjx491Ha8vb3VI99ZWVmYMWMGYmJikJiYiH379qFv376oV68eRo4c+djRy4uzcuVKFBQU4LnnnsOWLVtw8eJFxMfH45NPPnnsoG/PugrT6QaeHHC9evWQl5eHTz/9FH/99Re++eYbrF69utTb6dKlC5o0aYLhw4fj5MmT+O233zBq1Ch06NABLVu2LFVb7HKLQQGQa2fDPAXBPMXCPMXBLMXCPMXCPA3v5s2bGDVqFBo0aIDOnTvj999/x+7du/HCCy8AAKysrHD48GH06NED9erVw+DBg2FnZ4djx47pHLVOSEhARkYGAMDMzAynTp1Cnz594OXlhZdeegktWrTAoUOHIMvyU12jXadOHZw8eRIdO3bE9OnT4ePjgxdeeAH79u3DqlWr9PNmCKbCDKQG/F/A8+bNw/Tp03H9+nU4OzujRYsWWLVqFZo2bYqPP/4YCxYsQGhoKAICAhAWFoZRo0aVajuSJOGHH37A5MmTERAQAI1Gg27duuHTTz8tdc0cvVwMikaD9Do14XT6PE+tEgDzFAvzFAezFAvzFAvzNLyvv/662Pnu7u746aefntjOvzvS1tbW2L17d5FlZFlGcnIyPD09IT1Ff8XNzQ0rVqzAihUrSr3us0hSOASdQWRmZkKr1eLisHGwuVL6QQqofJE1Gtxq4g0nnlYlBOYpFuYpDmYpFuYpFuapH+Ze9eAcsdLUZeh0up/mFHN6qLDPl5GRoV5e/Ch8h4mIiIiIiIgMhJ1uohIyf1DyUSGp/GOeYmGe4mCWYmGeYmGeYrGwsDB1Cc+MCnVNd0XE0cvFoJFlOCZcNnUZpCfMUyzMUxzMUizMUyzMUywajQbVq1c3dRnPDB7pNjB2ucWgALjv6MA8BcE8xcI8xcEsxcI8xcI8xaIoCu7evftUo5dT6fFIt4GZ1awO80qVTF0GlZEsSciu6Q6bpGs8e0EAzFMszFMczFIszFMszFM/zGt5mLoEAA873bdv34aNjc1TjV5OpcNOt4E5vDkVDg4Opi6DykiWZdxPToYzR3gUAvMUC/MUB7MUC/MUC/PUH6WgAJKZmanLICPiXwwREREREZGRsMP97GGnm6iErK2tTV0C6RHzFAvzFAezFAvzFAvzFAvzNB5J4dXzBlHSG6UTERERERFRxVPSPh+PdBsY92mIQVEUpKenM09BME+xME9xMEuxME+xME+xME/jYqfbwPhBFgO/mMTCPMXCPMXBLMXCPMXCPMXCPI2LnW4iIiIiIiIiA2Gnm4iIiIiIiMhA2OkmKiFbW1tTl0B6xDzFwjzFwSzFwjzFwjzFwjyNh6OXGwhHLyciIiIiIhIXRy8vJ2RZNnUJpAeyLOPWrVv
MUxDMUyzMUxzMUizMUyzMUyzM07jY6SYqoaysLFOXQHrEPMXCPMXBLMXCPMXCPMXCPI2HnW4iIiIiIiIiAzE3dQGiKrxUPjMzExoN921UdLIs4+7du8xTEMxTLMxTHMxSLMxTLMxTLMxTPzIzMwHgifc7Z6fbQG7fvg0AqFmzpokrISIiIiIiIkO5e/cutFrtY+ez020gjo6OAIDk5ORiA6CKITMzEx4eHkhJSeFo9AJgnmJhnuJglmJhnmJhnmJhnvqhKAru3r0Ld3f3Ypdjp9tACk/T0Gq1/CALxN7ennkKhHmKhXmKg1mKhXmKhXmKhXmWXUkOsPIEfiIiIiIiIiIDYaebiIiIiIiIyEDY6TYQS0tLzJkzB5aWlqYuhfSAeYqFeYqFeYqDWYqFeYqFeYqFeRqXpDxpfHMiIiIiIiIieio80k1ERERERERkIOx0ExERERERERkIO91EREREREREBsJOt4GsXLkStWrVgpWVFVq3bo3ffvvN1CXRU5g7dy4kSdJ5eHt7m7osKqFDhw6hd+/ecHd3hyRJ2L59u858RVEwe/ZsuLm5wdraGl26dMHFixdNUywV60lZBgcHF/lb7datm2mKpScKCwtDq1atYGdnBxcXF/Tr1w8JCQk6yzx48AAhISGoWrUqbG1tMXDgQKSmppqoYnqckmQZGBhY5O9zwoQJJqqYirNq1Sr4+vqq925u06YNfv75Z3U+/y4rliflyb9N42Gn2wA2bdqEadOmYc6cOTh58iSaNm2KoKAg3Lx509Sl0VNo3Lgxrl+/rj6OHDli6pKohLKzs9G0aVOsXLnykfMXLlyITz75BKtXr8avv/4KGxsbBAUF4cGDB0aulJ7kSVkCQLdu3XT+Vr/99lsjVkilcfDgQYSEhCAmJgZ79uxBXl4eunbtiuzsbHWZ119/HTt37sTmzZtx8OBBXLt2DQMGDDBh1fQoJckSAMaNG6fz97lw4UITVUzFqVGjBubPn4/Y2FicOHECnTp1Qt++fXH27FkA/LusaJ6UJ8C/TaNRSO+ee+45JSQkRH1eUFCguLu7K2FhYSasip7GnDlzlKZNm5q6DNIDAMq2bdvU57IsK66ursqiRYvUaenp6YqlpaXy7bffmqBCKqn/ZqkoijJ69Gilb9++JqmHyu7mzZsKAOXgwf/X3r0HRXWefwD/Lit3WGCRcIssCIKiYAMmiFaxQgU0juANNU7RoKkVY7xgUq0GqBdmQuKlnaRjioWJBtF6nUaNdmxQo0hJDCROFOoqpUZQSxFFQQI8vz/8cerKRZTLov1+ZpzZ8573vOd59+WZ8dk95+wJEXmQi6ampvLnP/9Z6XPhwgUBIHl5ecYKkzrg0bUUEQkLC5O33nrLeEFRpzg4OEhGRgbz8jnRvJ4izM2exG+6u1h9fT2+/vprREREKG0mJiaIiIhAXl6eESOjp/WPf/wDbm5u6N+/P1577TWUlZUZOyTqAleuXEFFRYVBrtrZ2SEkJIS5+ozKzc3FCy+8AD8/P/zqV79CZWWlsUOiDqqurgYAaLVaAMDXX3+NH3/80SA/Bw4cCA8PD+ZnL/foWjb79NNP0bdvXwwZMgQrV67EvXv3jBEePYHGxkbk5OTg7t27CA0NZV4+4x5dz2bMzZ7Rx9gBPG/+/e9/o7GxEc7Ozgbtzs7OuHjxopGioqcVEhKCrKws+Pn5oby8HKmpqRg1ahTOnz8PW1tbY4dHnVBRUQEAreZq8z56dkRFRWHy5Mnw8vKCXq/HqlWrEB0djby8PKjVamOHR+1oamrCkiVLMHLkSAwZMgTAg/w0MzODvb29QV/mZ+/W2loCwKxZs6DT6eDm5oZvv/0W77zzDoqLi7Fv3z4jRktt+e677xAaGoq6ujrY2Nhg//798Pf3R2FhIfPyGdTWegLMzZ7EopuoHdHR0crrwMBAhISEQKfTYffu3UhISDBiZET0sBkzZiivAwICEBgYCG9vb+Tm5iI8PNyIkdHjJCYm4vz
583xexnOgrbV84403lNcBAQFwdXVFeHg49Ho9vL29ezpMegw/Pz8UFhaiuroae/bsQXx8PE6cOGHssOgptbWe/v7+zM0exMvLu1jfvn2hVqtbPMnx+vXrcHFxMVJU1FXs7e3h6+uLS5cuGTsU6qTmfGSuPp/69++Pvn37Mld7uUWLFuGzzz7DF198gRdffFFpd3FxQX19PW7dumXQn/nZe7W1lq0JCQkBAOZnL2VmZgYfHx8EBwcjLS0NQ4cOxZYtW5iXz6i21rM1zM3uw6K7i5mZmSE4OBjHjx9X2pqamnD8+HGD+yfo2VRTUwO9Xg9XV1djh0Kd5OXlBRcXF4NcvX37NvLz85mrz4GrV6+isrKSudpLiQgWLVqE/fv3429/+xu8vLwM9gcHB8PU1NQgP4uLi1FWVsb87GUet5atKSwsBADm5zOiqakJ9+/fZ14+J5rXszXMze7Dy8u7wbJlyxAfH49hw4bhlVdewebNm3H37l3MnTvX2KHRE0pKSsLEiROh0+lw7do1JCcnQ61WY+bMmcYOjTqgpqbG4NPaK1euoLCwEFqtFh4eHliyZAnWrVuHAQMGwMvLC2vWrIGbmxtiYmKMFzS1qr211Gq1SE1NxZQpU+Di4gK9Xo+3334bPj4+iIyMNGLU1JbExERkZ2fj4MGDsLW1Ve4HtbOzg6WlJezs7JCQkIBly5ZBq9VCo9HgzTffRGhoKIYPH27k6Olhj1tLvV6P7OxsjB8/Ho6Ojvj222+xdOlSjB49GoGBgUaOnh61cuVKREdHw8PDA3fu3EF2djZyc3Nx9OhR5uUzqL31ZG72MGM/Pv159fvf/148PDzEzMxMXnnlFTl79qyxQ6KnEBcXJ66urmJmZibu7u4SFxcnly5dMnZY1EFffPGFAGjxLz4+XkQe/GzYmjVrxNnZWczNzSU8PFyKi4uNGzS1qr21vHfvnowbN06cnJzE1NRUdDqdzJ8/XyoqKowdNrWhtbUEIJmZmUqf2tpaWbhwoTg4OIiVlZXExsZKeXm58YKmVj1uLcvKymT06NGi1WrF3NxcfHx8ZMWKFVJdXW3cwKlVr7/+uuh0OjEzMxMnJycJDw+XY8eOKfuZl8+W9taTudmzVCIiPVnkExEREREREf2v4D3dRERERERERN2ERTcRERERERFRN2HRTURERERERNRNWHQTERERERERdRMW3URERERERETdhEU3ERERERERUTdh0U1ERERERETUTVh0ExEREREREXUTFt1EREREvdy2bdswbty4Lh1z+PDh2Lt3b5eOSURELbHoJiKiXmfOnDlQqVRYsGBBi32JiYlQqVSYM2dOzwdGAACVSoUDBw4YO4weMWbMGCxZssSoMdTV1WHNmjVITk5W2hobG7Fw4UK4urpi/PjxuHHjhsExt2/fxm9+8xsMHDgQFhYWcHFxQUREBPbt2wcRAQCsXr0av/71r9HU1NSj8yEi+l/DopuIiHqlfv36IScnB7W1tUpbXV0dsrOz4eHhYcTI/nf8+OOPxg6h2/T03Orr65/62D179kCj0WDkyJFKW05ODsrKynD06FEEBQVh9erVyr5bt25hxIgR+OSTT7By5UqcO3cOJ0+eRFxcHN5++21UV1cDAKKjo3Hnzh0cOXLk6SdGRESPxaKbiIh6paCgIPTr1w/79u1T2vbt2wcPDw+89NJLBn2bmpqQlpYGLy8vWFpaYujQodizZ4+yv7GxEQkJCcp+Pz8/bNmyxWCMOXPmICYmBu+//z5cXV3h6OiIxMTEdouzoqIi/OxnP4OtrS00Gg2Cg4Px1VdfAQBSUlLwk5/8xKD/5s2b4enp2eKcGzZsgLOzM+zt7fHb3/4WDQ0NWLFiBbRaLV588UVkZmYqx5SWlkKlUmH37t0YNWoULC0t8fLLL6OkpAQFBQUYNmwYbGxsEB0djZs3bxqcPyMjA4MGDYKFhQUGDhyIjz76qMW4u3btQlhYGCwsLPDpp5+2mHNz/LGxsVC
pVAbzOXjwIIKCgmBhYYH+/fsjNTUVDQ0Nyn6VSoWtW7fi1VdfhZWVFQYNGoS8vDxcunQJY8aMgbW1NUaMGAG9Xq8c0/w+bt26Ff369YOVlRWmT5+uFI6dmVtlZSVmzpwJd3d3WFlZISAgADt37jRYnxMnTmDLli1QqVRQqVQoLS1FVlYW7O3tDc5/4MABqFSqFnFnZGTAy8sLFhYWAB4UxPPmzYOTkxM0Gg3Gjh2LoqKiFu/zw3JycjBx4kSDtqqqKnh6emLIkCEICAjArVu3lH2rVq1CaWkp8vPzER8fD39/f/j6+mL+/PkoLCyEjY0NAECtVmP8+PHIyclp9/xERNRJQkRE1MvEx8fLpEmTZOPGjRIeHq60h4eHy6ZNm2TSpEkSHx+vtK9bt04GDhwon3/+uej1esnMzBRzc3PJzc0VEZH6+np59913paCgQC5fviw7duwQKysr2bVrl8E5NRqNLFiwQC5cuCB/+ctfxMrKSj7++OM24xw8eLDMnj1bLly4ICUlJbJ7924pLCwUEZHk5GQZOnSoQf9NmzaJTqczOKetra0kJibKxYsXZdu2bQJAIiMjZf369VJSUiJr164VU1NT+de//iUiIleuXBEAyny///57GT58uAQHB8uYMWPkyy+/lHPnzomPj48sWLBAOdeOHTvE1dVV9u7dK5cvX5a9e/eKVquVrKwsg3E9PT2VPteuXWsx5xs3bggAyczMlPLycrlx44aIiJw8eVI0Go1kZWWJXq+XY8eOiaenp6SkpCjHAhB3d3fZtWuXFBcXS0xMjHh6esrYsWMN5hIVFaUck5ycLNbW1jJ27Fj55ptv5MSJE+Lj4yOzZs3q9NyuXr0q6enp8s0334her5ff/e53olarJT8/X0REbt26JaGhoTJ//nwpLy+X8vJyaWhokMzMTLGzszN4X/bv3y8P/7eqOe6oqCg5d+6cFBUViYhIRESETJw4UQoKCqSkpESWL18ujo6OUllZ2ebfmZ2dneTk5Bi0VVVVSVBQkPTp00fc3d2V8RsbG8XBwUHeeOONNsd72B/+8AeDv0kiIup6LLqJiKjXaS66b9y4Iebm5lJaWiqlpaViYWEhN2/eNCi66+rqxMrKSs6cOWMwRkJCgsycObPNcyQmJsqUKVMMzqnT6aShoUFpmzZtmsTFxbU5hq2trVLYPaqjRbdOp5PGxkalzc/PT0aNGqVsNzQ0iLW1tezcuVNE/ltAZmRkKH127twpAOT48eNKW1pamvj5+Snb3t7ekp2dbRDP2rVrJTQ01GDczZs3tznfZgBk//79Bm3h4eGyYcMGg7bt27eLq6urwXGrV69WtvPy8gSAbNu2zWAuFhYWynZycrKo1Wq5evWq0nbkyBExMTGR8vLyLp/bhAkTZPny5cp2WFiYvPXWWwZ9Olp0m5qaKh9KiIicOnVKNBqN1NXVGRzr7e0tW7dubTWeqqoqASAnT55sdX/zBwHNrl+/LgBk48aN7c6z2cGDB8XExMTgb5CIiLpWnx7/ap2IiKiDnJycMGHCBGRlZUFEMGHCBPTt29egz6VLl3Dv3j38/Oc/N2ivr683uAz9ww8/xJ/+9CeUlZWhtrYW9fX1LS7/Hjx4MNRqtbLt6uqK7777rs34li1bhnnz5mH79u2IiIjAtGnT4O3t/URzHDx4MExM/nu3l7OzM4YMGaJsq9VqODo6tnhQVmBgoMExABAQEGDQ1nzM3bt3odfrkZCQgPnz5yt9GhoaYGdnZzDusGHDnij+ZkVFRTh9+jTWr1+vtDU2NqKurg737t2DlZVVh+Ouq6vD7du3odFoAAAeHh5wd3dX+oSGhqKpqQnFxcWwtbV96rk1NjZiw4YN2L17N3744QfU19fj/v37SqydpdPp4OTkpGwXFRWhpqYGjo6OBv1qa2sNLql/dB8A5fL0R7m4uBhsy/8/JK2jLC0t0dTUhPv378PS0vKJjiUioo5h0U1ERL3a66+/jkWLFgF4UDg/qqa
mBgBw6NAhg8IMAMzNzQE8uCc2KSkJH3zwAUJDQ2Fra4v09HTk5+cb9Dc1NTXYVqlU7T7ZOSUlBbNmzcKhQ4dw5MgRJCcnIycnB7GxsTAxMWlRALV2f3hr5+xIHA/3ab6X+NG25mOa36M//vGPCAkJMRjn4Q8ZAMDa2rrN+banpqYGqampmDx5cot9DxeMHYkbQIefqN2ZuaWnp2PLli3YvHkzAgICYG1tjSVLljz2oWcdXdtHz1dTUwNXV1fk5ua26PvoPeLNHB0doVKpUFVV1W5MzZycnGBvb4+LFy92qP9//vMfWFtbs+AmIupGLLqJiKhXi4qKQn19PVQqFSIjI1vs9/f3h7m5OcrKyhAWFtbqGKdPn8aIESOwcOFCpa2tbxaflK+vL3x9fbF06VLMnDkTmZmZiI2NhZOTEyoqKiAiSiFZWFjYJed8Us7OznBzc8Ply5fx2muvdXo8U1NTNDY2GrQFBQWhuLgYPj4+nR7/UWVlZbh27Rrc3NwAAGfPnoWJiQn8/Pw6NbfTp09j0qRJmD17NoAHhX5JSQn8/f2VPmZmZi3m6uTkhDt37uDu3btKYd2RtQ0KCkJFRQX69Olj8AC69piZmcHf3x/ff/99h36n28TEBDNmzMD27duRnJysvGfNampqYGFhgT59HvwX8Pz58y0eTEhERF2LRTcREfVqarUaFy5cUF4/ytbWFklJSVi6dCmamprw05/+FNXV1Th9+jQ0Gg3i4+MxYMAAfPLJJzh69Ci8vLywfft2FBQUwMvL66njqq2txYoVKzB16lR4eXnh6tWrKCgowJQpUwA8+H3nmzdv4r333sPUqVPx+eef48iRI8ol0z0tNTUVixcvhp2dHaKionD//n189dVXqKqqwrJly55oLE9PTxw/fhwjR46Eubk5HBwc8O677+LVV1+Fh4cHpk6dChMTExQVFeH8+fNYt25dp2K3sLBAfHw83n//fdy+fRuLFy/G9OnTlUurn3ZuAwYMwJ49e3DmzBk4ODhg48aNuH79ukHR7enpifz8fJSWlsLGxgZarRYhISGwsrLCqlWrsHjxYuTn5yMrK+ux84iIiEBoaChiYmLw3nvvwdfXF9euXcOhQ4cQGxvb5qX9kZGR+PLLLzv8e+Hr169Hbm4uQkJCsH79egwbNgympqY4deoU0tLSUFBQoHyzfurUqQ4V80RE9PT4k2FERNTraTSadovVtWvXYs2aNUhLS8OgQYMQFRWFQ4cOKUX1L3/5S0yePBlxcXEICQlBZWWlwbfeT0OtVqOyshK/+MUv4Ovri+nTpyM6OhqpqakAgEGDBuGjjz7Chx9+iKFDh+Lvf/87kpKSOnXOzpg3bx4yMjKQmZmJgIAAhIWFISsr66k+ePjggw/w17/+Ff369VO+JY2MjMRnn32GY8eO4eWXX8bw4cOxadMm6HS6Tsfu4+ODyZMnY/z48Rg3bhwCAwMNfhLsaee2evVqBAUFITIyEmPGjIGLiwtiYmIM+iQlJUGtVsPf3x9OTk4oKyuDVqvFjh07cPjwYeVnxlJSUh47D5VKhcOHD2P06NGYO3cufH19MWPGDPzzn/9U7m9vTUJCAg4fPtziZ9LaotVqcfbsWcyePRvr1q3DSy+9hFGjRmHnzp1IT09X7nX/4YcfcObMGcydO7dD4xIR0dNRyZM+cYOIiIioh6SkpODAgQNGuzS/t5g2bRqCgoKwcuXKLhvznXfeQVVVFT7++OMuG5OIiFriN91EREREvVx6ejpsbGy6dMwXXngBa9eu7dIxiYioJd7TTURERNTLeXp64s033+zSMZcvX96l4xERUet4eTkRERERERFRN+Hl5URERERERETdhEU3ERERERERUTdh0U1ERERERETUTVh0ExEREREREXUTFt1ERERERERE3YRFNxEREREREVE3YdFNRERERERE1E1YdBMRERERERF1ExbdRERERERERN3k/wAvHMldlVd3agAAAABJRU5ErkJggg=
=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "city_summer_means = {}\n", + "for city in CITY_PROFILES:\n", + " v = climate.where(climate[\"city\"] == city)\n", + " if city == \"Sydney\" or city == \"Sao Paulo\":\n", + " s = v.where((v[\"day\"] <= 80) | (v[\"day\"] >= 355))\n", + " else:\n", + " s = v.where((v[\"day\"] >= SUMMER_START) & (v[\"day\"] <= SUMMER_END))\n", + " city_summer_means[city] = s[\"temperature\"].mean()\n", + "\n", + "sorted_cities = sorted(city_summer_means.items(), key=lambda x: x[1], reverse=True)\n", + "names = [c for c, _ in sorted_cities]\n", + "means = [m for _, m in sorted_cities]\n", + "colors = [\"#e63946\" if m > 30 else \"#f4a261\" if m > 20 else \"#457b9d\" for m in means]\n", + "\n", + "fig, ax = plt.subplots(figsize=(10, 4))\n", + "bars = ax.barh(names, means, color=colors, edgecolor=\"white\")\n", + "ax.bar_label(bars, fmt=\"%.1f °C\", padding=4)\n", + "ax.set_xlabel(\"Mean summer temperature (°C)\")\n", + "ax.set_title(\"Cities ranked by summer heat\")\n", + "ax.set_xlim(0, max(means) * 1.15)\n", + "ax.grid(True, axis=\"x\", linestyle=\"--\", alpha=0.4)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d1601d7b", + "metadata": {}, + "source": [ + "---\n", + "## Part 6 — Mutations\n", + "\n", + "CTable supports structural and value mutations: adding/dropping columns, deleting rows, sorting in place." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2c3a977b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-07T12:06:16.566822Z", + "iopub.status.busy": "2026-04-07T12:06:16.563241Z", + "iopub.status.idle": "2026-04-07T12:06:16.715871Z", + "shell.execute_reply": "2026-04-07T12:06:16.712493Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Table with feels_like column:\n", + " city day temperature humidity wind_speed pressure feels_like \n", + " +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Mutations: delete, compact, sort_by, add/drop/rename columns, assign. + +from dataclasses import dataclass + +import blosc2 + + +@dataclass +class Employee: + id: int = blosc2.field(blosc2.int64(ge=0)) + name: str = blosc2.field(blosc2.string(max_length=16), default="") + salary: float = blosc2.field(blosc2.float64(ge=0), default=0.0) + + +data = [ + (0, "Alice", 85_000.0), + (1, "Bob", 72_000.0), + (2, "Carol", 91_000.0), + (3, "Dave", 65_000.0), + (4, "Eve", 110_000.0), + (5, "Frank", 78_000.0), +] + +t = blosc2.CTable(Employee, new_data=data) +print("Original:") +print(t) + +# -- delete(): logical deletion (tombstone) --------------------------------- +t.delete([1, 3]) # remove Bob and Dave +print(f"After deleting rows 1 and 3: {len(t)} live rows") +print(t) + +# -- compact(): physically close the gaps ----------------------------------- +t.compact() +print("After compact():") +print(t) + +# -- sort_by(): returns a sorted copy by default ---------------------------- +sorted_t = t.sort_by("salary", ascending=False) +print("Sorted by salary descending:") +print(sorted_t) + +# -- sort_by(inplace=True) -------------------------------------------------- +t.sort_by("name", inplace=True) +print("Sorted in-place by name:") +print(t) + +# -- add_column(): new column filled with a default ------------------------- +t.add_column("bonus", blosc2.float64(ge=0), default=0.0) +print("After add_column('bonus'):") +print(t) + +# -- assign(): fill the new column with computed values --------------------- +bonuses = t["salary"].to_numpy() * 0.10 +t["bonus"].assign(bonuses) +print("After assigning 10% bonuses:") +print(t) + +# -- rename_column() -------------------------------------------------------- +t.rename_column("bonus", "annual_bonus") +print(f"Column names after rename: {t.col_names}") + +# -- drop_column() 
---------------------------------------------------------- +t.drop_column("annual_bonus") +print(f"Column names after drop: {t.col_names}") diff --git a/examples/ctable/persistence.py b/examples/ctable/persistence.py new file mode 100644 index 00000000..6433daef --- /dev/null +++ b/examples/ctable/persistence.py @@ -0,0 +1,69 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Persistence: write to disk, open read-only/read-write, save, load. + +import shutil +import tempfile +from dataclasses import dataclass + +import numpy as np + +import blosc2 + + +@dataclass +class Measurement: + sensor_id: int = blosc2.field(blosc2.int32(ge=0)) + temperature: float = blosc2.field(blosc2.float64(), default=0.0) + day: int = blosc2.field(blosc2.int16(ge=1, le=365), default=1) + + +rng = np.random.default_rng(0) +N = 10_000 +data = [ + (int(rng.integers(0, 20)), float(rng.normal(15.0, 10.0)), int(rng.integers(1, 366))) for _ in range(N) +] + +tmpdir = tempfile.mkdtemp(prefix="blosc2_ctable_") +disk_path = f"{tmpdir}/measurements" +copy_path = f"{tmpdir}/measurements_copy" + +try: + # -- Create directly on disk (mode="w") --------------------------------- + t = blosc2.CTable(Measurement, new_data=data, urlpath=disk_path, mode="w") + print(f"Created on disk: {len(t):,} rows at '{disk_path}'") + t.info() + + # -- Open read-only (default) ------------------------------------------- + ro = blosc2.CTable.open(disk_path) # mode="r" by default + print(f"Opened read-only: {len(ro):,} rows") + print(f" mean temperature: {ro['temperature'].mean():.2f}") + + try: + ro.append(Measurement(sensor_id=0, temperature=20.0, day=1)) + except ValueError as e: + print(f" Write blocked (read-only): {e}") + + # -- Open read-write and mutate ----------------------------------------- 
+ rw = blosc2.CTable.open(disk_path, mode="a") + rw.append(Measurement(sensor_id=99, temperature=99.0, day=100)) + print(f"\nAfter append (read-write): {len(rw):,} rows") + + # -- save(): copy in-memory table to disk ------------------------------- + mem = blosc2.CTable(Measurement, new_data=data[:100]) + mem.save(copy_path) + print(f"In-memory table saved to '{copy_path}'") + + # -- load(): pull a disk table fully into RAM --------------------------- + ram = blosc2.CTable.load(disk_path) + print(f"Loaded into RAM: {len(ram):,} rows (cbytes={ram.cbytes:,})") + assert len(ram) == len(rw) + +finally: + shutil.rmtree(tmpdir) + print("\nTemporary files removed.") diff --git a/examples/ctable/querying.py b/examples/ctable/querying.py new file mode 100644 index 00000000..433c5110 --- /dev/null +++ b/examples/ctable/querying.py @@ -0,0 +1,60 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Querying: where() filters, select() column projection, and chaining. 
+ +from dataclasses import dataclass + +import blosc2 + + +@dataclass +class Sale: + id: int = blosc2.field(blosc2.int64(ge=0)) + region: str = blosc2.field(blosc2.string(max_length=16), default="") + amount: float = blosc2.field(blosc2.float64(ge=0), default=0.0) + returned: bool = blosc2.field(blosc2.bool(), default=False) + + +data = [ + (0, "North", 120.0, False), + (1, "South", 340.0, False), + (2, "North", 85.0, True), + (3, "East", 210.0, False), + (4, "West", 430.0, False), + (5, "South", 60.0, True), + (6, "East", 300.0, False), + (7, "North", 500.0, False), + (8, "West", 175.0, True), + (9, "South", 220.0, False), +] + +t = blosc2.CTable(Sale, new_data=data) + +# -- where(): row filter ---------------------------------------------------- +high_value = t.where(t["amount"] > 200) +print(f"Sales > $200: {len(high_value)} rows") +print(high_value) + +not_returned = t.where(t["returned"] == False)  # noqa: E712 +print(f"Not returned: {len(not_returned)} rows") + +# -- chained filters (views are composable) --------------------------------- +north = t.where(t["region"] == "North") +north_big = north.where(north["amount"] > 100) +print(f"North region + amount > 100: {len(north_big)} rows") +print(north_big) + +# -- select(): column projection (no data copy) ----------------------------- +slim = t.select(["id", "amount"]) +print("id + amount only:") +print(slim) + +# -- combined: select columns, then filter rows ----------------------------- +result = t.select(["region", "amount"]).where(t["returned"] == False)  # noqa: E712 +print("Region + amount for non-returned sales:") +print(result) diff --git a/examples/ctable/real_world.py b/examples/ctable/real_world.py new file mode 100644 index 00000000..927f05bd --- /dev/null +++ b/examples/ctable/real_world.py @@ -0,0 +1,114 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Real-world example: weather station data. +# +# Simulates a year of readings from 10 stations, then: +# - filters to a single station +# - finds the 5 hottest days +# - computes correlations between meteorological variables +# - saves the filtered data to disk and reloads it + +import shutil +import tempfile +from dataclasses import dataclass + +import numpy as np + +import blosc2 + + +@dataclass +class WeatherReading: + station_id: int = blosc2.field(blosc2.int32(ge=0, le=9)) + temperature: float = blosc2.field(blosc2.float32(ge=-80.0, le=60.0), default=20.0) + humidity: float = blosc2.field(blosc2.float32(ge=0.0, le=100.0), default=50.0) + wind_speed: float = blosc2.field(blosc2.float32(ge=0.0, le=200.0), default=0.0) + pressure: float = blosc2.field(blosc2.float32(ge=800.0, le=1100.0), default=1013.0) + day_of_year: int = blosc2.field(blosc2.int16(ge=1, le=365), default=1) + + +# -- Generate a full year of readings for 10 stations ---------------------- +rng = np.random.default_rng(42) +N_STATIONS = 10 +N_DAYS = 365 +N = N_STATIONS * N_DAYS # 3 650 rows + +station_ids = np.tile(np.arange(N_STATIONS, dtype=np.int32), N_DAYS) +temperatures = rng.normal(15.0, 12.0, N).clip(-80, 60).astype(np.float32) +humidities = rng.uniform(20.0, 95.0, N).astype(np.float32) +wind_speeds = rng.exponential(10.0, N).clip(0, 200).astype(np.float32) +pressures = rng.normal(1013.0, 8.0, N).clip(800, 1100).astype(np.float32) +days = np.repeat(np.arange(1, N_DAYS + 1, dtype=np.int16), N_STATIONS) + +arr = np.zeros( + N, + dtype=[ + ("station_id", np.int32), + ("temperature", np.float32), + ("humidity", np.float32), + ("wind_speed", np.float32), + ("pressure", np.float32), + ("day_of_year", np.int16), + ], +) +for col, val in [ + ("station_id", station_ids), + ("temperature", temperatures), + ("humidity", humidities), + ("wind_speed", wind_speeds), + ("pressure", 
pressures), + ("day_of_year", days), +]: + arr[col] = val + +t = blosc2.CTable(WeatherReading, new_data=arr, validate=False) +print(f"Full dataset: {len(t):,} rows ({N_STATIONS} stations × {N_DAYS} days)") +t.info() + +# -- Filter to station 3 ---------------------------------------------------- +station3 = t.where(t["station_id"] == 3) +print(f"Station 3: {len(station3)} readings") +print(f" mean temperature : {station3['temperature'].mean():.1f} °C") +print(f" mean humidity : {station3['humidity'].mean():.1f} %") +print(f" mean wind speed : {station3['wind_speed'].mean():.1f} km/h\n") + +# -- 5 hottest days at station 3 (sort full table, then filter) ------------ +sorted_by_temp = t.sort_by("temperature", ascending=False) +hottest_s3 = sorted_by_temp.where(sorted_by_temp["station_id"] == 3) +print("5 hottest days at station 3:") +print(hottest_s3.head(5)) + +# -- Covariance of numeric variables (all stations) ------------------------- +numeric = t.select(["temperature", "humidity", "wind_speed", "pressure"]) +cov = numeric.cov() +labels = ["temp", "humidity", "wind", "pressure"] +col_w = 11 +print("Covariance matrix (all stations):") +print(" " * 10 + "".join(f"{lbl:>{col_w}}" for lbl in labels)) +for i, lbl in enumerate(labels): + print(f"{lbl:<10}" + "".join(f"{cov[i, j]:>{col_w}.3f}" for j in range(4))) + +# -- Save station 3 data to disk and reload --------------------------------- +tmpdir = tempfile.mkdtemp(prefix="blosc2_weather_") +path = f"{tmpdir}/station3" +try: + # Views cannot be sorted or saved directly — materialise via Arrow first + s3_copy = blosc2.CTable.from_arrow(station3.to_arrow()) + s3_copy.sort_by("day_of_year", inplace=True) + sorted_s3 = s3_copy + sorted_s3.save(path, overwrite=True) + print(f"\nStation 3 data saved to '{path}'") + + reloaded = blosc2.CTable.load(path) + print( + f"Reloaded: {len(reloaded)} rows, " + f"days {reloaded['day_of_year'].min()}–{reloaded['day_of_year'].max()}" + ) +finally: + shutil.rmtree(tmpdir) + 
print("Temporary files removed.") diff --git a/examples/ctable/schema.py b/examples/ctable/schema.py new file mode 100644 index 00000000..327b0a11 --- /dev/null +++ b/examples/ctable/schema.py @@ -0,0 +1,55 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Schema layer: dataclass field specs, constraints, and validation. + +from dataclasses import dataclass + +import blosc2 + + +@dataclass +class Product: + id: int = blosc2.field(blosc2.int64(ge=0)) + price: float = blosc2.field(blosc2.float64(ge=0.0, le=10_000.0), default=0.0) + stock: int = blosc2.field(blosc2.int32(ge=0), default=0) + category: str = blosc2.field(blosc2.string(max_length=32), default="") + on_sale: bool = blosc2.field(blosc2.bool(), default=False) + + +t = blosc2.CTable(Product) + +# Valid row +t.append(Product(id=1, price=29.99, stock=100, category="electronics", on_sale=False)) +t.append(Product(id=2, price=4.50, stock=200, category="food", on_sale=True)) +print("Valid rows appended successfully.") +print(t) + +# Inspect the compiled schema +print("Schema:") +for col in t.schema.columns: + print(f" {col.name:<12} dtype={col.dtype} spec={col.spec}") + +# Constraint violation: price < 0 +try: + t.append(Product(id=3, price=-1.0, stock=10, category="misc", on_sale=False)) +except Exception as e: + print(f"\nCaught validation error (price < 0): {e}") + +# Constraint violation: id < 0 +try: + t.append(Product(id=-5, price=10.0, stock=10, category="misc", on_sale=False)) +except Exception as e: + print(f"Caught validation error (id < 0): {e}") + +# String too long (max_length=32) +try: + t.append(Product(id=4, price=1.0, stock=1, category="a" * 50, on_sale=False)) +except Exception as e: + print(f"Caught validation error (string too long): {e}") + +print(f"\nTable still has 
{len(t)} valid rows.") diff --git a/plans/ctable-implementation-log.md b/plans/ctable-implementation-log.md new file mode 100644 index 00000000..ab81a188 --- /dev/null +++ b/plans/ctable-implementation-log.md @@ -0,0 +1,415 @@ +# CTable Implementation Log + +This document records everything implemented across the CTable feature: +the `ctable-schema.md` redesign (schema, validation, serialization, optimizations) +and the `ctable-persistency.md` phase (file-backed storage, `open()`, read-only mode). + +--- + +## Phase 1 — Schema redesign (`ctable-schema.md`) + +The goal was to replace the original Pydantic-`BaseModel`-based schema API with a +**dataclass-first schema API** using declarative spec objects (`b2.int64()`, +`b2.float64()`, etc.) and to wire in full constraint validation on insert. + +--- + +## New files + +### `src/blosc2/schema.py` + +Defines the public schema vocabulary. + +**Contents:** + +- `SchemaSpec` — abstract base class for all column type descriptors. +- `int64`, `float64`, `bool`, `complex64`, `complex128`, `string`, `bytes` — + concrete spec classes. Each carries: + - `dtype` — the NumPy storage dtype + - `python_type` — the corresponding Python type + - Constraint attributes: `ge`, `gt`, `le`, `lt` (numeric); `min_length`, + `max_length`, `pattern` (string/bytes) + - `to_pydantic_kwargs()` — returns only the non-`None` constraints as a dict, + used internally to build Pydantic validator models + - `to_metadata_dict()` — returns a JSON-compatible dict used for serialization +- `field(spec, *, default, cparams, dparams, chunks, blocks)` — attaches a spec + and per-column storage options to a dataclass field via + `dataclasses.field(metadata={"blosc2": {...}})`. +- `BLOSC2_FIELD_METADATA_KEY = "blosc2"` — stable key for the metadata dict. + +**Key design note:** `bool` and `bytes` shadow Python builtins inside this module. +Private aliases `_builtin_bool` and `_builtin_bytes` are used where the originals +are needed. 
+ +--- + +### `src/blosc2/schema_compiler.py` + +Compiles a dataclass row definition into an internal `CompiledSchema`. + +**Contents:** + +- `ColumnConfig(slots=True)` — holds per-column NDArray storage options: + `cparams`, `dparams`, `chunks`, `blocks`. +- `CompiledColumn(slots=True)` — holds everything about one column: + `name`, `py_type`, `spec`, `dtype`, `default`, `config`, `display_width`. +- `CompiledSchema(slots=True)` — holds the full compiled schema: + `row_cls`, `columns`, `columns_by_name`, `validator_model` (filled lazily by + `schema_validation.py`). +- `compile_schema(row_cls)` — main entry point. Walks `dataclasses.fields()`, + reads `blosc2` metadata from each field, infers specs from plain annotations + where no `b2.field()` is present, validates annotation/spec compatibility, and + returns a `CompiledSchema`. +- `get_blosc2_field_metadata(dc_field)` — extracts the `"blosc2"` metadata dict + from a dataclass field, or returns `None`. +- `infer_spec_from_annotation(annotation)` — builds a default spec from a plain + Python type (`int` → `int64()`, `float` → `float64()`, etc.). Used for inferred + shorthand fields like `id: int` (no `b2.field()`). +- `validate_annotation_matches_spec(name, annotation, spec)` — rejects + declarations where the Python annotation is incompatible with the spec (e.g. + `id: str = b2.field(b2.int64())`). +- `compute_display_width(spec)` — returns a sensible terminal column width based + on dtype kind. +- `schema_to_dict(schema)` — serializes a `CompiledSchema` to a JSON-compatible + dict. Handles `MISSING` defaults (→ `None`), complex defaults + (→ `{"__complex__": True, "real": ..., "imag": ...}`), and optional per-column + storage config fields. +- `schema_from_dict(data)` — reconstructs a `CompiledSchema` from a serialized + dict. Does not require the original Python dataclass. Returns `row_cls=None`. + Raises `ValueError` on unknown `kind` or unsupported `version`. 
+ +--- + +### `src/blosc2/schema_validation.py` + +Row-level constraint validation backed by Pydantic. All Pydantic imports are +isolated here so the rest of the codebase never touches Pydantic directly. + +**Contents:** + +- `build_validator_model(schema)` — builds a `pydantic.create_model(...)` class + from the compiled schema. Each column's `to_pydantic_kwargs()` result is passed + to `pydantic.Field(...)`. The result is cached on `schema.validator_model` so it + is built only once per schema. +- `validate_row(schema, row_dict)` — validates one `{col_name: value}` dict. + Calls the cached Pydantic model, catches `ValidationError`, and re-raises as a + plain `ValueError` so callers never need to import Pydantic. +- `validate_rows_rowwise(schema, rows)` — validates a list of row dicts. Raises + `ValueError` on the first violation, including the row index. + +**When used:** called by `CTable.append()` when `self._validate` is `True`. + +--- + +### `src/blosc2/schema_vectorized.py` + +NumPy-based constraint validation for bulk inserts. Used by `CTable.extend()` to +check entire column arrays at once without per-row Python overhead. + +**Contents:** + +- `validate_column_values(col, values)` — checks all constraint attributes + present on `col.spec` against a NumPy array of values. Uses `np.any(arr < ge)` + style checks. For string/bytes, uses `np.vectorize(len)` to check lengths. + Reports the first offending value in the error message. +- `validate_column_batch(schema, columns)` — calls `validate_column_values` for + every column present in the `columns` dict. + +**Why separate from Pydantic validation:** `extend()` can receive millions of +rows. Row-by-row Pydantic validation would be unacceptably slow for large batches. +NumPy operations run in C with no per-element Python overhead. 
+ +--- + +## Changes to existing files + +### `src/blosc2/ctable.py` + +**Schema detection at construction:** + +```python +if dataclasses.is_dataclass(row_type) and isinstance(row_type, type): + self._schema = compile_schema(row_type) +else: + self._schema = _compile_pydantic_schema(row_type) # legacy path +``` + +**New constructor parameters:** `validate=True`, `cparams=None`, `dparams=None`. +Stored as `self._validate`, `self._table_cparams`, `self._table_dparams`. + +**`_init_columns`:** builds NDArrays from `self._schema.columns` instead of +iterating `row_type.model_fields`. + +**`_resolve_column_storage`:** merges column-level and table-level storage +settings. Column-level wins. + +**`_normalize_row_input`:** normalizes list/tuple/dict/dataclass instance/ +`np.void` to a `{col_name: value}` dict. + +**`_coerce_row_to_storage`:** coerces each value to the column's NumPy dtype +using `np.array(val, dtype=col.dtype).item()`. + +**`append()` new flow:** +1. `_normalize_row_input(data)` → dict +2. `validate_row(schema, row)` if `self._validate` (Pydantic row validation) +3. `_coerce_row_to_storage(row)` → storage dict +4. Find write position, resize if needed, write column by column. + +**`extend()` new signature:** `extend(data, *, validate=None)`. +- `validate=None` uses `self._validate` (table default). +- `validate=True/False` overrides for this call only. +- Vectorized validation runs on raw column arrays before `blosc2.asarray` conversion. + +**Schema introspection (new):** +- `table.schema` property — returns `self._schema`. +- `table.column_schema(name)` — returns `CompiledColumn` for a given column name. +- `table.schema_dict()` — delegates to `schema_to_dict(self._schema)`. + +**Legacy Pydantic adapter kept:** +- `NumpyDtype`, `MaxLen`, `_resolve_field_dtype`, `_LegacySpec`, + `_compile_pydantic_schema` all remain so existing Pydantic-`BaseModel`-based + schemas continue to work during the transition. 
+ +### `src/blosc2/__init__.py` + +Added to delayed imports: + +```python +from .schema import bool, bytes, complex64, complex128, field, float64, int64, string +``` + +Added to `__all__`: +`"bool"`, `"bytes"`, `"complex64"`, `"complex128"`, `"field"`, `"float64"`, +`"int64"`, `"string"`. + +--- + +## Tests + +All tests live in `tests/ctable/`. + +| File | Covers | +|---|---| +| `test_schema_specs.py` | Spec dtypes, python types, constraint storage, `to_pydantic_kwargs`, `to_metadata_dict`, `blosc2` namespace exports | +| `test_schema_compiler.py` | `compile_schema` with explicit `b2.field()`, inferred shorthand, mismatch rejection, defaults, cparams; `schema_to_dict` / `schema_from_dict` roundtrip | +| `test_schema_validation.py` | `append` and `extend` constraint enforcement; boundary values; `validate=False` bypass; `gt`/`lt` exclusive bounds; NumPy structured array path | +| `test_ctable_dataclass_schema.py` | End-to-end CTable construction, `append` with tuple/list/dict, `extend` with iterable and structured array, per-call `validate=` override, schema introspection | +| `test_construct.py` | Construction variants, `append`/`extend`/resize, column integrity, `_valid_rows` | +| `test_column.py` | Column indexing, slicing, iteration, `to_numpy()`, mask independence | +| `test_compact.py` | Manual and auto compaction | +| `test_delete_rows.py` | Single/list/slice deletion, out-of-bounds, edge cases, stress | +| `test_extend_delete.py` | Interleaved extend/delete cycles, mask correctness, resize behavior | +| `test_row_logic.py` | Row indexer (int/slice/list), views, chained views | + +Total: **135 tests, all passing** (after Phase 1 + optimizations). + +--- + +## Phase 1 design decisions + +**Why two validation paths?** +`append()` handles one row at a time — Pydantic is fast enough and also performs +type coercion and default filling. `extend()` handles bulk data — vectorized NumPy +checks are orders of magnitude faster for large batches. 
+ +**Why `validate=None` as the default on `extend()`?** +`None` means "inherit the table-level flag". `True`/`False` are explicit overrides. +This avoids a boolean that accidentally silences the table-level setting. + +**Why keep the Pydantic adapter?** +Existing code using `class RowModel(BaseModel)` continues to work without +modification. The adapter is not on the critical path for new code. + +**Why `schema_to_dict` / `schema_from_dict` now?** +Persistence requires a self-contained schema representation that survives without +the original Python dataclass. Establishing the serialization format before +persistence was built ensured the format was stable before anything depended on it. + +--- + +## Phase 1 optimizations (post-schema) + +Several performance improvements were made after the schema work was complete: + +**`_last_pos` cache** +Added `_last_pos: int | None` to `CTable`. Tracks the physical index of the next +write slot so that `append()` and `extend()` no longer need to scan backward through +chunk metadata on every call. Set to `None` after any deletion (triggers one lazy +recalculation on the next write). Set to `_n_rows` after `compact()`. Eliminated a +backward O(n_chunks) scan per insert. + +**`_grow()` helper** +Extracted the capacity-doubling logic into `_grow()`. Removes duplication between +`append()` and `extend()`. + +**In-place delete** +`delete()` now writes the updated boolean array back with `self._valid_rows[:] = +valid_rows_np` (in-place slice assignment) instead of creating a new NDArray. +Avoids a full allocation on each delete. + +**`head()` / `tail()` refactored** +Both methods now reuse `_find_physical_index()` instead of containing their own +chunk-walk loops. + +**`_make_view()` classmethod** +Added to construct view CTables without going through `__init__`. Avoids +allocating and immediately discarding NDArrays that were never used. 
+ +**`_NumericSpec` mixin + new spec types** +All numeric specs (`int8` through `uint64`, `float32`, `float64`) share a common +`_NumericSpec` mixin for `ge`/`gt`/`le`/`lt` constraint handling, eliminating +boilerplate. New specs added: `int8`, `int16`, `int32`, `uint8`, `uint16`, +`uint32`, `uint64`, `float32`. + +**String vectorized validation** +`validate_column_values` uses `np.char.str_len()` (true C-level) for `U`/`S` dtype +arrays instead of `np.vectorize(len)` (Python loop in disguise). The length check +was extracted into `_validate_string_lengths()` to reduce cyclomatic complexity. + +**Column name validation** +`compile_schema` now calls `_validate_column_name()` on every field. Rejects names +that are empty, start with `_`, or contain `/` — rules that apply equally to +in-memory and persistent tables. + +--- + +## Phase 2 — Persistency (`ctable-persistency.md`) + +### New file: `src/blosc2/ctable_storage.py` + +A storage-backend abstraction that keeps all file I/O out of `ctable.py`. + +**`TableStorage`** — interface class defining: +`create_column`, `open_column`, `create_valid_rows`, `open_valid_rows`, +`save_schema`, `load_schema`, `table_exists`, `is_read_only`. + +**`InMemoryTableStorage`** — trivial implementation that creates plain in-memory +`blosc2.NDArray` objects and is a no-op for `save_schema`. Used when `urlpath` is +not provided (existing default behaviour, unchanged). + +**`FileTableStorage`** — file-backed implementation. + +Disk layout: + +``` +<urlpath>/ + _meta.b2frame ← blosc2.SChunk; vlmeta holds kind, version, schema JSON + _valid_rows.b2nd ← file-backed boolean NDArray (tombstone mask) + _cols/ + <name>.b2nd ← one file-backed NDArray per column +``` + +Key implementation notes: +- `save_schema` always opens `_meta.b2frame` with `mode="w"` (create path only). +- `load_schema` / `check_kind` use `blosc2.open()` (not `blosc2.SChunk(..., + mode="a")`), which is the correct API for reopening an existing SChunk file.
+- File-backed NDArrays (`urlpath=..., mode="w"`) support in-place writes + (`col[pos] = value`, `col[start:end] = arr`) that persist immediately. This is + why resize (`_grow()`), append, extend, and delete all work transparently on + persistent tables. +- `_n_rows` on reopen is reconstructed as `blosc2.count_nonzero(valid_rows)` — + always correct because unwritten slots are `False`, same as deleted slots. +- `_last_pos` is set to `None` on reopen and resolved lazily by `_resolve_last_pos()` + on the first write. + +### Changes to `src/blosc2/ctable.py` + +**Constructor** + +New parameters: `urlpath: str | None = None`, `mode: str = "a"`. + +Logic: +- `urlpath=None` → `InMemoryTableStorage` → existing behaviour unchanged. +- `urlpath` + existing table + `mode != "w"` → open existing (load schema from + disk, open file-backed arrays, reconstruct state). +- `urlpath` + `mode="w"` or no existing table → create new (compile schema, + save to disk, create file-backed arrays). +- Passing `new_data` when opening an existing table raises `ValueError`. + +**`CTable.open(cls, urlpath, *, mode="r")`** + +New classmethod for ergonomic read-only access. Opens the table, verifies +`kind="ctable"` in vlmeta, reconstructs schema from JSON (no dataclass needed), +returns a fully usable `CTable`. + +**Read-only enforcement** + +`_read_only: bool` flag set from `storage.is_read_only()`. Guards added to the top +of `append()`, `extend()`, `delete()`, `compact()` — each raises +`ValueError("Table is read-only (opened with mode='r').")`. + +**`_make_view(cls, parent, new_valid_rows)`** + +New classmethod that constructs a view `CTable` directly via `cls.__new__` without +calling `__init__`. Replaces the old `CTable(self._row_type, expected_size=...)` + +`retval._cols = self._cols` pattern, which was wasteful (allocated NDArrays then +discarded them) and broke when `_row_type` is `None` (tables opened via `open()`). 
+ +**`schema_dict()`** + +No longer needs a local import of `schema_to_dict` — now imported at the module top. + +### New test file: `tests/ctable/test_persistency.py` + +23 tests covering: + +| Test group | What it checks | +|---|---| +| Layout | `_meta.b2frame`, `_valid_rows.b2nd`, `_cols/<name>.b2nd` all exist after creation | +| Metadata | `kind`, `version`, `schema` in vlmeta; column names and order in schema JSON | +| Round-trips | Data survives reopen via both `CTable(Row, urlpath=..., mode="a")` and `CTable.open()` | +| Column order | Preserved exactly from schema JSON, not from filesystem order | +| Constraints | Validation re-enabled after reopen (schema reconstructed from disk) | +| Append/extend/delete after reopen | Mutations visible in subsequent opens | +| `_valid_rows` on disk | Tombstone mask correctly stored and loaded | +| `mode="w"` | Overwrites existing table; subsequent open sees empty table | +| Read-only | `append`, `extend`, `delete`, `compact` all raise on `mode="r"` | +| Read-only reads | `row[]`, column access, `head()`, `tail()`, `where()` all work | +| Error cases | `FileNotFoundError` for missing path; `ValueError` for wrong kind | +| Column name validation | Empty, `_`-prefixed, `/`-containing names rejected | +| `new_data` guard | `ValueError` when `new_data` passed to open-existing path | +| Capacity growth | `_grow()` (resize) works on file-backed arrays and survives reopen | + +Total: **158 tests, all passing**. + +### New benchmark: `bench/ctable/bench_persistency.py` + +Four sections: + +1. **`extend()` bulk insert** — in-memory vs file-backed at 1k–1M rows. + Overhead converges to ~1x at 1M rows (compression dominates, not I/O). +2. **`open()` / reopen time** — ~4–10 ms regardless of table size. Fixed cost: + open 3 files (meta, valid_rows, one column) + parse schema JSON. +3. **`append()` single-row** — file-backed is ~6x slower per row (~3 ms vs ~0.5 ms). + Recommendation: batch inserts via `extend()` for persistent tables. +4.
**Column `to_numpy()`** — essentially identical between backends (≤1.06x ratio). + Decompression dominates; file I/O is negligible once data is loaded. + +--- + +## Phase 2 design decisions + +**Why direct files instead of TreeStore?** +TreeStore stores snapshots of in-memory arrays. In-place writes to a +TreeStore-retrieved NDArray do not persist after reopen. File-backed NDArrays +created with `urlpath=...` support in-place writes natively. Using direct `.b2nd` +files aligns with how the rest of blosc2 handles persistent arrays. + +**Why `blosc2.SChunk` vlmeta for metadata, not JSON files?** +`vlmeta` is compressed and is already part of the blosc2 ecosystem. +`blosc2.open()` works on `.b2frame` files the same way it works on `.b2nd` files, +keeping the open path uniform. + +**Why not store `_last_pos` in metadata?** +`_resolve_last_pos()` reconstructs it in O(n_chunks) with no full decompression. +Storing it would create a write on every `append()` just to update a counter in the +SChunk — not worth the extra I/O. + +**Why `_make_view()` instead of calling `__init__`?** +`__init__` now has storage-routing logic and would try to create new NDArrays even +for views (which immediately get thrown away). `_make_view()` via `__new__` is +explicit and zero-waste. + +**Why `CTable.open()` defaults to `mode="r"`?** +The most common read-back scenario is inspection or analysis, not modification. +Defaulting to read-only prevents accidental mutations on shared or archived tables. 
diff --git a/plans/ctable-persistency.md b/plans/ctable-persistency.md new file mode 100644 index 00000000..a2ff6db2 --- /dev/null +++ b/plans/ctable-persistency.md @@ -0,0 +1,536 @@ +# CTable Persistency Plan + +## Goal + +Add persistent `CTable` support on top of `TreeStore` while keeping the public +API simple: + +* in-memory tables when `urlpath is None` +* persistent tables when `urlpath` is provided + +The first persistency iteration should support: + +* creating a persistent table +* opening an existing persistent table +* reading rows, columns, and views from persisted tables +* appending rows + +The first persistency iteration should **not** promise: + +* full schema evolution +* dropping columns +* renaming columns +* transactional multi-entry updates + +For now, the supported schema evolution story is: + +* append rows only + +--- + +## Storage layout + +Each persisted `CTable` lives under a table root inside a `TreeStore`. + +Confirmed layout: + +* `table_root/_meta` +* `table_root/_valid_rows` +* `table_root/_cols/` + +Example: + +* `people/_meta` +* `people/_valid_rows` +* `people/_cols/id` +* `people/_cols/score` +* `people/_cols/active` + +Rationale: + +* `_meta` holds mutable metadata in `vlmeta` +* `_valid_rows` is real table data and should be stored as a normal persisted array +* `_cols/` stores one persisted NDArray per column + +The underscore-prefixed names form the internal namespace for a table root and +must be treated as reserved. + +--- + +## `_meta` entry + +`_meta` should be a small serialized `SChunk` used primarily to hold mutable +`vlmeta`. 
+ +This is preferable to immutable metalayers because: + +* we may want to evolve metadata over time +* multiple `CTable` objects may live in the same `TreeStore` +* schema and table metadata should be updatable without rewriting the entire table + +For the first version: + +* `tree_store["<table_root>/_meta"].vlmeta["kind"] = "ctable"` +* `tree_store["<table_root>/_meta"].vlmeta["version"] = 1` +* `tree_store["<table_root>/_meta"].vlmeta["schema"] = {...}` + +This gives `open()` a minimal, reliable contract for introspection. + +--- + +## Schema persistence format + +The schema should be stored as JSON-compatible data in: + +* `tree_store["<table_root>/_meta"].vlmeta["schema"]` + +The schema document should be versioned and explicit. + +Recommended shape: + +```python +{ + "version": 1, + "columns": [ + { + "name": "id", + "py_type": "int", + "spec": {"kind": "int64", "ge": 0}, + "default": None, + }, + { + "name": "score", + "py_type": "float", + "spec": {"kind": "float64", "ge": 0, "le": 100}, + "default": None, + }, + { + "name": "active", + "py_type": "bool", + "spec": {"kind": "bool"}, + "default": True, + }, + ], +} +``` + +Notes: + +* `columns` must be an ordered list, not a dict. +* The order of the list is the source of truth for column order. +* Do not rely on dict ordering or TreeStore iteration order. +* The schema JSON should capture logical schema information only. + +For the first version, do **not** duplicate: + +* per-column `cparams` +* per-column `dparams` +* array chunk/block layout +* `expected_size` +* compaction settings + +Those can be introspected directly from the stored arrays when needed.
+ +--- + +## `_valid_rows` persistence + +`_valid_rows` should be stored as a normal persisted boolean NDArray under: + +* `table_root/_valid_rows` + +This is the correct representation because `_valid_rows` is: + +* table data, not metadata +* potentially large +* used in normal row visibility semantics +* already aligned with current delete/view/compaction logic + +Do not encode `_valid_rows` into schema JSON or small metadata blobs. + +--- + +## Column persistence + +Each column should be stored as its own persisted NDArray under: + +* `table_root/_cols/` + +This means: + +* each column can be opened independently +* column-level array settings remain attached to the actual stored array +* persistence layout matches the internal columnar design cleanly + +The schema JSON provides the logical order and type constraints; the arrays under +`_cols` provide the physical stored data. + +--- + +## Constructor semantics + +The recommended constructor shape is: + +```python +table = b2.CTable( + Row, + urlpath=None, + mode="a", + expected_size=1_048_576, + compact=False, + validate=True, +) +``` + +Semantics: + +* `urlpath is None` + create an in-memory `CTable` +* `urlpath is not None` + use persistent storage rooted at that path + +Recommended `mode` meanings: + +* `mode="w"` + create a new persistent table, overwriting any existing table root if the API + already supports that pattern elsewhere +* `mode="a"` + open existing or create new +* `mode="r"` + open existing read-only table + +The important public signal is: + +* `urlpath` chooses persistence +* `mode` chooses creation/open behavior + +Users should not need to pass a `TreeStore` object explicitly for the common path. + +--- + +## `open()` support + +An explicit `open()` API should be supported. 
+ +Recommended shape: + +```python +table = b2.open(urlpath) +``` + +or, if needed for clarity: + +```python +table = b2.CTable.open(urlpath, mode="r") +``` + +For `open()` to detect a persisted `CTable`, it should inspect: + +* `urlpath/_meta` +* the `vlmeta["kind"]` entry of `urlpath/_meta` + +If: + +* `_meta` exists +* `vlmeta["kind"] == "ctable"` + +then the object should be recognized as a persisted `CTable`. + +This keeps `urlpath` simple: it points to the table root, and `_meta` provides +the type marker and schema. + +--- + +## Multiple tables in one TreeStore + +The design must support multiple `CTable` objects in the same `TreeStore`. + +That is one reason `_meta` is a good choice: + +* each table root has its own `_meta` +* each table root can be introspected independently +* schema metadata is naturally scoped to one table subtree + +Example shared TreeStore: + +* `users/_meta` +* `users/_valid_rows` +* `users/_cols/id` +* `orders/_meta` +* `orders/_valid_rows` +* `orders/_cols/order_id` + +No additional global registry is required in the first version. + +--- + +## Column name validation + +Column name validation should be explicit and should be shared between: + +* in-memory `CTable` +* persistent `CTable` + +Reason: + +* a schema should not be valid in memory and then fail only when persisted + +Recommended first-rule constraints for column names: + +* must be a non-empty string +* must not contain `/` +* must not start with `_` +* must not collide with reserved internal names + +Reserved internal names for the table root layout: + +* `_meta` +* `_valid_rows` +* `_cols` + +This validation should happen during schema compilation, not only during +persistent-table creation. + +--- + +## Column order + +Column order should be preserved explicitly in the schema JSON.
+ +The source of truth is: + +* the order of `schema["columns"]` + +Do not rely on: + +* dict ordering as a persistence contract +* lexical ordering of `_cols/` +* TreeStore iteration order + +On load: + +* reconstruct `table.col_names` from the schema list order +* rebuild any name-to-column map separately + +--- + +## Read-only mode + +When `mode="r"`: + +Allowed: + +* opening the table +* reading rows +* reading columns +* creating non-mutating views +* `head()`, `tail()`, filtering, and other read-only operations + +Disallowed: + +* `append()` +* `delete()` +* `compact()` +* any operation that mutates stored arrays or metadata + +These should fail immediately with a clear error. + +If some existing view path currently requires mutation internally, that should be +cleaned up rather than weakening the read-only contract. + +--- + +## Failure model + +The first persistency version does not need full transactional semantics. + +Be explicit in the implementation and docs: + +* updates touching multiple entries are not guaranteed to be atomic +* partial writes are possible if a failure occurs mid-update + +That is acceptable for the first version as long as it is not hidden. + +The initial goal is a correct and understandable persistent layout, not a full +transaction layer. + +--- + +## Internal API sketch + +This is a proposed internal storage split, not a final public API requirement. + +Possible internal helpers: + +```python +class TableStorage: + def open_column(self, name: str): ... + def create_column( + self, + name: str, + *, + dtype, + shape, + chunks=None, + blocks=None, + cparams=None, + dparams=None + ): ... + def open_valid_rows(self): ... + def create_valid_rows( + self, *, shape, chunks=None, blocks=None, cparams=None, dparams=None + ): ... + def load_schema(self) -> dict: ... + def save_schema(self, schema: dict) -> None: ... + def exists(self) -> bool: ... + def is_read_only(self) -> bool: ... + + +class InMemoryTableStorage(TableStorage): ... 
+ + +class TreeStoreTableStorage(TableStorage): ... +``` + +Then `CTable` can route based on `urlpath`: + +* `urlpath is None` -> `InMemoryTableStorage` +* `urlpath is not None` -> `TreeStoreTableStorage` + +This keeps persistence a backend concern instead of scattering TreeStore logic +throughout all of `CTable`. + +--- + +## Concrete implementation sequence + +### Step 1: extend constructor/open signatures + +Update `src/blosc2/ctable.py` to accept: + +```python +class CTable: + def __init__( + self, + row_type, + new_data=None, + *, + urlpath: str | None = None, + mode: str = "a", + expected_size: int = 1_048_576, + compact: bool = False, + validate: bool = True, + ) -> None: ... +``` + +And add: + +```python +@classmethod +def open(cls, urlpath: str, *, mode: str = "r") -> "CTable": ... +``` + +### Step 2: add storage backend abstraction + +Create a new module: + +* `src/blosc2/ctable_storage.py` + +Add: + +* `TableStorage` +* `InMemoryTableStorage` +* `TreeStoreTableStorage` + +### Step 3: implement TreeStore layout helpers + +In `TreeStoreTableStorage`, add helpers for: + +* `_meta` path +* `_valid_rows` path +* `_cols/` paths +* reading/writing `vlmeta["kind"]` +* reading/writing `vlmeta["version"]` +* reading/writing `vlmeta["schema"]` + +### Step 4: persist schema JSON + +Connect compiled schema export/import to `_meta.vlmeta["schema"]`. + +The schema compiler work should provide: + +```python +def schema_to_dict(schema: CompiledSchema) -> dict: ... +def schema_from_dict(data: dict) -> CompiledSchema: ... +``` + +### Step 5: create/open persistent arrays + +Wire `CTable` initialization so that: + +* create path creates `_meta`, `_valid_rows`, and `_cols/` +* open path loads schema first, then opens `_valid_rows` and columns + +### Step 6: enforce read-only behavior + +Add an internal read-only flag so mutating methods fail early when opened with +`mode="r"`. 
+ +Methods to guard first: + +* `append` +* `extend` +* `delete` +* `compact` + +### Step 7: test persistency layout and round-trips + +Add tests covering: + +* create persistent `CTable` +* reopen persistent `CTable` +* schema JSON present in `_meta.vlmeta` +* `_valid_rows` persisted correctly +* column order preserved after reopen +* multiple tables inside one TreeStore +* read-only mode errors on mutation + +--- + +## Proposed tests + +Suggested test file: + +* `tests/ctable/test_persistency.py` + +Suggested test cases: + +* `test_create_persistent_ctable_layout` +* `test_open_persistent_ctable` +* `test_schema_saved_in_meta_vlmeta` +* `test_valid_rows_persisted` +* `test_column_order_roundtrip` +* `test_multiple_ctables_in_same_treestore` +* `test_read_only_mode_rejects_mutation` + +--- + +## Recommendation + +The recommended persistency design is: + +1. use `urlpath` to switch between in-memory and persistent `CTable` +2. store one table per TreeStore subtree +3. use: + * `_meta` + * `_valid_rows` + * `_cols/` +4. store schema JSON in `_meta.vlmeta["schema"]` +5. store explicit markers in `_meta.vlmeta`: + * `"kind": "ctable"` + * `"version": 1` +6. preserve column order in the schema JSON as an ordered `columns` list +7. keep the first version limited to append-row persistence, not full schema evolution + +This gives `CTable` a clear persistent layout, keeps `open()` introspection +simple, and stays consistent with the existing columnar design. diff --git a/plans/ctable-schema.md b/plans/ctable-schema.md new file mode 100644 index 00000000..d9cd3fb1 --- /dev/null +++ b/plans/ctable-schema.md @@ -0,0 +1,1258 @@ +# CTable Schema Redesign + +## Motivation + +The current `CTable` prototype in PR #598 uses `pydantic.BaseModel` plus +`Annotated[...]` metadata to define table schemas. That works, but it is not the +best long-term API for a columnar container in `python-blosc2`. 
+ +The main issues with the current shape are: + +* It mixes row validation concerns with physical storage concerns. +* It relies on custom metadata objects (`NumpyDtype`, `MaxLen`) embedded in + Pydantic annotations. +* It is verbose for simple schemas. +* It does not provide an obvious place for NDArray-specific per-column options + such as `cparams`, `dparams`, `chunks`, `blocks`, or future indexing hints. + +What we want instead is: + +* A schema API that is easy to read and write. +* A place to attach Blosc2-specific per-column configuration. +* A way to express logical constraints such as `ge=0`, `le=100`, `max_length=10`. +* Internal validation without forcing the public API to be Pydantic-shaped. +* A clean distinction between: + * logical field type and constraints + * physical storage type + * per-column storage options + +The proposed solution is a **dataclass-first schema API** with **declarative field +spec objects** and **optional internal Pydantic-backed validation**. + +The intended usage style is: + +* canonical form for constrained or storage-tuned columns: + `id: int = b2.field(b2.int64(ge=0))` +* shorthand for simple inferred columns: + `id: int` +* not preferred as a primary style: + `id = b2.field(b2.int64(ge=0))` + +The reason is that the canonical form preserves normal Python type annotations, +which are valuable for readability, static tooling, and schema inspection. + +--- + +## Proposed public API + +### Schema declaration + +The intended schema declaration style is: + +```python +from dataclasses import dataclass + +import blosc2 as b2 + + +@dataclass +class Row: + id: int = b2.field(b2.int64(ge=0)) + score: float = b2.field( + b2.float64(ge=0, le=100), + cparams={"codec": b2.Codec.LZ4, "clevel": 5}, + ) + active: bool = b2.field(b2.bool(), default=True) +``` + +This is the target user-facing API for `CTable`. + +This should be documented as the **canonical** schema declaration style. 
+ +For simple unconstrained cases, `CTable` may support an inferred shorthand: + +```python +@dataclass +class Row: + id: int + score: float + active: bool = True +``` + +which is interpreted approximately as: + +```python +@dataclass +class Row: + id: int = b2.field(b2.int64()) + score: float = b2.field(b2.float64()) + active: bool = b2.field(b2.bool(), default=True) +``` + +This shorthand should be limited to simple built-in Python types where the +mapping is obvious. + +### Naming convention + +Use **lowercase names** for schema descriptor objects: + +* `b2.int64` +* `b2.float64` +* `b2.bool` +* later: `b2.string(max_length=...)`, `b2.bytes(max_length=...)`, `b2.complex128` + +Reason: + +* `b2.int64(...)` is not just a dtype; it is a schema descriptor with constraints. +* The lowercase form keeps the API closer in spirit to NumPy and PyTorch. +* If plain NumPy dtypes are needed, callers can use `np.int64`, `np.float64`, + `np.bool_`, etc. +* `b2.bool(...)` is preferred over `b2.bool_(...)` for readability, even though + NumPy uses `bool_`. This is closer to PyTorch style and fits better for a + schema-builder API. + +### Field helper + +`b2.field(...)` should be the standard way to attach schema metadata to a +dataclass field. + +Expected shape: + +```python +b2.field( + b2.float64(ge=0, le=100), + default=..., + cparams=..., + dparams=..., + chunks=..., + blocks=..., +) +``` + +At minimum for the first version: + +* `spec` +* `default` +* `cparams` +* `dparams` +* `chunks` +* `blocks` + +The implementation should store these in `dataclasses.field(metadata=...)`. + +The unannotated form: + +```python +id = b2.field(b2.int64(ge=0)) +``` + +should not be the primary API. It may be supported later only if there is a +strong reason, but the preferred style should retain: + +* a Python type annotation in the annotation slot +* `b2.field(...)` in the field/default slot + +That keeps the schema aligned with normal dataclass usage. + +--- + +## Core design + +### 1. 
Dataclass is the schema carrier + +The dataclass defines: + +* field names +* Python-level row shape +* user-visible defaults + +Example: + +```python +@dataclass +class Row: + id: int = b2.field(b2.int64(ge=0)) + score: float = b2.field(b2.float64(ge=0, le=100)) + active: bool = b2.field(b2.bool(), default=True) +``` + +This keeps the declaration small and idiomatic. + +The Python annotation should remain part of the design, not be replaced by +`b2.field(...)` alone. The annotation provides value independently of the +Blosc2 schema descriptor. + +### 2. Schema spec objects are the source of truth + +Each lowercase builder object is a lightweight immutable schema descriptor. + +Examples: + +```python +b2.int64(ge=0) +b2.float64(ge=0, le=100) +b2.bool() +b2.string(max_length=32) +b2.bytes(max_length=64) +``` + +Each spec object should carry only schema-level metadata, for example: + +* logical kind +* storage dtype +* numeric constraints (`ge`, `gt`, `le`, `lt`, `multiple_of`) +* string constraints (`max_length`, `min_length`, `pattern`) +* nullability +* maybe logical annotations later (`categorical`, `timezone`, `unit`) + +They should **not** directly carry per-column NDArray instance settings such as +`cparams` or `chunks`; those belong in `b2.field(...)`. + +### 3. Column field metadata carries NDArray-specific configuration + +`b2.field(...)` metadata should be the place for: + +* column storage options +* per-column compression settings +* chunk/block tuning +* persistence options in future versions + +This keeps the separation clean: + +* `b2.float64(ge=0, le=100)` answers: "what values are valid?" +* `b2.field(..., cparams=..., chunks=...)` answers: "how is this column stored?" + +### 4. Schema compilation step inside CTable + +`CTable` should not consume raw dataclass fields repeatedly. On construction, it +should compile the row class into an internal schema representation. 
+ +For example: + +```python +compiled = CompiledSchema( + row_cls=Row, + columns=[ + CompiledColumn( + name="id", + py_type=int, + spec=b2.int64(ge=0), + dtype=np.int64, + default=MISSING, + cparams=..., + dparams=..., + chunks=..., + blocks=..., + validator_info=..., + ), + ..., + ], + validator_model=..., +) +``` + +This compiled form should drive: + +* NDArray creation +* row validation +* bulk validation +* introspection and future serialization + +--- + +## Validation strategy + +### Use Pydantic internally, but do not make it the public schema API + +Pydantic is a good fit for validation because it is: + +* mature +* well-tested +* expressive +* fast enough for row-level operations + +However, it should be an **implementation detail**, not the public schema surface. + +The public schema should remain: + +* dataclass-based +* Blosc2-specific +* independent of any one validation library + +### Why not use Pydantic as the schema source directly? + +Because storage and validation are overlapping but not identical concerns. + +Examples: + +* `dtype=np.int16` is both logical and physical. +* `cparams`, `chunks`, `blocks`, `dparams` are not Pydantic concepts. +* a future column index, bloom filter, or codec hint is not a validation concept. + +Therefore, the internal architecture should be: + +* user declares a dataclass + `b2.field(...)` +* `CTable` compiles it into: + * storage schema + * validation schema + +### Row-level validation + +For `append(row)` and other row-wise inserts: + +* compile a cached internal Pydantic model once per schema +* validate incoming rows against that model +* convert the validated row into column values + +This is the simplest and safest path. + +Expected behavior: + +* `table.append(Row(...))` +* `table.append({"id": 1, "score": 2.0, "active": True})` +* `table.append((1, 2.0, True))` + +All may be accepted, but internally normalized through one validator path. 
+ +### Bulk validation + +For `extend(...)`, row-by-row Pydantic validation may be too expensive for large +batches. Bulk inserts need a separate strategy. + +Recommended modes: + +* `validate=True` + Full validation. May use row-wise Pydantic validation for smaller inputs and + vectorized checks where available. +* `validate=False` + Trust caller, perform dtype coercion only. +* optional later: `validate="sample"` or `validate="vectorized"` + +For numeric and simple string constraints, vectorized checks are preferable when +possible: + +* `ge`, `gt`, `le`, `lt` +* `max_length`, `min_length` +* null checks +* dtype coercion checks + +This means the architecture should support both: + +* Pydantic row validation +* vectorized array validation + +The compiled schema should expose enough information for both. + +### Performance stance + +Pydantic should be treated as: + +* a strong default for correctness +* fast enough for row-wise validation +* not necessarily the fastest choice for large batch validation + +This is important because the performance bottleneck for `extend()` is more about +per-row Python overhead than about Pydantic specifically. + +--- + +## Detailed API proposal + +### Schema spec classes + +Add schema descriptor classes under `blosc2`, for example: + +* `int8`, `int16`, `int32`, `int64` +* `uint8`, `uint16`, `uint32`, `uint64` +* `float32`, `float64` +* `bool` +* `complex64`, `complex128` +* `string` +* `bytes` + +Minimal constructor examples: + +```python +b2.int64(ge=0) +b2.float64(ge=0, le=100) +b2.string(max_length=32) +b2.bytes(max_length=64) +b2.bool() +``` + +Internal common fields: + +* `dtype` +* `constraints` +* `python_type` + +### Field helper + +`b2.field(spec, **kwargs)` should return a `dataclasses.field(...)` object with +Blosc2 metadata attached. 
+ +Example metadata layout: + +```python +{ + "blosc2": { + "spec": ..., + "cparams": ..., + "dparams": ..., + "chunks": ..., + "blocks": ..., + } +} +``` + +This metadata key should be stable and reserved. + +### CTable constructor + +The desired constructor remains: + +```python +table = b2.CTable(Row) +``` + +Optional overrides: + +```python +table = b2.CTable( + Row, + expected_size=1_000_000, + compact=False, + validate=True, +) +``` + +`CTable` should detect that `Row` is a dataclass schema and compile it. + +### Possible compatibility layer + +If needed temporarily, `CTable` may continue accepting the old Pydantic model +style during a transition period: + +```python +table = b2.CTable(LegacyPydanticRow) +``` + +But that should be documented as legacy or transitional once the dataclass API +lands. + +--- + +## Internal compilation pipeline + +### Step 1. Inspect dataclass fields + +For each dataclass field: + +* field name +* Python annotation +* default or default factory +* Blosc2 metadata from `b2.field(...)` + +Reject invalid shapes early: + +* missing `b2.field(...)` +* missing schema spec +* incompatible Python annotation vs schema spec +* unsupported defaults + +If inferred shorthand is supported, refine the first two rules to: + +* either a supported plain annotation, or an explicit `b2.field(...)` +* if `b2.field(...)` is present, it must contain a schema spec + +### Step 2. Build compiled column descriptors + +For each field, produce a `CompiledColumn` object containing: + +* `name` +* `py_type` +* `spec` +* `dtype` +* `default` +* `cparams` +* `dparams` +* `chunks` +* `blocks` +* validation constraints + +### Step 3. Derive physical NDArray creation arguments + +From the compiled column descriptor, derive: + +* `dtype` +* shape +* chunks +* blocks +* `cparams` +* `dparams` + +This should happen once during table initialization. + +### Step 4. Derive validation model + +Translate each schema spec into a Pydantic field definition. 
+ +Examples: + +* `int64(ge=0)` -> integer field with `ge=0` +* `float64(ge=0, le=100)` -> float field with `ge=0`, `le=100` +* `string(max_length=32)` -> string field with `max_length=32` + +Cache the compiled Pydantic model class per row schema. + +### Step 5. Expose introspection hooks + +Expose enough metadata for: + +* debugging +* `table.info()` +* future schema serialization +* future schema-driven docs and reprs + +Possible user-facing hooks later: + +* `table.schema` +* `table.schema.columns` +* `table.schema.as_dict()` + +--- + +## Handling defaults + +Defaults should follow dataclass semantics as closely as possible. + +Examples: + +```python +active: bool = b2.field(b2.bool(), default=True) +``` + +For the first implementation, keep this conservative: + +* support scalar defaults +* reject mutable defaults directly + +On insert: + +* omitted values should be filled from defaults + +--- + +## Insert semantics + +### append() + +`append()` should accept a small set of normalized shapes: + +* dataclass row instance +* dict-like row +* tuple/list in schema order + +Recommended internal path: + +1. normalize the input to a field mapping +2. validate with cached validator model +3. coerce to final column values +4. append into underlying NDArrays + +### extend() + +`extend()` should accept: + +* iterable of row objects +* dict-of-arrays +* structured NumPy array +* maybe another `CTable` + +Recommended internal path: + +1. normalize to column batches where possible +2. validate according to `validate=` mode +3. coerce dtypes +4. write in bulk + +For `dict-of-arrays` and structured arrays, vectorized validation should be the +preferred long-term path. + +--- + +## Per-column NDArray options + +One of the main reasons for `b2.field(...)` is that different columns may want +different storage settings. 
+ +Examples: + +* a boolean column may want different compression parameters from a float column +* a high-cardinality string column may need different chunk sizes +* a metric column may use a specific codec or filter tuning + +So the schema system must allow: + +```python +@dataclass +class Row: + id: int = b2.field(b2.int64(ge=0), cparams={"codec": b2.Codec.ZSTD, "clevel": 1}) + score: float = b2.field( + b2.float64(ge=0, le=100), cparams={"codec": b2.Codec.LZ4HC, "clevel": 9} + ) + active: bool = b2.field(b2.bool(), cparams={"codec": b2.Codec.LZ4}) +``` + +The implementation should define precedence rules clearly: + +* column-level options override table defaults +* table-level options fill in unspecified values + +This implies `CTable(...)` may also take default storage options: + +```python +table = b2.CTable(Row, cparams=..., dparams=...) +``` + +Column-level overrides should merge against those defaults, not replace them +blindly. + +--- + +## Compatibility and migration + +### Goal + +Move toward the dataclass-based schema API without locking the project into the +current Pydantic-shaped declaration model. + +### Migration path + +Phase 1: + +* introduce schema spec classes and `b2.field(...)` +* support dataclass schemas in `CTable` +* keep existing prototype behavior separate + +Phase 2: + +* add row validation via cached internal Pydantic model +* add bulk validation modes +* document the dataclass schema API as preferred + +Phase 3: + +* optionally add a compatibility adapter for existing Pydantic models +* deprecate ad hoc `Annotated[...]` metadata conventions if they remain exposed + +### Non-goal + +Do not make the first implementation solve every possible schema feature. The +first goal is to get the schema shape and internal architecture right. + +--- + +## Serialization implications + +Even if `save()` / `load()` are not implemented yet, this schema design should +anticipate persistence. 
+ +Eventually a persisted `CTable` will need to store: + +* column names +* logical schema descriptors +* per-column defaults +* per-column NDArray storage options +* maybe validation constraints + +That argues strongly for having a stable compiled schema representation early. + +The compiled schema should be serializable to: + +* JSON-compatible metadata +* or a small msgpack payload + +The public dataclass itself does not need to be serialized directly. Only the +compiled schema matters for persistence. + +--- + +## Open questions + +### 1. Should Python annotations be required to match the schema spec? + +Example: + +```python +id: int = b2.field(b2.int64(ge=0)) +``` + +Recommended answer: yes, broadly, with sensible compatibility rules. + +Allowed: + +* `int` with `int64` +* `float` with `float64` +* `bool` with `bool` + +Potentially allowed later: + +* `str` with `string` +* `bytes` with `bytes` + +Reject obviously inconsistent declarations early. + +In other words: + +* `id: int = b2.field(b2.int64(ge=0))` is good +* `id: int` is acceptable shorthand for inferred `b2.int64()` +* `id = b2.field(b2.int64(ge=0))` is not the preferred style because it drops + the Python annotation + +### 2. Should `b2.field()` require a spec? + +Recommended answer: yes for the first version. + +Allowing `b2.field(default=True)` without a spec means we must infer too much +from the Python annotation and lose clarity. + +This still allows fully inferred fields that do not use `b2.field(...)` at all: + +```python +active: bool = True +``` + +but once `b2.field(...)` is used, it should carry an explicit schema spec. + +### 3. How much should Pydantic-specific behavior leak? + +Recommended answer: as little as possible. + +Users should not need to know whether validation is backed by Pydantic, +vectorized NumPy checks, or another mechanism. 
+ +--- + +## Concrete implementation sequence + +This section turns the design into a proposed execution order with concrete +files, class names, and function signatures. + +### Step 1: add schema descriptor primitives + +Create a new module: + +* `src/blosc2/schema.py` + +Primary contents: + +```python +from __future__ import annotations + +from dataclasses import MISSING, Field as DataclassField, field as dc_field +from typing import Any + +import numpy as np +``` + +Proposed public classes and functions: + +```python +class SchemaSpec: + dtype: np.dtype + python_type: type[Any] + + def to_pydantic_kwargs(self) -> dict[str, Any]: ... + def to_metadata_dict(self) -> dict[str, Any]: ... + + +class int64(SchemaSpec): + def __init__(self, *, ge=None, gt=None, le=None, lt=None): ... + + +class float64(SchemaSpec): + def __init__(self, *, ge=None, gt=None, le=None, lt=None): ... + + +class bool(SchemaSpec): + def __init__(self): ... + + +class string(SchemaSpec): + def __init__(self, *, min_length=None, max_length=None, pattern=None): ... + + +class bytes(SchemaSpec): + def __init__(self, *, min_length=None, max_length=None): ... + + +def field( + spec: SchemaSpec, + *, + default=MISSING, + cparams: dict[str, Any] | None = None, + dparams: dict[str, Any] | None = None, + chunks: tuple[int, ...] | None = None, + blocks: tuple[int, ...] | None = None, +) -> DataclassField: ... +``` + +Internal helper constants: + +```python +BLOSC2_FIELD_METADATA_KEY = "blosc2" +``` + +Notes: + +* Start with only the spec classes needed for the first `CTable` iteration: + `int64`, `float64`, `bool`. +* Add `string` and `bytes` only if needed in the same slice of work. +* Avoid over-generalizing the first implementation. 
+ +### Step 2: add schema compiler and compiled representations + +Create a new module: + +* `src/blosc2/schema_compiler.py` + +Primary internal dataclasses: + +```python +from dataclasses import dataclass +from typing import Any + + +@dataclass(slots=True) +class ColumnConfig: + cparams: dict[str, Any] | None + dparams: dict[str, Any] | None + chunks: tuple[int, ...] | None + blocks: tuple[int, ...] | None + + +@dataclass(slots=True) +class CompiledColumn: + name: str + py_type: Any + spec: Any + dtype: np.dtype + default: Any + config: ColumnConfig + + +@dataclass(slots=True) +class CompiledSchema: + row_cls: type[Any] + columns: list[CompiledColumn] + columns_by_name: dict[str, CompiledColumn] + validator_model: type[Any] | None = None +``` + +Primary internal functions: + +```python +def compile_schema(row_cls: type[Any]) -> CompiledSchema: ... +def infer_spec_from_annotation(annotation: Any, default: Any = MISSING) -> Any: ... +def validate_annotation_matches_spec(annotation: Any, spec: Any) -> None: ... +def get_blosc2_field_metadata(dc_field) -> dict[str, Any] | None: ... +``` + +Behavior: + +* accept a dataclass type only +* for explicit `b2.field(...)`, read the spec from metadata +* for inferred fields like `id: int`, derive `b2.int64()` +* reject unsupported annotations early +* normalize all defaults/config into `CompiledSchema` + +### Step 3: export the schema API from `blosc2` + +Update: + +* `src/blosc2/__init__.py` + +Exports to add: + +```python +from .schema import bool, bytes, field, float64, int64, string +``` + +And in `__all__`: + +```python +"bool", +"bytes", +"field", +"float64", +"int64", +"string", +``` + +Notes: + +* Be careful with `bool` and `bytes` in `__init__.py` because they shadow + builtins within the module namespace. That is acceptable if done deliberately, + but it should be reviewed explicitly. +* If shadowing proves too awkward internally, keep the implementation names + private and re-export the public names only. 
+ +### Step 4: refactor `CTable` to consume compiled schemas + +Update: + +* `src/blosc2/ctable.py` + +Primary constructor signature: + +```python +class CTable(Generic[RowT]): + def __init__( + self, + row_type: type[RowT], + new_data=None, + *, + expected_size: int = 1_048_576, + compact: bool = False, + validate: bool = True, + cparams: dict[str, Any] | None = None, + dparams: dict[str, Any] | None = None, + ) -> None: ... +``` + +New internal state: + +```python +self._schema: CompiledSchema +self._validate: bool +self._table_cparams: dict[str, Any] | None +self._table_dparams: dict[str, Any] | None +``` + +New internal helper methods: + +```python +def _init_columns(self, expected_size: int) -> None: ... +def _resolve_column_storage(self, col: CompiledColumn) -> dict[str, Any]: ... +def _normalize_row_input(self, data: Any) -> dict[str, Any]: ... +def _coerce_row_to_storage(self, row: dict[str, Any]) -> dict[str, Any]: ... +``` + +Behavior changes: + +* replace direct inspection of `row_type.model_fields` +* build columns from `self._schema.columns` +* derive column dtypes from compiled schema +* merge table-level and field-level storage settings + +### Step 5: implement row validation adapter + +Create a new internal module: + +* `src/blosc2/schema_validation.py` + +Primary functions: + +```python +from typing import Any + + +def build_validator_model(schema: CompiledSchema) -> type[Any]: ... +def validate_row(schema: CompiledSchema, row: dict[str, Any]) -> dict[str, Any]: ... +def validate_rows_rowwise( + schema: CompiledSchema, rows: list[dict[str, Any]] +) -> list[dict[str, Any]]: ... +``` + +Behavior: + +* build and cache a Pydantic model per compiled schema +* map `SchemaSpec` constraints into Pydantic field definitions +* return normalized Python values ready for storage coercion + +Implementation note: + +* Cache the generated validator model on `CompiledSchema.validator_model`. +* Keep all Pydantic-specific logic isolated in this module. 
+ +### Step 6: wire validation into `append()` + +Update: + +* `src/blosc2/ctable.py` + +Target signatures: + +```python +def append(self, data: Any) -> None: ... +def _append_validated_row(self, row: dict[str, Any]) -> None: ... +``` + +Concrete behavior: + +1. normalize incoming row shape +2. if `self._validate` is true, validate via `schema_validation.validate_row` +3. coerce to storage values +4. append into column NDArrays + +Inputs to support in the first cut: + +* dataclass row instance +* dict +* tuple/list in schema order + +Inputs that can wait until later if needed: + +* structured NumPy scalar +* Pydantic model instance + +### Step 7: add `extend(..., validate=...)` + +Update: + +* `src/blosc2/ctable.py` + +Proposed signature: + +```python +def extend(self, data: Any, *, validate: bool | None = None) -> None: ... +``` + +Supporting internal helpers: + +```python +def _normalize_rows_input( + self, data: Any +) -> tuple[list[dict[str, Any]] | None, dict[str, Any] | None]: ... +def _extend_rowwise(self, rows: list[dict[str, Any]], *, validate: bool) -> None: ... +def _extend_columnwise(self, columns: dict[str, Any], *, validate: bool) -> None: ... +``` + +First implementation target: + +* support iterable of rows via `_extend_rowwise` +* preserve correctness first, optimize later + +Second implementation target: + +* add `_extend_columnwise` for structured arrays and dict-of-arrays +* add vectorized validation for simple constraints + +### Step 8: add vectorized validation helpers + +Create a new internal module: + +* `src/blosc2/schema_vectorized.py` + +Primary functions: + +```python +from typing import Any + + +def validate_column_values(col: CompiledColumn, values: Any) -> None: ... +def validate_column_batch(schema: CompiledSchema, columns: dict[str, Any]) -> None: ... 
+``` + +Initial checks to support: + +* numeric `ge`, `gt`, `le`, `lt` +* string and bytes `min_length`, `max_length` +* dtype compatibility after coercion + +This module should remain optional in the first PR if the rowwise path is enough +to land the architecture cleanly. + +### Step 9: add schema introspection to `CTable` + +Update: + +* `src/blosc2/ctable.py` + +Proposed property: + +```python +@property +def schema(self) -> CompiledSchema: ... +``` + +Optional helper methods: + +```python +def schema_dict(self) -> dict[str, Any]: ... +def column_schema(self, name: str) -> CompiledColumn: ... +``` + +Goal: + +* make the new schema layer visible and debuggable +* provide a stable base for future save/load work + +### Step 10: add tests in focused modules + +Add: + +* `tests/ctable/test_schema_specs.py` +* `tests/ctable/test_schema_compiler.py` +* `tests/ctable/test_schema_validation.py` +* `tests/ctable/test_ctable_dataclass_schema.py` + +Test scope by file: + +`tests/ctable/test_schema_specs.py` + +* spec construction +* dtype mapping +* metadata export + +`tests/ctable/test_schema_compiler.py` + +* explicit `b2.field(...)` +* inferred shorthand from plain annotations +* annotation/spec mismatch rejection +* defaults handling + +`tests/ctable/test_schema_validation.py` + +* Pydantic validator generation +* constraint enforcement + +`tests/ctable/test_ctable_dataclass_schema.py` + +* `CTable(Row)` construction +* append with dataclass/dict/tuple +* extend with iterable of rows +* per-column `cparams` override plumbing + +### Step 11: keep the legacy prototype isolated during transition + +Short-term implementation choice: + +* if the current `ctable.py` prototype is still in active flux, prefer landing + the schema/compiler modules first and then refactoring `CTable` over them +* do not expand the old Pydantic-specific schema path further + +Possible follow-up helper: + +```python +def compile_legacy_pydantic_schema(row_cls: type[Any]) -> CompiledSchema: ... 
+``` + +But only add that if compatibility becomes necessary. + +### Step 12: persistence groundwork + +No need to implement `save()` / `load()` immediately, but define serialization +hooks on the schema side now. + +Add to `CompiledSchema` or a related helper: + +```python +def schema_to_dict(schema: CompiledSchema) -> dict[str, Any]: ... +def schema_from_dict(data: dict[str, Any]) -> CompiledSchema: ... +``` + +This should remain internal until the persisted format is stable. + +The persistency design itself is specified in: + +* [ctable-persistency.md](ctable-persistency.md) + +The schema-layer contract for persistency is: + +* schema must serialize to a versioned JSON-compatible dict +* column order must be preserved explicitly in the serialized `columns` list +* the serialized schema must be sufficient to reconstruct `CompiledSchema` + without requiring the original Python dataclass definition at load time + +### Step 13: delivery order across PRs + +Recommended PR slicing: + +PR 1: + +* `src/blosc2/schema.py` +* `src/blosc2/schema_compiler.py` +* exports in `src/blosc2/__init__.py` +* tests for schema specs and compiler + +PR 2: + +* `CTable` constructor refactor to use compiled schema +* `append()` row normalization +* row-wise validation module +* `tests/ctable/test_ctable_dataclass_schema.py` + +PR 3: + +* `extend(..., validate=...)` +* vectorized validation helpers +* schema introspection property +* more tests for batch validation and overrides + +PR 4: + +* persistence groundwork on the schema side +* optional compatibility adapter for legacy Pydantic model declarations + +PR 5: + +* TreeStore-backed persistency as described in + [ctable-persistency.md](ctable-persistency.md) +* `urlpath` / `mode` constructor semantics +* explicit `open()` support +* `_meta`, `_valid_rows`, `_cols/` storage layout +* persistency tests + +### Step 14: concrete first-PR checklist + +The 
smallest coherent first implementation should be: + +1. add `src/blosc2/schema.py` +2. add `src/blosc2/schema_compiler.py` +3. export `field`, `int64`, `float64`, `bool` +4. add tests for: + * explicit field specs + * inferred shorthand + * mismatch rejection +5. stop there + +That first PR gives the project: + +* the public schema vocabulary +* the internal compiled representation +* confidence in the canonical API shape + +before touching too much `CTable` mutation logic. + +After that first PR lands, follow the later phases in this order: + +1. dataclass-driven `CTable` construction and append path +2. validation and batch-insert behavior +3. schema introspection +4. TreeStore-backed persistency + +--- + +## Recommendation + +The recommended direction is: + +1. Make **dataclasses** the public schema declaration mechanism for `CTable`. +2. Introduce **lowercase schema spec objects** such as `b2.int64(...)`. +3. Use **`b2.field(...)`** to carry both the schema spec and per-column NDArray + configuration. +4. Compile the schema once into an internal representation. +5. Use **Pydantic internally for row validation**, but keep it hidden behind the + Blosc2 schema API. +6. Add a separate **bulk validation path** for large inserts so `extend()` does + not depend entirely on per-row Pydantic validation. + +This design gives the project: + +* a cleaner user API +* a better place for columnar storage configuration +* a clear boundary between schema, validation, and storage +* flexibility to evolve validation internals later +* a strong base for future persistence and schema introspection diff --git a/plans/ctable-user-guide.md b/plans/ctable-user-guide.md new file mode 100644 index 00000000..7ff7d41f --- /dev/null +++ b/plans/ctable-user-guide.md @@ -0,0 +1,486 @@ +# CTable User Guide + +This document explains how to use `CTable` as it currently stands. + +--- + +## What is CTable? + +`CTable` is a columnar compressed table built on top of `blosc2.NDArray`. 
Each +column is stored as a separate compressed array. Rows are never physically removed +on deletion — instead a boolean mask (`_valid_rows`) marks live rows, and +compaction can be triggered manually or automatically. + +--- + +## Defining a schema + +A schema is a Python `@dataclass` where each field uses `b2.field()` to declare +the column type and constraints. + +```python +from dataclasses import dataclass +import blosc2 as b2 + + +@dataclass +class Row: + id: int = b2.field(b2.int64(ge=0)) + score: float = b2.field(b2.float64(ge=0, le=100), default=0.0) + active: bool = b2.field(b2.bool(), default=True) +``` + +### Available spec types + +| Spec | NumPy dtype | Constraints | +|---|---|---| +| `b2.int64(ge, gt, le, lt)` | `int64` | numeric bounds | +| `b2.float64(ge, gt, le, lt)` | `float64` | numeric bounds | +| `b2.bool()` | `bool_` | — | +| `b2.complex64()` | `complex64` | — | +| `b2.complex128()` | `complex128` | — | +| `b2.string(min_length, max_length, pattern)` | `U` | length / regex | +| `b2.bytes(min_length, max_length)` | `S` | length | + +Constraints are enforced on every insert (see **Validation** below). + +### Inferred shorthand + +For columns with no constraints and no per-column storage options, you can omit +`b2.field()` entirely: + +```python +@dataclass +class Row: + id: int # inferred as b2.int64() + score: float # inferred as b2.float64() + flag: bool = True # inferred as b2.bool(), default=True +``` + +### Dataclass field ordering rule + +Python dataclasses require that fields **with defaults come after fields without +defaults**. 
Plan your schema accordingly: + +```python +@dataclass +class Row: + id: int = b2.field(b2.int64()) # required — no default + score: float = b2.field(b2.float64(), default=0.0) # optional + active: bool = b2.field(b2.bool(), default=True) # optional +``` + +--- + +## Creating a table + +```python +import blosc2 as b2 + +# Empty table (in-memory) +t = b2.CTable(Row) + +# Table pre-loaded with data +t = b2.CTable(Row, new_data=[(1, 95.0, True), (2, 80.0, False)]) + +# Reserve space upfront (avoids resizes) +t = b2.CTable(Row, expected_size=1_000_000) + +# Disable constraint validation (faster for trusted data) +t = b2.CTable(Row, validate=False) + +# Enable auto-compaction (fills gaps before resizing) +t = b2.CTable(Row, compact=True) + +# Table-level compression settings (applied to all columns unless overridden) +t = b2.CTable(Row, cparams={"codec": b2.Codec.ZSTD, "clevel": 5}) +``` + +### Persistent tables + +Pass `urlpath` to store the table on disk. The table root is a directory containing +compressed array files — everything is handled automatically. + +```python +# Create a new persistent table (overwrites any existing table at that path) +t = b2.CTable(Row, urlpath="people", mode="w", expected_size=1_000_000) +t.extend([(i, float(i % 100), True) for i in range(10_000)]) + +# Open an existing persistent table for reading and writing +t = b2.CTable(Row, urlpath="people", mode="a") +t.append((99999, 50.0, True)) + +# Open read-only (default for CTable.open) +t = b2.CTable.open("people") # mode="r" by default +t = b2.CTable.open("people", mode="r") # explicit + +# Open read/write via the classmethod +t = b2.CTable.open("people", mode="a") +``` + +`mode` values: + +| mode | behaviour | +|---|---| +| `"w"` | create (overwrite if the path already exists) | +| `"a"` | open existing or create new | +| `"r"` | open existing read-only | + +In-memory tables (`urlpath=None`, the default) behave exactly as before — no +`mode` or path handling is involved. 
+ +### Disk layout + +``` +people/ + _meta.b2frame ← schema JSON, kind marker, version (in vlmeta) + _valid_rows.b2nd ← tombstone mask + _cols/ + id.b2nd + score.b2nd + active.b2nd +``` + +You can inspect the raw metadata: + +```python +import blosc2, json + +meta = blosc2.open("people/_meta.b2frame") +print(meta.vlmeta["kind"]) # "ctable" +print(meta.vlmeta["version"]) # 1 +schema = json.loads(meta.vlmeta["schema"]) +``` + +### Per-column storage options + +```python +@dataclass +class Row: + id: int = b2.field(b2.int64(), cparams={"codec": b2.Codec.LZ4, "clevel": 1}) + score: float = b2.field( + b2.float64(ge=0, le=100), + cparams={"codec": b2.Codec.ZSTD, "clevel": 9}, + default=0.0, + ) +``` + +Column-level `cparams`/`dparams`/`chunks`/`blocks` override the table-level +defaults for that column only. + +--- + +## Inserting data + +### `append()` — one row at a time + +Accepts a tuple, list, dict, or dataclass instance: + +```python +t.append((1, 95.0, True)) +t.append([2, 80.0, False]) +t.append({"id": 3, "score": 50.0, "active": True}) +``` + +Fields with defaults can be omitted: + +```python +t.append((4,)) # score=0.0 and active=True filled from defaults +``` + +### `extend()` — bulk insert + +Accepts a list of tuples, a NumPy structured array, or another `CTable`: + +```python +# List of tuples +t.extend([(i, float(i), True) for i in range(1000)]) + +# NumPy structured array +import numpy as np + +dtype = np.dtype([("id", np.int64), ("score", np.float64), ("active", np.bool_)]) +arr = np.array([(1, 50.0, True), (2, 75.0, False)], dtype=dtype) +t.extend(arr) + +# Another CTable +t.extend(other_table) +``` + +#### Per-call validation override + +```python +# Skip validation for one trusted batch (even if table was built with validate=True) +t.extend(trusted_data, validate=False) + +# Force validation for one batch (even if table was built with validate=False) +t.extend(external_data, validate=True) +``` + +--- + +## Validation + +When `validate=True` (the 
default), constraints declared in the schema are +enforced on every insert: + +```python +t.append((-1, 50.0, True)) # ValueError: id violates ge=0 +t.append((1, 150.0, True)) # ValueError: score violates le=100 +t.extend([(-1, 50.0, True)]) # ValueError: id violates ge=0 +``` + +Boundary values are accepted: + +```python +t.append((0, 0.0, True)) # ok — id=0 satisfies ge=0, score=0.0 satisfies ge=0 +t.append((1, 100.0, False)) # ok — score=100.0 satisfies le=100 +``` + +To skip validation entirely: + +```python +t = b2.CTable(Row, validate=False) +``` + +--- + +## Reading data + +### Row access + +```python +t.row[0] # first row → returns a single-row CTable view +t.row[-1] # last row +t.row[2:5] # slice → CTable view with rows 2, 3, 4 +t.row[::2] # every other row +t.row[[0, 5, 10]] # specific rows by logical index +``` + +Row access always uses **logical indices** (i.e. index 0 is the first live row, +not the first physical slot). + +### Column access + +```python +t["id"] # returns a Column object +t.score # attribute-style access also works + +# Iterate values +for val in t["score"]: + print(val) + +# Convert to NumPy array +arr = t["score"].to_numpy() + +# Single value +val = t["id"][5] # logical index 5 +``` + +### Column slicing + +```python +col_view = t["id"][0:10] # returns a Column view (mask applied) +arr = col_view.to_numpy() # materialise to NumPy +``` + +### head / tail + +```python +t.head(10) # CTable view of first 10 rows +t.tail(5) # CTable view of last 5 rows +``` + +--- + +## Deleting rows + +`delete()` marks rows as invalid in the tombstone mask — data is not physically +removed. + +```python +t.delete(0) # delete first live row +t.delete(-1) # delete last live row +t.delete([0, 2, 4]) # delete multiple rows by logical index +t.delete(list(range(10))) # delete first 10 live rows +``` + +Negative indices and mixed positive/negative lists are supported. + +--- + +## Compaction + +After many deletions, physical storage has gaps. 
Compaction moves all live rows +to the front and clears the rest. + +```python +t.compact() # manual compaction +``` + +Auto-compaction runs automatically before a resize when `compact=True`: + +```python +t = b2.CTable(Row, compact=True) +``` + +--- + +## Read-only mode + +When a table is opened with `mode="r"` (or via `CTable.open()` without specifying +mode), all mutating operations raise immediately: + +```python +t = b2.CTable.open("people") # read-only + +t.append((1, 50.0, True)) # ValueError: Table is read-only +t.extend([(1, 50.0, True)]) # ValueError: Table is read-only +t.delete(0) # ValueError: Table is read-only +t.compact() # ValueError: Table is read-only +``` + +All read operations work normally: `row[]`, column access, `head()`, `tail()`, +`where()`, `len()`, `info()`, `schema_dict()`. + +--- + +## Filtering + +`where()` applies a boolean expression and returns a read-only view: + +```python +view = t.where(t["score"] > 50) +view = t.where((t["id"] > 10) & (t["active"] == True)) +``` + +Views share `_cols` with the parent table and cannot be mutated (no `append` or +`extend`). 
+ +--- + +## Table info + +```python +len(t) # number of live rows +t.nrows # same +t.ncols # number of columns +t.col_names # list of column names + +t.info() # prints a formatted summary with dtypes and memory usage +print(t) # prints the first rows in a table format +``` + +--- + +## Schema introspection + +```python +t.schema # CompiledSchema object +t.column_schema("id") # CompiledColumn for column "id" +t.schema_dict() # JSON-compatible dict of the full schema +``` + +`schema_dict()` example output: + +```python +{ + "version": 1, + "row_cls": "Row", + "columns": [ + {"name": "id", "kind": "int64", "ge": 0, "default": None}, + {"name": "score", "kind": "float64", "ge": 0, "le": 100, "default": 0.0}, + {"name": "active", "kind": "bool", "default": True}, + ], +} +``` + +The dict can be restored to a `CompiledSchema` without the original Python class: + +```python +from blosc2.schema_compiler import schema_from_dict + +restored = schema_from_dict(t.schema_dict()) +``` + +--- + +## Memory and compression + +```python +# Compressed size of all columns + valid_rows mask +cbytes = sum(col.cbytes for col in t._cols.values()) + t._valid_rows.cbytes + +# Uncompressed size +nbytes = sum(col.nbytes for col in t._cols.values()) + t._valid_rows.nbytes + +print(f"Compression ratio: {nbytes / cbytes:.2f}x") +``` + +--- + +## Complete example + +```python +from dataclasses import dataclass +import numpy as np +import blosc2 as b2 + + +@dataclass +class Measurement: + sensor_id: int = b2.field(b2.int64(ge=0)) + value: float = b2.field(b2.float64(ge=-1000, le=1000), default=0.0) + valid: bool = b2.field(b2.bool(), default=True) + + +# Create and populate (in-memory) +t = b2.CTable(Measurement, expected_size=10_000) +t.extend([(i, float(i % 200 - 100), i % 3 != 0) for i in range(5000)]) + +# Query +hot = t.where(t["value"] > 50) +print(f"Hot readings: {len(hot)}") + +# Delete invalid +invalid_indices = [i for i in range(len(t)) if not t.row[i].valid[0]] +if invalid_indices: + 
t.delete(invalid_indices) + +# Inspect +t.info() +print(t.schema_dict()) +``` + +## Persistency example + +```python +from dataclasses import dataclass +import blosc2 as b2 + + +@dataclass +class Measurement: + sensor_id: int = b2.field(b2.int64(ge=0)) + value: float = b2.field(b2.float64(ge=-1000, le=1000), default=0.0) + valid: bool = b2.field(b2.bool(), default=True) + + +# --- Session 1: create and populate --- +t = b2.CTable(Measurement, urlpath="sensors", mode="w", expected_size=100_000) +t.extend([(i, float(i % 200 - 100), i % 3 != 0) for i in range(50_000)]) +print(f"Saved {len(t)} rows to disk") +# Table is automatically persisted — no explicit save() needed. + +# --- Session 2: reopen and query --- +t = b2.CTable.open("sensors") # read-only by default +hot = t.where(t["value"] > 50) +print(f"Hot readings: {len(hot)}") +arr = t["sensor_id"].to_numpy() +print(f"First 5 sensor IDs: {arr[:5]}") + +# --- Session 3: reopen and append more data --- +t = b2.CTable(Measurement, urlpath="sensors", mode="a") +t.extend([(50_000 + i, float(i), True) for i in range(1_000)]) +print(f"Total rows: {len(t)}") +``` diff --git a/pyproject.toml b/pyproject.toml index 6244b0d9..36f42bfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "ndindex", "msgpack", "numexpr>=2.14.1; platform_machine != 'wasm32'", + "pydantic", "requests", ] version = "4.1.1.dev0" diff --git a/saved_ctable/climate/_cols/day_of_year.b2nd b/saved_ctable/climate/_cols/day_of_year.b2nd new file mode 100644 index 00000000..1d1e89fb Binary files /dev/null and b/saved_ctable/climate/_cols/day_of_year.b2nd differ diff --git a/saved_ctable/climate/_cols/humidity.b2nd b/saved_ctable/climate/_cols/humidity.b2nd new file mode 100644 index 00000000..3e144812 Binary files /dev/null and b/saved_ctable/climate/_cols/humidity.b2nd differ diff --git a/saved_ctable/climate/_cols/pressure.b2nd b/saved_ctable/climate/_cols/pressure.b2nd new file mode 100644 index 00000000..3fd1169a Binary 
files /dev/null and b/saved_ctable/climate/_cols/pressure.b2nd differ diff --git a/saved_ctable/climate/_cols/station_id.b2nd b/saved_ctable/climate/_cols/station_id.b2nd new file mode 100644 index 00000000..106e5a66 Binary files /dev/null and b/saved_ctable/climate/_cols/station_id.b2nd differ diff --git a/saved_ctable/climate/_cols/temperature.b2nd b/saved_ctable/climate/_cols/temperature.b2nd new file mode 100644 index 00000000..36636ed1 Binary files /dev/null and b/saved_ctable/climate/_cols/temperature.b2nd differ diff --git a/saved_ctable/climate/_cols/wind_speed.b2nd b/saved_ctable/climate/_cols/wind_speed.b2nd new file mode 100644 index 00000000..c6d2c916 Binary files /dev/null and b/saved_ctable/climate/_cols/wind_speed.b2nd differ diff --git a/saved_ctable/climate/_meta.b2frame b/saved_ctable/climate/_meta.b2frame new file mode 100644 index 00000000..2b05e0b9 Binary files /dev/null and b/saved_ctable/climate/_meta.b2frame differ diff --git a/saved_ctable/climate/_valid_rows.b2nd b/saved_ctable/climate/_valid_rows.b2nd new file mode 100644 index 00000000..11498601 Binary files /dev/null and b/saved_ctable/climate/_valid_rows.b2nd differ diff --git a/saved_ctable/direct_test/_cols/id.b2nd b/saved_ctable/direct_test/_cols/id.b2nd new file mode 100644 index 00000000..890fc7cf Binary files /dev/null and b/saved_ctable/direct_test/_cols/id.b2nd differ diff --git a/saved_ctable/layout_test/_cols/id.b2nd b/saved_ctable/layout_test/_cols/id.b2nd new file mode 100644 index 00000000..32af31b1 Binary files /dev/null and b/saved_ctable/layout_test/_cols/id.b2nd differ diff --git a/saved_ctable/layout_test/_meta.b2frame b/saved_ctable/layout_test/_meta.b2frame new file mode 100644 index 00000000..796d4d8f Binary files /dev/null and b/saved_ctable/layout_test/_meta.b2frame differ diff --git a/saved_ctable/layout_test/_meta2.b2frame b/saved_ctable/layout_test/_meta2.b2frame new file mode 100644 index 00000000..f44fff03 Binary files /dev/null and 
b/saved_ctable/layout_test/_meta2.b2frame differ diff --git a/saved_ctable/layout_test/_valid_rows.b2nd b/saved_ctable/layout_test/_valid_rows.b2nd new file mode 100644 index 00000000..0681cbdd Binary files /dev/null and b/saved_ctable/layout_test/_valid_rows.b2nd differ diff --git a/saved_ctable/multi.b2d/embed.b2e b/saved_ctable/multi.b2d/embed.b2e new file mode 100644 index 00000000..bc22c31c Binary files /dev/null and b/saved_ctable/multi.b2d/embed.b2e differ diff --git a/saved_ctable/people.b2d/embed.b2e b/saved_ctable/people.b2d/embed.b2e new file mode 100644 index 00000000..49d4ad57 Binary files /dev/null and b/saved_ctable/people.b2d/embed.b2e differ diff --git a/saved_ctable/resize_test/id.b2nd b/saved_ctable/resize_test/id.b2nd new file mode 100644 index 00000000..79bf4182 Binary files /dev/null and b/saved_ctable/resize_test/id.b2nd differ diff --git a/saved_ctable/schunk_test/meta.b2frame b/saved_ctable/schunk_test/meta.b2frame new file mode 100644 index 00000000..ea002f21 Binary files /dev/null and b/saved_ctable/schunk_test/meta.b2frame differ diff --git a/saved_ctable/test.b2d/embed.b2e b/saved_ctable/test.b2d/embed.b2e new file mode 100644 index 00000000..0379ed9c Binary files /dev/null and b/saved_ctable/test.b2d/embed.b2e differ diff --git a/saved_ctable/users/_cols/id.b2nd b/saved_ctable/users/_cols/id.b2nd new file mode 100644 index 00000000..a4db7e32 Binary files /dev/null and b/saved_ctable/users/_cols/id.b2nd differ diff --git a/saved_ctable/users/_cols/retired.b2nd b/saved_ctable/users/_cols/retired.b2nd new file mode 100644 index 00000000..76a7a976 Binary files /dev/null and b/saved_ctable/users/_cols/retired.b2nd differ diff --git a/saved_ctable/users/_cols/score.b2nd b/saved_ctable/users/_cols/score.b2nd new file mode 100644 index 00000000..442d8538 Binary files /dev/null and b/saved_ctable/users/_cols/score.b2nd differ diff --git a/saved_ctable/users/_meta.b2frame b/saved_ctable/users/_meta.b2frame new file mode 100644 index 
00000000..d408b963 Binary files /dev/null and b/saved_ctable/users/_meta.b2frame differ diff --git a/saved_ctable/users/_valid_rows.b2nd b/saved_ctable/users/_valid_rows.b2nd new file mode 100644 index 00000000..6b6900fd Binary files /dev/null and b/saved_ctable/users/_valid_rows.b2nd differ diff --git a/saved_ctable/write_test.b2d/embed.b2e b/saved_ctable/write_test.b2d/embed.b2e new file mode 100644 index 00000000..15d1545f Binary files /dev/null and b/saved_ctable/write_test.b2d/embed.b2e differ diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index e32b2f48..c9c211df 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -337,29 +337,18 @@ def isdtype(a_dtype: np.dtype, kind: str | np.dtype | tuple): from numpy import ( bool_, - complex64, complex128, e, euler_gamma, float16, - float32, float64, inf, - int8, - int16, - int32, int64, nan, newaxis, pi, - uint8, - uint16, - uint32, - uint64, ) -bool = bool - DEFAULT_COMPLEX = complex128 """ Default complex floating dtype.""" @@ -595,7 +584,10 @@ def _raise(exc): Disable the overloaded equal operator. """ -# Delayed imports for avoiding overwriting of python builtins +# Delayed imports for avoiding overwriting of python builtins. +# Note: bool, bytes, string shadow builtins in the blosc2 namespace by design — +# they are schema spec constructors (b2.bool(), b2.bytes(), etc.). 
try:
    from line_profiler import profile
except ImportError:
    import functools

    def profile(func):
        """No-op stand-in for ``line_profiler.profile``.

        Defined only when ``line_profiler`` cannot be imported, so that
        ``profile`` is always available as a decorator. The decorated
        function is called unchanged.

        ``functools.wraps`` copies the wrapped function's ``__name__``,
        ``__qualname__``, ``__doc__`` and ``__module__`` onto the wrapper and
        sets ``__wrapped__``, so ``help()`` and ``inspect.signature`` keep
        reporting the real function rather than the anonymous wrapper.
        """

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        return wrapper
class NumpyDtype:
    """Metadata tag for Pydantic-based schemas (legacy).

    Carries the dtype a column should be stored with; the value is passed to
    ``np.dtype()`` when the field is resolved.
    """

    def __init__(self, dtype):
        self.dtype = dtype


class MaxLen:
    """Metadata tag for fixed-width string/bytes columns in Pydantic-based schemas (legacy)."""

    def __init__(self, length: int):
        self.length = int(length)


def _default_display_width(origin) -> int:
    """Return a sensible display column width for a given Python type (legacy)."""
    return {int: 12, float: 15, bool: 6, complex: 25}.get(origin, 20)


def _resolve_field_dtype(field) -> tuple[np.dtype, int]:
    """Return (numpy dtype, display_width) for a Pydantic model field (legacy).

    Resolution order:

    1. ``str`` / ``bytes`` fields become fixed-width ``U<n>`` / ``S<n>`` dtypes,
       where ``n`` is the length of the first :class:`MaxLen` tag (default 32).
       The display width is ``n`` clamped to the range 10..50.
    2. Any other field carrying a :class:`NumpyDtype` tag uses that dtype.
    3. ``int``, ``float``, ``bool`` and ``complex`` fall back to ``int64``,
       ``float64``, ``bool_`` and ``complex128``.
    4. Everything else is stored as ``object`` with a display width of 20.

    Parameters
    ----------
    field
        Object exposing ``annotation`` and, optionally, ``metadata``
        (e.g. a Pydantic ``FieldInfo``).

    Returns
    -------
    tuple[np.dtype, int]
        The storage dtype and the column width used when printing.
    """
    annotation = field.annotation
    origin = getattr(annotation, "__origin__", annotation)

    # Tags can live in two places: on a raw ``Annotated[...]`` annotation
    # (``__metadata__``) or on the field itself. Pydantic v2 strips the
    # ``Annotated`` wrapper from ``FieldInfo.annotation`` and moves the extras
    # to ``FieldInfo.metadata``, so looking only at the annotation silently
    # ignores MaxLen/NumpyDtype there. Annotation tags keep priority.
    tags = [
        *getattr(annotation, "__metadata__", ()),
        *(getattr(field, "metadata", None) or ()),
    ]

    # str / bytes → look for MaxLen metadata, build fixed-width dtype
    if origin in (str, bytes) or annotation in (str, bytes):
        is_bytes = origin is bytes or annotation is bytes
        max_len = next((tag.length for tag in tags if isinstance(tag, MaxLen)), 32)
        kind = "S" if is_bytes else "U"
        return np.dtype(f"{kind}{max_len}"), max(10, min(max_len, 50))

    # Explicit NumpyDtype tag (must be the class defined in this module)
    for tag in tags:
        if isinstance(tag, NumpyDtype):
            return np.dtype(tag.dtype), _default_display_width(origin)

    # Primitive defaults
    primitive_defaults = {
        int: (np.int64, 12),
        float: (np.float64, 15),
        bool: (np.bool_, 6),
        complex: (np.complex128, 25),
    }
    if origin in primitive_defaults:
        dt_raw, display_width = primitive_defaults[origin]
        return np.dtype(dt_raw), display_width

    return np.dtype(np.object_), 20
def _find_physical_index(arr: blosc2.NDArray, logical_key: int) -> int:
    """Map the *logical_key*-th live row to its slot in physical storage.

    Walks the chunks of the boolean mask *arr* in order, keeping a running
    count of True entries seen so far. Chunks that blosc2 reports as
    all-zero, or as a single repeated value, are handled from their metadata
    alone; only ordinary chunks are read and scanned.

    Returns
    -------
    int
        Physical position in the underlying storage array.

    Raises
    ------
    IndexError
        If every chunk is exhausted before the requested live row is found.
    """
    rows_per_chunk = arr.chunks[0]
    total_rows = arr.shape[0]
    seen = 0  # live rows counted in the chunks already passed

    for info in arr.iterchunks_info():
        first = info.nchunk * rows_per_chunk
        length = min(rows_per_chunk, total_rows - first)

        # Chunk is entirely False: contributes no live rows.
        if info.special == blosc2.SpecialValue.ZERO:
            continue

        # Chunk holds one repeated value: either all False or all True.
        if info.special == blosc2.SpecialValue.VALUE:
            fill = np.frombuffer(info.repeated_value, dtype=arr.dtype)[0]
            if not fill:
                continue
            if logical_key < seen + length:
                # All rows are live, so the offset inside the chunk is direct.
                return first + (logical_key - seen)
            seen += length
            continue

        # Ordinary chunk: read it and count its live rows.
        flags = arr[first : first + length]
        live = int(np.count_nonzero(flags))
        if logical_key < seen + live:
            return first + int(np.flatnonzero(flags)[logical_key - seen])
        seen += live

    raise IndexError("Unexpected error finding physical index.")
def __getitem__(self, key: int | slice | list | np.ndarray):
    """Return the value(s) at logical position(s) *key*.

    Logical positions count live rows only (True entries of
    ``self._valid_rows``).

    Parameters
    ----------
    key
        * ``int`` — a single logical index; negative values count from the
          end of the live rows.
        * ``slice`` — a logical slice; the result is a new :class:`Column`
          on the same table and column, restricted by a physical mask.
        * boolean ``np.ndarray`` — one flag per live row.
        * ``list`` / ``tuple`` / other ``np.ndarray`` — logical indices.

    Returns
    -------
    A single element for an ``int`` key, a :class:`Column` for a slice, and
    whatever the underlying column returns for an index array otherwise.

    Raises
    ------
    IndexError
        If an integer key is out of range, or a boolean mask's length
        differs from the number of live rows.
    TypeError
        If *key* is of an unsupported type.
    """
    if isinstance(key, int):
        n_rows = len(self)
        if key < 0:
            key += n_rows
        if not (0 <= key < n_rows):
            raise IndexError(f"index {key} is out of bounds for column with size {n_rows}")
        pos_true = _find_physical_index(self._valid_rows, key)
        return self._raw_col[int(pos_true)]

    elif isinstance(key, slice):
        # real_pos[i] is the physical slot of the i-th live row.
        real_pos = blosc2.where(self._valid_rows, np.arange(len(self._valid_rows))).compute()
        start, stop, step = key.indices(len(real_pos))
        mask = blosc2.zeros(len(self._table._valid_rows), dtype=np.bool_)
        lindices = np.arange(start, stop, step)
        # An empty selection (e.g. [0:0], a start at or past the number of
        # live rows, or an empty table) must leave the mask all-False.
        # Indexing real_pos[start] / real_pos[stop - 1] unconditionally would
        # either raise IndexError or, for stop == 0, wrap around to the last
        # live row and select everything.
        if lindices.size:
            if step == 1:
                # Contiguous logical range: mark the physical span in one go;
                # dead rows inside it are removed again when the mask is
                # AND-ed with the table's valid rows.
                phys_start = real_pos[start]
                phys_stop = real_pos[stop - 1]
                mask[phys_start : phys_stop + 1] = True
            else:
                phys_indices = real_pos[lindices]
                mask[phys_indices[:]] = True
        return Column(self._table, self._col_name, mask=mask)

    elif isinstance(key, np.ndarray) and key.dtype == np.bool_:
        # Boolean mask in logical space — same convention as numpy/pandas.
        # key[i] == True means "include logical row i".
        n_live = len(self)
        if len(key) != n_live:
            raise IndexError(
                f"Boolean mask length {len(key)} does not match number of live rows {n_live}."
            )
        all_pos = np.where(self._valid_rows[:])[0]
        phys_indices = all_pos[key]
        return self._raw_col[phys_indices]

    elif isinstance(key, (list, tuple, np.ndarray)):
        real_pos = blosc2.where(self._valid_rows, np.arange(len(self._valid_rows))).compute()
        phys_indices = np.array([real_pos[i] for i in key], dtype=np.int64)
        return self._raw_col[phys_indices]

    raise TypeError(f"Invalid index type: {type(key)}")
def iter_chunks(self, size: int = 65536):
    """Iterate over live column values in chunks of *size* rows.

    Yields numpy arrays of at most *size* elements each, skipping deleted
    rows.  The last chunk may be smaller than *size*.

    Parameters
    ----------
    size:
        Number of live rows per yielded chunk.  Defaults to 65 536.
        Must be at least 1.

    Yields
    ------
    numpy.ndarray
        A 1-D array of up to *size* live values with this column's dtype.

    Raises
    ------
    ValueError
        If *size* is smaller than 1.  Because this is a generator, the
        error surfaces when iteration starts, not when the method is called.

    Examples
    --------
    >>> for chunk in t["score"].iter_chunks(size=100_000):
    ...     process(chunk)
    """
    if size < 1:
        # A non-positive size could never drain the buffer below: the
        # "enough rows buffered" test would stay true forever.
        raise ValueError(f"size must be a positive integer, got {size!r}")

    valid = self._valid_rows
    raw = self._raw_col
    arr_len = len(valid)
    phys_chunk = valid.chunks[0]

    pending: list[np.ndarray] = []
    pending_count = 0

    for info in valid.iterchunks_info():
        actual = min(phys_chunk, arr_len - info.nchunk * phys_chunk)
        start = info.nchunk * phys_chunk

        if info.special == blosc2.SpecialValue.ZERO:
            continue

        if info.special == blosc2.SpecialValue.VALUE:
            val = np.frombuffer(info.repeated_value, dtype=valid.dtype)[0]
            if not val:
                continue
            # Whole physical chunk is live: no mask needed.
            segment = raw[start : start + actual]
        else:
            mask = valid[start : start + actual]
            segment = raw[start : start + actual][mask]

        if len(segment) == 0:
            continue

        pending.append(segment)
        pending_count += len(segment)
        if pending_count < size:
            continue

        # Join the buffered pieces once, then slice off every full chunk.
        # Re-joining the leftover for each yielded chunk would copy it
        # again and again when a segment is much larger than *size*.
        combined = np.concatenate(pending)
        n_full = (pending_count // size) * size
        for offset in range(0, n_full, size):
            yield combined[offset : offset + size]
        rest = combined[n_full:]
        pending = [rest] if len(rest) > 0 else []
        pending_count = len(rest)

    if pending:
        yield np.concatenate(pending)
def unique(self) -> np.ndarray:
    """Return the distinct live values of this column, sorted ascending.

    Values are gathered one ``iter_chunks()`` block at a time, so only the
    set of distinct values is held for the whole pass.
    """
    distinct: set = set()
    for block in self.iter_chunks():
        distinct |= set(block.tolist())
    ordered = sorted(distinct)
    return np.array(ordered, dtype=self.dtype)
def sum(self):
    """Sum of all live values.

    Supported dtypes: bool, int, uint, float, complex.
    Bool values are counted as 0 / 1.

    Returns
    -------
    int or numpy.float64 or numpy.complex128
        A Python ``int`` for bool / int / uint columns, ``numpy.float64``
        for float columns and ``numpy.complex128`` for complex columns.

    Raises
    ------
    TypeError
        If the column dtype is not one of the supported kinds.
    ValueError
        If the column has no live rows.
    """
    self._require_kind("biufc", "sum")
    self._require_nonempty("sum")
    kind = self.dtype.kind

    if kind in "biu":
        # Unsigned data must be reduced as uint64: reducing it as int64
        # wraps for values above 2**63 - 1.  The running total is a Python
        # int, so adding the per-chunk sums together cannot overflow.
        # (A single chunk's own sum is still limited to 64 bits.)
        chunk_dtype = np.uint64 if kind == "u" else np.int64
        total = 0
        for chunk in self.iter_chunks():
            total += int(chunk.sum(dtype=chunk_dtype))
        return total

    # Float / complex: accumulate in the widest matching NumPy type.
    acc_dtype = np.float64 if kind == "f" else np.complex128
    result = acc_dtype(0)
    for chunk in self.iter_chunks():
        result += chunk.sum(dtype=acc_dtype)
    return result
def mean(self) -> float:
    """Arithmetic mean of the live values, as a Python float.

    Supported dtypes: bool, int, uint, float.  The column is consumed one
    ``iter_chunks()`` block at a time; each block is summed in float64.

    Raises
    ------
    TypeError
        If the column dtype is not bool, int, uint or float.
    ValueError
        If the column has no live rows.
    """
    self._require_kind("biuf", "mean")
    self._require_nonempty("mean")

    running = np.float64(0)
    n_seen = 0
    for block in self.iter_chunks():
        n_seen += len(block)
        running += block.sum(dtype=np.float64)

    return float(running / n_seen)
+ n_total = np.int64(0) + mean_total = np.float64(0) + M2_total = np.float64(0) + + for chunk in self.iter_chunks(): + chunk = chunk.astype(np.float64) + n_b = np.int64(len(chunk)) + mean_b = chunk.mean() + M2_b = np.float64(((chunk - mean_b) ** 2).sum()) + + if n_total == 0: + n_total, mean_total, M2_total = n_b, mean_b, M2_b + else: + delta = mean_b - mean_total + n_new = n_total + n_b + mean_total = (n_total * mean_total + n_b * mean_b) / n_new + M2_total += M2_b + delta**2 * n_total * n_b / n_new + n_total = n_new + + divisor = n_total - ddof + if divisor <= 0: + return float("nan") + return float(np.sqrt(M2_total / divisor)) + + def any(self) -> bool: + """Return True if at least one live value is True. + + Supported dtypes: bool. + Short-circuits on the first True found. + """ + self._require_kind("b", "any") + return any(chunk.any() for chunk in self.iter_chunks()) + + def all(self) -> bool: + """Return True if every live value is True. + + Supported dtypes: bool. + Short-circuits on the first False found. + """ + self._require_kind("b", "all") + return all(chunk.all() for chunk in self.iter_chunks()) + + +# --------------------------------------------------------------------------- +# CTable +# --------------------------------------------------------------------------- + + +def _fmt_bytes(n: int) -> str: + """Human-readable byte count (e.g. 
'1.23 MB').""" + if n < 1024: + return f"{n} B" + if n < 1024**2: + return f"{n / 1024:.2f} KB" + if n < 1024**3: + return f"{n / 1024**2:.2f} MB" + return f"{n / 1024**3:.2f} GB" + + +class CTable(Generic[RowT]): + def __init__( + self, + row_type: type[RowT], + new_data=None, + *, + urlpath: str | None = None, + mode: str = "a", + expected_size: int = 1_048_576, + compact: bool = False, + validate: bool = True, + cparams: dict[str, Any] | None = None, + dparams: dict[str, Any] | None = None, + ) -> None: + self._row_type = row_type + self._validate = validate + self._table_cparams = cparams + self._table_dparams = dparams + self._cols: dict[str, blosc2.NDArray] = {} + self._col_widths: dict[str, int] = {} + self.col_names: list[str] = [] + self.row = _RowIndexer(self) + self.auto_compact = compact + self.base = None + + # Choose storage backend + if urlpath is not None: + storage: TableStorage = FileTableStorage(urlpath, mode) + else: + storage = InMemoryTableStorage() + self._storage = storage + self._read_only = storage.is_read_only() + + if storage.table_exists() and mode != "w": + # ---- Open existing persistent table ---- + if new_data is not None: + raise ValueError( + "Cannot pass new_data when opening an existing table. Use mode='w' to overwrite." 
+ ) + storage.check_kind() + schema_dict = storage.load_schema() + self._schema: CompiledSchema = schema_from_dict(schema_dict) + self._schema = CompiledSchema( + row_cls=row_type, + columns=self._schema.columns, + columns_by_name=self._schema.columns_by_name, + ) + self.col_names = [c["name"] for c in schema_dict["columns"]] + self._valid_rows = storage.open_valid_rows() + for name in self.col_names: + col = storage.open_column(name) + self._cols[name] = col + cc = self._schema.columns_by_name[name] + self._col_widths[name] = max(len(name), cc.display_width) + self._n_rows = int(blosc2.count_nonzero(self._valid_rows)) + self._last_pos = None # resolve lazily on first write + else: + # ---- Create new table ---- + if storage.is_read_only(): + raise FileNotFoundError(f"No CTable found at {urlpath!r}") + + # Build compiled schema from either a dataclass or a legacy Pydantic model + if dataclasses.is_dataclass(row_type) and isinstance(row_type, type): + self._schema = compile_schema(row_type) + else: + self._schema = _compile_pydantic_schema(row_type) + + self._n_rows = 0 + self._last_pos = 0 + + default_chunks, default_blocks = compute_chunks_blocks((expected_size,)) + self._valid_rows = storage.create_valid_rows( + shape=(expected_size,), + chunks=default_chunks, + blocks=default_blocks, + ) + self._init_columns(expected_size, default_chunks, default_blocks, storage) + storage.save_schema(schema_to_dict(self._schema)) + + if new_data is not None: + self._load_initial_data(new_data) + + def _init_columns( + self, expected_size: int, default_chunks, default_blocks, storage: TableStorage + ) -> None: + """Create one NDArray per column using the compiled schema.""" + for col in self._schema.columns: + self.col_names.append(col.name) + self._col_widths[col.name] = max(len(col.name), col.display_width) + col_storage = self._resolve_column_storage(col, default_chunks, default_blocks) + self._cols[col.name] = storage.create_column( + col.name, + dtype=col.dtype, + 
def _resolve_column_storage(
    self,
    col: CompiledColumn,
    default_chunks,
    default_blocks,
) -> dict[str, Any]:
    """Combine per-column and table-wide storage settings for *col*.

    A value set on ``col.config`` wins; otherwise the table-level value
    (``default_chunks`` / ``default_blocks`` / ``self._table_cparams`` /
    ``self._table_dparams``) is used.

    Returns
    -------
    dict
        Always contains ``"chunks"`` and ``"blocks"``.  ``"cparams"`` and
        ``"dparams"`` are present only when the chosen value is not None.
    """
    cfg = col.config

    def _prefer(own, fallback):
        return fallback if own is None else own

    settings: dict[str, Any] = {
        "chunks": _prefer(cfg.chunks, default_chunks),
        "blocks": _prefer(cfg.blocks, default_blocks),
    }
    optional = (
        ("cparams", _prefer(cfg.cparams, self._table_cparams)),
        ("dparams", _prefer(cfg.dparams, self._table_dparams)),
    )
    for key, chosen in optional:
        if chosen is not None:
            settings[key] = chosen
    return settings
+ + Accepted shapes: + - list / tuple → positional, zipped with ``col_names`` + - dict → used as-is + - dataclass → ``dataclasses.asdict`` + - np.void / structured scalar → field-name access + """ + if isinstance(data, dict): + return data + if isinstance(data, (list, tuple)): + return dict(zip(self.col_names, data, strict=False)) + if dataclasses.is_dataclass(data) and not isinstance(data, type): + return dataclasses.asdict(data) + if isinstance(data, (np.void, np.record)): + return {name: data[name] for name in self.col_names} + # Fallback: try positional indexing + return {name: data[i] for i, name in enumerate(self.col_names)} + + def _coerce_row_to_storage(self, row: dict[str, Any]) -> dict[str, Any]: + """Coerce each value in *row* to the column's storage dtype.""" + result = {} + for col in self._schema.columns: + val = row[col.name] + result[col.name] = np.array(val, dtype=col.dtype).item() + return result + + def _resolve_last_pos(self) -> int: + """Return the physical index of the next write slot. + + Returns the cached ``_last_pos`` when available. After a deletion + ``_last_pos`` is ``None``; this method then walks chunk metadata of + ``_valid_rows`` from the end (no full decompression) to find the last + ``True`` position, caches the result, and returns it. 
def _grow(self) -> None:
    """Double the physical capacity of all columns and the valid_rows mask.

    The current capacity is the length of ``_valid_rows``.  A table whose
    capacity is 0 grows to 1, because doubling zero would leave it unable
    to ever accept a row.
    """
    capacity = len(self._valid_rows)
    new_capacity = max(capacity * 2, 1)
    for col_arr in self._cols.values():
        col_arr.resize((new_capacity,))
    # The mask is resized last, matching the original statement order.
    self._valid_rows.resize((new_capacity,))
def __repr__(self) -> str:
    """Return a one-line summary: column names, live row count, compressed size."""
    header = ", ".join(self.col_names)
    row_txt = f"{self._n_rows:,} rows"
    size_txt = f"{_fmt_bytes(self.cbytes)} compressed"
    return f"CTable<{header}>({row_txt}, {size_txt})"
def save(self, urlpath: str, *, overwrite: bool = False) -> None:
    """Copy this (in-memory) table to disk at *urlpath*.

    Only live rows are written — the on-disk table is always compacted:
    live rows are stored contiguously from position 0 and the written
    ``valid_rows`` mask is True for exactly those positions.

    Parameters
    ----------
    urlpath:
        Destination directory path.
    overwrite:
        If ``False`` (default), raise :exc:`ValueError` when *urlpath*
        already exists.  Set to ``True`` to replace whatever is there: a
        directory is removed recursively, a regular file or symbolic link
        is unlinked.

    Raises
    ------
    ValueError
        If *urlpath* already exists and ``overwrite=False``, or if called
        on a view.
    """
    if self.base is not None:
        raise ValueError("Cannot save a view — save the parent table instead.")
    if os.path.exists(urlpath):
        if not overwrite:
            raise ValueError(f"Path {urlpath!r} already exists. Use overwrite=True to replace.")
        # shutil.rmtree only accepts real directories; a plain file or a
        # symlink sitting at the destination has to be unlinked instead.
        if os.path.isdir(urlpath) and not os.path.islink(urlpath):
            shutil.rmtree(urlpath)
        else:
            os.remove(urlpath)

    # Collect live physical positions
    valid_np = self._valid_rows[:]
    live_pos = np.where(valid_np)[0]
    n_live = len(live_pos)
    # Arrays are created with at least one slot, even for an empty table.
    capacity = max(n_live, 1)

    file_storage = FileTableStorage(urlpath, "w")
    default_chunks, default_blocks = compute_chunks_blocks((capacity,))

    # --- valid_rows (all True, compacted) ---
    disk_valid = file_storage.create_valid_rows(
        shape=(capacity,),
        chunks=default_chunks,
        blocks=default_blocks,
    )
    if n_live > 0:
        disk_valid[:n_live] = True

    # --- columns ---
    for col in self._schema.columns:
        name = col.name
        col_storage = self._resolve_column_storage(col, default_chunks, default_blocks)
        disk_col = file_storage.create_column(
            name,
            dtype=col.dtype,
            shape=(capacity,),
            chunks=col_storage["chunks"],
            blocks=col_storage["blocks"],
            cparams=col_storage.get("cparams"),
            dparams=col_storage.get("dparams"),
        )
        if n_live > 0:
            disk_col[:n_live] = self._cols[name][live_pos]

    file_storage.save_schema(schema_to_dict(self._schema))
+ """ + file_storage = FileTableStorage(urlpath, "r") + if not file_storage.table_exists(): + raise FileNotFoundError(f"No CTable found at {urlpath!r}") + file_storage.check_kind() + schema_dict = file_storage.load_schema() + schema = schema_from_dict(schema_dict) + col_names = [c["name"] for c in schema_dict["columns"]] + + disk_valid = file_storage.open_valid_rows() + disk_cols = {name: file_storage.open_column(name) for name in col_names} + phys_size = len(disk_valid) + n_live = int(blosc2.count_nonzero(disk_valid)) + capacity = max(phys_size, 1) + + mem_storage = InMemoryTableStorage() + default_chunks, default_blocks = compute_chunks_blocks((capacity,)) + + mem_valid = mem_storage.create_valid_rows( + shape=(capacity,), + chunks=default_chunks, + blocks=default_blocks, + ) + if phys_size > 0: + mem_valid[:phys_size] = disk_valid[:] + + mem_cols: dict[str, blosc2.NDArray] = {} + for col in schema.columns: + name = col.name + mem_col = mem_storage.create_column( + name, + dtype=col.dtype, + shape=(capacity,), + chunks=default_chunks, + blocks=default_blocks, + cparams=None, + dparams=None, + ) + if phys_size > 0: + mem_col[:phys_size] = disk_cols[name][:] + mem_cols[name] = mem_col + + obj = cls.__new__(cls) + obj._row_type = None + obj._validate = True + obj._table_cparams = None + obj._table_dparams = None + obj._storage = mem_storage + obj._read_only = False + obj._schema = schema + obj._cols = mem_cols + obj._col_widths = {col.name: max(len(col.name), col.display_width) for col in schema.columns} + obj.col_names = col_names + obj.row = _RowIndexer(obj) + obj.auto_compact = False + obj.base = None + obj._valid_rows = mem_valid + obj._n_rows = n_live + obj._last_pos = None # resolve lazily on first write + return obj + + # ------------------------------------------------------------------ + # View / filtering + # ------------------------------------------------------------------ + + @classmethod + def _make_view(cls, parent: CTable, new_valid_rows: 
def view(self, new_valid_rows):
    """Return a view of this table restricted to the rows in *new_valid_rows*.

    Parameters
    ----------
    new_valid_rows:
        Boolean ``blosc2.NDArray`` or ``blosc2.LazyExpr`` with one entry
        per physical row of this table.  A ``LazyExpr`` is computed first.

    Returns
    -------
    CTable
        The object built by ``CTable._make_view(self, new_valid_rows)``.

    Raises
    ------
    TypeError
        If *new_valid_rows* is not a boolean NDArray / LazyExpr.
    ValueError
        If its length differs from the length of this table's
        ``_valid_rows`` mask.
    """
    if not (
        isinstance(new_valid_rows, (blosc2.NDArray, blosc2.LazyExpr))
        and (getattr(new_valid_rows, "dtype", None) == np.bool_)
    ):
        raise TypeError(
            f"Expected boolean blosc2.NDArray or LazyExpr, got {type(new_valid_rows).__name__}"
        )

    new_valid_rows = (
        new_valid_rows.compute() if isinstance(new_valid_rows, blosc2.LazyExpr) else new_valid_rows
    )

    expected_len = len(self._valid_rows)
    if expected_len != len(new_valid_rows):
        # Report both lengths so the caller can tell what went wrong.
        raise ValueError(
            f"Mask length {len(new_valid_rows)} does not match the table's "
            f"valid_rows length {expected_len}."
        )

    return CTable._make_view(self, new_valid_rows)
def sample(self, n: int, *, seed: int | None = None) -> CTable:
    """Return a view holding *n* live rows picked at random without replacement.

    Parameters
    ----------
    n:
        Number of rows to draw.  ``n <= 0`` gives an empty view; ``n``
        greater than or equal to the live row count gives a view of every
        live row.
    seed:
        Seed passed to ``numpy.random.default_rng``; ``None`` leaves the
        generator unseeded.

    Returns
    -------
    CTable
        A view created through :meth:`view`, sharing columns with this table.
    """
    capacity = len(self._valid_rows)

    if n <= 0:
        nothing = blosc2.zeros(shape=capacity, dtype=np.bool_)
        return self.view(nothing)
    if n >= self._n_rows:
        return self.view(self._valid_rows)

    generator = np.random.default_rng(seed)
    live_slots = np.where(self._valid_rows[:])[0]
    picked = generator.choice(live_slots, size=n, replace=False)

    # Build the mask in NumPy, then hand it to view() as a blosc2 array.
    keep = np.zeros(capacity, dtype=np.bool_)
    keep[picked] = True
    return self.view(blosc2.asarray(keep))
+ """ + if not cols: + raise ValueError("select() requires at least one column name.") + for name in cols: + if name not in self._cols: + raise KeyError(f"No column named {name!r}. Available: {self.col_names}") + + obj = CTable.__new__(CTable) + obj._row_type = self._row_type + obj._validate = self._validate + obj._table_cparams = self._table_cparams + obj._table_dparams = self._table_dparams + obj._storage = None + obj._read_only = self._read_only + obj._valid_rows = self._valid_rows + obj._n_rows = self._n_rows + obj._last_pos = self._last_pos + obj.auto_compact = self.auto_compact + obj.base = self + + # Subset of columns — same NDArray objects, no copy + obj._cols = {name: self._cols[name] for name in cols} + obj.col_names = list(cols) + + # Rebuild schema for the selected columns only + sel_set = set(cols) + sel_compiled = [c for c in self._schema.columns if c.name in sel_set] + # Preserve caller-specified order + order = {name: i for i, name in enumerate(cols)} + sel_compiled.sort(key=lambda c: order[c.name]) + obj._schema = CompiledSchema( + columns=sel_compiled, + columns_by_name={c.name: c for c in sel_compiled}, + row_cls=self._schema.row_cls, + ) + obj._col_widths = {name: self._col_widths[name] for name in cols if name in self._col_widths} + obj.row = _RowIndexer(obj) + return obj + + def describe(self) -> None: + """Print a per-column statistical summary. + + Numeric columns (int, float): count, mean, std, min, max. + Bool columns: count, true-count, true-%. + String columns: count, min (lex), max (lex), n-unique. 
def cov(self) -> np.ndarray:
    """Compute the covariance matrix of all columns.

    Every column must be integer, floating point or boolean. Each column
    is materialised with ``to_numpy()``, converted to ``float64`` (bools
    pass through ``int8`` first) and stacked so that each column becomes
    one variable (one row of the matrix handed to ``numpy.cov``).

    Returns
    -------
    numpy.ndarray
        A 2-D array; entry ``[i, j]`` is the covariance between the i-th
        and j-th names in :attr:`col_names`.

    Raises
    ------
    TypeError
        If a column dtype is not integer, floating or bool.
    ValueError
        If ``_n_rows`` is smaller than 2.
    """
    # Reject unsupported dtypes before touching any data.
    for col_name in self.col_names:
        col_dtype = self._cols[col_name].dtype
        numeric = np.issubdtype(col_dtype, np.integer) or np.issubdtype(col_dtype, np.floating)
        if numeric or col_dtype == np.bool_:
            continue
        raise TypeError(
            f"Column {col_name!r} has dtype {col_dtype} which is not supported by cov(). "
            "Only int, float, and bool columns are allowed."
        )

    live_rows = self._n_rows
    if live_rows < 2:
        raise ValueError(f"cov() requires at least 2 live rows, got {live_rows}.")

    def _as_float_variable(values):
        # Bools are turned into 0/1 integers first, then everything is float64.
        if values.dtype == np.bool_:
            values = values.astype(np.int8)
        return values.astype(np.float64)

    # One row per column: shape (ncols, n_live)
    variables = np.stack(
        [_as_float_variable(self[col_name].to_numpy()) for col_name in self.col_names],
        axis=0,
    )
    return np.atleast_2d(np.cov(variables))
String columns + (``pa.string()``, ``pa.large_string()``) are stored with + ``max_length`` set to the longest value found in the data. + + Parameters + ---------- + arrow_table: + A ``pyarrow.Table`` instance. + + Returns + ------- + CTable + A new in-memory CTable containing all rows from *arrow_table*. + + Raises + ------ + ImportError + If ``pyarrow`` is not installed. + TypeError + If an Arrow field type has no corresponding blosc2 spec. + """ + try: + import pyarrow as pa + except ImportError: + raise ImportError( + "pyarrow is required for from_arrow(). Install it with: pip install pyarrow" + ) from None + + import blosc2.schema as b2s + + def _arrow_type_to_spec(pa_type, arrow_col): + """Map a pyarrow DataType to a blosc2 SchemaSpec.""" + mapping = [ + (pa.int8(), b2s.int8), + (pa.int16(), b2s.int16), + (pa.int32(), b2s.int32), + (pa.int64(), b2s.int64), + (pa.uint8(), b2s.uint8), + (pa.uint16(), b2s.uint16), + (pa.uint32(), b2s.uint32), + (pa.uint64(), b2s.uint64), + (pa.float32(), b2s.float32), + (pa.float64(), b2s.float64), + (pa.bool_(), b2s.bool), + ] + for arrow_t, spec_cls in mapping: + if pa_type == arrow_t: + return spec_cls() + + # String types: determine max_length from the data + if pa_type in (pa.string(), pa.large_string(), pa.utf8(), pa.large_utf8()): + values = [v for v in arrow_col.to_pylist() if v is not None] + max_len = max((len(v) for v in values), default=1) + return b2s.string(max_length=max(max_len, 1)) + + raise TypeError( + f"No blosc2 spec for Arrow type {pa_type!r}. " + "Supported: int8/16/32/64, uint8/16/32/64, float32/64, bool, string." 
+ ) + + # Build CompiledSchema from Arrow schema + columns: list[CompiledColumn] = [] + for field in arrow_table.schema: + name = field.name + _validate_column_name(name) + spec = _arrow_type_to_spec(field.type, arrow_table.column(name)) + col_config = ColumnConfig(cparams=None, dparams=None, chunks=None, blocks=None) + columns.append( + CompiledColumn( + name=name, + py_type=spec.python_type, + spec=spec, + dtype=spec.dtype, + default=MISSING, + config=col_config, + display_width=compute_display_width(spec), + ) + ) + + schema = CompiledSchema( + row_cls=None, + columns=columns, + columns_by_name={col.name: col for col in columns}, + ) + + n = len(arrow_table) + capacity = max(n, 1) + default_chunks, default_blocks = compute_chunks_blocks((capacity,)) + mem_storage = InMemoryTableStorage() + + new_valid = mem_storage.create_valid_rows( + shape=(capacity,), + chunks=default_chunks, + blocks=default_blocks, + ) + new_cols: dict[str, blosc2.NDArray] = {} + for col in columns: + new_cols[col.name] = mem_storage.create_column( + col.name, + dtype=col.dtype, + shape=(capacity,), + chunks=default_chunks, + blocks=default_blocks, + cparams=None, + dparams=None, + ) + + obj = cls.__new__(cls) + obj._row_type = None + obj._validate = False + obj._table_cparams = None + obj._table_dparams = None + obj._storage = mem_storage + obj._read_only = False + obj._schema = schema + obj._cols = new_cols + obj._col_widths = {col.name: max(len(col.name), col.display_width) for col in columns} + obj.col_names = [col.name for col in columns] + obj.row = _RowIndexer(obj) + obj.auto_compact = False + obj.base = None + obj._valid_rows = new_valid + obj._n_rows = 0 + obj._last_pos = 0 + + if n > 0: + # Write each column directly — one bulk slice assignment per column. + # String columns (dtype.kind == 'U') can't go through Arrow's zero-copy + # to_numpy(), so we convert via to_pylist() and let NumPy handle the + # fixed-width unicode coercion. All other types use zero-copy numpy. 
@classmethod
def from_csv(
    cls,
    path: str,
    row_cls,
    *,
    header: bool = True,
    sep: str = ",",
) -> CTable:
    """Build a :class:`CTable` from a CSV file.

    The schema is compiled from *row_cls*. All rows are read in a single
    pass into per-column Python lists, then each column is bulk-written
    into a pre-allocated array (one slice assignment per column).

    Parameters
    ----------
    path:
        Source CSV file path.
    row_cls:
        Row class handed to ``compile_schema``; its fields define the
        column names and dtypes.
    header:
        If ``True`` (default), the first row is treated as a header and
        skipped. Column order in the file must match *row_cls* field
        order regardless. An empty file yields an empty table.
    sep:
        Field delimiter. Defaults to ``","``; use ``"\\t"`` for TSV.

    Returns
    -------
    CTable
        A new in-memory CTable containing all rows from the CSV file.

    Raises
    ------
    ValueError
        If a row has a different number of fields than the schema.
    """
    import csv

    schema = compile_schema(row_cls)
    ncols = len(schema.columns)

    # Accumulate values per column as Python lists (one pass through file)
    col_data: list[list] = [[] for _ in range(ncols)]

    with open(path, newline="") as f:
        reader = csv.reader(f, delimiter=sep)
        if header:
            # The default stops an empty file from leaking StopIteration.
            next(reader, None)
        for lineno, row in enumerate(reader, start=2 if header else 1):
            if len(row) != ncols:
                raise ValueError(f"Line {lineno}: expected {ncols} fields, got {len(row)}.")
            for values, val in zip(col_data, row, strict=True):
                values.append(val)

    n = len(col_data[0]) if ncols > 0 else 0
    capacity = max(n, 1)  # never allocate zero-length arrays
    default_chunks, default_blocks = compute_chunks_blocks((capacity,))
    mem_storage = InMemoryTableStorage()

    new_valid = mem_storage.create_valid_rows(
        shape=(capacity,),
        chunks=default_chunks,
        blocks=default_blocks,
    )
    new_cols: dict[str, blosc2.NDArray] = {}
    for col in schema.columns:
        new_cols[col.name] = mem_storage.create_column(
            col.name,
            dtype=col.dtype,
            shape=(capacity,),
            chunks=default_chunks,
            blocks=default_blocks,
            cparams=None,
            dparams=None,
        )

    obj = cls.__new__(cls)
    obj._row_type = row_cls
    obj._validate = True
    obj._table_cparams = None
    obj._table_dparams = None
    obj._storage = mem_storage
    obj._read_only = False
    obj._schema = schema
    obj._cols = new_cols
    obj._col_widths = {col.name: max(len(col.name), col.display_width) for col in schema.columns}
    obj.col_names = [col.name for col in schema.columns]
    obj.row = _RowIndexer(obj)
    obj.auto_compact = False
    obj.base = None
    obj._valid_rows = new_valid
    obj._n_rows = 0
    obj._last_pos = 0

    if n > 0:
        truthy = ("true", "1")
        for i, col in enumerate(schema.columns):
            if col.dtype == np.bool_:
                # np.array(["False"], dtype=bool) treats any non-empty
                # string as True, so parse the text explicitly. Matching is
                # case-insensitive ("TRUE" counts as true); anything else
                # is False.
                arr = np.array(
                    [v.strip().lower() in truthy for v in col_data[i]],
                    dtype=np.bool_,
                )
            else:
                arr = np.array(col_data[i], dtype=col.dtype)
            new_cols[col.name][:n] = arr
        new_valid[:n] = True
        obj._n_rows = n
        obj._last_pos = n

    return obj
def drop_column(self, name: str) -> None:
    """Remove the column *name* from the table.

    For file-backed storage the column's file is deleted (when it exists)
    and the updated schema is saved afterwards.

    Raises
    ------
    ValueError
        If the table is read-only, is a view, or *name* is the last column.
    KeyError
        If *name* does not exist.
    """
    # Guard clauses: nothing is modified unless every check passes.
    if self._read_only:
        raise ValueError("Table is read-only (opened with mode='r').")
    if self.base is not None:
        raise ValueError("Cannot drop a column from a view.")
    if name not in self._cols:
        raise KeyError(f"No column named {name!r}. Available: {self.col_names}")
    if len(self.col_names) == 1:
        raise ValueError("Cannot drop the last column.")

    file_backed = isinstance(self._storage, FileTableStorage)
    if file_backed:
        doomed_path = self._storage._col_path(name)
        if os.path.exists(doomed_path):
            os.remove(doomed_path)

    # Forget the column in every in-memory structure.
    self._cols.pop(name)
    self._col_widths.pop(name)
    self.col_names.remove(name)

    surviving = [compiled for compiled in self._schema.columns if compiled.name != name]
    self._schema = CompiledSchema(
        row_cls=self._schema.row_cls,
        columns=surviving,
        columns_by_name={compiled.name: compiled for compiled in surviving},
    )
    if file_backed:
        self._storage.save_schema(schema_to_dict(self._schema))
Available: {self.col_names}") + if new in self._cols: + raise ValueError(f"Column {new!r} already exists.") + _validate_column_name(new) + + if isinstance(self._storage, FileTableStorage): + old_path = self._storage._col_path(old) + new_path = self._storage._col_path(new) + os.rename(old_path, new_path) + b2_mode = "r" if self._read_only else "a" + self._cols[new] = blosc2.open(new_path, mode=b2_mode) + else: + self._cols[new] = self._cols[old] + del self._cols[old] + + idx = self.col_names.index(old) + self.col_names[idx] = new + self._col_widths[new] = max(len(new), self._col_widths.pop(old)) + + old_compiled = self._schema.columns_by_name[old] + renamed = CompiledColumn( + name=new, + py_type=old_compiled.py_type, + spec=old_compiled.spec, + dtype=old_compiled.dtype, + default=old_compiled.default, + config=old_compiled.config, + display_width=old_compiled.display_width, + ) + new_columns = [renamed if c.name == old else c for c in self._schema.columns] + self._schema = CompiledSchema( + row_cls=self._schema.row_cls, + columns=new_columns, + columns_by_name={c.name: c for c in new_columns}, + ) + if isinstance(self._storage, FileTableStorage): + self._storage.save_schema(schema_to_dict(self._schema)) + + # ------------------------------------------------------------------ + # Column access + # ------------------------------------------------------------------ + + def __getitem__(self, s: str): + if s in self._cols: + return Column(self, s) + return None + + def __getattr__(self, s: str): + if s in self._cols: + return Column(self, s) + return super().__getattribute__(s) + + # ------------------------------------------------------------------ + # Compaction + # ------------------------------------------------------------------ + + def compact(self): + if self._read_only: + raise ValueError("Table is read-only (opened with mode='r').") + if self.base is not None: + raise ValueError("Cannot compact a view.") + real_poss = blosc2.where(self._valid_rows, 
np.array(range(len(self._valid_rows)))).compute() + start = 0 + block_size = self._valid_rows.blocks[0] + end = min(block_size, self._n_rows) + while start < end: + for _k, v in self._cols.items(): + v[start:end] = v[real_poss[start:end]] + start += block_size + end = min(end + block_size, self._n_rows) + + self._valid_rows[: self._n_rows] = True + self._valid_rows[self._n_rows :] = False + self._last_pos = self._n_rows # next write goes right after live rows + + def _normalise_sort_keys( + self, + cols: str | list[str], + ascending: bool | list[bool], + ) -> tuple[list[str], list[bool]]: + """Validate and normalise sort key arguments; return (cols, ascending).""" + if isinstance(cols, str): + cols = [cols] + if isinstance(ascending, bool): + ascending = [ascending] * len(cols) + if len(cols) != len(ascending): + raise ValueError( + f"'ascending' must have the same length as 'cols' ({len(cols)}), got {len(ascending)}." + ) + for name in cols: + if name not in self._cols: + raise KeyError(f"No column named {name!r}. Available: {self.col_names}") + dtype = self._cols[name].dtype + if np.issubdtype(dtype, np.complexfloating): + raise TypeError( + f"Column {name!r} has complex dtype {dtype} which does not support ordering." 
+ ) + return cols, ascending + + def _build_lex_keys( + self, + cols: list[str], + ascending: list[bool], + live_pos: np.ndarray, + n: int, + ) -> list[np.ndarray]: + """Build the key list for np.lexsort (innermost = last = primary key).""" + lex_keys = [] + for name, asc in zip(reversed(cols), reversed(ascending), strict=True): + raw = self._cols[name][live_pos] + if not asc: + if raw.dtype.kind in "US": + # strings can't be negated — invert via rank + rank = np.argsort(np.argsort(raw, stable=True), stable=True) + lex_keys.append((n - 1 - rank).astype(np.intp)) + elif np.issubdtype(raw.dtype, np.unsignedinteger): + lex_keys.append(-raw.astype(np.int64)) + else: + lex_keys.append(-raw) + else: + lex_keys.append(raw) + return lex_keys + + def sort_by( + self, + cols: str | list[str], + ascending: bool | list[bool] = True, + *, + inplace: bool = False, + ) -> CTable: + """Return a copy of the table sorted by one or more columns. + + Parameters + ---------- + cols: + Column name or list of column names to sort by. When multiple + columns are given, the first is the primary key, the second is + the tiebreaker, and so on. + ascending: + Sort direction. A single bool applies to all keys; a list must + have the same length as *cols*. + inplace: + If ``True``, rewrite the physical data in place and return + ``self`` (like :meth:`compact` but sorted). If ``False`` + (default), return a new in-memory CTable leaving this one + untouched. + + Raises + ------ + ValueError + If called on a view or a read-only table when ``inplace=True``. + KeyError + If any column name is not found. + TypeError + If a column used as a sort key does not support ordering + (e.g. complex numbers). + """ + if self.base is not None: + raise ValueError("Cannot sort a view. 
def _empty_copy(self) -> CTable:
    """Return a new empty in-memory CTable with the same schema and capacity.

    Fresh column arrays and a fresh valid-rows mask are allocated in an
    ``InMemoryTableStorage`` with capacity ``max(self._n_rows, 1)``. The
    schema object is shared, while the column-width mapping is copied so
    that later column renames/drops on the copy cannot alter this table's
    bookkeeping.

    The result has ``_n_rows == 0`` and ``_last_pos is None``; callers
    that fill the arrays directly are expected to set both.
    """
    from blosc2 import compute_chunks_blocks

    capacity = max(self._n_rows, 1)
    default_chunks, default_blocks = compute_chunks_blocks((capacity,))
    mem_storage = InMemoryTableStorage()

    new_valid = mem_storage.create_valid_rows(
        shape=(capacity,),
        chunks=default_chunks,
        blocks=default_blocks,
    )
    new_cols = {}
    for col in self._schema.columns:
        col_storage = self._resolve_column_storage(col, default_chunks, default_blocks)
        new_cols[col.name] = mem_storage.create_column(
            col.name,
            dtype=col.dtype,
            shape=(capacity,),
            chunks=col_storage["chunks"],
            blocks=col_storage["blocks"],
            cparams=col_storage.get("cparams"),
            dparams=col_storage.get("dparams"),
        )

    obj = CTable.__new__(CTable)
    obj._schema = self._schema
    obj._row_type = self._row_type
    obj._table_cparams = self._table_cparams
    obj._table_dparams = self._table_dparams
    obj._storage = mem_storage
    obj._valid_rows = new_valid
    obj._cols = new_cols
    # Copy, don't alias: the widths dict is mutated in place by
    # add/drop/rename column on whichever table owns it.
    obj._col_widths = dict(self._col_widths)
    obj.col_names = [col.name for col in self._schema.columns]
    obj.row = _RowIndexer(obj)
    obj._n_rows = 0
    obj._last_pos = None
    obj._read_only = False
    obj.base = None
    obj.auto_compact = self.auto_compact
    obj._validate = self._validate
    return obj
Available: {self.col_names}") from None + + def schema_dict(self) -> dict[str, Any]: + """Return a JSON-compatible dict describing this table's schema.""" + return schema_to_dict(self._schema) + + def info(self) -> None: + """Print a concise summary of the CTable.""" + ratio = (self.nbytes / self.cbytes) if self.cbytes > 0 else 0.0 + + lines = [] + lines.append("") + lines.append(f"nºColumns: {self.ncols}") + lines.append(f"nºRows: {self.nrows}") + lines.append("") + + header = f" {'#':>3} {'Column':<15} {'Itemsize':<12} {'Dtype':<15}" + lines.append(header) + lines.append(f" {'---':>3} {'------':<15} {'--------':<12} {'-----':<15}") + + for i, name in enumerate(self.col_names): + col_array = self._cols[name] + dtype_str = str(col_array.dtype) + itemsize = f"{col_array.dtype.itemsize} B" + lines.append(f" {i:>3} {name:<15} {itemsize:<12} {dtype_str:<15}") + + lines.append("") + lines.append(f"memory usage: {_fmt_bytes(self.cbytes)}") + lines.append(f"uncompressed size: {_fmt_bytes(self.nbytes)}") + lines.append(f"compression ratio: {ratio:.2f}x") + lines.append("") + + print("\n".join(lines)) + + # ------------------------------------------------------------------ + # Mutation: append / extend / delete + # ------------------------------------------------------------------ + + def _load_initial_data(self, new_data) -> None: + """Dispatch new_data to append() or extend() as appropriate.""" + is_append = False + + if isinstance(new_data, (np.void, np.record)): + is_append = True + elif isinstance(new_data, np.ndarray): + if new_data.dtype.names is not None and new_data.ndim == 0: + is_append = True + elif isinstance(new_data, list) and len(new_data) > 0: + first_elem = new_data[0] + if isinstance(first_elem, (str, bytes, int, float, bool, complex)): + is_append = True + + if is_append: + self.append(new_data) + else: + self.extend(new_data) + + def append(self, data: list | np.void | np.ndarray) -> None: + if self._read_only: + raise ValueError("Table is read-only 
def extend(self, data: list | CTable | Any, *, validate: bool | None = None) -> None:
    """Append many rows at once.

    Parameters
    ----------
    data:
        One of:

        * an object exposing ``_cols`` and ``_n_rows`` (another table):
          only its live rows, as flagged by its ``_valid_rows`` mask, are
          copied, in physical order;
        * a structured NumPy array: one field per column name;
        * any sized iterable of row tuples, transposed with ``zip``.
    validate:
        Per-call override of the table's ``_validate`` flag. ``None``
        (default) uses the table setting.

    Raises
    ------
    ValueError
        If the table is read-only.
    TypeError
        If the table is a view.
    """
    if self._read_only:
        raise ValueError("Table is read-only (opened with mode='r').")
    if self.base is not None:
        raise TypeError("Cannot extend view.")
    if len(data) <= 0:
        return

    # Resolve effective validate flag: per-call override takes precedence
    do_validate = self._validate if validate is None else validate

    start_pos = self._resolve_last_pos()

    current_col_names = self.col_names
    columns_to_insert = []
    new_nrows = 0

    if hasattr(data, "_cols") and hasattr(data, "_n_rows"):
        # Table-like source. Its live rows are not necessarily the first
        # _n_rows physical slots (deletions leave holes), so pick them
        # through the validity mask when one is available.
        if hasattr(data, "_valid_rows"):
            live = np.where(data._valid_rows[:])[0]
        else:
            live = np.arange(data._n_rows)
        new_nrows = len(live)
        if new_nrows == 0:
            return
        # Fast path: live rows already packed at the front -> plain slice.
        packed = int(live[-1]) == new_nrows - 1
        for name in current_col_names:
            source = data._cols[name]
            columns_to_insert.append(source[:new_nrows] if packed else source[live])
    elif isinstance(data, np.ndarray) and data.dtype.names is not None:
        columns_to_insert = [data[name] for name in current_col_names]
        new_nrows = len(data)
    else:
        columns_to_insert = list(zip(*data, strict=False))
        new_nrows = len(data)

    # Validate constraints column-by-column before writing
    if do_validate:
        from blosc2.schema_vectorized import validate_column_batch

        raw_columns = {name: columns_to_insert[i] for i, name in enumerate(current_col_names)}
        validate_column_batch(self._schema, raw_columns)

    processed_cols = []
    for i, raw_col in enumerate(columns_to_insert):
        target_dtype = self._cols[current_col_names[i]].dtype
        processed_cols.append(blosc2.asarray(raw_col, dtype=target_dtype))

    end_pos = start_pos + new_nrows

    if self.auto_compact and end_pos >= len(self._valid_rows):
        self.compact()  # sets _last_pos = _n_rows
        start_pos = self._last_pos
        end_pos = start_pos + new_nrows

    while end_pos > len(self._valid_rows):
        self._grow()

    for j, name in enumerate(current_col_names):
        self._cols[name][start_pos:end_pos] = processed_cols[j][:]

    self._valid_rows[start_pos:end_pos] = True
    self._last_pos = end_pos
    self._n_rows += new_nrows
def _run_row_logic(self, ind: int | slice | str | Iterable) -> CTable:
    """Return a view containing only the live rows selected by *ind*.

    *ind* indexes the sequence of live rows (those flagged in
    ``_valid_rows``), not physical positions. Non-string iterables are
    turned into a list before being used as an index. The selected
    physical positions are marked in a new boolean mask, which is handed
    to :meth:`view`.
    """
    current_mask = self._valid_rows[:]
    live_positions = np.flatnonzero(current_mask)

    if isinstance(ind, Iterable) and not isinstance(ind, (str, bytes)):
        ind = list(ind)

    # Translate logical row selection into physical positions.
    kept_positions = live_positions[ind]

    selection = np.zeros(current_mask.shape, dtype=bool)
    selection[kept_positions] = True

    return self.view(blosc2.asarray(selection))
class TableStorage:
    """Backend contract that CTable uses to create and reopen its arrays.

    This base class implements nothing: every method raises
    :exc:`NotImplementedError` and is meant to be overridden by a
    concrete backend.
    """

    # -- table-level state --------------------------------------------------

    def table_exists(self) -> bool:
        """Report whether a table is already present in this storage."""
        raise NotImplementedError

    def is_read_only(self) -> bool:
        """Report whether the storage was opened read-only."""
        raise NotImplementedError

    # -- schema metadata ----------------------------------------------------

    def save_schema(self, schema_dict: dict[str, Any]) -> None:
        """Persist the serialized schema *schema_dict*."""
        raise NotImplementedError

    def load_schema(self) -> dict[str, Any] | None:
        """Return the stored schema dict, or ``None`` when there is none."""
        raise NotImplementedError

    # -- column arrays ------------------------------------------------------

    def create_column(
        self,
        name: str,
        *,
        dtype: np.dtype,
        shape: tuple[int, ...],
        chunks: tuple[int, ...],
        blocks: tuple[int, ...],
        cparams: dict[str, Any] | None,
        dparams: dict[str, Any] | None,
    ) -> blosc2.NDArray:
        """Create and return the array backing column *name*."""
        raise NotImplementedError

    def open_column(self, name: str) -> blosc2.NDArray:
        """Open and return the existing array of column *name*."""
        raise NotImplementedError

    # -- row-validity mask --------------------------------------------------

    def create_valid_rows(
        self,
        *,
        shape: tuple[int, ...],
        chunks: tuple[int, ...],
        blocks: tuple[int, ...],
    ) -> blosc2.NDArray:
        """Create and return the boolean row-validity array."""
        raise NotImplementedError

    def open_valid_rows(self) -> blosc2.NDArray:
        """Open and return the existing row-validity array."""
        raise NotImplementedError
class FileTableStorage(TableStorage):
    """Keep each table array in its own Blosc2 file under one root directory.

    Column arrays are written to ``_cols/<name>.b2nd``, the row-validity
    mask to ``_valid_rows.b2nd`` and the schema/kind markers to the
    ``vlmeta`` of ``_meta.b2frame``, all relative to *urlpath*.

    Parameters
    ----------
    urlpath:
        Path to the table root directory.
    mode:
        ``'w'`` — create (overwrite existing files).
        ``'a'`` — open existing or create new.
        ``'r'`` — open existing read-only.

    Raises
    ------
    ValueError
        If *mode* is not ``'r'``, ``'a'`` or ``'w'``.
    """

    def __init__(self, urlpath: str, mode: str) -> None:
        if mode not in ("r", "a", "w"):
            raise ValueError(f"mode must be 'r', 'a', or 'w'; got {mode!r}")
        self._root = urlpath
        self._mode = mode
        # Lazily opened metadata SChunk; see _open_meta() / save_schema().
        self._meta: blosc2.SChunk | None = None

    # ------------------------------------------------------------------
    # Path helpers
    # ------------------------------------------------------------------

    @property
    def _meta_path(self) -> str:
        """Location of the metadata frame inside the table root."""
        return os.path.join(self._root, _META_FILE)

    @property
    def _valid_rows_path(self) -> str:
        """Location of the row-validity array inside the table root."""
        return os.path.join(self._root, _VALID_ROWS_FILE)

    def _col_path(self, name: str) -> str:
        """Location of the array file that stores column *name*."""
        filename = f"{name}.b2nd"
        return os.path.join(self._root, _COLS_DIR, filename)

    def _ensure_dirs(self) -> None:
        """Create the root and columns directories if they are missing."""
        cols_dir = os.path.join(self._root, _COLS_DIR)
        os.makedirs(cols_dir, exist_ok=True)

    def _open_mode(self) -> str:
        """Mode for opening existing files: ``'r'`` stays ``'r'``, else ``'a'``."""
        return "a" if self._mode != "r" else "r"

    # ------------------------------------------------------------------
    # TableStorage interface
    # ------------------------------------------------------------------

    def table_exists(self) -> bool:
        """Return ``True`` when the metadata frame file exists on disk."""
        return os.path.exists(self._meta_path)

    def is_read_only(self) -> bool:
        """Return ``True`` when the storage was created with mode ``'r'``."""
        return self._mode == "r"

    def create_column(self, name, *, dtype, shape, chunks, blocks, cparams, dparams):
        """Create a zero-filled on-disk array for column *name* (overwriting)."""
        self._ensure_dirs()
        target = self._col_path(name)
        # cparams / dparams are forwarded only when the caller supplied them.
        optional = {"cparams": cparams, "dparams": dparams}
        extras: dict[str, Any] = {key: val for key, val in optional.items() if val is not None}
        return blosc2.zeros(
            shape,
            dtype=dtype,
            chunks=chunks,
            blocks=blocks,
            urlpath=target,
            mode="w",
            **extras,
        )

    def open_column(self, name: str) -> blosc2.NDArray:
        """Open the on-disk array of column *name*."""
        return blosc2.open(self._col_path(name), mode=self._open_mode())

    def create_valid_rows(self, *, shape, chunks, blocks):
        """Create a zero-filled boolean on-disk row-validity array (overwriting)."""
        self._ensure_dirs()
        layout = {"chunks": chunks, "blocks": blocks}
        return blosc2.zeros(shape, dtype=np.bool_, urlpath=self._valid_rows_path, mode="w", **layout)

    def open_valid_rows(self) -> blosc2.NDArray:
        """Open the on-disk row-validity array."""
        return blosc2.open(self._valid_rows_path, mode=self._open_mode())

    def save_schema(self, schema_dict: dict[str, Any]) -> None:
        """Write *schema_dict* (plus kind/version markers) to ``_meta.b2frame``."""
        self._ensure_dirs()
        # Always overwrite: save_schema is only called at table-creation time.
        self._meta = blosc2.SChunk(urlpath=self._meta_path, mode="w")
        vlmeta = self._meta.vlmeta
        vlmeta["kind"] = "ctable"
        vlmeta["version"] = 1
        vlmeta["schema"] = json.dumps(schema_dict)

    def _open_meta(self) -> blosc2.SChunk:
        """Open (or return cached) the ``_meta.b2frame`` SChunk."""
        if self._meta is not None:
            return self._meta
        self._meta = blosc2.open(self._meta_path, mode=self._open_mode())
        return self._meta

    def load_schema(self) -> dict[str, Any]:
        """Read and return the schema dict stored in ``_meta.b2frame``."""
        stored = self._open_meta().vlmeta["schema"]
        # The value may come back as bytes; json.loads gets text either way.
        text = stored.decode() if isinstance(stored, bytes) else stored
        return json.loads(text)

    def check_kind(self) -> None:
        """Raise :exc:`ValueError` if ``_meta`` does not identify a CTable."""
        marker = self._open_meta().vlmeta["kind"]
        if isinstance(marker, bytes):
            marker = marker.decode()
        if marker == "ctable":
            return
        kind = marker
        raise ValueError(f"Path {self._root!r} does not contain a CTable (kind={kind!r}).")

    def column_names_from_schema(self) -> list[str]:
        """Return the column names, in the order stored in the schema."""
        schema = self.load_schema()
        return [column["name"] for column in schema["columns"]]
class SchemaSpec:
    """Common ancestor of every Blosc2 column schema descriptor.

    A concrete spec describes one column: its logical Python type
    (``python_type``), its storage ``dtype`` and any validation
    constraints. Both methods below must be overridden.
    """

    # Declared here for documentation/type checking only; concrete specs
    # provide the actual values.
    python_type: type
    dtype: np.dtype

    def to_metadata_dict(self) -> dict[str, Any]:
        """Return a JSON-compatible dict for schema serialization."""
        raise NotImplementedError

    def to_pydantic_kwargs(self) -> dict[str, Any]:
        """Return kwargs for building a Pydantic field annotation."""
        raise NotImplementedError
class _NumericSpec(SchemaSpec):
    """Shared implementation for numeric specs with ge / gt / le / lt bounds.

    Parameters
    ----------
    ge, gt, le, lt:
        Optional bounds, stored as given. ``None`` means the bound is
        not set.
    """

    _kind: str  # set by each concrete subclass

    def __init__(self, *, ge=None, gt=None, le=None, lt=None):
        self.ge, self.gt = ge, gt
        self.le, self.lt = le, lt

    def to_pydantic_kwargs(self) -> dict[str, Any]:
        """Return only the bounds that were set, keyed by constraint name."""
        bounds: dict[str, Any] = {}
        for constraint in ("ge", "gt", "le", "lt"):
            limit = getattr(self, constraint)
            if limit is not None:
                bounds[constraint] = limit
        return bounds

    def to_metadata_dict(self) -> dict[str, Any]:
        """Return the ``kind`` marker followed by the bounds that were set."""
        meta: dict[str, Any] = {"kind": self._kind}
        meta.update(self.to_pydantic_kwargs())
        return meta
class bool(SchemaSpec):
    """Boolean column.

    The class name shadows the builtin ``bool`` within this module; the
    builtin is still reachable through the ``_builtin_bool`` alias.
    """

    python_type = _builtin_bool
    dtype = np.dtype(np.bool_)

    def __init__(self):
        """Take no arguments: a boolean column has no constraints."""

    def to_pydantic_kwargs(self) -> dict[str, Any]:
        """Return an empty dict, since there is nothing to constrain."""
        no_constraints: dict[str, Any] = {}
        return no_constraints

    def to_metadata_dict(self) -> dict[str, Any]:
        """Return the serialized form, which is just the ``kind`` marker."""
        descriptor: dict[str, Any] = {"kind": "bool"}
        return descriptor
def field(
    spec: SchemaSpec,
    *,
    default=MISSING,
    cparams: dict[str, Any] | None = None,
    dparams: dict[str, Any] | None = None,
    chunks: tuple[int, ...] | None = None,
    blocks: tuple[int, ...] | None = None,
) -> dataclasses.Field:
    """Build a dataclass field carrying a Blosc2 spec and storage options.

    The spec and the per-column options are stored in the field's
    ``metadata`` under ``BLOSC2_FIELD_METADATA_KEY``.

    Parameters
    ----------
    spec:
        A schema descriptor such as ``b2.int64(ge=0)`` or ``b2.float64()``.
    default:
        Default value for the field. Omit for required fields.
    cparams:
        Compression parameters for this column's NDArray.
    dparams:
        Decompression parameters for this column's NDArray.
    chunks:
        Chunk shape for this column's NDArray.
    blocks:
        Block shape for this column's NDArray.

    Raises
    ------
    TypeError
        If *spec* is not a :class:`SchemaSpec` instance.

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> import blosc2 as b2
    >>> @dataclass
    ... class Row:
    ...     id: int = b2.field(b2.int64(ge=0))
    ...     score: float = b2.field(b2.float64(ge=0, le=100))
    ...     active: bool = b2.field(b2.bool(), default=True)
    """
    if not isinstance(spec, SchemaSpec):
        raise TypeError(f"field() requires a SchemaSpec as its first argument, got {type(spec)!r}.")

    column_options = {
        "spec": spec,
        "cparams": cparams,
        "dparams": dparams,
        "chunks": chunks,
        "blocks": blocks,
    }
    dc_kwargs: dict[str, Any] = {"metadata": {BLOSC2_FIELD_METADATA_KEY: column_options}}
    # Only forward ``default`` when one was given, so the field stays required otherwise.
    if default is not MISSING:
        dc_kwargs["default"] = default
    return dataclasses.field(**dc_kwargs)
def compute_display_width(spec: SchemaSpec) -> int:
    """Pick a terminal display width for the column described by *spec*.

    Fixed-width string (``U``) and bytes (``S``) columns get a width equal
    to their capacity, clamped to the range 10..50. Any other dtype is
    looked up by name in ``_DTYPE_DISPLAY_WIDTH``, falling back to 20.
    """
    col_dtype = spec.dtype
    kind = col_dtype.kind
    if kind not in ("U", "S"):
        return _DTYPE_DISPLAY_WIDTH.get(col_dtype.name, 20)

    # NumPy stores 'U' data as 4 bytes per character, 'S' data as 1 byte each.
    capacity = col_dtype.itemsize // 4 if kind == "U" else col_dtype.itemsize
    return min(max(capacity, 10), 50)
def infer_spec_from_annotation(annotation: Any) -> SchemaSpec:
    """Create the default :class:`SchemaSpec` for a plain Python annotation.

    The annotation must be one of the keys of ``_ANNOTATION_TO_SPEC``
    (``bool``, ``int``, ``float``, ``complex``, ``str`` or ``bytes``); the
    matching spec class is instantiated without arguments.

    Raises
    ------
    TypeError
        If the annotation is not one of the supported primitive types.
    """
    if annotation in _ANNOTATION_TO_SPEC:
        default_spec_cls = _ANNOTATION_TO_SPEC[annotation]
        return default_spec_cls()

    raise TypeError(
        f"Cannot infer a Blosc2 schema spec from annotation {annotation!r}. "
        f"Use b2.field(b2.(...)) to declare this column explicitly."
    )
+ ) + + +# --------------------------------------------------------------------------- +# Public compiler entry point +# --------------------------------------------------------------------------- + + +_RESERVED_COLUMN_NAMES: frozenset[str] = frozenset({"_meta", "_valid_rows", "_cols"}) + + +def _validate_column_name(name: str) -> None: + """Raise :exc:`ValueError` if *name* is not a legal CTable column name. + + Rules (enforced for both in-memory and persistent tables so that an + in-memory schema can always be persisted without surprises): + + * must be a non-empty string + * must not start with ``_`` (reserved for internal table layout) + * must not contain ``/`` (used as path separator in persistent layout) + * must not be one of the reserved internal names + """ + if not name: + raise ValueError("Column name cannot be empty.") + if name.startswith("_"): + raise ValueError(f"Column name cannot start with '_' (reserved for internal use): {name!r}") + if "/" in name: + raise ValueError(f"Column name cannot contain '/': {name!r}") + if name in _RESERVED_COLUMN_NAMES: + raise ValueError(f"Column name {name!r} is reserved for internal CTable use.") + + +def compile_schema(row_cls: type[Any]) -> CompiledSchema: + """Compile *row_cls* (a dataclass) into a :class:`CompiledSchema`. + + Parameters + ---------- + row_cls: + A class decorated with ``@dataclass``. Each field must either carry a + ``b2.field(...)`` default or use a supported plain annotation + (``int``, ``float``, ``bool``, ``str``, ``bytes``). + + Returns + ------- + CompiledSchema + + Raises + ------ + TypeError + If *row_cls* is not a dataclass, if a field spec is incompatible with + its annotation, or if an unsupported annotation is encountered. + ValueError + If any column name violates the naming rules. + """ + if not dataclasses.is_dataclass(row_cls) or not isinstance(row_cls, type): + raise TypeError( + f"{row_cls!r} is not a dataclass type. CTable row schemas must be defined with @dataclass." 
+ ) + + # Resolve string annotations (handles `from __future__ import annotations`) + try: + hints = typing.get_type_hints(row_cls) + except Exception as exc: + raise TypeError(f"Could not resolve type hints for {row_cls!r}: {exc}") from exc + + columns: list[CompiledColumn] = [] + + for dc_field in dataclasses.fields(row_cls): + name = dc_field.name + _validate_column_name(name) + annotation = hints.get(name, dc_field.type) + meta = get_blosc2_field_metadata(dc_field) + + if meta is not None: + # Explicit b2.field(...) path + spec = meta["spec"] + if not isinstance(spec, SchemaSpec): + raise TypeError( + f"Column {name!r}: b2.field() requires a SchemaSpec as its first " + f"argument, got {type(spec)!r}." + ) + validate_annotation_matches_spec(name, annotation, spec) + config = ColumnConfig( + cparams=meta.get("cparams"), + dparams=meta.get("dparams"), + chunks=meta.get("chunks"), + blocks=meta.get("blocks"), + ) + else: + # Inferred shorthand: plain annotation without b2.field() + spec = infer_spec_from_annotation(annotation) + config = ColumnConfig(cparams=None, dparams=None, chunks=None, blocks=None) + + # Resolve default value + if dc_field.default is not MISSING: + default = dc_field.default + elif dc_field.default_factory is not MISSING: # type: ignore[misc] + default = dc_field.default_factory + else: + default = MISSING + + columns.append( + CompiledColumn( + name=name, + py_type=annotation, + spec=spec, + dtype=spec.dtype, + default=default, + config=config, + display_width=compute_display_width(spec), + ) + ) + + return CompiledSchema( + row_cls=row_cls, + columns=columns, + columns_by_name={col.name: col for col in columns}, + ) + + +# --------------------------------------------------------------------------- +# Schema serialization helpers (Step 12 — persistence groundwork) +# --------------------------------------------------------------------------- + + +def _default_to_json(value: Any) -> Any: + """Convert a field default to a JSON-compatible 
value.""" + if value is MISSING: + return None + if isinstance(value, complex): + return {"__complex__": True, "real": value.real, "imag": value.imag} + return value + + +def _default_from_json(value: Any) -> Any: + """Reverse of :func:`_default_to_json`.""" + if value is None: + return MISSING + if isinstance(value, dict) and value.get("__complex__"): + return complex(value["real"], value["imag"]) + return value + + +def schema_to_dict(schema: CompiledSchema) -> dict[str, Any]: + """Serialize *schema* to a JSON-compatible dict. + + The result is self-contained: it can be stored as table metadata and + later passed to :func:`schema_from_dict` to reconstruct the schema + without the original Python dataclass. + + Example output:: + + { + "version": 1, + "row_cls": "Row", + "columns": [ + {"name": "id", "kind": "int64", "ge": 0, "default": null}, + {"name": "score", "kind": "float64", "ge": 0, "le": 100, "default": 0.0}, + {"name": "active", "kind": "bool", "default": true}, + ] + } + """ + cols = [] + for col in schema.columns: + entry: dict[str, Any] = {"name": col.name} + entry.update(col.spec.to_metadata_dict()) # adds "kind" + constraints + entry["default"] = _default_to_json(col.default) + if col.config.cparams is not None: + entry["cparams"] = col.config.cparams + if col.config.dparams is not None: + entry["dparams"] = col.config.dparams + if col.config.chunks is not None: + entry["chunks"] = list(col.config.chunks) + if col.config.blocks is not None: + entry["blocks"] = list(col.config.blocks) + cols.append(entry) + + return { + "version": 1, + "row_cls": schema.row_cls.__name__ if schema.row_cls is not None else None, + "columns": cols, + } + + +def schema_from_dict(data: dict[str, Any]) -> CompiledSchema: + """Reconstruct a :class:`CompiledSchema` from a dict produced by + :func:`schema_to_dict`. + + The original Python dataclass is *not* required. ``row_cls`` on the + returned schema will be ``None``. 
+ + Raises + ------ + ValueError + If *data* uses an unknown schema version or an unknown column kind. + """ + version = data.get("version", 1) + if version != 1: + raise ValueError(f"Unsupported schema version {version!r}") + + columns: list[CompiledColumn] = [] + for entry in data["columns"]: + entry = dict(entry) # don't mutate caller's data + name = entry.pop("name") + kind = entry.pop("kind") + default = _default_from_json(entry.pop("default", None)) + cparams = entry.pop("cparams", None) + dparams = entry.pop("dparams", None) + chunks = tuple(entry.pop("chunks")) if "chunks" in entry else None + blocks = tuple(entry.pop("blocks")) if "blocks" in entry else None + + spec_cls = _KIND_TO_SPEC.get(kind) + if spec_cls is None: + raise ValueError(f"Unknown column kind {kind!r}") + + # Remaining keys in entry are constraint kwargs (ge, le, max_length, …) + spec = spec_cls(**entry) + + columns.append( + CompiledColumn( + name=name, + py_type=spec.python_type, + spec=spec, + dtype=spec.dtype, + default=default, + config=ColumnConfig(cparams=cparams, dparams=dparams, chunks=chunks, blocks=blocks), + display_width=compute_display_width(spec), + ) + ) + + return CompiledSchema( + row_cls=None, + columns=columns, + columns_by_name={col.name: col for col in columns}, + ) diff --git a/src/blosc2/schema_validation.py b/src/blosc2/schema_validation.py new file mode 100644 index 00000000..dd750122 --- /dev/null +++ b/src/blosc2/schema_validation.py @@ -0,0 +1,104 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### + +"""Row-level validation via an internally-generated Pydantic model. + +All Pydantic-specific logic is isolated here. 
def build_validator_model(schema: CompiledSchema) -> type[BaseModel]:
    """Return the Pydantic model class for *schema*, building it on first use.

    The model is stored on ``schema.validator_model``; later calls return
    that cached class. Each column becomes one model field whose
    constraints come from the column spec's ``to_pydantic_kwargs()``
    (``ge``, ``le``, ``gt``, ``lt``, ``max_length``, ``min_length``,
    ``pattern``). Columns without a default are required fields.
    """
    cached = schema.validator_model
    if cached is not None:
        return cached

    definitions: dict[str, Any] = {}
    for column in schema.columns:
        constraints = column.spec.to_pydantic_kwargs()
        if column.default is not MISSING:
            field_info = Field(default=column.default, **constraints)
        else:
            # No default: Pydantic treats the field as required.
            field_info = Field(**constraints)
        definitions[column.name] = (column.py_type, field_info)

    # Schemas rebuilt from metadata carry no row class to name the model after.
    row_cls = schema.row_cls
    cls_name = "Unknown" if row_cls is None else row_cls.__name__
    schema.validator_model = create_model(f"_Validator_{cls_name}", **definitions)
    return schema.validator_model
def validate_rows_rowwise(schema: CompiledSchema, rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Validate row dicts one at a time and return the validated dicts.

    Parameters
    ----------
    schema:
        Compiled schema for the table.
    rows:
        List of ``{column_name: value}`` mappings.

    Returns
    -------
    list of dict
        One ``model_dump()`` result per input row, in input order.

    Raises
    ------
    ValueError
        On the first row that violates a constraint, with the row index
        and the Pydantic error details.
    """
    validator = build_validator_model(schema)
    validated: list[dict[str, Any]] = []
    for index, candidate in enumerate(rows):
        try:
            checked = validator(**candidate)
        except ValidationError as err:
            # Surface a plain ValueError so callers need not import Pydantic.
            raise ValueError(f"Row {index}: {err}") from err
        else:
            validated.append(checked.model_dump())
    return validated
# (constraint name, predicate selecting the values that VIOLATE that constraint),
# listed in the order in which the constraints are checked.
_BOUND_VIOLATIONS = (
    ("ge", lambda arr, bound: arr < bound),
    ("gt", lambda arr, bound: arr <= bound),
    ("le", lambda arr, bound: arr > bound),
    ("lt", lambda arr, bound: arr >= bound),
)


def _validate_string_lengths(col: CompiledColumn, arr: Any) -> None:
    """Check ``min_length`` / ``max_length`` constraints on a string/bytes column.

    Parameters
    ----------
    col:
        Compiled column descriptor; ``col.spec`` carries the length limits
        and ``col.name`` is used in error messages.
    arr:
        NumPy array holding the column values.

    Raises
    ------
    ValueError
        If any element is longer than ``max_length`` or shorter than
        ``min_length``.  The message quotes the first offending value.
    """
    if arr.dtype.kind in ("U", "S"):
        # Fixed-width str/bytes arrays: NumPy computes the lengths directly.
        lengths = np.char.str_len(arr)
    else:
        # Any other dtype: call len() per element.  ``otypes`` must be given,
        # otherwise np.vectorize raises ValueError on size-0 input -- which is
        # exactly what an empty batch looks like (np.asarray([]) is float64).
        lengths = np.vectorize(len, otypes=[np.intp])(arr.astype(object))

    spec = col.spec
    max_length = getattr(spec, "max_length", None)
    if max_length is not None:
        bad = lengths > max_length
        if np.any(bad):
            first = arr.astype(object)[bad][0]
            raise ValueError(f"Column '{col.name}': value {first!r} exceeds max_length={max_length}")

    min_length = getattr(spec, "min_length", None)
    if min_length is not None:
        bad = lengths < min_length
        if np.any(bad):
            first = arr.astype(object)[bad][0]
            raise ValueError(f"Column '{col.name}': value {first!r} is shorter than min_length={min_length}")


def validate_column_values(col: CompiledColumn, values: Any) -> None:
    """Check all constraint attributes of *col*'s spec against *values*.

    Numeric bounds (``ge``, ``gt``, ``le``, ``lt``) are checked first, in
    that order, followed by the string length limits.  Constraints that the
    spec does not define, or defines as ``None``, are skipped.

    Parameters
    ----------
    col:
        Compiled column descriptor (carries the spec with constraints).
    values:
        Array-like of values for this column.  An empty batch is valid.

    Raises
    ------
    ValueError
        If any value violates a constraint declared on the column's spec.
        The message names the column, the first offending value and the
        violated constraint.
    """
    spec = col.spec
    arr = np.asarray(values)

    # NOTE: NaN compares False against every bound, so NaN entries are never
    # reported as violations by these checks.
    for name, violates in _BOUND_VIOLATIONS:
        bound = getattr(spec, name, None)
        if bound is None:
            continue
        bad = violates(arr, bound)
        if np.any(bad):
            first = arr[bad][0]
            raise ValueError(f"Column '{col.name}': value {first!r} violates constraint {name}={bound}")

    # String / bytes length bounds are only evaluated when at least one is set.
    if getattr(spec, "max_length", None) is not None or getattr(spec, "min_length", None) is not None:
        _validate_string_lengths(col, arr)
def test_to_arrow_int_values():
    """The exported ``id`` column holds the same integers as the input rows."""
    expected_ids = [row[0] for row in DATA10]
    table = CTable(Row, new_data=DATA10)
    arrow_table = table.to_arrow()
    exported_ids = arrow_table["id"].to_pylist()
    np.testing.assert_array_equal(exported_ids, expected_ids)
def test_to_arrow_skips_deleted_rows():
    """Rows removed with ``delete()`` do not appear in the Arrow export."""
    table = CTable(Row, new_data=DATA10)
    table.delete([0, 1])

    exported = table.to_arrow()

    surviving_ids = [*range(2, 10)]
    assert len(exported) == 8
    assert exported["id"].to_pylist() == surviving_ids
def test_from_arrow_empty_table():
    """An Arrow table with columns but zero rows imports as an empty CTable."""
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            pa.field("val", pa.float64()),
        ]
    )
    # Pass the declared schema explicitly; it used to be built and then ignored.
    at = pa.table(
        {"id": pa.array([], type=pa.int64()), "val": pa.array([], type=pa.float64())},
        schema=schema,
    )
    t = CTable.from_arrow(at)
    assert len(t) == 0
    assert t.col_names == ["id", "val"]
def test_from_arrow_string_max_length():
    """The imported string column is wide enough for the longest input value."""
    values = ["hi", "hello world", "!"]
    arrow_table = pa.table({"name": pa.array(values, type=pa.string())})
    imported = CTable.from_arrow(arrow_table)
    # "hello world" is 11 chars.  The division by 4 assumes the stored dtype
    # uses 4 bytes per character (NumPy unicode) -- TODO confirm.
    capacity = imported["name"].dtype.itemsize // 4
    assert capacity >= 11
def test_column_getitem_no_holes():
    """Integer, slice and list indexing on a table without deleted rows."""
    table = CTable(Row, new_data=DATA20)
    ids = table.id

    # Integer positions, including negative ones counted from the end.
    for position, expected in ((0, 0), (5, 5), (19, 19), (-1, 19), (-5, 15)):
        assert ids[position] == expected

    # A slice yields a Column view.
    for window in (slice(0, 5), slice(10, 15)):
        assert isinstance(ids[window], blosc2.Column)

    # A list of positions returns the values in the requested order; with no
    # holes each id equals its position.
    for positions in ([0, 5, 10, 15], [19, 0, 10]):
        assert list(ids[positions]) == positions
def test_column_getitem_out_of_range():
    """Out-of-bounds integer and list indices raise IndexError."""
    table = CTable(Row, new_data=DATA20)
    table.delete([1, 3, 5, 7, 9])
    ids = table.id

    # Too large, too negative, and a list containing one bad position.
    for bad_index in (100, -100, [0, 1, 100]):
        with pytest.raises(IndexError):
            _ = ids[bad_index]
def test_column_edge_cases():
    """A never-filled table and a fully-deleted table both expose empty columns."""
    empty = CTable(Row)
    assert len(empty.id) == 0
    assert list(empty.id) == []

    rows = [(i, float(i * 10), True) for i in range(10)]
    emptied = CTable(Row, new_data=rows)
    every_row = list(range(10))
    emptied.delete(every_row)
    assert len(emptied.id) == 0
    assert list(emptied.id) == []
def test_column_view_mask_is_independent():
    """Two slice views on the same column have independent masks."""
    tabla = CTable(Row, new_data=DATA20)
    col = tabla.id

    view_a = col[0:5]
    view_b = col[10:15]

    # Creating the second view must not change what the first one selects,
    # and each view must expose only its own rows.
    np.testing.assert_array_equal(view_a.to_numpy(), np.arange(0, 5, dtype=np.int64))
    np.testing.assert_array_equal(view_b.to_numpy(), np.arange(10, 15, dtype=np.int64))

    # The parent column still spans every row after both views were taken.
    assert len(col) == 20
def test_min_max_string():
    """min() and max() on a string column return the expected extremes."""
    fruits = ["banana", "apple", "cherry"]
    table = CTable(StrRow, new_data=[(fruit,) for fruit in fruits])
    smallest = table["label"].min()
    assert smallest == "apple"
    largest = table["label"].max()
    assert largest == "cherry"
def test_std_population():
    """std() with its default arguments matches NumPy's ``ddof=0`` result."""
    table = CTable(Row, new_data=DATA20)
    reference = np.arange(20, dtype=np.float64)
    expected = float(reference.std(ddof=0))
    actual = table["id"].std()
    assert actual == pytest.approx(expected)
def test_unique_skips_deleted():
    """unique() ignores values that only occur in deleted rows."""
    rows = [(i % 3, float(i), True) for i in range(9)]
    table = CTable(Row, new_data=rows)
    # ids cycle 0,1,2,...; the rows holding id == 0 sit at positions 0, 3 and 6.
    table.delete([0, 3, 6])

    uniques = table["id"].unique()

    assert 0 not in uniques.tolist()
    assert set(uniques.tolist()) == {1, 2}
def test_value_counts_bool():
    """value_counts() on a bool column counts both values, most frequent first."""
    # Every fourth row is inactive: 5 False, 15 True.
    rows = [(i, float(i), i % 4 != 0) for i in range(20)]
    table = CTable(Row, new_data=rows)

    counts = table["active"].value_counts()

    assert counts[True] == 15
    assert counts[False] == 5
    # True has the higher count, so it must be the first key.
    leading_key = list(counts.keys())[0]
    assert leading_key is True
def test_cbytes_nbytes_consistent_with_info():
    """Table-level cbytes/nbytes equal the per-column sums plus the valid-rows array."""
    table = CTable(Row, new_data=DATA20)
    columns = list(table._cols.values())
    valid_rows = table._valid_rows

    compressed_total = sum(c.cbytes for c in columns) + valid_rows.cbytes
    raw_total = sum(c.nbytes for c in columns) + valid_rows.nbytes

    assert table.cbytes == compressed_total
    assert table.nbytes == raw_total
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100)) + + +def generate_test_data(n_rows: int) -> list: + return [(i, float(i)) for i in range(n_rows)] + + +def test_compact_empty_table(): + """Test compact() on a completely empty table (no data).""" + table = CTable(Row, expected_size=100) + + assert len(table) == 0 + + # Should not raise any error + table.compact() + + # Capacity might have drastically reduced, but the logical table must remain empty + assert len(table) == 0 + # Verify that if data is added later, it works correctly + table.append((1, 10.0)) + assert len(table) == 1 + assert table.id[0] == 1 + + +def test_compact_full_table(): + """Test compact() on a completely full table (no holes or free space).""" + data = generate_test_data(50) + table = CTable(Row, new_data=data, expected_size=50) + + assert len(table) == 50 + initial_capacity = len(table._valid_rows) + + # Should not raise any error or change the logical state + table.compact() + + assert len(table) == 50 + # Capacity should not have changed because it was already full + assert len(table._valid_rows) == initial_capacity + + # Verify data integrity + assert table.id[0] == 0 + assert table.id[-1] == 49 + + +def test_compact_already_compacted_table(): + """Test compact() on a table that has free space but no holes (contiguous data).""" + data = generate_test_data(20) + # Large expected_size to ensure free space at the end + table = CTable(Row, new_data=data, expected_size=100) + + assert len(table) == 20 + + # Execute compact. Since data is already contiguous, the table might reduce + # its size due to the < len//2 while loop, but it shouldn't fail. 
def test_compact_with_holes():
    """compact() on a heavily fragmented table keeps the survivors, packed at the front."""
    table = CTable(Row, new_data=generate_test_data(30), expected_size=50)

    # Keep only every fifth row: ids 0, 5, 10, 15, 20, 25.
    survivors = [0, 5, 10, 15, 20, 25]
    doomed = [i for i in range(30) if i % 5 != 0]
    table.delete(doomed)
    assert len(table) == 6

    table.compact()
    assert len(table) == 6

    for position, expected_id in enumerate(survivors):
        # Logical access through the Column wrapper ...
        assert table.id[position] == expected_id
        # ... and physical access to the underlying array must agree.
        assert table._cols["id"][position] == expected_id

    # Physical mask: the first six slots are valid and nothing after them is.
    valid = table._valid_rows[: len(table._valid_rows)]
    assert np.all(valid[:6])
    assert not np.any(valid[6:])
table.delete([1, 3, 5, 7, 9]) # 5 elements remaining + + # Compact 3 times in a row + table.compact() + table.compact() + table.compact() + + assert len(table) == 5 + assert list(table.id) == [0, 2, 4, 6, 8] diff --git a/tests/ctable/test_construct.py b/tests/ctable/test_construct.py new file mode 100644 index 00000000..4a091997 --- /dev/null +++ b/tests/ctable/test_construct.py @@ -0,0 +1,201 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +# ------------------------------------------------------------------- +# Predefined Test Data +# ------------------------------------------------------------------- +SMALL_DATA = [ + (1, 1 + 2j, 95.5, True), + (2, 3 - 4j, 80.0, False), + (3, 0j, 50.2, True), + (4, -1 + 1j, 12.3, False), + (5, 5j, 99.9, True), +] +SMALLEST_DATA = SMALL_DATA[:2] + +dtype_struct = [("id", "i8"), ("c_val", "c16"), ("score", "f8"), ("active", "?")] +SMALL_STRUCT = np.array(SMALL_DATA, dtype=dtype_struct) + + +# ------------------------------------------------------------------- +# Validation Utility +# ------------------------------------------------------------------- +def assert_table_equals_data(table: CTable, expected_data: list): + assert len(table) == len(expected_data), f"Expected length {len(expected_data)}, got {len(table)}" + if not expected_data: + return + col_names = table.col_names + # Transpose: expected_data is list-of-rows → 
list-of-columns + expected_cols = list(zip(*expected_data, strict=False)) + for col_idx, col_name in enumerate(col_names): + actual = table[col_name].to_numpy() + expected = expected_cols[col_idx] + if isinstance(expected[0], (float, complex)): + np.testing.assert_allclose(actual, expected, err_msg=f"col {col_name}") + else: + np.testing.assert_array_equal(actual, expected, err_msg=f"col {col_name}") + + +# ------------------------------------------------------------------- +# Tests +# ------------------------------------------------------------------- + + +def test_empty_table_variants(): + """Empty table: default, with expected_size, and with compact=True.""" + table = CTable(Row) + assert len(table) == 0 + assert table.nrows == 0 + assert table.ncols == 4 + for col_name in ["id", "c_val", "score", "active"]: + assert col_name in table._cols + assert isinstance(table._cols[col_name], blosc2.NDArray) + + table_sized = CTable(Row, expected_size=5000) + assert len(table_sized) == 0 + assert all(len(col) == 5000 for col in table_sized._cols.values()) + + table_compact = CTable(Row, compact=True) + assert len(table_compact) == 0 + assert table_compact.auto_compact is True + + +def test_empty_data_lifecycle(): + """Create from [], extend with [], then extend with real data.""" + table = CTable(Row, new_data=[]) + assert len(table) == 0 + + table.extend([]) + assert len(table) == 0 + + table.extend(SMALL_DATA) + assert_table_equals_data(table, SMALL_DATA) + + +def test_construction_variants(): + """Sources (list, structured array), expected_size, and compact flag.""" + # list of tuples and structured array produce identical tables + assert_table_equals_data(CTable(Row, new_data=SMALL_DATA), SMALL_DATA) + assert_table_equals_data(CTable(Row, new_data=SMALL_STRUCT), SMALL_DATA) + + # expected_size smaller than data → resize; larger → preallocated + for es in [1, 5]: + assert_table_equals_data(CTable(Row, new_data=SMALL_DATA, expected_size=es), SMALL_DATA) + table_large = 
CTable(Row, new_data=SMALL_DATA, expected_size=1000) + assert_table_equals_data(table_large, SMALL_DATA) + assert all(len(col) == 1000 for col in table_large._cols.values()) + + # compact flag is stored and data is intact + table_false = CTable(Row, new_data=SMALL_DATA, compact=False) + assert table_false.auto_compact is False + assert_table_equals_data(table_false, SMALL_DATA) + + table_true = CTable(Row, new_data=SMALL_DATA, compact=True) + assert table_true.auto_compact is True + assert_table_equals_data(table_true, SMALL_DATA) + + +def test_append_and_clone(): + """Build table row by row, then clone it into a new CTable.""" + table = CTable(Row) + for row in SMALLEST_DATA: + table.append(row) + assert_table_equals_data(table, SMALLEST_DATA) + + cloned = CTable(Row, new_data=table) + assert_table_equals_data(cloned, SMALLEST_DATA) + assert table is not cloned + + +def test_invalid_append(): + """Constraint violation and incompatible type both raise errors.""" + table = CTable(Row, expected_size=1) + + # Constraint violation: id must be >= 0 + with pytest.raises(ValueError): + table.append((-1, 1 + 2j, 95.5, True)) + + # Constraint violation: score must be <= 100 + with pytest.raises(ValueError): + table.append((1, 1 + 2j, 150.0, True)) + + # Incompatible type for id: string cannot be coerced to int + with pytest.raises((TypeError, ValueError)): + table.append(["invalid_text", 1 + 2j, 95.5, True]) + + +def test_extreme_values(): + """Extreme complex, float boundary, and large integer values in one table.""" + # Combine all extremes into one table to avoid repeated CTable construction + extreme_data = [ + (1, complex(1e308, -1e308), 0.0, True), + (2**32, 0j, 100.0, False), + (2**60, complex(-1e308, 1e308), 0.0001, True), + (4, 0j, 99.9999, False), + ] + assert_table_equals_data(CTable(Row, new_data=extreme_data), extreme_data) + + +def test_extend_append_and_resize(): + """Auto-resize via append one-by-one, then extend+append beyond initial size.""" + # Append 
beyond expected_size triggers resize + table = CTable(Row, expected_size=2) + for row in SMALL_DATA: + table.append(row) + assert_table_equals_data(table, SMALL_DATA) + assert all(len(col) >= 5 for col in table._cols.values()) + + # Extend beyond expected_size, then append the last row + table2 = CTable(Row, expected_size=2) + table2.extend(SMALL_DATA[:4]) + assert len(table2) == 4 + table2.append(SMALL_DATA[4]) + assert_table_equals_data(table2, SMALL_DATA) + + +def test_column_integrity(): + """Column access via [] and getattr, and correct dtypes.""" + table = CTable(Row, new_data=SMALL_DATA) + + assert isinstance(table["id"], blosc2.ctable.Column) + assert isinstance(table.score, blosc2.ctable.Column) + + assert table._cols["id"].dtype == np.int64 + assert table._cols["c_val"].dtype == np.complex128 + assert table._cols["score"].dtype == np.float64 + assert table._cols["active"].dtype == np.bool_ + + +def test_valid_rows(): + """_valid_rows has exactly 5 True entries after creation and after extend.""" + table_direct = CTable(Row, new_data=SMALL_DATA) + assert blosc2.count_nonzero(table_direct._valid_rows) == 5 + + table_extended = CTable(Row) + table_extended.extend(SMALL_DATA) + assert blosc2.count_nonzero(table_extended._valid_rows) == 5 + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_csv_interop.py b/tests/ctable/test_csv_interop.py new file mode 100644 index 00000000..7b07277a --- /dev/null +++ b/tests/ctable/test_csv_interop.py @@ -0,0 +1,235 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for CTable.to_csv() and CTable.from_csv().""" + +import csv +import os +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + label: str = blosc2.field(blosc2.string(max_length=16), default="") + + +DATA10 = [(i, float(i * 10 % 100), i % 2 == 0, f"r{i}") for i in range(10)] + + +@pytest.fixture +def tmp_csv(tmp_path): + return str(tmp_path / "table.csv") + + +@pytest.fixture +def table10(): + return CTable(Row, new_data=DATA10) + + +# =========================================================================== +# to_csv() +# =========================================================================== + + +def test_to_csv_creates_file(table10, tmp_csv): + table10.to_csv(tmp_csv) + assert os.path.exists(tmp_csv) + + +def test_to_csv_header_row(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + first = f.readline().strip() + assert first == "id,score,active,label" + + +def test_to_csv_row_count(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + rows = list(csv.reader(f)) + assert len(rows) == 11 # 1 header + 10 data + + +def test_to_csv_no_header(table10, tmp_csv): + table10.to_csv(tmp_csv, header=False) + with open(tmp_csv) as f: + rows = list(csv.reader(f)) + assert len(rows) == 10 + + +def test_to_csv_int_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + reader = csv.DictReader(f) + ids = [int(row["id"]) for row in reader] + assert ids == list(range(10)) + + +def test_to_csv_float_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + reader = csv.DictReader(f) + scores = 
[float(row["score"]) for row in reader] + assert scores == [r[1] for r in DATA10] + + +def test_to_csv_bool_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + reader = csv.DictReader(f) + actives = [row["active"] for row in reader] + # numpy bool serialises as "True"/"False" + assert actives == [str(r[2]) for r in DATA10] + + +def test_to_csv_string_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + reader = csv.DictReader(f) + labels = [row["label"] for row in reader] + assert labels == [r[3] for r in DATA10] + + +def test_to_csv_custom_separator(table10, tmp_csv): + table10.to_csv(tmp_csv, sep="\t") + with open(tmp_csv) as f: + first = f.readline().strip() + assert "\t" in first + assert "," not in first + + +def test_to_csv_skips_deleted_rows(table10, tmp_csv): + table10.delete([0, 1]) + table10.to_csv(tmp_csv) + with open(tmp_csv) as f: + rows = list(csv.reader(f)) + assert len(rows) == 9 # 1 header + 8 live rows + assert rows[1][0] == "2" # first live id + + +def test_to_csv_empty_table(tmp_csv): + t = CTable(Row) + t.to_csv(tmp_csv) + with open(tmp_csv) as f: + rows = list(csv.reader(f)) + assert rows == [["id", "score", "active", "label"]] + + +def test_to_csv_select_view(table10, tmp_csv): + table10.select(["id", "label"]).to_csv(tmp_csv) + with open(tmp_csv) as f: + reader = csv.DictReader(f) + rows = list(reader) + assert list(rows[0].keys()) == ["id", "label"] + assert len(rows) == 10 + + +# =========================================================================== +# from_csv() +# =========================================================================== + + +def test_from_csv_returns_ctable(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + assert isinstance(t2, CTable) + + +def test_from_csv_row_count(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + assert len(t2) == 10 + + +def test_from_csv_column_names(table10, tmp_csv): 
+ table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + assert t2.col_names == ["id", "score", "active", "label"] + + +def test_from_csv_int_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + np.testing.assert_array_equal(t2["id"].to_numpy(), table10["id"].to_numpy()) + + +def test_from_csv_float_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + np.testing.assert_allclose(t2["score"].to_numpy(), table10["score"].to_numpy()) + + +def test_from_csv_bool_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + # bool is serialised as "True"/"False"; np.array(..., dtype=bool) parses that + np.testing.assert_array_equal(t2["active"].to_numpy(), table10["active"].to_numpy()) + + +def test_from_csv_string_values(table10, tmp_csv): + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + assert t2["label"].to_numpy().tolist() == table10["label"].to_numpy().tolist() + + +def test_from_csv_no_header(table10, tmp_csv): + table10.to_csv(tmp_csv, header=False) + t2 = CTable.from_csv(tmp_csv, Row, header=False) + assert len(t2) == 10 + np.testing.assert_array_equal(t2["id"].to_numpy(), table10["id"].to_numpy()) + + +def test_from_csv_custom_separator(table10, tmp_csv): + table10.to_csv(tmp_csv, sep="\t") + t2 = CTable.from_csv(tmp_csv, Row, sep="\t") + assert len(t2) == 10 + + +def test_from_csv_empty_file(tmp_csv): + with open(tmp_csv, "w") as f: + f.write("id,score,active,label\n") + t = CTable.from_csv(tmp_csv, Row) + assert len(t) == 0 + assert t.col_names == ["id", "score", "active", "label"] + + +def test_from_csv_roundtrip(table10, tmp_csv): + """to_csv then from_csv preserves all values.""" + table10.to_csv(tmp_csv) + t2 = CTable.from_csv(tmp_csv, Row) + for name in ["id", "score"]: + np.testing.assert_array_equal(t2[name].to_numpy(), table10[name].to_numpy()) + np.testing.assert_array_equal(t2["active"].to_numpy(), 
table10["active"].to_numpy()) + assert t2["label"].to_numpy().tolist() == table10["label"].to_numpy().tolist() + + +def test_from_csv_wrong_field_count_raises(tmp_csv): + with open(tmp_csv, "w") as f: + f.write("id,score,active,label\n") + f.write("1,2.0\n") # only 2 fields instead of 4 + with pytest.raises(ValueError, match="expected 4 fields"): + CTable.from_csv(tmp_csv, Row) + + +def test_from_csv_not_dataclass_raises(tmp_csv): + with open(tmp_csv, "w") as f: + f.write("id\n1\n") + with pytest.raises(TypeError): + CTable.from_csv(tmp_csv, int) + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_ctable_dataclass_schema.py b/tests/ctable/test_ctable_dataclass_schema.py new file mode 100644 index 00000000..90283269 --- /dev/null +++ b/tests/ctable/test_ctable_dataclass_schema.py @@ -0,0 +1,289 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""End-to-end CTable tests using the dataclass schema API.""" + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable +from blosc2.schema_compiler import schema_from_dict + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +@dataclass +class RowComplex: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +# ------------------------------------------------------------------- +# Construction +# ------------------------------------------------------------------- + + +def test_construction_empty(): + t = CTable(Row) + assert len(t) == 0 + assert t.ncols == 3 + assert t.col_names == ["id", "score", "active"] + + +def test_construction_with_data(): + data = [(i, float(i), True) for i in range(10)] + t = CTable(Row, new_data=data) + assert len(t) == 10 + + +def test_construction_expected_size(): + t = CTable(Row, expected_size=500) + assert all(len(col) == 500 for col in t._cols.values()) + + +# ------------------------------------------------------------------- +# append — different input shapes +# ------------------------------------------------------------------- + + +def test_append_tuple(): + t = CTable(Row) + t.append((1, 50.0, True)) + assert len(t) == 1 + assert t.row[0].id[0] == 1 + assert t.row[0].score[0] == 50.0 + assert t.row[0].active[0] + + +def test_append_list(): + t = CTable(Row) + t.append([2, 75.0, False]) + assert len(t) == 1 + assert t.row[0].id[0] == 2 + + +def test_append_dict(): + t = CTable(Row) + t.append({"id": 3, 
"score": 25.0, "active": True}) + assert len(t) == 1 + assert t.row[0].id[0] == 3 + + +def test_append_dataclass_instance(): + t = CTable(Row) + + @dataclass + class Row2: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + t2 = CTable(Row2) + # Simulate appending a dict (dataclass instance path) + t2.append({"id": 4, "score": 10.0, "active": False}) + assert t2.row[0].id[0] == 4 + + +def test_append_defaults_filled(): + """Omitting optional fields fills them from defaults.""" + t = CTable(Row) + t.append((5,)) # only id; score=0.0 and active=True filled in + assert t.row[0].score[0] == 0.0 + assert t.row[0].active[0] + + +# ------------------------------------------------------------------- +# extend — iterable of rows +# ------------------------------------------------------------------- + + +def test_extend_list_of_tuples(): + t = CTable(Row, expected_size=10) + t.extend([(i, float(i), i % 2 == 0) for i in range(10)]) + assert len(t) == 10 + + +def test_extend_list_of_dicts(): + """extend() also accepts list of dicts via zip(*data) → positional path.""" + # This goes through the zip(*data) path so dicts aren't directly supported + # in extend; test that the common tuple path works correctly. 
+ t = CTable(Row, expected_size=5) + data = [(i, float(i * 10), True) for i in range(5)] + t.extend(data) + for i in range(5): + assert t.row[i].id[0] == i + + +def test_extend_numpy_structured(): + dtype = np.dtype([("id", np.int64), ("score", np.float64), ("active", np.bool_)]) + arr = np.array([(1, 50.0, True), (2, 75.0, False)], dtype=dtype) + t = CTable(Row, expected_size=5) + t.extend(arr) + assert len(t) == 2 + assert t.row[0].id[0] == 1 + assert t.row[1].score[0] == 75.0 + + +# ------------------------------------------------------------------- +# extend — per-call validate override +# ------------------------------------------------------------------- + + +def test_extend_validate_override_false(): + """validate=False on a per-call basis skips checks even for a table with validate=True.""" + t = CTable(Row, expected_size=5, validate=True) + # Would fail if validate were applied + t.extend([(-1, 200.0, True)], validate=False) + assert len(t) == 1 + + +def test_extend_validate_override_true(): + """validate=True on a per-call basis enforces checks even for a table with validate=False.""" + t = CTable(Row, expected_size=5, validate=False) + with pytest.raises(ValueError): + t.extend([(-1, 50.0, True)], validate=True) + + +def test_extend_validate_none_uses_table_default(): + t_on = CTable(Row, expected_size=5, validate=True) + with pytest.raises(ValueError): + t_on.extend([(-1, 50.0, True)], validate=None) + + t_off = CTable(Row, expected_size=5, validate=False) + t_off.extend([(-1, 50.0, True)], validate=None) # no error + assert len(t_off) == 1 + + +# ------------------------------------------------------------------- +# Schema introspection (Step 9) +# ------------------------------------------------------------------- + + +def test_schema_property(): + from blosc2.schema_compiler import CompiledSchema + + t = CTable(Row) + assert isinstance(t.schema, CompiledSchema) + assert t.schema.row_cls is Row + + +def test_column_schema(): + from 
blosc2.schema_compiler import CompiledColumn + + t = CTable(Row) + col = t.column_schema("id") + assert isinstance(col, CompiledColumn) + assert col.name == "id" + assert col.spec.ge == 0 + + +def test_column_schema_unknown(): + t = CTable(Row) + with pytest.raises(KeyError, match="no_such_col"): + t.column_schema("no_such_col") + + +def test_schema_dict(): + t = CTable(Row) + d = t.schema_dict() + assert d["version"] == 1 + assert d["row_cls"] == "Row" + col_names = [c["name"] for c in d["columns"]] + assert col_names == ["id", "score", "active"] + + +def test_schema_dict_roundtrip(): + """schema_from_dict on a CTable's schema_dict restores column structure.""" + t = CTable(Row) + d = t.schema_dict() + restored = schema_from_dict(d) + assert len(restored.columns) == 3 + assert restored.columns_by_name["id"].spec.ge == 0 + assert restored.columns_by_name["score"].spec.le == 100 + + +# ------------------------------------------------------------------- +# Per-column cparams plumbing +# ------------------------------------------------------------------- + + +def test_per_column_cparams(): + """Columns with custom cparams get their own NDArray settings.""" + + @dataclass + class CustomRow: + id: int = blosc2.field(blosc2.int64(), cparams={"clevel": 9}) + score: float = blosc2.field(blosc2.float64(), default=0.0) + + t = CTable(CustomRow, expected_size=10) + # The column schema reflects the cparams + assert t.column_schema("id").config.cparams == {"clevel": 9} + assert t.column_schema("score").config.cparams is None + + +# ------------------------------------------------------------------- +# New integer / float spec types used in CTable +# ------------------------------------------------------------------- + + +def test_new_spec_types_in_ctable(): + """int8, uint16, float32 and friends work correctly end-to-end in CTable.""" + + @dataclass + class Compact: + flags: int = blosc2.field(blosc2.uint8(le=255)) + level: int = blosc2.field(blosc2.int8(ge=-128, le=127), 
default=0) + ratio: float = blosc2.field(blosc2.float32(ge=0.0, le=1.0), default=0.0) + + t = CTable(Compact, expected_size=10) + t.extend([(0, -1, 0.0), (255, 127, 1.0), (128, 0, 0.5)]) + assert len(t) == 3 + assert t._cols["flags"].dtype == np.dtype(np.uint8) + assert t._cols["level"].dtype == np.dtype(np.int8) + assert t._cols["ratio"].dtype == np.dtype(np.float32) + + +def test_new_spec_constraints_enforced(): + """Constraints on new spec types are enforced by both append and extend.""" + + # uint8 with explicit ge=0: negative value rejected by Pydantic + @dataclass + class R: + x: int = blosc2.field(blosc2.uint8(ge=0, le=200)) + + t = CTable(R, expected_size=5) + with pytest.raises(ValueError): + t.append((-1,)) # violates ge=0 + with pytest.raises(ValueError): + t.append((201,)) # violates le=200 + + # int8 with ge/le: vectorized extend checks + @dataclass + class R2: + x: int = blosc2.field(blosc2.int8(ge=0, le=100)) + + t2 = CTable(R2, expected_size=5) + with pytest.raises(ValueError): + t2.extend([(101,)]) # violates le=100 + with pytest.raises(ValueError): + t2.extend([(-1,)]) # violates ge=0 + + +if __name__ == "__main__": + import pytest + + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_delete_rows.py b/tests/ctable/test_delete_rows.py new file mode 100644 index 00000000..2f8e9013 --- /dev/null +++ b/tests/ctable/test_delete_rows.py @@ -0,0 +1,203 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +def generate_test_data(n_rows: int) -> list: + return [(i, complex(i, -i), float((i * 7) % 100), bool(i % 2)) for i in range(1, n_rows + 1)] + + +# ------------------------------------------------------------------- +# Tests +# ------------------------------------------------------------------- + + +def test_delete_single_element(): + """First, last, middle deletion once; and repeated deletion from front/back.""" + data = generate_test_data(50) + + # Delete first + t = CTable(Row, new_data=data, expected_size=50) + t.delete(0) + assert len(t) == 49 + assert not t._valid_rows[0] + + # Delete last + t2 = CTable(Row, new_data=data, expected_size=50) + t2.delete(-1) + assert len(t2) == 49 + + # Delete middle + t3 = CTable(Row, new_data=data, expected_size=50) + t3.delete(25) + assert len(t3) == 49 + + # Delete first 10 times in a row + t4 = CTable(Row, new_data=data, expected_size=50) + for i in range(10): + t4.delete(0) + assert len(t4) == 50 - (i + 1) + assert len(t4) == 40 + + # Delete last 10 times in a row + t5 = CTable(Row, new_data=data, expected_size=50) + for i in range(10): + t5.delete(-1) + assert len(t5) == 50 - (i + 1) + assert len(t5) == 40 + + +def test_delete_list_of_positions(): + """Scattered, consecutive, even, odd, and slice-equivalent list deletions.""" + data = generate_test_data(50) + + # Scattered + t = CTable(Row, new_data=data, expected_size=50) + t.delete([0, 10, 20, 30, 40]) + assert len(t) == 45 + + # Consecutive block + t2 = CTable(Row, new_data=data, 
expected_size=50) + t2.delete([5, 6, 7, 8, 9]) + assert len(t2) == 45 + + # All even positions + t3 = CTable(Row, new_data=data, expected_size=50) + t3.delete(list(range(0, 50, 2))) + assert len(t3) == 25 + + # All odd positions + t4 = CTable(Row, new_data=data, expected_size=50) + t4.delete(list(range(1, 50, 2))) + assert len(t4) == 25 + + # Slice-equivalent: range(10, 20) + t5 = CTable(Row, new_data=data, expected_size=50) + t5.delete(list(range(10, 20))) + assert len(t5) == 40 + + # Slice with step: range(0, 20, 2) + t6 = CTable(Row, new_data=data, expected_size=50) + t6.delete(list(range(0, 20, 2))) + assert len(t6) == 40 + + # First 10 rows + t7 = CTable(Row, new_data=data, expected_size=50) + t7.delete(list(range(0, 10))) + assert len(t7) == 40 + + # Last 10 rows + t8 = CTable(Row, new_data=data, expected_size=50) + t8.delete(list(range(40, 50))) + assert len(t8) == 40 + + +def test_delete_out_of_bounds(): + """All IndexError scenarios: full table, partial table, empty table, negative.""" + data = generate_test_data(50) + + # Beyond length on full table + t = CTable(Row, new_data=data, expected_size=50) + with pytest.raises(IndexError): + t.delete(60) + with pytest.raises(IndexError): + t.delete(-60) + + # Beyond nrows on partial table (capacity 50, only 25 rows) + t2 = CTable(Row, new_data=generate_test_data(25), expected_size=50) + assert len(t2) == 25 + with pytest.raises(IndexError): + t2.delete(35) + + # Empty table: positions 0, 25, -1 all raise + for pos in [0, 25, -1]: + empty = CTable(Row, expected_size=50) + assert len(empty) == 0 + with pytest.raises(IndexError): + empty.delete(pos) + + +def test_delete_edge_cases(): + """Same position twice, all rows front/back, negative and mixed indices.""" + data = generate_test_data(50) + + # Same logical position twice: second delete hits what was position 11 + t = CTable(Row, new_data=data, expected_size=50) + t.delete(10) + assert len(t) == 49 + t.delete(10) + assert len(t) == 48 + + # Delete all rows from 
the front one by one + t2 = CTable(Row, new_data=data, expected_size=50) + for _ in range(50): + t2.delete(0) + assert len(t2) == 0 + + # Delete all rows from the back one by one + t3 = CTable(Row, new_data=data, expected_size=50) + for _ in range(50): + t3.delete(-1) + assert len(t3) == 0 + + # Negative indices list + t4 = CTable(Row, new_data=data, expected_size=50) + t4.delete([-1, -5, -10]) + assert len(t4) == 47 + + # Mixed positive and negative indices + t5 = CTable(Row, new_data=data, expected_size=50) + t5.delete([0, -1, 25]) + assert len(t5) == 47 + + +def test_delete_invalid_types(): + """string, float, and list-with-strings all raise errors.""" + data = generate_test_data(50) + + t = CTable(Row, new_data=data, expected_size=50) + with pytest.raises(TypeError): + t.delete("invalid") + with pytest.raises(TypeError): + t.delete(10.5) + with pytest.raises(IndexError): + t.delete([0, "invalid", 10]) + + +def test_delete_stress(): + """Large batch deletion and alternating multi-pass pattern.""" + data = generate_test_data(50) + + # Delete 40 out of 50 at once + t = CTable(Row, new_data=data, expected_size=50) + t.delete(list(range(0, 40))) + assert len(t) == 10 + + # Alternating two-pass deletion + t2 = CTable(Row, new_data=data, expected_size=50) + t2.delete(list(range(0, 50, 2))) # delete all even -> 25 remain + assert len(t2) == 25 + t2.delete(list(range(0, 25, 2))) # delete every other of remaining -> ~12 + assert len(t2) == 12 + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_extend_delete.py b/tests/ctable/test_extend_delete.py new file mode 100644 index 00000000..41e82e4a --- /dev/null +++ b/tests/ctable/test_extend_delete.py @@ -0,0 +1,220 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +def generate_test_data(n_rows: int, start_id: int = 1) -> list: + return [(start_id + i, complex(i, -i), float((i * 7) % 100), bool(i % 2)) for i in range(n_rows)] + + +def get_valid_mask(table: CTable) -> np.ndarray: + return np.array(table._valid_rows[: len(table._valid_rows)], dtype=bool) + + +def assert_mask_matches(table: CTable, expected_mask: list): + actual = get_valid_mask(table)[: len(expected_mask)] + np.testing.assert_array_equal( + actual, + np.array(expected_mask, dtype=bool), + err_msg=f"Mask mismatch.\nExpected: {expected_mask}\nGot: {actual}", + ) + + +def assert_data_at_positions(table: CTable, positions: list, expected_ids: list): + for pos, expected_id in zip(positions, expected_ids, strict=False): + actual_id = int(table._cols["id"][pos]) + assert actual_id == expected_id, f"Position {pos}: expected ID {expected_id}, got {actual_id}" + + +# ------------------------------------------------------------------- +# Tests +# ------------------------------------------------------------------- + + +def test_gap_fill_mask_and_positions(): + """extend and append fill from last valid position; mask is updated correctly.""" + # extend after deletions: mask and physical positions + t = CTable(Row, new_data=generate_test_data(7, 1), expected_size=10) + t.delete([0, 2, 4, 6]) + assert_mask_matches(t, [False, True, False, True, False, True, False]) + assert len(t) == 3 + t.extend(generate_test_data(3, 8)) + assert_mask_matches(t, [False, True, False, True, 
False, True, True, True, True]) + assert len(t) == 6 + assert_data_at_positions(t, [6, 7, 8], [8, 9, 10]) + + # append fills from last valid position, not into holes + t2 = CTable(Row, new_data=generate_test_data(5, 1), expected_size=10) + t2.delete([1, 3]) + assert_mask_matches(t2, [True, False, True, False, True]) + t2.append((6, 1j, 50.0, True)) + assert_mask_matches(t2, [True, False, True, False, True, True]) + t2.append((7, 2j, 60.0, False)) + assert_mask_matches(t2, [True, False, True, False, True, True, True]) + + # extend fills from last valid position when there's enough capacity + t3 = CTable(Row, new_data=generate_test_data(10, 1), expected_size=15) + t3.delete([2, 4, 6]) + t3.extend(generate_test_data(3, 20)) + assert_data_at_positions(t3, [10, 11, 12], [20, 21, 22]) + + +def test_resize_behavior(): + """Resize triggered when capacity is full; compact=True avoids massive resize.""" + # compact=False: append beyond capacity must resize + t = CTable(Row, new_data=generate_test_data(10, 1), expected_size=10, compact=False) + t.delete(list(range(9))) + assert len(t) == 1 + initial_cap = len(t._valid_rows) + t.append((11, 5j, 75.0, True)) + assert len(t._valid_rows) > initial_cap + + # compact=True: no massive resize after deletions + extend + t2 = CTable(Row, new_data=generate_test_data(10, 1), expected_size=10, compact=True) + t2.delete(list(range(9))) + assert len(t2) == 1 + initial_cap2 = len(t2._valid_rows) + t2.extend(generate_test_data(3, 11)) + assert len(t2._valid_rows) <= initial_cap2 * 2 + + # extend exceeding capacity always resizes regardless of compact + t3 = CTable(Row, new_data=generate_test_data(5, 1), expected_size=10, compact=False) + t3.delete([0, 2, 4]) + initial_cap3 = len(t3._valid_rows) + t3.extend(generate_test_data(20, 100)) + assert len(t3._valid_rows) > initial_cap3 + + +def test_mixed_append_extend_with_gaps(): + """Multiple extends, appends, and deletes interleaved; lengths stay correct.""" + # Multiple extends with intermediate 
deletions + t = CTable(Row, expected_size=20) + t.extend(generate_test_data(5, 1)) + t.extend(generate_test_data(3, 10)) + assert len(t) == 8 + t.delete([2, 4, 6]) + assert len(t) == 5 + t.extend(generate_test_data(2, 20)) + assert len(t) == 7 + t.delete([0, 1]) + assert len(t) == 5 + t.extend(generate_test_data(4, 30)) + assert len(t) == 9 + + # append + extend mixed, delete all then re-extend + t2 = CTable(Row, expected_size=20) + for i in range(5): + t2.append((i + 1, complex(i), float(i * 10), True)) + assert len(t2) == 5 + t2.extend(generate_test_data(5, 10)) + assert len(t2) == 10 + t2.delete([1, 3, 5, 7, 9]) + assert len(t2) == 5 + t2.append((100, 0j, 50.0, False)) + assert len(t2) == 6 + t2.extend(generate_test_data(3, 200)) + assert len(t2) == 9 + + # Fill all gaps then extend; delete all then extend from scratch + t3 = CTable(Row, new_data=generate_test_data(10, 1), expected_size=15) + t3.delete(list(range(0, 10, 2))) + assert len(t3) == 5 + t3.extend(generate_test_data(5, 20)) + assert len(t3) == 10 + + t4 = CTable(Row, new_data=generate_test_data(10, 1), expected_size=15) + t4.delete(list(range(10))) + assert len(t4) == 0 + t4.extend(generate_test_data(5, 100)) + assert len(t4) == 5 + + +def test_compact_behavior(): + """Manual compact consolidates mask; auto-compact keeps data correct after extend.""" + # Manual compact: valid rows packed to front, extend fills after them + t = CTable(Row, new_data=generate_test_data(10, 1), expected_size=15, compact=False) + t.delete([1, 3, 5, 7, 9]) + assert len(t) == 5 + t.compact() + assert_mask_matches(t, [True] * 5 + [False] * 10) + t.extend(generate_test_data(3, 20)) + assert len(t) == 8 + + # Auto-compact: table stays consistent after heavy deletions + extend + t2 = CTable(Row, new_data=generate_test_data(10, 1), expected_size=15, compact=True) + t2.delete(list(range(0, 8))) + assert len(t2) == 2 + t2.extend(generate_test_data(10, 100)) + assert len(t2) == 12 + + +def test_complex_scenarios(): + """Sparse gaps, 
alternating cycles, data integrity, and full workflow.""" + # Sparse table: many scattered deletions then bulk extend + t = CTable(Row, new_data=generate_test_data(20, 1), expected_size=30) + t.delete([0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]) + assert len(t) == 5 + t.extend(generate_test_data(10, 100)) + assert len(t) == 15 + + # Alternating extend/delete cycles + t2 = CTable(Row, expected_size=50) + for cycle in range(5): + t2.extend(generate_test_data(10, cycle * 100)) + current_len = len(t2) + if current_len >= 5: + t2.delete(list(range(0, min(5, current_len)))) + + # Data integrity: correct row values survive delete + extend + t3 = CTable( + Row, new_data=[(1, 1j, 10.0, True), (2, 2j, 20.0, False), (3, 3j, 30.0, True)], expected_size=10 + ) + t3.delete(1) + assert t3.row[0].id[0] == 1 + assert t3.row[1].id[0] == 3 + t3.extend([(10, 10j, 100.0, True), (11, 11j, 100.0, False)]) + assert t3.row[0].id[0] == 1 + assert t3.row[1].id[0] == 3 + assert t3.row[2].id[0] == 10 + assert t3.row[3].id[0] == 11 + + # Full workflow + t4 = CTable(Row, expected_size=20, compact=False) + t4.extend(generate_test_data(10, 1)) + assert len(t4) == 10 + t4.delete([0, 2, 4, 6, 8]) + assert len(t4) == 5 + t4.append((100, 0j, 50.0, True)) + t4.append((101, 1j, 60.0, False)) + assert len(t4) == 7 + t4.extend(generate_test_data(5, 200)) + assert len(t4) == 12 + t4.delete([3, 7, 10]) + assert len(t4) == 9 + t4.extend(generate_test_data(3, 300)) + assert len(t4) == 12 + assert t4.nrows == 12 + assert t4.ncols == 4 + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_row_logic.py b/tests/ctable/test_row_logic.py new file mode 100644 index 00000000..bece75c0 --- /dev/null +++ b/tests/ctable/test_row_logic.py @@ -0,0 +1,217 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable +from blosc2.ctable import Column + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0)) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +def generate_test_data(n_rows: int, start_id: int = 0) -> list: + return [(start_id + i, float(i * 10), i % 2 == 0) for i in range(n_rows)] + + +# ------------------------------------------------------------------- +# Tests +# ------------------------------------------------------------------- + + +def test_row_int_indexing(): + """int indexing: no holes, with holes, negative indices, and out-of-range.""" + data = generate_test_data(20) + + # No holes: spot checks + t = CTable(Row, new_data=data) + r = t.row[0] + assert isinstance(r, CTable) + assert len(r) == 1 + assert r.id[0] == 0 + assert r.score[0] == 0.0 + assert r.active[0] + assert t.row[10].id[0] == 10 + assert t.row[10].score[0] == 100.0 + + # Negative indices + assert t.row[-1].id[0] == 19 + assert t.row[-5].id[0] == 15 + + # With holes: delete odd positions -> valid: 0,2,4,6,8,10... 
+ t.delete([1, 3, 5, 7, 9]) + assert t.row[0].id[0] == 0 + assert t.row[1].id[0] == 2 + assert t.row[5].id[0] == 10 + + # Out of range + t2 = CTable(Row, new_data=generate_test_data(10)) + for idx in [10, 100, -11]: + with pytest.raises(IndexError): + _ = t2.row[idx] + + +def test_row_slice_indexing(): + """Slice indexing: no holes, with holes, step, negative, beyond bounds, empty/full.""" + data = generate_test_data(20) + + # No holes + t = CTable(Row, new_data=data) + assert isinstance(t.row[0:5], CTable) + assert list(t.row[0:5].id) == [0, 1, 2, 3, 4] + assert list(t.row[10:15].id) == [10, 11, 12, 13, 14] + assert list(t.row[::2].id) == [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + + # With step + assert list(t.row[0:10:2].id) == [0, 2, 4, 6, 8] + assert list(t.row[1:10:3].id) == [1, 4, 7] + + # Negative indices + assert list(t.row[-5:].id) == [15, 16, 17, 18, 19] + assert list(t.row[-10:-5].id) == [10, 11, 12, 13, 14] + + # With holes: delete odd positions + t.delete([1, 3, 5, 7, 9]) + assert list(t.row[0:5].id) == [0, 2, 4, 6, 8] + assert list(t.row[5:10].id) == [10, 11, 12, 13, 14] + + # Beyond bounds + t2 = CTable(Row, new_data=generate_test_data(10)) + assert len(t2.row[11:20]) == 0 + assert list(t2.row[5:100].id) == [5, 6, 7, 8, 9] + assert len(t2.row[100:]) == 0 + + # Empty and full slices + assert len(t2.row[5:5]) == 0 + assert len(t2.row[0:0]) == 0 + result = t2.row[:] + assert len(result) == 10 + assert list(result.id) == list(range(10)) + + +def test_row_list_indexing(): + """List indexing: no holes, with holes, out-of-range, edge cases.""" + data = generate_test_data(20) + + # No holes + t = CTable(Row, new_data=data) + r = t.row[[0, 5, 10, 15]] + assert isinstance(r, CTable) + assert len(r) == 4 + assert set(r.id) == {0, 5, 10, 15} + assert set(t.row[[19, 0, 10]].id) == {0, 10, 19} + + # With holes: delete [1,3,5,7,9] -> logical 0->id0, 1->id2, 2->id4... 
+ t.delete([1, 3, 5, 7, 9]) + assert set(t.row[[0, 2, 4]].id) == {0, 4, 8} + assert set(t.row[[5, 3, 1]].id) == {2, 6, 10} + + # Negative indices in list + t2 = CTable(Row, new_data=generate_test_data(10)) + assert set(t2.row[[0, -1, 5]].id) == {0, 5, 9} + + # Single element + assert t2.row[[5]].id[0] == 5 + + # Duplicate indices -> deduplicated + r_dup = t2.row[[5, 5, 5]] + assert len(r_dup) == 1 + assert r_dup.id[0] == 5 + + # Empty list + assert len(t2.row[[]]) == 0 + + # Out of range + for bad in [[0, 5, 100], [0, 1, -11]]: + with pytest.raises(IndexError): + _ = t2.row[bad] + + +def test_row_view_properties(): + """View metadata, base chain, mask integrity, column liveness, and chained views.""" + data = generate_test_data(100) + tabla0 = CTable(Row, new_data=data) + + # Base is None on root table + assert tabla0.base is None + + # View properties are shared with parent + v = tabla0.row[0:10] + assert v.base is tabla0 + assert v._row_type == tabla0._row_type + assert v._cols is tabla0._cols + assert v._col_widths == tabla0._col_widths + assert v.col_names == tabla0.col_names + + # Read ops on view + view = tabla0.row[5:15] + assert view.id[0] == 5 + assert view.score[0] == 50.0 + assert not view.active[0] + assert list(view.id) == list(range(5, 15)) + + # Mask integrity + assert np.count_nonzero(view._valid_rows[:]) == 10 + + # Column is live (points back to its view) + col = view.id + assert isinstance(col, Column) + assert col._table is view + + # Chained views: base always points to immediate parent + tabla1 = tabla0.row[:50] + assert tabla1.base is tabla0 + assert len(tabla1) == 50 + + tabla2 = tabla1.row[:10] + assert tabla2.base is tabla1 + assert len(tabla2) == 10 + assert list(tabla2.id) == list(range(10)) + + tabla3 = tabla2.row[5:] + assert tabla3.base is tabla2 + assert len(tabla3) == 5 + assert list(tabla3.id) == [5, 6, 7, 8, 9] + + # Chained view with holes on parent + tabla0.delete([5, 10, 15, 20, 25]) + tv1 = tabla0.row[:30] + assert tv1.base is 
tabla0 + assert len(tv1) == 30 + tv2 = tv1.row[10:20] + assert tv2.base is tv1 + assert len(tv2) == 10 + + +def test_row_edge_cases(): + """Empty table, fully-deleted table: int raises IndexError, slice returns empty.""" + # Empty table + empty = CTable(Row) + with pytest.raises(IndexError): + _ = empty.row[0] + assert len(empty.row[:]) == 0 + assert len(empty.row[0:10]) == 0 + + # All rows deleted + data = generate_test_data(10) + t = CTable(Row, new_data=data) + t.delete(list(range(10))) + with pytest.raises(IndexError): + _ = t.row[0] + assert len(t.row[:]) == 0 + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_schema_compiler.py b/tests/ctable/test_schema_compiler.py new file mode 100644 index 00000000..812a8c63 --- /dev/null +++ b/tests/ctable/test_schema_compiler.py @@ -0,0 +1,258 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for compile_schema(), schema_to_dict(), and schema_from_dict().""" + +from dataclasses import MISSING, dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2.schema import bool as b2_bool +from blosc2.schema import complex128, float64, int64, string +from blosc2.schema_compiler import ( + CompiledSchema, + compile_schema, + schema_from_dict, + schema_to_dict, +) + +# ------------------------------------------------------------------- +# Fixtures +# ------------------------------------------------------------------- + + +@dataclass +class Simple: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +@dataclass +class WithString: + name: str = blosc2.field(blosc2.string(max_length=16)) + value: float = 
blosc2.field(blosc2.float64(), default=0.0) + + +@dataclass +class WithComplex: + id: int = blosc2.field(blosc2.int64()) + c_val: complex = blosc2.field(blosc2.complex128(), default=0j) + + +# ------------------------------------------------------------------- +# compile_schema — explicit b2.field() +# ------------------------------------------------------------------- + + +def test_compile_returns_compiled_schema(): + s = compile_schema(Simple) + assert isinstance(s, CompiledSchema) + assert s.row_cls is Simple + + +def test_compile_column_count(): + s = compile_schema(Simple) + assert len(s.columns) == 3 + + +def test_compile_column_names_order(): + s = compile_schema(Simple) + assert [c.name for c in s.columns] == ["id", "score", "active"] + + +def test_compile_column_dtypes(): + s = compile_schema(Simple) + assert s.columns_by_name["id"].dtype == np.dtype(np.int64) + assert s.columns_by_name["score"].dtype == np.dtype(np.float64) + assert s.columns_by_name["active"].dtype == np.dtype(np.bool_) + + +def test_compile_column_specs(): + s = compile_schema(Simple) + assert isinstance(s.columns_by_name["id"].spec, int64) + assert s.columns_by_name["id"].spec.ge == 0 + assert isinstance(s.columns_by_name["score"].spec, float64) + assert s.columns_by_name["score"].spec.le == 100 + + +def test_compile_defaults(): + s = compile_schema(Simple) + assert s.columns_by_name["id"].default is MISSING # required + assert s.columns_by_name["score"].default == 0.0 + assert s.columns_by_name["active"].default is True + + +def test_compile_py_types(): + s = compile_schema(Simple) + assert s.columns_by_name["id"].py_type is int + assert s.columns_by_name["score"].py_type is float + assert s.columns_by_name["active"].py_type is bool + + +def test_compile_string_column(): + s = compile_schema(WithString) + col = s.columns_by_name["name"] + assert isinstance(col.spec, string) + assert col.spec.max_length == 16 + assert col.dtype == np.dtype("U16") + + +def test_compile_complex_column(): 
+ s = compile_schema(WithComplex) + col = s.columns_by_name["c_val"] + assert isinstance(col.spec, complex128) + assert col.dtype == np.dtype(np.complex128) + assert col.default == 0j + + +# ------------------------------------------------------------------- +# compile_schema — inferred shorthand (plain annotations) +# ------------------------------------------------------------------- + + +@dataclass +class Inferred: + count: int + ratio: float + flag: bool + + +def test_inferred_shorthand(): + s = compile_schema(Inferred) + assert len(s.columns) == 3 + assert isinstance(s.columns_by_name["count"].spec, int64) + assert isinstance(s.columns_by_name["ratio"].spec, float64) + assert isinstance(s.columns_by_name["flag"].spec, b2_bool) + + +def test_inferred_no_constraints(): + s = compile_schema(Inferred) + for col in s.columns: + assert col.spec.to_pydantic_kwargs() == {} + + +# ------------------------------------------------------------------- +# compile_schema — annotation / spec mismatch rejection +# ------------------------------------------------------------------- + + +def test_annotation_spec_mismatch(): + @dataclass + class Bad: + x: str = blosc2.field(blosc2.int64()) # str annotation but int64 spec + + with pytest.raises(TypeError, match="incompatible"): + compile_schema(Bad) + + +def test_non_dataclass_rejected(): + class NotADataclass: + pass + + with pytest.raises(TypeError, match="dataclass"): + compile_schema(NotADataclass) + + +# ------------------------------------------------------------------- +# compile_schema — per-column cparams config +# ------------------------------------------------------------------- + + +def test_column_cparams_stored(): + @dataclass + class WithCparams: + id: int = blosc2.field(blosc2.int64(), cparams={"clevel": 9}) + score: float = blosc2.field(blosc2.float64(), default=0.0) + + s = compile_schema(WithCparams) + assert s.columns_by_name["id"].config.cparams == {"clevel": 9} + assert 
s.columns_by_name["score"].config.cparams is None + + +# ------------------------------------------------------------------- +# schema_to_dict / schema_from_dict (Step 12) +# ------------------------------------------------------------------- + + +def test_schema_to_dict_structure(): + d = schema_to_dict(compile_schema(Simple)) + assert d["version"] == 1 + assert d["row_cls"] == "Simple" + assert len(d["columns"]) == 3 + + +def test_schema_to_dict_column_fields(): + d = schema_to_dict(compile_schema(Simple)) + id_col = next(c for c in d["columns"] if c["name"] == "id") + assert id_col["kind"] == "int64" + assert id_col["ge"] == 0 + assert id_col["default"] is None # MISSING → None + + +def test_schema_to_dict_default_values(): + d = schema_to_dict(compile_schema(Simple)) + score_col = next(c for c in d["columns"] if c["name"] == "score") + assert score_col["default"] == 0.0 + + active_col = next(c for c in d["columns"] if c["name"] == "active") + assert active_col["default"] is True + + +def test_schema_to_dict_complex_default(): + d = schema_to_dict(compile_schema(WithComplex)) + c_col = next(c for c in d["columns"] if c["name"] == "c_val") + assert c_col["default"]["__complex__"] is True + assert c_col["default"]["real"] == 0.0 + assert c_col["default"]["imag"] == 0.0 + + +def test_schema_roundtrip(): + """schema_from_dict(schema_to_dict(s)) reproduces the same column structure.""" + original = compile_schema(Simple) + d = schema_to_dict(original) + restored = schema_from_dict(d) + + assert len(restored.columns) == len(original.columns) + for orig_col, rest_col in zip(original.columns, restored.columns, strict=True): + assert orig_col.name == rest_col.name + assert orig_col.dtype == rest_col.dtype + assert type(orig_col.spec) is type(rest_col.spec) + if orig_col.default is MISSING: + assert rest_col.default is MISSING + else: + assert orig_col.default == rest_col.default + + +def test_schema_from_dict_no_row_cls(): + """Reconstructed schema has row_cls=None 
(original class not available).""" + d = schema_to_dict(compile_schema(Simple)) + restored = schema_from_dict(d) + assert restored.row_cls is None + + +def test_schema_from_dict_preserves_constraints(): + d = schema_to_dict(compile_schema(Simple)) + restored = schema_from_dict(d) + id_col = restored.columns_by_name["id"] + assert id_col.spec.ge == 0 + score_col = restored.columns_by_name["score"] + assert score_col.spec.le == 100 + + +def test_schema_from_dict_unknown_kind(): + d = {"version": 1, "row_cls": "X", "columns": [{"name": "x", "kind": "unknown", "default": None}]} + with pytest.raises(ValueError, match="Unknown column kind"): + schema_from_dict(d) + + +def test_schema_from_dict_unsupported_version(): + d = {"version": 99, "row_cls": "X", "columns": []} + with pytest.raises(ValueError, match="Unsupported schema version"): + schema_from_dict(d) diff --git a/tests/ctable/test_schema_mutations.py b/tests/ctable/test_schema_mutations.py new file mode 100644 index 00000000..ca119678 --- /dev/null +++ b/tests/ctable/test_schema_mutations.py @@ -0,0 +1,396 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for add_column, drop_column, rename_column, Column.assign, +and the corrected view mutability model.""" + +import os +import pathlib +import shutil +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + +TABLE_ROOT = str(pathlib.Path(__file__).parent / "saved_ctable" / "test_schema_mutations") + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +DATA10 = [(i, float(i * 10), True) for i in range(10)] + + +@pytest.fixture(autouse=True) +def clean_dir(): + if os.path.exists(TABLE_ROOT): + shutil.rmtree(TABLE_ROOT) + os.makedirs(TABLE_ROOT, exist_ok=True) + yield + if os.path.exists(TABLE_ROOT): + shutil.rmtree(TABLE_ROOT) + + +def table_path(name): + return os.path.join(TABLE_ROOT, name) + + +# =========================================================================== +# View mutability — value writes allowed, structural changes blocked +# =========================================================================== + + +def test_view_allows_column_setitem(): + """Writing values through a view modifies the parent table.""" + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) # rows 5-9 + # double scores of those rows using __setitem__ + indices = list(range(len(view))) + new_scores = view["score"].to_numpy() * 2 + view["score"][indices] = new_scores + # check parent was modified + assert t["score"][5] == pytest.approx(100.0) # was 50.0 + + +def test_view_allows_assign(): + """assign() through a view modifies the parent table.""" + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + view["score"].assign(np.zeros(len(view))) + assert t["score"][5] == pytest.approx(0.0) + assert t["score"][4] == 
pytest.approx(40.0) # untouched + + +def test_view_blocks_append(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(TypeError): + view.append((99, 10.0, True)) + + +def test_view_blocks_delete(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(ValueError, match="view"): + view.delete(0) + + +def test_view_blocks_compact(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(ValueError, match="view"): + view.compact() + + +def test_readonly_disk_table_blocks_assign(): + path = table_path("ro") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + del t + t_ro = CTable.open(path, mode="r") + with pytest.raises(ValueError, match="read-only"): + t_ro["score"].assign(np.ones(len(t_ro))) + + +def test_readonly_disk_table_blocks_setitem(): + path = table_path("ro_setitem") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + del t + t_ro = CTable.open(path, mode="r") + with pytest.raises(ValueError, match="read-only"): + t_ro["score"][0] = 99.0 + + +# =========================================================================== +# Column.assign +# =========================================================================== + + +def test_assign_replaces_all_values(): + t = CTable(Row, new_data=DATA10) + t["score"].assign([99.0] * 10) + assert list(t["score"].to_numpy()) == [99.0] * 10 + + +def test_assign_coerces_python_ints_to_float(): + t = CTable(Row, new_data=DATA10) + t["score"].assign(list(range(10))) # Python ints → float64 + np.testing.assert_array_equal(t["score"].to_numpy(), np.arange(10, dtype=np.float64)) + + +def test_assign_wrong_length_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(ValueError, match="10"): + t["score"].assign([1.0, 2.0]) + + +def test_assign_through_view_touches_only_matching_rows(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] < 5) # rows 0-4 + view["score"].assign([0.0] * 5) + # rows 
0-4 → 0, rows 5-9 unchanged + scores = t["score"].to_numpy() + np.testing.assert_array_equal(scores[:5], np.zeros(5)) + np.testing.assert_array_equal(scores[5:], np.arange(5, 10, dtype=np.float64) * 10) + + +def test_assign_respects_deleted_rows(): + t = CTable(Row, new_data=DATA10) + t.delete([0]) # delete id=0; 9 live rows remain + t["score"].assign([1.0] * 9) + assert len(t["score"].to_numpy()) == 9 + assert all(v == 1.0 for v in t["score"].to_numpy()) + + +# =========================================================================== +# add_column +# =========================================================================== + + +def test_add_column_appears_in_col_names(): + t = CTable(Row, new_data=DATA10) + t.add_column("weight", blosc2.float64(), 0.0) + assert "weight" in t.col_names + + +def test_add_column_fills_default_for_existing_rows(): + t = CTable(Row, new_data=DATA10) + t.add_column("weight", blosc2.float64(), 5.5) + np.testing.assert_array_equal(t["weight"].to_numpy(), np.full(10, 5.5)) + + +def test_add_column_new_rows_can_use_it(): + t = CTable(Row, new_data=DATA10) + t.add_column("weight", blosc2.float64(), 0.0) + # After adding, extend doesn't know about weight — add manually + t["weight"].assign(np.ones(10) * 2.0) + assert t["weight"].mean() == pytest.approx(2.0) + + +def test_add_column_schema_updated(): + t = CTable(Row, new_data=DATA10) + t.add_column("weight", blosc2.float64(), 0.0) + assert "weight" in t.schema.columns_by_name + + +def test_add_column_persists_on_disk(): + path = table_path("add_col") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + t.add_column("weight", blosc2.float64(), 7.0) + del t + t2 = CTable.open(path) + assert "weight" in t2.col_names + np.testing.assert_array_equal(t2["weight"].to_numpy(), np.full(10, 7.0)) + + +def test_add_column_view_raises(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(ValueError, match="view"): + view.add_column("weight", 
blosc2.float64(), 0.0) + + +def test_add_column_duplicate_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(ValueError, match="already exists"): + t.add_column("score", blosc2.float64(), 0.0) + + +def test_add_column_bad_default_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(TypeError): + t.add_column("flag", blosc2.int8(), "not_a_number") + + +def test_add_column_skips_deleted_rows(): + t = CTable(Row, new_data=DATA10) + t.delete([0, 1]) # 8 live rows + t.add_column("weight", blosc2.float64(), 3.0) + vals = t["weight"].to_numpy() + assert len(vals) == 8 + assert all(v == 3.0 for v in vals) + + +# =========================================================================== +# drop_column +# =========================================================================== + + +def test_drop_column_removes_from_col_names(): + t = CTable(Row, new_data=DATA10) + t.drop_column("active") + assert "active" not in t.col_names + + +def test_drop_column_schema_updated(): + t = CTable(Row, new_data=DATA10) + t.drop_column("active") + assert "active" not in t.schema.columns_by_name + + +def test_drop_column_last_raises(): + @dataclass + class OneCol: + id: int = blosc2.field(blosc2.int64()) + + t = CTable(OneCol, new_data=[(i,) for i in range(5)]) + with pytest.raises(ValueError, match="last"): + t.drop_column("id") + + +def test_drop_column_missing_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(KeyError): + t.drop_column("nonexistent") + + +def test_drop_column_view_raises(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(ValueError, match="view"): + view.drop_column("active") + + +def test_drop_column_deletes_file_on_disk(): + path = table_path("drop_col") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + t.drop_column("active") + assert not os.path.exists(os.path.join(path, "_cols", "active.b2nd")) + + +def test_drop_column_persists_schema_on_disk(): + path = 
table_path("drop_schema") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + t.drop_column("active") + del t + t2 = CTable.open(path) + assert "active" not in t2.col_names + assert t2.ncols == 2 + + +# =========================================================================== +# rename_column +# =========================================================================== + + +def test_rename_column_updates_col_names(): + t = CTable(Row, new_data=DATA10) + t.rename_column("score", "points") + assert "points" in t.col_names + assert "score" not in t.col_names + + +def test_rename_column_data_intact(): + t = CTable(Row, new_data=DATA10) + original = t["score"].to_numpy().copy() + t.rename_column("score", "points") + np.testing.assert_array_equal(t["points"].to_numpy(), original) + + +def test_rename_column_schema_updated(): + t = CTable(Row, new_data=DATA10) + t.rename_column("score", "points") + assert "points" in t.schema.columns_by_name + assert "score" not in t.schema.columns_by_name + + +def test_rename_column_order_preserved(): + t = CTable(Row, new_data=DATA10) + t.rename_column("score", "points") + assert t.col_names == ["id", "points", "active"] + + +def test_rename_column_missing_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(KeyError): + t.rename_column("nonexistent", "foo") + + +def test_rename_column_conflict_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(ValueError, match="already exists"): + t.rename_column("score", "active") + + +def test_rename_column_view_raises(): + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] > 4) + with pytest.raises(ValueError, match="view"): + view.rename_column("score", "points") + + +def test_rename_column_persists_on_disk(): + path = table_path("rename_col") + t = CTable(Row, urlpath=path, mode="w", new_data=DATA10) + t.rename_column("score", "points") + del t + t2 = CTable.open(path) + assert "points" in t2.col_names + assert "score" not in t2.col_names + assert 
os.path.exists(os.path.join(path, "_cols", "points.b2nd")) + assert not os.path.exists(os.path.join(path, "_cols", "score.b2nd")) + + +# =========================================================================== +# Boolean mask indexing (pandas-style) +# =========================================================================== + + +def test_bool_mask_getitem(): + t = CTable(Row, new_data=DATA10) + mask = t["id"].to_numpy() % 2 == 0 # even ids + result = t["score"][mask] + np.testing.assert_array_equal(result, np.array([0.0, 20.0, 40.0, 60.0, 80.0])) + + +def test_bool_mask_setitem(): + t = CTable(Row, new_data=DATA10) + mask = t["id"].to_numpy() % 2 == 0 + t["score"][mask] = 0.0 + scores = t["score"].to_numpy() + np.testing.assert_array_equal(scores[0::2], np.zeros(5)) # evens zeroed + np.testing.assert_array_equal(scores[1::2], np.array([10.0, 30.0, 50.0, 70.0, 90.0])) + + +def test_bool_mask_inplace_multiply(): + """The pandas idiom: col[mask] *= scalar.""" + t = CTable(Row, new_data=DATA10) + mask = t["id"].to_numpy() % 2 == 0 + t["score"][mask] *= 2 + scores = t["score"].to_numpy() + np.testing.assert_array_equal(scores[0::2], np.array([0.0, 40.0, 80.0, 120.0, 160.0])) + np.testing.assert_array_equal(scores[1::2], np.array([10.0, 30.0, 50.0, 70.0, 90.0])) + + +def test_bool_mask_wrong_length_raises(): + t = CTable(Row, new_data=DATA10) + bad_mask = np.array([True, False, True], dtype=np.bool_) + with pytest.raises(IndexError, match="length"): + _ = t["score"][bad_mask] + + +def test_bool_mask_through_view(): + """Boolean mask indexing works on views too.""" + t = CTable(Row, new_data=DATA10) + view = t.where(t["id"] < 6) # rows 0-5 + mask = view["id"].to_numpy() % 2 == 0 + view["score"][mask] *= 10 + # rows 0,2,4 in view → ids 0,2,4 in parent → scores 0,20,40 * 10 + assert t["score"][0] == pytest.approx(0.0) + assert t["score"][2] == pytest.approx(200.0) + assert t["score"][4] == pytest.approx(400.0) + assert t["score"][1] == pytest.approx(10.0) # untouched 
+ + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_schema_specs.py b/tests/ctable/test_schema_specs.py new file mode 100644 index 00000000..645087d7 --- /dev/null +++ b/tests/ctable/test_schema_specs.py @@ -0,0 +1,343 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for schema spec objects (blosc2.schema).""" + +import numpy as np +import pytest + +import blosc2 +from blosc2.schema import ( + SchemaSpec, + complex64, + complex128, + float32, + float64, + int8, + int16, + int32, + int64, + string, + uint8, + uint16, + uint32, + uint64, +) +from blosc2.schema import ( + bool as b2_bool, +) +from blosc2.schema import ( + bytes as b2_bytes, +) + +# ------------------------------------------------------------------- +# dtype mapping +# ------------------------------------------------------------------- + + +def test_int_dtypes(): + assert int8().dtype == np.dtype(np.int8) + assert int16().dtype == np.dtype(np.int16) + assert int32().dtype == np.dtype(np.int32) + assert int64().dtype == np.dtype(np.int64) + assert int64(ge=0).dtype == np.dtype(np.int64) + + +def test_uint_dtypes(): + assert uint8().dtype == np.dtype(np.uint8) + assert uint16().dtype == np.dtype(np.uint16) + assert uint32().dtype == np.dtype(np.uint32) + assert uint64().dtype == np.dtype(np.uint64) + + +def test_float_dtypes(): + assert float32().dtype == np.dtype(np.float32) + assert float64().dtype == np.dtype(np.float64) + + +def test_bool_dtype(): + assert b2_bool().dtype == np.dtype(np.bool_) + + +def test_complex_dtypes(): + assert complex64().dtype == np.dtype(np.complex64) + assert complex128().dtype == np.dtype(np.complex128) + + +def test_string_dtype(): + assert string(max_length=16).dtype == np.dtype("U16") + assert 
string(max_length=32).dtype == np.dtype("U32") + assert string().dtype == np.dtype("U32") # default max_length=32 + + +def test_bytes_dtype(): + assert b2_bytes(max_length=8).dtype == np.dtype("S8") + assert b2_bytes().dtype == np.dtype("S32") # default max_length=32 + + +# ------------------------------------------------------------------- +# python_type mapping +# ------------------------------------------------------------------- + + +def test_python_types(): + for cls in [int8, int16, int32, int64, uint8, uint16, uint32, uint64]: + assert cls().python_type is int + for cls in [float32, float64]: + assert cls().python_type is float + for cls in [complex64, complex128]: + assert cls().python_type is complex + assert b2_bool().python_type is bool + assert string().python_type is str + assert b2_bytes().python_type is bytes + + +# ------------------------------------------------------------------- +# constraint storage +# ------------------------------------------------------------------- + + +def test_int64_constraints(): + s = int64(ge=0, lt=100) + assert s.ge == 0 + assert s.gt is None + assert s.le is None + assert s.lt == 100 + + +def test_float64_constraints(): + s = float64(gt=0.0, le=1.0) + assert s.gt == 0.0 + assert s.le == 1.0 + assert s.ge is None + assert s.lt is None + + +def test_string_constraints(): + s = string(min_length=2, max_length=10, pattern=r"^\w+$") + assert s.min_length == 2 + assert s.max_length == 10 + assert s.pattern == r"^\w+$" + + +def test_bytes_constraints(): + s = b2_bytes(min_length=1, max_length=8) + assert s.min_length == 1 + assert s.max_length == 8 + + +# ------------------------------------------------------------------- +# to_pydantic_kwargs +# ------------------------------------------------------------------- + + +def test_int64_pydantic_kwargs_partial(): + """Only non-None constraints appear in pydantic kwargs.""" + assert int64(ge=0).to_pydantic_kwargs() == {"ge": 0} + assert int64(ge=0, le=100).to_pydantic_kwargs() == 
{"ge": 0, "le": 100} + assert int64().to_pydantic_kwargs() == {} + + +def test_float64_pydantic_kwargs(): + assert float64(gt=0.0, lt=1.0).to_pydantic_kwargs() == {"gt": 0.0, "lt": 1.0} + + +def test_bool_pydantic_kwargs(): + assert b2_bool().to_pydantic_kwargs() == {} + + +def test_string_pydantic_kwargs(): + s = string(min_length=1, max_length=5) + kw = s.to_pydantic_kwargs() + assert kw["min_length"] == 1 + assert kw["max_length"] == 5 + + +# ------------------------------------------------------------------- +# to_metadata_dict +# ------------------------------------------------------------------- + + +def test_int64_metadata_dict(): + d = int64(ge=0, le=100).to_metadata_dict() + assert d["kind"] == "int64" + assert d["ge"] == 0 + assert d["le"] == 100 + assert "gt" not in d + assert "lt" not in d + + +def test_float64_metadata_dict(): + d = float64().to_metadata_dict() + assert d["kind"] == "float64" + assert len(d) == 1 # no constraints + + +def test_bool_metadata_dict(): + assert b2_bool().to_metadata_dict() == {"kind": "bool"} + + +def test_string_metadata_dict(): + d = string(max_length=9).to_metadata_dict() + assert d["kind"] == "string" + assert d["max_length"] == 9 + + +def test_complex128_metadata_dict(): + assert complex128().to_metadata_dict() == {"kind": "complex128"} + + +# ------------------------------------------------------------------- +# All specs are SchemaSpec subclasses +# ------------------------------------------------------------------- + + +def test_all_are_schema_spec(): + all_specs = [ + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + float32, + float64, + b2_bool, + complex64, + complex128, + string, + b2_bytes, + ] + for cls in all_specs: + assert issubclass(cls, SchemaSpec) + + +# ------------------------------------------------------------------- +# New integer / float metadata dicts +# ------------------------------------------------------------------- + + +def test_int8_metadata_dict(): + d = 
int8(ge=0, lt=128).to_metadata_dict() + assert d["kind"] == "int8" + assert d["ge"] == 0 + assert d["lt"] == 128 + + +def test_uint8_metadata_dict(): + d = uint8(le=200).to_metadata_dict() + assert d["kind"] == "uint8" + assert d["le"] == 200 + + +def test_float32_metadata_dict(): + d = float32(ge=0.0, le=1.0).to_metadata_dict() + assert d["kind"] == "float32" + assert d["ge"] == 0.0 + assert d["le"] == 1.0 + + +def test_new_kinds_roundtrip(): + """Every new kind serialises and deserialises correctly.""" + from dataclasses import dataclass + + from blosc2.schema_compiler import compile_schema, schema_from_dict, schema_to_dict + + @dataclass + class R: + a: int = blosc2.field(blosc2.int8(ge=0)) + b: int = blosc2.field(blosc2.uint16(), default=0) + c: float = blosc2.field(blosc2.float32(ge=0.0, le=1.0), default=0.0) + + schema = compile_schema(R) + d = schema_to_dict(schema) + restored = schema_from_dict(d) + + assert restored.columns_by_name["a"].spec.to_metadata_dict()["kind"] == "int8" + assert restored.columns_by_name["b"].spec.to_metadata_dict()["kind"] == "uint16" + assert restored.columns_by_name["c"].spec.to_metadata_dict()["kind"] == "float32" + + +# ------------------------------------------------------------------- +# blosc2 namespace exports +# ------------------------------------------------------------------- + + +def test_blosc2_namespace(): + """All spec classes are reachable via the blosc2 namespace.""" + assert blosc2.int8 is int8 + assert blosc2.int16 is int16 + assert blosc2.int32 is int32 + assert blosc2.int64 is int64 + assert blosc2.uint8 is uint8 + assert blosc2.uint16 is uint16 + assert blosc2.uint32 is uint32 + assert blosc2.uint64 is uint64 + assert blosc2.float32 is float32 + assert blosc2.float64 is float64 + assert blosc2.bool is b2_bool + assert blosc2.complex64 is complex64 + assert blosc2.complex128 is complex128 + assert blosc2.string is string + + +# ------------------------------------------------------------------- +# String 
vectorized validation — np.char.str_len path +# ------------------------------------------------------------------- + + +def test_string_validation_vectorized(): + """max_length / min_length use the np.char.str_len path, not np.vectorize.""" + from dataclasses import dataclass + + from blosc2 import CTable + + @dataclass + class Row: + name: str = blosc2.field(blosc2.string(min_length=2, max_length=5)) + + t = CTable(Row, expected_size=10) + t.extend([("hi",), ("hello",)]) # 2 and 5 chars — both valid + assert len(t) == 2 + + with pytest.raises(ValueError, match="max_length"): + t.extend([("toolong",)]) # 7 chars > 5 + + with pytest.raises(ValueError, match="min_length"): + t.extend([("x",)]) # 1 char < 2 + + +def test_string_validation_numpy_array(): + """Vectorized length check catches violations when the array dtype is wider + than the schema's max_length (e.g. dtype U8 with max_length=4).""" + from dataclasses import dataclass + + from blosc2 import CTable + + # Schema says max 4 chars, but the numpy dtype is U8 (wider). + # Strings of 5+ chars survive in the array and are caught by validation. + @dataclass + class Row: + tag: str = blosc2.field(blosc2.string(max_length=4)) + + dtype = np.dtype([("tag", "U8")]) + good = np.array([("ab",), ("cd",)], dtype=dtype) + bad = np.array([("ab",), ("toolong",)], dtype=dtype) # 7 chars > 4 + + t = CTable(Row, expected_size=5) + t.extend(good) + assert len(t) == 2 + + t2 = CTable(Row, expected_size=5) + with pytest.raises(ValueError, match="max_length"): + t2.extend(bad) + + # Note: when the array dtype matches the schema max_length (e.g. U4 with + # max_length=4), NumPy already truncates values to fit the dtype before + # validation runs — so no violation can be detected in that case. 
diff --git a/tests/ctable/test_schema_validation.py b/tests/ctable/test_schema_validation.py new file mode 100644 index 00000000..2d51d29f --- /dev/null +++ b/tests/ctable/test_schema_validation.py @@ -0,0 +1,164 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +# ------------------------------------------------------------------- +# append() validation +# ------------------------------------------------------------------- + + +def test_append_valid_row(): + """Rows within constraints are accepted.""" + t = CTable(Row, expected_size=5) + t.append((0, 0.0, True)) + t.append((1, 100.0, False)) + t.append((99, 50.5, True)) + assert len(t) == 3 + + +def test_append_ge_violation(): + """id < 0 raises ValueError (ge=0).""" + t = CTable(Row, expected_size=5) + with pytest.raises(ValueError): + t.append((-1, 50.0, True)) + + +def test_append_le_violation(): + """score > 100 raises ValueError (le=100).""" + t = CTable(Row, expected_size=5) + with pytest.raises(ValueError): + t.append((1, 100.1, True)) + + +def test_append_boundary_values(): + """Exact boundary values (ge=0 and le=100) are accepted.""" + t = CTable(Row, expected_size=5) + t.append((0, 0.0, True)) # id=0 (ge boundary), score=0.0 (ge boundary) + t.append((1, 100.0, False)) # score=100.0 (le boundary) + assert len(t) == 2 + + +def test_append_default_fill(): + """Fields with defaults can be omitted from a tuple — Pydantic fills them in.""" + t = CTable(Row, 
expected_size=5) + # Only id is required; score and active have defaults + t.append((5,)) # score=0.0, active=True filled by defaults + assert len(t) == 1 + assert t.row[0].id[0] == 5 + + +def test_append_validate_false(): + """validate=False skips constraint checks — invalid data is stored silently.""" + t = CTable(Row, expected_size=5, validate=False) + t.append((-5, 200.0, True)) # would fail with validate=True + assert len(t) == 1 + assert int(t._cols["id"][0]) == -5 + + +# ------------------------------------------------------------------- +# extend() validation (vectorized) +# ------------------------------------------------------------------- + + +def test_extend_valid_rows(): + """Bulk insert within constraints succeeds.""" + t = CTable(Row, expected_size=10) + data = [(i, float(i), True) for i in range(10)] + t.extend(data) + assert len(t) == 10 + + +def test_extend_ge_violation(): + """A negative id anywhere in the batch raises ValueError.""" + t = CTable(Row, expected_size=10) + data = [(i, float(i), True) for i in range(5)] + [(-1, 50.0, False)] + with pytest.raises(ValueError, match="ge=0"): + t.extend(data) + + +def test_extend_le_violation(): + """A score > 100 anywhere in the batch raises ValueError.""" + t = CTable(Row, expected_size=10) + data = [(i, float(i), True) for i in range(5)] + [(5, 101.0, False)] + with pytest.raises(ValueError, match="le=100"): + t.extend(data) + + +def test_extend_validate_false(): + """validate=False on the table skips bulk constraint checks.""" + t = CTable(Row, expected_size=10, validate=False) + data = [(-1, 200.0, True), (-2, 300.0, False)] + t.extend(data) # no error + assert len(t) == 2 + + +def test_extend_numpy_structured_array(): + """Constraint enforcement also works when extending with a structured NumPy array.""" + dtype = np.dtype([("id", np.int64), ("score", np.float64), ("active", np.bool_)]) + good = np.array([(1, 50.0, True), (2, 75.0, False)], dtype=dtype) + bad = np.array([(1, 50.0, True), (2, 
150.0, False)], dtype=dtype) # score > 100 + + t = CTable(Row, expected_size=5) + t.extend(good) + assert len(t) == 2 + + t2 = CTable(Row, expected_size=5) + with pytest.raises(ValueError, match="le=100"): + t2.extend(bad) + + +# ------------------------------------------------------------------- +# gt / lt constraints +# ------------------------------------------------------------------- + + +@dataclass +class Strict: + x: int = blosc2.field(blosc2.int64(gt=0, lt=10)) + + +def test_gt_lt_append(): + """gt and lt are exclusive bounds.""" + t = CTable(Strict, expected_size=5) + + t.append((5,)) # valid + with pytest.raises(ValueError): + t.append((0,)) # violates gt=0 + with pytest.raises(ValueError): + t.append((10,)) # violates lt=10 + + +def test_gt_lt_extend(): + """Vectorized gt/lt checks work on batches.""" + t = CTable(Strict, expected_size=10) + t.extend([(i,) for i in range(1, 10)]) # 1..9 all valid + assert len(t) == 9 + + t2 = CTable(Strict, expected_size=5) + with pytest.raises(ValueError, match="gt=0"): + t2.extend([(0,)]) + with pytest.raises(ValueError, match="lt=10"): + t2.extend([(10,)]) + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_select_describe_cov.py b/tests/ctable/test_select_describe_cov.py new file mode 100644 index 00000000..5e51d6d3 --- /dev/null +++ b/tests/ctable/test_select_describe_cov.py @@ -0,0 +1,276 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for select(), describe(), and cov().""" + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + label: str = blosc2.field(blosc2.string(max_length=16), default="") + + +DATA10 = [(i, float(i * 10 % 100), i % 2 == 0, f"r{i}") for i in range(10)] + + +# =========================================================================== +# select() +# =========================================================================== + + +def test_select_returns_subset_of_columns(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + assert v.col_names == ["id", "score"] + assert v.ncols == 2 + + +def test_select_preserves_caller_order(): + t = CTable(Row, new_data=DATA10) + v = t.select(["score", "id"]) + assert v.col_names == ["score", "id"] + + +def test_select_shares_data_no_copy(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + # Same NDArray objects — no copy + assert v._cols["id"] is t._cols["id"] + assert v._cols["score"] is t._cols["score"] + + +def test_select_row_count_unchanged(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + assert len(v) == 10 + + +def test_select_data_correct(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + np.testing.assert_array_equal(v["id"].to_numpy(), t["id"].to_numpy()) + np.testing.assert_array_equal(v["score"].to_numpy(), t["score"].to_numpy()) + + +def test_select_base_is_parent(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id"]) + assert v.base is t + + +def test_select_combined_with_where(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", 
"score"]).where(t["id"] > 4) + assert len(v) == 5 + assert v.col_names == ["id", "score"] + + +def test_select_combined_with_deletions(): + t = CTable(Row, new_data=DATA10) + t.delete([0, 1]) + v = t.select(["id", "score"]) + assert len(v) == 8 + np.testing.assert_array_equal(v["id"].to_numpy(), t["id"].to_numpy()) + + +def test_select_schema_updated(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + assert list(v.schema.columns_by_name.keys()) == ["id", "score"] + assert "active" not in v.schema.columns_by_name + assert "label" not in v.schema.columns_by_name + + +def test_select_blocks_structural_mutations(): + t = CTable(Row, new_data=DATA10) + v = t.select(["id", "score"]) + with pytest.raises(TypeError): + v.append((99, 50.0, True, "x")) + + +def test_select_empty_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(ValueError, match="at least one"): + t.select([]) + + +def test_select_unknown_column_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(KeyError): + t.select(["id", "nonexistent"]) + + +def test_select_single_column(): + t = CTable(Row, new_data=DATA10) + v = t.select(["score"]) + assert v.col_names == ["score"] + assert len(v) == 10 + + +# =========================================================================== +# describe() +# =========================================================================== + + +def test_describe_runs_without_error(capsys): + t = CTable(Row, new_data=DATA10) + t.describe() + out = capsys.readouterr().out + assert "id" in out + assert "score" in out + assert "active" in out + assert "label" in out + + +def test_describe_shows_row_count(capsys): + t = CTable(Row, new_data=DATA10) + t.describe() + out = capsys.readouterr().out + assert "10" in out + + +def test_describe_numeric_stats(capsys): + t = CTable(Row, new_data=DATA10) + t.describe() + out = capsys.readouterr().out + assert "mean" in out + assert "std" in out + assert "min" in out + assert "max" in out + + +def 
test_describe_bool_stats(capsys): + t = CTable(Row, new_data=DATA10) + t.describe() + out = capsys.readouterr().out + assert "true" in out + assert "false" in out + + +def test_describe_string_stats(capsys): + t = CTable(Row, new_data=DATA10) + t.describe() + out = capsys.readouterr().out + assert "unique" in out + + +def test_describe_empty_table(capsys): + t = CTable(Row) + t.describe() + out = capsys.readouterr().out + assert "0 rows" in out + assert "empty" in out + + +def test_describe_on_select(capsys): + t = CTable(Row, new_data=DATA10) + t.select(["id", "score"]).describe() + out = capsys.readouterr().out + assert "id" in out + assert "score" in out + assert "active" not in out + + +# =========================================================================== +# cov() +# =========================================================================== + + +def test_cov_shape(): + t = CTable(Row, new_data=DATA10) + c = t.select(["id", "score"]).cov() + assert c.shape == (2, 2) + + +def test_cov_symmetric(): + t = CTable(Row, new_data=DATA10) + c = t.select(["id", "score"]).cov() + np.testing.assert_allclose(c, c.T) + + +def test_cov_diagonal_equals_variance(): + t = CTable(Row, new_data=DATA10) + ids = t["id"].to_numpy().astype(np.float64) + scores = t["score"].to_numpy().astype(np.float64) + c = t.select(["id", "score"]).cov() + assert c[0, 0] == pytest.approx(np.var(ids, ddof=1)) + assert c[1, 1] == pytest.approx(np.var(scores, ddof=1)) + + +def test_cov_single_column_is_scalar(): + t = CTable(Row, new_data=DATA10) + c = t.select(["id"]).cov() + assert c.shape == (1, 1) + ids = t["id"].to_numpy().astype(np.float64) + assert c[0, 0] == pytest.approx(np.var(ids, ddof=1)) + + +def test_cov_bool_column_cast_to_int(): + t = CTable(Row, new_data=DATA10) + # bool is treated as 0/1 int — should not raise + c = t.select(["id", "active"]).cov() + assert c.shape == (2, 2) + + +def test_cov_skips_deleted_rows(): + t = CTable(Row, new_data=DATA10) + t.delete([0]) # remove 
id=0 + ids = t["id"].to_numpy().astype(np.float64) + c = t.select(["id"]).cov() + assert c[0, 0] == pytest.approx(np.var(ids, ddof=1)) + + +def test_cov_string_column_raises(): + t = CTable(Row, new_data=DATA10) + with pytest.raises(TypeError, match="not supported"): + t.cov() # 'label' is a string column + + +def test_cov_complex_column_raises(): + @dataclass + class CRow: + val: complex = blosc2.field(blosc2.complex128()) + + t = CTable(CRow, new_data=[(1 + 2j,), (3 + 4j,)]) + with pytest.raises(TypeError, match="not supported"): + t.cov() + + +def test_cov_too_few_rows_raises(): + t = CTable(Row, new_data=[(0, 0.0, True, "a")]) + with pytest.raises(ValueError, match="2 live rows"): + t.select(["id", "score"]).cov() + + +def test_cov_after_all_deleted_raises(): + t = CTable(Row, new_data=DATA10) + t.delete(list(range(10))) + with pytest.raises(ValueError): + t.select(["id", "score"]).cov() + + +def test_cov_three_columns(): + # identity-ish: if columns are linearly independent, diagonal dominates + data = [(i, float(i), i % 2 == 0, "") for i in range(20)] + t = CTable(Row, new_data=data) + c = t.select(["id", "score", "active"]).cov() + assert c.shape == (3, 3) + np.testing.assert_allclose(c, c.T, atol=1e-10) + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_sort_by.py b/tests/ctable/test_sort_by.py new file mode 100644 index 00000000..67d6c403 --- /dev/null +++ b/tests/ctable/test_sort_by.py @@ -0,0 +1,271 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Tests for CTable.sort_by().""" + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable + + +@dataclass +class Row: + id: int = blosc2.field(blosc2.int64(ge=0)) + score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0) + active: bool = blosc2.field(blosc2.bool(), default=True) + + +@dataclass +class StrRow: + label: str = blosc2.field(blosc2.string(max_length=16)) + rank: int = blosc2.field(blosc2.int64(ge=0), default=0) + + +DATA = [ + (3, 80.0, True), + (1, 50.0, False), + (4, 90.0, True), + (2, 50.0, True), + (0, 70.0, False), +] + + +# =========================================================================== +# Single-column sort +# =========================================================================== + + +def test_sort_single_col_ascending(): + t = CTable(Row, new_data=DATA) + s = t.sort_by("id") + np.testing.assert_array_equal(s["id"].to_numpy(), [0, 1, 2, 3, 4]) + + +def test_sort_single_col_descending(): + t = CTable(Row, new_data=DATA) + s = t.sort_by("score", ascending=False) + np.testing.assert_array_equal(s["score"].to_numpy(), [90.0, 80.0, 70.0, 50.0, 50.0]) + + +def test_sort_bool_column(): + t = CTable(Row, new_data=DATA) + s = t.sort_by("active") + # False < True → False rows first + assert list(s["active"].to_numpy()) == [False, False, True, True, True] + + +def test_sort_string_column(): + t = CTable(StrRow, new_data=[("charlie", 3), ("alice", 1), ("dave", 4), ("bob", 2)]) + s = t.sort_by("label") + assert list(s["label"].to_numpy()) == ["alice", "bob", "charlie", "dave"] + + +def test_sort_string_column_descending(): + t = CTable(StrRow, new_data=[("charlie", 3), ("alice", 1), ("dave", 4), ("bob", 2)]) + s = t.sort_by("label", ascending=False) + assert list(s["label"].to_numpy()) == ["dave", "charlie", "bob", "alice"] + + +# 
=========================================================================== +# Multi-column sort +# =========================================================================== + + +def test_sort_multi_col_both_asc(): + t = CTable(Row, new_data=DATA) + s = t.sort_by(["score", "id"], ascending=[True, True]) + scores = s["score"].to_numpy() + ids = s["id"].to_numpy() + # score asc; tiebreak: id asc (both 50.0 rows → id 1 before id 2) + assert scores[0] == pytest.approx(50.0) + assert ids[0] == 1 + assert scores[1] == pytest.approx(50.0) + assert ids[1] == 2 + + +def test_sort_multi_col_mixed(): + t = CTable(Row, new_data=DATA) + s = t.sort_by(["score", "id"], ascending=[True, False]) + scores = s["score"].to_numpy() + ids = s["id"].to_numpy() + # score asc; tiebreak: id desc (both 50.0 rows → id 2 before id 1) + assert scores[0] == pytest.approx(50.0) + assert ids[0] == 2 + assert scores[1] == pytest.approx(50.0) + assert ids[1] == 1 + + +def test_sort_multi_col_ascending_list_notation(): + """Passing ascending=True (single bool) applies to all keys.""" + t = CTable(Row, new_data=DATA) + s = t.sort_by(["score", "id"], ascending=True) + np.testing.assert_array_equal(s["id"].to_numpy()[:2], [1, 2]) + + +# =========================================================================== +# Non-destructive: original table is unchanged +# =========================================================================== + + +def test_sort_does_not_modify_original(): + t = CTable(Row, new_data=DATA) + original_ids = t["id"].to_numpy().copy() + _ = t.sort_by("id") + np.testing.assert_array_equal(t["id"].to_numpy(), original_ids) + + +def test_sort_returns_new_table(): + t = CTable(Row, new_data=DATA) + s = t.sort_by("id") + assert s is not t + + +# =========================================================================== +# inplace=True +# =========================================================================== + + +def test_sort_inplace_returns_self(): + t = CTable(Row, 
new_data=DATA) + result = t.sort_by("id", inplace=True) + assert result is t + + +def test_sort_inplace_modifies_table(): + t = CTable(Row, new_data=DATA) + t.sort_by("id", inplace=True) + np.testing.assert_array_equal(t["id"].to_numpy(), [0, 1, 2, 3, 4]) + + +def test_sort_inplace_descending(): + t = CTable(Row, new_data=DATA) + t.sort_by("score", ascending=False, inplace=True) + assert t["score"][0] == pytest.approx(90.0) + assert t["score"][-1] == pytest.approx(50.0) + + +# =========================================================================== +# Interaction with deletions +# =========================================================================== + + +def test_sort_skips_deleted_rows(): + t = CTable(Row, new_data=DATA) + t.delete([0]) # delete id=3 (first row) + s = t.sort_by("id") + np.testing.assert_array_equal(s["id"].to_numpy(), [0, 1, 2, 4]) + assert len(s) == 4 + + +def test_sort_inplace_skips_deleted_rows(): + t = CTable(Row, new_data=DATA) + t.delete([0, 2]) # delete id=3 and id=4 + t.sort_by("id", inplace=True) + np.testing.assert_array_equal(t["id"].to_numpy(), [0, 1, 2]) + assert len(t) == 3 + + +def test_sort_all_columns_consistent(): + """All columns move together when sorted.""" + t = CTable(Row, new_data=DATA) + s = t.sort_by("id") + ids = s["id"].to_numpy() + scores = s["score"].to_numpy() + # Original DATA: id→score mapping: 0→70, 1→50, 2→50, 3→80, 4→90 + expected = {0: 70.0, 1: 50.0, 2: 50.0, 3: 80.0, 4: 90.0} + for i, v in zip(ids, scores, strict=True): + assert v == pytest.approx(expected[int(i)]) + + +# =========================================================================== +# Edge cases +# =========================================================================== + + +def test_sort_empty_table(): + t = CTable(Row) + s = t.sort_by("id") + assert len(s) == 0 + + +def test_sort_single_row(): + t = CTable(Row, new_data=[(7, 42.0, True)]) + s = t.sort_by("id") + assert s["id"][0] == 7 + + +def test_sort_already_sorted(): + data = 
[(i, float(i * 10), True) for i in range(5)] + t = CTable(Row, new_data=data) + s = t.sort_by("id") + np.testing.assert_array_equal(s["id"].to_numpy(), list(range(5))) + + +def test_sort_reverse_sorted(): + data = [(i, float(i * 10), True) for i in range(5, 0, -1)] + t = CTable(Row, new_data=data) + s = t.sort_by("id") + np.testing.assert_array_equal(s["id"].to_numpy(), [1, 2, 3, 4, 5]) + + +# =========================================================================== +# Error cases +# =========================================================================== + + +def test_sort_view_raises(): + t = CTable(Row, new_data=DATA) + view = t.where(t["id"] > 2) + with pytest.raises(ValueError, match="view"): + view.sort_by("id") + + +def test_sort_unknown_column_raises(): + t = CTable(Row, new_data=DATA) + with pytest.raises(KeyError): + t.sort_by("nonexistent") + + +def test_sort_complex_column_raises(): + @dataclass + class CRow: + val: complex = blosc2.field(blosc2.complex128()) + + t = CTable(CRow, new_data=[(1 + 2j,), (3 + 4j,)]) + with pytest.raises(TypeError, match="complex"): + t.sort_by("val") + + +def test_sort_ascending_length_mismatch_raises(): + t = CTable(Row, new_data=DATA) + with pytest.raises(ValueError, match="ascending"): + t.sort_by(["id", "score"], ascending=[True]) + + +def test_sort_readonly_inplace_raises(): + import os + import pathlib + import shutil + + path = str(pathlib.Path(__file__).parent / "saved_ctable" / "_sort_ro_test") + os.makedirs(path, exist_ok=True) + try: + t = CTable(Row, urlpath=path, mode="w", new_data=DATA) + del t + t_ro = CTable.open(path, mode="r") + with pytest.raises(ValueError, match="read-only"): + t_ro.sort_by("id", inplace=True) + finally: + shutil.rmtree(path, ignore_errors=True) + + +if __name__ == "__main__": + pytest.main(["-v", __file__])