Merge pull request #1072 from PyTables/direct-chunking-b2ndarray
Foreign b2nd array compatibility.

This expands the work on b2nd array support with direct chunking (#1056) to better handle such arrays when they were created by other tools: the filter values carrying the chunk rank and shape may be missing, and the b2nd array used for storing the chunk may follow a more relaxed format (e.g. it need not consist of a single inner chunk, nor have a specific blocksize). Some previously missing checks on chunk data shape have been added to the optimized read path.

A new example script has been added to test writing and reading partial chunks.

Finally, the workaround for stack smashing with some versions of GCC has been removed, as it no longer seems to be needed.
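
As an illustration (not part of this commit), a foreign writer might store a chunk in a layout that PyTables itself never produces, e.g. a b2nd array made of several inner chunks and blocks. The following sketch assumes python-blosc2 and hdf5plugin; the ``foreign.h5`` file and ``/data`` dataset names are hypothetical::

    import blosc2
    import h5py
    import hdf5plugin
    import numpy

    # A 4x4 HDF5 chunk stored as a b2nd array with two 2x4 inner chunks
    # and 2x2 blocks, instead of a single chunk-sized inner chunk and block.
    a = numpy.arange(16, dtype="int64").reshape((4, 4))
    b2 = blosc2.asarray(a, chunks=(2, 4), blocks=(2, 2))

    h5f = h5py.File("foreign.h5", "w")
    dataset = h5f.create_dataset(
        "data", a.shape, dtype=a.dtype, chunks=a.shape, **hdf5plugin.Blosc2())
    # Store the whole b2nd container verbatim as the HDF5 chunk at (0, 0).
    dataset.id.write_direct_chunk((0, 0), b2._schunk.to_cframe())
    h5f.close()
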
ivilata committed Oct 19, 2023
2 parents 1d6f564 + d5536db commit bb02c88
Showing 3 changed files with 160 additions and 28 deletions.
112 changes: 112 additions & 0 deletions examples/direct-chunk-shape.py
@@ -0,0 +1,112 @@
# This script uses h5py to write two versions of the following 6x6 array into
# two datasets with 4x4 chunks::
#
# chunk boundary
# |
# [[ 0 1 2 3 0 1]
# [ 4 5 6 7 4 5]
# [ 8 9 10 11 8 9]
# [12 13 14 15 12 13] __ chunk boundary
# [ 0 1 2 3 0 1]
# [ 4 5 6 7 4 5]]
#
# So its chunks contain, left-to-right, top-to-bottom, 4x4, 4x2, 2x4, and 2x2
# items worth of data.
#
# - ``/data_full`` has all its chunks with shape 4x4 in storage, even if the
# actual data does not fill the whole chunk.
# - ``/data_part`` has chunks with the shape of actual data, as listed above.
#
# Direct chunking is used in h5py to control the exact storage format for each
# chunk, using a Blosc2 ND container. Then PyTables is used to read both
# datasets and compare them against the original array, so as to test whether
# there are problems in loading partial chunks.
#
# Since PyTables has an optimized path for uncompressing Blosc2 ND data that
# does not use the HDF5 filter pipeline, you may force it to only use the
# latter by setting ``BLOSC2_FILTER=1`` in the environment.

import blosc2
import h5py
import hdf5plugin
import numpy
import os
import tempfile

# Both parameter sets below are equivalent.
fparams = hdf5plugin.Blosc2(cname='zstd', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
cparams = {
"codec": blosc2.Codec.ZSTD,
"clevel": 1,
"filters": [blosc2.Filter.SHUFFLE],
}

def np2b2(a):
return blosc2.asarray(numpy.ascontiguousarray(a),
chunks=a.shape, blocks=a.shape, cparams=cparams)

# Assemble the array.
achunk = numpy.arange(4 * 4).reshape((4, 4))
adata = numpy.zeros((6, 6), dtype=achunk.dtype)
adata[0:4, 0:4] = achunk[:, :]
adata[0:4, 4:6] = achunk[:, 0:2]
adata[4:6, 0:4] = achunk[0:2, :]
adata[4:6, 4:6] = achunk[0:2, 0:2]

h5fdesc, h5fpath = tempfile.mkstemp()
os.close(h5fdesc)

h5f = h5py.File(h5fpath, "w")
print(f"Writing with full chunks to {h5fpath}:/data_full...")
dataset = h5f.create_dataset(
"data_full", adata.shape, dtype=adata.dtype, chunks=achunk.shape,
**fparams)
b2chunk = np2b2(achunk) # need to keep the ref or cframe becomes bogus
b2frame = b2chunk._schunk.to_cframe()
dataset.id.write_direct_chunk((0, 0), b2frame)
dataset.id.write_direct_chunk((0, 4), b2frame)
dataset.id.write_direct_chunk((4, 0), b2frame)
dataset.id.write_direct_chunk((4, 4), b2frame)
print(f"Writing with partial chunks to {h5fpath}:/data_part...")
dataset = h5f.create_dataset(
"data_part", adata.shape, dtype=adata.dtype, chunks=achunk.shape,
**fparams)
b2chunk = np2b2(achunk[:, :]) # need to keep the ref or cframe becomes bogus
b2frame = b2chunk._schunk.to_cframe()
dataset.id.write_direct_chunk((0, 0), b2frame)
b2chunk = np2b2(achunk[:, 0:2])
# Uncomment to introduce a bogus partial chunk in the middle of the data,
# too small even for a margin chunk.
#b2chunk = np2b2(achunk[0:2, 0:2])
b2frame = b2chunk._schunk.to_cframe()
dataset.id.write_direct_chunk((0, 4), b2frame)
b2chunk = np2b2(achunk[0:2, :])
b2frame = b2chunk._schunk.to_cframe()
dataset.id.write_direct_chunk((4, 0), b2frame)
b2chunk = np2b2(achunk[0:2, 0:2])
b2frame = b2chunk._schunk.to_cframe()
dataset.id.write_direct_chunk((4, 4), b2frame)
h5f.close()

# Import PyTables only after the h5py part is done, so that it uses its own `hdf5-blosc2`.
import tables

h5f = tables.open_file(h5fpath, "r")
print(f"Reading with full chunks from {h5fpath}:/data_full...")
a_full = h5f.root.data_full[:]
print(f"Reading with partial chunks from {h5fpath}:/data_part...")
a_part = h5f.root.data_part[:]
h5f.close()

if not (a_full == adata).all():
print("FULL CHUNKS FAILED (original, read):")
print(adata)
print(a_full)

if not (a_part == adata).all():
print("PARTIAL CHUNKS FAILED (original, read):")
print(adata)
print(a_part)

print(f"Removing {h5fpath}...")
os.remove(h5fpath)
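
For instance, both read paths may be exercised by running the new example twice, a sketch assuming it is invoked from the source tree::

    import os
    import subprocess
    import sys

    # Once with the optimized B2ND path, once forcing the HDF5 filter pipeline.
    for filter_only in ("0", "1"):
        env = dict(os.environ, BLOSC2_FILTER=filter_only)
        subprocess.run([sys.executable, "examples/direct-chunk-shape.py"],
                       env=env, check=True)
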
41 changes: 28 additions & 13 deletions hdf5-blosc2/src/blosc2_filter.c
@@ -105,7 +105,7 @@ herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {

unsigned int typesize, basetypesize;
unsigned int bufsize;
- hsize_t chunkshape[32];
+ hsize_t chunkshape[H5S_MAX_RANK];
unsigned int flags;
size_t nelements = MAX_FILTER_VALUES;
unsigned int values[MAX_FILTER_VALUES];
@@ -122,11 +122,11 @@ herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {
/* Set Blosc2 info in first slot */
values[0] = FILTER_BLOSC2_VERSION;

- ndim = H5Pget_chunk(dcpl, 32, chunkshape);
+ ndim = H5Pget_chunk(dcpl, H5S_MAX_RANK, chunkshape);
if (ndim < 0)
return -1;
- if (ndim > 32) {
-   PUSH_ERR("blosc2_set_local", H5E_CALLBACK, "Chunk rank exceeds limit");
+ if (ndim > H5S_MAX_RANK) {
+   PUSH_ERR("blosc2_set_local", H5E_CALLBACK, "Chunk rank exceeds HDF5 limit");
return -1;
}

@@ -171,8 +171,8 @@ herr_t blosc2_set_local(hid_t dcpl, hid_t type, hid_t space) {
} else if (ndim > 1) {
/* The user may be expecting more efficient storage than we can currently provide,
* so convey some information when tracing. */
BLOSC_TRACE_ERROR("Chunk rank %d exceeds B2ND build limit %d, "
"using plain Blosc2 instead", ndim, BLOSC2_MAX_DIM);
BLOSC_TRACE_WARNING("Chunk rank %d exceeds B2ND build limit %d, "
"using plain Blosc2 instead", ndim, BLOSC2_MAX_DIM);
}

r = H5Pmodify_filter(dcpl, FILTER_BLOSC2, flags, nelements, values);
@@ -236,8 +236,8 @@ int32_t compute_b2nd_block_shape(size_t block_size,
}

if (nitems_new > nitems) {
BLOSC_TRACE_ERROR("Target block size is too small (%lu items), raising to %lu items",
nitems, nitems_new);
BLOSC_TRACE_INFO("Target block size is too small (%lu items), raising to %lu items",
nitems, nitems_new);
}
if (nitems_new >= nitems) {
return nitems_new * type_size;
@@ -331,7 +331,7 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,

if (cd_nelmts < 6) {
PUSH_ERR("blosc2_filter", H5E_CALLBACK,
"Too few filter parameters for B2ND compression: %d", cd_nelmts);
"Too few filter parameters for Blosc2 compression: %d", cd_nelmts);
goto failed;
}

@@ -459,9 +459,18 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
goto failed;
}

+ /* Although blosc2_decompress_ctx ("else" branch) can decompress b2nd-formatted data,
+  * there may be padding bytes when the chunkshape is not a multiple of the blockshape,
+  * and only the b2nd machinery knows how to handle these correctly.
+  */
if (blosc2_meta_exists(schunk, "b2nd") >= 0
|| blosc2_meta_exists(schunk, "caterva") >= 0) {

+   if (ndim < 0) {
+     BLOSC_TRACE_WARNING("Auxiliary Blosc2 filter values contain no chunk rank/shape, "
+                         "some checks on B2ND arrays will be disabled");
+   }

b2nd_array_t *array = NULL;

if (b2nd_from_schunk(schunk, &array) < 0) {
@@ -471,7 +480,7 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
schunk = NULL; // owned by the array now, do not free on its own

/* Check array metadata against filter values */
-   if (array->ndim != ndim) {
+   if (ndim >= 0 && array->ndim != ndim) {
PUSH_ERR("blosc2_filter", H5E_CALLBACK,
"B2ND array rank (%hhd) != filter rank (%d)", array->ndim, ndim);
goto b2nd_decomp_out;
@@ -481,10 +490,16 @@ size_t blosc2_filter_function(unsigned flags, size_t cd_nelmts,
start[i] = 0;
stop[i] = array->shape[i];
size *= array->shape[i];
-     if (array->chunkshape[i] != chunkshape[i]) {
+     if (ndim >= 0 && array->shape[i] != chunkshape[i]) {
+       /* The HDF5 filter pipeline needs the filter to always return full chunks,
+        * even for margin chunks where data does not fill the whole chunk
+        * (in which case padding should be used,
+        * at least until the last byte with actual data).
+        * Of course, we need to know the chunkshape to check this.
+        */
PUSH_ERR("blosc2_filter", H5E_CALLBACK,
"B2ND array chunkshape[%d] (%d) != filter chunkshape[%d] (%d)",
i, array->chunkshape[i], i, chunkshape[i]);
"B2ND array shape[%d] (%ld) != filter chunkshape[%d] (%d)",
i, array->shape[i], i, chunkshape[i]);
goto b2nd_decomp_out;
}
}
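
The padding mentioned in the comment above can be observed with python-blosc2 alone, a minimal sketch outside HDF5: when the chunkshape is not a multiple of the blockshape, blocks are padded in storage, and slicing through the b2nd machinery strips the padding back out::

    import blosc2
    import numpy

    a = numpy.arange(5 * 5).reshape((5, 5))
    # 5 is not a multiple of 3, so the 3x3 blocks are padded in storage.
    b2 = blosc2.asarray(a, chunks=(5, 5), blocks=(3, 3))
    # Slicing via b2nd returns exactly the original 25 items, no padding.
    assert (b2[...] == a).all()
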
35 changes: 20 additions & 15 deletions src/H5ARRAY-opt.c
@@ -49,6 +49,8 @@ herr_t read_chunk_slice_b2nd(const char *filename,
hid_t dataset_id,
hid_t space_id,
hsize_t chunk_idx,
+                             int rank,
+                             const hsize_t *chunk_shape,
const int64_t *slice_shape,
const int64_t *slice_start,
const int64_t *slice_stop,
@@ -208,7 +210,7 @@ herr_t get_blosc2_slice(char *filename,

herr_t rv;
IF_NEG_OUT_RET(rv = read_chunk_slice_b2nd(filename, dataset_id, space_id,
-                                           slice_chunk_idx, chunk_slice_shape,
+                                           slice_chunk_idx, rank, chunk_shape, chunk_slice_shape,
start_in_stored_chunk, stop_in_stored_chunk,
chunk_slice_size, chunk_slice_data),
rv - 50);
@@ -483,21 +485,12 @@ hid_t H5ARRAYOmake(hid_t loc_id,
}


- /* There are some weird issues with optimization under GCC and Linux:
-  * this code causes stack smashing on Ubuntu with optimization level > 0 (with GCC >= 10),
-  * and segmentation faults on Guix with optimization level > 1 (with GCC >= 12).
-  * Clang on Linux seems ok, as well as GCC and Clang on macOS.
-  * For the moment, disable optimization entirely for this function
-  * under GCC (and compatible compilers),
-  * which should not cause a noticeable performance penalty anyway.
-  */
- #ifdef __GNUC__
- __attribute__((optimize("O0")))
- #endif
herr_t read_chunk_slice_b2nd(const char *filename,
hid_t dataset_id,
hid_t space_id,
hsize_t chunk_idx,
+                             int rank,
+                             const hsize_t *chunk_shape,
const int64_t *slice_shape,
const int64_t *slice_start,
const int64_t *slice_stop,
@@ -510,15 +503,27 @@ herr_t read_chunk_slice_b2nd(const char *filename,
haddr_t address;
IF_NEG_OUT_BTRACE(H5Dget_chunk_info(dataset_id, space_id, chunk_idx,
NULL, NULL, &address, NULL),
"Failed getting chunk info of array in %s", filename);
"Failed getting chunk info of B2ND array in %s", filename);

/* Open the schunk on-disk */
IF_TRUE_OUT_BTRACE(b2nd_open_offset(filename, &array, address) != BLOSC2_ERROR_SUCCESS,
"Cannot open array in %s", filename);
"Cannot open B2ND array in %s", filename);
+   IF_TRUE_OUT_BTRACE(array->ndim != rank,
+                      "B2ND array rank (%hhd) != chunk rank (%d)", array->ndim, rank);
+   for (int i = 0; i < rank; i++) {
+     /* The HDF5 filter pipeline needs the filter to always return full chunks.
+      * This is not strictly needed here,
+      * but we enforce that requirement to avoid issues with other applications
+      * which use the pipeline.
+      */
+     IF_TRUE_OUT_BTRACE(array->shape[i] != (int64_t)(chunk_shape[i]),
+                        "B2ND array shape[%d] (%ld) != chunk shape[%d] (%lu)",
+                        i, array->shape[i], i, chunk_shape[i]);
+   }

IF_TRUE_OUT_BTRACE(b2nd_get_slice_cbuffer(array, slice_start, slice_stop,
slice_data, slice_shape, slice_size) != BLOSC2_ERROR_SUCCESS,
"Failed getting slice of array in %s", filename);
"Failed getting slice of B2ND array in %s", filename);

//out_success:
retval = 0;
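
To close the loop, a slice read through PyTables may take this optimized path; a sketch reusing the hypothetical ``foreign.h5`` file from the first example above::

    import numpy
    import tables

    h5f = tables.open_file("foreign.h5", "r")
    part = h5f.root.data[1:3, 1:3]  # a slice inside the stored chunk
    h5f.close()
    expected = numpy.arange(16, dtype="int64").reshape((4, 4))[1:3, 1:3]
    assert (part == expected).all()
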
