New blocksize computation for HCR codecs

Blosc · Sep 2, 2016 · 9d06255 · 9d06255
1 parent 510cd08
commit 9d06255
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 34 deletions.
diff --git a/README_HEADER.rst b/README_HEADER.rst
@@ -32,13 +32,13 @@ All entries are little endian.
         Whether the bit-shuffle filter has been applied or not.
     :bit 3 (``0x08``):
         Reserved
-    :bit 4 (``0x16``):
-        Reserved
-    :bit 5 (``0x32``):
+    :bit 4 (``0x10``):
+        If set, the blocks will not be split in sub-blocks during compression.
+    :bit 5 (``0x20``):
         Part of the enumeration for compressors.
-    :bit 6 (``0x64``):
+    :bit 6 (``0x40``):
         Part of the enumeration for compressors.
-    :bit 7 (``0x64``):
+    :bit 7 (``0x80``):
         Part of the enumeration for compressors.
 
     The last three bits form an enumeration that allows to use alternative

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
@@ -12,6 +12,10 @@ Changes from 1.10.2 to 1.10.3
 
 - Internal Zstd codec upgraded to 1.0.0.
 
+- New block size computation inherited from C-Blosc2. Benchmarks are saying that
+  this benefits mainly to LZ4, LZ4HC, Zlib and Zstd codecs, both in speed and in
+  compression ratios (although YMMV for your case).
+
 - Added the @rpath flag in Mac OSX for shared libraries.  Fixes #175.
 
 - Added a fix for VS2008 discovered in: https://github.com/PyTables/PyTables/pull/569/files#diff-953cf824ebfea7208d2a2e312d9ccda2L126

diff --git a/blosc/blosc.c b/blosc/blosc.c
@@ -546,12 +546,16 @@ static int get_accel(const struct blosc_context* context) {
   return 1;
 }
 
+
 /* Shuffle & compress a single block */
 static int blosc_c(const struct blosc_context* context, int32_t blocksize,
                    int32_t leftoverblock, int32_t ntbytes, int32_t maxbytes,
                    const uint8_t *src, uint8_t *dest, uint8_t *tmp,
                    uint8_t *tmp2)
 {
+  int32_t compformat = (*(context->header_flags) & 0xe0) >> 5;
+  int dont_split = (*(context->header_flags) & 0x10) >> 4;
+  uint8_t blosc_version_format = BLOSC_VERSION_FORMAT;
   int32_t j, neblock, nsplits;
   int32_t cbytes;                   /* number of compressed bytes in split */
   int32_t ctbytes = 0;              /* number of compressed bytes in block */
@@ -578,11 +582,8 @@ static int blosc_c(const struct blosc_context* context, int32_t blocksize,
   /* Calculate acceleration for different compressors */
   accel = get_accel(context);
 
-  /* Compress for each shuffled slice split for this block. */
-  /* If typesize is too large, neblock is too small or we are in a
-     leftover block, do not split at all. */
-  if ((typesize <= MAX_SPLITS) && (blocksize/typesize) >= MIN_BUFFERSIZE &&
-      (!leftoverblock)) {
+  /* The number of splits for this block */
+  if (!dont_split && !leftoverblock) {
     nsplits = typesize;
   }
   else {
@@ -679,14 +680,16 @@ static int blosc_c(const struct blosc_context* context, int32_t blocksize,
 static int blosc_d(struct blosc_context* context, int32_t blocksize, int32_t leftoverblock,
                    const uint8_t *src, uint8_t *dest, uint8_t *tmp, uint8_t *tmp2)
 {
+  int32_t compformat = (*(context->header_flags) & 0xe0) >> 5;
+  int dont_split = (*(context->header_flags) & 0x10) >> 4;
+  uint8_t blosc_version_format = context->src[0];
   int32_t j, neblock, nsplits;
   int32_t nbytes;                /* number of decompressed bytes in split */
   int32_t cbytes;                /* number of compressed bytes in split */
   int32_t ctbytes = 0;           /* number of compressed bytes in block */
   int32_t ntbytes = 0;           /* number of uncompressed bytes in block */
   uint8_t *_tmp = dest;
   int32_t typesize = context->typesize;
-  int32_t compformat;
   char *compname;
   int bscount;
 
@@ -697,14 +700,14 @@ static int blosc_d(struct blosc_context* context, int32_t blocksize, int32_t lef
 
   compformat = (*(context->header_flags) & 0xe0) >> 5;
 
-  /* Compress for each shuffled slice split for this block. */
-  if ((typesize <= MAX_SPLITS) && (blocksize/typesize) >= MIN_BUFFERSIZE &&
-      (!leftoverblock)) {
+  /* The number of splits for this block */
+  if (!dont_split && !leftoverblock) {
     nsplits = typesize;
   }
   else {
     nsplits = 1;
   }
+
   neblock = blocksize / nsplits;
   for (j = 0; j < nsplits; j++) {
     cbytes = sw32_(src);      /* amount of compressed bytes */
@@ -896,6 +899,12 @@ static int do_job(struct blosc_context* context)
 }
 
 
+/* Whether a codec is meant for High Compression Ratios */
+#define HCR(codec) ( ((codec) == BLOSC_LZ4HC) ||                  \
+                     ((codec) == BLOSC_ZLIB) ||                   \
+                     ((codec) == BLOSC_ZSTD) ? 1 : 0 )
+
+
 static int32_t compute_blocksize(struct blosc_context* context, int32_t clevel,
                                  int32_t typesize, int32_t nbytes,
                                  int32_t forced_blocksize)
@@ -919,25 +928,11 @@ static int32_t compute_blocksize(struct blosc_context* context, int32_t clevel,
   else if (nbytes >= L1) {
     blocksize = L1;
 
-    /* For LZ4HC, increase the block sizes by a factor of 8 because it
-       is meant for compressing large blocks (it shows a big overhead
+    /* For HCR codecs, increase the block sizes by a factor of 2 because they
+       are meant for compressing large blocks (i.e. they show a big overhead
        when compressing small ones). */
-    if (context->compcode == BLOSC_LZ4HC) {
-      blocksize *= 8;
-    }
-
-    /* For Zlib, increase the block sizes by a factor of 8 because it
-       is meant for compressing large blocks (it shows a big overhead
-       when compressing small ones). */
-    if (context->compcode == BLOSC_ZLIB) {
-      blocksize *= 8;
-    }
-
-    /* For Zstd, increase the block sizes by a factor of 8 because it
-       is meant for compressing large blocks (it shows a big overhead
-       when compressing small ones). */
-    if (context->compcode == BLOSC_ZSTD) {
-      blocksize *= 8;
+    if (HCR(context->compcode)) {
+      blocksize *= 2;
     }
 
     if (clevel == 0) {
@@ -1036,12 +1031,27 @@ static int initialize_context_compression(struct blosc_context* context,
   return 1;
 }
 
+
+/* Conditions for splitting a block before compressing with a codec. */
+static int split_block(int compcode, int typesize, int blocksize) {
+  /* Normally all the compressors designed for speed benefit from a
+     split.  However, in conducted benchmarks LZ4 seems that it runs
+     faster if we don't split, which is quite surprising. */
+  return (((compcode == BLOSC_BLOSCLZ) ||
+	   //(compcode == BLOSC_LZ4) ||
+	   (compcode == BLOSC_SNAPPY)) &&
+	  (typesize <= MAX_SPLITS) &&
+	  (blocksize / typesize) >= MIN_BUFFERSIZE);
+}
+
+
 static int write_compression_header(struct blosc_context* context, int clevel, int doshuffle)
 {
   int32_t compformat;
+  int dont_split;
 
   /* Write version header for this block */
-  context->dest[0] = BLOSC_VERSION_FORMAT;              /* blosc format version */
+  context->dest[0] = BLOSC_VERSION_FORMAT;           /* blosc format version */
 
   /* Write compressor format */
   compformat = -1;
@@ -1123,11 +1133,15 @@ static int write_compression_header(struct blosc_context* context, int clevel, i
     *(context->header_flags) |= BLOSC_DOBITSHUFFLE;  /* bit 2 set to one in flags */
   }
 
-  *(context->header_flags) |= compformat << 5;      /* compressor format start at bit 5 */
+  dont_split = !split_block(context->compcode, context->typesize,
+                            context->blocksize);
+  *(context->header_flags) |= dont_split << 4;  /* dont_split is in bit 4 */
+  *(context->header_flags) |= compformat << 5;  /* compressor format starts at bit 5 */
 
   return 1;
 }
 
+
 int blosc_compress_context(struct blosc_context* context)
 {
   int32_t ntbytes = 0;