From b4baed337a70c317c5d6a2fa245bda21f461fc6b Mon Sep 17 00:00:00 2001
From: oyvindln
Date: Wed, 29 May 2024 02:33:44 +0200
Subject: [PATCH] feat(deflate): evade a bounds check in deflate for a small perf improvement

---
 benches/bench.rs                | 6 ++++++
 miniz_oxide/src/deflate/core.rs | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/benches/bench.rs b/benches/bench.rs
index 94bcb23..d3b3ab8 100755
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -107,6 +107,12 @@ mod oxide {
         1,
         "benches/data/bin"
     );
+    compress_bench!(
+        compress_bin_lvl_2,
+        tdefl_compress_mem_to_heap,
+        2,
+        "benches/data/bin"
+    );
     compress_bench!(
         compress_bin_lvl_6,
         tdefl_compress_mem_to_heap,
diff --git a/miniz_oxide/src/deflate/core.rs b/miniz_oxide/src/deflate/core.rs
index 30762b3..6deddd0 100644
--- a/miniz_oxide/src/deflate/core.rs
+++ b/miniz_oxide/src/deflate/core.rs
@@ -1242,6 +1242,8 @@ impl DictOxide {
         let pos = pos & LZ_DICT_SIZE_MASK;
         let end = pos + 4;
         // Somehow this assertion makes things faster.
+        // TODO: as of may 2024 this does not seem to make any difference
+        // so consider removing.
         assert!(end < LZ_DICT_FULL_SIZE);
 
         let bytes: [u8; 4] = self.b.dict[pos..end].try_into().unwrap();
@@ -1252,6 +1254,10 @@ impl DictOxide {
     /// type T.
     #[inline]
     fn read_unaligned_u64(&self, pos: usize) -> u64 {
+        // Help evade bounds/panic code check by masking the position value
+        // This provides a small speedup at the cost of an instruction or two instead of
+        // having to use unsafe.
+        let pos = pos & LZ_DICT_SIZE_MASK;
         let bytes: [u8; 8] = self.b.dict[pos..pos + 8].try_into().unwrap();
         u64::from_le_bytes(bytes)
     }