Skip to content

Commit

Permalink
optimize inflate::core::transfer (#131)
Browse files Browse the repository at this point in the history
  • Loading branch information
connorskees committed Jan 9, 2023
1 parent c589703 commit dd2fa3e
Showing 1 changed file with 28 additions and 8 deletions.
36 changes: 28 additions & 8 deletions miniz_oxide/src/inflate/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -799,13 +799,34 @@ fn transfer(
match_len: usize,
out_buf_size_mask: usize,
) {
for _ in 0..match_len >> 2 {
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask];
source_pos += 4;
out_pos += 4;
debug_assert!(out_pos > source_pos);

This comment was marked as duplicate.

Copy link
@tustvold

tustvold Feb 1, 2023

FYI we are seeing this debug assert failing in our integration tests, since the 0.6.3 release. It is possible that we are doing something wrong, I've not had time to investigate fully, but thought I'd raise it as something fishy may be going on

apache/arrow-rs#3650

This comment has been minimized.

Copy link
@dburgener

dburgener Feb 1, 2023

I see that this was marked as a duplicate. Can I ask what it's a duplicate of? I'd like to follow any discussion of this as my repo has hit the same problem.

This comment has been minimized.

Copy link
@tustvold

tustvold Feb 1, 2023

I moved the comment to the PR for better discoverability, was my mistake- #131 (comment)

This comment has been minimized.

Copy link
@dburgener

dburgener Feb 1, 2023

Thanks!

// special case that comes up surprisingly often. in the case that `source_pos`
// is 1 less than `out_pos`, we can say that the entire range will be the same
// value and optimize this to be a simple `memset`
if out_buf_size_mask == usize::MAX && source_pos.abs_diff(out_pos) == 1 {
let init = out_slice[out_pos - 1];
let end = (match_len >> 2) * 4 + out_pos;

out_slice[out_pos..end].fill(init);
out_pos = end;
source_pos = end - 1;
// if the difference between `source_pos` and `out_pos` is greater than 3, we
// can do slightly better than the naive case by copying everything at once
} else if out_buf_size_mask == usize::MAX && source_pos.abs_diff(out_pos) >= 4 {
for _ in 0..match_len >> 2 {
out_slice.copy_within(source_pos..=source_pos + 3, out_pos);
source_pos += 4;
out_pos += 4;
}
} else {
for _ in 0..match_len >> 2 {
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask];
source_pos += 4;
out_pos += 4;
}
}

match match_len & 3 {
Expand Down Expand Up @@ -1851,7 +1872,6 @@ mod test {
// Bad check bits.
cr(&[0x78, 0x98], F, State::BadZlibHeader, true);


// Too many code lengths. (From inflate library issues)
cr(
b"M\xff\xffM*\xad\xad\xad\xad\xad\xad\xad\xcd\xcd\xcdM",
Expand Down

0 comments on commit dd2fa3e

Please sign in to comment.