-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
optimize inflate::core::transfer (#131)
- Loading branch information
1 parent
c589703
commit dd2fa3e
Showing
1 changed file
with
28 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -799,13 +799,34 @@ fn transfer( | |
match_len: usize, | ||
out_buf_size_mask: usize, | ||
) { | ||
for _ in 0..match_len >> 2 { | ||
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; | ||
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; | ||
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; | ||
out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask]; | ||
source_pos += 4; | ||
out_pos += 4; | ||
debug_assert!(out_pos > source_pos); | ||
This comment was marked as duplicate.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
dburgener
|
||
// special case that comes up surprisingly often. in the case that `source_pos` | ||
// is 1 less than `out_pos`, we can say that the entire range will be the same | ||
// value and optimize this to be a simple `memset` | ||
if out_buf_size_mask == usize::MAX && source_pos.abs_diff(out_pos) == 1 { | ||
let init = out_slice[out_pos - 1]; | ||
let end = (match_len >> 2) * 4 + out_pos; | ||
|
||
out_slice[out_pos..end].fill(init); | ||
out_pos = end; | ||
source_pos = end - 1; | ||
// if the difference between `source_pos` and `out_pos` is greater than 3, we | ||
// can do slightly better than the naive case by copying everything at once | ||
} else if out_buf_size_mask == usize::MAX && source_pos.abs_diff(out_pos) >= 4 { | ||
for _ in 0..match_len >> 2 { | ||
out_slice.copy_within(source_pos..=source_pos + 3, out_pos); | ||
source_pos += 4; | ||
out_pos += 4; | ||
} | ||
} else { | ||
for _ in 0..match_len >> 2 { | ||
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; | ||
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; | ||
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; | ||
out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask]; | ||
source_pos += 4; | ||
out_pos += 4; | ||
} | ||
} | ||
|
||
match match_len & 3 { | ||
|
@@ -1851,7 +1872,6 @@ mod test { | |
// Bad check bits. | ||
cr(&[0x78, 0x98], F, State::BadZlibHeader, true); | ||
|
||
|
||
// Too many code lengths. (From inflate library issues) | ||
cr( | ||
b"M\xff\xffM*\xad\xad\xad\xad\xad\xad\xad\xcd\xcd\xcdM", | ||
|
FYI we are seeing this debug assert failing in our integration tests, since the 0.6.3 release. It is possible that we are doing something wrong, I've not had time to investigate fully, but thought I'd raise it as something fishy may be going on
apache/arrow-rs#3650