diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 92b42d52606a2..58b75a7d3fab5 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -183,7 +183,7 @@ impl Buffer { /// in larger chunks and starting at arbitrary bit offsets. /// Note that both `offset` and `length` are measured in bits. pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks { - BitChunks::new(&self, offset, len) + BitChunks::new(&self.data.as_slice()[self.offset..], offset, len) } /// Returns the number of 1-bits in this buffer. diff --git a/rust/arrow/src/util/bit_chunk_iterator.rs b/rust/arrow/src/util/bit_chunk_iterator.rs index 801c38a243f46..4f16c9160b694 100644 --- a/rust/arrow/src/util/bit_chunk_iterator.rs +++ b/rust/arrow/src/util/bit_chunk_iterator.rs @@ -14,14 +14,12 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use crate::buffer::Buffer; use crate::util::bit_util::ceil; use std::fmt::Debug; #[derive(Debug)] pub struct BitChunks<'a> { - buffer: &'a Buffer, - raw_data: *const u8, + buffer: &'a [u8], /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive) bit_offset: usize, /// number of complete u64 chunks @@ -31,22 +29,19 @@ pub struct BitChunks<'a> { } impl<'a> BitChunks<'a> { - pub fn new(buffer: &'a Buffer, offset: usize, len: usize) -> Self { + pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { assert!(ceil(offset + len, 8) <= buffer.len() * 8); let byte_offset = offset / 8; let bit_offset = offset % 8; - let raw_data = unsafe { buffer.raw_data().add(byte_offset) }; - let chunk_bits = 8 * std::mem::size_of::(); let chunk_len = len / chunk_bits; let remainder_len = len & (chunk_bits - 1); BitChunks::<'a> { - buffer: &buffer, - raw_data, + buffer: &buffer[byte_offset..], bit_offset, chunk_len, remainder_len, @@ -56,8 +51,7 @@ impl<'a> BitChunks<'a> { #[derive(Debug)] pub struct BitChunkIterator<'a> { - buffer: &'a Buffer, - raw_data: *const u8, + buffer: &'a [u8], bit_offset: usize, chunk_len: usize, index: usize, @@ -83,7 +77,8 @@ impl<'a> BitChunks<'a> { let byte_len = ceil(bit_len + bit_offset, 8); // pointer to remainder bytes after all complete chunks let base = unsafe { - self.raw_data + self.buffer + .as_ptr() .add(self.chunk_len * std::mem::size_of::()) }; @@ -102,7 +97,6 @@ impl<'a> BitChunks<'a> { pub const fn iter(&self) -> BitChunkIterator<'a> { BitChunkIterator::<'a> { buffer: self.buffer, - raw_data: self.raw_data, bit_offset: self.bit_offset, chunk_len: self.chunk_len, index: 0, @@ -131,7 +125,7 @@ impl Iterator for BitChunkIterator<'_> { // cast to *const u64 should be fine since we are using read_unaligned below #[allow(clippy::cast_ptr_alignment)] - let raw_data = self.raw_data as *const u64; + let raw_data = self.buffer.as_ptr() as *const u64; // bit-packed buffers are stored starting with the least-significant byte first // so when reading as u64 on a big-endian machine, the bytes need to be swapped