From 61e8433621ce247b32cff9d94d9c64066922650c Mon Sep 17 00:00:00 2001 From: Andreas Garnaes Date: Tue, 13 Jun 2023 13:21:39 +0200 Subject: [PATCH] Use traits and generic parameters for serialization --- croaring/benches/benches.rs | 16 ++--- croaring/src/bitmap/imp.rs | 37 ++++-------- croaring/src/bitmap/mod.rs | 2 +- croaring/src/bitmap/serialization.rs | 86 ++++++++++++++++++++++----- croaring/src/bitmap/view.rs | 51 ++-------------- croaring/src/lib.rs | 1 + croaring/src/treemap/serialization.rs | 18 +++--- croaring/tests/lib.rs | 18 +++--- 8 files changed, 114 insertions(+), 115 deletions(-) diff --git a/croaring/benches/benches.rs b/croaring/benches/benches.rs index f9a9e99..d7b233f 100644 --- a/croaring/benches/benches.rs +++ b/croaring/benches/benches.rs @@ -2,7 +2,7 @@ extern crate test; -use croaring::Bitmap; +use croaring::{Bitmap, Portable}; use test::Bencher; #[bench] @@ -314,7 +314,7 @@ fn bench_get_serialized_size_in_bytes(b: &mut Bencher) { bitmap.add(3); b.iter(|| { - bitmap.get_serialized_size_in_bytes(); + bitmap.get_serialized_size_in_bytes::(); }); } @@ -348,7 +348,7 @@ fn bench_serialize_100000(b: &mut Bencher) { let bitmap: Bitmap = (1..100000).collect(); b.iter(|| { - bitmap.serialize(); + bitmap.serialize::(); }); } @@ -357,26 +357,26 @@ fn bench_serialize_1000000(b: &mut Bencher) { let bitmap: Bitmap = (1..1000000).collect(); b.iter(|| { - bitmap.serialize(); + bitmap.serialize::(); }); } #[bench] fn bench_deserialize_100000(b: &mut Bencher) { let bitmap: Bitmap = (1..100000).collect(); - let serialized_buffer = bitmap.serialize(); + let serialized_buffer = bitmap.serialize::(); b.iter(|| { - Bitmap::deserialize(&serialized_buffer); + Bitmap::deserialize::(&serialized_buffer); }); } #[bench] fn bench_deserialize_1000000(b: &mut Bencher) { let bitmap: Bitmap = (1..1000000).collect(); - let serialized_buffer = bitmap.serialize(); + let serialized_buffer = bitmap.serialize::(); b.iter(|| { - Bitmap::deserialize(&serialized_buffer); + Bitmap::deserialize::(&serialized_buffer); }); } diff --git a/croaring/src/bitmap/imp.rs b/croaring/src/bitmap/imp.rs index ad78780..aa49861 100644 --- a/croaring/src/bitmap/imp.rs +++ b/croaring/src/bitmap/imp.rs @@ -3,6 +3,7 @@ use std::convert::TryInto; use std::mem; use std::ops::{Bound, RangeBounds}; +use super::serialization::{Deserializer, Serializer}; use super::{Bitmap, Statistics}; impl Bitmap { @@ -735,15 +736,8 @@ impl Bitmap { /// Computes the serialized size in bytes of the Bitmap. #[inline] #[doc(alias = "roaring_bitmap_portable_size_in_bytes")] - pub fn get_serialized_size_in_bytes(&self) -> usize { - super::PortableSerializer::get_serialized_size_in_bytes(&self) - } - - /// Computes the serialized size in bytes of the Bitmap for the frozen format. - #[inline] - #[doc(alias = "roaring_bitmap_frozen_size_in_bytes")] - pub fn get_frozen_serialized_size_in_bytes(&self) -> usize { - super::FrozenSerializer::get_serialized_size_in_bytes(&self) + pub fn get_serialized_size_in_bytes(&self) -> usize { + S::get_serialized_size_in_bytes(&self) } /// Serializes a bitmap to a slice of bytes. @@ -763,9 +757,9 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_portable_serialize")] - pub fn serialize(&self) -> Vec { + pub fn serialize(&self) -> Vec { let mut dst = Vec::new(); - self.serialize_into(&mut dst); + self.serialize_into::(&mut dst); dst } @@ -791,17 +785,8 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_portable_serialize")] - pub fn serialize_into<'a>(&self, dst: &'a mut Vec) -> &'a [u8] { - super::PortableSerializer::serialize_into(self, dst) - } - - /// Serialize into the "frozen" format - /// - /// This has an odd API because it always returns a slice which is aligned to 32 bytes: - /// This means the returned slice may not start exactly at the beginning of the passed Vec - #[doc(alias = "roaring_bitmap_frozen_serialize")] - pub fn serialize_frozen_into<'a>(&self, dst: &'a mut Vec) -> &'a [u8] { - super::FrozenSerializer::serialize_into(self, dst) + pub fn serialize_into<'a, S: Serializer>(&self, dst: &'a mut Vec) -> &'a [u8] { + S::serialize_into(self, dst) } /// Given a serialized bitmap as slice of bytes returns a bitmap instance. @@ -826,8 +811,8 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_portable_deserialize_safe")] - pub fn try_deserialize(buffer: &[u8]) -> Option { - super::PortableSerializer::try_deserialize(buffer) + pub fn try_deserialize(buffer: &[u8]) -> Option { + D::try_deserialize(buffer) } /// Given a serialized bitmap as slice of bytes returns a bitmap instance. @@ -835,8 +820,8 @@ impl Bitmap { /// /// On invalid input returns empty bitmap. #[inline] - pub fn deserialize(buffer: &[u8]) -> Self { - Self::try_deserialize(buffer).unwrap_or_else(Bitmap::create) + pub fn deserialize(buffer: &[u8]) -> Self { + Self::try_deserialize::(buffer).unwrap_or_else(Bitmap::create) } /// Creates a new bitmap from a slice of u32 integers diff --git a/croaring/src/bitmap/mod.rs b/croaring/src/bitmap/mod.rs index cb13535..d80c2a9 100644 --- a/croaring/src/bitmap/mod.rs +++ b/croaring/src/bitmap/mod.rs @@ -89,4 +89,4 @@ mod view; pub use self::iter::BitmapIterator; pub use self::lazy::LazyBitmap; -pub use self::serialization::{FrozenSerializer, NativeSerializer, PortableSerializer}; +pub use self::serialization::{Frozen, Native, Portable}; diff --git a/croaring/src/bitmap/serialization.rs b/croaring/src/bitmap/serialization.rs index 8719eed..14e038a 100644 --- a/croaring/src/bitmap/serialization.rs +++ b/croaring/src/bitmap/serialization.rs @@ -1,11 +1,24 @@ -use super::Bitmap; +use super::{Bitmap, BitmapView}; use std::ffi::{c_char, c_void}; -pub struct PortableSerializer {} +pub trait Serializer { + fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8]; + fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize; +} + +pub trait Deserializer { + fn try_deserialize(buffer: &[u8]) -> Option; +} + +pub trait ViewDeserializer { + unsafe fn deserialize_view(data: &[u8]) -> BitmapView<'_>; +} + +pub enum Portable {} -impl PortableSerializer { - pub fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { +impl Serializer for Portable { + fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { let len = Self::get_serialized_size_in_bytes(bitmap); dst.reserve(len); @@ -22,11 +35,13 @@ impl PortableSerializer { dst } - pub fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { + fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { unsafe { ffi::roaring_bitmap_portable_size_in_bytes(&bitmap.bitmap) } } +} - pub fn try_deserialize(buffer: &[u8]) -> Option { +impl Deserializer for Portable { + fn try_deserialize(buffer: &[u8]) -> Option { unsafe { let bitmap = ffi::roaring_bitmap_portable_deserialize_safe( buffer.as_ptr() as *const c_char, @@ -42,10 +57,32 @@ impl PortableSerializer { } } -pub struct NativeSerializer {} +impl ViewDeserializer for Portable { + /// Read bitmap from a serialized buffer + /// + /// This is meant to be compatible with the Java and Go versions + /// + /// # Safety + /// * `data` must be the result of serializing a roaring bitmap in portable mode + /// (following `https://github.com/RoaringBitmap/RoaringFormatSpec`), for example, with + /// [`Bitmap::serialize`] + /// * Using this function (or the returned bitmap in any way) may execute unaligned memory accesses + /// + unsafe fn deserialize_view<'a>(data: &'a [u8]) -> BitmapView { + // portable_deserialize_size does some amount of checks, and returns zero if data cannot be valid + debug_assert_ne!( + ffi::roaring_bitmap_portable_deserialize_size(data.as_ptr().cast(), data.len()), + 0, + ); + let roaring = ffi::roaring_bitmap_portable_deserialize_frozen(data.as_ptr().cast()); + BitmapView::take_heap(roaring) + } +} + +pub enum Native {} -impl NativeSerializer { - pub fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { +impl Serializer for Native { + fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { let len = Self::get_serialized_size_in_bytes(bitmap); dst.reserve(len); @@ -62,11 +99,13 @@ impl NativeSerializer { dst } - pub fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { + fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { unsafe { ffi::roaring_bitmap_size_in_bytes(&bitmap.bitmap) } } +} - pub fn try_deserialize(buffer: &[u8]) -> Option { +impl Deserializer for Native { + fn try_deserialize(buffer: &[u8]) -> Option { unsafe { let bitmap = ffi::roaring_bitmap_deserialize_safe( buffer.as_ptr() as *const c_void, @@ -82,10 +121,10 @@ impl NativeSerializer { } } -pub struct FrozenSerializer {} +pub enum Frozen {} -impl FrozenSerializer { - pub fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { +impl Serializer for Frozen { + fn serialize_into<'a>(bitmap: &Bitmap, dst: &'a mut Vec) -> &'a [u8] { const REQUIRED_ALIGNMENT: usize = 32; let len = Self::get_serialized_size_in_bytes(bitmap); @@ -115,7 +154,24 @@ impl FrozenSerializer { &dst[offset..total_len] } - pub fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { + fn get_serialized_size_in_bytes(bitmap: &Bitmap) -> usize { unsafe { ffi::roaring_bitmap_frozen_size_in_bytes(&bitmap.bitmap) } } } + +impl ViewDeserializer for Frozen { + /// Create a frozen bitmap view using the passed data + /// + /// # Safety + /// * `data` must be the result of serializing a roaring bitmap in frozen mode + /// (in c with `roaring_bitmap_frozen_serialize`, or via [`Bitmap::serialize_frozen_into`]). + /// * Its beginning must be aligned by 32 bytes. + /// * data.len() must be equal exactly to the size of the frozen bitmap. + unsafe fn deserialize_view<'a>(data: &'a [u8]) -> BitmapView { + const REQUIRED_ALIGNMENT: usize = 32; + assert_eq!(data.as_ptr() as usize % REQUIRED_ALIGNMENT, 0); + + let roaring = ffi::roaring_bitmap_frozen_view(data.as_ptr().cast(), data.len()); + BitmapView::take_heap(roaring) + } +} diff --git a/croaring/src/bitmap/view.rs b/croaring/src/bitmap/view.rs index 29cb03a..00055bf 100644 --- a/croaring/src/bitmap/view.rs +++ b/croaring/src/bitmap/view.rs @@ -1,3 +1,4 @@ +use super::serialization::ViewDeserializer; use super::{Bitmap, BitmapView}; use ffi::roaring_bitmap_t; use std::marker::PhantomData; @@ -19,7 +20,7 @@ const fn original_bitmap_ptr(bitmap: &roaring_bitmap_t) -> *const roaring_bitmap impl<'a> BitmapView<'a> { #[inline] #[allow(clippy::assertions_on_constants)] - unsafe fn take_heap(p: *const roaring_bitmap_t) -> Self { + pub(crate) unsafe fn take_heap(p: *const roaring_bitmap_t) -> Self { // This depends somewhat heavily on the implementation of croaring, // In particular, that `roaring_bitmap_t` doesn't store any pointers into itself // (it can be moved safely), and a "frozen" bitmap is stored in an arena, and the @@ -44,44 +45,6 @@ impl<'a> BitmapView<'a> { } } - /// Create a frozen bitmap view using the passed data - /// - /// # Safety - /// * `data` must be the result of serializing a roaring bitmap in frozen mode - /// (in c with `roaring_bitmap_frozen_serialize`, or via [`Bitmap::serialize_frozen_into`]). - /// * Its beginning must be aligned by 32 bytes. - /// * data.len() must be equal exactly to the size of the frozen bitmap. - /// - /// # Examples - /// - /// ``` - /// use croaring::{Bitmap, BitmapView}; - /// let orig_bitmap = Bitmap::of(&[1, 2, 3, 4]); - /// let mut buf = Vec::new(); - /// let data: &[u8] = orig_bitmap.serialize_frozen_into(&mut buf); - /// let view = unsafe { BitmapView::deserialize_frozen(&data) }; - /// assert!(view.contains_range(1..=4)); - /// assert_eq!(orig_bitmap, view); - /// ``` - #[doc(alias = "roaring_bitmap_frozen_view")] - pub unsafe fn deserialize_frozen(data: &'a [u8]) -> Self { - const REQUIRED_ALIGNMENT: usize = 32; - assert_eq!(data.as_ptr() as usize % REQUIRED_ALIGNMENT, 0); - - let roaring = ffi::roaring_bitmap_frozen_view(data.as_ptr().cast(), data.len()); - Self::take_heap(roaring) - } - - /// Read bitmap from a serialized buffer - /// - /// This is meant to be compatible with the Java and Go versions - /// - /// # Safety - /// * `data` must be the result of serializing a roaring bitmap in portable mode - /// (following `https://github.com/RoaringBitmap/RoaringFormatSpec`), for example, with - /// [`Bitmap::serialize`] - /// * Using this function (or the returned bitmap in any way) may execute unaligned memory accesses - /// /// # Examples /// /// ``` @@ -93,14 +56,8 @@ impl<'a> BitmapView<'a> { /// assert_eq!(orig_bitmap, view); /// ``` #[doc(alias = "roaring_bitmap_portable_deserialize_frozen")] - pub unsafe fn deserialize(data: &'a [u8]) -> Self { - // portable_deserialize_size does some amount of checks, and returns zero if data cannot be valid - debug_assert_ne!( - ffi::roaring_bitmap_portable_deserialize_size(data.as_ptr().cast(), data.len()), - 0, - ); - let roaring = ffi::roaring_bitmap_portable_deserialize_frozen(data.as_ptr().cast()); - Self::take_heap(roaring) + pub unsafe fn deserialize(data: &'a [u8]) -> Self { + S::deserialize_view(data) } /// Create an owned, mutable bitmap from this view diff --git a/croaring/src/lib.rs b/croaring/src/lib.rs index 6c561f4..0742e6a 100644 --- a/croaring/src/lib.rs +++ b/croaring/src/lib.rs @@ -6,3 +6,4 @@ pub use bitmap::BitmapIterator; pub use treemap::Treemap; pub use bitmap::BitmapView; +pub use bitmap::{Frozen, Native, Portable}; diff --git a/croaring/src/treemap/serialization.rs b/croaring/src/treemap/serialization.rs index 1d71836..d9d787d 100644 --- a/croaring/src/treemap/serialization.rs +++ b/croaring/src/treemap/serialization.rs @@ -1,5 +1,5 @@ -use crate::Bitmap; use crate::Treemap; +use crate::{Bitmap, Portable}; use byteorder::{BigEndian, NativeEndian, ReadBytesExt, WriteBytesExt}; use std::io::{Cursor, Result, Seek, SeekFrom}; @@ -28,7 +28,7 @@ impl NativeSerializer for Treemap { for (index, bitmap) in &self.map { buffer.write_u32::(*index)?; - let bitmap_buffer = bitmap.serialize(); + let bitmap_buffer = bitmap.serialize::(); buffer.extend(bitmap_buffer); } @@ -42,9 +42,9 @@ impl NativeSerializer for Treemap { for _ in 0..bitmap_count { let index = cursor.read_u32::()?; - let bitmap = Bitmap::deserialize(&buffer[cursor.position() as usize..]); + let bitmap = Bitmap::deserialize::(&buffer[cursor.position() as usize..]); cursor.seek(SeekFrom::Current( - bitmap.get_serialized_size_in_bytes() as i64 + bitmap.get_serialized_size_in_bytes::() as i64, ))?; treemap.map.insert(index, bitmap); } @@ -75,7 +75,7 @@ impl NativeSerializer for Treemap { fn get_serialized_size_in_bytes(&self) -> usize { self.map.iter().fold( size_of::() + self.map.len() * size_of::(), - |sum, (_, bitmap)| sum + bitmap.get_serialized_size_in_bytes(), + |sum, (_, bitmap)| sum + bitmap.get_serialized_size_in_bytes::(), ) } } @@ -101,7 +101,7 @@ impl JvmSerializer for Treemap { for (index, bitmap) in &self.map { buffer.write_u32::(*index)?; - let bitmap_buffer = bitmap.serialize(); + let bitmap_buffer = bitmap.serialize::(); buffer.extend(bitmap_buffer); } @@ -117,9 +117,9 @@ impl JvmSerializer for Treemap { for _ in 0..bitmap_count { let index = cursor.read_u32::()?; - let bitmap = Bitmap::deserialize(&buffer[cursor.position() as usize..]); + let bitmap = Bitmap::deserialize::(&buffer[cursor.position() as usize..]); cursor.seek(SeekFrom::Current( - bitmap.get_serialized_size_in_bytes() as i64 + bitmap.get_serialized_size_in_bytes::() as i64, ))?; treemap.map.insert(index, bitmap); } @@ -150,7 +150,7 @@ impl JvmSerializer for Treemap { fn get_serialized_size_in_bytes(&self) -> usize { self.map.iter().fold( size_of::() + size_of::() + self.map.len() * size_of::(), - |sum, (_, bitmap)| sum + bitmap.get_serialized_size_in_bytes(), + |sum, (_, bitmap)| sum + bitmap.get_serialized_size_in_bytes::(), ) } } diff --git a/croaring/tests/lib.rs b/croaring/tests/lib.rs index 994961b..240fd68 100644 --- a/croaring/tests/lib.rs +++ b/croaring/tests/lib.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::{fs, iter, u32}; -use croaring::{Bitmap, BitmapView, Treemap}; +use croaring::{Bitmap, BitmapView, Frozen, Portable, Treemap}; use proptest::prelude::*; // borrowed and adapted from https://github.com/Nemo157/roaring-rs/blob/5089f180ca7e17db25f5c58023f4460d973e747f/tests/lib.rs#L7-L37 @@ -105,7 +105,7 @@ fn expected_serialized_bitmap() -> Bitmap { #[test] fn test_portable_view() { let buffer = fs::read("tests/data/portable_bitmap.bin").unwrap(); - let bitmap = unsafe { BitmapView::deserialize(&buffer) }; + let bitmap = unsafe { BitmapView::deserialize::(&buffer) }; let expected = expected_serialized_bitmap(); assert_eq!(bitmap, expected); assert!(bitmap.iter().eq(expected.iter())) @@ -119,7 +119,7 @@ fn test_frozen_view() { let offset = 32 - (buffer.as_ptr() as usize) % 32; buffer.splice(..0, iter::repeat(0).take(offset)); - let bitmap = unsafe { BitmapView::deserialize_frozen(&buffer[offset..]) }; + let bitmap = unsafe { BitmapView::deserialize::(&buffer[offset..]) }; let expected = expected_serialized_bitmap(); assert_eq!(bitmap, expected); assert!(bitmap.iter().eq(expected.iter())) @@ -230,9 +230,9 @@ proptest! { ) { let original = Bitmap::of(&indices); - let buffer = original.serialize(); + let buffer = original.serialize::(); - let deserialized = Bitmap::deserialize(&buffer); + let deserialized = Bitmap::deserialize::(&buffer); prop_assert_eq!(original , deserialized); } @@ -276,8 +276,8 @@ proptest! { use croaring::BitmapView; let original = Bitmap::of(&indices); - let serialized = original.serialize(); - let deserialized = unsafe { BitmapView::deserialize(&serialized) }; + let serialized = original.serialize::(); + let deserialized = unsafe { BitmapView::deserialize::(&serialized) }; assert_eq!(&original, &*deserialized); assert!(original.iter().eq(deserialized.iter())); } @@ -290,8 +290,8 @@ proptest! { let original = Bitmap::of(&indices); let mut buf = Vec::new(); - let serialized: &[u8] = original.serialize_frozen_into(&mut buf); - let deserialized = unsafe { BitmapView::deserialize_frozen(serialized) }; + let serialized: &[u8] = original.serialize_into::(&mut buf); + let deserialized = unsafe { BitmapView::deserialize::(serialized) }; assert_eq!(&original, &*deserialized); assert!(original.iter().eq(deserialized.iter())); }