Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support native serialization format #106

Merged
merged 6 commits into from
Jun 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions croaring/benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

extern crate test;

use croaring::Bitmap;
use croaring::{Bitmap, Portable};
use test::Bencher;

#[bench]
Expand Down Expand Up @@ -314,7 +314,7 @@ fn bench_get_serialized_size_in_bytes(b: &mut Bencher) {
bitmap.add(3);

b.iter(|| {
bitmap.get_serialized_size_in_bytes();
bitmap.get_serialized_size_in_bytes::<Portable>();
});
}

Expand Down Expand Up @@ -348,7 +348,7 @@ fn bench_serialize_100000(b: &mut Bencher) {
let bitmap: Bitmap = (1..100000).collect();

b.iter(|| {
bitmap.serialize();
bitmap.serialize::<Portable>();
});
}

Expand All @@ -357,26 +357,26 @@ fn bench_serialize_1000000(b: &mut Bencher) {
let bitmap: Bitmap = (1..1000000).collect();

b.iter(|| {
bitmap.serialize();
bitmap.serialize::<Portable>();
});
}

#[bench]
fn bench_deserialize_100000(b: &mut Bencher) {
let bitmap: Bitmap = (1..100000).collect();
let serialized_buffer = bitmap.serialize();
let serialized_buffer = bitmap.serialize::<Portable>();

b.iter(|| {
Bitmap::deserialize(&serialized_buffer);
Bitmap::deserialize::<Portable>(&serialized_buffer);
});
}

#[bench]
fn bench_deserialize_1000000(b: &mut Bencher) {
let bitmap: Bitmap = (1..1000000).collect();
let serialized_buffer = bitmap.serialize();
let serialized_buffer = bitmap.serialize::<Portable>();

b.iter(|| {
Bitmap::deserialize(&serialized_buffer);
Bitmap::deserialize::<Portable>(&serialized_buffer);
});
}
127 changes: 29 additions & 98 deletions croaring/src/bitmap/imp.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use crate::Bitset;
use ffi::roaring_bitmap_t;
use std::convert::TryInto;
use std::ffi::c_char;
use std::mem;
use std::ops::{Bound, RangeBounds};

use super::serialization::{Deserializer, Serializer};
use super::{Bitmap, Statistics};

impl Bitmap {
#[inline]
#[allow(clippy::assertions_on_constants)]
unsafe fn take_heap(p: *mut roaring_bitmap_t) -> Self {
pub(crate) unsafe fn take_heap(p: *mut roaring_bitmap_t) -> Self {
// Based heavily on the `roaring.hh` cpp header from croaring

assert!(!p.is_null());
Expand Down Expand Up @@ -734,161 +734,92 @@ impl Bitmap {
buffer
}

/// Computes the serialized size in bytes of the Bitmap.
/// Computes the serialized size in bytes of the Bitmap in format `S`.
#[inline]
#[doc(alias = "roaring_bitmap_portable_size_in_bytes")]
pub fn get_serialized_size_in_bytes(&self) -> usize {
unsafe { ffi::roaring_bitmap_portable_size_in_bytes(&self.bitmap) }
pub fn get_serialized_size_in_bytes<S: Serializer>(&self) -> usize {
S::get_serialized_size_in_bytes(&self)
}

/// Computes the serialized size in bytes of the Bitmap for the frozen format.
#[inline]
#[doc(alias = "roaring_bitmap_frozen_size_in_bytes")]
pub fn get_frozen_serialized_size_in_bytes(&self) -> usize {
unsafe { ffi::roaring_bitmap_frozen_size_in_bytes(&self.bitmap) }
}

/// Serializes a bitmap to a slice of bytes.
/// Serializes a bitmap to a slice of bytes in format `S`.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap: Bitmap = (1..5).collect();
///
/// let serialized_buffer = original_bitmap.serialize();
/// let serialized_buffer = original_bitmap.serialize::<Portable>();
///
/// let deserialized_bitmap = Bitmap::deserialize(&serialized_buffer);
/// let deserialized_bitmap = Bitmap::deserialize::<Portable>(&serialized_buffer);
///
/// assert_eq!(original_bitmap, deserialized_bitmap);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_serialize")]
pub fn serialize(&self) -> Vec<u8> {
pub fn serialize<S: Serializer>(&self) -> Vec<u8> {
let mut dst = Vec::new();
self.serialize_into(&mut dst);
self.serialize_into::<S>(&mut dst);
dst
}

/// Serializes a bitmap to a slice of bytes, re-using existing capacity
/// Serializes a bitmap to a slice of bytes in format `S`, re-using existing capacity
///
/// `dst` is not cleared; the serialized data is appended after any existing data. Returns the added slice of `dst`.
/// If `dst` is empty, it is guaranteed to hold only the serialized data after this call
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap_1: Bitmap = (1..5).collect();
/// let original_bitmap_2: Bitmap = (1..10).collect();
///
/// let mut data = Vec::new();
/// for bitmap in [original_bitmap_1, original_bitmap_2] {
/// data.clear();
/// bitmap.serialize_into(&mut data);
/// bitmap.serialize_into::<Portable>(&mut data);
/// // do something with data
/// }
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_serialize")]
pub fn serialize_into<'a>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
let len = self.get_serialized_size_in_bytes();

dst.reserve(len);
let total_len = dst.len().checked_add(len).unwrap();

unsafe {
ffi::roaring_bitmap_portable_serialize(
&self.bitmap,
dst.spare_capacity_mut().as_mut_ptr().cast::<c_char>(),
);
dst.set_len(total_len);
}

dst
}

/// Serialize into the "frozen" format
///
/// This has an odd API because it always returns a slice which is aligned to 32 bytes:
/// This means the returned slice may not start exactly at the beginning of the passed Vec
#[doc(alias = "roaring_bitmap_frozen_serialize")]
pub fn serialize_frozen_into<'a>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
const REQUIRED_ALIGNMENT: usize = 32;
let len = self.get_frozen_serialized_size_in_bytes();

let offset = dst.len();
// Need to be able to add up to 31 extra bytes to align to 32 bytes
dst.reserve(len.checked_add(REQUIRED_ALIGNMENT - 1).unwrap());

let extra_offset = match (dst.as_ptr() as usize) % REQUIRED_ALIGNMENT {
0 => 0,
r => REQUIRED_ALIGNMENT - r,
};
let offset = offset.checked_add(extra_offset).unwrap();
let total_len = offset.checked_add(len).unwrap();
debug_assert!(dst.capacity() >= total_len);

// we must initialize up to offset
dst.resize(offset, 0);

unsafe {
ffi::roaring_bitmap_frozen_serialize(
&self.bitmap,
dst.as_mut_ptr().add(offset).cast::<c_char>(),
);
dst.set_len(total_len);
}

&dst[offset..total_len]
pub fn serialize_into<'a, S: Serializer>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
S::serialize_into(self, dst)
}

/// Given a serialized bitmap as slice of bytes returns a bitmap instance.
/// Given a serialized bitmap as a slice of bytes in format `D`, returns a `Bitmap` instance.
/// See example of [`Self::serialize`] function.
///
/// On invalid input returns None.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap: Bitmap = (1..5).collect();
/// let serialized_buffer = original_bitmap.serialize();
/// let serialized_buffer = original_bitmap.serialize::<Portable>();
///
/// let deserialized_bitmap = Bitmap::try_deserialize(&serialized_buffer);
/// let deserialized_bitmap = Bitmap::try_deserialize::<Portable>(&serialized_buffer);
/// assert_eq!(original_bitmap, deserialized_bitmap.unwrap());
///
/// let invalid_buffer: Vec<u8> = vec![3];
/// let deserialized_bitmap = Bitmap::try_deserialize(&invalid_buffer);
/// let deserialized_bitmap = Bitmap::try_deserialize::<Portable>(&invalid_buffer);
/// assert!(deserialized_bitmap.is_none());
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_deserialize_safe")]
pub fn try_deserialize(buffer: &[u8]) -> Option<Self> {
unsafe {
let bitmap = ffi::roaring_bitmap_portable_deserialize_safe(
buffer.as_ptr() as *const c_char,
buffer.len(),
);

if !bitmap.is_null() {
Some(Self::take_heap(bitmap))
} else {
None
}
}
pub fn try_deserialize<D: Deserializer>(buffer: &[u8]) -> Option<Self> {
D::try_deserialize(buffer)
}

/// Given a serialized bitmap as slice of bytes returns a bitmap instance.
/// Given a serialized bitmap as a slice of bytes in format `D`, returns a `Bitmap` instance.
/// See example of [`Self::serialize`] function.
///
/// On invalid input, returns an empty bitmap.
#[inline]
pub fn deserialize(buffer: &[u8]) -> Self {
Self::try_deserialize(buffer).unwrap_or_else(Bitmap::create)
pub fn deserialize<D: Deserializer>(buffer: &[u8]) -> Self {
Self::try_deserialize::<D>(buffer).unwrap_or_else(Bitmap::create)
}

/// Creates a new bitmap from a slice of u32 integers
Expand Down Expand Up @@ -1029,14 +960,14 @@ impl Bitmap {
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let mut bitmap: Bitmap = (100..1000).collect();
///
/// assert_eq!(bitmap.cardinality(), 900);
/// let old_size = bitmap.get_serialized_size_in_bytes();
/// let old_size = bitmap.get_serialized_size_in_bytes::<Portable>();
/// assert!(bitmap.run_optimize());
/// let new_size = bitmap.get_serialized_size_in_bytes();
/// let new_size = bitmap.get_serialized_size_in_bytes::<Portable>();
/// assert!(new_size < old_size);
/// ```
#[inline]
Expand Down
2 changes: 2 additions & 0 deletions croaring/src/bitmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ mod imp;
mod iter;
mod lazy;
mod ops;
mod serialization;
mod view;

pub use self::iter::BitmapIterator;
pub use self::lazy::LazyBitmap;
pub use self::serialization::{Frozen, Native, Portable};
Loading