Skip to content

Commit

Permalink
Split BitSet into two types and make it a bit nicer (typst#4249)
Browse files Browse the repository at this point in the history
  • Loading branch information
laurmaedje committed May 26, 2024
1 parent 34f1a23 commit 0d065eb
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 37 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ syn = { version = "2", features = ["full", "extra-traits"] }
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
tar = "0.4"
tempfile = "3.7.0"
thin-vec = "0.2.13"
time = { version = "0.3.20", features = ["formatting", "macros", "parsing"] }
tiny-skia = "0.11"
toml = { version = "0.8", default-features = false, features = ["parse", "display"] }
Expand Down
3 changes: 2 additions & 1 deletion crates/typst-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ readme = { workspace = true }

[dependencies]
once_cell = { workspace = true }
siphasher = { workspace = true }
portable-atomic = { workspace = true }
rayon = { workspace = true }
siphasher = { workspace = true }
thin-vec = { workspace = true }

[lints]
workspace = true
103 changes: 73 additions & 30 deletions crates/typst-utils/src/bitset.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,88 @@
use std::fmt::{self, Debug, Formatter};

/// Efficiently stores a set of numbers which are expected to be very small
/// (< 32/64 depending on the architecture).
use thin_vec::ThinVec;

/// The number of bits per chunk.
const BITS: usize = usize::BITS as usize;

/// Stores a set of numbers which are expected to be rather small.
///
/// Inserting a very small value is cheap while inserting a large one may be
/// very expensive.
///
/// Inserting a very small value is very cheap while inserting a large one may
/// be very expensive.
/// Unless you're managing small numbers yourself, you should likely prefer
/// `SmallBitSet`, which has a bit larger memory size, but does not allocate
/// for small numbers.
#[derive(Clone, PartialEq, Hash)]
pub struct BitSet {
pub struct BitSet(ThinVec<usize>);

impl BitSet {
    /// Constructs a bit set that contains no numbers and holds no chunks.
    pub fn new() -> Self {
        Self(ThinVec::new())
    }

    /// Adds `value` to the set.
    ///
    /// The backing storage is extended with zeroed chunks when `value` falls
    /// beyond the chunks that currently exist.
    pub fn insert(&mut self, value: usize) {
        let (index, offset) = (value / BITS, value % BITS);
        let needed = index + 1;
        if self.0.len() < needed {
            self.0.resize(needed, 0);
        }
        self.0[index] |= 1 << offset;
    }

    /// Returns `true` if `value` is a member of the set.
    ///
    /// Values that map to a chunk which does not exist are reported as absent.
    pub fn contains(&self, value: usize) -> bool {
        let (index, offset) = (value / BITS, value % BITS);
        match self.0.get(index) {
            Some(word) => (word & (1 << offset)) != 0,
            None => false,
        }
    }
}

impl Default for BitSet {
fn default() -> Self {
Self::new()
}
}

impl Debug for BitSet {
    /// Formats the set as a list of its members in ascending order.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Every representable value lies below `chunk count * BITS`.
        let upper = self.0.len() * BITS;
        f.debug_list()
            .entries((0..upper).filter(|&n| self.contains(n)))
            .finish()
    }
}

/// Efficiently stores a set of numbers which are expected to be very small.
/// Values `< 32/64` (depending on the architecture) are stored inline, while
/// values larger than that will lead to an allocation.
#[derive(Clone, PartialEq, Hash)]
pub struct SmallBitSet {
/// Used to store values < BITS.
low: usize,
/// Used to store values > BITS. We have the extra `Box` to keep the memory
/// size of the `BitSet` down.
#[allow(clippy::box_collection)]
hi: Option<Box<Vec<usize>>>,
/// Used to store values >= BITS.
hi: BitSet,
}

/// The number of bits per chunk.
const BITS: usize = usize::BITS as usize;

impl BitSet {
impl SmallBitSet {
/// Creates a new empty bit set.
pub fn new() -> Self {
Self { low: 0, hi: None }
Self { low: 0, hi: BitSet::new() }
}

/// Inserts a number into the set.
pub fn insert(&mut self, value: usize) {
if value < BITS {
self.low |= 1 << value;
} else {
let chunk = value / BITS - 1;
let within = value % BITS;
let vec = self.hi.get_or_insert_with(Default::default);
if chunk >= vec.len() {
vec.resize(chunk + 1, 0);
}
vec[chunk] |= 1 << within;
self.hi.insert(value - BITS);
}
}

Expand All @@ -44,25 +91,21 @@ impl BitSet {
if value < BITS {
(self.low & (1 << value)) != 0
} else {
let Some(hi) = &self.hi else { return false };
let chunk = value / BITS - 1;
let within = value % BITS;
let Some(bits) = hi.get(chunk) else { return false };
(bits & (1 << within)) != 0
self.hi.contains(value - BITS)
}
}
}

impl Default for BitSet {
impl Default for SmallBitSet {
fn default() -> Self {
Self::new()
}
}

impl Debug for BitSet {
impl Debug for SmallBitSet {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut list = f.debug_list();
let chunks = 1 + self.hi.as_ref().map_or(0, |v| v.len());
let chunks = 1 + self.hi.0.len();
for v in 0..chunks * BITS {
if self.contains(v) {
list.entry(&v);
Expand All @@ -78,7 +121,7 @@ mod tests {

#[test]
fn test_bitset() {
let mut set = BitSet::new();
let mut set = SmallBitSet::new();
assert!(!set.contains(0));
assert!(!set.contains(5));
set.insert(0);
Expand Down
2 changes: 1 addition & 1 deletion crates/typst-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod hash;
mod pico;
mod scalar;

pub use self::bitset::BitSet;
pub use self::bitset::{BitSet, SmallBitSet};
pub use self::deferred::Deferred;
pub use self::hash::LazyHash;
pub use self::pico::PicoStr;
Expand Down
6 changes: 3 additions & 3 deletions crates/typst/src/foundations/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::model::{Destination, EmphElem, LinkElem, StrongElem};
use crate::realize::{Behave, Behaviour};
use crate::syntax::Span;
use crate::text::UnderlineElem;
use crate::utils::{fat, BitSet, LazyHash};
use crate::utils::{fat, LazyHash, SmallBitSet};

/// A piece of document content.
///
Expand Down Expand Up @@ -90,7 +90,7 @@ struct Inner<T: ?Sized + 'static> {
/// - If bit 0 is set, the element is prepared.
/// - If bit n is set, the element is guarded against the n-th show rule
/// recipe from the top of the style chain (counting from 1).
lifecycle: BitSet,
lifecycle: SmallBitSet,
/// The element's raw data.
elem: LazyHash<T>,
}
Expand All @@ -102,7 +102,7 @@ impl Content {
inner: Arc::new(Inner {
label: None,
location: None,
lifecycle: BitSet::new(),
lifecycle: SmallBitSet::new(),
elem: elem.into(),
}),
span: Span::detached(),
Expand Down
4 changes: 2 additions & 2 deletions crates/typst/src/realize/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::foundations::{
};
use crate::introspection::{Locatable, TagElem};
use crate::text::TextElem;
use crate::utils::{hash128, BitSet};
use crate::utils::{hash128, SmallBitSet};

/// What to do with an element when encountering it during realization.
struct Verdict<'a> {
Expand Down Expand Up @@ -83,7 +83,7 @@ fn verdict<'a>(
) -> Option<Verdict<'a>> {
let mut target = target;
let mut map = Styles::new();
let mut revoked = BitSet::new();
let mut revoked = SmallBitSet::new();
let mut step = None;
let mut slot;

Expand Down

0 comments on commit 0d065eb

Please sign in to comment.