From 0d065eb7c55f8a80565da31bda51a373e69e7d3b Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 24 May 2024 23:09:54 +0200 Subject: [PATCH] Split `BitSet` into two types and make it a bit nicer (#4249) --- Cargo.lock | 7 ++ Cargo.toml | 1 + crates/typst-utils/Cargo.toml | 3 +- crates/typst-utils/src/bitset.rs | 103 +++++++++++++++++------- crates/typst-utils/src/lib.rs | 2 +- crates/typst/src/foundations/content.rs | 6 +- crates/typst/src/realize/process.rs | 4 +- 7 files changed, 89 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61c35c9878c0..44597c476396 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2354,6 +2354,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "thin-vec" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b" + [[package]] name = "thiserror" version = "1.0.57" @@ -2812,6 +2818,7 @@ dependencies = [ "portable-atomic", "rayon", "siphasher 1.0.0", + "thin-vec", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 810ff7b8cd39..45e13bbfcec0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,6 +104,7 @@ syn = { version = "2", features = ["full", "extra-traits"] } syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] } tar = "0.4" tempfile = "3.7.0" +thin-vec = "0.2.13" time = { version = "0.3.20", features = ["formatting", "macros", "parsing"] } tiny-skia = "0.11" toml = { version = "0.8", default-features = false, features = ["parse", "display"] } diff --git a/crates/typst-utils/Cargo.toml b/crates/typst-utils/Cargo.toml index ba75e399e05d..5f828cff93b3 100644 --- a/crates/typst-utils/Cargo.toml +++ b/crates/typst-utils/Cargo.toml @@ -14,9 +14,10 @@ readme = { workspace = true } [dependencies] once_cell = { workspace = true } -siphasher = { workspace = true } portable-atomic = { workspace = true } rayon = { workspace = true } +siphasher = { 
workspace = true } +thin-vec = { workspace = true } [lints] workspace = true diff --git a/crates/typst-utils/src/bitset.rs b/crates/typst-utils/src/bitset.rs index cbac7a1eb041..fa57e63138a2 100644 --- a/crates/typst-utils/src/bitset.rs +++ b/crates/typst-utils/src/bitset.rs @@ -1,27 +1,80 @@ use std::fmt::{self, Debug, Formatter}; -/// Efficiently stores a set of numbers which are expected to be very small -/// (< 32/64 depending on the architecture). +use thin_vec::ThinVec; + +/// The number of bits per chunk. +const BITS: usize = usize::BITS as usize; + +/// Stores a set of numbers which are expected to be rather small. +/// +/// Inserting a very small value is cheap while inserting a large one may be +/// very expensive. /// -/// Inserting a very small value is very cheap while inserting a large one may -/// be very expensive. +/// Unless you're managing small numbers yourself, you should likely prefer +/// `SmallBitSet`, which has a bit larger memory size, but does not allocate +/// for small numbers. #[derive(Clone, PartialEq, Hash)] -pub struct BitSet { +pub struct BitSet(ThinVec<usize>); + +impl BitSet { + /// Creates a new empty bit set. + pub fn new() -> Self { + Self(ThinVec::new()) + } + + /// Inserts a number into the set. + pub fn insert(&mut self, value: usize) { + let chunk = value / BITS; + let within = value % BITS; + if chunk >= self.0.len() { + self.0.resize(chunk + 1, 0); + } + self.0[chunk] |= 1 << within; + } + + /// Whether a number is present in the set. 
+ pub fn contains(&self, value: usize) -> bool { + let chunk = value / BITS; + let within = value % BITS; + let Some(bits) = self.0.get(chunk) else { return false }; + (bits & (1 << within)) != 0 + } +} + +impl Default for BitSet { + fn default() -> Self { + Self::new() + } +} + +impl Debug for BitSet { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut list = f.debug_list(); + let chunks = self.0.len(); + for v in 0..chunks * BITS { + if self.contains(v) { + list.entry(&v); + } + } + list.finish() + } +} + +/// Efficiently stores a set of numbers which are expected to be very small. +/// Values `< 32/64` (depending on the architecture) are stored inline, while +/// values larger than that will lead to an allocation. +#[derive(Clone, PartialEq, Hash)] +pub struct SmallBitSet { /// Used to store values < BITS. low: usize, - /// Used to store values > BITS. We have the extra `Box` to keep the memory - /// size of the `BitSet` down. - #[allow(clippy::box_collection)] - hi: Option<Box<Vec<usize>>>, + /// Used to store values > BITS. + hi: BitSet, } -/// The number of bits per chunk. -const BITS: usize = usize::BITS as usize; - -impl BitSet { +impl SmallBitSet { /// Creates a new empty bit set. pub fn new() -> Self { - Self { low: 0, hi: None } + Self { low: 0, hi: BitSet::new() } } /// Inserts a number into the set. 
@@ -29,13 +82,7 @@ impl BitSet { if value < BITS { self.low |= 1 << value; } else { - let chunk = value / BITS - 1; - let within = value % BITS; - let vec = self.hi.get_or_insert_with(Default::default); - if chunk >= vec.len() { - vec.resize(chunk + 1, 0); - } - vec[chunk] |= 1 << within; + self.hi.insert(value - BITS); } } @@ -44,25 +91,21 @@ impl BitSet { if value < BITS { (self.low & (1 << value)) != 0 } else { - let Some(hi) = &self.hi else { return false }; - let chunk = value / BITS - 1; - let within = value % BITS; - let Some(bits) = hi.get(chunk) else { return false }; - (bits & (1 << within)) != 0 + self.hi.contains(value - BITS) } } } -impl Default for BitSet { +impl Default for SmallBitSet { fn default() -> Self { Self::new() } } -impl Debug for BitSet { +impl Debug for SmallBitSet { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let mut list = f.debug_list(); - let chunks = 1 + self.hi.as_ref().map_or(0, |v| v.len()); + let chunks = 1 + self.hi.0.len(); for v in 0..chunks * BITS { if self.contains(v) { list.entry(&v); @@ -78,7 +121,7 @@ mod tests { #[test] fn test_bitset() { - let mut set = BitSet::new(); + let mut set = SmallBitSet::new(); assert!(!set.contains(0)); assert!(!set.contains(5)); set.insert(0); diff --git a/crates/typst-utils/src/lib.rs b/crates/typst-utils/src/lib.rs index e0a2c8350406..754fc70d7ce5 100644 --- a/crates/typst-utils/src/lib.rs +++ b/crates/typst-utils/src/lib.rs @@ -10,7 +10,7 @@ mod hash; mod pico; mod scalar; -pub use self::bitset::BitSet; +pub use self::bitset::{BitSet, SmallBitSet}; pub use self::deferred::Deferred; pub use self::hash::LazyHash; pub use self::pico::PicoStr; diff --git a/crates/typst/src/foundations/content.rs b/crates/typst/src/foundations/content.rs index 6f8e46b48e10..741deb36451e 100644 --- a/crates/typst/src/foundations/content.rs +++ b/crates/typst/src/foundations/content.rs @@ -24,7 +24,7 @@ use crate::model::{Destination, EmphElem, LinkElem, StrongElem}; use crate::realize::{Behave, 
Behaviour}; use crate::syntax::Span; use crate::text::UnderlineElem; -use crate::utils::{fat, BitSet, LazyHash}; +use crate::utils::{fat, LazyHash, SmallBitSet}; /// A piece of document content. /// @@ -90,7 +90,7 @@ struct Inner { /// - If bit 0 is set, the element is prepared. /// - If bit n is set, the element is guarded against the n-th show rule /// recipe from the top of the style chain (counting from 1). - lifecycle: BitSet, + lifecycle: SmallBitSet, /// The element's raw data. elem: LazyHash<T>, } @@ -102,7 +102,7 @@ impl Content { inner: Arc::new(Inner { label: None, location: None, - lifecycle: BitSet::new(), + lifecycle: SmallBitSet::new(), elem: elem.into(), }), span: Span::detached(), diff --git a/crates/typst/src/realize/process.rs b/crates/typst/src/realize/process.rs index 4212ecb4b2e0..45d8dc8ced24 100644 --- a/crates/typst/src/realize/process.rs +++ b/crates/typst/src/realize/process.rs @@ -10,7 +10,7 @@ use crate::foundations::{ }; use crate::introspection::{Locatable, TagElem}; use crate::text::TextElem; -use crate::utils::{hash128, BitSet}; +use crate::utils::{hash128, SmallBitSet}; /// What to do with an element when encountering it during realization. struct Verdict<'a> { @@ -83,7 +83,7 @@ fn verdict<'a>( ) -> Option<Verdict<'a>> { let mut target = target; let mut map = Styles::new(); - let mut revoked = BitSet::new(); + let mut revoked = SmallBitSet::new(); let mut step = None; let mut slot;