perf: speedup the automaton construction in debug builds

This commit makes some changes to the implementation that should speed up the construction of Aho-Corasick automatons in debug builds. PR #31
BurntSushi · Jun 27, 2018 · 59124b2 · 59124b2
1 parent 864e823
commit 59124b2
Show file tree

Hide file tree

Showing 3 changed files with 104 additions and 21 deletions.
diff --git a/benches/bench.rs b/benches/bench.rs
@@ -57,6 +57,40 @@ fn bench_naive_no_match<S>(b: &mut Bencher, needles: Vec<S>, haystack: &str)
     b.iter(|| assert!(!naive_find(&needles, haystack)));
 }
 
+#[bench]
+fn bench_construction(b: &mut Bencher) {
+    b.iter(|| {
+        AcAutomaton::new(test::black_box(
+            [
+                "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK", "HOL",
+                "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh", "JoH", "Joh", "SHE",
+                "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK",
+                "bAk", "bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE", "iRe",
+                "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt",
+                "waT", "wat", "ſHE", "ſHe", "ſhE", "ſhe",
+            ].iter()
+                .map(|x| *x),
+        ))
+    })
+}
+
+#[bench]
+fn bench_full_construction(b: &mut Bencher) {
+    b.iter(|| {
+        AcAutomaton::new(test::black_box(
+            [
+                "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK", "HOL",
+                "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh", "JoH", "Joh", "SHE",
+                "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK",
+                "bAk", "bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE", "iRe",
+                "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt",
+                "waT", "wat", "ſHE", "ſHe", "ſhE", "ſhe",
+            ].iter()
+                .map(|x| *x),
+        )).into_full()
+    })
+}
+
 fn haystack_same(letter: char) -> String {
     iter::repeat(letter).take(10000).collect()
 }

diff --git a/src/full.rs b/src/full.rs
@@ -3,6 +3,7 @@ use std::mem;
 
 use super::{
     FAIL_STATE,
+    AllBytesIter,
     StateIdx, AcAutomaton, Transitions, Match,
     usize_bytes, vec_bytes,
 };
@@ -119,7 +120,7 @@ impl<P: AsRef<[u8]>> Automaton<P> for FullAcAutomaton<P> {
 impl<P: AsRef<[u8]>> FullAcAutomaton<P> {
     fn build_matrix<T: Transitions>(&mut self, ac: &AcAutomaton<P, T>) {
         for (si, s) in ac.states.iter().enumerate().skip(1) {
-            for b in (0..256).map(|b| b as u8) {
+            for b in AllBytesIter::new() {
                 self.set(si as StateIdx, b, ac.next_state(si as StateIdx, b));
             }
             for &pati in &s.out {

diff --git a/src/lib.rs b/src/lib.rs
@@ -297,6 +297,29 @@ impl<P: AsRef<[u8]>, T: Transitions> Automaton<P> for AcAutomaton<P, T> {
     }
 }
 
+// `(0..256).map(|b| b as u8)` optimizes poorly in debug builds so
+// we use this small explicit iterator instead
+struct AllBytesIter(i32);
+impl Iterator for AllBytesIter {
+    type Item = u8;
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.0 <= 256 {
+            let b = self.0 as u8;
+            self.0 += 1;
+            Some(b)
+        } else {
+            None
+        }
+    }
+}
+
+impl AllBytesIter {
+    fn new() -> AllBytesIter {
+        AllBytesIter(0)
+    }
+}
+
 // Below contains code for *building* the automaton. It's a reasonably faithful
 // translation of the description/psuedo-code from:
 // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf
@@ -321,11 +344,14 @@ impl<P: AsRef<[u8]>, T: Transitions> AcAutomaton<P, T> {
             }
             self.states[previ as usize].out.push(pati);
         }
-        for c in (0..256).into_iter().map(|c| c as u8) {
-            if self.states[ROOT_STATE as usize].goto(c) == FAIL_STATE {
-                self.states[ROOT_STATE as usize].set_goto(c, ROOT_STATE);
-            } else {
-                self.start_bytes.push(c);
+        {
+            let root_state = &mut self.states[ROOT_STATE as usize];
+            for c in AllBytesIter::new() {
+                if root_state.goto(c) == FAIL_STATE {
+                    root_state.set_goto(c, ROOT_STATE);
+                } else {
+                    self.start_bytes.push(c);
+                }
             }
         }
         // If any of the start bytes are non-ASCII, then remove them all,
@@ -344,14 +370,14 @@ impl<P: AsRef<[u8]>, T: Transitions> AcAutomaton<P, T> {
         // Fill up the queue with all non-root transitions out of the root
         // node. Then proceed by breadth first traversal.
         let mut q = VecDeque::new();
-        for c in (0..256).into_iter().map(|c| c as u8) {
+        for c in AllBytesIter::new() {
             let si = self.states[ROOT_STATE as usize].goto(c);
             if si != ROOT_STATE {
                 q.push_front(si);
             }
         }
         while let Some(si) = q.pop_back() {
-            for c in (0..256).into_iter().map(|c| c as u8) {
+            for c in AllBytesIter::new() {
                 let u = self.states[si as usize].goto(c);
                 if u != FAIL_STATE {
                     q.push_front(u);
@@ -361,8 +387,19 @@ impl<P: AsRef<[u8]>, T: Transitions> AcAutomaton<P, T> {
                     }
                     let ufail = self.states[v as usize].goto(c);
                     self.states[u as usize].fail = ufail;
-                    let ufail_out = self.states[ufail as usize].out.clone();
-                    self.states[u as usize].out.extend(ufail_out);
+
+                    fn get_two<T>(xs: &mut [T], i: usize, j: usize) -> (&mut T, &mut T) {
+                        if i < j {
+                            let (before, after) = xs.split_at_mut(j);
+                            (&mut before[i], &mut after[0])
+                        } else {
+                            let (before, after) = xs.split_at_mut(i);
+                            (&mut after[0], &mut before[j])
+                        }
+                    }
+
+                    let (ufail_out, out) = get_two(&mut self.states, ufail as usize, u as usize);
+                    out.out.extend_from_slice(&ufail_out.out);
                 }
             }
         }
@@ -426,22 +463,22 @@ pub struct Dense(DenseChoice);
 
 #[derive(Clone, Debug)]
 enum DenseChoice {
-    Sparse(Vec<StateIdx>), // indexed by alphabet
+    Sparse(Box<Sparse>),
     Dense(Vec<(u8, StateIdx)>),
 }
 
 impl Transitions for Dense {
     fn new(depth: u32) -> Dense {
         if depth <= DENSE_DEPTH_THRESHOLD {
-            Dense(DenseChoice::Sparse(vec![0; 256]))
+            Dense(DenseChoice::Sparse(Box::new(Sparse::new(depth))))
         } else {
             Dense(DenseChoice::Dense(vec![]))
         }
     }
 
     fn goto(&self, b1: u8) -> StateIdx {
         match self.0 {
-            DenseChoice::Sparse(ref m) => m[b1 as usize],
+            DenseChoice::Sparse(ref m) => m.goto(b1),
             DenseChoice::Dense(ref m) => {
                 for &(b2, si) in m {
                     if b1 == b2 {
@@ -455,14 +492,14 @@ impl Transitions for Dense {
 
     fn set_goto(&mut self, b: u8, si: StateIdx) {
         match self.0 {
-            DenseChoice::Sparse(ref mut m) => m[b as usize] = si,
+            DenseChoice::Sparse(ref mut m) => m.set_goto(b, si),
             DenseChoice::Dense(ref mut m) => m.push((b, si)),
         }
     }
 
     fn heap_bytes(&self) -> usize {
         match self.0 {
-            DenseChoice::Sparse(ref m) => m.len() * 4,
+            DenseChoice::Sparse(_) => mem::size_of::<Sparse>(),
             DenseChoice::Dense(ref m) => m.len() * (1 + 4),
         }
     }
@@ -472,12 +509,23 @@ impl Transitions for Dense {
 ///
 /// This can use enormous amounts of memory when there are many patterns,
 /// but matching is very fast.
-#[derive(Clone, Debug)]
-pub struct Sparse(Vec<StateIdx>);
+pub struct Sparse([StateIdx; 256]);
+
+impl Clone for Sparse {
+    fn clone(&self) -> Sparse {
+        Sparse(self.0)
+    }
+}
+
+impl fmt::Debug for Sparse {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple("Sparse").field(&&self.0[..]).finish()
+    }
+}
 
 impl Transitions for Sparse {
     fn new(_: u32) -> Sparse {
-        Sparse(vec![0; 256])
+        Sparse([0; 256])
     }
 
     #[inline]
@@ -490,7 +538,7 @@ impl Transitions for Sparse {
     }
 
     fn heap_bytes(&self) -> usize {
-        self.0.len() * 4
+        0
     }
 }
 
@@ -526,7 +574,7 @@ impl<T: Transitions> State<T> {
 
     fn goto_string(&self, root: bool) -> String {
         let mut goto = vec![];
-        for b in (0..256).map(|b| b as u8) {
+        for b in AllBytesIter::new() {
             let si = self.goto(b);
             if (!root && si == FAIL_STATE) || (root && si == ROOT_STATE) {
                 continue;
@@ -567,7 +615,7 @@ digraph automaton {{
                 w!(out, "    {} [peripheries=2];\n", i);
             }
             w!(out, "    {} -> {} [style=dashed];\n", i, s.fail);
-            for b in (0..256).map(|b| b as u8) {
+            for b in AllBytesIter::new() {
                 let si = s.goto(b);
                 if si == FAIL_STATE || (i == ROOT_STATE && si == ROOT_STATE) {
                     continue;