//! Boyer–Moore string search (bad-character heuristic).
//!
//! In computer science, the Boyer–Moore string-search algorithm is an efficient
//! string-searching algorithm that is the standard benchmark for practical
//! string-search literature.
//! Source: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm

/// Maps every character of `pattern` to the index of its last (rightmost)
/// occurrence; later occurrences overwrite earlier ones while collecting.
fn build_last_occurrence(pattern: &[char]) -> std::collections::HashMap<char, usize> {
    pattern
        .iter()
        .copied()
        .enumerate()
        .map(|(index, ch)| (ch, index))
        .collect()
}

/// Finds every occurrence of `pattern` in `text`, including overlapping ones.
///
/// The pattern is compared against the text from right to left, and the
/// bad-character rule decides how far the pattern may be shifted after each
/// attempt.
///
/// # Returns
///
/// The start positions of all matches in ascending order. Positions are
/// counted in `char`s (Unicode scalar values), not bytes; for ASCII input the
/// two coincide. An empty vector is returned when `text` or `pattern` is empty
/// or when `pattern` is longer than `text`.
///
/// # Examples
///
/// ```text
/// boyer_moore_search("AABCAB12AFAABCABFFEGABCAB", "ABCAB") == [1, 11, 20]
/// ```
pub fn boyer_moore_search(text: &str, pattern: &str) -> Vec<usize> {
    // Lengths are taken from the char vectors (not `str::len`, which counts
    // bytes) so that every index below stays in bounds for non-ASCII input.
    let text: Vec<char> = text.chars().collect();
    let pattern: Vec<char> = pattern.chars().collect();
    let n = text.len();
    let m = pattern.len();

    let mut positions = Vec::new();
    // `m > n` also covers the empty-text case and keeps `n - m` from underflowing.
    if m == 0 || m > n {
        return positions;
    }

    let last_occurrence = build_last_occurrence(&pattern);

    let mut shift = 0;
    while shift <= n - m {
        // Scan right to left for the first position where pattern and text differ.
        let mismatch = (0..m).rev().find(|&j| pattern[j] != text[shift + j]);

        shift += match mismatch {
            None => {
                positions.push(shift);
                // Align the character just past the window with its last
                // occurrence in the pattern, or jump past it if it never occurs.
                match text.get(shift + m) {
                    Some(next) => match last_occurrence.get(next) {
                        Some(&index) => m - index,
                        None => m + 1,
                    },
                    // The window already touches the end of the text.
                    None => 1,
                }
            }
            Some(j) => match last_occurrence.get(&text[shift + j]) {
                // Bad character occurs to the left of the mismatch: line it up.
                Some(&index) if index < j => j - index,
                // Its last occurrence is to the right: fall back to a shift of one.
                Some(_) => 1,
                // Bad character is not in the pattern: move the window past it.
                None => j + 1,
            },
        };
    }
    positions
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_boyer_moore_search() {
        let a = boyer_moore_search("AABCAB12AFAABCABFFEGABCAB", "ABCAB");
        assert_eq!(a, [1, 11, 20]);
        let a = boyer_moore_search("AABCAB12AFAABCABFFEGABCAB", "FFF");
        assert_eq!(a, Vec::<usize>::new());
        let a = boyer_moore_search("AABCAB12AFAABCABFFEGABCAB", "CAB");
        assert_eq!(a, [3, 13, 22]);
    }

    #[test]
    fn test_boyer_moore_search_non_ascii() {
        let a = boyer_moore_search("héllo wörld héllo", "héllo");
        assert_eq!(a, [0, 12]);
    }
}