Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Subsequence automaton #64

Merged
merged 1 commit into from
Aug 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 61 additions & 0 deletions src/automaton/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,67 @@ impl<'a, T: Automaton> Automaton for &'a T {
}
}

/// An automaton that matches if the input contains a specific subsequence.
///
/// The bytes of the subsequence must appear in the input in the same order,
/// but they do not have to be adjacent. This makes it suitable for building
/// a simple fuzzy-finder.
///
/// # Examples
///
/// ```rust
/// extern crate fst;
///
/// use std::error::Error;
///
/// use fst::{IntoStreamer, Streamer, Set};
/// use fst::automaton::Subsequence;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let paths = vec!["/home/projects/bar", "/home/projects/foo", "/tmp/foo"];
///     let set = Set::from_iter(paths)?;
///
///     // Build our fuzzy query.
///     let subseq = Subsequence::new("hpf");
///
///     // Apply our fuzzy query to the set we built.
///     let stream = set.search(subseq).into_stream();
///
///     let matches = stream.into_strs()?;
///     assert_eq!(matches, vec!["/home/projects/foo"]);
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct Subsequence<'a> {
    /// Bytes of the subsequence to look for, borrowed from the caller's query.
    subseq: &'a [u8],
}

impl<'a> Subsequence<'a> {
    /// Creates an automaton that accepts any input in which the bytes of
    /// `subsequence` occur in order, though not necessarily next to each
    /// other.
    ///
    /// The query is borrowed, not copied, so the automaton cannot outlive it.
    pub fn new(subsequence: &'a str) -> Subsequence<'a> {
        // Matching is done on raw bytes, so keep the UTF-8 encoding as-is.
        let subseq = subsequence.as_bytes();
        Subsequence { subseq }
    }
}

impl<'a> Automaton for Subsequence<'a> {
    /// How many leading bytes of the subsequence have been found so far.
    type State = usize;

    /// Before any input is read, nothing has been found yet.
    fn start(&self) -> Self::State {
        0
    }

    /// The input matches once every byte of the subsequence has been found.
    fn is_match(&self, matched: &Self::State) -> bool {
        self.subseq.len() == *matched
    }

    /// Always `true`: from any state, further input can still supply the
    /// bytes that are missing.
    fn can_match(&self, _matched: &Self::State) -> bool {
        true
    }

    /// Once the whole subsequence has been found, no further input can undo
    /// the match (see `accept`, which leaves that state unchanged).
    fn will_always_match(&self, matched: &Self::State) -> bool {
        self.subseq.len() == *matched
    }

    /// Advances by one when `byte` is the next byte being looked for;
    /// otherwise the state is returned unchanged.
    fn accept(&self, matched: &Self::State, byte: u8) -> Self::State {
        let matched = *matched;
        if matched == self.subseq.len() {
            // Already complete: stay in the accepting state.
            matched
        } else if self.subseq[matched] == byte {
            matched + 1
        } else {
            matched
        }
    }
}

/// An automaton that always matches.
///
/// This is useful in a generic context as a way to express that no automaton
Expand Down
14 changes: 14 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use fst_levenshtein::Levenshtein;
use fst_regex::Regex;

use fst::{Automaton, IntoStreamer, Streamer};
use fst::automaton::Subsequence;
use fst::raw::{Builder, Fst, Output};
use fst::set::{Set, OpBuilder};

Expand Down Expand Up @@ -138,6 +139,19 @@ fn union_large() {
assert_eq!(stream2.next(), None);
}

/// Checks that searching with `Subsequence::new("aab")` yields exactly the
/// same keys, in the same order, as searching with the regex `.*a.*a.*b.*`.
#[test]
fn subsequence() {
    let set = get_set();

    // The regex search is the reference the subsequence search is compared to.
    let by_regex = Regex::new(".*a.*a.*b.*").unwrap();
    let by_subseq = Subsequence::new("aab");

    let mut actual = set.search(&by_subseq).into_stream();
    let mut expected = set.search(&by_regex).into_stream();

    // Step both streams together; every key from one must be the next key
    // of the other.
    while let Some(key) = actual.next() {
        assert_eq!(expected.next(), Some(key));
    }
    // The reference stream must not contain any extra keys.
    assert_eq!(expected.next(), None);
}

#[test]
fn implements_default() {
let map: fst::Map = Default::default();
Expand Down