Skip to content

Commit

Permalink
impl: replace substring search with memmem from memchr crate
Browse files (browse the repository at this point in the history)
  • Loading branch information
BurntSushi committed May 1, 2021
1 parent a4cc90d commit 5fcef91
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 1,942 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ bench = false

[features]
default = ["std", "unicode"]
std = ["memchr/use_std"]
std = ["memchr/std"]
unicode = ["lazy_static", "regex-automata"]
serde1 = ["std", "serde1-nostd", "serde/std"]
serde1-nostd = ["serde"]

[dependencies]
memchr = { version = "2.1.2", default-features = false }
lazy_static = { version = "1.2", optional = true }
memchr = { version = "2.4.0", default-features = false }
lazy_static = { version = "1.2.0", optional = true }
regex-automata = { version = "0.1.5", default-features = false, optional = true }
serde = { version = "1.0.85", default-features = false, optional = true }

Expand Down
84 changes: 0 additions & 84 deletions src/cow.rs

This file was deleted.

92 changes: 26 additions & 66 deletions src/ext_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ use std::ffi::OsStr;
#[cfg(feature = "std")]
use std::path::Path;

use core::{cmp, iter, ops, ptr, slice, str};
use memchr::{memchr, memrchr};
use core::{iter, ops, ptr, slice, str};
use memchr::{memchr, memmem, memrchr};

use crate::ascii;
use crate::bstr::BStr;
use crate::byteset;
#[cfg(feature = "std")]
use crate::ext_vec::ByteVec;
use crate::search::{PrefilterState, TwoWay};
#[cfg(feature = "unicode")]
use crate::unicode::{
whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
Expand Down Expand Up @@ -2986,15 +2985,13 @@ pub trait ByteSlice: Sealed {
/// version which permits building a `Finder` that is not connected to the
/// lifetime of its needle.
#[derive(Clone, Debug)]
pub struct Finder<'a> {
searcher: TwoWay<'a>,
}
pub struct Finder<'a>(memmem::Finder<'a>);

impl<'a> Finder<'a> {
/// Create a new finder for the given needle.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
Finder { searcher: TwoWay::forward(needle.as_ref()) }
Finder(memmem::Finder::new(needle.as_ref()))
}

/// Convert this finder into its owned variant, such that it no longer
Expand All @@ -3007,7 +3004,7 @@ impl<'a> Finder<'a> {
#[cfg(feature = "std")]
#[inline]
pub fn into_owned(self) -> Finder<'static> {
Finder { searcher: self.searcher.into_owned() }
Finder(self.0.into_owned())
}

/// Returns the needle that this finder searches for.
Expand All @@ -3018,7 +3015,7 @@ impl<'a> Finder<'a> {
/// needle returned must necessarily be the shorter of the two.
#[inline]
pub fn needle(&self) -> &[u8] {
self.searcher.needle()
self.0.needle()
}

/// Returns the index of the first occurrence of this needle in the given
Expand Down Expand Up @@ -3050,7 +3047,7 @@ impl<'a> Finder<'a> {
/// ```
#[inline]
pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
self.searcher.find(haystack.as_ref())
self.0.find(haystack.as_ref())
}
}

Expand All @@ -3071,15 +3068,13 @@ impl<'a> Finder<'a> {
/// version which permits building a `FinderReverse` that is not connected to
/// the lifetime of its needle.
#[derive(Clone, Debug)]
pub struct FinderReverse<'a> {
searcher: TwoWay<'a>,
}
pub struct FinderReverse<'a>(memmem::FinderRev<'a>);

impl<'a> FinderReverse<'a> {
/// Create a new reverse finder for the given needle.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) }
FinderReverse(memmem::FinderRev::new(needle.as_ref()))
}

/// Convert this finder into its owned variant, such that it no longer
Expand All @@ -3092,7 +3087,7 @@ impl<'a> FinderReverse<'a> {
#[cfg(feature = "std")]
#[inline]
pub fn into_owned(self) -> FinderReverse<'static> {
FinderReverse { searcher: self.searcher.into_owned() }
FinderReverse(self.0.into_owned())
}

/// Returns the needle that this finder searches for.
Expand All @@ -3103,7 +3098,7 @@ impl<'a> FinderReverse<'a> {
/// the needle returned must necessarily be the shorter of the two.
#[inline]
pub fn needle(&self) -> &[u8] {
self.searcher.needle()
self.0.needle()
}

/// Returns the index of the last occurrence of this needle in the given
Expand Down Expand Up @@ -3135,7 +3130,7 @@ impl<'a> FinderReverse<'a> {
/// ```
#[inline]
pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
self.searcher.rfind(haystack.as_ref())
self.0.rfind(haystack.as_ref())
}
}

Expand All @@ -3147,17 +3142,14 @@ impl<'a> FinderReverse<'a> {
/// byte string being looked for.
#[derive(Debug)]
pub struct Find<'a> {
it: memmem::FindIter<'a, 'a>,
haystack: &'a [u8],
prestate: PrefilterState,
searcher: TwoWay<'a>,
pos: usize,
needle: &'a [u8],
}

impl<'a> Find<'a> {
fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
let searcher = TwoWay::forward(needle);
let prestate = searcher.prefilter_state();
Find { haystack, prestate, searcher, pos: 0 }
Find { it: memmem::find_iter(haystack, needle), haystack, needle }
}
}

Expand All @@ -3166,20 +3158,7 @@ impl<'a> Iterator for Find<'a> {

#[inline]
fn next(&mut self) -> Option<usize> {
if self.pos > self.haystack.len() {
return None;
}
let result = self
.searcher
.find_with(&mut self.prestate, &self.haystack[self.pos..]);
match result {
None => None,
Some(i) => {
let pos = self.pos + i;
self.pos = pos + cmp::max(1, self.searcher.needle().len());
Some(pos)
}
}
self.it.next()
}
}

Expand All @@ -3191,28 +3170,26 @@ impl<'a> Iterator for Find<'a> {
/// byte string being looked for.
#[derive(Debug)]
pub struct FindReverse<'a> {
it: memmem::FindRevIter<'a, 'a>,
haystack: &'a [u8],
prestate: PrefilterState,
searcher: TwoWay<'a>,
/// When searching with an empty needle, this gets set to `None` after
/// we've yielded the last element at `0`.
pos: Option<usize>,
needle: &'a [u8],
}

impl<'a> FindReverse<'a> {
fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
let searcher = TwoWay::reverse(needle);
let prestate = searcher.prefilter_state();
let pos = Some(haystack.len());
FindReverse { haystack, prestate, searcher, pos }
FindReverse {
it: memmem::rfind_iter(haystack, needle),
haystack,
needle,
}
}

fn haystack(&self) -> &'a [u8] {
self.haystack
}

fn needle(&self) -> &[u8] {
self.searcher.needle()
self.needle
}
}

Expand All @@ -3221,24 +3198,7 @@ impl<'a> Iterator for FindReverse<'a> {

#[inline]
fn next(&mut self) -> Option<usize> {
let pos = match self.pos {
None => return None,
Some(pos) => pos,
};
let result = self
.searcher
.rfind_with(&mut self.prestate, &self.haystack[..pos]);
match result {
None => None,
Some(i) => {
if pos == i {
self.pos = pos.checked_sub(1);
} else {
self.pos = Some(i);
}
Some(i)
}
}
self.it.next()
}
}

Expand Down Expand Up @@ -3398,7 +3358,7 @@ impl<'a> Iterator for Split<'a> {
match self.finder.next() {
Some(start) => {
let next = &haystack[self.last..start];
self.last = start + self.finder.searcher.needle().len();
self.last = start + self.finder.needle.len();
Some(next)
}
None => {
Expand Down
1 change: 0 additions & 1 deletion src/ext_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,6 @@ impl fmt::Display for FromUtf8Error {

#[cfg(test)]
mod tests {
use crate::ext_slice::B;
use crate::ext_vec::ByteVec;

#[test]
Expand Down
2 changes: 0 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,14 +393,12 @@ mod bstr;
#[cfg(feature = "std")]
mod bstring;
mod byteset;
mod cow;
mod ext_slice;
#[cfg(feature = "std")]
mod ext_vec;
mod impls;
#[cfg(feature = "std")]
pub mod io;
mod search;
#[cfg(test)]
mod tests;
#[cfg(feature = "unicode")]
Expand Down
Loading

0 comments on commit 5fcef91

Please sign in to comment.