Skip to content

Commit

Permalink
Add str::split_ascii_whitespace.
Browse files Browse the repository at this point in the history
  • Loading branch information
clarfonthey committed Jun 27, 2018
1 parent 23b5516 commit b5cee02
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/liballoc/lib.rs
Expand Up @@ -108,6 +108,7 @@
#![cfg_attr(stage0, feature(repr_transparent))]
#![feature(rustc_attrs)]
#![feature(specialization)]
#![feature(split_ascii_whitespace)]
#![feature(staged_api)]
#![feature(str_internals)]
#![feature(trusted_len)]
Expand Down
2 changes: 2 additions & 0 deletions src/liballoc/str.rs
Expand Up @@ -78,6 +78,8 @@ pub use core::str::SplitWhitespace;
pub use core::str::pattern;
#[stable(feature = "encode_utf16", since = "1.8.0")]
pub use core::str::EncodeUtf16;
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
pub use core::str::SplitAsciiWhitespace;

#[unstable(feature = "slice_concat_ext",
reason = "trait should not have to exist",
Expand Down
159 changes: 155 additions & 4 deletions src/libcore/str/mod.rs
Expand Up @@ -21,7 +21,7 @@ use char;
use fmt;
use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
use iter_private::TrustedRandomAccess;
use slice::{self, SliceIndex};
use slice::{self, SliceIndex, Split as SliceSplit};
use mem;

pub mod pattern;
Expand Down Expand Up @@ -2722,7 +2722,10 @@ impl str {
/// the original string slice, separated by any amount of whitespace.
///
/// 'Whitespace' is defined according to the terms of the Unicode Derived
/// Core Property `White_Space`.
/// Core Property `White_Space`. If you only want to split on ASCII whitespace
/// instead, use [`split_ascii_whitespace`].
///
/// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
///
/// # Examples
///
Expand Down Expand Up @@ -2756,6 +2759,53 @@ impl str {
SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
}

/// Splits a string slice by ASCII whitespace.
///
/// The iterator returned will return string slices that are sub-slices of
/// the original string slice, separated by any amount of ASCII whitespace.
/// Empty pieces (produced by leading, trailing or repeated whitespace) are
/// never yielded.
///
/// To split by Unicode `White_Space` instead, use [`split_whitespace`].
///
/// [`split_whitespace`]: #method.split_whitespace
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(split_ascii_whitespace)]
/// let mut iter = "A few words".split_ascii_whitespace();
///
/// assert_eq!(Some("A"), iter.next());
/// assert_eq!(Some("few"), iter.next());
/// assert_eq!(Some("words"), iter.next());
///
/// assert_eq!(None, iter.next());
/// ```
///
/// All kinds of ASCII whitespace are considered:
///
/// ```
/// #![feature(split_ascii_whitespace)]
/// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
/// assert_eq!(Some("Mary"), iter.next());
/// assert_eq!(Some("had"), iter.next());
/// assert_eq!(Some("a"), iter.next());
/// assert_eq!(Some("little"), iter.next());
/// assert_eq!(Some("lamb"), iter.next());
///
/// assert_eq!(None, iter.next());
/// ```
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
#[inline]
pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace {
    // The split is done on the raw bytes. Every separator is an ASCII byte,
    // and ASCII bytes never occur inside a multi-byte UTF-8 sequence, so each
    // piece is still valid UTF-8 and can be turned back into `&str` without
    // re-validation (that is what `UnsafeBytesToStr` does).
    let inner = self
        .as_bytes()
        .split(IsAsciiWhitespace)
        .filter(IsNotEmpty)
        .map(UnsafeBytesToStr);
    SplitAsciiWhitespace { inner }
}

/// An iterator over the lines of a string, as string slices.
///
/// Lines are ended with either a newline (`\n`) or a carriage return with
Expand Down Expand Up @@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
}

/// An iterator over the non-ASCII-whitespace substrings of a string,
/// separated by any amount of ASCII whitespace.
///
/// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
/// See its documentation for more.
///
/// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
/// [`str`]: ../../std/primitive.str.html
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
#[derive(Clone, Debug)]
pub struct SplitAsciiWhitespace<'a> {
// Adapter chain over the string's bytes: split on `IsAsciiWhitespace`,
// keep only the pieces accepted by `IsNotEmpty`, then map each piece back
// to `&str` with `UnsafeBytesToStr`. Named unit structs are used for the
// three callables so that this field's type can be written out.
inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, IsNotEmpty>, UnsafeBytesToStr>,
}

// Unit struct used as the `char` predicate passed to `split` when building a
// `SplitWhitespace`; being a named type, it can appear in that iterator's
// `inner` field type.
#[derive(Clone)]
struct IsWhitespace;

Expand All @@ -3914,37 +3978,98 @@ impl FnMut<(char, )> for IsWhitespace {
}
}

// Unit struct used as the `&u8` predicate passed to the byte-slice `split`
// when building a `SplitAsciiWhitespace`; being a named type, it can appear
// in that iterator's `inner` field type.
#[derive(Clone)]
struct IsAsciiWhitespace;

// By-value call for the `&u8` predicate; the answer is computed by the
// `FnMut` implementation for the same argument type.
impl<'a> FnOnce<(&'a u8, )> for IsAsciiWhitespace {
    type Output = bool;

    #[inline]
    extern "rust-call" fn call_once(self, args: (&u8, )) -> bool {
        // `call_mut` needs `&mut self`, so rebind the consumed value mutably.
        let mut predicate = self;
        predicate.call_mut(args)
    }
}

// The predicate itself: reports whether the given byte is ASCII whitespace.
impl<'a> FnMut<(&'a u8, )> for IsAsciiWhitespace {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, args: (&u8, )) -> bool {
        // The argument arrives as a one-element tuple; unpack the byte.
        let (byte,) = args;
        byte.is_ascii_whitespace()
    }
}

// Unit struct used as the `filter` predicate that drops empty pieces. It is
// callable both on `&&str` (for `SplitWhitespace`) and on `&&[u8]` (for
// `SplitAsciiWhitespace`), via the separate `FnOnce`/`FnMut` impls below.
#[derive(Clone)]
struct IsNotEmpty;

impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
type Output = bool;

#[inline]
extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
extern "rust-call" fn call_once(mut self, arg: (&'a &'b str, )) -> bool {
self.call_mut(arg)
}
}

impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
#[inline]
extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b str, )) -> bool {
!arg.0.is_empty()
}
}

// By-value call for the `&&[u8]` form of the predicate; forwards to the
// matching `FnMut` implementation.
impl<'a, 'b> FnOnce<(&'a &'b [u8], )> for IsNotEmpty {
    type Output = bool;

    #[inline]
    extern "rust-call" fn call_once(self, args: (&'a &'b [u8], )) -> bool {
        // `call_mut` needs `&mut self`, so rebind the consumed value mutably.
        let mut predicate = self;
        predicate.call_mut(args)
    }
}

// `&&[u8]` form of the predicate: true when the byte slice is non-empty.
impl<'a, 'b> FnMut<(&'a &'b [u8], )> for IsNotEmpty {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, args: (&'a &'b [u8], )) -> bool {
        // The argument arrives as a one-element tuple; unpack the slice.
        let (piece,) = args;
        !piece.is_empty()
    }
}

// Unit struct used as the `map` function that turns a byte slice back into a
// `&str` without validating it (its `FnMut` impl calls
// `from_utf8_unchecked`). Callers must therefore only feed it valid UTF-8.
#[derive(Clone)]
struct UnsafeBytesToStr;

// By-value call for the bytes-to-str mapper; forwards to the `FnMut`
// implementation, which performs the (unchecked) conversion.
impl<'a> FnOnce<(&'a [u8], )> for UnsafeBytesToStr {
    type Output = &'a str;

    #[inline]
    extern "rust-call" fn call_once(self, args: (&'a [u8], )) -> &'a str {
        // `call_mut` needs `&mut self`, so rebind the consumed value mutably.
        let mut mapper = self;
        mapper.call_mut(args)
    }
}

// Converts a byte slice to `&str` with the same lifetime, skipping UTF-8
// validation.
impl<'a> FnMut<(&'a [u8], )> for UnsafeBytesToStr {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, args: (&'a [u8], )) -> &'a str {
        // The argument arrives as a one-element tuple; unpack the slice.
        let (bytes,) = args;
        // SAFETY: nothing is checked here; validity is the caller's
        // responsibility. In `split_ascii_whitespace` the slices come from
        // splitting a `str`'s bytes at ASCII whitespace bytes.
        unsafe { from_utf8_unchecked(bytes) }
    }
}


// Forward iteration for `SplitWhitespace`: both methods hand off to the
// wrapped `inner` adapter chain.
#[stable(feature = "split_whitespace", since = "1.1.0")]
impl<'a> Iterator for SplitWhitespace<'a> {
    type Item = &'a str;

    /// Returns whatever `inner` yields next.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Reports the bounds given by `inner`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}

#[stable(feature = "split_whitespace", since = "1.1.0")]
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
self.inner.next_back()
}
Expand All @@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
// Marker impl with an empty body: declares `SplitWhitespace` a `FusedIterator`.
#[stable(feature = "fused", since = "1.26.0")]
impl<'a> FusedIterator for SplitWhitespace<'a> {}

// Forward iteration for `SplitAsciiWhitespace`: both methods hand off to the
// wrapped `inner` adapter chain.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
impl<'a> Iterator for SplitAsciiWhitespace<'a> {
    type Item = &'a str;

    /// Returns whatever `inner` yields next.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Reports the bounds given by `inner`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}

// Backward iteration for `SplitAsciiWhitespace`, delegated to `inner`.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
    /// Returns whatever `inner` yields from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.inner.next_back()
    }
}

// Marker impl with an empty body: declares `SplitAsciiWhitespace` a
// `FusedIterator`.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
impl<'a> FusedIterator for SplitAsciiWhitespace<'a> {}

/// An iterator of [`u16`] over the string encoded as UTF-16.
///
/// [`u16`]: ../../std/primitive.u16.html
Expand Down

0 comments on commit b5cee02

Please sign in to comment.