From 6c29708bf906fa9075bb96b76fd7f6cc81eda43c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 20 Jan 2015 10:45:29 -0800 Subject: [PATCH] regex: Remove in-tree version The regex library was largely used for non-critical aspects of the compiler and various external tooling. The library at this point is duplicated with its out-of-tree counterpart and as such imposes a bit of a maintenance overhead as well as compile time hit for the compiler itself. The last major user of the regex library is the libtest library, using regexes for filters when running tests. This removal means that the filtering has gone back to substring matching rather than using regexes. --- mk/crates.mk | 12 +- src/compiletest/common.rs | 6 +- src/compiletest/compiletest.rs | 66 +- src/compiletest/errors.rs | 73 +- src/compiletest/runtest.rs | 2 +- src/grammar/verify.rs | 25 +- src/liblog/directive.rs | 15 +- src/liblog/lib.rs | 24 +- src/libregex/compile.rs | 275 ------- src/libregex/lib.rs | 93 --- src/libregex/parse.rs | 1087 ------------------------- src/libregex/re.rs | 684 ---------------- src/libregex/test/bench.rs | 183 ----- src/libregex/test/matches.rs | 373 --------- src/libregex/test/mod.rs | 24 - src/libregex/test/native_static.rs | 26 - src/libregex/test/tests.rs | 245 ------ src/libregex/testdata/LICENSE | 19 - src/libregex/testdata/README | 17 - src/libregex/testdata/basic.dat | 221 ----- src/libregex/testdata/nullsubexpr.dat | 79 -- src/libregex/testdata/repetition.dat | 163 ---- src/libregex/vm.rs | 582 ------------- src/librustc/lib.rs | 1 - src/librustc/session/mod.rs | 58 +- src/libtest/lib.rs | 58 +- src/rustbook/book.rs | 85 +- src/rustbook/build.rs | 7 +- src/rustbook/main.rs | 2 - src/test/bench/shootout-regex-dna.rs | 126 --- src/test/run-pass/rust-log-filter.rs | 6 +- 31 files changed, 187 insertions(+), 4450 deletions(-) delete mode 100644 src/libregex/compile.rs delete mode 100644 src/libregex/lib.rs delete mode 100644 src/libregex/parse.rs delete mode 100644 src/libregex/re.rs delete mode 100644 src/libregex/test/bench.rs delete mode 100644 src/libregex/test/matches.rs delete mode 100644 src/libregex/test/mod.rs delete mode 100644 src/libregex/test/native_static.rs delete mode 100644 src/libregex/test/tests.rs delete mode 100644 src/libregex/testdata/LICENSE delete mode 100644 src/libregex/testdata/README delete mode 100644 src/libregex/testdata/basic.dat delete mode 100644 src/libregex/testdata/nullsubexpr.dat delete mode 100644 src/libregex/testdata/repetition.dat delete mode 100644 src/libregex/vm.rs delete mode 100644 src/test/bench/shootout-regex-dna.rs diff --git a/mk/crates.mk b/mk/crates.mk index 5957405f0f9ec..be1965b7edadd 100644 --- a/mk/crates.mk +++ b/mk/crates.mk @@ -51,7 +51,7 @@ TARGET_CRATES := libc std flate arena term \ serialize getopts collections test rand \ - log regex graphviz core rbml alloc \ + log graphviz core rbml alloc \ unicode rustc_bitflags RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \ rustc_trans rustc_back rustc_llvm rustc_privacy @@ -95,16 +95,15 @@ DEPS_term := std log DEPS_getopts := std DEPS_collections := core alloc unicode DEPS_num := std -DEPS_test := std getopts serialize rbml term regex native:rust_test_helpers +DEPS_test := std getopts serialize rbml term native:rust_test_helpers DEPS_rand := core -DEPS_log := std regex -DEPS_regex := std +DEPS_log := std DEPS_fmt_macros = std TOOL_DEPS_compiletest := test getopts TOOL_DEPS_rustdoc := rustdoc TOOL_DEPS_rustc := rustc_driver -TOOL_DEPS_rustbook := std regex rustdoc +TOOL_DEPS_rustbook := std rustdoc TOOL_SOURCE_compiletest := $(S)src/compiletest/compiletest.rs TOOL_SOURCE_rustdoc := $(S)src/driver/driver.rs TOOL_SOURCE_rustc := $(S)src/driver/driver.rs @@ -130,9 +129,8 @@ DOC_CRATES := $(filter-out rustc, \ $(filter-out rustc_driver, \ $(filter-out rustc_privacy, \ $(filter-out log, \ - $(filter-out regex, \ $(filter-out getopts, \ - $(filter-out syntax, $(CRATES)))))))))))) + $(filter-out syntax, $(CRATES))))))))))) COMPILER_DOC_CRATES := rustc rustc_trans rustc_borrowck rustc_resolve \ rustc_typeck rustc_driver syntax rustc_privacy diff --git a/src/compiletest/common.rs b/src/compiletest/common.rs index c21785c45a3ac..1f4f444634dc2 100644 --- a/src/compiletest/common.rs +++ b/src/compiletest/common.rs @@ -11,7 +11,6 @@ pub use self::Mode::*; use std::fmt; use std::str::FromStr; -use regex::Regex; #[derive(Clone, PartialEq, Debug)] pub enum Mode { @@ -101,10 +100,7 @@ pub struct Config { pub run_ignored: bool, // Only run tests that match this filter - pub filter: Option, - - // Precompiled regex for finding expected errors in cfail - pub cfail_regex: Regex, + pub filter: Option, // Write out a parseable log of tests that were run pub logfile: Option, diff --git a/src/compiletest/compiletest.rs b/src/compiletest/compiletest.rs index b3f0034ca894f..4659af4416bd9 100644 --- a/src/compiletest/compiletest.rs +++ b/src/compiletest/compiletest.rs @@ -22,7 +22,6 @@ extern crate getopts; #[macro_use] extern crate log; -extern crate regex; use std::os; use std::io; @@ -33,7 +32,6 @@ use getopts::{optopt, optflag, reqopt}; use common::Config; use common::{Pretty, DebugInfoGdb, DebugInfoLldb, Codegen}; use util::logv; -use regex::Regex; pub mod procsrv; pub mod util; @@ -116,14 +114,7 @@ pub fn parse_config(args: Vec ) -> Config { } let filter = if !matches.free.is_empty() { - let s = matches.free[0].as_slice(); - match regex::Regex::new(s) { - Ok(re) => Some(re), - Err(e) => { - println!("failed to parse filter /{}/: {:?}", s, e); - panic!() - } - } + Some(matches.free[0].clone()) } else { None }; @@ -145,7 +136,6 @@ pub fn parse_config(args: Vec ) -> Config { .as_slice()).expect("invalid mode"), run_ignored: matches.opt_present("ignored"), filter: filter, - cfail_regex: Regex::new(errors::EXPECTED_PATTERN).unwrap(), logfile: matches.opt_str("logfile").map(|s| Path::new(s)), runtool: matches.opt_str("runtool"), host_rustcflags: matches.opt_str("host-rustcflags"), @@ -374,18 +364,24 @@ fn extract_gdb_version(full_version_line: Option) -> Option { if full_version_line.as_slice().trim().len() > 0 => { let full_version_line = full_version_line.as_slice().trim(); - let re = Regex::new(r"(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)").unwrap(); - - match re.captures(full_version_line) { - Some(captures) => { - Some(captures.at(2).unwrap_or("").to_string()) + // used to be a regex "(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)" + for (pos, c) in full_version_line.char_indices() { + if !c.is_digit(10) { continue } + if pos + 2 >= full_version_line.len() { continue } + if full_version_line.char_at(pos + 1) != '.' { continue } + if !full_version_line.char_at(pos + 2).is_digit(10) { continue } + if pos > 0 && full_version_line.char_at_reverse(pos).is_digit(10) { + continue } - None => { - println!("Could not extract GDB version from line '{}'", - full_version_line); - None + if pos + 3 < full_version_line.len() && + full_version_line.char_at(pos + 3).is_digit(10) { + continue } + return Some(full_version_line[pos..pos+3].to_string()); } + println!("Could not extract GDB version from line '{}'", + full_version_line); + None }, _ => None } @@ -408,18 +404,26 @@ fn extract_lldb_version(full_version_line: Option) -> Option { if full_version_line.as_slice().trim().len() > 0 => { let full_version_line = full_version_line.as_slice().trim(); - let re = Regex::new(r"[Ll][Ll][Dd][Bb]-([0-9]+)").unwrap(); - - match re.captures(full_version_line) { - Some(captures) => { - Some(captures.at(1).unwrap_or("").to_string()) - } - None => { - println!("Could not extract LLDB version from line '{}'", - full_version_line); - None - } + for (pos, l) in full_version_line.char_indices() { + if l != 'l' && l != 'L' { continue } + if pos + 5 >= full_version_line.len() { continue } + let l = full_version_line.char_at(pos + 1); + if l != 'l' && l != 'L' { continue } + let d = full_version_line.char_at(pos + 2); + if d != 'd' && d != 'D' { continue } + let b = full_version_line.char_at(pos + 3); + if b != 'b' && b != 'B' { continue } + let dash = full_version_line.char_at(pos + 4); + if dash != '-' { continue } + + let vers = full_version_line[pos + 5..].chars().take_while(|c| { + c.is_digit(10) + }).collect::(); + if vers.len() > 0 { return Some(vers) } } + println!("Could not extract LLDB version from line '{}'", + full_version_line); + None }, _ => None } diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index dcfac688c7f62..fc815d66a4d42 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -9,9 +9,7 @@ // except according to those terms. use self::WhichLine::*; -use std::ascii::AsciiExt; use std::io::{BufferedReader, File}; -use regex::Regex; pub struct ExpectedError { pub line: uint, @@ -19,6 +17,9 @@ pub struct ExpectedError { pub msg: String, } +#[derive(PartialEq, Show)] +enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) } + /// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE" /// The former is a "follow" that inherits its target from the preceding line; /// the latter is an "adjusts" that goes that many lines up. @@ -26,15 +27,8 @@ pub struct ExpectedError { /// Goal is to enable tests both like: //~^^^ ERROR go up three /// and also //~^ ERROR message one for the preceding line, and /// //~| ERROR message two for that same line. - -pub static EXPECTED_PATTERN : &'static str = - r"//~(?P\|)?(?P\^*)\s*(?P\S*)\s*(?P.*)"; - -#[derive(PartialEq, Show)] -enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) } - // Load any test directives embedded in the file -pub fn load_errors(re: &Regex, testfile: &Path) -> Vec { +pub fn load_errors(testfile: &Path) -> Vec { let mut rdr = BufferedReader::new(File::open(testfile).unwrap()); // `last_nonfollow_error` tracks the most recently seen @@ -50,7 +44,7 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec { rdr.lines().enumerate().filter_map(|(line_no, ln)| { parse_expected(last_nonfollow_error, line_no + 1, - ln.unwrap().as_slice(), re) + ln.unwrap().as_slice()) .map(|(which, error)| { match which { FollowPrevious(_) => {} @@ -63,30 +57,39 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec { fn parse_expected(last_nonfollow_error: Option, line_num: uint, - line: &str, - re: &Regex) -> Option<(WhichLine, ExpectedError)> { - re.captures(line).and_then(|caps| { - let adjusts = caps.name("adjusts").unwrap_or("").len(); - let kind = caps.name("kind").unwrap_or("").to_ascii_lowercase(); - let msg = caps.name("msg").unwrap_or("").trim().to_string(); - let follow = caps.name("follow").unwrap_or("").len() > 0; + line: &str) -> Option<(WhichLine, ExpectedError)> { + let start = match line.find_str("//~") { Some(i) => i, None => return None }; + let (follow, adjusts) = if line.char_at(start + 3) == '|' { + (true, 0) + } else { + (false, line[start + 3..].chars().take_while(|c| *c == '^').count()) + }; + let kind_start = start + 3 + adjusts + (follow as usize); + let letters = line[kind_start..].chars(); + let kind = letters.skip_while(|c| c.is_whitespace()) + .take_while(|c| !c.is_whitespace()) + .map(|c| c.to_lowercase()) + .collect::(); + let letters = line[kind_start..].chars(); + let msg = letters.skip_while(|c| c.is_whitespace()) + .skip_while(|c| !c.is_whitespace()) + .collect::().trim().to_string(); - let (which, line) = if follow { - assert!(adjusts == 0, "use either //~| or //~^, not both."); - let line = last_nonfollow_error.unwrap_or_else(|| { - panic!("encountered //~| without preceding //~^ line.") - }); - (FollowPrevious(line), line) - } else { - let which = - if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine }; - let line = line_num - adjusts; - (which, line) - }; + let (which, line) = if follow { + assert!(adjusts == 0, "use either //~| or //~^, not both."); + let line = last_nonfollow_error.unwrap_or_else(|| { + panic!("encountered //~| without preceding //~^ line.") + }); + (FollowPrevious(line), line) + } else { + let which = + if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine }; + let line = line_num - adjusts; + (which, line) + }; - debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg); - Some((which, ExpectedError { line: line, - kind: kind, - msg: msg, })) - }) + debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg); + Some((which, ExpectedError { line: line, + kind: kind, + msg: msg, })) } diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index f075cff769fba..e5a973e7501ae 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -99,7 +99,7 @@ fn run_cfail_test(config: &Config, props: &TestProps, testfile: &Path) { } let output_to_check = get_output(props, &proc_res); - let expected_errors = errors::load_errors(&config.cfail_regex, testfile); + let expected_errors = errors::load_errors(testfile); if !expected_errors.is_empty() { if !props.error_patterns.is_empty() { fatal("both error pattern and expected errors specified"); diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index e9409a6106131..1288110df330a 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -13,14 +13,11 @@ extern crate syntax; extern crate rustc; -extern crate regex; - #[macro_use] extern crate log; use std::collections::HashMap; use std::io::File; -use regex::Regex; use syntax::parse; use syntax::parse::lexer; @@ -167,15 +164,19 @@ fn count(lit: &str) -> usize { } fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { - let re = Regex::new( - r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]" - ).unwrap(); - - let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice()); - let start = m.name("start").unwrap_or(""); - let end = m.name("end").unwrap_or(""); - let toknum = m.name("toknum").unwrap_or(""); - let content = m.name("content").unwrap_or(""); + // old regex: + // \[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+] + let start = s.find_str("[@").unwrap(); + let comma = start + s[start..].find_str(",").unwrap(); + let colon = comma + s[comma..].find_str(":").unwrap(); + let content_start = colon + s[colon..].find_str("='").unwrap(); + let content_end = content_start + s[content_start..].find_str("',<").unwrap(); + let toknum_end = content_end + s[content_end..].find_str(">,").unwrap(); + + let start = &s[comma + 1 .. colon]; + let end = &s[colon + 1 .. content_start]; + let content = &s[content_start + 2 .. content_end]; + let toknum = &s[content_end + 3 .. toknum_end]; let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map", toknum).as_slice()); diff --git a/src/liblog/directive.rs b/src/liblog/directive.rs index d741019aa7b9c..5efa799f56279 100644 --- a/src/liblog/directive.rs +++ b/src/liblog/directive.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use regex::Regex; use std::ascii::AsciiExt; use std::cmp; @@ -34,7 +33,7 @@ fn parse_log_level(level: &str) -> Option { /// /// Valid log levels are 0-255, with the most likely ones being 1-4 (defined in /// std::). Also supports string log levels of error, warn, info, and debug -pub fn parse_logging_spec(spec: &str) -> (Vec, Option) { +pub fn parse_logging_spec(spec: &str) -> (Vec, Option) { let mut dirs = Vec::new(); let mut parts = spec.split('/'); @@ -80,17 +79,7 @@ pub fn parse_logging_spec(spec: &str) -> (Vec, Option) { }); }}); - let filter = filter.map_or(None, |filter| { - match Regex::new(filter) { - Ok(re) => Some(re), - Err(e) => { - println!("warning: invalid regex filter - {:?}", e); - None - } - } - }); - - return (dirs, filter); + (dirs, filter.map(|s| s.to_string())) } #[cfg(test)] diff --git a/src/liblog/lib.rs b/src/liblog/lib.rs index 4da07c50c595e..e7c5bc35f761a 100644 --- a/src/liblog/lib.rs +++ b/src/liblog/lib.rs @@ -123,11 +123,11 @@ //! //! # Filtering results //! -//! A RUST_LOG directive may include a regex filter. The syntax is to append `/` -//! followed by a regex. Each message is checked against the regex, and is only -//! logged if it matches. Note that the matching is done after formatting the log -//! string but before adding any logging meta-data. There is a single filter for all -//! modules. +//! A RUST_LOG directive may include a string filter. The syntax is to append +//! `/` followed by a string. Each message is checked against the string and is +//! only logged if it contains the string. Note that the matching is done after +//! formatting the log string but before adding any logging meta-data. There is +//! a single filter for all modules. //! //! Some examples: //! @@ -172,8 +172,6 @@ #![allow(unstable)] #![deny(missing_docs)] -extern crate regex; - use std::cell::RefCell; use std::fmt; use std::io::LineBufferedWriter; @@ -185,8 +183,6 @@ use std::rt; use std::slice; use std::sync::{Once, ONCE_INIT}; -use regex::Regex; - use directive::LOG_LEVEL_NAMES; #[macro_use] @@ -209,8 +205,8 @@ static mut LOG_LEVEL: u32 = MAX_LOG_LEVEL; static mut DIRECTIVES: *const Vec = 0 as *const Vec; -/// Optional regex filter. -static mut FILTER: *const Regex = 0 as *const _; +/// Optional filter. +static mut FILTER: *const String = 0 as *const _; /// Debug log level pub const DEBUG: u32 = 4; @@ -288,7 +284,7 @@ pub fn log(level: u32, loc: &'static LogLocation, args: fmt::Arguments) { // Test the literal string from args against the current filter, if there // is one. match unsafe { FILTER.as_ref() } { - Some(filter) if !filter.is_match(&args.to_string()[]) => return, + Some(filter) if !args.to_string().contains(&filter[]) => return, _ => {} } @@ -435,8 +431,8 @@ fn init() { DIRECTIVES = ptr::null(); if !FILTER.is_null() { - let _filter: Box = mem::transmute(FILTER); - FILTER = ptr::null(); + let _filter: Box = mem::transmute(FILTER); + FILTER = 0 as *const _; } }); } diff --git a/src/libregex/compile.rs b/src/libregex/compile.rs deleted file mode 100644 index d29a7a425c116..0000000000000 --- a/src/libregex/compile.rs +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// Enable this to squash warnings due to exporting pieces of the representation -// for use with the regex! macro. See lib.rs for explanation. - -pub use self::Inst::*; - -use std::cmp; -use std::iter::repeat; -use parse; -use parse::{ - Flags, FLAG_EMPTY, - Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture, Cat, Alt, - Rep, - ZeroOne, ZeroMore, OneMore, -}; - -type InstIdx = uint; - -#[derive(Show, Clone)] -pub enum Inst { - // When a Match instruction is executed, the current thread is successful. - Match, - - // The OneChar instruction matches a literal character. - // The flags indicate whether to do a case insensitive match. - OneChar(char, Flags), - - // The CharClass instruction tries to match one input character against - // the range of characters given. - // The flags indicate whether to do a case insensitive match and whether - // the character class is negated or not. - CharClass(Vec<(char, char)>, Flags), - - // Matches any character except new lines. - // The flags indicate whether to include the '\n' character. - Any(Flags), - - // Matches the beginning of the string, consumes no characters. - // The flags indicate whether it matches if the preceding character - // is a new line. - EmptyBegin(Flags), - - // Matches the end of the string, consumes no characters. - // The flags indicate whether it matches if the proceeding character - // is a new line. - EmptyEnd(Flags), - - // Matches a word boundary (\w on one side and \W \A or \z on the other), - // and consumes no character. - // The flags indicate whether this matches a word boundary or something - // that isn't a word boundary. - EmptyWordBoundary(Flags), - - // Saves the current position in the input string to the Nth save slot. - Save(uint), - - // Jumps to the instruction at the index given. - Jump(InstIdx), - - // Jumps to the instruction at the first index given. If that leads to - // a panic state, then the instruction at the second index given is - // tried. - Split(InstIdx, InstIdx), -} - -/// Program represents a compiled regular expression. Once an expression is -/// compiled, its representation is immutable and will never change. -/// -/// All of the data in a compiled expression is wrapped in "MaybeStatic" or -/// "MaybeOwned" types so that a `Program` can be represented as static data. -/// (This makes it convenient and efficient for use with the `regex!` macro.) -#[derive(Clone)] -pub struct Program { - /// A sequence of instructions. - pub insts: Vec, - /// If the regular expression requires a literal prefix in order to have a - /// match, that prefix is stored here. (It's used in the VM to implement - /// an optimization.) - pub prefix: String, -} - -impl Program { - /// Compiles a Regex given its AST. - pub fn new(ast: parse::Ast) -> (Program, Vec>) { - let mut c = Compiler { - insts: Vec::with_capacity(100), - names: Vec::with_capacity(10), - }; - - c.insts.push(Save(0)); - c.compile(ast); - c.insts.push(Save(1)); - c.insts.push(Match); - - // Try to discover a literal string prefix. - // This is a bit hacky since we have to skip over the initial - // 'Save' instruction. - let mut pre = String::with_capacity(5); - for inst in c.insts[1..].iter() { - match *inst { - OneChar(c, FLAG_EMPTY) => pre.push(c), - _ => break - } - } - - let Compiler { insts, names } = c; - let prog = Program { - insts: insts, - prefix: pre, - }; - (prog, names) - } - - /// Returns the total number of capture groups in the regular expression. - /// This includes the zeroth capture. - pub fn num_captures(&self) -> uint { - let mut n = 0; - for inst in self.insts.iter() { - match *inst { - Save(c) => n = cmp::max(n, c+1), - _ => {} - } - } - // There's exactly 2 Save slots for every capture. - n / 2 - } -} - -struct Compiler<'r> { - insts: Vec, - names: Vec>, -} - -// The compiler implemented here is extremely simple. Most of the complexity -// in this crate is in the parser or the VM. -// The only tricky thing here is patching jump/split instructions to point to -// the right instruction. -impl<'r> Compiler<'r> { - fn compile(&mut self, ast: parse::Ast) { - match ast { - Nothing => {}, - Literal(c, flags) => self.push(OneChar(c, flags)), - Dot(nl) => self.push(Any(nl)), - AstClass(ranges, flags) => - self.push(CharClass(ranges, flags)), - Begin(flags) => self.push(EmptyBegin(flags)), - End(flags) => self.push(EmptyEnd(flags)), - WordBoundary(flags) => self.push(EmptyWordBoundary(flags)), - Capture(cap, name, x) => { - let len = self.names.len(); - if cap >= len { - self.names.extend(repeat(None).take(10 + cap - len)) - } - self.names[cap] = name; - - self.push(Save(2 * cap)); - self.compile(*x); - self.push(Save(2 * cap + 1)); - } - Cat(xs) => { - for x in xs.into_iter() { - self.compile(x) - } - } - Alt(x, y) => { - let split = self.empty_split(); // push: split 0, 0 - let j1 = self.insts.len(); - self.compile(*x); // push: insts for x - let jmp = self.empty_jump(); // push: jmp 0 - let j2 = self.insts.len(); - self.compile(*y); // push: insts for y - let j3 = self.insts.len(); - - self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2 - self.set_jump(jmp, j3); // jmp 0 -> jmp j3 - } - Rep(x, ZeroOne, g) => { - let split = self.empty_split(); - let j1 = self.insts.len(); - self.compile(*x); - let j2 = self.insts.len(); - - if g.is_greedy() { - self.set_split(split, j1, j2); - } else { - self.set_split(split, j2, j1); - } - } - Rep(x, ZeroMore, g) => { - let j1 = self.insts.len(); - let split = self.empty_split(); - let j2 = self.insts.len(); - self.compile(*x); - let jmp = self.empty_jump(); - let j3 = self.insts.len(); - - self.set_jump(jmp, j1); - if g.is_greedy() { - self.set_split(split, j2, j3); - } else { - self.set_split(split, j3, j2); - } - } - Rep(x, OneMore, g) => { - let j1 = self.insts.len(); - self.compile(*x); - let split = self.empty_split(); - let j2 = self.insts.len(); - - if g.is_greedy() { - self.set_split(split, j1, j2); - } else { - self.set_split(split, j2, j1); - } - } - } - } - - /// Appends the given instruction to the program. - #[inline] - fn push(&mut self, x: Inst) { - self.insts.push(x) - } - - /// Appends an *empty* `Split` instruction to the program and returns - /// the index of that instruction. (The index can then be used to "patch" - /// the actual locations of the split in later.) - #[inline] - fn empty_split(&mut self) -> InstIdx { - self.insts.push(Split(0, 0)); - self.insts.len() - 1 - } - - /// Sets the left and right locations of a `Split` instruction at index - /// `i` to `pc1` and `pc2`, respectively. - /// If the instruction at index `i` isn't a `Split` instruction, then - /// `panic!` is called. - #[inline] - fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) { - let split = &mut self.insts[i]; - match *split { - Split(_, _) => *split = Split(pc1, pc2), - _ => panic!("BUG: Invalid split index."), - } - } - - /// Appends an *empty* `Jump` instruction to the program and returns the - /// index of that instruction. - #[inline] - fn empty_jump(&mut self) -> InstIdx { - self.insts.push(Jump(0)); - self.insts.len() - 1 - } - - /// Sets the location of a `Jump` instruction at index `i` to `pc`. - /// If the instruction at index `i` isn't a `Jump` instruction, then - /// `panic!` is called. - #[inline] - fn set_jump(&mut self, i: InstIdx, pc: InstIdx) { - let jmp = &mut self.insts[i]; - match *jmp { - Jump(_) => *jmp = Jump(pc), - _ => panic!("BUG: Invalid jump index."), - } - } -} diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs deleted file mode 100644 index 002b74cf1efa4..0000000000000 --- a/src/libregex/lib.rs +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// -// ignore-lexer-test FIXME #15679 - -//! Regular expressions implemented in Rust -//! -//! For official documentation, see the rust-lang/regex crate -#![crate_name = "regex"] -#![crate_type = "rlib"] -#![crate_type = "dylib"] -#![unstable = "use the crates.io `regex` library instead"] -#![staged_api] -#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", - html_favicon_url = "http://www.rust-lang.org/favicon.ico", - html_root_url = "http://doc.rust-lang.org/nightly/", - html_playground_url = "http://play.rust-lang.org/")] - -#![allow(unknown_features)] -#![allow(unstable)] -#![feature(slicing_syntax)] -#![feature(box_syntax)] -#![allow(unknown_features)] #![feature(int_uint)] -#![deny(missing_docs)] - -#[cfg(test)] -extern crate "test" as stdtest; -#[cfg(test)] -extern crate rand; - -// During tests, this links with the `regex` crate so that the `regex!` macro -// can be tested. -#[cfg(test)] -extern crate regex; - -// Unicode tables for character classes are defined in libunicode -extern crate unicode; - -pub use parse::Error; -pub use re::{Regex, Captures, SubCaptures, SubCapturesPos}; -pub use re::{FindCaptures, FindMatches}; -pub use re::{Replacer, NoExpand, RegexSplits, RegexSplitsN}; -pub use re::{quote, is_match}; - -mod compile; -mod parse; -mod re; -mod vm; - -#[cfg(test)] -mod test; - -/// The `native` module exists to support the `regex!` macro. Do not use. -#[doc(hidden)] -pub mod native { - // Exporting this stuff is bad form, but it's necessary for two reasons. - // Firstly, the `regex!` syntax extension is in a different crate and - // requires access to the representation of a regex (particularly the - // instruction set) in order to compile to native Rust. This could be - // mitigated if `regex!` was defined in the same crate, but this has - // undesirable consequences (such as requiring a dependency on - // `libsyntax`). - // - // Secondly, the code generated by `regex!` must *also* be able - // to access various functions in this crate to reduce code duplication - // and to provide a value with precisely the same `Regex` type in this - // crate. This, AFAIK, is impossible to mitigate. - // - // On the bright side, `rustdoc` lets us hide this from the public API - // documentation. - pub use compile::{ - Program, - OneChar, CharClass, Any, Save, Jump, Split, - Match, EmptyBegin, EmptyEnd, EmptyWordBoundary, - }; - pub use parse::{ - FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, - FLAG_SWAP_GREED, FLAG_NEGATED, - }; - pub use re::{Dynamic, ExDynamic, Native, ExNative}; - pub use vm::{ - MatchKind, Exists, Location, Submatches, - StepState, StepMatchEarlyReturn, StepMatch, StepContinue, - CharReader, find_prefix, - }; -} diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs deleted file mode 100644 index c2186a0ec241c..0000000000000 --- a/src/libregex/parse.rs +++ /dev/null @@ -1,1087 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -pub use self::Ast::*; -pub use self::Repeater::*; -pub use self::Greed::*; -use self::BuildAst::*; - -use std::char; -use std::cmp; -use std::fmt; -use std::iter; -use std::num; - -/// Static data containing Unicode ranges for general categories and scripts. -use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW}; - -/// The maximum number of repetitions allowed with the `{n,m}` syntax. -static MAX_REPEAT: uint = 1000; - -/// Error corresponds to something that can go wrong while parsing -/// a regular expression. -/// -/// (Once an expression is compiled, it is not possible to produce an error -/// via searching, splitting or replacing.) -#[derive(Show)] -pub struct Error { - /// The *approximate* character index of where the error occurred. - pub pos: uint, - /// A message describing the error. - pub msg: String, -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Regex syntax error near position {}: {:?}", - self.pos, self.msg) - } -} - -/// Represents the abstract syntax of a regular expression. -/// It is showable so that error messages resulting from a bug can provide -/// useful information. -/// It is cloneable so that expressions can be repeated for the counted -/// repetition feature. (No other copying is done.) -/// -/// Note that this representation prevents one from reproducing the regex as -/// it was typed. (But it could be used to reproduce an equivalent regex.) -#[derive(Show, Clone)] -pub enum Ast { - Nothing, - Literal(char, Flags), - Dot(Flags), - AstClass(Vec<(char, char)>, Flags), - Begin(Flags), - End(Flags), - WordBoundary(Flags), - Capture(uint, Option, Box), - // Represent concatenation as a flat vector to avoid blowing the - // stack in the compiler. - Cat(Vec), - Alt(Box, Box), - Rep(Box, Repeater, Greed), -} - -#[derive(Show, PartialEq, Clone)] -pub enum Repeater { - ZeroOne, - ZeroMore, - OneMore, -} - -#[derive(Copy, Show, Clone)] -pub enum Greed { - Greedy, - Ungreedy, -} - -impl Greed { - pub fn is_greedy(&self) -> bool { - match *self { - Greedy => true, - _ => false, - } - } - - fn swap(self, swapped: bool) -> Greed { - if !swapped { return self } - match self { - Greedy => Ungreedy, - Ungreedy => Greedy, - } - } -} - -/// BuildAst is a regrettable type that represents intermediate state for -/// constructing an abstract syntax tree. Its central purpose is to facilitate -/// parsing groups and alternations while also maintaining a stack of flag -/// state. -#[derive(Show)] -enum BuildAst { - Expr(Ast), - Paren(Flags, uint, String), // '(' - Bar, // '|' -} - -impl BuildAst { - fn paren(&self) -> bool { - match *self { - Paren(_, _, _) => true, - _ => false, - } - } - - fn flags(&self) -> Flags { - match *self { - Paren(flags, _, _) => flags, - _ => panic!("Cannot get flags from {:?}", self), - } - } - - fn capture(&self) -> Option { - match *self { - Paren(_, 0, _) => None, - Paren(_, c, _) => Some(c), - _ => panic!("Cannot get capture group from {:?}", self), - } - } - - fn capture_name(&self) -> Option { - match *self { - Paren(_, 0, _) => None, - Paren(_, _, ref name) => { - if name.len() == 0 { - None - } else { - Some(name.clone()) - } - } - _ => panic!("Cannot get capture name from {:?}", self), - } - } - - fn bar(&self) -> bool { - match *self { - Bar => true, - _ => false, - } - } - - fn unwrap(self) -> Result { - match self { - Expr(x) => Ok(x), - _ => panic!("Tried to unwrap non-AST item: {:?}", self), - } - } -} - -/// Flags represents all options that can be twiddled by a user in an -/// expression. -pub type Flags = u8; - -pub const FLAG_EMPTY: u8 = 0; -pub const FLAG_NOCASE: u8 = 1 << 0; // i -pub const FLAG_MULTI: u8 = 1 << 1; // m -pub const FLAG_DOTNL: u8 = 1 << 2; // s -pub const FLAG_SWAP_GREED: u8 = 1 << 3; // U -pub const FLAG_NEGATED: u8 = 1 << 4; // char class or not word boundary - -struct Parser<'a> { - // The input, parsed only as a sequence of UTF8 code points. - chars: Vec, - // The index of the current character in the input. - chari: uint, - // The intermediate state representing the AST. - stack: Vec, - // The current set of flags. - flags: Flags, - // The total number of capture groups. - // Incremented each time an opening left paren is seen (assuming it is - // opening a capture group). - caps: uint, - // A set of all capture group names used only to detect duplicates. - names: Vec, -} - -pub fn parse(s: &str) -> Result { - Parser { - chars: s.chars().collect(), - chari: 0, - stack: vec!(), - flags: FLAG_EMPTY, - caps: 0, - names: vec!(), - }.parse() -} - -impl<'a> Parser<'a> { - fn parse(&mut self) -> Result { - if self.chars.len() == 0 { - return Ok(Nothing); - } - loop { - let c = self.cur(); - match c { - '?' | '*' | '+' => try!(self.push_repeater(c)), - '\\' => { - let ast = try!(self.parse_escape()); - self.push(ast) - } - '{' => try!(self.parse_counted()), - '[' => match self.try_parse_ascii() { - None => try!(self.parse_class()), - Some(class) => self.push(class), - }, - '(' => { - if self.peek_is(1, '?') { - try!(self.expect('?')); - try!(self.parse_group_opts()) - } else { - self.caps += 1; - self.stack.push(Paren(self.flags, - self.caps, - "".to_string())) - } - } - ')' => { - let catfrom = try!( - self.pos_last(false, |x| x.paren() || x.bar())); - try!(self.concat(catfrom)); - - let altfrom = try!(self.pos_last(false, |x| x.paren())); - // Before we smush the alternates together and pop off the - // left paren, let's grab the old flags and see if we - // need a capture. - let (cap, cap_name, oldflags) = { - let paren = &self.stack[altfrom-1]; - (paren.capture(), paren.capture_name(), paren.flags()) - }; - try!(self.alternate(altfrom)); - self.flags = oldflags; - - // If this was a capture, pop what we just pushed in - // alternate and make it a capture. - if cap.is_some() { - let ast = try!(self.pop_ast()); - self.push(Capture(cap.unwrap(), cap_name, box ast)); - } - } - '|' => { - let catfrom = try!( - self.pos_last(true, |x| x.paren() || x.bar())); - try!(self.concat(catfrom)); - - self.stack.push(Bar); - } - _ => try!(self.push_literal(c)), - } - if !self.next_char() { - break - } - } - - // Try to improve error handling. At this point, there should be - // no remaining open parens. - if self.stack.iter().any(|x| x.paren()) { - return self.err("Unclosed parenthesis.") - } - let catfrom = try!(self.pos_last(true, |x| x.bar())); - try!(self.concat(catfrom)); - try!(self.alternate(0)); - - assert!(self.stack.len() == 1); - self.pop_ast() - } - - fn noteof(&mut self, expected: &str) -> Result<(), Error> { - match self.next_char() { - true => Ok(()), - false => { - self.err(&format!("Expected {:?} but got EOF.", - expected)[]) - } - } - } - - fn expect(&mut self, expected: char) -> Result<(), Error> { - match self.next_char() { - true if self.cur() == expected => Ok(()), - true => self.err(&format!("Expected '{:?}' but got '{:?}'.", - expected, self.cur())[]), - false => { - self.err(&format!("Expected '{:?}' but got EOF.", - expected)[]) - } - } - } - - fn next_char(&mut self) -> bool { - self.chari += 1; - self.chari < self.chars.len() - } - - fn pop_ast(&mut self) -> Result { - match self.stack.pop().unwrap().unwrap() { - Err(e) => Err(e), - Ok(ast) => Ok(ast), - } - } - - fn push(&mut self, ast: Ast) { - self.stack.push(Expr(ast)) - } - - fn push_repeater(&mut self, c: char) -> Result<(), Error> { - match self.stack.last() { - Some(&Expr(..)) => (), - // self.stack is empty, or the top item is not an Expr - _ => return self.err("A repeat operator must be preceded by a valid expression."), - } - let rep: Repeater = match c { - '?' => ZeroOne, '*' => ZeroMore, '+' => OneMore, - _ => panic!("Not a valid repeater operator."), - }; - - match self.peek(1) { - Some('*') | Some('+') => - return self.err( - "Double repeat operators are not supported."), - _ => {}, - } - let ast = try!(self.pop_ast()); - match ast { - Begin(_) | End(_) | WordBoundary(_) => - return self.err( - "Repeat arguments cannot be empty width assertions."), - _ => {} - } - let greed = try!(self.get_next_greedy()); - self.push(Rep(box ast, rep, greed)); - Ok(()) - } - - fn push_literal(&mut self, c: char) -> Result<(), Error> { - let flags = self.flags; - match c { - '.' => { - self.push(Dot(flags)) - } - '^' => { - self.push(Begin(flags)) - } - '$' => { - self.push(End(flags)) - } - _ => { - self.push(Literal(c, flags)) - } - } - Ok(()) - } - - // Parses all forms of character classes. - // Assumes that '[' is the current character. - fn parse_class(&mut self) -> Result<(), Error> { - let negated = - if self.peek_is(1, '^') { - try!(self.expect('^')); - FLAG_NEGATED - } else { - FLAG_EMPTY - }; - let mut ranges: Vec<(char, char)> = vec!(); - let mut alts: Vec = vec!(); - - while self.peek_is(1, '-') { - try!(self.expect('-')); - ranges.push(('-', '-')) - } - loop { - try!(self.noteof("a closing ']' or a non-empty character class)")); - let mut c = self.cur(); - match c { - '[' => - match self.try_parse_ascii() { - Some(AstClass(asciis, flags)) => { - alts.push(AstClass(asciis, flags ^ negated)); - continue - } - Some(ast) => - panic!("Expected Class AST but got '{:?}'", ast), - // Just drop down and try to add as a regular character. - None => {}, - }, - '\\' => { - match try!(self.parse_escape()) { - AstClass(asciis, flags) => { - alts.push(AstClass(asciis, flags ^ negated)); - continue - } - Literal(c2, _) => c = c2, // process below - Begin(_) | End(_) | WordBoundary(_) => - return self.err( - "\\A, \\z, \\b and \\B are not valid escape \ - sequences inside a character class."), - ast => panic!("Unexpected AST item '{:?}'", ast), - } - } - ']' if ranges.len() > 0 || alts.len() > 0 => { - if ranges.len() > 0 { - let flags = negated | (self.flags & FLAG_NOCASE); - let mut ast = AstClass(combine_ranges(ranges), flags); - for alt in alts.into_iter() { - ast = Alt(box alt, box ast) - } - self.push(ast); - } else if alts.len() > 0 { - let mut ast = alts.pop().unwrap(); - for alt in alts.into_iter() { - ast = Alt(box alt, box ast) - } - self.push(ast); - } - return Ok(()) - } - _ => {} - } - - if self.peek_is(1, '-') && !self.peek_is(2, ']') { - try!(self.expect('-')); - // The regex can't end here. - try!(self.noteof("not a ']'")); - // End the range with a single character or character escape. - let mut c2 = self.cur(); - if c2 == '\\' { - match try!(self.parse_escape()) { - Literal(c3, _) => c2 = c3, // allow literal escapes below - ast => - return self.err(&format!("Expected a literal, but got {:?}.", - ast)[]), - } - } - if c2 < c { - return self.err(&format!("Invalid character class \ - range '{}-{}'", - c, - c2)[]) - } - ranges.push((c, self.cur())) - } else { - ranges.push((c, c)) - } - } - } - - // Tries to parse an ASCII character class of the form [:name:]. - // If successful, returns an AST character class corresponding to name - // and moves the parser to the final ']' character. - // If unsuccessful, no state is changed and None is returned. - // Assumes that '[' is the current character. - fn try_parse_ascii(&mut self) -> Option { - if !self.peek_is(1, ':') { - return None - } - let closer = - match self.pos(']') { - Some(i) => i, - None => return None, - }; - if self.chars[closer-1] != ':' { - return None - } - if closer - self.chari <= 3 { - return None - } - let mut name_start = self.chari + 2; - let negated = - if self.peek_is(2, '^') { - name_start += 1; - FLAG_NEGATED - } else { - FLAG_EMPTY - }; - let name = self.slice(name_start, closer - 1); - match find_class(ASCII_CLASSES, &name[]) { - None => None, - Some(ranges) => { - self.chari = closer; - let flags = negated | (self.flags & FLAG_NOCASE); - Some(AstClass(combine_ranges(ranges), flags)) - } - } - } - - // Parses counted repetition. Supports: - // {n}, {n,}, {n,m}, {n}?, {n,}? and {n,m}? - // Assumes that '{' is the current character. - // Returns either an error or moves the parser to the final '}' character. - // (Or the '?' character if not greedy.) - fn parse_counted(&mut self) -> Result<(), Error> { - // Scan until the closing '}' and grab the stuff in {}. - let start = self.chari; - let closer = - match self.pos('}') { - Some(i) => i, - None => { - return self.err(&format!("No closing brace for counted \ - repetition starting at position \ - {:?}.", - start)[]) - } - }; - self.chari = closer; - let greed = try!(self.get_next_greedy()); - let inner = self.chars[start+1..closer].iter().cloned() - .collect::(); - - // Parse the min and max values from the regex. - let (mut min, mut max): (uint, Option); - if !inner.contains(",") { - min = try!(self.parse_uint(&inner[])); - max = Some(min); - } else { - let pieces: Vec<&str> = inner.splitn(1, ',').collect(); - let (smin, smax) = (pieces[0], pieces[1]); - if smin.len() == 0 { - return self.err("Max repetitions cannot be specified \ - without min repetitions.") - } - min = try!(self.parse_uint(smin)); - max = - if smax.len() == 0 { - None - } else { - Some(try!(self.parse_uint(smax))) - }; - } - - // Do some bounds checking and make sure max >= min. - if min > MAX_REPEAT { - return self.err(&format!( - "{} exceeds maximum allowed repetitions ({})", - min, MAX_REPEAT)[]); - } - if max.is_some() { - let m = max.unwrap(); - if m > MAX_REPEAT { - return self.err(&format!( - "{} exceeds maximum allowed repetitions ({})", - m, MAX_REPEAT)[]); - } - if m < min { - return self.err(&format!( - "Max repetitions ({}) cannot be smaller than min \ - repetitions ({}).", m, min)[]); - } - } - - // Now manipulate the AST be repeating elements. - if max.is_none() { - // Require N copies of what's on the stack and then repeat it. - let ast = try!(self.pop_ast()); - for _ in iter::range(0, min) { - self.push(ast.clone()) - } - self.push(Rep(box ast, ZeroMore, greed)); - } else { - // Require N copies of what's on the stack and then repeat it - // up to M times optionally. - let ast = try!(self.pop_ast()); - for _ in iter::range(0, min) { - self.push(ast.clone()) - } - if max.is_some() { - for _ in iter::range(min, max.unwrap()) { - self.push(Rep(box ast.clone(), ZeroOne, greed)) - } - } - // It's possible that we popped something off the stack but - // never put anything back on it. To keep things simple, add - // a no-op expression. - if min == 0 && (max.is_none() || max == Some(0)) { - self.push(Nothing) - } - } - Ok(()) - } - - // Parses all escape sequences. - // Assumes that '\' is the current character. - fn parse_escape(&mut self) -> Result { - try!(self.noteof("an escape sequence following a '\\'")); - - let c = self.cur(); - if is_punct(c) { - return Ok(Literal(c, FLAG_EMPTY)) - } - match c { - 'a' => Ok(Literal('\x07', FLAG_EMPTY)), - 'f' => Ok(Literal('\x0C', FLAG_EMPTY)), - 't' => Ok(Literal('\t', FLAG_EMPTY)), - 'n' => Ok(Literal('\n', FLAG_EMPTY)), - 'r' => Ok(Literal('\r', FLAG_EMPTY)), - 'v' => Ok(Literal('\x0B', FLAG_EMPTY)), - 'A' => Ok(Begin(FLAG_EMPTY)), - 'z' => Ok(End(FLAG_EMPTY)), - 'b' => Ok(WordBoundary(FLAG_EMPTY)), - 'B' => Ok(WordBoundary(FLAG_NEGATED)), - '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())), - 'x' => Ok(try!(self.parse_hex())), - 'p' | 'P' => Ok(try!(self.parse_unicode_name())), - 'd' | 'D' | 's' | 'S' | 'w' | 'W' => { - let ranges = perl_unicode_class(c); - let mut flags = self.flags & FLAG_NOCASE; - if c.is_uppercase() { flags |= FLAG_NEGATED } - Ok(AstClass(ranges, flags)) - } - _ => { - self.err(&format!("Invalid escape sequence '\\\\{}'", c)[]) - } - } - } - - // Parses a Unicode character class name, either of the form \pF where - // F is a one letter Unicode class name or of the form \p{name} where - // name is the Unicode class name. - // Assumes that \p or \P has been read (and 'p' or 'P' is the current - // character). - fn parse_unicode_name(&mut self) -> Result { - let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY }; - let mut name: String; - if self.peek_is(1, '{') { - try!(self.expect('{')); - let closer = - match self.pos('}') { - Some(i) => i, - None => return self.err(&format!( - "Missing '}}' for unclosed '{{' at position {}", - self.chari)[]), - }; - if closer - self.chari + 1 == 0 { - return self.err("No Unicode class name found.") - } - name = self.slice(self.chari + 1, closer); - self.chari = closer; - } else { - if self.chari + 1 >= self.chars.len() { - return self.err("No single letter Unicode class name found.") - } - name = self.slice(self.chari + 1, self.chari + 2); - self.chari += 1; - } - match find_class(UNICODE_CLASSES, &name[]) { - None => { - return self.err(&format!("Could not find Unicode class '{}'", - name)[]) - } - Some(ranges) => { - Ok(AstClass(ranges, negated | (self.flags & FLAG_NOCASE))) - } - } - } - - // Parses an octal number, up to 3 digits. - // Assumes that \n has been read, where n is the first digit. - fn parse_octal(&mut self) -> Result { - let start = self.chari; - let mut end = start + 1; - let (d2, d3) = (self.peek(1), self.peek(2)); - if d2 >= Some('0') && d2 <= Some('7') { - try!(self.noteof("expected octal character in [0-7]")); - end += 1; - if d3 >= Some('0') && d3 <= Some('7') { - try!(self.noteof("expected octal character in [0-7]")); - end += 1; - } - } - let s = self.slice(start, end); - match num::from_str_radix::(&s[], 8) { - Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), - None => { - self.err(&format!("Could not parse '{:?}' as octal number.", - s)[]) - } - } - } - - // Parse a hex number. Either exactly two digits or anything in {}. - // Assumes that \x has been read. - fn parse_hex(&mut self) -> Result { - if !self.peek_is(1, '{') { - try!(self.expect('{')); - return self.parse_hex_two() - } - let start = self.chari + 2; - let closer = - match self.pos('}') { - None => { - return self.err(&format!("Missing '}}' for unclosed \ - '{{' at position {}", - start)[]) - } - Some(i) => i, - }; - self.chari = closer; - self.parse_hex_digits(&self.slice(start, closer)[]) - } - - // Parses a two-digit hex number. - // Assumes that \xn has been read, where n is the first digit and is the - // current character. - // After return, parser will point at the second digit. - fn parse_hex_two(&mut self) -> Result { - let (start, end) = (self.chari, self.chari + 2); - let bad = self.slice(start - 2, self.chars.len()); - try!(self.noteof(format!("Invalid hex escape sequence '{}'", - bad).as_slice())); - self.parse_hex_digits(self.slice(start, end).as_slice()) - } - - // Parses `s` as a hexadecimal number. - fn parse_hex_digits(&self, s: &str) -> Result { - match num::from_str_radix::(s, 16) { - Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), - None => { - self.err(&format!("Could not parse '{}' as hex number.", s)[]) - } - } - } - - // Parses a named capture. - // Assumes that '(?P<' has been consumed and that the current character - // is '<'. - // When done, parser will be at the closing '>' character. - fn parse_named_capture(&mut self) -> Result<(), Error> { - try!(self.noteof("a capture name")); - let closer = - match self.pos('>') { - Some(i) => i, - None => return self.err("Capture name must end with '>'."), - }; - if closer - self.chari == 0 { - return self.err("Capture names must have at least 1 character.") - } - let name = self.slice(self.chari, closer); - if !name.chars().all(is_valid_cap) { - return self.err( - "Capture names can only have underscores, letters and digits.") - } - if self.names.contains(&name) { - return self.err(&format!("Duplicate capture group name '{}'.", - name)[]) - } - self.names.push(name.clone()); - self.chari = closer; - self.caps += 1; - self.stack.push(Paren(self.flags, self.caps, name)); - Ok(()) - } - - // Parses non-capture groups and options. - // Assumes that '(?' has already been consumed and '?' is the current - // character. - fn parse_group_opts(&mut self) -> Result<(), Error> { - if self.peek_is(1, 'P') && self.peek_is(2, '<') { - try!(self.expect('P')); - try!(self.expect('<')); - return self.parse_named_capture() - } - let start = self.chari; - let mut flags = self.flags; - let mut sign = 1i; - let mut saw_flag = false; - loop { - try!(self.noteof( - "expected non-empty set of flags or closing ')'")); - match self.cur() { - 'i' => { flags = flags | FLAG_NOCASE; saw_flag = true}, - 'm' => { flags = flags | FLAG_MULTI; saw_flag = true}, - 's' => { flags = flags | FLAG_DOTNL; saw_flag = true}, - 'U' => { flags = flags | FLAG_SWAP_GREED; saw_flag = true}, - '-' => { - if sign < 0 { - return self.err(&format!( - "Cannot negate flags twice in '{}'.", - self.slice(start, self.chari + 1))[]) - } - sign = -1; - saw_flag = false; - flags = flags ^ flags; - } - ':' | ')' => { - if sign < 0 { - if !saw_flag { - return self.err(&format!( - "A valid flag does not follow negation in '{}'", - self.slice(start, self.chari + 1))[]) - } - flags = flags ^ flags; - } - if self.cur() == ':' { - // Save the old flags with the opening paren. - self.stack.push(Paren(self.flags, 0, "".to_string())); - } - self.flags = flags; - return Ok(()) - } - _ => return self.err(&format!( - "Unrecognized flag '{}'.", self.cur())[]), - } - } - } - - // Peeks at the next character and returns whether it's ungreedy or not. - // If it is, then the next character is consumed. - fn get_next_greedy(&mut self) -> Result { - Ok(if self.peek_is(1, '?') { - try!(self.expect('?')); - Ungreedy - } else { - Greedy - }.swap(self.flags & FLAG_SWAP_GREED > 0)) - } - - // Searches the stack (starting at the top) until it finds an expression - // for which `pred` returns true. The index of that expression in the - // stack is returned. - // If there's no match, then one of two things happens depending on the - // values of `allow_start`. When it's true, then `0` will be returned. - // Otherwise, an error will be returned. - // Generally, `allow_start` is only true when you're *not* expecting an - // opening parenthesis. - fn pos_last

(&self, allow_start: bool, pred: P) -> Result where - P: FnMut(&BuildAst) -> bool, - { - let from = match self.stack.iter().rev().position(pred) { - Some(i) => i, - None => { - if allow_start { - self.stack.len() - } else { - return self.err("No matching opening parenthesis.") - } - } - }; - // Adjust index since 'from' is for the reversed stack. - // Also, don't include the '(' or '|'. - Ok(self.stack.len() - from) - } - - // concat starts at `from` in the parser's stack and concatenates all - // expressions up to the top of the stack. The resulting concatenation is - // then pushed on to the stack. - // Usually `from` corresponds to the position of an opening parenthesis, - // a '|' (alternation) or the start of the entire expression. - fn concat(&mut self, from: uint) -> Result<(), Error> { - let ast = try!(self.build_from(from, concat_flatten)); - self.push(ast); - Ok(()) - } - - // concat starts at `from` in the parser's stack and alternates all - // expressions up to the top of the stack. The resulting alternation is - // then pushed on to the stack. - // Usually `from` corresponds to the position of an opening parenthesis - // or the start of the entire expression. - // This will also drop any opening parens or alternation bars found in - // the intermediate AST. - fn alternate(&mut self, mut from: uint) -> Result<(), Error> { - // Unlike in the concatenation case, we want 'build_from' to continue - // all the way to the opening left paren (so it will be popped off and - // thrown away). But be careful with overflow---we can't count on the - // open paren to be there. - if from > 0 { from = from - 1} - let ast = try!(self.build_from(from, |l,r| Alt(box l, box r))); - self.push(ast); - Ok(()) - } - - // build_from combines all AST elements starting at 'from' in the - // parser's stack using 'mk' to combine them. If any such element is not an - // AST then it is popped off the stack and ignored. - fn build_from(&mut self, from: uint, mut mk: F) -> Result where - F: FnMut(Ast, Ast) -> Ast, - { - if from >= self.stack.len() { - return self.err("Empty group or alternate not allowed.") - } - - let mut combined = try!(self.pop_ast()); - let mut i = self.stack.len(); - while i > from { - i = i - 1; - match self.stack.pop().unwrap() { - Expr(x) => combined = mk(x, combined), - _ => {}, - } - } - Ok(combined) - } - - fn parse_uint(&self, s: &str) -> Result { - match s.parse::() { - Some(i) => Ok(i), - None => { - self.err(&format!("Expected an unsigned integer but got '{}'.", - s)[]) - } - } - } - - fn char_from_u32(&self, n: u32) -> Result { - match char::from_u32(n) { - Some(c) => Ok(c), - None => { - self.err(&format!("Could not decode '{}' to unicode \ - character.", n)[]) - } - } - } - - fn pos(&self, c: char) -> Option { - self.chars.iter() - .skip(self.chari).position(|&c2| c2 == c).map(|i| self.chari + i) - } - - fn err(&self, msg: &str) -> Result { - Err(Error { - pos: self.chari, - msg: msg.to_string(), - }) - } - - fn peek(&self, offset: uint) -> Option { - if self.chari + offset >= self.chars.len() { - return None - } - Some(self.chars[self.chari + offset]) - } - - fn peek_is(&self, offset: uint, is: char) -> bool { - self.peek(offset) == Some(is) - } - - fn cur(&self) -> char { - self.chars[self.chari] - } - - fn slice(&self, start: uint, end: uint) -> String { - self.chars[start..end].iter().cloned().collect() - } -} - -// Given an unordered collection of character ranges, combine_ranges returns -// an ordered sequence of character ranges where no two ranges overlap. They -// are ordered from least to greatest (using start position). -fn combine_ranges(unordered: Vec<(char, char)>) -> Vec<(char, char)> { - // Returns true iff the two character classes overlap or share a boundary. - // e.g., ('a', 'g') and ('h', 'm') would return true. - fn should_merge((a, b): (char, char), (x, y): (char, char)) -> bool { - cmp::max(a, x) as u32 <= cmp::min(b, y) as u32 + 1 - } - - // This is currently O(n^2), but I think with sufficient cleverness, - // it can be reduced to O(n) **if necessary**. - let mut ordered: Vec<(char, char)> = Vec::with_capacity(unordered.len()); - for (us, ue) in unordered.into_iter() { - let (mut us, mut ue) = (us, ue); - assert!(us <= ue); - let mut which: Option = None; - for (i, &(os, oe)) in ordered.iter().enumerate() { - if should_merge((us, ue), (os, oe)) { - us = cmp::min(us, os); - ue = cmp::max(ue, oe); - which = Some(i); - break - } - } - match which { - None => ordered.push((us, ue)), - Some(i) => ordered[i] = (us, ue), - } - } - ordered.sort(); - ordered -} - -// Constructs a Unicode friendly Perl character class from \d, \s or \w -// (or any of their negated forms). Note that this does not handle negation. -fn perl_unicode_class(which: char) -> Vec<(char, char)> { - match which.to_lowercase() { - 'd' => PERLD.to_vec(), - 's' => PERLS.to_vec(), - 'w' => PERLW.to_vec(), - _ => unreachable!(), - } -} - -// Returns a concatenation of two expressions. This also guarantees that a -// `Cat` expression will never be a direct child of another `Cat` expression. -fn concat_flatten(x: Ast, y: Ast) -> Ast { - match (x, y) { - (Cat(mut xs), Cat(ys)) => { xs.extend(ys.into_iter()); Cat(xs) } - (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) } - (ast, Cat(mut xs)) => { xs.insert(0, ast); Cat(xs) } - (ast1, ast2) => Cat(vec!(ast1, ast2)), - } -} - -pub fn is_punct(c: char) -> bool { - match c { - '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | - '[' | ']' | '{' | '}' | '^' | '$' => true, - _ => false, - } -} - -fn is_valid_cap(c: char) -> bool { - c == '_' || (c >= '0' && c <= '9') - || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') -} - -fn find_class(classes: NamedClasses, name: &str) -> Option> { - match classes.binary_search_by(|&(s, _)| s.cmp(name)) { - Ok(i) => Some(classes[i].1.to_vec()), - Err(_) => None, - } -} - -type Class = &'static [(char, char)]; -type NamedClasses = &'static [(&'static str, &'static Class)]; - -static ASCII_CLASSES: NamedClasses = &[ - // Classes must be in alphabetical order so that bsearch works. - // [:alnum:] alphanumeric (== [0-9A-Za-z]) - // [:alpha:] alphabetic (== [A-Za-z]) - // [:ascii:] ASCII (== [\x00-\x7F]) - // [:blank:] blank (== [\t ]) - // [:cntrl:] control (== [\x00-\x1F\x7F]) - // [:digit:] digits (== [0-9]) - // [:graph:] graphical (== [!-~]) - // [:lower:] lower case (== [a-z]) - // [:print:] printable (== [ -~] == [ [:graph:]]) - // [:punct:] punctuation (== [!-/:-@[-`{-~]) - // [:space:] whitespace (== [\t\n\v\f\r ]) - // [:upper:] upper case (== [A-Z]) - // [:word:] word characters (== [0-9A-Za-z_]) - // [:xdigit:] hex digit (== [0-9A-Fa-f]) - // Taken from: http://golang.org/pkg/regex/syntax/ - ("alnum", &ALNUM), - ("alpha", &ALPHA), - ("ascii", &ASCII), - ("blank", &BLANK), - ("cntrl", &CNTRL), - ("digit", &DIGIT), - ("graph", &GRAPH), - ("lower", &LOWER), - ("print", &PRINT), - ("punct", &PUNCT), - ("space", &SPACE), - ("upper", &UPPER), - ("word", &WORD), - ("xdigit", &XDIGIT), -]; - -static ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')]; -static ALPHA: Class = &[('A', 'Z'), ('a', 'z')]; -static ASCII: Class = &[('\x00', '\x7F')]; -static BLANK: Class = &[(' ', ' '), ('\t', '\t')]; -static CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')]; -static DIGIT: Class = &[('0', '9')]; -static GRAPH: Class = &[('!', '~')]; -static LOWER: Class = &[('a', 'z')]; -static PRINT: Class = &[(' ', '~')]; -static PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')]; -static SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), - ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')]; -static UPPER: Class = &[('A', 'Z')]; -static WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')]; -static XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')]; diff --git a/src/libregex/re.rs b/src/libregex/re.rs deleted file mode 100644 index 1b68ad500caa5..0000000000000 --- a/src/libregex/re.rs +++ /dev/null @@ -1,684 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -pub use self::NamesIter::*; -pub use self::Regex::*; - -use std::borrow::IntoCow; -use std::collections::HashMap; -use std::fmt; -use std::string::CowString; - -use compile::Program; -use parse; -use vm; -use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches}; - -/// Escapes all regular expression meta characters in `text`. -/// -/// The string returned may be safely used as a literal in a regular -/// expression. -pub fn quote(text: &str) -> String { - let mut quoted = String::with_capacity(text.len()); - for c in text.chars() { - if parse::is_punct(c) { - quoted.push('\\') - } - quoted.push(c); - } - quoted -} - -/// Tests if the given regular expression matches somewhere in the text given. -/// -/// If there was a problem compiling the regular expression, an error is -/// returned. -/// -/// To find submatches, split or replace text, you'll need to compile an -/// expression first. -/// -/// Note that you should prefer the `regex!` macro when possible. For example, -/// `regex!("...").is_match("...")`. -pub fn is_match(regex: &str, text: &str) -> Result { - Regex::new(regex).map(|r| r.is_match(text)) -} - -/// A compiled regular expression -#[derive(Clone)] -pub enum Regex { - // The representation of `Regex` is exported to support the `regex!` - // syntax extension. Do not rely on it. - // - // See the comments for the `program` module in `lib.rs` for a more - // detailed explanation for what `regex!` requires. - #[doc(hidden)] - Dynamic(ExDynamic), - #[doc(hidden)] - Native(ExNative), -} - -#[derive(Clone)] -#[doc(hidden)] -pub struct ExDynamic { - original: String, - names: Vec>, - #[doc(hidden)] - pub prog: Program -} - -#[doc(hidden)] -#[derive(Copy)] -pub struct ExNative { - #[doc(hidden)] - pub original: &'static str, - #[doc(hidden)] - pub names: &'static &'static [Option<&'static str>], - #[doc(hidden)] - pub prog: fn(MatchKind, &str, uint, uint) -> Vec> -} - -impl Clone for ExNative { - fn clone(&self) -> ExNative { - *self - } -} - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self.as_str(), f) - } -} - -impl Regex { - /// Compiles a dynamic regular expression. Once compiled, it can be - /// used repeatedly to search, split or replace text in a string. - /// - /// When possible, you should prefer the `regex!` macro since it is - /// safer and always faster. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result { - let ast = try!(parse::parse(re)); - let (prog, names) = Program::new(ast); - Ok(Dynamic(ExDynamic { - original: re.to_string(), - names: names, - prog: prog, - })) - } - - /// Returns true if and only if the regex matches the string given. - pub fn is_match(&self, text: &str) -> bool { - has_match(&exec(self, Exists, text)) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - pub fn find(&self, text: &str) -> Option<(uint, uint)> { - let caps = exec(self, Location, text); - if has_match(&caps) { - Some((caps[0].unwrap(), caps[1].unwrap())) - } else { - None - } - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> { - FindMatches { - re: self, - search: text, - last_end: 0, - last_match: None, - } - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to submatches. - /// Otherwise, `find` is faster for discovering the location of the overall - /// match. - pub fn captures<'t>(&self, text: &'t str) -> Option> { - let caps = exec(self, Submatches, text); - Captures::new(self, text, caps) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter` (except it - /// yields information about submatches). - pub fn captures_iter<'r, 't>(&'r self, text: &'t str) - -> FindCaptures<'r, 't> { - FindCaptures { - re: self, - search: text, - last_match: None, - last_end: 0, - } - } - - /// Returns an iterator of substrings of `text` delimited by a match - /// of the regular expression. - /// Namely, each element of the iterator corresponds to text that *isn't* - /// matched by the regular expression. - /// - /// This method will *not* copy the text given. - pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> { - RegexSplits { - finder: self.find_iter(text), - last: 0, - } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) - /// Namely, each element of the iterator corresponds to text that *isn't* - /// matched by the regular expression. - /// The remainder of the string that is not split will be the last element - /// in the iterator. - /// - /// This method will *not* copy the text given. - pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: uint) - -> RegexSplitsN<'r, 't> { - RegexSplitsN { - splits: self.split(text), - cur: 0, - limit: limit, - } - } - - /// Replaces the leftmost-first match with the replacement provided. - /// The replacement can be a regular string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced string. - /// - /// If no match is found, then a copy of the string is returned unchanged. - pub fn replace(&self, text: &str, rep: R) -> String { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the - /// replacement provided. This is the same as calling `replacen` with - /// `limit` set to `0`. - /// - /// See the documentation for `replace` for details on how to access - /// submatches in the replacement string. - pub fn replace_all(&self, text: &str, rep: R) -> String { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// submatches in the replacement string. - pub fn replacen - (&self, text: &str, limit: uint, mut rep: R) -> String { - let mut new = String::with_capacity(text.len()); - let mut last_match = 0u; - - for (i, cap) in self.captures_iter(text).enumerate() { - // It'd be nicer to use the 'take' iterator instead, but it seemed - // awkward given that '0' => no limit. - if limit > 0 && i >= limit { - break - } - - let (s, e) = cap.pos(0).unwrap(); // captures only reports matches - new.push_str(&text[last_match..s]); - new.push_str(&rep.reg_replace(&cap)[]); - last_match = e; - } - new.push_str(&text[last_match..text.len()]); - return new; - } - - /// Returns the original string of this regex. - pub fn as_str<'a>(&'a self) -> &'a str { - match *self { - Dynamic(ExDynamic { ref original, .. }) => &original[], - Native(ExNative { ref original, .. }) => &original[], - } - } - - #[doc(hidden)] - #[unstable] - pub fn names_iter<'a>(&'a self) -> NamesIter<'a> { - match *self { - Native(ref n) => NamesIterNative(n.names.iter()), - Dynamic(ref d) => NamesIterDynamic(d.names.iter()) - } - } - - fn names_len(&self) -> uint { - match *self { - Native(ref n) => n.names.len(), - Dynamic(ref d) => d.names.len() - } - } - -} - -#[derive(Clone)] -pub enum NamesIter<'a> { - NamesIterNative(::std::slice::Iter<'a, Option<&'static str>>), - NamesIterDynamic(::std::slice::Iter<'a, Option>) -} - -impl<'a> Iterator for NamesIter<'a> { - type Item = Option; - - fn next(&mut self) -> Option> { - match *self { - NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_string())), - NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_string())), - } - } -} - -/// NoExpand indicates literal string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal -/// string replacement without expanding `$name` to their corresponding -/// capture groups. -/// -/// `'r` is the lifetime of the literal text. -pub struct NoExpand<'t>(pub &'t str); - -/// Replacer describes types that can be used to replace matches in a string. -pub trait Replacer { - /// Returns a possibly owned string that is used to replace the match - /// corresponding to the `caps` capture group. - /// - /// The `'a` lifetime refers to the lifetime of a borrowed string when - /// a new owned string isn't needed (e.g., for `NoExpand`). - fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a>; -} - -impl<'t> Replacer for NoExpand<'t> { - fn reg_replace<'a>(&'a mut self, _: &Captures) -> CowString<'a> { - let NoExpand(s) = *self; - s.into_cow() - } -} - -impl<'t> Replacer for &'t str { - fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> { - caps.expand(*self).into_cow() - } -} - -impl Replacer for F where F: FnMut(&Captures) -> String { - fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> { - (*self)(caps).into_cow() - } -} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime -/// of the string being split. -#[derive(Clone)] -pub struct RegexSplits<'r, 't> { - finder: FindMatches<'r, 't>, - last: uint, -} - -impl<'r, 't> Iterator for RegexSplits<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - let text = self.finder.search; - match self.finder.next() { - None => { - if self.last >= text.len() { - None - } else { - let s = &text[self.last..text.len()]; - self.last = text.len(); - Some(s) - } - } - Some((s, e)) => { - let matched = &text[self.last..s]; - self.last = e; - Some(matched) - } - } - } -} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime -/// of the string being split. -#[derive(Clone)] -pub struct RegexSplitsN<'r, 't> { - splits: RegexSplits<'r, 't>, - cur: uint, - limit: uint, -} - -impl<'r, 't> Iterator for RegexSplitsN<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - let text = self.splits.finder.search; - if self.cur >= self.limit { - None - } else { - self.cur += 1; - if self.cur >= self.limit { - Some(&text[self.splits.last..text.len()]) - } else { - self.splits.next() - } - } - } -} - -/// Captures represents a group of captured strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. -/// If a capture group is named, then the matched string is *also* available -/// via the `name` method. (Note that the 0th capture is always unnamed and so -/// must be accessed with the `at` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t str, - locs: CaptureLocs, - named: Option>, -} - -impl<'t> Captures<'t> { - #[allow(unstable)] - fn new(re: &Regex, search: &'t str, locs: CaptureLocs) - -> Option> { - if !has_match(&locs) { - return None - } - - let named = - if re.names_len() == 0 { - None - } else { - let mut named = HashMap::new(); - for (i, name) in re.names_iter().enumerate() { - match name { - None => {}, - Some(name) => { - named.insert(name, i); - } - } - } - Some(named) - }; - Some(Captures { - text: search, - locs: locs, - named: named, - }) - } - - /// Returns the start and end positions of the Nth capture group. - /// Returns `None` if `i` is not a valid capture group or if the capture - /// group did not match anything. - /// The positions returned are *always* byte indices with respect to the - /// original string matched. - pub fn pos(&self, i: uint) -> Option<(uint, uint)> { - let (s, e) = (i * 2, i * 2 + 1); - if e >= self.locs.len() || self.locs[s].is_none() { - // VM guarantees that each pair of locations are both Some or None. - return None - } - Some((self.locs[s].unwrap(), self.locs[e].unwrap())) - } - - /// Returns the matched string for the capture group `i`. If `i` isn't - /// a valid capture group or didn't match anything, then `None` is - /// returned. - pub fn at(&self, i: uint) -> Option<&'t str> { - match self.pos(i) { - None => None, - Some((s, e)) => Some(&self.text[s.. e]) - } - } - - /// Returns the matched string for the capture group named `name`. If - /// `name` isn't a valid capture group or didn't match anything, then - /// `None` is returned. - pub fn name(&self, name: &str) -> Option<&'t str> { - match self.named { - None => None, - Some(ref h) => { - match h.get(name) { - None => None, - Some(i) => self.at(*i), - } - } - } - } - - /// Creates an iterator of all the capture groups in order of appearance - /// in the regular expression. - pub fn iter(&'t self) -> SubCaptures<'t> { - SubCaptures { idx: 0, caps: self, } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. - pub fn iter_pos(&'t self) -> SubCapturesPos<'t> { - SubCapturesPos { idx: 0, caps: self, } - } - - /// Expands all instances of `$name` in `text` to the corresponding capture - /// group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist or - /// isn't a valid index), then it is replaced with the empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, text: &str) -> String { - // How evil can you get? - // FIXME: Don't use regexes for this. It's completely unnecessary. - let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap(); - let text = re.replace_all(text, |&mut: refs: &Captures| -> String { - let pre = refs.at(1).unwrap_or(""); - let name = refs.at(2).unwrap_or(""); - format!("{}{}", pre, - match name.parse::() { - None => self.name(name).unwrap_or("").to_string(), - Some(i) => self.at(i).unwrap_or("").to_string(), - }) - }); - let re = Regex::new(r"\$\$").unwrap(); - re.replace_all(&text[], NoExpand("$")) - } - - /// Returns the number of captured groups. - #[inline] - pub fn len(&self) -> uint { self.locs.len() / 2 } - - /// Returns if there are no captured groups. - #[inline] - pub fn is_empty(&self) -> bool { self.len() == 0 } -} - -/// An iterator over capture groups for a particular match of a regular -/// expression. -/// -/// `'t` is the lifetime of the matched text. -#[derive(Clone)] -pub struct SubCaptures<'t> { - idx: uint, - caps: &'t Captures<'t>, -} - -impl<'t> Iterator for SubCaptures<'t> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - if self.idx < self.caps.len() { - self.idx += 1; - Some(self.caps.at(self.idx - 1).unwrap_or("")) - } else { - None - } - } -} - -/// An iterator over capture group positions for a particular match of a -/// regular expression. -/// -/// Positions are byte indices in terms of the original string matched. -/// -/// `'t` is the lifetime of the matched text. -#[derive(Clone)] -pub struct SubCapturesPos<'t> { - idx: uint, - caps: &'t Captures<'t>, -} - -impl<'t> Iterator for SubCapturesPos<'t> { - type Item = Option<(uint, uint)>; - - fn next(&mut self) -> Option> { - if self.idx < self.caps.len() { - self.idx += 1; - Some(self.caps.pos(self.idx - 1)) - } else { - None - } - } -} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime -/// of the matched string. -#[derive(Clone)] -pub struct FindCaptures<'r, 't> { - re: &'r Regex, - search: &'t str, - last_match: Option, - last_end: uint, -} - -impl<'r, 't> Iterator for FindCaptures<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option> { - if self.last_end > self.search.len() { - return None - } - - let caps = exec_slice(self.re, Submatches, self.search, - self.last_end, self.search.len()); - let (s, e) = - if !has_match(&caps) { - return None - } else { - (caps[0].unwrap(), caps[1].unwrap()) - }; - - // Don't accept empty matches immediately following a match. - // i.e., no infinite loops please. - if e == s && Some(self.last_end) == self.last_match { - self.last_end += 1; - return self.next() - } - self.last_end = e; - self.last_match = Some(self.last_end); - Captures::new(self.re, self.search, caps) - } -} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a tuple of integers corresponding to the start and end -/// of the match. The indices are byte offsets. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime -/// of the matched string. -#[derive(Clone)] -pub struct FindMatches<'r, 't> { - re: &'r Regex, - search: &'t str, - last_match: Option, - last_end: uint, -} - -impl<'r, 't> Iterator for FindMatches<'r, 't> { - type Item = (uint, uint); - - fn next(&mut self) -> Option<(uint, uint)> { - if self.last_end > self.search.len() { - return None - } - - let caps = exec_slice(self.re, Location, self.search, - self.last_end, self.search.len()); - let (s, e) = - if !has_match(&caps) { - return None - } else { - (caps[0].unwrap(), caps[1].unwrap()) - }; - - // Don't accept empty matches immediately following a match. - // i.e., no infinite loops please. - if e == s && Some(self.last_end) == self.last_match { - self.last_end += 1; - return self.next() - } - self.last_end = e; - self.last_match = Some(self.last_end); - Some((s, e)) - } -} - -fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs { - exec_slice(re, which, input, 0, input.len()) -} - -fn exec_slice(re: &Regex, which: MatchKind, - input: &str, s: uint, e: uint) -> CaptureLocs { - match *re { - Dynamic(ExDynamic { ref prog, .. }) => vm::run(which, prog, input, s, e), - Native(ExNative { ref prog, .. }) => (*prog)(which, input, s, e), - } -} - -#[inline] -fn has_match(caps: &CaptureLocs) -> bool { - caps.len() >= 2 && caps[0].is_some() && caps[1].is_some() -} diff --git a/src/libregex/test/bench.rs b/src/libregex/test/bench.rs deleted file mode 100644 index 17521ff7ea54b..0000000000000 --- a/src/libregex/test/bench.rs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -#![allow(non_snake_case)] - -use std::rand::{Rng, thread_rng}; -use stdtest::Bencher; -use std::iter::repeat; - -use regex::{Regex, NoExpand}; - -fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) { - b.iter(|| if !re.is_match(text) { panic!("no match") }); -} - -#[bench] -fn no_exponential(b: &mut Bencher) { - let n = 100; - let re = Regex::new(format!("{}{}", - repeat("a?").take(n).collect::(), - repeat("a").take(n).collect::()).as_slice()).unwrap(); - let text = repeat("a").take(n).collect::(); - bench_assert_match(b, re, text.as_slice()); -} - -#[bench] -fn literal(b: &mut Bencher) { - let re = regex!("y"); - let text = format!("{}y", repeat("x").take(50).collect::()); - bench_assert_match(b, re, text.as_slice()); -} - -#[bench] -fn not_literal(b: &mut Bencher) { - let re = regex!(".y"); - let text = format!("{}y", repeat("x").take(50).collect::()); - bench_assert_match(b, re, text.as_slice()); -} - -#[bench] -fn match_class(b: &mut Bencher) { - let re = regex!("[abcdw]"); - let text = format!("{}w", repeat("xxxx").take(20).collect::()); - bench_assert_match(b, re, text.as_slice()); -} - -#[bench] -fn match_class_in_range(b: &mut Bencher) { - // 'b' is between 'a' and 'c', so the class range checking doesn't help. - let re = regex!("[ac]"); - let text = format!("{}c", repeat("bbbb").take(20).collect::()); - bench_assert_match(b, re, text.as_slice()); -} - -#[bench] -fn replace_all(b: &mut Bencher) { - let re = regex!("[cjrw]"); - let text = "abcdefghijklmnopqrstuvwxyz"; - // FIXME: This isn't using the $name expand stuff. - // It's possible RE2/Go is using it, but currently, the expand in this - // crate is actually compiling a regex, so it's incredibly slow. - b.iter(|| re.replace_all(text, NoExpand(""))); -} - -#[bench] -fn anchored_literal_short_non_match(b: &mut Bencher) { - let re = regex!("^zbc(d|e)"); - let text = "abcdefghijklmnopqrstuvwxyz"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn anchored_literal_long_non_match(b: &mut Bencher) { - let re = regex!("^zbc(d|e)"); - let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::(); - b.iter(|| re.is_match(text.as_slice())); -} - -#[bench] -fn anchored_literal_short_match(b: &mut Bencher) { - let re = regex!("^.bc(d|e)"); - let text = "abcdefghijklmnopqrstuvwxyz"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn anchored_literal_long_match(b: &mut Bencher) { - let re = regex!("^.bc(d|e)"); - let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::(); - b.iter(|| re.is_match(text.as_slice())); -} - -#[bench] -fn one_pass_short_a(b: &mut Bencher) { - let re = regex!("^.bc(d|e)*$"); - let text = "abcddddddeeeededd"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn one_pass_short_a_not(b: &mut Bencher) { - let re = regex!(".bc(d|e)*$"); - let text = "abcddddddeeeededd"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn one_pass_short_b(b: &mut Bencher) { - let re = regex!("^.bc(?:d|e)*$"); - let text = "abcddddddeeeededd"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn one_pass_short_b_not(b: &mut Bencher) { - let re = regex!(".bc(?:d|e)*$"); - let text = "abcddddddeeeededd"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn one_pass_long_prefix(b: &mut Bencher) { - let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$"); - let text = "abcdefghijklmnopqrstuvwxyz"; - b.iter(|| re.is_match(text)); -} - -#[bench] -fn one_pass_long_prefix_not(b: &mut Bencher) { - let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$"); - let text = "abcdefghijklmnopqrstuvwxyz"; - b.iter(|| re.is_match(text)); -} - -macro_rules! throughput { - ($name:ident, $regex:expr, $size:expr) => ( - #[bench] - fn $name(b: &mut Bencher) { - let text = gen_text($size); - b.bytes = $size; - b.iter(|| if $regex.is_match(text.as_slice()) { panic!("match") }); - } - ); -} - -fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } -fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") } -fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } -fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } - -fn gen_text(n: uint) -> String { - let mut rng = thread_rng(); - let mut bytes = rng.gen_ascii_chars().map(|n| n as u8).take(n) - .collect::>(); - for (i, b) in bytes.iter_mut().enumerate() { - if i % 20 == 0 { - *b = b'\n' - } - } - String::from_utf8(bytes).unwrap() -} - -throughput!{easy0_32, easy0(), 32} -throughput!{easy0_1K, easy0(), 1<<10} -throughput!{easy0_32K, easy0(), 32<<10} - -throughput!{easy1_32, easy1(), 32} -throughput!{easy1_1K, easy1(), 1<<10} -throughput!{easy1_32K, easy1(), 32<<10} - -throughput!{medium_32, medium(), 32} -throughput!{medium_1K, medium(), 1<<10} -throughput!{medium_32K,medium(), 32<<10} - -throughput!{hard_32, hard(), 32} -throughput!{hard_1K, hard(), 1<<10} -throughput!{hard_32K,hard(), 32<<10} diff --git a/src/libregex/test/matches.rs b/src/libregex/test/matches.rs deleted file mode 100644 index 7508f4c50a2c3..0000000000000 --- a/src/libregex/test/matches.rs +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// ignore-tidy-linelength - -// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests' -// on 2014-04-23 01:33:36.539280. - -// Tests from basic.dat -mat!{match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))} -mat!{match_basic_4, r"a...b", r"abababbb", Some((2, 7))} -mat!{match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))} -mat!{match_basic_6, r"\)", r"()", Some((1, 2))} -mat!{match_basic_7, r"a]", r"a]a", Some((0, 2))} -mat!{match_basic_9, r"\}", r"}", Some((0, 1))} -mat!{match_basic_10, r"\]", r"]", Some((0, 1))} -mat!{match_basic_12, r"]", r"]", Some((0, 1))} -mat!{match_basic_15, r"^a", r"ax", Some((0, 1))} -mat!{match_basic_16, r"\^a", r"a^a", Some((1, 3))} -mat!{match_basic_17, r"a\^", r"a^", Some((0, 2))} -mat!{match_basic_18, r"a$", r"aa", Some((1, 2))} -mat!{match_basic_19, r"a\$", r"a$", Some((0, 2))} -mat!{match_basic_20, r"^$", r"", Some((0, 0))} -mat!{match_basic_21, r"$^", r"", Some((0, 0))} -mat!{match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))} -mat!{match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))} -mat!{match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))} -mat!{match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))} -mat!{match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3))} -mat!{match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))} -mat!{match_basic_28, r"a{0}b", r"ab", Some((1, 2))} -mat!{match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))} -mat!{match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))} -mat!{match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1))} -mat!{match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4))} -mat!{match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))} -mat!{match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2))} -mat!{match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))} -mat!{match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))} -mat!{match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2))} -mat!{match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))} -mat!{match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))} -mat!{match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2))} -mat!{match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))} -mat!{match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))} -mat!{match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))} -mat!{match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))} -mat!{match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))} -mat!{match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2))} -mat!{match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2))} -mat!{match_basic_49, r"ab|a", r"xabc", Some((1, 3))} -mat!{match_basic_50, r"ab|a", r"xxabc", Some((2, 4))} -mat!{match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4))} -mat!{match_basic_52, r"[^-]", r"--a", Some((2, 3))} -mat!{match_basic_53, r"[a-]*", r"--a", Some((0, 3))} -mat!{match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))} -mat!{match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17))} -mat!{match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17))} -mat!{match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))} -mat!{match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))} -mat!{match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))} -mat!{match_basic_65, r" -", r" -", Some((0, 1))} -mat!{match_basic_66, r" -", r" -", Some((0, 1))} -mat!{match_basic_67, r"[^a]", r" -", Some((0, 1))} -mat!{match_basic_68, r" -a", r" -a", Some((0, 2))} -mat!{match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3))} -mat!{match_basic_70, r"xxx", r"xxx", Some((0, 3))} -mat!{match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6))} -mat!{match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3))} -mat!{match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11))} -mat!{match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1))} -mat!{match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2))} -mat!{match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81))} -mat!{match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25))} -mat!{match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22))} -mat!{match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11))} -mat!{match_basic_80, r".*", r"", Some((0, 2))} -mat!{match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57))} -mat!{match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))} -mat!{match_basic_84, r"^", r"", Some((0, 0))} -mat!{match_basic_85, r"$", r"", Some((0, 0))} -mat!{match_basic_86, r"^$", r"", Some((0, 0))} -mat!{match_basic_87, r"^a$", r"a", Some((0, 1))} -mat!{match_basic_88, r"abc", r"abc", Some((0, 3))} -mat!{match_basic_89, r"abc", r"xabcy", Some((1, 4))} -mat!{match_basic_90, r"abc", r"ababc", Some((2, 5))} -mat!{match_basic_91, r"ab*c", r"abc", Some((0, 3))} -mat!{match_basic_92, r"ab*bc", r"abc", Some((0, 3))} -mat!{match_basic_93, r"ab*bc", r"abbc", Some((0, 4))} -mat!{match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))} -mat!{match_basic_95, r"ab+bc", r"abbc", Some((0, 4))} -mat!{match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))} -mat!{match_basic_97, r"ab?bc", r"abbc", Some((0, 4))} -mat!{match_basic_98, r"ab?bc", r"abc", Some((0, 3))} -mat!{match_basic_99, r"ab?c", r"abc", Some((0, 3))} -mat!{match_basic_100, r"^abc$", r"abc", Some((0, 3))} -mat!{match_basic_101, r"^abc", r"abcc", Some((0, 3))} -mat!{match_basic_102, r"abc$", r"aabc", Some((1, 4))} -mat!{match_basic_103, r"^", r"abc", Some((0, 0))} -mat!{match_basic_104, r"$", r"abc", Some((3, 3))} -mat!{match_basic_105, r"a.c", r"abc", Some((0, 3))} -mat!{match_basic_106, r"a.c", r"axc", Some((0, 3))} -mat!{match_basic_107, r"a.*c", r"axyzc", Some((0, 5))} -mat!{match_basic_108, r"a[bc]d", r"abd", Some((0, 3))} -mat!{match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))} -mat!{match_basic_110, r"a[b-d]", r"aac", Some((1, 3))} -mat!{match_basic_111, r"a[-b]", r"a-", Some((0, 2))} -mat!{match_basic_112, r"a[b-]", r"a-", Some((0, 2))} -mat!{match_basic_113, r"a]", r"a]", Some((0, 2))} -mat!{match_basic_114, r"a[]]b", r"a]b", Some((0, 3))} -mat!{match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))} -mat!{match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))} -mat!{match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))} -mat!{match_basic_118, r"ab|cd", r"abc", Some((0, 2))} -mat!{match_basic_119, r"ab|cd", r"abcd", Some((0, 2))} -mat!{match_basic_120, r"a\(b", r"a(b", Some((0, 3))} -mat!{match_basic_121, r"a\(*b", r"ab", Some((0, 2))} -mat!{match_basic_122, r"a\(*b", r"a((b", Some((0, 4))} -mat!{match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1))} -mat!{match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3))} -mat!{match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))} -mat!{match_basic_126, r"a*", r"aaa", Some((0, 3))} -mat!{match_basic_128, r"(a*)*", r"-", Some((0, 0)), None} -mat!{match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))} -mat!{match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None} -mat!{match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))} -mat!{match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))} -mat!{match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))} -mat!{match_basic_135, r"[^ab]*", r"cde", Some((0, 3))} -mat!{match_basic_137, r"(^)*", r"-", Some((0, 0)), None} -mat!{match_basic_138, r"a*", r"", Some((0, 0))} -mat!{match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))} -mat!{match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))} -mat!{match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))} -mat!{match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))} -mat!{match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None} -mat!{match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))} -mat!{match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))} -mat!{match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))} -mat!{match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))} -mat!{match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))} -mat!{match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))} -mat!{match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))} -mat!{match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))} -mat!{match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))} -mat!{match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))} -mat!{match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4))} -mat!{match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))} -mat!{match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))} -mat!{match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4))} -mat!{match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))} -mat!{match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))} -mat!{match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5))} -mat!{match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2))} -mat!{match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6))} -mat!{match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1))} -mat!{match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14))} -mat!{match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5))} -mat!{match_basic_167, r"abcd", r"abcd", Some((0, 4))} -mat!{match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))} -mat!{match_basic_169, r"a[-]?c", r"ac", Some((0, 3))} -mat!{match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12))} -mat!{match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12))} -mat!{match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12))} -mat!{match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11))} -mat!{match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12))} -mat!{match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11))} -mat!{match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15))} -mat!{match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12))} -mat!{match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11))} -mat!{match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13))} -mat!{match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12))} -mat!{match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12))} -mat!{match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))} -mat!{match_basic_188, r"^.+$", r"vivi", Some((0, 4))} -mat!{match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))} -mat!{match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19))} -mat!{match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))} -mat!{match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11))} -mat!{match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3))} -mat!{match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7))} -mat!{match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))} -mat!{match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))} -mat!{match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))} -mat!{match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))} -mat!{match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))} -mat!{match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7))} -mat!{match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3))} -mat!{match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))} -mat!{match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))} -mat!{match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3))} -mat!{match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))} -mat!{match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))} -mat!{match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))} -mat!{match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3))} -mat!{match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))} -mat!{match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))} -mat!{match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))} -mat!{match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))} -mat!{match_basic_218, r"\\XXX", r"\XXX", Some((0, 4))} -mat!{match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))} -mat!{match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))} -mat!{match_basic_221, r"\\000", r"\000", Some((0, 4))} - -// Tests from nullsubexpr.dat -mat!{match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None} -mat!{match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))} -mat!{match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))} -mat!{match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_17, r"(a+)+", r"x", None} -mat!{match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None} -mat!{match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))} -mat!{match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None} -mat!{match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5))} -mat!{match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))} -mat!{match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None} -mat!{match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6))} -mat!{match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None} -mat!{match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2))} -mat!{match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1))} -mat!{match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))} -mat!{match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))} -mat!{match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))} -mat!{match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))} -mat!{match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))} -mat!{match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))} -mat!{match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2))} -mat!{match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2))} - -// Tests from repetition.dat -mat!{match_repetition_10, r"((..)|(.))", r"", None} -mat!{match_repetition_11, r"((..)|(.))((..)|(.))", r"", None} -mat!{match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None} -mat!{match_repetition_14, r"((..)|(.)){1}", r"", None} -mat!{match_repetition_15, r"((..)|(.)){2}", r"", None} -mat!{match_repetition_16, r"((..)|(.)){3}", r"", None} -mat!{match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))} -mat!{match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))} -mat!{match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None} -mat!{match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None} -mat!{match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))} -mat!{match_repetition_25, r"((..)|(.)){2}", r"a", None} -mat!{match_repetition_26, r"((..)|(.)){3}", r"a", None} -mat!{match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))} -mat!{match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2))} -mat!{match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None} -mat!{match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2))} -mat!{match_repetition_36, r"((..)|(.)){3}", r"aa", None} -mat!{match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3))} -mat!{match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3))} -mat!{match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))} -mat!{match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3))} -mat!{match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))} -mat!{match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4))} -mat!{match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4))} -mat!{match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5))} -mat!{match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))} -mat!{match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))} -mat!{match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None} -mat!{match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None} -mat!{match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None} -mat!{match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None} -mat!{match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None} -mat!{match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))} -mat!{match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))} -mat!{match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))} -mat!{match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))} -mat!{match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None} -mat!{match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))} -mat!{match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))} -mat!{match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None} -mat!{match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))} -mat!{match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None} -mat!{match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None} -mat!{match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} -mat!{match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))} - diff --git a/src/libregex/test/mod.rs b/src/libregex/test/mod.rs deleted file mode 100644 index e11094b117471..0000000000000 --- a/src/libregex/test/mod.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -macro_rules! regex { - ($re:expr) => ( - match ::regex::Regex::new($re) { - Ok(re) => re, - Err(err) => panic!("{:?}", err), - } - ); -} - -#[path = "bench.rs"] -mod dynamic_bench; -#[path = "tests.rs"] -mod dynamic_tests; - diff --git a/src/libregex/test/native_static.rs b/src/libregex/test/native_static.rs deleted file mode 100644 index 62e14731c207b..0000000000000 --- a/src/libregex/test/native_static.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use regex::Regex; -static RE: Regex = regex!(r"\d+"); - -#[test] -fn static_splitn() { - let text = "cauchy123plato456tyler789binx"; - let subs: Vec<&str> = RE.splitn(text, 2).collect(); - assert_eq!(subs, vec!("cauchy", "plato456tyler789binx")); -} - -#[test] -fn static_split() { - let text = "cauchy123plato456tyler789binx"; - let subs: Vec<&str> = RE.split(text).collect(); - assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx")); -} diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs deleted file mode 100644 index b69420ac05bd1..0000000000000 --- a/src/libregex/test/tests.rs +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// ignore-tidy-linelength -// ignore-lexer-test FIXME #15679 - -use regex::{Regex, NoExpand}; - -#[test] -fn splitn() { - let re = regex!(r"\d+"); - let text = "cauchy123plato456tyler789binx"; - let subs: Vec<&str> = re.splitn(text, 2).collect(); - assert_eq!(subs, vec!("cauchy", "plato456tyler789binx")); -} - -#[test] -fn split() { - let re = regex!(r"\d+"); - let text = "cauchy123plato456tyler789binx"; - let subs: Vec<&str> = re.split(text).collect(); - assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx")); -} - -#[test] -fn empty_regex_empty_match() { - let re = regex!(""); - let ms = re.find_iter("").collect::>(); - assert_eq!(ms, vec![(0, 0)]); -} - -#[test] -fn empty_regex_nonempty_match() { - let re = regex!(""); - let ms = re.find_iter("abc").collect::>(); - assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); -} - -#[test] -fn quoted_bracket_set() { - let re = regex!(r"([\x{5b}\x{5d}])"); - let ms = re.find_iter("[]").collect::>(); - assert_eq!(ms, vec![(0, 1), (1, 2)]); - let re = regex!(r"([\[\]])"); - let ms = re.find_iter("[]").collect::>(); - assert_eq!(ms, vec![(0, 1), (1, 2)]); -} - -#[test] -fn first_range_starts_with_left_bracket() { - let re = regex!(r"([[-z])"); - let ms = re.find_iter("[]").collect::>(); - assert_eq!(ms, vec![(0, 1), (1, 2)]); -} - -#[test] -fn range_ends_with_escape() { - let re = regex!(r"([\[-\x{5d}])"); - let ms = re.find_iter("[]").collect::>(); - assert_eq!(ms, vec![(0, 1), (1, 2)]); -} - -macro_rules! replace { - ($name:ident, $which:ident, $re:expr, - $search:expr, $replace:expr, $result:expr) => ( - #[test] - fn $name() { - let re = regex!($re); - assert_eq!(re.$which($search, $replace), String::from_str($result)); - } - ); -} - -replace!{rep_first, replace, r"\d", "age: 26", "Z", "age: Z6"} -replace!{rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z"} -replace!{rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ"} -replace!{rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1"} -replace!{rep_double_dollar, replace, - r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1"} -replace!{rep_no_expand, replace, - r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1"} -replace!{rep_named, replace_all, - r"(?P\S+)\s+(?P\S+)(?P\s*)", - "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3"} -replace!{rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t", - "", "trim me"} - -macro_rules! noparse { - ($name:ident, $re:expr) => ( - #[test] - fn $name() { - let re = $re; - match Regex::new(re) { - Err(_) => {}, - Ok(_) => panic!("Regex '{}' should cause a parse error.", re), - } - } - ); -} - -noparse!{fail_double_repeat, "a**"} -noparse!{fail_no_repeat_arg, "*"} -noparse!{fail_no_repeat_arg_begin, "^*"} -noparse!{fail_incomplete_escape, "\\"} -noparse!{fail_class_incomplete, "[A-"} -noparse!{fail_class_not_closed, "[A"} -noparse!{fail_class_no_begin, r"[\A]"} -noparse!{fail_class_no_end, r"[\z]"} -noparse!{fail_class_no_boundary, r"[\b]"} -noparse!{fail_open_paren, "("} -noparse!{fail_close_paren, ")"} -noparse!{fail_invalid_range, "[a-Z]"} -noparse!{fail_empty_capture_name, "(?P<>a)"} -noparse!{fail_empty_capture_exp, "(?P)"} -noparse!{fail_bad_capture_name, "(?P)"} -noparse!{fail_bad_flag, "(?a)a"} -noparse!{fail_empty_alt_before, "|a"} -noparse!{fail_empty_alt_after, "a|"} -noparse!{fail_counted_big_exact, "a{1001}"} -noparse!{fail_counted_big_min, "a{1001,}"} -noparse!{fail_counted_no_close, "a{1001"} -noparse!{fail_unfinished_cap, "(?"} -noparse!{fail_unfinished_escape, "\\"} -noparse!{fail_octal_digit, r"\8"} -noparse!{fail_hex_digit, r"\xG0"} -noparse!{fail_hex_short, r"\xF"} -noparse!{fail_hex_long_digits, r"\x{fffg}"} -noparse!{fail_flag_bad, "(?a)"} -noparse!{fail_flag_empty, "(?)"} -noparse!{fail_double_neg, "(?-i-i)"} -noparse!{fail_neg_empty, "(?i-)"} -noparse!{fail_empty_group, "()"} -noparse!{fail_dupe_named, "(?P.)(?P.)"} -noparse!{fail_range_end_no_class, "[a-[:lower:]]"} -noparse!{fail_range_end_no_begin, r"[a-\A]"} -noparse!{fail_range_end_no_end, r"[a-\z]"} -noparse!{fail_range_end_no_boundary, r"[a-\b]"} -noparse!{fail_repeat_no_expr, r"-|+"} - -macro_rules! mat { - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = $text; - let expected: Vec> = vec!($($loc)+); - let r = regex!($re); - let got = match r.captures(text) { - Some(c) => c.iter_pos().collect::>>(), - None => vec!(None), - }; - // The test set sometimes leave out capture groups, so truncate - // actual capture groups to match test set. - let mut sgot = got.as_slice(); - if sgot.len() > expected.len() { - sgot = &sgot[..expected.len()] - } - if expected != sgot { - panic!("For RE '{}' against '{}', expected '{:?}' but got '{:?}'", - $re, text, expected, sgot); - } - } - ); -} - -// Some crazy expressions from regular-expressions.info. -mat!{match_ranges, - r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 255", Some((5, 8))} -mat!{match_ranges_not, - r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 256", None} -mat!{match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))} -mat!{match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))} -mat!{match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))} -mat!{match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None} -mat!{match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail.com ", Some((8, 26))} -mat!{match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail ", None} -mat!{match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", - "mine is jam.slam@gmail.com ", Some((8, 26))} -mat!{match_date1, - r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-01-01", Some((0, 10))} -mat!{match_date2, - r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-00-01", None} -mat!{match_date3, - r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-13-01", None} - -// Exercise the flags. -mat!{match_flag_case, "(?i)abc", "ABC", Some((0, 3))} -mat!{match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))} -mat!{match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None} -mat!{match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))} -mat!{match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4))} -mat!{match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None} -mat!{match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2))} -mat!{match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11))} -mat!{match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))} -mat!{match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))} -mat!{match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))} - -// Some Unicode tests. -// A couple of these are commented out because something in the guts of macro expansion is creating -// invalid byte strings. -//mat!{uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3))} -mat!{uni_one, r"\pN", "Ⅰ", Some((0, 3))} -mat!{uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))} -mat!{uni_not, r"\PN+", "abⅠ", Some((0, 2))} -mat!{uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))} -mat!{uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))} -mat!{uni_case, r"(?i)Δ", "δ", Some((0, 2))} -//mat!{uni_case_not, r"Δ", "δ", None} -mat!{uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))} -mat!{uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))} -mat!{uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))} -mat!{uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))} - -// Test the Unicode friendliness of Perl character classes. -mat!{uni_perl_w, r"\w+", "dδd", Some((0, 4))} -mat!{uni_perl_w_not, r"\w+", "⥡", None} -mat!{uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))} -mat!{uni_perl_d, r"\d+", "1२३9", Some((0, 8))} -mat!{uni_perl_d_not, r"\d+", "Ⅱ", None} -mat!{uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))} -mat!{uni_perl_s, r"\s+", " ", Some((0, 3))} -mat!{uni_perl_s_not, r"\s+", "☃", None} -mat!{uni_perl_s_neg, r"\S+", "☃", Some((0, 3))} - -// And do the same for word boundaries. -mat!{uni_boundary_none, r"\d\b", "6δ", None} -mat!{uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))} - -// A whole mess of tests from Glenn Fowler's regex test suite. -// Generated by the 'src/etc/regex-match-tests' program. -mod matches; diff --git a/src/libregex/testdata/LICENSE b/src/libregex/testdata/LICENSE deleted file mode 100644 index f47dbf4c449bc..0000000000000 --- a/src/libregex/testdata/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -The following license covers testregex.c and all associated test data. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of the -Software, and to permit persons to whom the Software is furnished to do -so, subject to the following disclaimer: - -THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/libregex/testdata/README b/src/libregex/testdata/README deleted file mode 100644 index 33b0ba17ed7f6..0000000000000 --- a/src/libregex/testdata/README +++ /dev/null @@ -1,17 +0,0 @@ -Test data was taken from the Go distribution, which was in turn taken from the -testregex test suite: - - http://www2.research.att.com/~astopen/testregex/testregex.html - -The LICENSE in this directory corresponds to the LICENSE that the data was -released under. - -The tests themselves were modified for RE2/Go. A couple were modified further -by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. -(Yes, it seems like RE2/Go includes failing test cases.) This may or may not -have been a bad idea, but I think being consistent with an established Regex -library is worth something. - -Note that these files are read by 'src/etc/regexp-match-tests' and turned into -Rust tests found in 'src/libregexp/tests/matches.rs'. - diff --git a/src/libregex/testdata/basic.dat b/src/libregex/testdata/basic.dat deleted file mode 100644 index e55efaeec0624..0000000000000 --- a/src/libregex/testdata/basic.dat +++ /dev/null @@ -1,221 +0,0 @@ -NOTE all standard compliant implementations should pass these : 2002-05-31 - -BE abracadabra$ abracadabracadabra (7,18) -BE a...b abababbb (2,7) -BE XXXXXX ..XXXXXX (2,8) -E \) () (1,2) -BE a] a]a (0,2) -B } } (0,1) -E \} } (0,1) -BE \] ] (0,1) -B ] ] (0,1) -E ] ] (0,1) -B { { (0,1) -B } } (0,1) -BE ^a ax (0,1) -BE \^a a^a (1,3) -BE a\^ a^ (0,2) -BE a$ aa (1,2) -BE a\$ a$ (0,2) -BE ^$ NULL (0,0) -E $^ NULL (0,0) -E a($) aa (1,2)(2,2) -E a*(^a) aa (0,1)(0,1) -E (..)*(...)* a (0,0) -E (..)*(...)* abcd (0,4)(2,4) -E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) -E (ab)c|abc abc (0,3)(0,2) -E a{0}b ab (1,2) -E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E a{9876543210} NULL BADBR -E ((a|a)|a) a (0,1)(0,1)(0,1) -E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) -E a*(a.|aa) aaaa (0,4)(2,4) -E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) -E (a|b)?.* b (0,1)(0,1) -E (a|b)c|a(b|c) ac (0,2)(0,1) -E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) -E (a|b)*c|(a|ab)*c abc (0,3)(1,2) -E (a|b)*c|(a|ab)*c xc (1,2) -E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) -E a?(ab|ba)ab abab (0,4)(0,2) -E a?(ac{0}b|ba)ab abab (0,4)(0,2) -E ab|abab abbabab (0,2) -E aba|bab|bba baaabbbaba (5,8) -E aba|bab baaabbbaba (6,9) -E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) -E (a.|.a.)*|(a|.a...) aa (0,2)(0,2) -E ab|a xabc (1,3) -E ab|a xxabc (2,4) -Ei (Ab|cD)* aBcD (0,4)(2,4) -BE [^-] --a (2,3) -BE [a-]* --a (0,3) -BE [a-m-]* --amoma-- (0,4) -E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) -E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) -{E [[:upper:]] A (0,1) [[]] not supported -E [[:lower:]]+ `az{ (1,3) -E [[:upper:]]+ @AZ[ (1,3) -# No collation in Go -#BE [[-]] [[-]] (2,4) -#BE [[.NIL.]] NULL ECOLLATE -#BE [[=aleph=]] NULL ECOLLATE -} -BE$ \n \n (0,1) -BEn$ \n \n (0,1) -BE$ [^a] \n (0,1) -BE$ \na \na (0,2) -E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) -BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) -E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) -E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\x7f (0,2) -E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) -L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH -E a*a*a*a*a*b aaaaaaaaab (0,10) -BE ^ NULL (0,0) -BE $ NULL (0,0) -BE ^$ NULL (0,0) -BE ^a$ a (0,1) -BE abc abc (0,3) -BE abc xabcy (1,4) -BE abc ababc (2,5) -BE ab*c abc (0,3) -BE ab*bc abc (0,3) -BE ab*bc abbc (0,4) -BE ab*bc abbbbc (0,6) -E ab+bc abbc (0,4) -E ab+bc abbbbc (0,6) -E ab?bc abbc (0,4) -E ab?bc abc (0,3) -E ab?c abc (0,3) -BE ^abc$ abc (0,3) -BE ^abc abcc (0,3) -BE abc$ aabc (1,4) -BE ^ abc (0,0) -BE $ abc (3,3) -BE a.c abc (0,3) -BE a.c axc (0,3) -BE a.*c axyzc (0,5) -BE a[bc]d abd (0,3) -BE a[b-d]e ace (0,3) -BE a[b-d] aac (1,3) -BE a[-b] a- (0,2) -BE a[b-] a- (0,2) -BE a] a] (0,2) -BE a[]]b a]b (0,3) -BE a[^bc]d aed (0,3) -BE a[^-b]c adc (0,3) -BE a[^]b]c adc (0,3) -E ab|cd abc (0,2) -E ab|cd abcd (0,2) -E a\(b a(b (0,3) -E a\(*b ab (0,2) -E a\(*b a((b (0,4) -E ((a)) abc (0,1)(0,1)(0,1) -E (a)b(c) abc (0,3)(0,1)(2,3) -E a+b+c aabbabc (4,7) -E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go -E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go -E (a+|b)* ab (0,2)(1,2) -E (a+|b)+ ab (0,2)(1,2) -E (a+|b)? ab (0,1)(0,1) -BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go -BE a* NULL (0,0) -E ([abc])*d abbbcd (0,6)(4,5) -E ([abc])*bcd abcd (0,4)(0,1) -E a|b|c|d|e e (0,1) -E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go -BE abcd*efg abcdefg (0,7) -BE ab* xabyabbbz (1,3) -BE ab* xayabbbz (1,2) -E (ab|cd)e abcde (2,5)(2,4) -BE [abhgefdc]ij hij (0,3) -E (a|b)c*d abcd (1,4)(1,2) -E (ab|ab*)bc abc (0,3)(0,1) -E a([bc]*)c* abc (0,3)(1,3) -E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) -E a[bcd]*dcdcde adcdcde (0,7) -E (ab|a)b*c abc (0,3)(0,2) -E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) -BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) -E ^a(bc+|b[eh])g|.h$ abh (1,3) -E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) -E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) -E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) -E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) -BE multiple words multiple words yeah (0,14) -E (.*)c(.*) abcde (0,5)(0,2)(3,5) -BE abcd abcd (0,4) -E a(bc)d abcd (0,4)(1,3) -E a[-]?c ac (0,3) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) -E a+(b|c)*d+ aabcdd (0,6)(3,4) -E ^.+$ vivi (0,4) -E ^(.+)$ vivi (0,4)(0,4) -E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) -E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) -E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) -E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) -E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) -E ((foo)|bar)!bas bar!bas (0,7)(0,3) -E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) -E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) -E (foo|(bar))!bas foo!bas (0,7)(0,3) -E (foo|bar)!bas bar!bas (0,7)(0,3) -E (foo|bar)!bas foo!bar!bas (4,11)(4,7) -E (foo|bar)!bas foo!bas (0,7)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) -E .*(/XXX).* /XXX (0,4)(0,4) -E .*(\\XXX).* \XXX (0,4)(0,4) -E \\XXX \XXX (0,4) -E .*(/000).* /000 (0,4)(0,4) -E .*(\\000).* \000 (0,4)(0,4) -E \\000 \000 (0,4) diff --git a/src/libregex/testdata/nullsubexpr.dat b/src/libregex/testdata/nullsubexpr.dat deleted file mode 100644 index 2e18fbb917070..0000000000000 --- a/src/libregex/testdata/nullsubexpr.dat +++ /dev/null @@ -1,79 +0,0 @@ -NOTE null subexpression matches : 2002-06-06 - -E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)* a (0,1)(0,1) -E SAME x (0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)+ a (0,1)(0,1) -E SAME x NOMATCH -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) - -E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([a]*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaab (0,6)(0,6) -E ([ab]*)* a (0,1)(0,1) -E SAME aaaaaa (0,6)(0,6) -E SAME ababab (0,6)(0,6) -E SAME bababa (0,6)(0,6) -E SAME b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -E SAME aaaabcde (0,5)(0,5) -E ([^a]*)* b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go -E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go - -E ((z)+|a)* zabcde (0,2)(1,2) - -#{E a+? aaaaaa (0,1) no *? +? mimimal match ops -#E (a) aaa (0,1)(0,1) -#E (a*?) aaa (0,0)(0,0) -#E (a)*? aaa (0,0) -#E (a*?)*? aaa (0,0) -#} - -B \(a*\)*\(x\) x (0,1)(0,0)(0,1) -B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) -B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) -B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) -B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) -B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) -B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) -B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) - -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go -E (a*)*(x) ax (0,2)(0,1)(1,2) -E (a*)*(x) axa (0,2)(0,1)(1,2) - -E (a*)+(x) x (0,1)(0,0)(0,1) -E (a*)+(x) ax (0,2)(0,1)(1,2) -E (a*)+(x) axa (0,2)(0,1)(1,2) - -E (a*){2}(x) x (0,1)(0,0)(0,1) -E (a*){2}(x) ax (0,2)(1,1)(1,2) -E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/src/libregex/testdata/repetition.dat b/src/libregex/testdata/repetition.dat deleted file mode 100644 index 3bb2121180005..0000000000000 --- a/src/libregex/testdata/repetition.dat +++ /dev/null @@ -1,163 +0,0 @@ -NOTE implicit vs. explicit repetitions : 2009-02-02 - -# Glenn Fowler -# conforming matches (column 4) must match one of the following BREs -# NOMATCH -# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* -# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* -# i.e., each 3-tuple has two identical elements and one (?,?) - -E ((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH - -E ((..)|(.)){1} NULL NOMATCH -E ((..)|(.)){2} NULL NOMATCH -E ((..)|(.)){3} NULL NOMATCH - -E ((..)|(.))* NULL (0,0) - -E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.))((..)|(.)) a NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH - -E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.)){2} a NOMATCH -E ((..)|(.)){3} a NOMATCH - -E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) - -E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) -E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH - -E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) -E ((..)|(.)){3} aa NOMATCH - -E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) - -E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) -E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) - -E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) -#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go -E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) - -#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go - -E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) - -E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) -E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go - -E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) - -E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) - -E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) - -E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) -E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) - -E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) - -NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 - -# These test a bug in OS X / FreeBSD / NetBSD, and libtree. -# Linux/GLIBC gets the {8,} and {8,8} wrong. - -:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) -:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) -:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) -:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) -:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) -:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) -:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) -:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) -:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) -#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) -:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) -:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) -:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) -:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) -:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) -:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) -:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) -:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go -:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) - -# These test a fixed bug in my regex-tdfa that did not keep the expanded -# form properly grouped, so right association did the wrong thing with -# these ambiguous patterns (crafted just to test my code when I became -# suspicious of my implementation). The first subexpression should use -# "ab" then "a" then "bcd". - -# OS X / FreeBSD / NetBSD badly fail many of these, with impossible -# results like (0,6)(4,5)(6,6). - -:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) - -# The above worked on Linux/GLIBC but the following often fail. -# They also trip up OS X / FreeBSD / NetBSD: - -#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH -#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH -#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) -:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) -:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/src/libregex/vm.rs b/src/libregex/vm.rs deleted file mode 100644 index 9605536a052c0..0000000000000 --- a/src/libregex/vm.rs +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// FIXME: Currently, the VM simulates an NFA. It would be nice to have another -// VM that simulates a DFA. -// -// According to Russ Cox[1], a DFA performs better than an NFA, principally -// because it reuses states previously computed by the machine *and* doesn't -// keep track of capture groups. The drawback of a DFA (aside from its -// complexity) is that it can't accurately return the locations of submatches. -// The NFA *can* do that. (This is my understanding anyway.) -// -// Cox suggests that a DFA ought to be used to answer "does this match" and -// "where does it match" questions. (In the latter, the starting position of -// the match is computed by executing the regex backwards.) Cox also suggests -// that a DFA should be run when asking "where are the submatches", which can -// 1) quickly answer "no" is there's no match and 2) discover the substring -// that matches, which means running the NFA on smaller input. -// -// Currently, the NFA simulation implemented below does some dirty tricks to -// avoid tracking capture groups when they aren't needed (which only works -// for 'is_match', not 'find'). This is a half-measure, but does provide some -// perf improvement. -// -// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go. -// -// [1] - http://swtch.com/~rsc/regex/regex3.html - -pub use self::MatchKind::*; -pub use self::StepState::*; - -use std::cmp; -use std::cmp::Ordering::{self, Less, Equal, Greater}; -use std::mem; -use std::iter::repeat; -use std::slice::SliceExt; -use compile::{ - Program, - Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary, - Save, Jump, Split, -}; -use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED}; -use unicode::regex::PERLW; - -pub type CaptureLocs = Vec>; - -/// Indicates the type of match to be performed by the VM. -#[derive(Copy)] -pub enum MatchKind { - /// Only checks if a match exists or not. Does not return location. - Exists, - /// Returns the start and end indices of the entire match in the input - /// given. - Location, - /// Returns the start and end indices of each submatch in the input given. - Submatches, -} - -/// Runs an NFA simulation on the compiled expression given on the search text -/// `input`. The search begins at byte index `start` and ends at byte index -/// `end`. (The range is specified here so that zero-width assertions will work -/// correctly when searching for successive non-overlapping matches.) -/// -/// The `which` parameter indicates what kind of capture information the caller -/// wants. There are three choices: match existence only, the location of the -/// entire match or the locations of the entire match in addition to the -/// locations of each submatch. -pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str, - start: uint, end: uint) -> CaptureLocs { - Nfa { - which: which, - prog: prog, - input: input, - start: start, - end: end, - ic: 0, - chars: CharReader::new(input), - }.run() -} - -struct Nfa<'r, 't> { - which: MatchKind, - prog: &'r Program, - input: &'t str, - start: uint, - end: uint, - ic: uint, - chars: CharReader<'t>, -} - -/// Indicates the next action to take after a single non-empty instruction -/// is processed. -#[derive(Copy)] -pub enum StepState { - /// This is returned if and only if a Match instruction is reached and - /// we only care about the existence of a match. It instructs the VM to - /// quit early. - StepMatchEarlyReturn, - /// Indicates that a match was found. Thus, the rest of the states in the - /// *current* queue should be dropped (i.e., leftmost-first semantics). - /// States in the "next" queue can still be processed. - StepMatch, - /// No match was found. Continue with the next state in the queue. - StepContinue, -} - -impl<'r, 't> Nfa<'r, 't> { - fn run(&mut self) -> CaptureLocs { - let ncaps = match self.which { - Exists => 0, - Location => 1, - Submatches => self.prog.num_captures(), - }; - let mut matched = false; - let ninsts = self.prog.insts.len(); - let mut clist = &mut Threads::new(self.which, ninsts, ncaps); - let mut nlist = &mut Threads::new(self.which, ninsts, ncaps); - - let mut groups: Vec<_> = repeat(None).take(ncaps * 2).collect(); - - // Determine if the expression starts with a '^' so we can avoid - // simulating .*? - // Make sure multi-line mode isn't enabled for it, otherwise we can't - // drop the initial .*? - let prefix_anchor = - match self.prog.insts[1] { - EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true, - _ => false, - }; - - self.ic = self.start; - let mut next_ic = self.chars.set(self.start); - while self.ic <= self.end { - if clist.size == 0 { - // We have a match and we're done exploring alternatives. - // Time to quit. - if matched { - break - } - - // If there are no threads to try, then we'll have to start - // over at the beginning of the regex. - // BUT, if there's a literal prefix for the program, try to - // jump ahead quickly. If it can't be found, then we can bail - // out early. - if self.prog.prefix.len() > 0 && clist.size == 0 { - let needle = self.prog.prefix.as_bytes(); - let haystack = &self.input.as_bytes()[self.ic..]; - match find_prefix(needle, haystack) { - None => break, - Some(i) => { - self.ic += i; - next_ic = self.chars.set(self.ic); - } - } - } - } - - // This simulates a preceding '.*?' for every regex by adding - // a state starting at the current position in the input for the - // beginning of the program only if we don't already have a match. - if clist.size == 0 || (!prefix_anchor && !matched) { - self.add(clist, 0, groups.as_mut_slice()) - } - - // Now we try to read the next character. - // As a result, the 'step' method will look at the previous - // character. - self.ic = next_ic; - next_ic = self.chars.advance(); - - for i in range(0, clist.size) { - let pc = clist.pc(i); - let step_state = self.step(groups.as_mut_slice(), nlist, - clist.groups(i), pc); - match step_state { - StepMatchEarlyReturn => return vec![Some(0), Some(0)], - StepMatch => { matched = true; break }, - StepContinue => {}, - } - } - mem::swap(&mut clist, &mut nlist); - nlist.empty(); - } - match self.which { - Exists if matched => vec![Some(0), Some(0)], - Exists => vec![None, None], - Location | Submatches => groups, - } - } - - fn step(&self, groups: &mut [Option], nlist: &mut Threads, - caps: &mut [Option], pc: uint) - -> StepState { - match self.prog.insts[pc] { - Match => { - match self.which { - Exists => { - return StepMatchEarlyReturn - } - Location => { - groups[0] = caps[0]; - groups[1] = caps[1]; - return StepMatch - } - Submatches => { - for (slot, val) in groups.iter_mut().zip(caps.iter()) { - *slot = *val; - } - return StepMatch - } - } - } - OneChar(c, flags) => { - if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) { - self.add(nlist, pc+1, caps); - } - } - CharClass(ref ranges, flags) => { - if self.chars.prev.is_some() { - let c = self.chars.prev.unwrap(); - let negate = flags & FLAG_NEGATED > 0; - let casei = flags & FLAG_NOCASE > 0; - let found = ranges.as_slice(); - let found = found.binary_search_by(|&rc| class_cmp(casei, c, rc)).is_ok(); - if found ^ negate { - self.add(nlist, pc+1, caps); - } - } - } - Any(flags) => { - if flags & FLAG_DOTNL > 0 - || !self.char_eq(false, self.chars.prev, '\n') { - self.add(nlist, pc+1, caps) - } - } - EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_) - | Save(_) | Jump(_) | Split(_, _) => {}, - } - StepContinue - } - - fn add(&self, nlist: &mut Threads, pc: uint, groups: &mut [Option]) { - if nlist.contains(pc) { - return - } - // We have to add states to the threads list even if their empty. - // TL;DR - It prevents cycles. - // If we didn't care about cycles, we'd *only* add threads that - // correspond to non-jumping instructions (OneChar, Any, Match, etc.). - // But, it's possible for valid regexs (like '(a*)*') to result in - // a cycle in the instruction list. e.g., We'll keep chasing the Split - // instructions forever. - // So we add these instructions to our thread queue, but in the main - // VM loop, we look for them but simply ignore them. - // Adding them to the queue prevents them from being revisited so we - // can avoid cycles (and the inevitable stack overflow). - // - // We make a minor optimization by indicating that the state is "empty" - // so that its capture groups are not filled in. - match self.prog.insts[pc] { - EmptyBegin(flags) => { - let multi = flags & FLAG_MULTI > 0; - nlist.add(pc, groups, true); - if self.chars.is_begin() - || (multi && self.char_is(self.chars.prev, '\n')) { - self.add(nlist, pc + 1, groups) - } - } - EmptyEnd(flags) => { - let multi = flags & FLAG_MULTI > 0; - nlist.add(pc, groups, true); - if self.chars.is_end() - || (multi && self.char_is(self.chars.cur, '\n')) { - self.add(nlist, pc + 1, groups) - } - } - EmptyWordBoundary(flags) => { - nlist.add(pc, groups, true); - if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) { - self.add(nlist, pc + 1, groups) - } - } - Save(slot) => { - nlist.add(pc, groups, true); - match self.which { - Location if slot <= 1 => { - let old = groups[slot]; - groups[slot] = Some(self.ic); - self.add(nlist, pc + 1, groups); - groups[slot] = old; - } - Submatches => { - let old = groups[slot]; - groups[slot] = Some(self.ic); - self.add(nlist, pc + 1, groups); - groups[slot] = old; - } - Exists | Location => self.add(nlist, pc + 1, groups), - } - } - Jump(to) => { - nlist.add(pc, groups, true); - self.add(nlist, to, groups) - } - Split(x, y) => { - nlist.add(pc, groups, true); - self.add(nlist, x, groups); - self.add(nlist, y, groups); - } - Match | OneChar(_, _) | CharClass(_, _) | Any(_) => { - nlist.add(pc, groups, false); - } - } - } - - // FIXME: For case insensitive comparisons, it uses the uppercase - // character and tests for equality. IIUC, this does not generalize to - // all of Unicode. I believe we need to check the entire fold for each - // character. This will be easy to add if and when it gets added to Rust's - // standard library. - #[inline] - fn char_eq(&self, casei: bool, textc: Option, regc: char) -> bool { - match textc { - None => false, - Some(textc) => { - regc == textc - || (casei && regc.to_uppercase() == textc.to_uppercase()) - } - } - } - - #[inline] - fn char_is(&self, textc: Option, regc: char) -> bool { - textc == Some(regc) - } -} - -/// CharReader is responsible for maintaining a "previous" and a "current" -/// character. This one-character lookahead is necessary for assertions that -/// look one character before or after the current position. -pub struct CharReader<'t> { - /// The previous character read. It is None only when processing the first - /// character of the input. - pub prev: Option, - /// The current character. - pub cur: Option, - input: &'t str, - next: uint, -} - -impl<'t> CharReader<'t> { - /// Returns a new CharReader that advances through the input given. - /// Note that a CharReader has no knowledge of the range in which to search - /// the input. - pub fn new(input: &'t str) -> CharReader<'t> { - CharReader { - prev: None, - cur: None, - input: input, - next: 0, - } - } - - /// Sets the previous and current character given any arbitrary byte - /// index (at a Unicode codepoint boundary). - #[inline] - pub fn set(&mut self, ic: uint) -> uint { - self.prev = None; - self.cur = None; - self.next = 0; - - if self.input.len() == 0 { - return 1 - } - if ic > 0 { - let i = cmp::min(ic, self.input.len()); - let prev = self.input.char_range_at_reverse(i); - self.prev = Some(prev.ch); - } - if ic < self.input.len() { - let cur = self.input.char_range_at(ic); - self.cur = Some(cur.ch); - self.next = cur.next; - self.next - } else { - self.input.len() + 1 - } - } - - /// Does the same as `set`, except it always advances to the next - /// character in the input (and therefore does half as many UTF8 decodings). - #[inline] - pub fn advance(&mut self) -> uint { - self.prev = self.cur; - if self.next < self.input.len() { - let cur = self.input.char_range_at(self.next); - self.cur = Some(cur.ch); - self.next = cur.next; - } else { - self.cur = None; - self.next = self.input.len() + 1; - } - self.next - } - - /// Returns true if and only if this is the beginning of the input - /// (ignoring the range of the input to search). - #[inline] - pub fn is_begin(&self) -> bool { self.prev.is_none() } - - /// Returns true if and only if this is the end of the input - /// (ignoring the range of the input to search). - #[inline] - pub fn is_end(&self) -> bool { self.cur.is_none() } - - /// Returns true if and only if the current position is a word boundary. - /// (Ignoring the range of the input to search.) - pub fn is_word_boundary(&self) -> bool { - if self.is_begin() { - return is_word(self.cur) - } - if self.is_end() { - return is_word(self.prev) - } - (is_word(self.cur) && !is_word(self.prev)) - || (is_word(self.prev) && !is_word(self.cur)) - } -} - -struct Thread { - pc: uint, - groups: Vec>, -} - -struct Threads { - which: MatchKind, - queue: Vec, - sparse: Vec, - size: uint, -} - -impl Threads { - // This is using a wicked neat trick to provide constant time lookup - // for threads in the queue using a sparse set. A queue of threads is - // allocated once with maximal size when the VM initializes and is reused - // throughout execution. That is, there should be zero allocation during - // the execution of a VM. - // - // See http://research.swtch.com/sparse for the deets. - fn new(which: MatchKind, num_insts: uint, ncaps: uint) -> Threads { - Threads { - which: which, - queue: range(0, num_insts).map(|_| { - Thread { pc: 0, groups: repeat(None).take(ncaps * 2).collect() } - }).collect(), - sparse: repeat(0u).take(num_insts).collect(), - size: 0, - } - } - - fn add(&mut self, pc: uint, groups: &[Option], empty: bool) { - let t = &mut self.queue[self.size]; - t.pc = pc; - match (empty, self.which) { - (_, Exists) | (true, _) => {}, - (false, Location) => { - t.groups[0] = groups[0]; - t.groups[1] = groups[1]; - } - (false, Submatches) => { - for (slot, val) in t.groups.iter_mut().zip(groups.iter()) { - *slot = *val; - } - } - } - self.sparse[pc] = self.size; - self.size += 1; - } - - #[inline] - fn contains(&self, pc: uint) -> bool { - let s = self.sparse[pc]; - s < self.size && self.queue[s].pc == pc - } - - #[inline] - fn empty(&mut self) { - self.size = 0; - } - - #[inline] - fn pc(&self, i: uint) -> uint { - self.queue[i].pc - } - - #[inline] - fn groups<'r>(&'r mut self, i: uint) -> &'r mut [Option] { - let q = &mut self.queue[i]; - q.groups.as_mut_slice() - } -} - -/// Returns true if the character is a word character, according to the -/// (Unicode friendly) Perl character class '\w'. -/// Note that this is only use for testing word boundaries. The actual '\w' -/// is encoded as a CharClass instruction. -pub fn is_word(c: Option) -> bool { - let c = match c { - None => return false, - Some(c) => c, - }; - // Try the common ASCII case before invoking binary search. - match c { - '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z' => true, - _ => PERLW.binary_search_by(|&(start, end)| { - if c >= start && c <= end { - Equal - } else if start > c { - Greater - } else { - Less - } - }).is_ok() - } -} - -/// Given a character and a single character class range, return an ordering -/// indicating whether the character is less than the start of the range, -/// in the range (inclusive) or greater than the end of the range. -/// -/// If `casei` is `true`, then this ordering is computed case insensitively. -/// -/// This function is meant to be used with a binary search. -#[inline] -fn class_cmp(casei: bool, mut textc: char, - (mut start, mut end): (char, char)) -> Ordering { - if casei { - // FIXME: This is pretty ridiculous. All of this case conversion - // can be moved outside this function: - // 1) textc should be uppercased outside the bsearch. - // 2) the character class itself should be uppercased either in the - // parser or the compiler. - // FIXME: This is too simplistic for correct Unicode support. - // See also: char_eq - textc = textc.to_uppercase(); - start = start.to_uppercase(); - end = end.to_uppercase(); - } - if textc >= start && textc <= end { - Equal - } else if start > textc { - Greater - } else { - Less - } -} - -/// Returns the starting location of `needle` in `haystack`. -/// If `needle` is not in `haystack`, then `None` is returned. -/// -/// Note that this is using a naive substring algorithm. -#[inline] -pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option { - let (hlen, nlen) = (haystack.len(), needle.len()); - if nlen > hlen || nlen == 0 { - return None - } - for (offset, window) in haystack.windows(nlen).enumerate() { - if window == needle { - return Some(offset) - } - } - None -} diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs index 377e5dd39ff57..b961200f33501 100644 --- a/src/librustc/lib.rs +++ b/src/librustc/lib.rs @@ -37,7 +37,6 @@ extern crate fmt_macros; extern crate getopts; extern crate graphviz; extern crate libc; -extern crate regex; extern crate rustc_llvm; extern crate rustc_back; extern crate serialize; diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index 4150335abc3d3..f90a60c9754a0 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -15,8 +15,6 @@ use metadata::filesearch; use session::search_paths::PathKind; use util::nodemap::NodeMap; -use regex::Regex; - use syntax::ast::NodeId; use syntax::codemap::Span; use syntax::diagnostic::{self, Emitter}; @@ -253,50 +251,54 @@ fn split_msg_into_multilines(msg: &str) -> Option { !msg.contains("structure constructor specifies a structure of type") { return None } - - let first = Regex::new(r"[( ]expected").unwrap(); - let second = Regex::new(r" found").unwrap(); - let third = Regex::new( - r"\((values differ|lifetime|cyclic type of infinite size)").unwrap(); + let first = msg.match_indices("expected").filter(|s| { + s.0 > 0 && (msg.char_at_reverse(s.0) == ' ' || + msg.char_at_reverse(s.0) == '(') + }).map(|(a, b)| (a - 1, b)); + let second = msg.match_indices("found").filter(|s| { + msg.char_at_reverse(s.0) == ' ' + }).map(|(a, b)| (a - 1, b)); let mut new_msg = String::new(); let mut head = 0u; // Insert `\n` before expected and found. - for (pos1, pos2) in first.find_iter(msg).zip( - second.find_iter(msg)) { + for (pos1, pos2) in first.zip(second) { new_msg = new_msg + - // A `(` may be preceded by a space and it should be trimmed - msg[head..pos1.0].trim_right() + // prefix - "\n" + // insert before first - &msg[pos1.0..pos1.1] + // insert what first matched - &msg[pos1.1..pos2.0] + // between matches - "\n " + // insert before second - // 123 - // `expected` is 3 char longer than `found`. To align the types, `found` gets - // 3 spaces prepended. - &msg[pos2.0..pos2.1]; // insert what second matched + // A `(` may be preceded by a space and it should be trimmed + msg[head..pos1.0].trim_right() + // prefix + "\n" + // insert before first + &msg[pos1.0..pos1.1] + // insert what first matched + &msg[pos1.1..pos2.0] + // between matches + "\n " + // insert before second + // 123 + // `expected` is 3 char longer than `found`. To align the types, + // `found` gets 3 spaces prepended. + &msg[pos2.0..pos2.1]; // insert what second matched head = pos2.1; } let mut tail = &msg[head..]; + let third = tail.find_str("(values differ") + .or(tail.find_str("(lifetime")) + .or(tail.find_str("(cyclic type of infinite size")); // Insert `\n` before any remaining messages which match. - for pos in third.find_iter(tail).take(1) { - // The end of the message may just be wrapped in `()` without `expected`/`found`. - // Push this also to a new line and add the final tail after. + if let Some(pos) = third { + // The end of the message may just be wrapped in `()` without + // `expected`/`found`. Push this also to a new line and add the + // final tail after. new_msg = new_msg + - // `(` is usually preceded by a space and should be trimmed. - tail[..pos.0].trim_right() + // prefix - "\n" + // insert before paren - &tail[pos.0..]; // append the tail + // `(` is usually preceded by a space and should be trimmed. + tail[..pos].trim_right() + // prefix + "\n" + // insert before paren + &tail[pos..]; // append the tail tail = ""; } new_msg.push_str(tail); - - return Some(new_msg) + return Some(new_msg); } pub fn build_session(sopts: config::Options, diff --git a/src/libtest/lib.rs b/src/libtest/lib.rs index 7226c6423b82c..793483754eebf 100644 --- a/src/libtest/lib.rs +++ b/src/libtest/lib.rs @@ -38,7 +38,6 @@ #![allow(unstable)] extern crate getopts; -extern crate regex; extern crate serialize; extern crate "serialize" as rustc_serialize; extern crate term; @@ -53,7 +52,6 @@ use self::OutputLocation::*; use stats::Stats; use getopts::{OptGroup, optflag, optopt}; -use regex::Regex; use serialize::Encodable; use term::Terminal; use term::color::{Color, RED, YELLOW, GREEN, CYAN}; @@ -279,7 +277,7 @@ pub enum ColorConfig { } pub struct TestOpts { - pub filter: Option, + pub filter: Option, pub run_ignored: bool, pub run_tests: bool, pub run_benchmarks: bool, @@ -365,11 +363,7 @@ pub fn parse_opts(args: &[String]) -> Option { if matches.opt_present("h") { usage(args[0].as_slice()); return None; } let filter = if matches.free.len() > 0 { - let s = matches.free[0].as_slice(); - match Regex::new(s) { - Ok(re) => Some(re), - Err(e) => return Some(Err(format!("could not parse /{}/: {:?}", s, e))) - } + Some(matches.free[0].clone()) } else { None }; @@ -833,9 +827,10 @@ pub fn filter_tests(opts: &TestOpts, tests: Vec) -> Vec filtered, - Some(ref re) => { - filtered.into_iter() - .filter(|test| re.is_match(test.desc.name.as_slice())).collect() + Some(ref filter) => { + filtered.into_iter().filter(|test| { + test.desc.name.as_slice().contains(&filter[]) + }).collect() } }; @@ -1230,16 +1225,6 @@ mod tests { assert!(res == TrFailed); } - #[test] - fn first_free_arg_should_be_a_filter() { - let args = vec!("progname".to_string(), "some_regex_filter".to_string()); - let opts = match parse_opts(args.as_slice()) { - Some(Ok(o)) => o, - _ => panic!("Malformed arg in first_free_arg_should_be_a_filter") - }; - assert!(opts.filter.expect("should've found filter").is_match("some_regex_filter")) - } - #[test] fn parse_ignored_flag() { let args = vec!("progname".to_string(), @@ -1336,37 +1321,6 @@ mod tests { } } - #[test] - pub fn filter_tests_regex() { - let mut opts = TestOpts::new(); - opts.filter = Some(::regex::Regex::new("a.*b.+c").unwrap()); - - let mut names = ["yes::abXc", "yes::aXXXbXXXXc", - "no::XYZ", "no::abc"]; - names.sort(); - - fn test_fn() {} - let tests = names.iter().map(|name| { - TestDescAndFn { - desc: TestDesc { - name: DynTestName(name.to_string()), - ignore: false, - should_fail: ShouldFail::No, - }, - testfn: DynTestFn(Thunk::new(test_fn)) - } - }).collect(); - let filtered = filter_tests(&opts, tests); - - let expected: Vec<&str> = - names.iter().map(|&s| s).filter(|name| name.starts_with("yes")).collect(); - - assert_eq!(filtered.len(), expected.len()); - for (test, expected_name) in filtered.iter().zip(expected.iter()) { - assert_eq!(test.desc.name.as_slice(), *expected_name); - } - } - #[test] pub fn test_metricmap_compare() { let mut m1 = MetricMap::new(); diff --git a/src/rustbook/book.rs b/src/rustbook/book.rs index 20346449fd16c..3047e93137f9f 100644 --- a/src/rustbook/book.rs +++ b/src/rustbook/book.rs @@ -13,7 +13,6 @@ use std::io::BufferedReader; use std::iter; use std::iter::AdditiveIterator; -use regex::Regex; pub struct BookItem { pub title: String, @@ -94,8 +93,6 @@ pub fn parse_summary(input: R, src: &Path) -> Result[\t ]*)\*[:space:]*\[(?P.*)\]\((?P<path>.*)\)"; - let item_re = Regex::new(regex).unwrap(); let mut top_items = vec!(); let mut stack = vec!(); let mut errors = vec!(); @@ -117,45 +114,51 @@ pub fn parse_summary<R: Reader>(input: R, src: &Path) -> Result<Book, Vec<String } }; - item_re.captures(&line[]).map(|cap| { - let given_path = cap.name("path"); - let title = cap.name("title").unwrap().to_string(); - - let path_from_root = match src.join(given_path.unwrap()).path_relative_from(src) { - Some(p) => p, - None => { - errors.push(format!("paths in SUMMARY.md must be relative, \ - but path '{}' for section '{}' is not.", - given_path.unwrap(), title)); - Path::new("") - } - }; - let path_to_root = Path::new(iter::repeat("../") - .take(path_from_root.components().count() - 1) - .collect::<String>()); - let item = BookItem { - title: title, - path: path_from_root, - path_to_root: path_to_root, - children: vec!(), - }; - let level = cap.name("indent").unwrap().chars().map(|c| { - match c { - ' ' => 1us, - '\t' => 4, - _ => unreachable!() - } - }).sum() / 4 + 1; - - if level > stack.len() + 1 { - errors.push(format!("section '{}' is indented too deeply; \ - found {}, expected {} or less", - item.title, level, stack.len() + 1)); - } else if level <= stack.len() { - collapse(&mut stack, &mut top_items, level); + let star_idx = match line.find_str("*") { Some(i) => i, None => continue }; + + let start_bracket = star_idx + line[star_idx..].find_str("[").unwrap(); + let end_bracket = start_bracket + line[start_bracket..].find_str("](").unwrap(); + let start_paren = end_bracket + 1; + let end_paren = start_paren + line[start_paren..].find_str(")").unwrap(); + + let given_path = &line[start_paren + 1 .. end_paren]; + let title = line[start_bracket + 1..end_bracket].to_string(); + let indent = &line[..star_idx]; + + let path_from_root = match src.join(given_path).path_relative_from(src) { + Some(p) => p, + None => { + errors.push(format!("paths in SUMMARY.md must be relative, \ + but path '{}' for section '{}' is not.", + given_path, title)); + Path::new("") } - stack.push(item) - }); + }; + let path_to_root = Path::new(iter::repeat("../") + .take(path_from_root.components().count() - 1) + .collect::<String>()); + let item = BookItem { + title: title, + path: path_from_root, + path_to_root: path_to_root, + children: vec!(), + }; + let level = indent.chars().map(|c| { + match c { + ' ' => 1us, + '\t' => 4, + _ => unreachable!() + } + }).sum() / 4 + 1; + + if level > stack.len() + 1 { + errors.push(format!("section '{}' is indented too deeply; \ + found {}, expected {} or less", + item.title, level, stack.len() + 1)); + } else if level <= stack.len() { + collapse(&mut stack, &mut top_items, level); + } + stack.push(item) } if errors.is_empty() { diff --git a/src/rustbook/build.rs b/src/rustbook/build.rs index 50a6ad43aeeb1..93601c0f61bf4 100644 --- a/src/rustbook/build.rs +++ b/src/rustbook/build.rs @@ -22,8 +22,6 @@ use book::{Book, BookItem}; use css; use javascript; -use regex::Regex; - use rustdoc; struct Build; @@ -81,9 +79,6 @@ fn render(book: &Book, tgt: &Path) -> CliResult<()> { let out_path = tgt.join(item.path.dirname()); - let regex = r"\[(?P<title>[^]]*)\]\((?P<url_stem>[^)]*)\.(?P<ext>md|markdown)\)"; - let md_urls = Regex::new(regex).unwrap(); - let src; if os::args().len() < 3 { src = os::getcwd().unwrap().clone(); @@ -94,7 +89,7 @@ fn render(book: &Book, tgt: &Path) -> CliResult<()> { let markdown_data = try!(File::open(&src.join(&item.path)).read_to_string()); let preprocessed_path = tmp.path().join(item.path.filename().unwrap()); { - let urls = md_urls.replace_all(&markdown_data[], "[$title]($url_stem.html)"); + let urls = markdown_data.replace(".md)", ".html)"); try!(File::create(&preprocessed_path) .write_str(&urls[])); } diff --git a/src/rustbook/main.rs b/src/rustbook/main.rs index ea72c653087d3..cbd29004097e8 100644 --- a/src/rustbook/main.rs +++ b/src/rustbook/main.rs @@ -11,8 +11,6 @@ #![feature(slicing_syntax, box_syntax)] #![allow(unstable)] -extern crate regex; - extern crate rustdoc; use std::os; diff --git a/src/test/bench/shootout-regex-dna.rs b/src/test/bench/shootout-regex-dna.rs deleted file mode 100644 index 074c059231299..0000000000000 --- a/src/test/bench/shootout-regex-dna.rs +++ /dev/null @@ -1,126 +0,0 @@ -// The Computer Language Benchmarks Game -// http://benchmarksgame.alioth.debian.org/ -// -// contributed by the Rust Project Developers - -// Copyright (c) 2014 The Rust Project Developers -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of "The Computer Language Benchmarks Game" nor -// the name of "The Computer Language Shootout Benchmarks" nor the -// names of its contributors may be used to endorse or promote -// products derived from this software without specific prior -// written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -// OF THE POSSIBILITY OF SUCH DAMAGE. - -// ignore-stage1 -// ignore-cross-compile #12102 - -#![feature(box_syntax)] - -extern crate regex; - -use std::io; -use regex::{NoExpand, Regex}; -use std::sync::{Arc, Future}; - -macro_rules! regex { - ($e:expr) => (Regex::new($e).unwrap()) -} - -fn count_matches(seq: &str, variant: &Regex) -> int { - let mut n = 0; - for _ in variant.find_iter(seq) { - n += 1; - } - n -} - -fn main() { - let mut rdr = if std::os::getenv("RUST_BENCH").is_some() { - let fd = io::File::open(&Path::new("shootout-k-nucleotide.data")); - box io::BufferedReader::new(fd) as Box<io::Reader> - } else { - box io::stdin() as Box<io::Reader> - }; - let mut seq = rdr.read_to_string().unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand("")); - let seq_arc = Arc::new(seq.clone()); // copy before it moves - let clen = seq.len(); - - let mut seqlen = Future::spawn(move|| { - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs.into_iter() { - seq = re.replace_all(seq.as_slice(), NoExpand(replacement)); - } - seq.len() - }); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let (mut variant_strs, mut counts) = (vec!(), vec!()); - for variant in variants.into_iter() { - let seq_arc_copy = seq_arc.clone(); - variant_strs.push(variant.to_string()); - counts.push(Future::spawn(move|| { - count_matches(seq_arc_copy.as_slice(), &variant) - })); - } - - for (i, variant) in variant_strs.iter().enumerate() { - println!("{} {}", variant, counts[i].get()); - } - println!(""); - println!("{}", ilen); - println!("{}", clen); - println!("{}", seqlen.get()); -} diff --git a/src/test/run-pass/rust-log-filter.rs b/src/test/run-pass/rust-log-filter.rs index 28d47f7aa9be5..f7fa204d4539c 100644 --- a/src/test/run-pass/rust-log-filter.rs +++ b/src/test/run-pass/rust-log-filter.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// exec-env:RUST_LOG=rust-log-filter/f.o +// exec-env:RUST_LOG=rust-log-filter/foo #![allow(unknown_features)] #![feature(box_syntax)] @@ -42,18 +42,14 @@ pub fn main() { let _t = Thread::spawn(move|| { log::set_logger(logger); - // our regex is "f.o" - // ensure it is a regex, and isn't anchored info!("foo"); info!("bar"); info!("foo bar"); info!("bar foo"); - info!("f1o"); }); assert_eq!(rx.recv().unwrap().as_slice(), "foo"); assert_eq!(rx.recv().unwrap().as_slice(), "foo bar"); assert_eq!(rx.recv().unwrap().as_slice(), "bar foo"); - assert_eq!(rx.recv().unwrap().as_slice(), "f1o"); assert!(rx.recv().is_err()); }