Skip to content

Commit

Permalink
regex: Remove in-tree version
Browse files Browse the repository at this point in the history
The regex library was used mostly for non-critical aspects of the compiler and
by various external tooling. At this point the library duplicates its
out-of-tree counterpart, and as such it imposes some maintenance overhead as
well as a compile-time hit for the compiler itself.

The last major user of the regex library is the libtest library, using regexes
for filters when running tests. This removal means that the filtering has gone
back to substring matching rather than using regexes.
  • Loading branch information
alexcrichton committed Jan 24, 2015
1 parent 494896f commit 6c29708
Show file tree
Hide file tree
Showing 31 changed files with 187 additions and 4,450 deletions.
12 changes: 5 additions & 7 deletions mk/crates.mk
Expand Up @@ -51,7 +51,7 @@

TARGET_CRATES := libc std flate arena term \
serialize getopts collections test rand \
log regex graphviz core rbml alloc \
log graphviz core rbml alloc \
unicode rustc_bitflags
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
rustc_trans rustc_back rustc_llvm rustc_privacy
Expand Down Expand Up @@ -95,16 +95,15 @@ DEPS_term := std log
DEPS_getopts := std
DEPS_collections := core alloc unicode
DEPS_num := std
DEPS_test := std getopts serialize rbml term regex native:rust_test_helpers
DEPS_test := std getopts serialize rbml term native:rust_test_helpers
DEPS_rand := core
DEPS_log := std regex
DEPS_regex := std
DEPS_log := std
DEPS_fmt_macros = std

TOOL_DEPS_compiletest := test getopts
TOOL_DEPS_rustdoc := rustdoc
TOOL_DEPS_rustc := rustc_driver
TOOL_DEPS_rustbook := std regex rustdoc
TOOL_DEPS_rustbook := std rustdoc
TOOL_SOURCE_compiletest := $(S)src/compiletest/compiletest.rs
TOOL_SOURCE_rustdoc := $(S)src/driver/driver.rs
TOOL_SOURCE_rustc := $(S)src/driver/driver.rs
Expand All @@ -130,9 +129,8 @@ DOC_CRATES := $(filter-out rustc, \
$(filter-out rustc_driver, \
$(filter-out rustc_privacy, \
$(filter-out log, \
$(filter-out regex, \
$(filter-out getopts, \
$(filter-out syntax, $(CRATES))))))))))))
$(filter-out syntax, $(CRATES)))))))))))
COMPILER_DOC_CRATES := rustc rustc_trans rustc_borrowck rustc_resolve \
rustc_typeck rustc_driver syntax rustc_privacy

Expand Down
6 changes: 1 addition & 5 deletions src/compiletest/common.rs
Expand Up @@ -11,7 +11,6 @@ pub use self::Mode::*;

use std::fmt;
use std::str::FromStr;
use regex::Regex;

#[derive(Clone, PartialEq, Debug)]
pub enum Mode {
Expand Down Expand Up @@ -101,10 +100,7 @@ pub struct Config {
pub run_ignored: bool,

// Only run tests that match this filter
pub filter: Option<Regex>,

// Precompiled regex for finding expected errors in cfail
pub cfail_regex: Regex,
pub filter: Option<String>,

// Write out a parseable log of tests that were run
pub logfile: Option<Path>,
Expand Down
66 changes: 35 additions & 31 deletions src/compiletest/compiletest.rs
Expand Up @@ -22,7 +22,6 @@ extern crate getopts;

#[macro_use]
extern crate log;
extern crate regex;

use std::os;
use std::io;
Expand All @@ -33,7 +32,6 @@ use getopts::{optopt, optflag, reqopt};
use common::Config;
use common::{Pretty, DebugInfoGdb, DebugInfoLldb, Codegen};
use util::logv;
use regex::Regex;

pub mod procsrv;
pub mod util;
Expand Down Expand Up @@ -116,14 +114,7 @@ pub fn parse_config(args: Vec<String> ) -> Config {
}

let filter = if !matches.free.is_empty() {
let s = matches.free[0].as_slice();
match regex::Regex::new(s) {
Ok(re) => Some(re),
Err(e) => {
println!("failed to parse filter /{}/: {:?}", s, e);
panic!()
}
}
Some(matches.free[0].clone())
} else {
None
};
Expand All @@ -145,7 +136,6 @@ pub fn parse_config(args: Vec<String> ) -> Config {
.as_slice()).expect("invalid mode"),
run_ignored: matches.opt_present("ignored"),
filter: filter,
cfail_regex: Regex::new(errors::EXPECTED_PATTERN).unwrap(),
logfile: matches.opt_str("logfile").map(|s| Path::new(s)),
runtool: matches.opt_str("runtool"),
host_rustcflags: matches.opt_str("host-rustcflags"),
Expand Down Expand Up @@ -374,18 +364,24 @@ fn extract_gdb_version(full_version_line: Option<String>) -> Option<String> {
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();

let re = Regex::new(r"(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)").unwrap();

match re.captures(full_version_line) {
Some(captures) => {
Some(captures.at(2).unwrap_or("").to_string())
// used to be a regex "(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)"
for (pos, c) in full_version_line.char_indices() {
if !c.is_digit(10) { continue }
if pos + 2 >= full_version_line.len() { continue }
if full_version_line.char_at(pos + 1) != '.' { continue }
if !full_version_line.char_at(pos + 2).is_digit(10) { continue }
if pos > 0 && full_version_line.char_at_reverse(pos).is_digit(10) {
continue
}
None => {
println!("Could not extract GDB version from line '{}'",
full_version_line);
None
if pos + 3 < full_version_line.len() &&
full_version_line.char_at(pos + 3).is_digit(10) {
continue
}
return Some(full_version_line[pos..pos+3].to_string());
}
println!("Could not extract GDB version from line '{}'",
full_version_line);
None
},
_ => None
}
Expand All @@ -408,18 +404,26 @@ fn extract_lldb_version(full_version_line: Option<String>) -> Option<String> {
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();

let re = Regex::new(r"[Ll][Ll][Dd][Bb]-([0-9]+)").unwrap();

match re.captures(full_version_line) {
Some(captures) => {
Some(captures.at(1).unwrap_or("").to_string())
}
None => {
println!("Could not extract LLDB version from line '{}'",
full_version_line);
None
}
for (pos, l) in full_version_line.char_indices() {
if l != 'l' && l != 'L' { continue }
if pos + 5 >= full_version_line.len() { continue }
let l = full_version_line.char_at(pos + 1);
if l != 'l' && l != 'L' { continue }
let d = full_version_line.char_at(pos + 2);
if d != 'd' && d != 'D' { continue }
let b = full_version_line.char_at(pos + 3);
if b != 'b' && b != 'B' { continue }
let dash = full_version_line.char_at(pos + 4);
if dash != '-' { continue }

let vers = full_version_line[pos + 5..].chars().take_while(|c| {
c.is_digit(10)
}).collect::<String>();
if vers.len() > 0 { return Some(vers) }
}
println!("Could not extract LLDB version from line '{}'",
full_version_line);
None
},
_ => None
}
Expand Down
73 changes: 38 additions & 35 deletions src/compiletest/errors.rs
Expand Up @@ -9,32 +9,26 @@
// except according to those terms.
use self::WhichLine::*;

use std::ascii::AsciiExt;
use std::io::{BufferedReader, File};
use regex::Regex;

pub struct ExpectedError {
pub line: uint,
pub kind: String,
pub msg: String,
}

#[derive(PartialEq, Show)]
enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }

/// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE".
/// The former is a "follow" that inherits its target line from the most
/// recently seen non-follow annotation; the latter is an "adjust" that moves
/// the target up by one line per `^`.
///
/// The goal is to enable tests like: //~^^^ ERROR go up three
/// and also //~^ ERROR message one for the preceding line, and
/// //~| ERROR message two for that same line.

pub static EXPECTED_PATTERN : &'static str =
r"//~(?P<follow>\|)?(?P<adjusts>\^*)\s*(?P<kind>\S*)\s*(?P<msg>.*)";

#[derive(PartialEq, Show)]
enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }

// Load any test directives embedded in the file
pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
let mut rdr = BufferedReader::new(File::open(testfile).unwrap());

// `last_nonfollow_error` tracks the most recently seen
Expand All @@ -50,7 +44,7 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
rdr.lines().enumerate().filter_map(|(line_no, ln)| {
parse_expected(last_nonfollow_error,
line_no + 1,
ln.unwrap().as_slice(), re)
ln.unwrap().as_slice())
.map(|(which, error)| {
match which {
FollowPrevious(_) => {}
Expand All @@ -63,30 +57,39 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {

fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint,
line: &str,
re: &Regex) -> Option<(WhichLine, ExpectedError)> {
re.captures(line).and_then(|caps| {
let adjusts = caps.name("adjusts").unwrap_or("").len();
let kind = caps.name("kind").unwrap_or("").to_ascii_lowercase();
let msg = caps.name("msg").unwrap_or("").trim().to_string();
let follow = caps.name("follow").unwrap_or("").len() > 0;
line: &str) -> Option<(WhichLine, ExpectedError)> {
let start = match line.find_str("//~") { Some(i) => i, None => return None };
let (follow, adjusts) = if line.char_at(start + 3) == '|' {
(true, 0)
} else {
(false, line[start + 3..].chars().take_while(|c| *c == '^').count())
};
let kind_start = start + 3 + adjusts + (follow as usize);
let letters = line[kind_start..].chars();
let kind = letters.skip_while(|c| c.is_whitespace())
.take_while(|c| !c.is_whitespace())
.map(|c| c.to_lowercase())
.collect::<String>();
let letters = line[kind_start..].chars();
let msg = letters.skip_while(|c| c.is_whitespace())
.skip_while(|c| !c.is_whitespace())
.collect::<String>().trim().to_string();

let (which, line) = if follow {
assert!(adjusts == 0, "use either //~| or //~^, not both.");
let line = last_nonfollow_error.unwrap_or_else(|| {
panic!("encountered //~| without preceding //~^ line.")
});
(FollowPrevious(line), line)
} else {
let which =
if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
let line = line_num - adjusts;
(which, line)
};
let (which, line) = if follow {
assert!(adjusts == 0, "use either //~| or //~^, not both.");
let line = last_nonfollow_error.unwrap_or_else(|| {
panic!("encountered //~| without preceding //~^ line.")
});
(FollowPrevious(line), line)
} else {
let which =
if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
let line = line_num - adjusts;
(which, line)
};

debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
Some((which, ExpectedError { line: line,
kind: kind,
msg: msg, }))
})
debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
Some((which, ExpectedError { line: line,
kind: kind,
msg: msg, }))
}
2 changes: 1 addition & 1 deletion src/compiletest/runtest.rs
Expand Up @@ -99,7 +99,7 @@ fn run_cfail_test(config: &Config, props: &TestProps, testfile: &Path) {
}

let output_to_check = get_output(props, &proc_res);
let expected_errors = errors::load_errors(&config.cfail_regex, testfile);
let expected_errors = errors::load_errors(testfile);
if !expected_errors.is_empty() {
if !props.error_patterns.is_empty() {
fatal("both error pattern and expected errors specified");
Expand Down
25 changes: 13 additions & 12 deletions src/grammar/verify.rs
Expand Up @@ -13,14 +13,11 @@
extern crate syntax;
extern crate rustc;

extern crate regex;

#[macro_use]
extern crate log;

use std::collections::HashMap;
use std::io::File;
use regex::Regex;

use syntax::parse;
use syntax::parse::lexer;
Expand Down Expand Up @@ -167,15 +164,19 @@ fn count(lit: &str) -> usize {
}

fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
let re = Regex::new(
r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
).unwrap();

let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
let start = m.name("start").unwrap_or("");
let end = m.name("end").unwrap_or("");
let toknum = m.name("toknum").unwrap_or("");
let content = m.name("content").unwrap_or("");
// old regex:
// \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
let start = s.find_str("[@").unwrap();
let comma = start + s[start..].find_str(",").unwrap();
let colon = comma + s[comma..].find_str(":").unwrap();
let content_start = colon + s[colon..].find_str("='").unwrap();
let content_end = content_start + s[content_start..].find_str("',<").unwrap();
let toknum_end = content_end + s[content_end..].find_str(">,").unwrap();

let start = &s[comma + 1 .. colon];
let end = &s[colon + 1 .. content_start];
let content = &s[content_start + 2 .. content_end];
let toknum = &s[content_end + 3 .. toknum_end];

let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
toknum).as_slice());
Expand Down
15 changes: 2 additions & 13 deletions src/liblog/directive.rs
Expand Up @@ -8,7 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use regex::Regex;
use std::ascii::AsciiExt;
use std::cmp;

Expand All @@ -34,7 +33,7 @@ fn parse_log_level(level: &str) -> Option<u32> {
///
/// Valid log levels are 0-255, with the most likely ones being 1-4 (defined in
/// std::). Also supports string log levels of error, warn, info, and debug
pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<Regex>) {
pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<String>) {
let mut dirs = Vec::new();

let mut parts = spec.split('/');
Expand Down Expand Up @@ -80,17 +79,7 @@ pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<Regex>) {
});
}});

let filter = filter.map_or(None, |filter| {
match Regex::new(filter) {
Ok(re) => Some(re),
Err(e) => {
println!("warning: invalid regex filter - {:?}", e);
None
}
}
});

return (dirs, filter);
(dirs, filter.map(|s| s.to_string()))
}

#[cfg(test)]
Expand Down

0 comments on commit 6c29708

Please sign in to comment.