Commit

Auto merge of #50855 - nnethercote:fewer-macro_parser-allocs, r=petrochenkov

Speed up the macro parser

These three commits reduce the number of allocations done by the macro parser, in some cases dramatically. For example, for a clean check build of html5ever, the number of allocations is reduced by 40%.

Here are the rustc-benchmarks that are sped up by at least 1%.
```
html5ever-check
        avg: -6.6%      min: -10.3%     max: -4.1%
html5ever
        avg: -5.2%      min: -9.5%      max: -2.8%
html5ever-opt
        avg: -4.3%      min: -9.3%      max: -1.6%
crates.io-check
        avg: -1.8%      min: -2.9%      max: -0.6%
crates.io-opt
        avg: -1.0%      min: -2.2%      max: -0.1%
crates.io
        avg: -1.1%      min: -2.2%      max: -0.2%
```
bors committed May 20, 2018
2 parents ccb5e97 + ad47145 commit 4c26e2e
Showing 5 changed files with 87 additions and 35 deletions.
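One of the three commits replaces the macro parser's owned `TtSeq(Vec<TokenTree>)` variant with a borrowed `TtSeq(&'a [TokenTree])` (first file below), so matcher positions share the matcher's token trees instead of cloning them. A minimal sketch of the shape of that change, using invented stand-in types rather than rustc's own:

```rust
// Invented stand-ins (`Tree`, `OwnedSeq`, `BorrowedSeq`) that only illustrate
// the Vec -> slice switch; none of these names exist in rustc.
#[derive(Clone)]
struct Tree(u32);

// Before: the owning variant forces a clone of the whole Vec per matcher position.
enum OwnedSeq {
    Seq(Vec<Tree>),
}

// After: the borrowing variant only copies a fat pointer.
enum BorrowedSeq<'a> {
    Seq(&'a [Tree]),
}

fn main() {
    let matcher: Vec<Tree> = vec![Tree(1), Tree(2), Tree(3)];

    // Old shape: every position that wants the sequence clones it.
    let OwnedSeq::Seq(owned) = OwnedSeq::Seq(matcher.clone());

    // New shape: positions share the same backing storage for free.
    let BorrowedSeq::Seq(borrowed) = BorrowedSeq::Seq(&matcher[..]);

    assert_eq!(owned.len(), borrowed.len());
    assert_eq!(owned[0].0, borrowed[0].0);
}
```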
94 changes: 71 additions & 23 deletions src/libsyntax/ext/tt/macro_parser.rs
@@ -82,7 +82,7 @@

pub use self::NamedMatch::*;
pub use self::ParseResult::*;
use self::TokenTreeOrTokenTreeVec::*;
use self::TokenTreeOrTokenTreeSlice::*;

use ast::Ident;
use syntax_pos::{self, BytePos, Span};
@@ -97,6 +97,7 @@ use tokenstream::TokenStream;
use util::small_vector::SmallVector;

use std::mem;
use std::ops::{Deref, DerefMut};
use std::rc::Rc;
use std::collections::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};
@@ -106,12 +107,12 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
/// Either a sequence of token trees or a single one. This is used as the representation of the
/// sequence of tokens that make up a matcher.
#[derive(Clone)]
enum TokenTreeOrTokenTreeVec {
enum TokenTreeOrTokenTreeSlice<'a> {
Tt(TokenTree),
TtSeq(Vec<TokenTree>),
TtSeq(&'a [TokenTree]),
}

impl TokenTreeOrTokenTreeVec {
impl<'a> TokenTreeOrTokenTreeSlice<'a> {
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
/// will not recursively descend into subtrees).
fn len(&self) -> usize {
@@ -135,19 +136,19 @@ impl TokenTreeOrTokenTreeVec {
/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
/// descended into.
#[derive(Clone)]
struct MatcherTtFrame {
struct MatcherTtFrame<'a> {
/// The "parent" matcher that we are descending into.
elts: TokenTreeOrTokenTreeVec,
elts: TokenTreeOrTokenTreeSlice<'a>,
/// The position of the "dot" in `elts` at the time we descended.
idx: usize,
}

/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module
/// documentation.
#[derive(Clone)]
struct MatcherPos {
struct MatcherPos<'a> {
/// The token or sequence of tokens that make up the matcher
top_elts: TokenTreeOrTokenTreeVec,
top_elts: TokenTreeOrTokenTreeSlice<'a>,
/// The position of the "dot" in this matcher
idx: usize,
/// The beginning position in the source that the beginning of this matcher corresponds to. In
@@ -186,7 +187,7 @@ struct MatcherPos {
sep: Option<Token>,
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
/// before we enter the sequence.
up: Option<Box<MatcherPos>>,
up: Option<MatcherPosHandle<'a>>,

// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc
@@ -195,17 +196,60 @@ struct MatcherPos {
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
/// that where the bottom of the stack is the outermost matcher.
// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
stack: Vec<MatcherTtFrame>,
stack: Vec<MatcherTtFrame<'a>>,
}

impl MatcherPos {
impl<'a> MatcherPos<'a> {
/// Add `m` as a named match for the `idx`-th metavar.
fn push_match(&mut self, idx: usize, m: NamedMatch) {
let matches = Rc::make_mut(&mut self.matches[idx]);
matches.push(m);
}
}
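`push_match` goes through `Rc::make_mut`, so a position's match vector is only deep-copied when it is actually shared with another position. A standalone sketch of that copy-on-write behaviour (the `first`/`second` names are invented for the example):

```rust
use std::rc::Rc;

fn main() {
    let first: Rc<Vec<i32>> = Rc::new(vec![]);
    let mut second = Rc::clone(&first); // cheap: just a refcount bump

    // Because the Rc is shared, make_mut clones the Vec before mutating it;
    // a uniquely owned Rc would be mutated in place with no extra allocation.
    Rc::make_mut(&mut second).push(1);

    assert!(first.is_empty()); // the shared original is untouched
    assert_eq!(*second, vec![1]);
}
```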

// Lots of MatcherPos instances are created at runtime. Allocating them on the
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
// on the stack is also slow, because MatcherPos is quite a large type and
// instances get moved around a lot between vectors, which requires lots of
// slow memcpy calls.
//
// Therefore, the initial MatcherPos is always allocated on the stack,
// subsequent ones (of which there aren't that many) are allocated on the heap,
// and this type is used to encapsulate both cases.
enum MatcherPosHandle<'a> {
Ref(&'a mut MatcherPos<'a>),
Box(Box<MatcherPos<'a>>),
}

impl<'a> Clone for MatcherPosHandle<'a> {
// This always produces a new Box.
fn clone(&self) -> Self {
MatcherPosHandle::Box(match *self {
MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
MatcherPosHandle::Box(ref b) => b.clone(),
})
}
}

impl<'a> Deref for MatcherPosHandle<'a> {
type Target = MatcherPos<'a>;
fn deref(&self) -> &Self::Target {
match *self {
MatcherPosHandle::Ref(ref r) => r,
MatcherPosHandle::Box(ref b) => b,
}
}
}

impl<'a> DerefMut for MatcherPosHandle<'a> {
fn deref_mut(&mut self) -> &mut MatcherPos<'a> {
match *self {
MatcherPosHandle::Ref(ref mut r) => r,
MatcherPosHandle::Box(ref mut b) => b,
}
}
}
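The `MatcherPosHandle` pattern is a little hard to follow in diff form, so here is a self-contained sketch of the same stack-or-heap handle idea with invented `Item` and `Handle` types; it illustrates the shape of the code above rather than reproducing it:

```rust
use std::ops::{Deref, DerefMut};

// `Item` stands in for the large MatcherPos-like type we want to avoid boxing.
#[derive(Clone)]
struct Item {
    idx: usize,
}

// A handle is either a borrow of a stack-allocated item or an owned box.
enum Handle<'a> {
    Ref(&'a mut Item),
    Box(Box<Item>),
}

impl<'a> Clone for Handle<'a> {
    // Cloning always produces an owned box: a `&mut` borrow cannot be duplicated.
    fn clone(&self) -> Self {
        Handle::Box(match *self {
            Handle::Ref(ref r) => Box::new((**r).clone()),
            Handle::Box(ref b) => b.clone(),
        })
    }
}

impl<'a> Deref for Handle<'a> {
    type Target = Item;
    fn deref(&self) -> &Item {
        match *self {
            Handle::Ref(ref r) => r,
            Handle::Box(ref b) => b,
        }
    }
}

impl<'a> DerefMut for Handle<'a> {
    fn deref_mut(&mut self) -> &mut Item {
        match *self {
            Handle::Ref(ref mut r) => r,
            Handle::Box(ref mut b) => b,
        }
    }
}

fn main() {
    // The first (and usually only) item lives on the stack...
    let mut initial = Item { idx: 0 };
    let mut items = vec![Handle::Ref(&mut initial)];

    // ...so the common case performs no heap allocation at all; any further
    // items are boxed.
    items.push(Handle::Box(Box::new(Item { idx: 1 })));
    items[0].idx += 1;
    assert_eq!(items[0].idx, 1);

    // Cloning a handle always yields an owned, boxed copy.
    let snapshot = items[0].clone();
    assert!(matches!(snapshot, Handle::Box(_)));
}
```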

/// Represents the possible results of an attempted parse.
pub enum ParseResult<T> {
/// Parsed successfully.
@@ -241,10 +285,10 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {

/// Generate the top-level matcher position in which the "dot" is before the first token of the
/// matcher `ms` and we are going to start matching at position `lo` in the source.
fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
let match_idx_hi = count_names(&ms[..]);
fn initial_matcher_pos(ms: &[TokenTree], lo: BytePos) -> MatcherPos {
let match_idx_hi = count_names(ms);
let matches = create_matches(match_idx_hi);
Box::new(MatcherPos {
MatcherPos {
// Start with the top level matcher given to us
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
// The "dot" is before the first token of the matcher
@@ -267,7 +311,7 @@ fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
seq_op: None,
sep: None,
up: None,
})
}
}

/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
Expand Down Expand Up @@ -394,12 +438,12 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
/// # Returns
///
/// A `ParseResult`. Note that matches are kept track of through the items generated.
fn inner_parse_loop(
fn inner_parse_loop<'a>(
sess: &ParseSess,
cur_items: &mut SmallVector<Box<MatcherPos>>,
next_items: &mut Vec<Box<MatcherPos>>,
eof_items: &mut SmallVector<Box<MatcherPos>>,
bb_items: &mut SmallVector<Box<MatcherPos>>,
cur_items: &mut SmallVector<MatcherPosHandle<'a>>,
next_items: &mut Vec<MatcherPosHandle<'a>>,
eof_items: &mut SmallVector<MatcherPosHandle<'a>>,
bb_items: &mut SmallVector<MatcherPosHandle<'a>>,
token: &Token,
span: syntax_pos::Span,
) -> ParseResult<()> {
@@ -502,7 +546,7 @@ fn inner_parse_loop(
}

let matches = create_matches(item.matches.len());
cur_items.push(Box::new(MatcherPos {
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos {
stack: vec![],
sep: seq.separator.clone(),
seq_op: Some(seq.op),
@@ -514,7 +558,7 @@ fn inner_parse_loop(
up: Some(item),
sp_lo: sp.lo(),
top_elts: Tt(TokenTree::Sequence(sp, seq)),
}));
})));
}

// We need to match a metavar (but the identifier is invalid)... this is an error
@@ -596,7 +640,11 @@ pub fn parse(
// processes all of these possible matcher positions and produces possible next positions into
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
// and we start over again.
let mut cur_items = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo()));
//
// This MatcherPos instance is allocated on the stack. All others -- and
// there are frequently *no* others! -- are allocated on the heap.
let mut initial = initial_matcher_pos(ms, parser.span.lo());
let mut cur_items = SmallVector::one(MatcherPosHandle::Ref(&mut initial));
let mut next_items = Vec::new();

loop {
3 changes: 2 additions & 1 deletion src/libsyntax/ext/tt/macro_rules.rs
@@ -27,6 +27,7 @@ use parse::token::Token::*;
use symbol::Symbol;
use tokenstream::{TokenStream, TokenTree};

use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::hash_map::Entry;

@@ -142,7 +143,7 @@ fn generic_extension<'cx>(cx: &'cx mut ExtCtxt,
}

let directory = Directory {
path: cx.current_expansion.module.directory.clone(),
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
};
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false);
5 changes: 3 additions & 2 deletions src/libsyntax/parse/mod.rs
@@ -23,6 +23,7 @@ use symbol::Symbol;
use tokenstream::{TokenStream, TokenTree};
use diagnostics::plugin::ErrorMap;

use std::borrow::Cow;
use std::collections::HashSet;
use std::iter;
use std::path::{Path, PathBuf};
@@ -89,8 +90,8 @@ impl ParseSess {
}

#[derive(Clone)]
pub struct Directory {
pub path: PathBuf,
pub struct Directory<'a> {
pub path: Cow<'a, Path>,
pub ownership: DirectoryOwnership,
}

17 changes: 9 additions & 8 deletions src/libsyntax/parse/parser.rs
@@ -57,6 +57,7 @@ use tokenstream::{self, Delimited, ThinTokenStream, TokenTree, TokenStream};
use symbol::{Symbol, keywords};
use util::ThinVec;

use std::borrow::Cow;
use std::cmp;
use std::mem;
use std::path::{self, Path, PathBuf};
@@ -228,7 +229,7 @@ pub struct Parser<'a> {
prev_token_kind: PrevTokenKind,
pub restrictions: Restrictions,
/// Used to determine the path to externally loaded source files
pub directory: Directory,
pub directory: Directory<'a>,
/// Whether to parse sub-modules in other files.
pub recurse_into_file_modules: bool,
/// Name of the root module this parser originated from. If `None`, then the
@@ -535,7 +536,7 @@ enum TokenExpectType {
impl<'a> Parser<'a> {
pub fn new(sess: &'a ParseSess,
tokens: TokenStream,
directory: Option<Directory>,
directory: Option<Directory<'a>>,
recurse_into_file_modules: bool,
desugar_doc_comments: bool)
-> Self {
@@ -549,7 +550,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
recurse_into_file_modules,
directory: Directory {
path: PathBuf::new(),
path: Cow::from(PathBuf::new()),
ownership: DirectoryOwnership::Owned { relative: None }
},
root_module_name: None,
@@ -572,9 +573,9 @@ impl<'a> Parser<'a> {
if let Some(directory) = directory {
parser.directory = directory;
} else if !parser.span.source_equal(&DUMMY_SP) {
if let FileName::Real(path) = sess.codemap().span_to_unmapped_path(parser.span) {
parser.directory.path = path;
parser.directory.path.pop();
if let FileName::Real(mut path) = sess.codemap().span_to_unmapped_path(parser.span) {
path.pop();
parser.directory.path = Cow::from(path);
}
}

@@ -6008,10 +6009,10 @@ impl<'a> Parser<'a> {

fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") {
self.directory.path.push(&path.as_str());
self.directory.path.to_mut().push(&path.as_str());
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
} else {
self.directory.path.push(&id.name.as_str());
self.directory.path.to_mut().push(&id.name.as_str());
}
}
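
The `Directory` changes here and in `parse/mod.rs` replace an eagerly cloned `PathBuf` with `Cow<'a, Path>`: the parser normally just borrows the expansion module's directory, and `to_mut()` only allocates a fresh `PathBuf` when `push_directory` actually has to extend the path. A small sketch of that `Cow` behaviour, with invented variable names:

```rust
use std::borrow::Cow;
use std::path::{Path, PathBuf};

fn main() {
    let module_dir = PathBuf::from("src/lib");

    // Borrowing the existing path costs nothing.
    let mut dir: Cow<Path> = Cow::from(module_dir.as_path());
    assert!(matches!(dir, Cow::Borrowed(_)));

    // A new PathBuf is only allocated once the path actually has to change.
    dir.to_mut().push("submodule");
    assert!(matches!(dir, Cow::Owned(_)));
    assert_eq!(&*dir, Path::new("src/lib/submodule"));
}
```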

3 changes: 2 additions & 1 deletion src/libsyntax/tokenstream.rs
@@ -31,6 +31,7 @@ use print::pprust;
use serialize::{Decoder, Decodable, Encoder, Encodable};
use util::RcSlice;

use std::borrow::Cow;
use std::{fmt, iter, mem};
use std::hash::{self, Hash};

@@ -106,7 +107,7 @@ impl TokenTree {
-> macro_parser::NamedParseResult {
// `None` is because we're not interpolating
let directory = Directory {
path: cx.current_expansion.module.directory.clone(),
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
};
macro_parser::parse(cx.parse_sess(), tts, mtch, Some(directory), true)
