From b8d6686ef3c2998d29c7ef531895ee05305cfef1 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Fri, 11 Nov 2016 16:28:47 -0700 Subject: [PATCH] Factor out inner current Earley item loop. Change multiple functions to be non-public. Change nameize to accept an iterator so as to avoid an allocation. --- src/libsyntax/ext/tt/macro_parser.rs | 299 ++++++++++++++------------- 1 file changed, 153 insertions(+), 146 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 64acce19c1cc4..3c57f7a05c29d 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -130,7 +130,7 @@ struct MatcherTtFrame { } #[derive(Clone)] -pub struct MatcherPos { +struct MatcherPos { stack: Vec, top_elts: TokenTreeOrTokenTreeVec, sep: Option, @@ -162,14 +162,13 @@ pub fn count_names(ms: &[TokenTree]) -> usize { }) } -pub fn initial_matcher_pos(ms: Vec, sep: Option, lo: BytePos) - -> Box { +fn initial_matcher_pos(ms: Vec, lo: BytePos) -> Box { let match_idx_hi = count_names(&ms[..]); - let matches: Vec<_> = (0..match_idx_hi).map(|_| Vec::new()).collect(); + let matches = create_matches(match_idx_hi); Box::new(MatcherPos { stack: vec![], top_elts: TtSeq(ms), - sep: sep, + sep: None, idx: 0, up: None, matches: matches, @@ -202,26 +201,25 @@ pub enum NamedMatch { MatchedNonterminal(Rc) } -fn nameize(ms: &[TokenTree], res: &[Rc]) -> NamedParseResult { - fn n_rec(m: &TokenTree, res: &[Rc], - ret_val: &mut HashMap>, idx: &mut usize) +fn nameize>>(ms: &[TokenTree], mut res: I) -> NamedParseResult { + fn n_rec>>(m: &TokenTree, mut res: &mut I, + ret_val: &mut HashMap>) -> Result<(), (syntax_pos::Span, String)> { match *m { TokenTree::Sequence(_, ref seq) => { for next_m in &seq.tts { - n_rec(next_m, res, ret_val, idx)? + n_rec(next_m, res.by_ref(), ret_val)? } } TokenTree::Delimited(_, ref delim) => { for next_m in &delim.tts { - n_rec(next_m, res, ret_val, idx)?; + n_rec(next_m, res.by_ref(), ret_val)?; } } TokenTree::Token(sp, MatchNt(bind_name, _)) => { match ret_val.entry(bind_name) { Vacant(spot) => { - spot.insert(res[*idx].clone()); - *idx += 1; + spot.insert(res.next().unwrap()); } Occupied(..) => { return Err((sp, format!("duplicated bind name: {}", bind_name))) @@ -238,9 +236,8 @@ fn nameize(ms: &[TokenTree], res: &[Rc]) -> NamedParseResult { } let mut ret_val = HashMap::new(); - let mut idx = 0; for m in ms { - match n_rec(m, res, &mut ret_val, &mut idx) { + match n_rec(m, res.by_ref(), &mut ret_val) { Ok(_) => {}, Err((sp, msg)) => return Error(sp, msg), } @@ -266,9 +263,8 @@ pub fn parse_failure_msg(tok: Token) -> String { } } -/// Perform a token equality check, ignoring syntax context (that is, an -/// unhygienic comparison) -pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { +/// Perform a token equality check, ignoring syntax context (that is, an unhygienic comparison) +fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { match (t1,t2) { (&token::Ident(id1),&token::Ident(id2)) | (&token::Lifetime(id1),&token::Lifetime(id2)) => @@ -277,154 +273,165 @@ pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { } } -pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseResult { - let mut parser = Parser::new_with_doc_flag(sess, Box::new(rdr), true); - let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), None, parser.span.lo)); - - loop { - let mut bb_eis = Vec::new(); // black-box parsed by parser.rs - let mut next_eis = Vec::new(); // or proceed normally - let mut eof_eis = Vec::new(); +fn create_matches(len: usize) -> Vec>> { + (0..len).into_iter().map(|_| Vec::new()).collect() +} - // for each Earley item - while let Some(mut ei) = cur_eis.pop() { - // When unzipped trees end, remove them - while ei.idx >= ei.top_elts.len() { - match ei.stack.pop() { - Some(MatcherTtFrame { elts, idx }) => { - ei.top_elts = elts; - ei.idx = idx + 1; - } - None => break +fn inner_parse_loop(cur_eis: &mut SmallVector>, + next_eis: &mut Vec>, + eof_eis: &mut Vec>, + bb_eis: &mut Vec>, + token: &Token, span: &syntax_pos::Span) -> ParseResult<()> { + while let Some(mut ei) = cur_eis.pop() { + // When unzipped trees end, remove them + while ei.idx >= ei.top_elts.len() { + match ei.stack.pop() { + Some(MatcherTtFrame { elts, idx }) => { + ei.top_elts = elts; + ei.idx = idx + 1; } + None => break } + } - let idx = ei.idx; - let len = ei.top_elts.len(); - - /* at end of sequence */ - if idx >= len { - // can't move out of `match`es, so: - if ei.up.is_some() { - // hack: a matcher sequence is repeating iff it has a - // parent (the top level is just a container) - - // disregard separator, try to go up - // (remove this condition to make trailing seps ok) - if idx == len { - // pop from the matcher position - - let mut new_pos = ei.up.clone().unwrap(); - - // update matches (the MBE "parse tree") by appending - // each tree as a subtree. - - // I bet this is a perf problem: we're preemptively - // doing a lot of array work that will get thrown away - // most of the time. - - // Only touch the binders we have actually bound - for idx in ei.match_lo..ei.match_hi { - let sub = ei.matches[idx].clone(); - new_pos.matches[idx] - .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo, - parser.span.hi)))); - } - - new_pos.match_cur = ei.match_hi; - new_pos.idx += 1; - cur_eis.push(new_pos); + let idx = ei.idx; + let len = ei.top_elts.len(); + + // at end of sequence + if idx >= len { + // We are repeating iff there is a parent + if ei.up.is_some() { + // Disregarding the separator, add the "up" case to the tokens that should be + // examined. + // (remove this condition to make trailing seps ok) + if idx == len { + let mut new_pos = ei.up.clone().unwrap(); + + // update matches (the MBE "parse tree") by appending + // each tree as a subtree. + + // I bet this is a perf problem: we're preemptively + // doing a lot of array work that will get thrown away + // most of the time. + + // Only touch the binders we have actually bound + for idx in ei.match_lo..ei.match_hi { + let sub = ei.matches[idx].clone(); + new_pos.matches[idx] + .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo, + span.hi)))); } - // can we go around again? - - // Check if we need a separator - if idx == len && ei.sep.is_some() { - if ei.sep.as_ref().map(|ref sep| token_name_eq(&parser.token, sep)) - .unwrap_or(false) { - // i'm conflicted about whether this should be hygienic.... though in - // this case, if the separators are never legal idents, it shouldn't - // matter. - // ei.match_cur = ei.match_lo; - ei.idx += 1; - next_eis.push(ei); - } - } else { // we don't need a separator - ei.match_cur = ei.match_lo; - ei.idx = 0; - cur_eis.push(ei); - } - } else { - eof_eis.push(ei); + new_pos.match_cur = ei.match_hi; + new_pos.idx += 1; + cur_eis.push(new_pos); } - } else { - match ei.top_elts.get_tt(idx) { - /* need to descend into sequence */ - TokenTree::Sequence(sp, seq) => { - if seq.op == tokenstream::KleeneOp::ZeroOrMore { - let mut new_ei = ei.clone(); - new_ei.match_cur += seq.num_captures; - new_ei.idx += 1; - //we specifically matched zero repeats. - for idx in ei.match_cur..ei.match_cur + seq.num_captures { - new_ei.matches[idx].push(Rc::new(MatchedSeq(vec![], sp))); - } - - cur_eis.push(new_ei); - } - let matches: Vec<_> = (0..ei.matches.len()) - .map(|_| Vec::new()).collect(); - cur_eis.push(Box::new(MatcherPos { - stack: vec![], - sep: seq.separator.clone(), - idx: 0, - matches: matches, - match_lo: ei.match_cur, - match_cur: ei.match_cur, - match_hi: ei.match_cur + seq.num_captures, - up: Some(ei), - sp_lo: sp.lo, - top_elts: Tt(TokenTree::Sequence(sp, seq)), - })); + // Check if we need a separator + if idx == len && ei.sep.is_some() { + // We have a separator, and it is the current token. + if ei.sep.as_ref().map(|ref sep| token_name_eq(&token, sep)).unwrap_or(false) { + ei.idx += 1; + next_eis.push(ei); } - TokenTree::Token(_, MatchNt(..)) => { - // Built-in nonterminals never start with these tokens, - // so we can eliminate them from consideration. - match parser.token { - token::CloseDelim(_) => {}, - _ => bb_eis.push(ei), + } else { // we don't need a separator + ei.match_cur = ei.match_lo; + ei.idx = 0; + cur_eis.push(ei); + } + } else { + // We aren't repeating, so we must be potentially at the end of the input. + eof_eis.push(ei); + } + } else { + match ei.top_elts.get_tt(idx) { + /* need to descend into sequence */ + TokenTree::Sequence(sp, seq) => { + if seq.op == tokenstream::KleeneOp::ZeroOrMore { + // Examine the case where there are 0 matches of this sequence + let mut new_ei = ei.clone(); + new_ei.match_cur += seq.num_captures; + new_ei.idx += 1; + for idx in ei.match_cur..ei.match_cur + seq.num_captures { + new_ei.matches[idx].push(Rc::new(MatchedSeq(vec![], sp))); } + cur_eis.push(new_ei); } - TokenTree::Token(sp, SubstNt(..)) => { - return Error(sp, "missing fragment specifier".to_string()) - } - seq @ TokenTree::Delimited(..) | seq @ TokenTree::Token(_, DocComment(..)) => { - let lower_elts = mem::replace(&mut ei.top_elts, Tt(seq)); - let idx = ei.idx; - ei.stack.push(MatcherTtFrame { - elts: lower_elts, - idx: idx, - }); - ei.idx = 0; - cur_eis.push(ei); + + // Examine the case where there is at least one match of this sequence + let matches = create_matches(ei.matches.len()); + cur_eis.push(Box::new(MatcherPos { + stack: vec![], + sep: seq.separator.clone(), + idx: 0, + matches: matches, + match_lo: ei.match_cur, + match_cur: ei.match_cur, + match_hi: ei.match_cur + seq.num_captures, + up: Some(ei), + sp_lo: sp.lo, + top_elts: Tt(TokenTree::Sequence(sp, seq)), + })); + } + TokenTree::Token(_, MatchNt(..)) => { + // Built-in nonterminals never start with these tokens, + // so we can eliminate them from consideration. + match *token { + token::CloseDelim(_) => {}, + _ => bb_eis.push(ei), } - TokenTree::Token(_, ref t) => { - if token_name_eq(t, &parser.token) { - ei.idx += 1; - next_eis.push(ei); - } + } + TokenTree::Token(sp, SubstNt(..)) => { + return Error(sp, "missing fragment specifier".to_string()) + } + seq @ TokenTree::Delimited(..) | seq @ TokenTree::Token(_, DocComment(..)) => { + let lower_elts = mem::replace(&mut ei.top_elts, Tt(seq)); + let idx = ei.idx; + ei.stack.push(MatcherTtFrame { + elts: lower_elts, + idx: idx, + }); + ei.idx = 0; + cur_eis.push(ei); + } + TokenTree::Token(_, ref t) => { + if token_name_eq(t, &token) { + ei.idx += 1; + next_eis.push(ei); } } } } + } + + Success(()) +} + +pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseResult { + let mut parser = Parser::new_with_doc_flag(sess, Box::new(rdr), true); + let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo)); + + loop { + let mut bb_eis = Vec::new(); // black-box parsed by parser.rs + let mut next_eis = Vec::new(); // or proceed normally + + // FIXME: Use SmallVector since in the successful case we will only have one + let mut eof_eis = Vec::new(); + + match inner_parse_loop(&mut cur_eis, &mut next_eis, &mut eof_eis, &mut bb_eis, + &parser.token, &parser.span) { + Success(_) => {}, + Failure(sp, tok) => return Failure(sp, tok), + Error(sp, msg) => return Error(sp, msg), + } + + // inner parse loop handled all cur_eis, so it's empty + assert!(cur_eis.is_empty()); /* error messages here could be improved with links to orig. rules */ if token_name_eq(&parser.token, &token::Eof) { if eof_eis.len() == 1 { - let v = eof_eis[0].matches.iter_mut() - .map(|dv| dv.pop().unwrap()).collect::>(); - return nameize(ms, &v[..]); + return nameize(ms, eof_eis[0].matches.iter_mut().map(|mut dv| dv.pop().unwrap())); } else if eof_eis.len() > 1 { return Error(parser.span, "ambiguity: multiple successful parses".to_string()); } else { @@ -473,7 +480,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes } } -pub fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal { +fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal { match name { "tt" => { p.quote_depth += 1; //but in theory, non-quoted tts might be useful