Introduce NonterminalKind
It encapsulates (part of) the interface between the parser and the macro-by-example (`macro_rules`) parser.

The second bit of that interface is the somewhat more general `parse_ast_fragment`, which is the reason some `parse_xxx` functions stay public.
matklad committed Aug 2, 2020
1 parent dfe1e3b commit 2595d75
Showing 9 changed files with 231 additions and 211 deletions.
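
For orientation, a rough sketch of the flow these changes set up, written as a hypothetical helper (`match_fragment` is not a real function in the tree; the three calls inside it are the items introduced or used in the diffs below, and the snippet assumes the usual `rustc_ast`/`rustc_parse`/`rustc_span` imports are in scope):

// Hypothetical glue, for illustration only; not part of this commit.
fn match_fragment<'a>(
    parser: &mut Parser<'a>,
    fragment: Symbol, // e.g. `sym::expr`, taken from a `$e:expr` matcher
    token: &Token,
) -> Option<PResult<'a, Nonterminal>> {
    // 1. Resolve the fragment specifier once; unknown specifiers yield `None`.
    let kind = NonterminalKind::from_symbol(fragment)?;
    // 2. Cheaply rule out positions where such a fragment can never start.
    if !Parser::nonterminal_may_begin_with(kind, token) {
        return None;
    }
    // 3. Otherwise hand the real work to the "black-box" parser.
    Some(parser.parse_nonterminal(kind))
}

The first two calls show up in `macro_parser.rs` below; the third replaces the local `parse_nt`.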
38 changes: 38 additions & 0 deletions src/librustc_ast/token.rs
@@ -760,6 +760,44 @@ pub enum Nonterminal {
 #[cfg(target_arch = "x86_64")]
 rustc_data_structures::static_assert_size!(Nonterminal, 40);
 
+#[derive(Copy, Clone)]
+pub enum NonterminalKind {
+    Item,
+    Block,
+    Stmt,
+    Pat,
+    Expr,
+    Ty,
+    Ident,
+    Lifetime,
+    Literal,
+    Meta,
+    Path,
+    Vis,
+    TT,
+}
+
+impl NonterminalKind {
+    pub fn from_symbol(symbol: Symbol) -> Option<NonterminalKind> {
+        Some(match symbol {
+            sym::item => NonterminalKind::Item,
+            sym::block => NonterminalKind::Block,
+            sym::stmt => NonterminalKind::Stmt,
+            sym::pat => NonterminalKind::Pat,
+            sym::expr => NonterminalKind::Expr,
+            sym::ty => NonterminalKind::Ty,
+            sym::ident => NonterminalKind::Ident,
+            sym::lifetime => NonterminalKind::Lifetime,
+            sym::literal => NonterminalKind::Literal,
+            sym::meta => NonterminalKind::Meta,
+            sym::path => NonterminalKind::Path,
+            sym::vis => NonterminalKind::Vis,
+            sym::tt => NonterminalKind::TT,
+            _ => return None,
+        })
+    }
+}
+
 impl Nonterminal {
     fn span(&self) -> Span {
         match self {
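A quick, hedged illustration of the contract this gives callers (assuming `sym`, `Symbol`, and `NonterminalKind` are in scope as in `rustc_ast`/`rustc_span`): every valid fragment specifier maps to `Some`, anything else to `None`; that is exactly the test `is_legal_fragment_specifier` in `macro_rules.rs` is reduced to further down.

assert!(NonterminalKind::from_symbol(sym::expr).is_some());
assert!(NonterminalKind::from_symbol(sym::tt).is_some());
assert!(NonterminalKind::from_symbol(Symbol::intern("not_a_fragment")).is_none());
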
203 changes: 18 additions & 185 deletions src/librustc_expand/mbe/macro_parser.rs
@@ -76,15 +76,11 @@ use TokenTreeOrTokenTreeSlice::*;
 
 use crate::mbe::{self, TokenTree};
 
-use rustc_ast::ptr::P;
-use rustc_ast::token::{self, DocComment, Nonterminal, Token};
-use rustc_ast_pretty::pprust;
-use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
+use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token};
+use rustc_parse::parser::Parser;
 use rustc_session::parse::ParseSess;
-use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent, Symbol};
+use rustc_span::symbol::{kw, MacroRulesNormalizedIdent};
 
-use rustc_errors::PResult;
-use rustc_span::Span;
 use smallvec::{smallvec, SmallVec};
 
 use rustc_data_structures::fx::FxHashMap;
@@ -576,7 +572,8 @@ fn inner_parse_loop<'root, 'tt>(
                 TokenTree::MetaVarDecl(_, _, id) => {
                     // Built-in nonterminals never start with these tokens,
                     // so we can eliminate them from consideration.
-                    if may_begin_with(token, id.name) {
+                    let kind = NonterminalKind::from_symbol(id.name).unwrap();
+                    if Parser::nonterminal_may_begin_with(kind, token) {
                         bb_items.push(item);
                     }
                 }
@@ -738,8 +735,19 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
             let mut item = bb_items.pop().unwrap();
             if let TokenTree::MetaVarDecl(span, _, ident) = item.top_elts.get_tt(item.idx) {
                 let match_cur = item.match_cur;
-                let nt = match parse_nt(parser.to_mut(), span, ident.name) {
-                    Err(()) => return ErrorReported,
+                let kind = NonterminalKind::from_symbol(ident.name).unwrap();
+                let nt = match parser.to_mut().parse_nonterminal(kind) {
+                    Err(mut err) => {
+                        err.span_label(
+                            span,
+                            format!(
+                                "while parsing argument for this `{}` macro fragment",
+                                ident.name
+                            ),
+                        )
+                        .emit();
+                        return ErrorReported;
+                    }
                     Ok(nt) => nt,
                 };
                 item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt)));
@@ -754,178 +762,3 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
assert!(!cur_items.is_empty());
}
}

/// The token is an identifier, but not `_`.
/// We prohibit passing `_` to macros expecting `ident` for now.
fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> {
token.ident().filter(|(ident, _)| ident.name != kw::Underscore)
}

/// Checks whether a non-terminal may begin with a particular token.
///
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
/// token. Be conservative (return true) if not sure.
fn may_begin_with(token: &Token, name: Symbol) -> bool {
/// Checks whether the non-terminal may contain a single (non-keyword) identifier.
fn may_be_ident(nt: &token::Nonterminal) -> bool {
match *nt {
token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_) => false,
_ => true,
}
}

match name {
sym::expr => {
token.can_begin_expr()
// This exception is here for backwards compatibility.
&& !token.is_keyword(kw::Let)
}
sym::ty => token.can_begin_type(),
sym::ident => get_macro_ident(token).is_some(),
sym::literal => token.can_begin_literal_maybe_minus(),
sym::vis => match token.kind {
// The follow-set of :vis + "priv" keyword + interpolated
token::Comma | token::Ident(..) | token::Interpolated(..) => true,
_ => token.can_begin_type(),
},
sym::block => match token.kind {
token::OpenDelim(token::Brace) => true,
token::Interpolated(ref nt) => match **nt {
token::NtItem(_)
| token::NtPat(_)
| token::NtTy(_)
| token::NtIdent(..)
| token::NtMeta(_)
| token::NtPath(_)
| token::NtVis(_) => false, // none of these may start with '{'.
_ => true,
},
_ => false,
},
sym::path | sym::meta => match token.kind {
token::ModSep | token::Ident(..) => true,
token::Interpolated(ref nt) => match **nt {
token::NtPath(_) | token::NtMeta(_) => true,
_ => may_be_ident(&nt),
},
_ => false,
},
sym::pat => match token.kind {
token::Ident(..) | // box, ref, mut, and other identifiers (can stricten)
token::OpenDelim(token::Paren) | // tuple pattern
token::OpenDelim(token::Bracket) | // slice pattern
token::BinOp(token::And) | // reference
token::BinOp(token::Minus) | // negative literal
token::AndAnd | // double reference
token::Literal(..) | // literal
token::DotDot | // range pattern (future compat)
token::DotDotDot | // range pattern (future compat)
token::ModSep | // path
token::Lt | // path (UFCS constant)
token::BinOp(token::Shl) => true, // path (double UFCS)
token::Interpolated(ref nt) => may_be_ident(nt),
_ => false,
},
sym::lifetime => match token.kind {
token::Lifetime(_) => true,
token::Interpolated(ref nt) => match **nt {
token::NtLifetime(_) | token::NtTT(_) => true,
_ => false,
},
_ => false,
},
_ => match token.kind {
token::CloseDelim(_) => false,
_ => true,
},
}
}
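
The "stability guarantee" above survives this commit: `may_begin_with` is deleted here in favor of `Parser::nonterminal_may_begin_with`, but the behavior carries over. A small self-contained illustration of the `expr` arm's `kw::Let` exception (plain `macro_rules!`, nothing rustc-internal): because `let` can never begin an `expr` fragment, the matcher below falls through to its second arm instead of committing to the first arm and then failing.

macro_rules! maybe_let {
    // Tried first; skipped for `let ...` input because `let` cannot begin an
    // `expr` fragment (the backwards-compatibility exception above).
    ($e:expr) => { $e };
    // Therefore this arm stays reachable.
    (let $p:pat = $e:expr) => {{ let $p = $e; }};
}

fn main() {
    let _sum = maybe_let!(1 + 1); // matches `$e:expr`
    maybe_let!(let _x = 42);      // falls through to the `let ...` arm
}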

/// A call to the "black-box" parser to parse some Rust non-terminal.
///
/// # Parameters
///
/// - `p`: the "black-box" parser to use
/// - `sp`: the `Span` we want to parse
/// - `name`: the name of the metavar _matcher_ we want to match (e.g., `tt`, `ident`, `block`,
/// etc...)
///
/// # Returns
///
/// The parsed non-terminal.
fn parse_nt(p: &mut Parser<'_>, sp: Span, name: Symbol) -> Result<Nonterminal, ()> {
// FIXME(Centril): Consider moving this to `parser.rs` to make
// the visibilities of the methods used below `pub(super)` at most.
if name == sym::tt {
return Ok(token::NtTT(p.parse_token_tree()));
}
parse_nt_inner(p, sp, name).map_err(|mut err| {
err.span_label(sp, format!("while parsing argument for this `{}` macro fragment", name))
.emit()
})
}

fn parse_nt_inner<'a>(p: &mut Parser<'a>, sp: Span, name: Symbol) -> PResult<'a, Nonterminal> {
// Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`)
// needs to have them force-captured here.
// A `macro_rules!` invocation may pass a captured item/expr to a proc-macro,
// which requires having captured tokens available. Since we cannot determine
// in advance whether or not a proc-macro will be (transitively) invoked,
// we always capture tokens for any `Nonterminal` which needs them.
Ok(match name {
sym::item => match p.collect_tokens(|this| this.parse_item())? {
(Some(mut item), tokens) => {
// If we captured tokens during parsing (due to outer attributes),
// use those.
if item.tokens.is_none() {
item.tokens = Some(tokens);
}
token::NtItem(item)
}
(None, _) => return Err(p.struct_span_err(p.token.span, "expected an item keyword")),
},
sym::block => token::NtBlock(p.parse_block()?),
sym::stmt => match p.parse_stmt()? {
Some(s) => token::NtStmt(s),
None => return Err(p.struct_span_err(p.token.span, "expected a statement")),
},
sym::pat => token::NtPat(p.parse_pat(None)?),
sym::expr => {
let (mut expr, tokens) = p.collect_tokens(|this| this.parse_expr())?;
// If we captured tokens during parsing (due to outer attributes),
// use those.
if expr.tokens.is_none() {
expr.tokens = Some(tokens);
}
token::NtExpr(expr)
}
sym::literal => token::NtLiteral(p.parse_literal_maybe_minus()?),
sym::ty => token::NtTy(p.parse_ty()?),
// this could be handled like a token, since it is one
sym::ident => {
if let Some((ident, is_raw)) = get_macro_ident(&p.token) {
p.bump();
token::NtIdent(ident, is_raw)
} else {
let token_str = pprust::token_to_string(&p.token);
let msg = &format!("expected ident, found {}", &token_str);
return Err(p.struct_span_err(p.token.span, msg));
}
}
sym::path => token::NtPath(p.parse_path(PathStyle::Type)?),
sym::meta => token::NtMeta(P(p.parse_attr_item()?)),
sym::vis => token::NtVis(p.parse_visibility(FollowedByType::Yes)?),
sym::lifetime => {
if p.check_lifetime() {
token::NtLifetime(p.expect_lifetime().ident)
} else {
let token_str = pprust::token_to_string(&p.token);
let msg = &format!("expected a lifetime, found `{}`", &token_str);
return Err(p.struct_span_err(p.token.span, msg));
}
}
// this is not supposed to happen, since it has been checked
// when compiling the macro.
_ => p.span_bug(sp, "invalid fragment specifier"),
})
}
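
One more user-visible consequence: the "while parsing argument for this `...` macro fragment" label that `parse_nt` used to attach, and that the new code in `parse_tt` above now attaches itself, shows up whenever the black-box parser commits to a fragment and then fails partway through it. A minimal standalone reproduction; the exact diagnostic wording here is approximate.

macro_rules! takes_expr {
    ($e:expr) => { $e };
}

fn main() {
    // `1` can begin an expression, so the parser commits to the `expr`
    // fragment and then fails at the dangling `+`, reporting roughly
    // "expected expression" with the label
    // "while parsing argument for this `expr` macro fragment".
    let _ = takes_expr!(1 +);
}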
20 changes: 2 additions & 18 deletions src/librustc_expand/mbe/macro_rules.rs
@@ -9,7 +9,7 @@ use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
 use crate::mbe::transcribe::transcribe;
 
 use rustc_ast::ast;
-use rustc_ast::token::{self, NtTT, Token, TokenKind::*};
+use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*};
 use rustc_ast::tokenstream::{DelimSpan, TokenStream};
 use rustc_ast_pretty::pprust;
 use rustc_attr::{self as attr, TransparencyError};
@@ -1228,23 +1228,7 @@ fn is_legal_fragment_specifier(
      * for checking against feature gates. See past versions of
      * this function.
      */
-    match frag_name {
-        sym::item
-        | sym::block
-        | sym::stmt
-        | sym::expr
-        | sym::pat
-        | sym::lifetime
-        | sym::path
-        | sym::ty
-        | sym::ident
-        | sym::meta
-        | sym::tt
-        | sym::vis
-        | sym::literal
-        | kw::Invalid => true,
-        _ => false,
-    }
+    NonterminalKind::from_symbol(frag_name).is_some() || frag_name == kw::Invalid
 }
 
 fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
2 changes: 1 addition & 1 deletion src/librustc_parse/parser/expr.rs
@@ -1450,7 +1450,7 @@ impl<'a> Parser<'a> {
 
     /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
    /// Keep this in sync with `Token::can_begin_literal_maybe_minus`.
-    pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
+    pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
         maybe_whole_expr!(self);
 
         let lo = self.token.span;
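The `'-' lit | lit` grammar in that doc comment is also what lets a `literal` macro fragment match a negative number as a single piece of input; a small standalone example:

macro_rules! lit {
    ($l:literal) => { $l };
}

fn main() {
    let a = lit!(1);
    let b = lit!(-1); // accepted: `-` followed by a literal is one `literal` fragment
    assert_eq!(a + b, 0);
    assert_eq!(lit!("four").len(), 4);
}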
6 changes: 4 additions & 2 deletions src/librustc_parse/parser/mod.rs
@@ -1,6 +1,7 @@
 pub mod attr;
 mod expr;
 mod item;
+mod nonterminal;
 mod pat;
 mod path;
 mod ty;
@@ -10,6 +11,7 @@ mod generics;
 mod stmt;
 use diagnostics::Error;
 
+
 use crate::lexer::UnmatchedBrace;
 
 use log::debug;
@@ -958,7 +960,7 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses a single token tree from the input.
-    pub fn parse_token_tree(&mut self) -> TokenTree {
+    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
                 let frame = mem::replace(
@@ -1017,7 +1019,7 @@ impl<'a> Parser<'a> {
     /// If the following element can't be a tuple (i.e., it's a function definition), then
     /// it's not a tuple struct field), and the contents within the parentheses isn't valid,
     /// so emit a proper diagnostic.
-    pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> {
+    pub(crate) fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> {
         maybe_whole!(self, NtVis, |x| x);
 
         self.expected_tokens.push(TokenType::Keyword(kw::Crate));
