Cleanup and followup to PR #17830: parsing changes
Prevents breaking down `$name` tokens into separate `$` and `name`.
Reports unknown macro variables.

Fixes #18775
Fixes #18839
Fixes #15640
pczarn committed Jan 6, 2015
1 parent 8efd990 commit d85c017
Showing 12 changed files with 170 additions and 103 deletions.
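
The visible effect of the second change: a `$var` that names no matcher binding is now a hard error instead of silently transcribing as the two tokens `$` and `var`. A minimal sketch of the kind of typo this catches (macro and names invented for illustration; the error text comes from the new `check_unknown_macro_variable` below):

```rust
macro_rules! greet {
    ($name:ident) => {
        // `$nmae` is a typo for `$name`; before this commit it leaked into
        // the expansion as `$` followed by `nmae`. With the new check it
        // fails fast with:
        //     error: unknown macro variable `nmae`
        fn $name() { println!("{}", stringify!($nmae)); }
    };
}
```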
7 changes: 3 additions & 4 deletions src/doc/reference.md
@@ -690,10 +690,9 @@ balanced, but they are otherwise not special.

In the matcher, `$` _name_ `:` _designator_ matches the nonterminal in the Rust
syntax named by _designator_. Valid designators are `item`, `block`, `stmt`,
`pat`, `expr`, `ty` (type), `ident`, `path`, `matchers` (lhs of the `=>` in
macro rules), `tt` (rhs of the `=>` in macro rules). In the transcriber, the
designator is already known, and so only the name of a matched nonterminal
comes after the dollar sign.
`pat`, `expr`, `ty` (type), `ident`, `path`, `tt` (either side of the `=>`
in macro rules). In the transcriber, the designator is already known, and so
only the name of a matched nonterminal comes after the dollar sign.

In both the matcher and transcriber, the Kleene star-like operator indicates
repetition. The Kleene star operator consists of `$` and parens, optionally
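As a hedged companion to the doc text above, here is a matcher/transcriber pair using one of the listed designators (ordinary `macro_rules!`, not part of this commit):

```rust
// In the matcher, `$e:expr` binds an expression nonterminal; in the
// transcriber the designator is omitted and plain `$e` substitutes it.
macro_rules! square {
    ($e:expr) => { $e * $e };
}

fn main() {
    // The captured expression stays grouped, so this is (1 + 2) * (1 + 2).
    assert_eq!(square!(1 + 2), 9);
}
```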
20 changes: 11 additions & 9 deletions src/libstd/io/mod.rs
@@ -1780,9 +1780,11 @@ pub struct UnstableFileStat {
pub gen: u64,
}


// NOTE(stage0): change this one last #[doc=..] to /// after the next snapshot
bitflags! {
#[doc = "A set of permissions for a file or directory is represented"]
#[doc = "by a set of flags which are or'd together."]
#[doc = "A set of permissions for a file or directory is represented by a set of"]
/// flags which are or'd together.
flags FilePermission: u32 {
const USER_READ = 0o400,
const USER_WRITE = 0o200,
@@ -1798,20 +1800,20 @@ bitflags! {
const GROUP_RWX = GROUP_READ.bits | GROUP_WRITE.bits | GROUP_EXECUTE.bits,
const OTHER_RWX = OTHER_READ.bits | OTHER_WRITE.bits | OTHER_EXECUTE.bits,

#[doc = "Permissions for user owned files, equivalent to 0644 on"]
#[doc = "unix-like systems."]
/// Permissions for user owned files, equivalent to 0644 on unix-like
/// systems.
const USER_FILE = USER_READ.bits | USER_WRITE.bits | GROUP_READ.bits | OTHER_READ.bits,

#[doc = "Permissions for user owned directories, equivalent to 0755 on"]
#[doc = "unix-like systems."]
/// Permissions for user owned directories, equivalent to 0755 on
/// unix-like systems.
const USER_DIR = USER_RWX.bits | GROUP_READ.bits | GROUP_EXECUTE.bits |
OTHER_READ.bits | OTHER_EXECUTE.bits,

#[doc = "Permissions for user owned executables, equivalent to 0755"]
#[doc = "on unix-like systems."]
/// Permissions for user owned executables, equivalent to 0755
/// on unix-like systems.
const USER_EXEC = USER_DIR.bits,

#[doc = "All possible permissions enabled."]
/// All possible permissions enabled.
const ALL_PERMISSIONS = USER_RWX.bits | GROUP_RWX.bits | OTHER_RWX.bits,
}
}
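For orientation, a sketch of how these flags compose, assuming the `bitflags!`-generated API of this era (`|` for union, `contains` for subset tests); the snippet is illustrative, not taken from the commit:

```rust
use std::io::{USER_READ, USER_WRITE, GROUP_READ, OTHER_READ, USER_FILE};

fn main() {
    // Or'ing the individual bits together reproduces the 0644 composite.
    let p = USER_READ | USER_WRITE | GROUP_READ | OTHER_READ;
    assert_eq!(p, USER_FILE);
    // contains() is true when every bit of the argument is set in `p`.
    assert!(p.contains(USER_READ | GROUP_READ));
}
```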
6 changes: 0 additions & 6 deletions src/libsyntax/ast.rs
@@ -883,7 +883,6 @@ impl TokenTree {
pub fn len(&self) -> uint {
match *self {
TtToken(_, token::DocComment(_)) => 2,
TtToken(_, token::SubstNt(..)) => 2,
TtToken(_, token::SpecialVarNt(..)) => 2,
TtToken(_, token::MatchNt(..)) => 3,
TtDelimited(_, ref delimed) => {
@@ -921,11 +920,6 @@ impl TokenTree {
}
delimed.tts[index - 1].clone()
}
(&TtToken(sp, token::SubstNt(name, name_st)), _) => {
let v = [TtToken(sp, token::Dollar),
TtToken(sp, token::Ident(name, name_st))];
v[index]
}
(&TtToken(sp, token::SpecialVarNt(var)), _) => {
let v = [TtToken(sp, token::Dollar),
TtToken(sp, token::Ident(token::str_to_ident(var.as_str()),
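Context for the deletions above: `len()` and the paired `get_tt` indexing let certain single tokens masquerade as short token-tree sequences while the reader walks them. With the `SubstNt` cases gone, `$name` travels as one token; the remaining virtual expansions are roughly as follows (shapes inferred from this diff, not quoted from it):

```rust
// MatchNt(name, kind)  `$name:kind` -> SubstNt(name), Colon, Ident(kind)  (len 3)
// SpecialVarNt(var)    `$crate`     -> Dollar, Ident("crate")             (len 2)
// DocComment(text)     `/// text`   -> the desugared #[doc = "text"] form (len 2)
```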
17 changes: 11 additions & 6 deletions src/libsyntax/ext/tt/macro_parser.rs
@@ -506,6 +506,17 @@ pub fn parse(sess: &ParseSess,
}

pub fn parse_nt(p: &mut Parser, name: &str) -> Nonterminal {
match name {
"tt" => {
p.quote_depth += 1u; //but in theory, non-quoted tts might be useful
let res = token::NtTT(P(p.parse_token_tree()));
p.quote_depth -= 1u;
return res;
}
_ => {}
}
// check at the beginning and the parser checks after each bump
p.check_unknown_macro_variable();
match name {
"item" => match p.parse_item(Vec::new()) {
Some(i) => token::NtItem(i),
@@ -529,12 +540,6 @@ pub fn parse_nt(p: &mut Parser, name: &str) -> Nonterminal {
token::NtPath(box p.parse_path(LifetimeAndTypesWithoutColons))
}
"meta" => token::NtMeta(p.parse_meta_item()),
"tt" => {
p.quote_depth += 1u; //but in theory, non-quoted tts might be useful
let res = token::NtTT(P(p.parse_token_tree()));
p.quote_depth -= 1u;
res
}
_ => {
p.fatal(format!("unsupported builtin nonterminal parser: {}", name)[])
}
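The reason the `tt` arm is hoisted above the new check: `tt` fragments parse with `quote_depth > 0`, and `check_unknown_macro_variable` (added below in parser.rs) only fires at quote depth zero, so `$`-tokens inside a captured `tt` pass through untouched. A hedged illustration (macro names invented):

```rust
macro_rules! forward {
    // The entire argument is captured as raw token trees.
    ($($body:tt)*) => { $($body)* };
}

// `$x` is not a variable of `forward!`. Because `tt` fragments bump
// quote_depth before parsing, the unknown-variable check stays quiet and
// the inner macro definition is forwarded intact.
forward! {
    macro_rules! identity {
        ($x:expr) => { $x };
    }
}
```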
19 changes: 10 additions & 9 deletions src/libsyntax/ext/tt/macro_rules.rs
@@ -16,7 +16,7 @@ use ext::base::{NormalTT, TTMacroExpander};
use ext::tt::macro_parser::{Success, Error, Failure};
use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal};
use ext::tt::macro_parser::{parse, parse_or_else};
use parse::lexer::new_tt_reader;
use parse::lexer::{new_tt_reader, new_tt_reader_with_doc_flag};
use parse::parser::Parser;
use parse::attr::ParserAttr;
use parse::token::{special_idents, gensym_ident};
@@ -158,13 +158,13 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt,
_ => cx.span_fatal(sp, "malformed macro lhs")
};
// `None` is because we're not interpolating
let mut arg_rdr = new_tt_reader(&cx.parse_sess().span_diagnostic,
None,
None,
arg.iter()
.map(|x| (*x).clone())
.collect());
arg_rdr.desugar_doc_comments = true;
let arg_rdr = new_tt_reader_with_doc_flag(&cx.parse_sess().span_diagnostic,
None,
None,
arg.iter()
.map(|x| (*x).clone())
.collect(),
true);
match parse(cx.parse_sess(), cx.cfg(), arg_rdr, lhs_tt) {
Success(named_matches) => {
let rhs = match *rhses[i] {
@@ -183,7 +183,8 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt,
Some(named_matches),
imported_from,
rhs);
let p = Parser::new(cx.parse_sess(), cx.cfg(), box trncbr);
let mut p = Parser::new(cx.parse_sess(), cx.cfg(), box trncbr);
p.check_unknown_macro_variable();
// Let the context choose how to interpret the result.
// Weird, but useful for X-macros.
return box ParserAnyMacro {
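The `desugar_doc_comments` flag being threaded through here is what lets doc comments on macro arguments match like ordinary attributes. A small sketch of the behavior this preserves (macro name invented):

```rust
macro_rules! with_attrs {
    // With desugaring enabled, `/// text` in the invocation arrives here
    // as the equivalent #[doc = "text"] attribute and matches `meta`.
    ($(#[$m:meta])* fn $f:ident;) => {
        $(#[$m])* fn $f() {}
    };
}

with_attrs! {
    /// Documented via a doc comment, seen by the matcher as #[doc = "..."].
    fn documented;
}
```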
34 changes: 23 additions & 11 deletions src/libsyntax/ext/tt/transcribe.rs
@@ -53,13 +53,28 @@ pub struct TtReader<'a> {
}

/// This can do Macro-By-Example transcription. On the other hand, if
/// `src` contains no `TtSequence`s and `TtNonterminal`s, `interp` can (and
/// should) be none.
/// `src` contains no `TtSequence`s, `MatchNt`s or `SubstNt`s, `interp` can
/// (and should) be None.
pub fn new_tt_reader<'a>(sp_diag: &'a SpanHandler,
interp: Option<HashMap<Ident, Rc<NamedMatch>>>,
imported_from: Option<Ident>,
src: Vec<ast::TokenTree> )
src: Vec<ast::TokenTree>)
-> TtReader<'a> {
new_tt_reader_with_doc_flag(sp_diag, interp, imported_from, src, false)
}

/// The extra `desugar_doc_comments` flag enables reading doc comments
/// like any other attribute which consists of `meta` and surrounding #[ ] tokens.
///
/// This can do Macro-By-Example transcription. On the other hand, if
/// `src` contains no `TtSequence`s, `MatchNt`s or `SubstNt`s, `interp` can
/// (and should) be None.
pub fn new_tt_reader_with_doc_flag<'a>(sp_diag: &'a SpanHandler,
interp: Option<HashMap<Ident, Rc<NamedMatch>>>,
imported_from: Option<Ident>,
src: Vec<ast::TokenTree>,
desugar_doc_comments: bool)
-> TtReader<'a> {
let mut r = TtReader {
sp_diag: sp_diag,
stack: vec!(TtFrame {
@@ -80,7 +95,7 @@ pub fn new_tt_reader<'a>(sp_diag: &'a SpanHandler,
crate_name_next: None,
repeat_idx: Vec::new(),
repeat_len: Vec::new(),
desugar_doc_comments: false,
desugar_doc_comments: desugar_doc_comments,
/* dummy values, never read: */
cur_tok: token::Eof,
cur_span: DUMMY_SP,
@@ -266,18 +281,15 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
}
// FIXME #2887: think about span stuff here
TtToken(sp, SubstNt(ident, namep)) => {
r.stack.last_mut().unwrap().idx += 1;
match lookup_cur_matched(r, ident) {
None => {
r.stack.push(TtFrame {
forest: TtToken(sp, SubstNt(ident, namep)),
idx: 0,
dotdotdoted: false,
sep: None
});
r.cur_span = sp;
r.cur_tok = SubstNt(ident, namep);
return ret_val;
// this can't be 0 length, just like TtDelimited
}
Some(cur_matched) => {
r.stack.last_mut().unwrap().idx += 1;
match *cur_matched {
// sidestep the interpolation tricks for ident because
// (a) idents can be in lots of places, so it'd be a pain
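The rewritten `None` arm hands `$name` back as a single `SubstNt` token rather than pushing a frame that split it into `$` and `name`. That is what keeps nested macro definitions transcribing correctly; a minimal sketch:

```rust
macro_rules! outer {
    () => {
        macro_rules! inner {
            // `$x` has no binding in `outer`, so lookup_cur_matched
            // returns None while `outer` transcribes; the token must
            // survive as one `$x`, not decay into `$` followed by `x`.
            ($x:expr) => { $x };
        }
    };
}

outer!();

fn main() {
    assert_eq!(inner!(2 + 2), 4);
}
```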
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/mod.rs
@@ -25,7 +25,7 @@ use std::rc::Rc;
use std::str;
use std::string::CowString;

pub use ext::tt::transcribe::{TtReader, new_tt_reader};
pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};

pub mod comments;

4 changes: 3 additions & 1 deletion src/libsyntax/parse/mod.rs
@@ -297,7 +297,9 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
tts: Vec<ast::TokenTree>,
cfg: ast::CrateConfig) -> Parser<'a> {
let trdr = lexer::new_tt_reader(&sess.span_diagnostic, None, None, tts);
Parser::new(sess, cfg, box trdr)
let mut p = Parser::new(sess, cfg, box trdr);
p.check_unknown_macro_variable();
p
}

// FIXME (Issue #16472): The `with_hygiene` mod should go away after
138 changes: 83 additions & 55 deletions src/libsyntax/parse/parser.rs
@@ -946,6 +946,8 @@ impl<'a> Parser<'a> {
self.token = next.tok;
self.tokens_consumed += 1u;
self.expected_tokens.clear();
// check after each token
self.check_unknown_macro_variable();
}

/// Advance the parser by one token and return the bumped token.
@@ -2655,6 +2657,70 @@ impl<'a> Parser<'a> {
return e;
}

// Parse unquoted tokens after a `$` in a token tree
fn parse_unquoted(&mut self) -> TokenTree {
let mut sp = self.span;
let (name, namep) = match self.token {
token::Dollar => {
self.bump();

if self.token == token::OpenDelim(token::Paren) {
let Spanned { node: seq, span: seq_span } = self.parse_seq(
&token::OpenDelim(token::Paren),
&token::CloseDelim(token::Paren),
seq_sep_none(),
|p| p.parse_token_tree()
);
let (sep, repeat) = self.parse_sep_and_kleene_op();
let name_num = macro_parser::count_names(seq[]);
return TtSequence(mk_sp(sp.lo, seq_span.hi),
Rc::new(SequenceRepetition {
tts: seq,
separator: sep,
op: repeat,
num_captures: name_num
}));
} else if self.token.is_keyword_allow_following_colon(keywords::Crate) {
self.bump();
return TtToken(sp, SpecialVarNt(SpecialMacroVar::CrateMacroVar));
} else {
sp = mk_sp(sp.lo, self.span.hi);
let namep = match self.token { token::Ident(_, p) => p, _ => token::Plain };
let name = self.parse_ident();
(name, namep)
}
}
token::SubstNt(name, namep) => {
self.bump();
(name, namep)
}
_ => unreachable!()
};
// continue by trying to parse the `:ident` after `$name`
if self.token == token::Colon && self.look_ahead(1, |t| t.is_ident() &&
!t.is_strict_keyword() &&
!t.is_reserved_keyword()) {
self.bump();
sp = mk_sp(sp.lo, self.span.hi);
let kindp = match self.token { token::Ident(_, p) => p, _ => token::Plain };
let nt_kind = self.parse_ident();
TtToken(sp, MatchNt(name, nt_kind, namep, kindp))
} else {
TtToken(sp, SubstNt(name, namep))
}
}

pub fn check_unknown_macro_variable(&mut self) {
if self.quote_depth == 0u {
match self.token {
token::SubstNt(name, _) =>
self.fatal(format!("unknown macro variable `{}`",
token::get_ident(name))[]),
_ => {}
}
}
}

/// Parse an optional separator followed by a Kleene-style
/// repetition token (+ or *).
pub fn parse_sep_and_kleene_op(&mut self) -> (Option<token::Token>, ast::KleeneOp) {
@@ -2701,63 +2767,25 @@ impl<'a> Parser<'a> {
fn parse_non_delim_tt_tok(p: &mut Parser) -> TokenTree {
    maybe_whole!(deref p, NtTT);
    match p.token {
        token::CloseDelim(_) => {
            // This is a conservative error: only report the last unclosed delimiter. The
            // previous unclosed delimiters could actually be closed! The parser just hasn't
            // gotten to them yet.
            match p.open_braces.last() {
                None => {}
                Some(&sp) => p.span_note(sp, "unclosed delimiter"),
            };
            let token_str = p.this_token_to_string();
            p.fatal(format!("incorrect close delimiter: `{}`",
                            token_str)[])
        },
        /* we ought to allow different depths of unquotation */
        token::Dollar if p.quote_depth > 0u => {
            p.bump();
            let sp = p.span;

            if p.token == token::OpenDelim(token::Paren) {
                let seq = p.parse_seq(
                    &token::OpenDelim(token::Paren),
                    &token::CloseDelim(token::Paren),
                    seq_sep_none(),
                    |p| p.parse_token_tree()
                );
                let (sep, repeat) = p.parse_sep_and_kleene_op();
                let seq = match seq {
                    Spanned { node, .. } => node,
                };
                let name_num = macro_parser::count_names(seq[]);
                TtSequence(mk_sp(sp.lo, p.span.hi),
                           Rc::new(SequenceRepetition {
                               tts: seq,
                               separator: sep,
                               op: repeat,
                               num_captures: name_num
                           }))
            } else if p.token.is_keyword_allow_following_colon(keywords::Crate) {
                p.bump();
                TtToken(sp, SpecialVarNt(SpecialMacroVar::CrateMacroVar))
            } else {
                // A nonterminal that matches or not
                let namep = match p.token { token::Ident(_, p) => p, _ => token::Plain };
                let name = p.parse_ident();
                if p.token == token::Colon && p.look_ahead(1, |t| t.is_ident()) {
                    p.bump();
                    let kindp = match p.token { token::Ident(_, p) => p, _ => token::Plain };
                    let nt_kind = p.parse_ident();
                    let m = TtToken(sp, MatchNt(name, nt_kind, namep, kindp));
                    m
                } else {
                    TtToken(sp, SubstNt(name, namep))
                }
            }
        }
        _ => {
            TtToken(p.span, p.bump_and_get())
        }
        token::CloseDelim(_) => {
            // This is a conservative error: only report the last unclosed delimiter. The
            // previous unclosed delimiters could actually be closed! The parser just hasn't
            // gotten to them yet.
            match p.open_braces.last() {
                None => {}
                Some(&sp) => p.span_note(sp, "unclosed delimiter"),
            };
            let token_str = p.this_token_to_string();
            p.fatal(format!("incorrect close delimiter: `{}`",
                            token_str)[])
        },
        /* we ought to allow different depths of unquotation */
        token::Dollar | token::SubstNt(..) if p.quote_depth > 0u => {
            p.parse_unquoted()
        }
        _ => {
            TtToken(p.span, p.bump_and_get())
        }
    }
}

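Taken together, the new `parse_unquoted` covers every `$`-form a quoted token tree may contain: `$(...)` sequences with an optional separator and Kleene op, `$crate`, `$name:kind` matchers, and bare `$name` substitutions. A compact illustration in `macro_rules!` terms (not part of the diff):

```rust
macro_rules! sum {
    // `$head:expr` and `$rest:expr` are MatchNt tokens (the `:ident`
    // lookahead branch); `$(, $rest:expr)*` is a TtSequence with a `*`
    // Kleene op and no extra separator.
    ($head:expr $(, $rest:expr)*) => {
        // Bare `$head`/`$rest` here are SubstNt substitutions; `$crate`
        // (unused here) would parse as SpecialVarNt naming the defining crate.
        $head $(+ $rest)*
    };
}

fn main() {
    assert_eq!(sum!(1, 2, 3), 6);
}
```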
