syntax: implement 'macro input future proofing'
See RFC 550 (rust-lang/rfcs#550) for the motivation
and details.

If this breaks your code, add one of the listed tokens after the relevant
non-terminal in your matcher.

[breaking-change]
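
As a rough sketch of the kind of adjustment this asks for (illustrative only, not part of the commit; the macro name `vec_of` is invented): a matcher that places a `ty` fragment directly before a token outside its follow set is now rejected, and can be fixed either by inserting one of the allowed tokens after the fragment or by wrapping the fragment in its own delimiters, since a closing delimiter is always accepted.

// Rejected after this change: `<` may not directly follow a `ty` fragment.
//     macro_rules! vec_of { ($t:ty <) => ( Vec::<$t>::new() ) }
// Accepted: the fragment is followed by `,` (in the follow set for `ty`)
// or by a closing delimiter, which is always allowed.
macro_rules! vec_of {
    (($t:ty)) => ( Vec::<$t>::new() );
    ($t:ty ,) => ( Vec::<$t>::new() );
}

fn main() {
    let a: Vec<u8> = vec_of!((u8));
    let b: Vec<i32> = vec_of!(i32,);
    assert!(a.is_empty() && b.is_empty());
}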
emberian committed Jan 6, 2015
1 parent 8efd990 commit 6680c9c
Showing 3 changed files with 167 additions and 8 deletions.
148 changes: 140 additions & 8 deletions src/libsyntax/ext/tt/macro_rules.rs
@@ -1,4 +1,4 @@
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use ast::{Ident, TtDelimited, TtSequence, TtToken};
use ast::{TokenTree, TtDelimited, TtSequence, TtToken};
use ast;
use codemap::{Span, DUMMY_SP};
use ext::base::{ExtCtxt, MacResult, SyntaxExtension};
@@ -19,8 +19,8 @@ use ext::tt::macro_parser::{parse, parse_or_else};
use parse::lexer::new_tt_reader;
use parse::parser::Parser;
use parse::attr::ParserAttr;
use parse::token::{special_idents, gensym_ident};
use parse::token::{MatchNt, NtTT};
use parse::token::{special_idents, gensym_ident, NtTT, Token};
use parse::token::Token::*;
use parse::token;
use print;
use ptr::P;
@@ -109,8 +109,8 @@ impl<'a> MacResult for ParserAnyMacro<'a> {
}

struct MacroRulesMacroExpander {
name: Ident,
imported_from: Option<Ident>,
name: ast::Ident,
imported_from: Option<ast::Ident>,
lhses: Vec<Rc<NamedMatch>>,
rhses: Vec<Rc<NamedMatch>>,
}
@@ -134,8 +134,8 @@ impl TTMacroExpander for MacroRulesMacroExpander {
/// Given `lhses` and `rhses`, this is the new macro we create
fn generic_extension<'cx>(cx: &'cx ExtCtxt,
sp: Span,
name: Ident,
imported_from: Option<Ident>,
name: ast::Ident,
imported_from: Option<ast::Ident>,
arg: &[ast::TokenTree],
lhses: &[Rc<NamedMatch>],
rhses: &[Rc<NamedMatch>])
@@ -260,6 +260,10 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt,
_ => cx.span_bug(def.span, "wrong-structured lhs")
};

for lhs in lhses.iter() {
check_lhs_nt_follows(cx, &**lhs, def.span);
}

let rhses = match *argument_map[rhs_nm] {
MatchedSeq(ref s, _) => /* FIXME (#2543) */ (*s).clone(),
_ => cx.span_bug(def.span, "wrong-structured rhs")
@@ -274,3 +278,131 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt,

NormalTT(exp, Some(def.span))
}

fn check_lhs_nt_follows(cx: &mut ExtCtxt, lhs: &NamedMatch, sp: Span) {
// lhs is going to be like MatchedNonterminal(NtTT(TtDelimited(...))), where
// the entire lhs is those tts.
// if ever we get box/deref patterns, this could turn into an `if let
// &MatchedNonterminal(NtTT(box TtDelimited(...))) = lhs`
let matcher = match lhs {
&MatchedNonterminal(NtTT(ref inner)) => match &**inner {
&TtDelimited(_, ref tts) => tts.tts[],
_ => cx.span_bug(sp, "wrong-structured lhs for follow check")
},
_ => cx.span_bug(sp, "wrong-structured lhs for follow check")
};

check_matcher(cx, matcher, &Eof);
// we don't abort on rejection; the driver will do that for us after
// parsing/expansion, so we can report every error in every macro this way.
}

fn check_matcher(cx: &mut ExtCtxt, matcher: &[TokenTree], follow: &Token) {
use print::pprust::token_to_string;

// 1. If there are no tokens in M, accept
if matcher.is_empty() {
return;
}

// 2. For each token T in M:
let mut tokens = matcher.iter().peekable();
while let Some(token) = tokens.next() {
match *token {
TtToken(sp, MatchNt(ref name, ref frag_spec, _, _)) => {
// ii. If T is a simple NT, look ahead to the next token T' in
// M.
let next_token = match tokens.peek() {
// If T' closes a complex NT, replace T' with F
Some(&&TtToken(_, CloseDelim(_))) => follow,
Some(&&TtToken(_, ref tok)) => tok,
// T' is any NT (this catches complex NTs, the next
// iteration will die if it's a TtDelimited).
Some(_) => continue,
// else, we're at the end of the macro or sequence
None => follow
};

// If T' is in the set FOLLOW(NT), continue. Else, reject.
match *next_token {
Eof | MatchNt(..) => continue,
_ if is_in_follow(cx, next_token, frag_spec.as_str()) => continue,
ref tok => cx.span_err(sp, format!("`${0}:{1}` is followed by `{2}`, which \
is not allowed for `{1}` fragments",
name.as_str(), frag_spec.as_str(),
token_to_string(tok))[])
}
},
TtSequence(_, ref seq) => {
// iii. Else, T is a complex NT.
match seq.separator {
// If T has the form $(...)U+ or $(...)U* for some token U,
// run the algorithm on the contents with F set to U. If it
// accepts, continue, else, reject.
Some(ref u) => check_matcher(cx, seq.tts[], u),
// If T has the form $(...)+ or $(...)*, run the algorithm
// on the contents with F set to EOF. If it accepts,
// continue, else, reject.
None => check_matcher(cx, seq.tts[], &Eof)
}
},
TtToken(..) => {
// i. If T is not an NT, continue.
continue
},
TtDelimited(_, ref tts) => {
// if we don't pass in that close delimiter, we'll incorrectly consider the matcher
// `{ $foo:ty }` as having a follow that isn't `}`
check_matcher(cx, tts.tts[], &tts.close_token())
}
}
}
}

fn is_in_follow(cx: &ExtCtxt, tok: &Token, frag: &str) -> bool {
if let &CloseDelim(_) = tok {
return true;
}

match frag {
"item" => {
// since items *must* be followed by either a `;` or a `}`, we can
// accept anything after them
true
},
"block" => {
// anything can follow a block; the braces provide an easy boundary to
// maintain
true
},
"stmt" | "expr" => {
match *tok {
Comma | Semi => true,
_ => false
}
},
"pat" => {
match *tok {
FatArrow | Comma | Eq => true,
_ => false
}
},
"path" | "ty" => {
match *tok {
Comma | RArrow | Colon | Eq | Gt => true,
Ident(i, _) if i.as_str() == "as" => true,
_ => false
}
},
"ident" => {
// being a single token, idents are harmless
true
},
"meta" | "tt" => {
// being either a single token or a delimited sequence, tt is
// harmless
true
},
_ => cx.bug(format!("unrecognized builtin nonterminal {}", frag)[]),
}
}
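
To make the follow sets encoded in `is_in_follow` concrete, here is a hypothetical set of matchers (illustrative only, not part of the commit; the macro name `follow_ok` is invented) that the new check accepts: each fragment is followed by a token its fragment kind permits, per the match arms above.

// Every arm passes check_matcher: each fragment is followed by a token
// that is_in_follow accepts for its fragment specifier.
macro_rules! follow_ok {
    ($i:ident $b:block) => ( () );        // ident, block, item, meta, tt: anything may follow
    ($e:expr , $s:stmt ;) => ( () );      // expr, stmt: `,` or `;`
    ($p:pat => $q:pat ,) => ( () );       // pat: `=>`, `,`, or `=`
    ($t:ty = $u:ty as $v:ty >) => ( () ); // ty, path: `,`, `->`, `:`, `=`, `>`, or `as`
}

fn main() {
    let _unit: () = follow_ok!(x { });
}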
1 change: 1 addition & 0 deletions src/libsyntax/parse/token.rs
@@ -392,6 +392,7 @@ impl fmt::Show for Nonterminal {
}
}


// Get the first "argument"
macro_rules! first {
( $first:expr, $( $remainder:expr, )* ) => ( $first )
26 changes: 26 additions & 0 deletions src/test/compile-fail/macro-input-future-proofing.rs
@@ -0,0 +1,26 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

macro_rules! errors_everywhere {
($ty:ty <) => (); //~ ERROR `$ty:ty` is followed by `<`, which is not allowed for `ty` fragments
($ty:ty < foo ,) => (); //~ ERROR `$ty:ty` is followed by `<`, which is not allowed for `ty`
($ty:ty , ) => ();
( ( $ty:ty ) ) => ();
( { $ty:ty } ) => ();
( [ $ty:ty ] ) => ();
($bl:block < ) => ();
($pa:pat >) => (); //~ ERROR `$pa:pat` is followed by `>`, which is not allowed for `pat`
($pa:pat , ) => ();
($pa:pat | ) => ();
($pa:pat $pb:pat $ty:ty ,) => ();
($($ty:ty)-+) => (); //~ ERROR `$ty:ty` is followed by `-`, which is not allowed for `ty`
}

fn main() { }
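
For comparison, a hypothetical rewrite of the first erroring arm along the lines the commit message suggests (illustrative only, not part of the commit; the macro name `fixed` is invented): inserting one of the allowed tokens, here `,`, between the `ty` fragment and the offending `<` satisfies the check, at the cost of requiring callers to write the extra token.

// `($ty:ty <)` is rejected; with a `,` after the fragment the matcher passes,
// since `,` is in the follow set for `ty`.
macro_rules! fixed {
    ($ty:ty , <) => ( Vec::<$ty>::new() );
}

fn main() {
    let v = fixed!(u8 , <);
    assert!(v.is_empty());
}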
