From 3f064cae3d9d0d33951a44c30d83696563244572 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 11 May 2019 02:31:34 +0300 Subject: [PATCH] Move literal parsing code into a separate file Remove some dead code --- src/libsyntax/attr/mod.rs | 99 ---- src/libsyntax/parse/classify.rs | 13 - src/libsyntax/parse/lexer/mod.rs | 19 +- src/libsyntax/parse/literal.rs | 487 ++++++++++++++++++ src/libsyntax/parse/mod.rs | 352 +------------ src/libsyntax/parse/parser.rs | 56 +- src/test/ui/attr-eq-token-tree.stderr | 4 +- ...sive_range_pattern_syntax_collision.stderr | 4 +- ...ive_range_pattern_syntax_collision2.stderr | 4 +- src/test/ui/macros/macro-attribute.stderr | 4 +- .../malformed/malformed-interpolated.stderr | 8 +- src/test/ui/parser/attr-bad-meta-2.stderr | 4 +- src/test/ui/parser/pat-tuple-5.stderr | 4 +- 13 files changed, 521 insertions(+), 537 deletions(-) create mode 100644 src/libsyntax/parse/literal.rs diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index c122e1994e749..07e4bbf78ffdf 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -27,11 +27,9 @@ use crate::ThinVec; use crate::tokenstream::{TokenStream, TokenTree, DelimSpan}; use crate::GLOBALS; -use errors::Handler; use log::debug; use syntax_pos::{FileName, Span}; -use std::ascii; use std::iter; use std::ops::DerefMut; @@ -620,103 +618,6 @@ impl NestedMetaItem { } } -impl Lit { - crate fn tokens(&self) -> TokenStream { - let token = match self.token { - token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), - token => Token::Literal(token, self.suffix), - }; - TokenTree::Token(self.span, token).into() - } -} - -impl LitKind { - /// Attempts to recover a token from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn to_lit_token(&self) -> (token::Lit, Option) { - match *self { - LitKind::Str(string, ast::StrStyle::Cooked) => { - let escaped = string.as_str().escape_default().to_string(); - (token::Lit::Str_(Symbol::intern(&escaped)), None) - } - LitKind::Str(string, ast::StrStyle::Raw(n)) => { - (token::Lit::StrRaw(string, n), None) - } - LitKind::ByteStr(ref bytes) => { - let string = bytes.iter().cloned().flat_map(ascii::escape_default) - .map(Into::::into).collect::(); - (token::Lit::ByteStr(Symbol::intern(&string)), None) - } - LitKind::Byte(byte) => { - let string: String = ascii::escape_default(byte).map(Into::::into).collect(); - (token::Lit::Byte(Symbol::intern(&string)), None) - } - LitKind::Char(ch) => { - let string: String = ch.escape_default().map(Into::::into).collect(); - (token::Lit::Char(Symbol::intern(&string)), None) - } - LitKind::Int(n, ty) => { - let suffix = match ty { - ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), - ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), - ast::LitIntType::Unsuffixed => None, - }; - (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) - } - LitKind::Float(symbol, ty) => { - (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) - } - LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), - LitKind::Bool(value) => { - let kw = if value { keywords::True } else { keywords::False }; - (token::Lit::Bool(kw.name()), None) - } - LitKind::Err(val) => (token::Lit::Err(val), None), - } - } -} - -impl Lit { - /// Converts literal token with a suffix into an AST literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. - /// If diagnostic handler is passed, may return `Some`, - /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. - crate fn from_token( - token: &token::Token, - span: Span, - diag: Option<(Span, &Handler)>, - ) -> Option { - let (token, suffix) = match *token { - token::Ident(ident, false) if ident.name == keywords::True.name() || - ident.name == keywords::False.name() => - (token::Bool(ident.name), None), - token::Literal(token, suffix) => - (token, suffix), - token::Interpolated(ref nt) => { - if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { - if let ast::ExprKind::Lit(lit) = &expr.node { - return Some(lit.clone()); - } - } - return None; - } - _ => return None, - }; - - let node = LitKind::from_lit_token(token, suffix, diag)?; - Some(Lit { node, token, suffix, span }) - } - - /// Attempts to recover an AST literal from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { - let (token, suffix) = node.to_lit_token(); - Lit { node, token, suffix, span } - } -} - pub trait HasAttrs: Sized { fn attrs(&self) -> &[ast::Attribute]; fn visit_attrs)>(&mut self, f: F); diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs index b4103440e3577..dfd6f451c28d7 100644 --- a/src/libsyntax/parse/classify.rs +++ b/src/libsyntax/parse/classify.rs @@ -25,16 +25,3 @@ pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool { _ => true, } } - -/// this statement requires a semicolon after it. -/// note that in one case (`stmt_semi`), we've already -/// seen the semicolon, and thus don't need another. -pub fn stmt_ends_with_semi(stmt: &ast::StmtKind) -> bool { - match *stmt { - ast::StmtKind::Local(_) => true, - ast::StmtKind::Expr(ref e) => expr_requires_semi_to_be_stmt(e), - ast::StmtKind::Item(_) | - ast::StmtKind::Semi(..) | - ast::StmtKind::Mac(..) => false, - } -} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 2882acb0e780c..e76605cde32ab 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -262,18 +262,6 @@ impl<'a> StringReader<'a> { } } - pub fn new(sess: &'a ParseSess, - source_file: Lrc, - override_span: Option) -> Self { - let mut sr = StringReader::new_raw(sess, source_file, override_span); - if sr.advance_token().is_err() { - sr.emit_fatal_errors(); - FatalError.raise(); - } - - sr - } - pub fn new_or_buffered_errs(sess: &'a ParseSess, source_file: Lrc, override_span: Option) -> Result> { @@ -1627,7 +1615,12 @@ mod tests { teststr: String) -> StringReader<'a> { let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr); - StringReader::new(sess, sf, None) + let mut sr = StringReader::new_raw(sess, sf, None); + if sr.advance_token().is_err() { + sr.emit_fatal_errors(); + FatalError.raise(); + } + sr } #[test] diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs new file mode 100644 index 0000000000000..2c7ba13fbef82 --- /dev/null +++ b/src/libsyntax/parse/literal.rs @@ -0,0 +1,487 @@ +//! Code related to parsing literals. + +use crate::ast::{self, Ident, Lit, LitKind}; +use crate::parse::parser::Parser; +use crate::parse::PResult; +use crate::parse::token::{self, Token}; +use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; +use crate::print::pprust; +use crate::symbol::{keywords, Symbol}; +use crate::tokenstream::{TokenStream, TokenTree}; + +use errors::{Applicability, Handler}; +use log::debug; +use rustc_data_structures::sync::Lrc; +use syntax_pos::Span; + +use std::ascii; + +macro_rules! err { + ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { + match $opt_diag { + Some(($span, $diag)) => { $($body)* } + None => return None, + } + } +} + +impl LitKind { + /// Converts literal token with a suffix into a semantic literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, always returns `Some`, + /// possibly after reporting non-fatal errors and recovery. + fn from_lit_token( + lit: token::Lit, + suf: Option, + diag: Option<(Span, &Handler)> + ) -> Option { + if suf.is_some() && !lit.may_have_suffix() { + err!(diag, |span, diag| { + expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) + }); + } + + Some(match lit { + token::Bool(i) => { + assert!(i == keywords::True.name() || i == keywords::False.name()); + LitKind::Bool(i == keywords::True.name()) + } + token::Byte(i) => { + match unescape_byte(&i.as_str()) { + Ok(c) => LitKind::Byte(c), + Err(_) => LitKind::Err(i), + } + }, + token::Char(i) => { + match unescape_char(&i.as_str()) { + Ok(c) => LitKind::Char(c), + Err(_) => LitKind::Err(i), + } + }, + token::Err(i) => LitKind::Err(i), + + // There are some valid suffixes for integer and float literals, + // so all the handling is done internally. + token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), + token::Float(s) => return float_lit(&s.as_str(), suf, diag), + + token::Str_(mut sym) => { + // If there are no characters requiring special treatment we can + // reuse the symbol from the Token. Otherwise, we must generate a + // new symbol because the string in the LitKind is different to the + // string in the Token. + let mut has_error = false; + let s = &sym.as_str(); + if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { + let mut buf = String::with_capacity(s.len()); + unescape_str(s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => has_error = true, + } + }); + if has_error { + return Some(LitKind::Err(sym)); + } + sym = Symbol::intern(&buf) + } + + LitKind::Str(sym, ast::StrStyle::Cooked) + } + token::StrRaw(mut sym, n) => { + // Ditto. + let s = &sym.as_str(); + if s.contains('\r') { + sym = Symbol::intern(&raw_str_lit(s)); + } + LitKind::Str(sym, ast::StrStyle::Raw(n)) + } + token::ByteStr(i) => { + let s = &i.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut has_error = false; + unescape_byte_str(s, &mut |_, unescaped_byte| { + match unescaped_byte { + Ok(c) => buf.push(c), + Err(_) => has_error = true, + } + }); + if has_error { + return Some(LitKind::Err(i)); + } + buf.shrink_to_fit(); + LitKind::ByteStr(Lrc::new(buf)) + } + token::ByteStrRaw(i, _) => { + LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) + } + }) + } + + /// Attempts to recover a token from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn to_lit_token(&self) -> (token::Lit, Option) { + match *self { + LitKind::Str(string, ast::StrStyle::Cooked) => { + let escaped = string.as_str().escape_default().to_string(); + (token::Lit::Str_(Symbol::intern(&escaped)), None) + } + LitKind::Str(string, ast::StrStyle::Raw(n)) => { + (token::Lit::StrRaw(string, n), None) + } + LitKind::ByteStr(ref bytes) => { + let string = bytes.iter().cloned().flat_map(ascii::escape_default) + .map(Into::::into).collect::(); + (token::Lit::ByteStr(Symbol::intern(&string)), None) + } + LitKind::Byte(byte) => { + let string: String = ascii::escape_default(byte).map(Into::::into).collect(); + (token::Lit::Byte(Symbol::intern(&string)), None) + } + LitKind::Char(ch) => { + let string: String = ch.escape_default().map(Into::::into).collect(); + (token::Lit::Char(Symbol::intern(&string)), None) + } + LitKind::Int(n, ty) => { + let suffix = match ty { + ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), + ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), + ast::LitIntType::Unsuffixed => None, + }; + (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) + } + LitKind::Float(symbol, ty) => { + (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) + } + LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), + LitKind::Bool(value) => { + let kw = if value { keywords::True } else { keywords::False }; + (token::Lit::Bool(kw.name()), None) + } + LitKind::Err(val) => (token::Lit::Err(val), None), + } + } +} + +impl Lit { + /// Converts literal token with a suffix into an AST literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, may return `Some`, + /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. + crate fn from_token( + token: &token::Token, + span: Span, + diag: Option<(Span, &Handler)>, + ) -> Option { + let (token, suffix) = match *token { + token::Ident(ident, false) if ident.name == keywords::True.name() || + ident.name == keywords::False.name() => + (token::Bool(ident.name), None), + token::Literal(token, suffix) => + (token, suffix), + token::Interpolated(ref nt) => { + if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { + if let ast::ExprKind::Lit(lit) = &expr.node { + return Some(lit.clone()); + } + } + return None; + } + _ => return None, + }; + + let node = LitKind::from_lit_token(token, suffix, diag)?; + Some(Lit { node, token, suffix, span }) + } + + /// Attempts to recover an AST literal from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { + let (token, suffix) = node.to_lit_token(); + Lit { node, token, suffix, span } + } + + /// Losslessly convert an AST literal into a token stream. + crate fn tokens(&self) -> TokenStream { + let token = match self.token { + token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), + token => Token::Literal(token, self.suffix), + }; + TokenTree::Token(self.span, token).into() + } +} + +impl<'a> Parser<'a> { + /// Matches `lit = true | false | token_lit`. + crate fn parse_lit(&mut self) -> PResult<'a, Lit> { + let diag = Some((self.span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { + self.bump(); + return Ok(lit); + } else if self.token == token::Dot { + // Recover `.4` as `0.4`. + let recovered = self.look_ahead(1, |t| { + if let token::Literal(token::Integer(val), suf) = *t { + let next_span = self.look_ahead_span(1); + if self.span.hi() == next_span.lo() { + let sym = String::from("0.") + &val.as_str(); + let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); + return Some((token, self.span.to(next_span))); + } + } + None + }); + if let Some((token, span)) = recovered { + self.diagnostic() + .struct_span_err(span, "float literals must have an integer part") + .span_suggestion( + span, + "must have an integer part", + pprust::token_to_string(&token), + Applicability::MachineApplicable, + ) + .emit(); + let diag = Some((span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&token, span, diag) { + self.bump(); + self.bump(); + return Ok(lit); + } + } + } + + Err(self.span_fatal(self.span, &format!("unexpected token: {}", self.this_token_descr()))) + } +} + +crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { + match suffix { + None => {/* everything ok */} + Some(suf) => { + let text = suf.as_str(); + if text.is_empty() { + diag.span_bug(sp, "found empty literal suffix in Some") + } + let mut err = if kind == "a tuple index" && + ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) + { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = diag.struct_span_warn( + sp, + &format!("suffixes on {} are invalid", kind), + ); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + text, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "for more context, see https://github.com/rust-lang/rust/issues/60210", + ); + err + } else { + diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) + }; + err.span_label(sp, format!("invalid suffix `{}`", text)); + err.emit(); + } + } +} + +/// Parses a string representing a raw string literal into its final form. The +/// only operation this does is convert embedded CRLF into a single LF. +fn raw_str_lit(lit: &str) -> String { + debug!("raw_str_lit: given {}", lit.escape_default()); + let mut res = String::with_capacity(lit.len()); + + let mut chars = lit.chars().peekable(); + while let Some(c) = chars.next() { + if c == '\r' { + if *chars.peek().unwrap() != '\n' { + panic!("lexer accepted bare CR"); + } + chars.next(); + res.push('\n'); + } else { + res.push(c); + } + } + + res.shrink_to_fit(); + res +} + +// check if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) +} + +fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + debug!("filtered_float_lit: {}, {:?}", data, suffix); + let suffix = match suffix { + Some(suffix) => suffix, + None => return Some(LitKind::FloatUnsuffixed(data)), + }; + + Some(match &*suffix.as_str() { + "f32" => LitKind::Float(data, ast::FloatTy::F32), + "f64" => LitKind::Float(data, ast::FloatTy::F64), + suf => { + err!(diag, |span, diag| { + if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { + // if it looks like a width, lets try to be helpful. + let msg = format!("invalid width `{}` for float literal", &suf[1..]); + diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() + } else { + let msg = format!("invalid suffix `{}` for float literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("valid suffixes are `f32` and `f64`") + .emit(); + } + }); + + LitKind::FloatUnsuffixed(data) + } + }) +} +fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + debug!("float_lit: {:?}, {:?}", s, suffix); + // FIXME #2252: bounds checking float literals is deferred until trans + + // Strip underscores without allocating a new String unless necessary. + let s2; + let s = if s.chars().any(|c| c == '_') { + s2 = s.chars().filter(|&c| c != '_').collect::(); + &s2 + } else { + s + }; + + filtered_float_lit(Symbol::intern(s), suffix, diag) +} + +fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + // s can only be ascii, byte indexing is fine + + // Strip underscores without allocating a new String unless necessary. + let s2; + let mut s = if s.chars().any(|c| c == '_') { + s2 = s.chars().filter(|&c| c != '_').collect::(); + &s2 + } else { + s + }; + + debug!("integer_lit: {}, {:?}", s, suffix); + + let mut base = 10; + let orig = s; + let mut ty = ast::LitIntType::Unsuffixed; + + if s.starts_with('0') && s.len() > 1 { + match s.as_bytes()[1] { + b'x' => base = 16, + b'o' => base = 8, + b'b' => base = 2, + _ => { } + } + } + + // 1f64 and 2f32 etc. are valid float literals. + if let Some(suf) = suffix { + if looks_like_width_suffix(&['f'], &suf.as_str()) { + let err = match base { + 16 => Some("hexadecimal float literal is not supported"), + 8 => Some("octal float literal is not supported"), + 2 => Some("binary float literal is not supported"), + _ => None, + }; + if let Some(err) = err { + err!(diag, |span, diag| { + diag.struct_span_err(span, err) + .span_label(span, "not supported") + .emit(); + }); + } + return filtered_float_lit(Symbol::intern(s), Some(suf), diag) + } + } + + if base != 10 { + s = &s[2..]; + } + + if let Some(suf) = suffix { + if suf.as_str().is_empty() { + err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); + } + ty = match &*suf.as_str() { + "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), + "i8" => ast::LitIntType::Signed(ast::IntTy::I8), + "i16" => ast::LitIntType::Signed(ast::IntTy::I16), + "i32" => ast::LitIntType::Signed(ast::IntTy::I32), + "i64" => ast::LitIntType::Signed(ast::IntTy::I64), + "i128" => ast::LitIntType::Signed(ast::IntTy::I128), + "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), + "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), + "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), + "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), + "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), + "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), + suf => { + // i and u look like widths, so lets + // give an error message along those lines + err!(diag, |span, diag| { + if looks_like_width_suffix(&['i', 'u'], suf) { + let msg = format!("invalid width `{}` for integer literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 8, 16, 32, 64 and 128") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for numeric literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("the suffix must be one of the integral types \ + (`u32`, `isize`, etc)") + .emit(); + } + }); + + ty + } + } + } + + debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ + string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); + + Some(match u128::from_str_radix(s, base) { + Ok(r) => LitKind::Int(r, ty), + Err(_) => { + // small bases are lexed as if they were base 10, e.g, the string + // might be `0b10201`. This will cause the conversion above to fail, + // but these cases have errors in the lexer: we don't want to emit + // two errors, and we especially don't want to emit this error since + // it isn't necessarily true. + let already_errored = base < 10 && + s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + + if !already_errored { + err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); + } + LitKind::Int(0, ty) + } + }) +} diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 868b344c06584..526143b28755f 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -1,11 +1,10 @@ //! The main parser interface. -use crate::ast::{self, CrateConfig, LitKind, NodeId}; +use crate::ast::{self, CrateConfig, NodeId}; use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId}; use crate::source_map::{SourceMap, FilePathMapping}; use crate::feature_gate::UnstableFeatures; use crate::parse::parser::Parser; -use crate::symbol::{keywords, Symbol}; use crate::syntax::parse::parser::emit_unclosed_delims; use crate::tokenstream::{TokenStream, TokenTree}; use crate::diagnostics::plugin::ErrorMap; @@ -14,7 +13,6 @@ use crate::print::pprust::token_to_string; use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder}; use rustc_data_structures::sync::{Lrc, Lock}; use syntax_pos::{Span, SourceFile, FileName, MultiSpan}; -use log::debug; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use std::borrow::Cow; @@ -25,18 +23,15 @@ pub type PResult<'a, T> = Result>; #[macro_use] pub mod parser; - +pub mod attr; pub mod lexer; pub mod token; -pub mod attr; -pub mod diagnostics; - -pub mod classify; - -pub(crate) mod unescape; -use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; -pub(crate) mod unescape_error_reporting; +crate mod classify; +crate mod diagnostics; +crate mod literal; +crate mod unescape; +crate mod unescape_error_reporting; /// Info about a parsing session. pub struct ParseSess { @@ -334,339 +329,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> { Parser::new(sess, stream, None, true, false) } -/// Parses a string representing a raw string literal into its final form. The -/// only operation this does is convert embedded CRLF into a single LF. -fn raw_str_lit(lit: &str) -> String { - debug!("raw_str_lit: given {}", lit.escape_default()); - let mut res = String::with_capacity(lit.len()); - - let mut chars = lit.chars().peekable(); - while let Some(c) = chars.next() { - if c == '\r' { - if *chars.peek().unwrap() != '\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push('\n'); - } else { - res.push(c); - } - } - - res.shrink_to_fit(); - res -} - -// check if `s` looks like i32 or u1234 etc. -fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) -} - -macro_rules! err { - ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { - match $opt_diag { - Some(($span, $diag)) => { $($body)* } - None => return None, - } - } -} - -crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { - match suffix { - None => {/* everything ok */} - Some(suf) => { - let text = suf.as_str(); - if text.is_empty() { - diag.span_bug(sp, "found empty literal suffix in Some") - } - let mut err = if kind == "a tuple index" && - ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) - { - // #59553: warn instead of reject out of hand to allow the fix to percolate - // through the ecosystem when people fix their macros - let mut err = diag.struct_span_warn( - sp, - &format!("suffixes on {} are invalid", kind), - ); - err.note(&format!( - "`{}` is *temporarily* accepted on tuple index fields as it was \ - incorrectly accepted on stable for a few releases", - text, - )); - err.help( - "on proc macros, you'll want to use `syn::Index::from` or \ - `proc_macro::Literal::*_unsuffixed` for code that will desugar \ - to tuple field access", - ); - err.note( - "for more context, see https://github.com/rust-lang/rust/issues/60210", - ); - err - } else { - diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) - }; - err.span_label(sp, format!("invalid suffix `{}`", text)); - err.emit(); - } - } -} - -impl LitKind { - /// Converts literal token with a suffix into a semantic literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. - /// If diagnostic handler is passed, always returns `Some`, - /// possibly after reporting non-fatal errors and recovery. - crate fn from_lit_token( - lit: token::Lit, - suf: Option, - diag: Option<(Span, &Handler)> - ) -> Option { - if suf.is_some() && !lit.may_have_suffix() { - err!(diag, |span, diag| { - expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) - }); - } - - Some(match lit { - token::Bool(i) => { - assert!(i == keywords::True.name() || i == keywords::False.name()); - LitKind::Bool(i == keywords::True.name()) - } - token::Byte(i) => { - match unescape_byte(&i.as_str()) { - Ok(c) => LitKind::Byte(c), - Err(_) => LitKind::Err(i), - } - }, - token::Char(i) => { - match unescape_char(&i.as_str()) { - Ok(c) => LitKind::Char(c), - Err(_) => LitKind::Err(i), - } - }, - token::Err(i) => LitKind::Err(i), - - // There are some valid suffixes for integer and float literals, - // so all the handling is done internally. - token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), - token::Float(s) => return float_lit(&s.as_str(), suf, diag), - - token::Str_(mut sym) => { - // If there are no characters requiring special treatment we can - // reuse the symbol from the Token. Otherwise, we must generate a - // new symbol because the string in the LitKind is different to the - // string in the Token. - let mut has_error = false; - let s = &sym.as_str(); - if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { - let mut buf = String::with_capacity(s.len()); - unescape_str(s, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => has_error = true, - } - }); - if has_error { - return Some(LitKind::Err(sym)); - } - sym = Symbol::intern(&buf) - } - - LitKind::Str(sym, ast::StrStyle::Cooked) - } - token::StrRaw(mut sym, n) => { - // Ditto. - let s = &sym.as_str(); - if s.contains('\r') { - sym = Symbol::intern(&raw_str_lit(s)); - } - LitKind::Str(sym, ast::StrStyle::Raw(n)) - } - token::ByteStr(i) => { - let s = &i.as_str(); - let mut buf = Vec::with_capacity(s.len()); - let mut has_error = false; - unescape_byte_str(s, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(_) => has_error = true, - } - }); - if has_error { - return Some(LitKind::Err(i)); - } - buf.shrink_to_fit(); - LitKind::ByteStr(Lrc::new(buf)) - } - token::ByteStrRaw(i, _) => { - LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) - } - }) - } -} - -fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - debug!("filtered_float_lit: {}, {:?}", data, suffix); - let suffix = match suffix { - Some(suffix) => suffix, - None => return Some(LitKind::FloatUnsuffixed(data)), - }; - - Some(match &*suffix.as_str() { - "f32" => LitKind::Float(data, ast::FloatTy::F32), - "f64" => LitKind::Float(data, ast::FloatTy::F64), - suf => { - err!(diag, |span, diag| { - if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { - // if it looks like a width, lets try to be helpful. - let msg = format!("invalid width `{}` for float literal", &suf[1..]); - diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() - } else { - let msg = format!("invalid suffix `{}` for float literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("valid suffixes are `f32` and `f64`") - .emit(); - } - }); - - LitKind::FloatUnsuffixed(data) - } - }) -} -fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - debug!("float_lit: {:?}, {:?}", s, suffix); - // FIXME #2252: bounds checking float literals is deferred until trans - - // Strip underscores without allocating a new String unless necessary. - let s2; - let s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - filtered_float_lit(Symbol::intern(s), suffix, diag) -} - -fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - // s can only be ascii, byte indexing is fine - - // Strip underscores without allocating a new String unless necessary. - let s2; - let mut s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - debug!("integer_lit: {}, {:?}", s, suffix); - - let mut base = 10; - let orig = s; - let mut ty = ast::LitIntType::Unsuffixed; - - if s.starts_with('0') && s.len() > 1 { - match s.as_bytes()[1] { - b'x' => base = 16, - b'o' => base = 8, - b'b' => base = 2, - _ => { } - } - } - - // 1f64 and 2f32 etc. are valid float literals. - if let Some(suf) = suffix { - if looks_like_width_suffix(&['f'], &suf.as_str()) { - let err = match base { - 16 => Some("hexadecimal float literal is not supported"), - 8 => Some("octal float literal is not supported"), - 2 => Some("binary float literal is not supported"), - _ => None, - }; - if let Some(err) = err { - err!(diag, |span, diag| { - diag.struct_span_err(span, err) - .span_label(span, "not supported") - .emit(); - }); - } - return filtered_float_lit(Symbol::intern(s), Some(suf), diag) - } - } - - if base != 10 { - s = &s[2..]; - } - - if let Some(suf) = suffix { - if suf.as_str().is_empty() { - err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); - } - ty = match &*suf.as_str() { - "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), - "i8" => ast::LitIntType::Signed(ast::IntTy::I8), - "i16" => ast::LitIntType::Signed(ast::IntTy::I16), - "i32" => ast::LitIntType::Signed(ast::IntTy::I32), - "i64" => ast::LitIntType::Signed(ast::IntTy::I64), - "i128" => ast::LitIntType::Signed(ast::IntTy::I128), - "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), - "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), - "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), - "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), - "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), - "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), - suf => { - // i and u look like widths, so lets - // give an error message along those lines - err!(diag, |span, diag| { - if looks_like_width_suffix(&['i', 'u'], suf) { - let msg = format!("invalid width `{}` for integer literal", &suf[1..]); - diag.struct_span_err(span, &msg) - .help("valid widths are 8, 16, 32, 64 and 128") - .emit(); - } else { - let msg = format!("invalid suffix `{}` for numeric literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("the suffix must be one of the integral types \ - (`u32`, `isize`, etc)") - .emit(); - } - }); - - ty - } - } - } - - debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ - string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); - - Some(match u128::from_str_radix(s, base) { - Ok(r) => LitKind::Int(r, ty), - Err(_) => { - // small bases are lexed as if they were base 10, e.g, the string - // might be `0b10201`. This will cause the conversion above to fail, - // but these cases have errors in the lexer: we don't want to emit - // two errors, and we especially don't want to emit this error since - // it isn't necessarily true. - let already_errored = base < 10 && - s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - - if !already_errored { - err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); - } - LitKind::Int(0, ty) - } - }) -} - /// A sequence separator. pub struct SeqSep { /// The seperator token. diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index b81f7be9c2c14..f95981680b940 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -15,7 +15,7 @@ use crate::ast::{ForeignItem, ForeignItemKind, FunctionRetTy}; use crate::ast::{GenericParam, GenericParamKind}; use crate::ast::GenericArg; use crate::ast::{Ident, ImplItem, IsAsync, IsAuto, Item, ItemKind}; -use crate::ast::{Label, Lifetime, Lit}; +use crate::ast::{Label, Lifetime}; use crate::ast::{Local, LocalSource}; use crate::ast::MacStmtStyle; use crate::ast::{Mac, Mac_, MacDelimiter}; @@ -35,7 +35,7 @@ use crate::ast::{RangeEnd, RangeSyntax}; use crate::{ast, attr}; use crate::ext::base::DummyResult; use crate::source_map::{self, SourceMap, Spanned, respan}; -use crate::parse::{self, SeqSep, classify, token}; +use crate::parse::{SeqSep, classify, literal, token}; use crate::parse::lexer::{TokenAndSpan, UnmatchedBrace}; use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; use crate::parse::token::DelimToken; @@ -613,7 +613,7 @@ impl<'a> Parser<'a> { }) } - fn this_token_descr(&self) -> String { + crate fn this_token_descr(&self) -> String { if let Some(prefix) = self.token_descr() { format!("{} `{}`", prefix, self.this_token_to_string()) } else { @@ -621,11 +621,6 @@ impl<'a> Parser<'a> { } } - fn unexpected_last(&self, t: &token::Token) -> PResult<'a, T> { - let token_str = pprust::token_to_string(t); - Err(self.span_fatal(self.prev_span, &format!("unexpected token: `{}`", token_str))) - } - crate fn unexpected(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -1109,7 +1104,7 @@ impl<'a> Parser<'a> { } fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option) { - parse::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) + literal::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) } /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single @@ -1387,7 +1382,7 @@ impl<'a> Parser<'a> { }) } - fn look_ahead_span(&self, dist: usize) -> Span { + crate fn look_ahead_span(&self, dist: usize) -> Span { if dist == 0 { return self.span } @@ -2030,47 +2025,6 @@ impl<'a> Parser<'a> { } } - /// Matches `lit = true | false | token_lit`. - crate fn parse_lit(&mut self) -> PResult<'a, Lit> { - let diag = Some((self.span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { - self.bump(); - return Ok(lit); - } else if self.token == token::Dot { - // Recover `.4` as `0.4`. - let recovered = self.look_ahead(1, |t| { - if let token::Literal(token::Integer(val), suf) = *t { - let next_span = self.look_ahead_span(1); - if self.span.hi() == next_span.lo() { - let sym = String::from("0.") + &val.as_str(); - let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); - return Some((token, self.span.to(next_span))); - } - } - None - }); - if let Some((token, span)) = recovered { - self.diagnostic() - .struct_span_err(span, "float literals must have an integer part") - .span_suggestion( - span, - "must have an integer part", - pprust::token_to_string(&token), - Applicability::MachineApplicable, - ) - .emit(); - let diag = Some((span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&token, span, diag) { - self.bump(); - self.bump(); - return Ok(lit); - } - } - } - - self.unexpected_last(&self.token) - } - /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). crate fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { maybe_whole_expr!(self); diff --git a/src/test/ui/attr-eq-token-tree.stderr b/src/test/ui/attr-eq-token-tree.stderr index aae25b2721e4d..571779dfa1ae7 100644 --- a/src/test/ui/attr-eq-token-tree.stderr +++ b/src/test/ui/attr-eq-token-tree.stderr @@ -1,8 +1,8 @@ error: unexpected token: `!` - --> $DIR/attr-eq-token-tree.rs:3:11 + --> $DIR/attr-eq-token-tree.rs:3:13 | LL | #[my_attr = !] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr index 03867a8e43b10..359725a41c105 100644 --- a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr +++ b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr @@ -1,8 +1,8 @@ error: unexpected token: `,` - --> $DIR/exclusive_range_pattern_syntax_collision.rs:5:15 + --> $DIR/exclusive_range_pattern_syntax_collision.rs:5:17 | LL | [_, 99.., _] => {}, - | ^^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr index 5ac435bf011e4..8f849d7b3f87c 100644 --- a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr +++ b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr @@ -1,8 +1,8 @@ error: unexpected token: `]` - --> $DIR/exclusive_range_pattern_syntax_collision2.rs:5:15 + --> $DIR/exclusive_range_pattern_syntax_collision2.rs:5:17 | LL | [_, 99..] => {}, - | ^^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/macros/macro-attribute.stderr b/src/test/ui/macros/macro-attribute.stderr index aa1cd94b0c638..d28ce25341d3a 100644 --- a/src/test/ui/macros/macro-attribute.stderr +++ b/src/test/ui/macros/macro-attribute.stderr @@ -1,8 +1,8 @@ error: unexpected token: `$` - --> $DIR/macro-attribute.rs:1:7 + --> $DIR/macro-attribute.rs:1:9 | LL | #[doc = $not_there] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/malformed/malformed-interpolated.stderr b/src/test/ui/malformed/malformed-interpolated.stderr index bc2146e409d47..e805416172bab 100644 --- a/src/test/ui/malformed/malformed-interpolated.stderr +++ b/src/test/ui/malformed/malformed-interpolated.stderr @@ -7,19 +7,19 @@ LL | check!(0u8); = help: instead of using a suffixed literal (1u8, 1.0f32, etc.), use an unsuffixed version (1, 1.0, etc.). error: unexpected token: `-0` - --> $DIR/malformed-interpolated.rs:5:19 + --> $DIR/malformed-interpolated.rs:5:21 | LL | #[my_attr = $expr] - | ^ + | ^^^^^ ... LL | check!(-0); // ERROR, see above | ----------- in this macro invocation error: unexpected token: `0 + 0` - --> $DIR/malformed-interpolated.rs:5:19 + --> $DIR/malformed-interpolated.rs:5:21 | LL | #[my_attr = $expr] - | ^ + | ^^^^^ ... LL | check!(0 + 0); // ERROR, see above | -------------- in this macro invocation diff --git a/src/test/ui/parser/attr-bad-meta-2.stderr b/src/test/ui/parser/attr-bad-meta-2.stderr index ffbfc583e8a75..2d772dae69125 100644 --- a/src/test/ui/parser/attr-bad-meta-2.stderr +++ b/src/test/ui/parser/attr-bad-meta-2.stderr @@ -1,8 +1,8 @@ error: unexpected token: `]` - --> $DIR/attr-bad-meta-2.rs:1:8 + --> $DIR/attr-bad-meta-2.rs:1:9 | LL | #[path =] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/parser/pat-tuple-5.stderr b/src/test/ui/parser/pat-tuple-5.stderr index 61ae40b355d38..f9832214c6800 100644 --- a/src/test/ui/parser/pat-tuple-5.stderr +++ b/src/test/ui/parser/pat-tuple-5.stderr @@ -1,8 +1,8 @@ error: unexpected token: `)` - --> $DIR/pat-tuple-5.rs:3:14 + --> $DIR/pat-tuple-5.rs:3:16 | LL | (pat ..) => {} - | ^^ + | ^ error: aborting due to previous error