diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 02ef94fe9adba..e3830b1e3b668 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1086,10 +1086,12 @@ impl<'a> StringReader<'a> { Ok(TokenKind::lit(token::Str, symbol, suffix)) } 'r' => { - let (kind, symbol) = self.scan_raw_string(); + let (start, end, hash_count) = self.scan_raw_string(); + let symbol = self.name_from_to(start, end); + self.validate_raw_str_escape(start, end); let suffix = self.scan_optional_raw_name(); - Ok(TokenKind::lit(kind, symbol, suffix)) + Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix)) } '-' => { if self.nextch_is('>') { @@ -1243,7 +1245,7 @@ impl<'a> StringReader<'a> { id } - fn scan_raw_string(&mut self) -> (token::LitKind, Symbol) { + fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) { let start_bpos = self.pos; self.bump(); let mut hash_count: u16 = 0; @@ -1273,7 +1275,6 @@ impl<'a> StringReader<'a> { self.bump(); let content_start_bpos = self.pos; let mut content_end_bpos; - let mut valid = true; 'outer: loop { match self.ch { None => { @@ -1289,29 +1290,14 @@ impl<'a> StringReader<'a> { } break; } - Some(c) => { - if c == '\r' && !self.nextch_is('\n') { - let last_bpos = self.pos; - self.err_span_(start_bpos, - last_bpos, - "bare CR not allowed in raw string, use \\r \ - instead"); - valid = false; - } - } + _ => (), } self.bump(); } self.bump(); - let symbol = if valid { - self.name_from_to(content_start_bpos, content_end_bpos) - } else { - Symbol::intern("??") - }; - - (token::StrRaw(hash_count), symbol) + (content_start_bpos, content_end_bpos, hash_count) } fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) { @@ -1421,6 +1407,23 @@ impl<'a> StringReader<'a> { }); } + fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { + self.with_str_from_to(content_start, content_end, |lit: &str| { + unescape::unescape_raw_str(lit, &mut |range, c| { + if let 
Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) + } + }) + }); + } + fn validate_byte_str_escape(&self, start_with_quote: BytePos) { self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { unescape::unescape_byte_str(lit, &mut |range, c| { diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 7d5356ffe4d8d..3a2d905585c0e 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -4,7 +4,8 @@ use crate::ast::{self, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; use crate::parse::token::{self, Token, TokenKind}; -use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; +use crate::parse::unescape::{unescape_str, unescape_byte_str, unescape_raw_str}; +use crate::parse::unescape::{unescape_char, unescape_byte}; use crate::print::pprust; use crate::symbol::{kw, sym, Symbol}; use crate::tokenstream::{TokenStream, TokenTree}; @@ -141,7 +142,17 @@ impl LitKind { // Ditto. 
let s = symbol.as_str(); let symbol = if s.contains('\r') { - Symbol::intern(&raw_str_lit(&s)) + let mut buf = String::with_capacity(s.len()); + let mut error = Ok(()); + unescape_raw_str(&s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + buf.shrink_to_fit(); + Symbol::intern(&buf) } else { symbol }; @@ -350,29 +361,6 @@ crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option String { - debug!("raw_str_lit: {:?}", lit); - let mut res = String::with_capacity(lit.len()); - - let mut chars = lit.chars().peekable(); - while let Some(c) = chars.next() { - if c == '\r' { - if *chars.peek().unwrap() != '\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push('\n'); - } else { - res.push(c); - } - } - - res.shrink_to_fit(); - res -} - // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) diff --git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs index 55a628d411e4b..d6b7db16305bb 100644 --- a/src/libsyntax/parse/unescape.rs +++ b/src/libsyntax/parse/unescape.rs @@ -66,6 +66,28 @@ where }) } +/// Takes a contents of a string literal (without quotes) and produces a +/// sequence of characters or errors. +/// NOTE: Raw strings do not perform any explicit character escaping, here we +/// only translate CRLF to LF and produce errors on bare CR. 
+pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F) +where + F: FnMut(Range<usize>, Result<char, EscapeError>), +{ + let mut byte_offset: usize = 0; + + let mut chars = literal_text.chars().peekable(); + while let Some(curr) = chars.next() { + let (result, len) = match (curr, chars.peek()) { + ('\r', Some('\n')) => { chars.next(); (Ok('\n'), 2) } /* CRLF: consume the LF as well and yield one '\n' spanning both bytes */ + ('\r', _) => (Err(EscapeError::BareCarriageReturn), 1), + _ => (Ok(curr), curr.len_utf8()), + }; + callback(byte_offset..(byte_offset + len), result); + byte_offset += len; + } +} + #[derive(Debug, Clone, Copy)] pub(crate) enum Mode { Char, diff --git a/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.rs b/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.rs index b588b007ae929..ed5df42f9dd4e 100644 --- a/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.rs +++ b/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.rs @@ -21,7 +21,7 @@ fn main() { let _s = "foo bar"; //~ ERROR: bare CR not allowed in string // the following string literal has a bare CR in it - let _s = r"bar foo"; //~ ERROR: bare CR not allowed in raw string + let _s = r"bar foo"; //~ ERROR: bare CR not allowed in string // the following string literal has a bare CR in it let _s = "foo\ bar"; //~ ERROR: unknown character escape: \r diff --git a/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.stderr b/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.stderr index 7d944569ca9c4..153237a7f71b4 100644 --- a/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.stderr +++ b/src/test/ui/parser/lex-bare-cr-string-literal-doc-comment.stderr @@ -28,11 +28,11 @@ error: bare CR not allowed in string, use \r instead LL | let _s = "foo bar"; | ^ -error: bare CR not allowed in raw string, use \r instead - --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:14 +error: bare CR not allowed in string, use \r instead + --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:19 | LL | let _s = r"bar foo"; - | ^^^^^ + | ^ error: unknown character escape: \r 
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:27:19