Navigation Menu

Skip to content

Commit

Permalink
remove special handling of \r\n from the lexer
Browse files: browse the repository at this point in the history
  • Loading branch information
matklad committed Aug 14, 2019
1 parent 004f3ac commit 911398b
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 104 deletions.
2 changes: 0 additions & 2 deletions src/librustc_lexer/src/lib.rs
Expand Up @@ -352,7 +352,6 @@ impl Cursor<'_> {
loop {
match self.nth_char(0) {
'\n' => break,
'\r' if self.nth_char(1) == '\n' => break,
EOF_CHAR if self.is_eof() => break,
_ => {
self.bump();
Expand Down Expand Up @@ -525,7 +524,6 @@ impl Cursor<'_> {
match self.nth_char(0) {
'/' if !first => break,
'\n' if self.nth_char(1) != '\'' => break,
'\r' if self.nth_char(1) == '\n' => break,
EOF_CHAR if self.is_eof() => break,
'\'' => {
self.bump();
Expand Down
36 changes: 8 additions & 28 deletions src/librustc_lexer/src/unescape.rs
Expand Up @@ -128,11 +128,7 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
if first_char != '\\' {
return match first_char {
'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(if chars.clone().next() == Some('\n') {
EscapeError::EscapeOnlyChar
} else {
EscapeError::BareCarriageReturn
}),
'\r' => Err(EscapeError::BareCarriageReturn),
'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
_ => {
Expand Down Expand Up @@ -244,27 +240,15 @@ where

let unescaped_char = match first_char {
'\\' => {
let (second_char, third_char) = {
let mut chars = chars.clone();
(chars.next(), chars.next())
};
match (second_char, third_char) {
(Some('\n'), _) | (Some('\r'), Some('\n')) => {
let second_char = chars.clone().next();
match second_char {
Some('\n') => {
skip_ascii_whitespace(&mut chars);
continue;
}
_ => scan_escape(first_char, &mut chars, mode),
}
}
'\r' => {
let second_char = chars.clone().next();
if second_char == Some('\n') {
chars.next();
Ok('\n')
} else {
scan_escape(first_char, &mut chars, mode)
}
}
'\n' => Ok('\n'),
'\t' => Ok('\t'),
_ => scan_escape(first_char, &mut chars, mode),
Expand Down Expand Up @@ -298,15 +282,11 @@ where
while let Some(curr) = chars.next() {
let start = initial_len - chars.as_str().len() - curr.len_utf8();

let result = match (curr, chars.clone().next()) {
('\r', Some('\n')) => {
chars.next();
Ok('\n')
},
('\r', _) => Err(EscapeError::BareCarriageReturnInRawString),
(c, _) if mode.is_bytes() && !c.is_ascii() =>
let result = match curr {
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
c if mode.is_bytes() && !c.is_ascii() =>
Err(EscapeError::NonAsciiCharInByteString),
(c, _) => Ok(c),
c => Ok(c),
};
let end = initial_len - chars.as_str().len();

Expand Down
11 changes: 3 additions & 8 deletions src/librustc_lexer/src/unescape/tests.rs
Expand Up @@ -11,7 +11,6 @@ fn test_unescape_char_bad() {
check(r"\", EscapeError::LoneSlash);

check("\n", EscapeError::EscapeOnlyChar);
check("\r\n", EscapeError::EscapeOnlyChar);
check("\t", EscapeError::EscapeOnlyChar);
check("'", EscapeError::EscapeOnlyChar);
check("\r", EscapeError::BareCarriageReturn);
Expand All @@ -31,6 +30,7 @@ fn test_unescape_char_bad() {
check(r"\v", EscapeError::InvalidEscape);
check(r"\💩", EscapeError::InvalidEscape);
check(r"\●", EscapeError::InvalidEscape);
check("\\\r", EscapeError::InvalidEscape);

check(r"\x", EscapeError::TooShortHexEscape);
check(r"\x0", EscapeError::TooShortHexEscape);
Expand Down Expand Up @@ -116,10 +116,9 @@ fn test_unescape_str_good() {

check("foo", "foo");
check("", "");
check(" \t\n\r\n", " \t\n\n");
check(" \t\n", " \t\n");

check("hello \\\n world", "hello world");
check("hello \\\r\n world", "hello world");
check("thread's", "thread's")
}

Expand All @@ -134,7 +133,6 @@ fn test_unescape_byte_bad() {
check(r"\", EscapeError::LoneSlash);

check("\n", EscapeError::EscapeOnlyChar);
check("\r\n", EscapeError::EscapeOnlyChar);
check("\t", EscapeError::EscapeOnlyChar);
check("'", EscapeError::EscapeOnlyChar);
check("\r", EscapeError::BareCarriageReturn);
Expand Down Expand Up @@ -238,10 +236,9 @@ fn test_unescape_byte_str_good() {

check("foo", b"foo");
check("", b"");
check(" \t\n\r\n", b" \t\n\n");
check(" \t\n", b" \t\n");

check("hello \\\n world", b"hello world");
check("hello \\\r\n world", b"hello world");
check("thread's", b"thread's")
}

Expand All @@ -253,7 +250,6 @@ fn test_unescape_raw_str() {
assert_eq!(unescaped, expected);
}

check("\r\n", &[(0..2, Ok('\n'))]);
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]);
}
Expand All @@ -266,7 +262,6 @@ fn test_unescape_raw_byte_str() {
assert_eq!(unescaped, expected);
}

check("\r\n", &[(0..2, Ok(byte_from_char('\n')))]);
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
check(
Expand Down
81 changes: 15 additions & 66 deletions src/libsyntax/parse/lexer/mod.rs
Expand Up @@ -8,9 +8,7 @@ use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
use rustc_lexer::Base;
use rustc_lexer::unescape;

use std::borrow::Cow;
use std::char;
use std::iter;
use std::convert::TryInto;
use rustc_data_structures::sync::Lrc;
use log::debug;
Expand Down Expand Up @@ -181,18 +179,7 @@ impl<'a> StringReader<'a> {
let string = self.str_from(start);
// comments with only more "/"s are not doc comments
let tok = if is_doc_comment(string) {
let mut idx = 0;
loop {
idx = match string[idx..].find('\r') {
None => break,
Some(it) => idx + it + 1
};
if string[idx..].chars().next() != Some('\n') {
self.err_span_(start + BytePos(idx as u32 - 1),
start + BytePos(idx as u32),
"bare CR not allowed in doc-comment");
}
}
self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
token::DocComment(Symbol::intern(string))
} else {
token::Comment
Expand All @@ -217,15 +204,10 @@ impl<'a> StringReader<'a> {
}

let tok = if is_doc_comment {
let has_cr = string.contains('\r');
let string = if has_cr {
self.translate_crlf(start,
string,
"bare CR not allowed in block doc-comment")
} else {
string.into()
};
token::DocComment(Symbol::intern(&string[..]))
self.forbid_bare_cr(start,
string,
"bare CR not allowed in block doc-comment");
token::DocComment(Symbol::intern(string))
} else {
token::Comment
};
Expand Down Expand Up @@ -516,49 +498,16 @@ impl<'a> StringReader<'a> {
&self.src[self.src_index(start)..self.src_index(end)]
}

/// Converts CRLF to LF in the given string, raising an error on bare CR.
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut chars = s.char_indices().peekable();
while let Some((i, ch)) = chars.next() {
if ch == '\r' {
if let Some((lf_idx, '\n')) = chars.peek() {
return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
}
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
self.err_span_(pos, end_pos, errmsg);
}
}
return s.into();

fn translate_crlf_(rdr: &StringReader<'_>,
start: BytePos,
s: &str,
mut j: usize,
mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
errmsg: &str)
-> String {
let mut buf = String::with_capacity(s.len());
// Skip first CR
buf.push_str(&s[.. j - 1]);
while let Some((i, ch)) = chars.next() {
if ch == '\r' {
if j < i {
buf.push_str(&s[j..i]);
}
let next = i + ch.len_utf8();
j = next;
if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
rdr.err_span_(pos, end_pos, errmsg);
}
}
}
if j < s.len() {
buf.push_str(&s[j..]);
}
buf
/// Flags every carriage return (`'\r'`) found in `s` by calling
/// `self.err_span_` with `errmsg` on the one-byte span covering it.
///
/// `start` is the position that byte offset 0 of `s` is measured from. No
/// look-ahead is performed, so a `'\r'` that is immediately followed by
/// `'\n'` is flagged as well.
fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
    for (cr_offset, _) in s.match_indices('\r') {
        // Offset of the byte just past the CR; the span is `[past_cr - 1, past_cr)`.
        let past_cr = cr_offset + 1;
        let lo = start + BytePos(past_cr as u32 - 1);
        let hi = start + BytePos(past_cr as u32);
        self.err_span_(lo, hi, errmsg);
    }
}

Expand Down

0 comments on commit 911398b

Please sign in to comment.