Skip to content

Commit

Permalink
Auto merge of #72918 - Dylan-DPC:rollup-pnvj62b, r=Dylan-DPC
Browse files Browse the repository at this point in the history
Rollup of 4 pull requests

Successful merges:

 - #72884 (RawString error reporting cleanup )
 - #72888 (Add a warning about infinite reading in read_(until|line))
 - #72914 (Minor: off-by-one error in RELEASES.md)
 - #72916 (Update README.md)

Failed merges:

r? @ghost
  • Loading branch information
bors committed Jun 2, 2020
2 parents eeaf497 + bed597a commit fe10f1a
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 242 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -23,7 +23,7 @@ or reading the [rustc dev guide][rustcguidebuild].

[rustcguidebuild]: https://rustc-dev-guide.rust-lang.org/building/how-to-build-and-run.html

### Building on Unix-like system
### Building on a Unix-like system
1. Make sure you have installed the dependencies:

* `g++` 5.1 or later or `clang++` 3.5 or later
Expand Down
2 changes: 1 addition & 1 deletion RELEASES.md
Expand Up @@ -43,7 +43,7 @@ Libraries
- [Unicode 13 is now supported.][69929]
- [`String` now implements `From<&mut str>`.][69661]
- [`IoSlice` now implements `Copy`.][69403]
- [`Vec<T>` now implements `From<[T; N]>`.][68692] Where `N` is less than 32.
- [`Vec<T>` now implements `From<[T; N]>`.][68692] Where `N` is at most 32.
- [`proc_macro::LexError` now implements `fmt::Display` and `Error`.][68899]
- [`from_le_bytes`, `to_le_bytes`, `from_be_bytes`, `to_be_bytes`,
`from_ne_bytes`, and `to_ne_bytes` methods are now `const` for all
Expand Down
150 changes: 45 additions & 105 deletions src/librustc_lexer/src/lib.rs
Expand Up @@ -29,7 +29,7 @@ mod tests;
use self::LiteralKind::*;
use self::TokenKind::*;
use crate::cursor::{Cursor, EOF_CHAR};
use std::convert::TryInto;
use std::convert::TryFrom;

/// Parsed token.
/// It doesn't contain information about data that has been parsed,
Expand Down Expand Up @@ -142,84 +142,24 @@ pub enum LiteralKind {
/// "b"abc"", "b"abc"
ByteStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
RawStr(UnvalidatedRawStr),
RawStr { n_hashes: u16, err: Option<RawStrError> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
RawByteStr(UnvalidatedRawStr),
}

/// Represents something that looks like a raw string, but may have some
/// problems. Use `.validate()` to convert it into something
/// usable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnvalidatedRawStr {
/// The prefix (`r###"`) is valid
valid_start: bool,

/// The postfix (`"###`) is valid
valid_end: bool,

/// The number of leading `#`
n_start_hashes: usize,
/// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
n_end_hashes: usize,
/// The offset starting at `r` or `br` where the user may have intended to end the string.
/// Currently, it is the longest sequence of pattern `"#+"`.
possible_terminator_offset: Option<usize>,
RawByteStr { n_hashes: u16, err: Option<RawStrError> },
}

/// Error produced validating a raw string. Represents cases like:
/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter`
/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
/// - Too many `#`s (>65536): `TooManyDelimiters`
/// - `r##~"abcde"##`: `InvalidStarter`
/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
/// - Too many `#`s (>65535): `TooManyDelimiters`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LexRawStrError {
pub enum RawStrError {
/// Non `#` characters exist between `r` and `"` eg. `r#~"..`
InvalidStarter,
InvalidStarter { bad_char: char },
/// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
/// may have intended to terminate it.
NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
/// More than 65536 `#`s exist.
TooManyDelimiters,
}

/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
/// there are a matching number of `#` characters in both. Note that this will
/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
/// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct ValidatedRawStr {
n_hashes: u16,
}

impl ValidatedRawStr {
pub fn num_hashes(&self) -> u16 {
self.n_hashes
}
}

impl UnvalidatedRawStr {
pub fn validate(self) -> Result<ValidatedRawStr, LexRawStrError> {
if !self.valid_start {
return Err(LexRawStrError::InvalidStarter);
}

// Only up to 65535 `#`s are allowed in raw strings
let n_start_safe: u16 =
self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?;

if self.n_start_hashes > self.n_end_hashes || !self.valid_end {
Err(LexRawStrError::NoTerminator {
expected: self.n_start_hashes,
found: self.n_end_hashes,
possible_terminator_offset: self.possible_terminator_offset,
})
} else {
// Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
// they must be equal.
debug_assert_eq!(self.n_start_hashes, self.n_end_hashes);
Ok(ValidatedRawStr { n_hashes: n_start_safe })
}
}
/// More than 65535 `#`s exist.
TooManyDelimiters { found: usize },
}

/// Base of numeric literal encoding according to its prefix.
Expand Down Expand Up @@ -354,12 +294,12 @@ impl Cursor<'_> {
'r' => match (self.first(), self.second()) {
('#', c1) if is_id_start(c1) => self.raw_ident(),
('#', _) | ('"', _) => {
let raw_str_i = self.raw_double_quoted_string(1);
let (n_hashes, err) = self.raw_double_quoted_string(1);
let suffix_start = self.len_consumed();
if raw_str_i.n_end_hashes == raw_str_i.n_start_hashes {
if err.is_none() {
self.eat_literal_suffix();
}
let kind = RawStr(raw_str_i);
let kind = RawStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
Expand Down Expand Up @@ -389,14 +329,12 @@ impl Cursor<'_> {
}
('r', '"') | ('r', '#') => {
self.bump();
let raw_str_i = self.raw_double_quoted_string(2);
let (n_hashes, err) = self.raw_double_quoted_string(2);
let suffix_start = self.len_consumed();
let terminated = raw_str_i.n_start_hashes == raw_str_i.n_end_hashes;
if terminated {
if err.is_none() {
self.eat_literal_suffix();
}

let kind = RawByteStr(raw_str_i);
let kind = RawByteStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
Expand Down Expand Up @@ -692,27 +630,34 @@ impl Cursor<'_> {
false
}

/// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr {
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option<RawStrError>) {
// Wrap the actual function to handle the error with too many hashes.
// This way, it eats the whole raw string.
let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
// Only up to 65535 `#`s are allowed in raw strings
match u16::try_from(n_hashes) {
Ok(num) => (num, err),
// We lie about the number of hashes here :P
Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
}
}

fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
debug_assert!(self.prev() == 'r');
let mut valid_start: bool = false;
let start_pos = self.len_consumed();
let (mut possible_terminator_offset, mut max_hashes) = (None, 0);
let mut possible_terminator_offset = None;
let mut max_hashes = 0;

// Count opening '#' symbols.
let n_start_hashes = self.eat_while(|c| c == '#');

// Check that string is started.
match self.bump() {
Some('"') => valid_start = true,
_ => {
return UnvalidatedRawStr {
valid_start,
valid_end: false,
n_start_hashes,
n_end_hashes: 0,
possible_terminator_offset,
};
Some('"') => (),
c => {
let c = c.unwrap_or(EOF_CHAR);
return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
}
}

Expand All @@ -722,13 +667,14 @@ impl Cursor<'_> {
self.eat_while(|c| c != '"');

if self.is_eof() {
return UnvalidatedRawStr {
valid_start,
valid_end: false,
return (
n_start_hashes,
n_end_hashes: max_hashes,
possible_terminator_offset,
};
Some(RawStrError::NoTerminator {
expected: n_start_hashes,
found: max_hashes,
possible_terminator_offset,
}),
);
}

// Eat closing double quote.
Expand All @@ -737,7 +683,7 @@ impl Cursor<'_> {
// Check that amount of closing '#' symbols
// is equal to the amount of opening ones.
// Note that this will not consume extra trailing `#` characters:
// `r###"abcde"####` is lexed as a `LexedRawString { n_hashes: 3 }`
// `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
// followed by a `#` token.
let mut hashes_left = n_start_hashes;
let is_closing_hash = |c| {
Expand All @@ -751,13 +697,7 @@ impl Cursor<'_> {
let n_end_hashes = self.eat_while(is_closing_hash);

if n_end_hashes == n_start_hashes {
return UnvalidatedRawStr {
valid_start,
valid_end: true,
n_start_hashes,
n_end_hashes,
possible_terminator_offset: None,
};
return (n_start_hashes, None);
} else if n_end_hashes > max_hashes {
// Keep track of possible terminators to give a hint about
// where there might be a missing terminator
Expand Down

0 comments on commit fe10f1a

Please sign in to comment.