Skip to content

Commit

Permalink
lexer: show correct span on lexical errors
Browse files Browse the repository at this point in the history
Previously, the lexer calling `rdr.fatal(...)` would report the span of
the last complete token, instead of a span within the erroneous token
(besides one span fixed in 1ac90bb).

This commit adds a wrapper around `rdr.fatal(...)` that sets the span
explicitly, so that all fatal errors in `libsyntax/parse/lexer.rs` now
report the offending code more precisely. A number of tests try to
verify that, though the `compile-fail` testing setup can only check that
the spans are on the right lines, and the "unterminated string/block
comment" errors can't have the line marked at all, so that's incomplete.

Closes #9149.
  • Loading branch information
ben0x539 committed Sep 19, 2013
1 parent d2b0b11 commit 1019177
Show file tree
Hide file tree
Showing 13 changed files with 215 additions and 19 deletions.
78 changes: 59 additions & 19 deletions src/libsyntax/parse/lexer.rs
Expand Up @@ -149,6 +149,16 @@ impl reader for TtReader {
fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
}

// Abort lexing with the fatal error message `m`, attributed to the
// half-open byte range [`from_pos`, `to_pos`).
//
// The reader's `peek_span` is overwritten with that range before
// `rdr.fatal` is invoked, so the diagnostic points at the offending
// text. This function never returns.
fn fatal_span(rdr: @mut StringReader,
              from_pos: BytePos,
              to_pos: BytePos,
              m: ~str)
           -> ! {
    let err_span = codemap::mk_sp(from_pos, to_pos);
    rdr.peek_span = err_span;
    rdr.fatal(m);
}

// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @mut StringReader) {
Expand Down Expand Up @@ -327,7 +337,8 @@ fn consume_block_comment(rdr: @mut StringReader)
bump(rdr);
}
if is_eof(rdr) {
rdr.fatal(~"unterminated block doc-comment");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"unterminated block doc-comment");
} else {
bump(rdr);
bump(rdr);
Expand All @@ -344,8 +355,12 @@ fn consume_block_comment(rdr: @mut StringReader)
}
}
} else {
let start_bpos = rdr.last_pos - BytePos(2u);
loop {
if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); }
if is_eof(rdr) {
fatal_span(rdr, start_bpos, rdr.last_pos,
~"unterminated block comment");
}
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
Expand All @@ -362,6 +377,7 @@ fn consume_block_comment(rdr: @mut StringReader)
}

fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
let start_bpos = rdr.last_pos;
let mut c = rdr.curr;
let mut rslt = ~"";
if c == 'e' || c == 'E' {
Expand All @@ -375,7 +391,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
let exponent = scan_digits(rdr, 10u);
if exponent.len() > 0u {
return Some(rslt + exponent);
} else { rdr.fatal(~"scan_exponent: bad fp literal"); }
} else {
fatal_span(rdr, start_bpos, rdr.last_pos,
~"scan_exponent: bad fp literal");
}
} else { return None::<~str>; }
}

Expand All @@ -399,6 +418,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
let mut base = 10u;
let mut c = c;
let mut n = nextch(rdr);
let start_bpos = rdr.last_pos;
if c == '0' && n == 'x' {
bump(rdr);
bump(rdr);
Expand Down Expand Up @@ -442,11 +462,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
else { either::Right(ast::ty_u64) };
}
if num_str.len() == 0u {
rdr.fatal(~"no valid digits found for number");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"no valid digits found for number");
}
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
Some(p) => p,
None => rdr.fatal(~"int literal is too large")
None => fatal_span(rdr, start_bpos, rdr.last_pos,
~"int literal is too large")
};

match tp {
Expand All @@ -464,8 +486,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
}
if is_float {
match base {
16u => rdr.fatal(~"hexadecimal float literal is not supported"),
2u => rdr.fatal(~"binary float literal is not supported"),
16u => fatal_span(rdr, start_bpos, rdr.last_pos,
~"hexadecimal float literal is not supported"),
2u => fatal_span(rdr, start_bpos, rdr.last_pos,
~"binary float literal is not supported"),
_ => ()
}
}
Expand Down Expand Up @@ -507,11 +531,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
} else {
if num_str.len() == 0u {
rdr.fatal(~"no valid digits found for number");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"no valid digits found for number");
}
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
Some(p) => p,
None => rdr.fatal(~"int literal is too large")
None => fatal_span(rdr, start_bpos, rdr.last_pos,
~"int literal is too large")
};

debug!("lexing %s as an unsuffixed integer literal",
Expand All @@ -523,19 +549,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char {
let mut accum_int = 0;
let mut i = n_hex_digits;
let start_bpos = rdr.last_pos;
while i != 0u {
let n = rdr.curr;
bump(rdr);
if !is_hex_digit(n) {
rdr.fatal(fmt!("illegal numeric character escape: %d", n as int));
fatal_span(rdr, rdr.last_pos, rdr.pos,
fmt!("illegal numeric character escape: %d",
n as int));
}
bump(rdr);
accum_int *= 16;
accum_int += hex_digit_val(n);
i -= 1u;
}
match char::from_u32(accum_int as u32) {
Some(x) => x,
None => rdr.fatal(fmt!("illegal numeric character escape"))
None => fatal_span(rdr, start_bpos, rdr.last_pos,
fmt!("illegal numeric character escape"))
}
}

Expand Down Expand Up @@ -691,6 +721,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
if c2 == '\\' {
// '\X' for some X must be a character constant:
let escaped = rdr.curr;
let escaped_pos = rdr.last_pos;
bump(rdr);
match escaped {
'n' => { c2 = '\n'; }
Expand All @@ -704,12 +735,18 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
'u' => { c2 = scan_numeric_escape(rdr, 4u); }
'U' => { c2 = scan_numeric_escape(rdr, 8u); }
c2 => {
rdr.fatal(fmt!("unknown character escape: %d", c2 as int));
fatal_span(rdr, escaped_pos, rdr.last_pos,
fmt!("unknown character escape: %d", c2 as int));
}
}
}
if rdr.curr != '\'' {
rdr.fatal(~"unterminated character constant");
fatal_span(rdr,
// Byte offsetting here is okay because the character
// before position `start` is an ascii single quote.
start - BytePos(1u),
rdr.last_pos,
~"unterminated character constant");
}
bump(rdr); // advance curr past token
return token::LIT_CHAR(c2 as u32);
Expand All @@ -721,7 +758,9 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
while rdr.curr != '"' {
if is_eof(rdr) {
do with_str_from(rdr, n) |s| {
rdr.fatal(fmt!("unterminated double quote string: %s", s));
fatal_span(rdr, n, rdr.last_pos,
fmt!("unterminated double quote string: %s",
s));
}
}

Expand All @@ -730,6 +769,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
match ch {
'\\' => {
let escaped = rdr.curr;
let escaped_pos = rdr.last_pos;
bump(rdr);
match escaped {
'n' => accum_str.push_char('\n'),
Expand All @@ -750,7 +790,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
accum_str.push_char(scan_numeric_escape(rdr, 8u));
}
c2 => {
rdr.fatal(fmt!("unknown string escape: %d", c2 as int));
fatal_span(rdr, escaped_pos, rdr.last_pos,
fmt!("unknown string escape: %d", c2 as int));
}
}
}
Expand Down Expand Up @@ -786,11 +827,10 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
'^' => { return binop(rdr, token::CARET); }
'%' => { return binop(rdr, token::PERCENT); }
c => {
// So the error span points to the unrecognized character
rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos);
let mut cs = ~"";
char::escape_default(c, |c| cs.push_char(c));
rdr.fatal(fmt!("unknown start of token: %s", cs));
fatal_span(rdr, rdr.last_pos, rdr.pos,
fmt!("unknown start of token: %s", cs));
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-bad-fp-lit.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static f: float =
1e+ //~ ERROR: scan_exponent: bad fp literal
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-hex-float-lit.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static f: float =
0x539.0 //~ ERROR: hexadecimal float literal is not supported
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-illegal-num-char-escape-2.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\Uffffffff' //~ ERROR: illegal numeric character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-illegal-num-char-escape.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\u539_' //~ ERROR: illegal numeric character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-int-lit-too-large-2.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
99999999999999999999999999999999u32 //~ ERROR: int literal is too large
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-int-lit-too-large.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
99999999999999999999999999999999 //~ ERROR: int literal is too large
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-no-valid-digits-2.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
0xu32 //~ ERROR: no valid digits
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-no-valid-digits.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
0x //~ ERROR: no valid digits
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unknown-char-escape.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\●' //~ ERROR: unknown character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unknown-start-tok.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

fn main() {
● //~ ERROR: unknown start of token
}
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unknown-str-escape.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static s: &'static str =
"\●" //~ ERROR: unknown string escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unterminated-char-const.rs
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'//~ ERROR: unterminated character constant
;

0 comments on commit 1019177

Please sign in to comment.