Skip to content

Commit

Permalink
Switch numeric suffix parsing to use the new system.
Browse files Browse the repository at this point in the history
This moves errors and all handling of numeric suffixes into the parser
rather than the lexer.
  • Loading branch information
huonw committed Nov 19, 2014
1 parent 6679595 commit 606a309
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 149 deletions.
74 changes: 4 additions & 70 deletions src/libsyntax/parse/lexer/mod.rs
Expand Up @@ -672,16 +672,6 @@ impl<'a> StringReader<'a> {
'0'...'9' | '_' | '.' => {
num_digits = self.scan_digits(10) + 1;
}
'u' | 'i' => {
self.scan_int_suffix();
return token::Integer(self.name_from(start_bpos));
},
'f' => {
let last_pos = self.last_pos;
self.scan_float_suffix();
self.check_float_base(start_bpos, last_pos, base);
return token::Float(self.name_from(start_bpos));
}
_ => {
// just a 0
return token::Integer(self.name_from(start_bpos));
Expand All @@ -695,8 +685,6 @@ impl<'a> StringReader<'a> {

if num_digits == 0 {
self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
// eat any suffix
self.scan_int_suffix();
return token::Integer(token::intern("0"));
}

Expand All @@ -711,28 +699,19 @@ impl<'a> StringReader<'a> {
if self.curr.unwrap_or('\0').is_digit_radix(10) {
self.scan_digits(10);
self.scan_float_exponent();
self.scan_float_suffix();
}
let last_pos = self.last_pos;
self.check_float_base(start_bpos, last_pos, base);
return token::Float(self.name_from(start_bpos));
} else if self.curr_is('f') {
// or it might be an integer literal suffixed as a float
self.scan_float_suffix();
let last_pos = self.last_pos;
self.check_float_base(start_bpos, last_pos, base);
return token::Float(self.name_from(start_bpos));
} else {
// it might be a float if it has an exponent
if self.curr_is('e') || self.curr_is('E') {
self.scan_float_exponent();
self.scan_float_suffix();
let last_pos = self.last_pos;
self.check_float_base(start_bpos, last_pos, base);
return token::Float(self.name_from(start_bpos));
}
// but we certainly have an integer!
self.scan_int_suffix();
return token::Integer(self.name_from(start_bpos));
}
}
Expand Down Expand Up @@ -869,55 +848,6 @@ impl<'a> StringReader<'a> {
true
}

/// Consume an optional integer literal suffix at the current position.
///
/// A suffix starts with `i` or `u`; that letter is always consumed. It may be
/// followed by a width of `8`, `16`, `32` or `64`, which is consumed as well.
/// If the letter is followed by `1`, `3` or `6` but the next character does
/// not complete a valid width, an "illegal int suffix" error is reported and
/// the offending digits are left unconsumed. Any other current character
/// leaves the reader untouched.
fn scan_int_suffix(&mut self) {
    // Only `i` and `u` introduce an integer suffix.
    match self.curr {
        Some('i') | Some('u') => self.bump(),
        _ => return,
    }

    // Single-digit width.
    if self.curr_is('8') {
        self.bump();
        return;
    }

    // Two-digit widths: work out which second digit the first one must pair
    // with. No leading width digit means a bare `i`/`u`, which is fine.
    let second_digit = if self.curr_is('1') {
        '6'
    } else if self.curr_is('3') {
        '2'
    } else if self.curr_is('6') {
        '4'
    } else {
        return;
    };

    if self.nextch_is(second_digit) {
        // Skip both width digits.
        self.bump();
        self.bump();
    } else {
        self.err_span_(self.last_pos, self.pos, "illegal int suffix");
    }
}

/// Consume an optional float literal suffix at the current position.
///
/// Does nothing unless the current character is `f`. When the `f` is followed
/// by `32` or `64`, all three characters are consumed; otherwise an
/// "illegal float suffix" error is reported and nothing is consumed.
fn scan_float_suffix(&mut self) {
    if !self.curr_is('f') {
        return;
    }

    let is_f32 = self.nextch_is('3') && self.nextnextch_is('2');
    if is_f32 || (self.nextch_is('6') && self.nextnextch_is('4')) {
        // Skip the `f` and both width digits.
        self.bump();
        self.bump();
        self.bump();
    } else {
        self.err_span_(self.last_pos, self.pos, "illegal float suffix");
    }
}

/// Scan over a float exponent.
fn scan_float_exponent(&mut self) {
if self.curr_is('e') || self.curr_is('E') {
Expand Down Expand Up @@ -988,6 +918,7 @@ impl<'a> StringReader<'a> {
if is_dec_digit(c) {
let num = self.scan_number(c.unwrap());
let suffix = self.scan_optional_raw_name();
debug!("next_token_inner: scanned number {}, {}", num, suffix);
return token::Literal(num, suffix)
}

Expand Down Expand Up @@ -1609,6 +1540,9 @@ mod test {
test!("1.0", Float, "1.0");
test!("1.0e10", Float, "1.0e10");

assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok,
token::Literal(token::Integer(token::intern("2")),
Some(token::intern("u"))));
assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
token::Literal(token::StrRaw(token::intern("raw"), 3),
Some(token::intern("suffix"))));
Expand Down
145 changes: 76 additions & 69 deletions src/libsyntax/parse/mod.rs
Expand Up @@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String {
res
}

pub fn float_lit(s: &str) -> ast::Lit_ {
debug!("float_lit: {}", s);
// FIXME #2252: bounds checking float literals is deferred until trans
let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
let s = s2.as_slice();

let mut ty = None;

if s.ends_with("f32") {
ty = Some(ast::TyF32);
} else if s.ends_with("f64") {
ty = Some(ast::TyF64);
}
/// Check whether `s` looks like a width-style suffix such as `i32` or `u1234`:
/// exactly one character taken from `first_chars`, followed by one or more
/// ASCII decimal digits.
///
/// The string is walked by `char` rather than sliced at byte offset 1, so a
/// multi-byte leading character can never cause a slice on a non-character
/// boundary.
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(lead) if first_chars.contains(&lead) => {
            // Require at least one digit after the lead, and nothing but digits.
            let mut saw_digit = false;
            for c in rest {
                if c < '0' || c > '9' {
                    return false;
                }
                saw_digit = true;
            }
            saw_digit
        }
        // Empty string, or a lead character that is not one of `first_chars`.
        _ => false,
    }
}

fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>,
sd: &SpanHandler, sp: Span) -> ast::Lit_ {
debug!("filtered_float_lit: {}, {}", data, suffix);
match suffix {
Some("f32") => ast::LitFloat(data, ast::TyF32),
Some("f64") => ast::LitFloat(data, ast::TyF64),
Some(suf) => {
if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
// if it looks like a width, let's try to be helpful.
sd.span_err(sp, &*format!("illegal width `{}` for float literal, \
valid widths are 32 and 64", suf.slice_from(1)));
} else {
sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \
valid suffixes are `f32` and `f64`", suf));
}

match ty {
Some(t) => {
ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
},
None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
ast::LitFloatUnsuffixed(data)
}
None => ast::LitFloatUnsuffixed(data)
}
}
pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
debug!("float_lit: {}, {}", s, suffix);
// FIXME #2252: bounds checking float literals is defered until trans
let s = s.chars().filter(|&c| c != '_').collect::<String>();
let data = token::intern_and_get_ident(&*s);
filtered_float_lit(data, suffix, sd, sp)
}

/// Parse a string representing a byte literal into its final form. Similar to `char_lit`
pub fn byte_lit(lit: &str) -> (u8, uint) {
Expand Down Expand Up @@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
Rc::new(res)
}

pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
// s can only be ascii, byte indexing is fine

let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
let mut s = s2.as_slice();

debug!("parse_integer_lit: {}", s);

if s.len() == 1 {
let n = (s.char_at(0)).to_digit(10).unwrap();
return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n)));
}
debug!("integer_lit: {}, {}", s, suffix);

let mut base = 10;
let orig = s;
let mut ty = ast::UnsuffixedIntLit(ast::Plus);

if s.char_at(0) == '0' {
if s.char_at(0) == '0' && s.len() > 1 {
match s.char_at(1) {
'x' => base = 16,
'o' => base = 8,
Expand All @@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
}
}

// 1f64 and 2f32 etc. are valid float literals.
match suffix {
Some(suf) if looks_like_width_suffix(&['f'], suf) => {
match base {
16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
8u => sd.span_err(sp, "octal float literal is not supported"),
2u => sd.span_err(sp, "binary float literal is not supported"),
_ => ()
}
let ident = token::intern_and_get_ident(&*s);
return filtered_float_lit(ident, suffix, sd, sp)
}
_ => {}
}

if base != 10 {
s = s.slice_from(2);
}

let last = s.len() - 1;
match s.char_at(last) {
'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus),
'u' => ty = ast::UnsignedIntLit(ast::TyU),
'8' => {
if s.len() > 2 {
match s.char_at(last - 1) {
'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus),
'u' => ty = ast::UnsignedIntLit(ast::TyU8),
_ => { }
}
}
},
'6' => {
if s.len() > 3 && s.char_at(last - 1) == '1' {
match s.char_at(last - 2) {
'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus),
'u' => ty = ast::UnsignedIntLit(ast::TyU16),
_ => { }
}
}
},
'2' => {
if s.len() > 3 && s.char_at(last - 1) == '3' {
match s.char_at(last - 2) {
'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus),
'u' => ty = ast::UnsignedIntLit(ast::TyU32),
_ => { }
}
}
},
'4' => {
if s.len() > 3 && s.char_at(last - 1) == '6' {
match s.char_at(last - 2) {
'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus),
'u' => ty = ast::UnsignedIntLit(ast::TyU64),
_ => { }
if let Some(suf) = suffix {
if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")}
ty = match suf {
"i" => ast::SignedIntLit(ast::TyI, ast::Plus),
"i8" => ast::SignedIntLit(ast::TyI8, ast::Plus),
"i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
"i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
"i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
"u" => ast::UnsignedIntLit(ast::TyU),
"u8" => ast::UnsignedIntLit(ast::TyU8),
"u16" => ast::UnsignedIntLit(ast::TyU16),
"u32" => ast::UnsignedIntLit(ast::TyU32),
"u64" => ast::UnsignedIntLit(ast::TyU64),
_ => {
// i<digits> and u<digits> look like widths, so let's
// give an error message along those lines
if looks_like_width_suffix(&['i', 'u'], suf) {
sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
valid widths are 8, 16, 32 and 64",
suf.slice_from(1)));
} else {
sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
}

ty
}
},
_ => { }
}
}

debug!("The suffix is {}, base {}, the new string is {}, the original \
string was {}", ty, base, s, orig);

s = s.slice_to(s.len() - ty.suffix_len());
debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
string was {}, the original suffix was {}", ty, base, s, orig, suffix);

let res: u64 = match ::std::num::from_str_radix(s, base) {
Some(r) => r,
Expand Down
25 changes: 19 additions & 6 deletions src/libsyntax/parse/parser.rs
Expand Up @@ -652,9 +652,9 @@ impl<'a> Parser<'a> {
Some(suf) => {
let text = suf.as_str();
if text.is_empty() {
self.span_bug(sp, "found empty non-None literal suffix")
self.span_bug(sp, "found empty literal suffix in Some")
}
self.span_err(sp, &*format!("a {} with a suffix is illegal", kind));
self.span_err(sp, &*format!("{} with a suffix is illegal", kind));
}
}
}
Expand Down Expand Up @@ -1661,10 +1661,23 @@ impl<'a> Parser<'a> {
let (suffix_illegal, out) = match lit {
token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())),
token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())),
token::Integer(s) => (false, parse::integer_lit(s.as_str(),
&self.sess.span_diagnostic,
self.last_span)),
token::Float(s) => (false, parse::float_lit(s.as_str())),

// there are some valid suffixes for integer and
// float literals, so all the handling is done
// internally.
token::Integer(s) => {
(false, parse::integer_lit(s.as_str(),
suf.as_ref().map(|s| s.as_str()),
&self.sess.span_diagnostic,
self.last_span))
}
token::Float(s) => {
(false, parse::float_lit(s.as_str(),
suf.as_ref().map(|s| s.as_str()),
&self.sess.span_diagnostic,
self.last_span))
}

token::Str_(s) => {
(true,
LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
Expand Down
13 changes: 9 additions & 4 deletions src/test/compile-fail/bad-lit-suffixes.rs
Expand Up @@ -29,8 +29,13 @@ fn main() {
'a'suffix; //~ ERROR char literal with a suffix is illegal
b'a'suffix; //~ ERROR byte literal with a suffix is illegal

1234suffix;
0b101suffix;
1.0suffix;
1.0e10suffix;
1234u1024; //~ ERROR illegal width `1024` for integer literal
1234i1024; //~ ERROR illegal width `1024` for integer literal
1234f1024; //~ ERROR illegal width `1024` for float literal
1234.5f1024; //~ ERROR illegal width `1024` for float literal

1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal
0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal
1.0suffix; //~ ERROR illegal suffix `suffix` for float literal
1.0e10suffix; //~ ERROR illegal suffix `suffix` for float literal
}

0 comments on commit 606a309

Please sign in to comment.