From 606a309d4aeb09ba88a0962c633a5b3fd4b300f6 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 19 Nov 2014 20:22:54 +1100 Subject: [PATCH] Switch numeric suffix parsing to use the new system. This moves errors and all handling of numeric suffixes into the parser rather than the lexer. --- src/libsyntax/parse/lexer/mod.rs | 74 +---------- src/libsyntax/parse/mod.rs | 145 ++++++++++++---------- src/libsyntax/parse/parser.rs | 25 +++- src/test/compile-fail/bad-lit-suffixes.rs | 13 +- 4 files changed, 108 insertions(+), 149 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 55c4335941dbc..fbca4868255ff 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -672,16 +672,6 @@ impl<'a> StringReader<'a> { '0'...'9' | '_' | '.' => { num_digits = self.scan_digits(10) + 1; } - 'u' | 'i' => { - self.scan_int_suffix(); - return token::Integer(self.name_from(start_bpos)); - }, - 'f' => { - let last_pos = self.last_pos; - self.scan_float_suffix(); - self.check_float_base(start_bpos, last_pos, base); - return token::Float(self.name_from(start_bpos)); - } _ => { // just a 0 return token::Integer(self.name_from(start_bpos)); @@ -695,8 +685,6 @@ impl<'a> StringReader<'a> { if num_digits == 0 { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); - // eat any suffix - self.scan_int_suffix(); return token::Integer(token::intern("0")); } @@ -711,28 +699,19 @@ impl<'a> StringReader<'a> { if self.curr.unwrap_or('\0').is_digit_radix(10) { self.scan_digits(10); self.scan_float_exponent(); - self.scan_float_suffix(); } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); return token::Float(self.name_from(start_bpos)); - } else if self.curr_is('f') { - // or it might be an integer literal suffixed as a float - self.scan_float_suffix(); - let last_pos = self.last_pos; - self.check_float_base(start_bpos, last_pos, base); - return token::Float(self.name_from(start_bpos)); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { self.scan_float_exponent(); - self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); return token::Float(self.name_from(start_bpos)); } // but we certainly have an integer! - self.scan_int_suffix(); return token::Integer(self.name_from(start_bpos)); } } @@ -869,55 +848,6 @@ impl<'a> StringReader<'a> { true } - /// Scan over an int literal suffix. - fn scan_int_suffix(&mut self) { - match self.curr { - Some('i') | Some('u') => { - self.bump(); - - if self.curr_is('8') { - self.bump(); - } else if self.curr_is('1') { - if !self.nextch_is('6') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('3') { - if !self.nextch_is('2') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('6') { - if !self.nextch_is('4') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } - }, - _ => { } - } - } - - /// Scan over a float literal suffix - fn scan_float_suffix(&mut self) { - if self.curr_is('f') { - if (self.nextch_is('3') && self.nextnextch_is('2')) - || (self.nextch_is('6') && self.nextnextch_is('4')) { - self.bump(); - self.bump(); - self.bump(); - } else { - self.err_span_(self.last_pos, self.pos, "illegal float suffix"); - } - } - } - /// Scan over a float exponent. fn scan_float_exponent(&mut self) { if self.curr_is('e') || self.curr_is('E') { @@ -988,6 +918,7 @@ impl<'a> StringReader<'a> { if is_dec_digit(c) { let num = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); + debug!("next_token_inner: scanned number {}, {}", num, suffix); return token::Literal(num, suffix) } @@ -1609,6 +1540,9 @@ mod test { test!("1.0", Float, "1.0"); test!("1.0e10", Float, "1.0e10"); + assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok, + token::Literal(token::Integer(token::intern("2")), + Some(token::intern("u")))); assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok, token::Literal(token::StrRaw(token::intern("raw"), 3), Some(token::intern("suffix")))); diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 2810db4eaddd8..d111108269dfe 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String { res } -pub fn float_lit(s: &str) -> ast::Lit_ { - debug!("float_lit: {}", s); - // FIXME #2252: bounds checking float literals is defered until trans - let s2 = s.chars().filter(|&c| c != '_').collect::(); - let s = s2.as_slice(); - - let mut ty = None; - - if s.ends_with("f32") { - ty = Some(ast::TyF32); - } else if s.ends_with("f64") { - ty = Some(ast::TyF64); - } +// check if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && + first_chars.contains(&s.char_at(0)) && + s.slice_from(1).chars().all(|c| '0' <= c && c <= '9') +} +fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>, + sd: &SpanHandler, sp: Span) -> ast::Lit_ { + debug!("filtered_float_lit: {}, {}", data, suffix); + match suffix { + Some("f32") => ast::LitFloat(data, ast::TyF32), + Some("f64") => ast::LitFloat(data, ast::TyF64), + Some(suf) => { + if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { + // if it looks like a width, lets try to be helpful. + sd.span_err(sp, &*format!("illegal width `{}` for float literal, \ + valid widths are 32 and 64", suf.slice_from(1))); + } else { + sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \ + valid suffixes are `f32` and `f64`", suf)); + } - match ty { - Some(t) => { - ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t) - }, - None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s)) + ast::LitFloatUnsuffixed(data) + } + None => ast::LitFloatUnsuffixed(data) } } +pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ { + debug!("float_lit: {}, {}", s, suffix); + // FIXME #2252: bounds checking float literals is defered until trans + let s = s.chars().filter(|&c| c != '_').collect::(); + let data = token::intern_and_get_ident(&*s); + filtered_float_lit(data, suffix, sd, sp) +} /// Parse a string representing a byte literal into its final form. Similar to `char_lit` pub fn byte_lit(lit: &str) -> (u8, uint) { @@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc> { Rc::new(res) } -pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ { +pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ { // s can only be ascii, byte indexing is fine let s2 = s.chars().filter(|&c| c != '_').collect::(); let mut s = s2.as_slice(); - debug!("parse_integer_lit: {}", s); - - if s.len() == 1 { - let n = (s.char_at(0)).to_digit(10).unwrap(); - return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n))); - } + debug!("integer_lit: {}, {}", s, suffix); let mut base = 10; let orig = s; let mut ty = ast::UnsuffixedIntLit(ast::Plus); - if s.char_at(0) == '0' { + if s.char_at(0) == '0' && s.len() > 1 { match s.char_at(1) { 'x' => base = 16, 'o' => base = 8, @@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ { } } + // 1f64 and 2f32 etc. are valid float literals. + match suffix { + Some(suf) if looks_like_width_suffix(&['f'], suf) => { + match base { + 16u => sd.span_err(sp, "hexadecimal float literal is not supported"), + 8u => sd.span_err(sp, "octal float literal is not supported"), + 2u => sd.span_err(sp, "binary float literal is not supported"), + _ => () + } + let ident = token::intern_and_get_ident(&*s); + return filtered_float_lit(ident, suffix, sd, sp) + } + _ => {} + } + if base != 10 { s = s.slice_from(2); } - let last = s.len() - 1; - match s.char_at(last) { - 'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU), - '8' => { - if s.len() > 2 { - match s.char_at(last - 1) { - 'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU8), - _ => { } - } - } - }, - '6' => { - if s.len() > 3 && s.char_at(last - 1) == '1' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU16), - _ => { } - } - } - }, - '2' => { - if s.len() > 3 && s.char_at(last - 1) == '3' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU32), - _ => { } - } - } - }, - '4' => { - if s.len() > 3 && s.char_at(last - 1) == '6' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU64), - _ => { } + if let Some(suf) = suffix { + if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")} + ty = match suf { + "i" => ast::SignedIntLit(ast::TyI, ast::Plus), + "i8" => ast::SignedIntLit(ast::TyI8, ast::Plus), + "i16" => ast::SignedIntLit(ast::TyI16, ast::Plus), + "i32" => ast::SignedIntLit(ast::TyI32, ast::Plus), + "i64" => ast::SignedIntLit(ast::TyI64, ast::Plus), + "u" => ast::UnsignedIntLit(ast::TyU), + "u8" => ast::UnsignedIntLit(ast::TyU8), + "u16" => ast::UnsignedIntLit(ast::TyU16), + "u32" => ast::UnsignedIntLit(ast::TyU32), + "u64" => ast::UnsignedIntLit(ast::TyU64), + _ => { + // i and u look like widths, so lets + // give an error message along those lines + if looks_like_width_suffix(&['i', 'u'], suf) { + sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \ + valid widths are 8, 16, 32 and 64", + suf.slice_from(1))); + } else { + sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf)); } + + ty } - }, - _ => { } + } } - debug!("The suffix is {}, base {}, the new string is {}, the original \ - string was {}", ty, base, s, orig); - - s = s.slice_to(s.len() - ty.suffix_len()); + debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \ + string was {}, the original suffix was {}", ty, base, s, orig, suffix); let res: u64 = match ::std::num::from_str_radix(s, base) { Some(r) => r, diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index b9e1fe07e712a..85364b8f65ffa 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -652,9 +652,9 @@ impl<'a> Parser<'a> { Some(suf) => { let text = suf.as_str(); if text.is_empty() { - self.span_bug(sp, "found empty non-None literal suffix") + self.span_bug(sp, "found empty literal suffix in Some") } - self.span_err(sp, &*format!("a {} with a suffix is illegal", kind)); + self.span_err(sp, &*format!("{} with a suffix is illegal", kind)); } } } @@ -1661,10 +1661,23 @@ impl<'a> Parser<'a> { let (suffix_illegal, out) = match lit { token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())), token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())), - token::Integer(s) => (false, parse::integer_lit(s.as_str(), - &self.sess.span_diagnostic, - self.last_span)), - token::Float(s) => (false, parse::float_lit(s.as_str())), + + // there are some valid suffixes for integer and + // float literals, so all the handling is done + // internally. + token::Integer(s) => { + (false, parse::integer_lit(s.as_str(), + suf.as_ref().map(|s| s.as_str()), + &self.sess.span_diagnostic, + self.last_span)) + } + token::Float(s) => { + (false, parse::float_lit(s.as_str(), + suf.as_ref().map(|s| s.as_str()), + &self.sess.span_diagnostic, + self.last_span)) + } + token::Str_(s) => { (true, LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), diff --git a/src/test/compile-fail/bad-lit-suffixes.rs b/src/test/compile-fail/bad-lit-suffixes.rs index e48bb807488a0..e142365a8ca07 100644 --- a/src/test/compile-fail/bad-lit-suffixes.rs +++ b/src/test/compile-fail/bad-lit-suffixes.rs @@ -29,8 +29,13 @@ fn main() { 'a'suffix; //~ ERROR char literal with a suffix is illegal b'a'suffix; //~ ERROR byte literal with a suffix is illegal - 1234suffix; - 0b101suffix; - 1.0suffix; - 1.0e10suffix; + 1234u1024; //~ ERROR illegal width `1024` for integer literal + 1234i1024; //~ ERROR illegal width `1024` for integer literal + 1234f1024; //~ ERROR illegal width `1024` for float literal + 1234.5f1024; //~ ERROR illegal width `1024` for float literal + + 1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 1.0suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 1.0e10suffix; //~ ERROR illegal suffix `suffix` for numeric literal }