From 15518a9c0c1c5f1774503c72072c73e64411d130 Mon Sep 17 00:00:00 2001 From: Robin Kruppe Date: Mon, 10 Aug 2015 23:14:30 +0200 Subject: [PATCH] Mention that the fast path is broken without SSE. --- src/libcore/num/dec2flt/algorithm.rs | 14 +++++++++++--- src/libcoretest/num/dec2flt/mod.rs | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/libcore/num/dec2flt/algorithm.rs b/src/libcore/num/dec2flt/algorithm.rs index 97019090b56c7..f166bb9b3eb8d 100644 --- a/src/libcore/num/dec2flt/algorithm.rs +++ b/src/libcore/num/dec2flt/algorithm.rs @@ -21,9 +21,8 @@ use super::num::{self, Big}; /// Number of significand bits in Fp const P: u32 = 64; -// We simply store the best approximation for *all* exponents, so -// the variable "h" and the associated conditions can be omitted. -// This trades performance for space (11 KiB versus... 5 KiB or so?) +// We simply store the best approximation for *all* exponents, so the variable "h" and the +// associated conditions can be omitted. This trades performance for a couple kilobytes of space. fn power_of_ten(e: i16) -> Fp { assert!(e >= table::MIN_E); @@ -37,6 +36,15 @@ fn power_of_ten(e: i16) -> Fp { /// /// This is extracted into a separate function so that it can be attempted before constructing /// a bignum. +/// +/// The fast path crucially depends on arithmetic being correctly rounded, so on x86 +/// without SSE or SSE2 it will be **wrong** (as in, off by one ULP occasionally), because the x87 +/// FPU stack will round to 80 bit first before rounding to 64/32 bit. However, as such hardware +/// is extremely rare nowadays and in fact all in-tree target triples assume an SSE2-capable +/// microarchitecture, there is little incentive to deal with that. There's a test that will fail +/// when SSE or SSE2 is disabled, so people building their own non-SSE copy will get a heads up. +/// +/// FIXME: It would nevertheless be nice if we had a good way to detect and deal with x87. 
pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> { let num_digits = integral.len() + fractional.len(); // log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end, diff --git a/src/libcoretest/num/dec2flt/mod.rs b/src/libcoretest/num/dec2flt/mod.rs index bd8cfc74f0c63..b7ef956055e29 100644 --- a/src/libcoretest/num/dec2flt/mod.rs +++ b/src/libcoretest/num/dec2flt/mod.rs @@ -90,6 +90,13 @@ fn zero() { test_literal!(1e-500); } +#[test] +fn fast_path_correct() { + // This number triggers the fast path and is handled incorrectly when compiling on + // x86 without SSE2 (i.e., using the x87 FPU stack). + test_literal!(1.448997445238699); +} + #[test] fn lonely_dot() { assert_eq!(to_f64("."), Ok(0.0));