Merge pull request #10176 from simonbyrne/significand

implement significand, exponent and frexp in native Julia
JuliaLang · Mar 12, 2015 · 7e8b10c · 7e8b10c
2 parents 217fa8d + f2a523a
commit 7e8b10c
Show file tree

Hide file tree

Showing 5 changed files with 111 additions and 54 deletions.
diff --git a/base/float.jl b/base/float.jl
@@ -416,3 +416,23 @@ Rounding.set_rounding_raw(Float64, Rounding.JL_FE_TONEAREST)
 ## byte order swaps for arbitrary-endianness serialization/deserialization ##
 bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x)))
 bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x)))
+
+# bit patterns
+reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x)
+reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x)
+
+sign_mask(::Type{Float64}) =        0x8000_0000_0000_0000
+exponent_mask(::Type{Float64}) =    0x7ff0_0000_0000_0000
+exponent_one(::Type{Float64}) =     0x3ff0_0000_0000_0000
+exponent_half(::Type{Float64}) =    0x3fe0_0000_0000_0000
+significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff
+
+sign_mask(::Type{Float32}) =        0x8000_0000
+exponent_mask(::Type{Float32}) =    0x7f80_0000
+exponent_one(::Type{Float32}) =     0x3f80_0000
+exponent_half(::Type{Float32}) =    0x3f00_0000
+significand_mask(::Type{Float32}) = 0x007f_ffff
+
+significand_bits{T<:FloatingPoint}(::Type{T}) = trailing_ones(significand_mask(T))
+exponent_bits{T<:FloatingPoint}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1
+exponent_bias{T<:FloatingPoint}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T))
diff --git a/base/float16.jl b/base/float16.jl
@@ -157,7 +157,15 @@ for func in (:div,:fld,:cld,:rem,:mod,:atan2,:hypot)
 end
 
 ldexp(a::Float16, b::Integer) = Float16(ldexp(Float32(a), b))
-exponent(x::Float16) = exponent(Float32(x))
+
 ^(x::Float16, y::Integer) = Float16(Float32(x)^y)
 
 rationalize{T<:Integer}(::Type{T}, x::Float16; tol::Real=eps(x)) = rationalize(T, Float32(x); tol=tol)
+
+reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)
+
+sign_mask(::Type{Float16}) =        0x8000
+exponent_mask(::Type{Float16}) =    0x7c00
+exponent_one(::Type{Float16}) =     0x3c00
+exponent_half(::Type{Float16}) =    0x3800
+significand_mask(::Type{Float16}) = 0x03ff
diff --git a/base/math.jl b/base/math.jl
@@ -23,7 +23,10 @@ export sin, cos, tan, sinh, cosh, tanh, asin, acos, atan,
 import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
              max, min, minmax, ^, exp2,
-             exp10, expm1, log1p
+             exp10, expm1, log1p,
+             sign_mask, exponent_mask, exponent_one, exponent_half,
+             significand_mask, significand_bits, exponent_bits, exponent_bias
+
 
 import Core.Intrinsics: nan_dom_err, sqrt_llvm, box, unbox, powi_llvm
 
@@ -132,15 +135,6 @@ sqrt(x::Float32) = box(Float32,sqrt_llvm(unbox(Float32,x)))
 sqrt(x::Real) = sqrt(float(x))
 @vectorize_1arg Number sqrt
 
-for f in (:significand,)
-    @eval begin
-        ($f)(x::Float64) = ccall(($(string(f)),libm), Float64, (Float64,), x)
-        ($f)(x::Float32) = ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
-        @vectorize_1arg Real $f
-    end
-end
-
-
 hypot(x::Real, y::Real) = hypot(promote(float(x), float(y))...)
 function hypot{T<:FloatingPoint}(x::T, y::T)
     x = abs(x)
@@ -183,53 +177,63 @@ minmax{T<:FloatingPoint}(x::T, y::T) =  x <= y ? (x, y) :
                                         x >  y ? (y, x) :
                                         x == x ? (x, x) : (y, y)
 
-function exponent(x::Float64)
-    if x==0 || !isfinite(x)
-        throw(DomainError())
-    end
-    Int(ccall((:ilogb,libm), Int32, (Float64,), x))
-end
-function exponent(x::Float32)
-    if x==0 || !isfinite(x)
+
+ldexp(x::Float64,e::Integer) = ccall((:scalbn,libm),  Float64, (Float64,Int32), x, Int32(e))
+ldexp(x::Float32,e::Integer) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
+# TODO: vectorize ldexp
+
+function exponent{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    k = Int(xe >> significand_bits(T))
+    if xe == 0 # x is subnormal
+        x == 0 && throw(DomainError())
+        xu &= significand_mask(T)
+        m = leading_zeros(xu)-exponent_bits(T)
+        k = 1-m
+    elseif xe == exponent_mask(T) # NaN or Inf
         throw(DomainError())
     end
-    Int(ccall((:ilogbf,libm), Int32, (Float32,), x))
+    k - exponent_bias(T)
 end
 @vectorize_1arg Real exponent
 
-ldexp(x::Float64,e::Int) = ccall((:scalbn,libm),  Float64, (Float64,Int32), x, Int32(e))
-ldexp(x::Float32,e::Int) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
-# TODO: vectorize ldexp
-
-function frexp(x::Float64)
-    xu = reinterpret(UInt64,x)
-    k = Int(xu >> 52) & 0x07ff
-    if k == 0 # x is subnormal
-        x == zero(x) && return x,0
-        x *= 1.8014398509481984e16 # 0x1p54, normalise significand
-        xu = reinterpret(UInt64,x)
-        k = Int(xu >> 52) & 0x07ff - 54
-    elseif k == 0x07ff # NaN or Inf
-        return x,0
+function significand{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    if xe == 0 # x is subnormal
+        x == 0 && return x
+        xs = xu & sign_mask(T)
+        xu $= xs
+        m = leading_zeros(xu)-exponent_bits(T)
+        xu <<= m
+        xu $= xs
+    elseif xe == exponent_mask(T) # NaN or Inf
+        return x
     end
-    k -= 1022
-    xu = (xu & 0x800f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000
-    reinterpret(Float64,xu), k
+    xu = (xu & ~exponent_mask(T)) | exponent_one(T)
+    reinterpret(T,xu)
 end
-function frexp(x::Float32)
-    xu = reinterpret(UInt32,x)
-    k = Int(xu >> 23) & 0x00ff
-    if k == 0 # x is subnormal
-        x == zero(x) && return x,0
-        x *= 3.3554432f7 # 0x1p25: no Float32 hex literal
-        xu = reinterpret(UInt32,x)
-        k = Int(xu >> 23) & 0x00ff - 25
-    elseif k == 0x00ff # NaN or Inf
+@vectorize_1arg Real significand
+
+function frexp{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    k = Int(xe >> significand_bits(T))
+    if xe == 0 # x is subnormal
+        x == 0 && return x, 0
+        xs = xu & sign_mask(T)
+        xu $= xs
+        m = leading_zeros(xu)-exponent_bits(T)
+        xu <<= m
+        xu $= xs
+        k = 1-m
+    elseif xe == exponent_mask(T) # NaN or Inf
         return x,0
     end
-    k -= 126
-    xu = (xu & 0x807f_ffff) | 0x3f00_0000
-    reinterpret(Float32,xu), k
+    k -= (exponent_bias(T)-1)
+    xu = (xu & ~exponent_mask(T)) | exponent_half(T)
+    reinterpret(T,xu), k
 end
 
 function frexp{T<:FloatingPoint}(A::Array{T})

diff --git a/doc/stdlib/numbers.rst b/doc/stdlib/numbers.rst
@@ -81,7 +81,10 @@ Data Formats
 
 .. function:: significand(x)
 
-   Extract the significand(s) (a.k.a. mantissa), in binary representation, of a floating-point number or array.
+   Extract the significand(s) (a.k.a. mantissa), in binary representation, of
+   a floating-point number or array. If ``x`` is a non-zero finite number,
+   than the result will be a number of the same type on the interval
+   [1,2). Otherwise ``x`` is returned.
 
    .. doctest::
 

diff --git a/test/math.jl b/test/math.jl
@@ -1,8 +1,30 @@
 # frexp,ldexp,significand,exponent
-@test frexp(12.8) == (0.8,4)
-@test ldexp(0.8,4) == 12.8
-@test significand(12.8) == 1.6
-@test exponent(12.8) == 3
+for T in (Float16,Float32,Float64)
+    for z in (zero(T),-zero(T))
+        frexp(z) === (z,0)
+        significand(z) === z
+        @test_throws DomainError exponent(z)
+    end
+
+    for (a,b) in [(T(12.8),T(0.8)),
+                  (prevfloat(realmin(T)), nextfloat(one(T),-2)),
+                  (nextfloat(zero(T),3), T(0.75)),
+                  (nextfloat(zero(T)), T(0.5))]
+
+        n = Int(log2(a/b))
+        @test frexp(a) == (b,n)
+        @test ldexp(b,n) == a
+        @test ldexp(a,-n) == b
+        @test significand(a) == 2b
+        @test exponent(a) == n-1
+
+        @test frexp(-a) == (-b,n)
+        @test ldexp(-b,n) == -a
+        @test ldexp(-a,-n) == -b
+        @test significand(-a) == -2b
+        @test exponent(-a) == n-1
+    end
+end
 
 for T in (Int, Float64, BigFloat)
     @test_approx_eq deg2rad(T(180)) 1pi