make general significand, exponent and frexp methods for use across a…

…ll FloatingPoint types
JuliaLang · Mar 11, 2015 · ba81b2d · ba81b2d
1 parent 1f1ad94
commit ba81b2d
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 115 deletions.
diff --git a/base/float.jl b/base/float.jl
@@ -416,3 +416,23 @@ Rounding.set_rounding_raw(Float64, Rounding.JL_FE_TONEAREST)
 ## byte order swaps for arbitrary-endianness serialization/deserialization ##
 bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x)))
 bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x)))
+
+# bit patterns
+reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x)
+reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x)
+
+sign_mask(::Type{Float64}) =        0x8000_0000_0000_0000
+exponent_mask(::Type{Float64}) =    0x7ff0_0000_0000_0000
+exponent_one(::Type{Float64}) =     0x3ff0_0000_0000_0000
+exponent_half(::Type{Float64}) =    0x3fe0_0000_0000_0000
+significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff
+
+sign_mask(::Type{Float32}) =        0x8000_0000
+exponent_mask(::Type{Float32}) =    0x7f80_0000
+exponent_one(::Type{Float32}) =     0x3f80_0000
+exponent_half(::Type{Float32}) =    0x3f00_0000
+significand_mask(::Type{Float32}) = 0x007f_ffff
+
+significand_bits{T<:FloatingPoint}(::Type{T}) = trailing_ones(significand_mask(T))
+exponent_bits{T<:FloatingPoint}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1
+exponent_bias{T<:FloatingPoint}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T))
diff --git a/base/float16.jl b/base/float16.jl
@@ -158,54 +158,14 @@ end
 
 ldexp(a::Float16, b::Integer) = Float16(ldexp(Float32(a), b))
 
-function exponent(x::Float16)
-    xu = reinterpret(UInt16,x)
-    k = int(xu >> 10) & 0x001f
-    if k == 0 # x is subnormal
-        xu == 0 && throw(DomainError())
-        axu = xu & 0x7fff
-        s = leading_zeros(axu)-4
-        axu <<= s
-        xu = (xu & 0x8000) | axu
-        k -= s
-    elseif k == 0x001f # NaN or Inf
-        throw(DomainError())
-    end
-    k - 15
-end
-function significand(x::Float16)
-    xu = reinterpret(UInt16,x)
-    k = int(xu >> 10) & 0x001f
-    if k == 0 # x is subnormal
-        xu == 0 && return x
-        axu = xu & 0x7fff
-        s = leading_zeros(axu)-4
-        axu <<= s
-        xu = (xu & 0x8000) | axu
-    elseif k == 0x001f # NaN or Inf
-        return x
-    end
-    xu = (xu & 0x83ff) | 0x3c00
-    reinterpret(Float16,xu)
-end
-function frexp(x::Float16)
-    xu = reinterpret(UInt16,x)
-    k = int(xu >> 10) & 0x001f
-    if k == 0 # x is subnormal
-        xu == 0 && return x,0
-        axu = xu & 0x7fff
-        s = leading_zeros(axu)-4
-        axu <<= s
-        xu = (xu & 0x8000) | axu
-        k -= s
-    elseif k == 0x001f # NaN or Inf
-        return x,0
-    end
-    k -= 14
-    xu = (xu & 0x83ff) | 0x3800
-    reinterpret(Float16,xu), k
-end
-
 ^(x::Float16, y::Integer) = Float16(Float32(x)^y)
 
 rationalize{T<:Integer}(::Type{T}, x::Float16; tol::Real=eps(x)) = rationalize(T, Float32(x); tol=tol)
+
+reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)
+
+sign_mask(::Type{Float16}) =        0x8000
+exponent_mask(::Type{Float16}) =    0x7c00
+exponent_one(::Type{Float16}) =     0x3c00
+exponent_half(::Type{Float16}) =    0x3800
+significand_mask(::Type{Float16}) = 0x03ff
diff --git a/base/math.jl b/base/math.jl
@@ -23,7 +23,10 @@ export sin, cos, tan, sinh, cosh, tanh, asin, acos, atan,
 import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
              max, min, minmax, ^, exp2,
-             exp10, expm1, log1p
+             exp10, expm1, log1p,
+             sign_mask, exponent_mask, exponent_one, exponent_half,
+             significand_mask, significand_bits, exponent_bits, exponent_bias
+
 
 import Core.Intrinsics: nan_dom_err, sqrt_llvm, box, unbox, powi_llvm
 
@@ -174,81 +177,63 @@ minmax{T<:FloatingPoint}(x::T, y::T) =  x <= y ? (x, y) :
                                         x >  y ? (y, x) :
                                         x == x ? (x, x) : (y, y)
 
-function exponent(x::Float64)
-    if x==0 || !isfinite(x)
-        throw(DomainError())
-    end
-    Int(ccall((:ilogb,libm), Int32, (Float64,), x))
-end
-function exponent(x::Float32)
-    if x==0 || !isfinite(x)
-        throw(DomainError())
-    end
-    Int(ccall((:ilogbf,libm), Int32, (Float32,), x))
-end
-@vectorize_1arg Real exponent
 
-ldexp(x::Float64,e::Int) = ccall((:scalbn,libm),  Float64, (Float64,Int32), x, Int32(e))
-ldexp(x::Float32,e::Int) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
+ldexp(x::Float64,e::Integer) = ccall((:scalbn,libm),  Float64, (Float64,Int32), x, Int32(e))
+ldexp(x::Float32,e::Integer) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
 # TODO: vectorize ldexp
 
-function significand(x::Float64)
-    xu = reinterpret(UInt64,x)
-    k = int(xu >> 52) & 0x07ff
-    if k == 0 # x is subnormal
-        x == 0 && return x
-        x *= 1.8014398509481984e16 # 0x1p54, normalise significand
-        xu = reinterpret(UInt64,x)
-    elseif k == 0x07ff # NaN or Inf
-        return x
+function exponent{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    k = Int(xe >> significand_bits(T))
+    if xe == 0 # x is subnormal
+        x == 0 && throw(DomainError())
+        xu &= significand_mask(T)
+        m = leading_zeros(xu)-exponent_bits(T)
+        k = 1-m
+    elseif xe == exponent_mask(T) # NaN or Inf
+        throw(DomainError())
     end
-    xu = (xu & 0x800f_ffff_ffff_ffff) | 0x3ff0_0000_0000_0000
-    reinterpret(Float64,xu)
+    k - exponent_bias(T)
 end
-function significand(x::Float32)
-    xu = reinterpret(UInt32,x)
-    k = int(xu >> 23) & 0x00ff
-    if k == 0 # x is subnormal
+@vectorize_1arg Real exponent
+
+function significand{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    if xe == 0 # x is subnormal
         x == 0 && return x
-        x *= 3.3554432f7 # 0x1p25: no Float32 hex literal
-        xu = reinterpret(UInt32,x)
-    elseif k == 0x00ff # NaN or Inf
+        xs = xu & sign_mask(T)
+        xu $= xs
+        m = leading_zeros(xu)-exponent_bits(T)
+        xu <<= m
+        xu $= xs
+    elseif xe == exponent_mask(T) # NaN or Inf
         return x
     end
-    xu = (xu & 0x807f_ffff) | 0x3f80_0000
-    reinterpret(Float32,xu)
+    xu = (xu & ~exponent_mask(T)) | exponent_one(T)
+    reinterpret(T,xu)
 end
 @vectorize_1arg Real significand
 
-function frexp(x::Float64)
-    xu = reinterpret(UInt64,x)
-    k = Int(xu >> 52) & 0x07ff
-    if k == 0 # x is subnormal
-        x == 0 && return x,0
-        x *= 1.8014398509481984e16 # 0x1p54, normalise significand
-        xu = reinterpret(UInt64,x)
-        k = Int(xu >> 52) & 0x07ff - 54
-    elseif k == 0x07ff # NaN or Inf
-        return x,0
-    end
-    k -= 1022
-    xu = (xu & 0x800f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000
-    reinterpret(Float64,xu), k
-end
-function frexp(x::Float32)
-    xu = reinterpret(UInt32,x)
-    k = Int(xu >> 23) & 0x00ff
-    if k == 0 # x is subnormal
-        x == 0 && return x,0
-        x *= 3.3554432f7 # 0x1p25: no Float32 hex literal
-        xu = reinterpret(UInt32,x)
-        k = Int(xu >> 23) & 0x00ff - 25
-    elseif k == 0x00ff # NaN or Inf
+function frexp{T<:FloatingPoint}(x::T)
+    xu = reinterpret(Unsigned,x)
+    xe = xu & exponent_mask(T)
+    k = Int(xe >> significand_bits(T))
+    if xe == 0 # x is subnormal
+        x == 0 && return x, 0
+        xs = xu & sign_mask(T)
+        xu $= xs
+        m = leading_zeros(xu)-exponent_bits(T)
+        xu <<= m
+        xu $= xs
+        k = 1-m
+    elseif xe == exponent_mask(T) # NaN or Inf
         return x,0
     end
-    k -= 126
-    xu = (xu & 0x807f_ffff) | 0x3f00_0000
-    reinterpret(Float32,xu), k
+    k -= (exponent_bias(T)-1)
+    xu = (xu & ~exponent_mask(T)) | exponent_half(T)
+    reinterpret(T,xu), k
 end
 
 function frexp{T<:FloatingPoint}(A::Array{T})

diff --git a/test/math.jl b/test/math.jl
@@ -1,8 +1,30 @@
 # frexp,ldexp,significand,exponent
-@test frexp(12.8) == (0.8,4)
-@test ldexp(0.8,4) == 12.8
-@test significand(12.8) == 1.6
-@test exponent(12.8) == 3
+for T in (Float16,Float32,Float64)
+    for z in (zero(T),-zero(T))
+        frexp(z) === (z,0)
+        significand(z) === z
+        @test_throws DomainError exponent(z)
+    end
+
+    for (a,b) in [(T(12.8),T(0.8)),
+                  (prevfloat(realmin(T)), nextfloat(one(T),-2)),
+                  (nextfloat(zero(T),3), T(1.5)),
+                  (nextfloat(zero(T)), one(T))]
+
+        n = Int(log2(a/b))
+        @test frexp(a) == (b,n)
+        @test ldexp(b,n) == a
+        @test ldexp(a,-n) == b
+        @test significand(a) == 2b
+        @test exponent(a) == n-1
+
+        @test frexp(-a) == (-b,n)
+        @test ldexp(-b,n) == -a
+        @test ldexp(-a,-n) == -b
+        @test significand(-a) == -2b
+        @test exponent(-a) == n-1
+    end
+end
 
 for T in (Int, Float64, BigFloat)
     @test_approx_eq deg2rad(T(180)) 1pi