Skip to content

Commit

Permalink
Merge pull request #10176 from simonbyrne/significand
Browse files Browse the repository at this point in the history
implement significand, exponent and frexp in native Julia
  • Loading branch information
simonbyrne committed Mar 12, 2015
2 parents 217fa8d + f2a523a commit 7e8b10c
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 54 deletions.
20 changes: 20 additions & 0 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,23 @@ Rounding.set_rounding_raw(Float64, Rounding.JL_FE_TONEAREST)
## byte order swaps for arbitrary-endianness serialization/deserialization ##
bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x)))
bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x)))

# bit patterns
reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x)
reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x)

sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000
exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000
exponent_one(::Type{Float64}) = 0x3ff0_0000_0000_0000
exponent_half(::Type{Float64}) = 0x3fe0_0000_0000_0000
significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff

sign_mask(::Type{Float32}) = 0x8000_0000
exponent_mask(::Type{Float32}) = 0x7f80_0000
exponent_one(::Type{Float32}) = 0x3f80_0000
exponent_half(::Type{Float32}) = 0x3f00_0000
significand_mask(::Type{Float32}) = 0x007f_ffff

significand_bits{T<:FloatingPoint}(::Type{T}) = trailing_ones(significand_mask(T))
exponent_bits{T<:FloatingPoint}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1
exponent_bias{T<:FloatingPoint}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T))
10 changes: 9 additions & 1 deletion base/float16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,15 @@ for func in (:div,:fld,:cld,:rem,:mod,:atan2,:hypot)
end

ldexp(a::Float16, b::Integer) = Float16(ldexp(Float32(a), b))
exponent(x::Float16) = exponent(Float32(x))

^(x::Float16, y::Integer) = Float16(Float32(x)^y)

rationalize{T<:Integer}(::Type{T}, x::Float16; tol::Real=eps(x)) = rationalize(T, Float32(x); tol=tol)

reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)

sign_mask(::Type{Float16}) = 0x8000
exponent_mask(::Type{Float16}) = 0x7c00
exponent_one(::Type{Float16}) = 0x3c00
exponent_half(::Type{Float16}) = 0x3800
significand_mask(::Type{Float16}) = 0x03ff
100 changes: 52 additions & 48 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ export sin, cos, tan, sinh, cosh, tanh, asin, acos, atan,
import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
max, min, minmax, ^, exp2,
exp10, expm1, log1p
exp10, expm1, log1p,
sign_mask, exponent_mask, exponent_one, exponent_half,
significand_mask, significand_bits, exponent_bits, exponent_bias


import Core.Intrinsics: nan_dom_err, sqrt_llvm, box, unbox, powi_llvm

Expand Down Expand Up @@ -132,15 +135,6 @@ sqrt(x::Float32) = box(Float32,sqrt_llvm(unbox(Float32,x)))
sqrt(x::Real) = sqrt(float(x))
@vectorize_1arg Number sqrt

for f in (:significand,)
@eval begin
($f)(x::Float64) = ccall(($(string(f)),libm), Float64, (Float64,), x)
($f)(x::Float32) = ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
@vectorize_1arg Real $f
end
end


hypot(x::Real, y::Real) = hypot(promote(float(x), float(y))...)
function hypot{T<:FloatingPoint}(x::T, y::T)
x = abs(x)
Expand Down Expand Up @@ -183,53 +177,63 @@ minmax{T<:FloatingPoint}(x::T, y::T) = x <= y ? (x, y) :
x > y ? (y, x) :
x == x ? (x, x) : (y, y)

function exponent(x::Float64)
if x==0 || !isfinite(x)
throw(DomainError())
end
Int(ccall((:ilogb,libm), Int32, (Float64,), x))
end
function exponent(x::Float32)
if x==0 || !isfinite(x)

ldexp(x::Float64,e::Integer) = ccall((:scalbn,libm), Float64, (Float64,Int32), x, Int32(e))
ldexp(x::Float32,e::Integer) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
# TODO: vectorize ldexp

function exponent{T<:FloatingPoint}(x::T)
xu = reinterpret(Unsigned,x)
xe = xu & exponent_mask(T)
k = Int(xe >> significand_bits(T))
if xe == 0 # x is subnormal
x == 0 && throw(DomainError())
xu &= significand_mask(T)
m = leading_zeros(xu)-exponent_bits(T)
k = 1-m
elseif xe == exponent_mask(T) # NaN or Inf
throw(DomainError())
end
Int(ccall((:ilogbf,libm), Int32, (Float32,), x))
k - exponent_bias(T)
end
@vectorize_1arg Real exponent

ldexp(x::Float64,e::Int) = ccall((:scalbn,libm), Float64, (Float64,Int32), x, Int32(e))
ldexp(x::Float32,e::Int) = ccall((:scalbnf,libm), Float32, (Float32,Int32), x, Int32(e))
# TODO: vectorize ldexp

function frexp(x::Float64)
xu = reinterpret(UInt64,x)
k = Int(xu >> 52) & 0x07ff
if k == 0 # x is subnormal
x == zero(x) && return x,0
x *= 1.8014398509481984e16 # 0x1p54, normalise significand
xu = reinterpret(UInt64,x)
k = Int(xu >> 52) & 0x07ff - 54
elseif k == 0x07ff # NaN or Inf
return x,0
function significand{T<:FloatingPoint}(x::T)
xu = reinterpret(Unsigned,x)
xe = xu & exponent_mask(T)
if xe == 0 # x is subnormal
x == 0 && return x
xs = xu & sign_mask(T)
xu $= xs
m = leading_zeros(xu)-exponent_bits(T)
xu <<= m
xu $= xs
elseif xe == exponent_mask(T) # NaN or Inf
return x
end
k -= 1022
xu = (xu & 0x800f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000
reinterpret(Float64,xu), k
xu = (xu & ~exponent_mask(T)) | exponent_one(T)
reinterpret(T,xu)
end
function frexp(x::Float32)
xu = reinterpret(UInt32,x)
k = Int(xu >> 23) & 0x00ff
if k == 0 # x is subnormal
x == zero(x) && return x,0
x *= 3.3554432f7 # 0x1p25: no Float32 hex literal
xu = reinterpret(UInt32,x)
k = Int(xu >> 23) & 0x00ff - 25
elseif k == 0x00ff # NaN or Inf
@vectorize_1arg Real significand

function frexp{T<:FloatingPoint}(x::T)
xu = reinterpret(Unsigned,x)
xe = xu & exponent_mask(T)
k = Int(xe >> significand_bits(T))
if xe == 0 # x is subnormal
x == 0 && return x, 0
xs = xu & sign_mask(T)
xu $= xs
m = leading_zeros(xu)-exponent_bits(T)
xu <<= m
xu $= xs
k = 1-m
elseif xe == exponent_mask(T) # NaN or Inf
return x,0
end
k -= 126
xu = (xu & 0x807f_ffff) | 0x3f00_0000
reinterpret(Float32,xu), k
k -= (exponent_bias(T)-1)
xu = (xu & ~exponent_mask(T)) | exponent_half(T)
reinterpret(T,xu), k
end

function frexp{T<:FloatingPoint}(A::Array{T})
Expand Down
5 changes: 4 additions & 1 deletion doc/stdlib/numbers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ Data Formats

.. function:: significand(x)

Extract the significand(s) (a.k.a. mantissa), in binary representation, of a floating-point number or array.
Extract the significand(s) (a.k.a. mantissa), in binary representation, of
a floating-point number or array. If ``x`` is a non-zero finite number,
than the result will be a number of the same type on the interval
[1,2). Otherwise ``x`` is returned.

.. doctest::

Expand Down
30 changes: 26 additions & 4 deletions test/math.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,30 @@
# frexp,ldexp,significand,exponent
@test frexp(12.8) == (0.8,4)
@test ldexp(0.8,4) == 12.8
@test significand(12.8) == 1.6
@test exponent(12.8) == 3
for T in (Float16,Float32,Float64)
for z in (zero(T),-zero(T))
frexp(z) === (z,0)
significand(z) === z
@test_throws DomainError exponent(z)
end

for (a,b) in [(T(12.8),T(0.8)),
(prevfloat(realmin(T)), nextfloat(one(T),-2)),
(nextfloat(zero(T),3), T(0.75)),
(nextfloat(zero(T)), T(0.5))]

n = Int(log2(a/b))
@test frexp(a) == (b,n)
@test ldexp(b,n) == a
@test ldexp(a,-n) == b
@test significand(a) == 2b
@test exponent(a) == n-1

@test frexp(-a) == (-b,n)
@test ldexp(-b,n) == -a
@test ldexp(-a,-n) == -b
@test significand(-a) == -2b
@test exponent(-a) == n-1
end
end

for T in (Int, Float64, BigFloat)
@test_approx_eq deg2rad(T(180)) 1pi
Expand Down

0 comments on commit 7e8b10c

Please sign in to comment.