Add pure julia exp function #19831

Merged (2 commits, Jan 9, 2017)
1 change: 1 addition & 0 deletions LICENSE.md
@@ -73,6 +73,7 @@ The following components of Julia's standard library have separate licenses:
- base/fftw.jl (see [FFTW](http://fftw.org/doc/License-and-Copyright.html))
- base/linalg/umfpack.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html))
- base/linalg/cholmod.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html))
- base/special/exp.jl (see [FREEBSD MSUN](https://github.com/freebsd/freebsd) [FreeBSD/2-clause BSD/Simplified BSD License])


Julia's build process uses the following external tools:
9 changes: 9 additions & 0 deletions base/float.jl
@@ -738,9 +738,18 @@ exponent_one(::Type{Float16}) = 0x3c00
exponent_half(::Type{Float16}) = 0x3800
significand_mask(::Type{Float16}) = 0x03ff

# unsigned integer type matching the size of the float
fpinttype(::Type{Float64}) = UInt64
fpinttype(::Type{Float32}) = UInt32
fpinttype(::Type{Float16}) = UInt16

@pure significand_bits{T<:AbstractFloat}(::Type{T}) = trailing_ones(significand_mask(T))
@pure exponent_bits{T<:AbstractFloat}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1
@pure exponent_bias{T<:AbstractFloat}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T))
# maximum float exponent
@pure exponent_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T)
# maximum raw (biased) float exponent
@pure exponent_raw_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T))

## Array operations on floating point numbers ##

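For intuition, a minimal sketch (not part of the diff) of what these helpers evaluate to for IEEE 754 Float64, assuming a Julia build that includes this change (the functions live as Base internals):

    # Float64 layout: 1 sign bit, 11 exponent bits, 52 significand bits, bias 1023
    @assert Base.significand_bits(Float64) == 52
    @assert Base.exponent_bits(Float64) == 11
    @assert Base.exponent_bias(Float64) == 1023
    @assert Base.exponent_raw_max(Float64) == 2047      # exponent field all ones
    @assert Base.exponent_max(Float64) == 2047 - 1023   # = 1024
    @assert Base.fpinttype(Float64) == UInt64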
27 changes: 12 additions & 15 deletions base/math.jl
@@ -26,12 +26,13 @@ export sin, cos, tan, sinh, cosh, tanh, asin, acos, atan,
import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
max, min, minmax, ^, exp2, muladd,
exp10, expm1, log1p,
sign_mask, exponent_mask, exponent_one, exponent_half,
significand_mask, significand_bits, exponent_bits, exponent_bias
exp10, expm1, log1p

using Base: sign_mask, exponent_mask, exponent_one, exponent_bias,
Contributor Author: No need to import any of these, since we don't extend them.

exponent_half, exponent_max, exponent_raw_max, fpinttype,
significand_mask, significand_bits, exponent_bits

import Core.Intrinsics: sqrt_llvm, box, unbox, powi_llvm
using Core.Intrinsics: sqrt_llvm, box, unbox, powi_llvm

# non-type specific math functions

@@ -222,13 +223,6 @@ Compute the inverse hyperbolic sine of `x`.
"""
asinh(x)

"""
exp(x)

Compute ``e^x``.
"""
exp(x)

"""
erf(x)

@@ -250,13 +244,16 @@ erfc(x)
Accurately compute ``e^x-1``.
"""
expm1(x)
for f in (:cbrt, :sinh, :cosh, :tanh, :atan, :asinh, :exp, :erf, :erfc, :exp2, :expm1)
for f in (:cbrt, :sinh, :cosh, :tanh, :atan, :asinh, :erf, :erfc, :exp2, :expm1)
@eval begin
($f)(x::Float64) = ccall(($(string(f)),libm), Float64, (Float64,), x)
($f)(x::Float32) = ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
($f)(x::Real) = ($f)(float(x))
end
end
# pure julia exp function
include("special/exp.jl")
exp(x::Real) = exp(float(x))

# fallback definitions to prevent infinite loop from $f(x::Real) def above

@@ -530,11 +527,11 @@ function ldexp{T<:AbstractFloat}(x::T, e::Integer)
n = e % Int
k += n
# overflow, if k is larger than the maximum possible exponent
if k >= Int(exponent_mask(T) >> significand_bits(T))
if k >= exponent_raw_max(T)
return flipsign(T(Inf), x)
end
if k > 0 # normal case
xu = (xu & ~exponent_mask(T)) | (k % typeof(xu) << significand_bits(T))
xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
return reinterpret(T, xu)
else # subnormal case
if k <= -significand_bits(T) # underflow
@@ -544,7 +541,7 @@ end
end
k += significand_bits(T)
z = T(2.0)^-significand_bits(T)
xu = (xu & ~exponent_mask(T)) | (k % typeof(xu) << significand_bits(T))
xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
return z*reinterpret(T, xu)
end
end
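As an aside, a minimal sketch (mine, not from the PR) of the bit manipulation the normal-case branch above relies on: constructing 2.0^k directly from the exponent field.

    # Build 2.0^k for Float64 by writing the biased exponent into bits 52..62.
    k = 5
    bits = UInt64(1023 + k) << 52                   # biased exponent, zero significand
    @assert reinterpret(Float64, bits) == 2.0^k     # 32.0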
134 changes: 134 additions & 0 deletions base/special/exp.jl
@@ -0,0 +1,134 @@
# Based on FreeBSD lib/msun/src/e_exp.c
# which is made available under the following licence

## Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission
## to use, copy, modify, and distribute this software is freely granted,
## provided that this notice is preserved.

# Method
# 1. Argument reduction: Reduce x to an r so that |r| <= 0.5*ln(2). Given x,
# find r and integer k such that
# x = k*ln(2) + r, |r| <= 0.5*ln(2).
# Here r is represented as r = hi - lo for better accuracy.
#
# 2. Approximate exp(r) by a special rational function on [0, 0.5*ln(2)]:
# R(r^2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r^4/360 + ...
#
# A special Remez algorithm on [0, 0.5*ln(2)] is used to generate a
# polynomial to approximate R.
#
# The computation of exp(r) thus becomes
# 2*r
# exp(r) = 1 + ----------
# R(r) - r
# r*c(r)
# = 1 + r + ----------- (for better accuracy)
# 2 - c(r)
# where
# c(r) = r - (P1*r^2 + P2*r^4 + ... + P5*r^10 + ...).
#
# 3. Scale back: exp(x) = 2^k * exp(r)
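# e.g. for r = 0.3: R(0.3) = 0.3*(exp(0.3)+1)/(exp(0.3)-1) ≈ 2.01498, and
# 1 + 2*0.3/(R - 0.3) ≈ 1.3498588 ≈ exp(0.3)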

# log(2)
const LN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01
# log2(e)
const LOG2E = 1.442695040888963407359924681001892137426646

# log(2) split into upper and lower parts
LN2U(::Type{Float64}) = 6.93147180369123816490e-1
LN2U(::Type{Float32}) = 6.9313812256f-1

LN2L(::Type{Float64}) = 1.90821492927058770002e-10
LN2L(::Type{Float32}) = 9.0580006145f-6

# max and min arguments for exponential functions
MAXEXP(::Type{Float64}) = 7.09782712893383996732e2 # log 2^1023*(2-2^-52)
MAXEXP(::Type{Float32}) = 88.72283905206835f0 # log 2^127 *(2-2^-23)

# one less than the min exponent, since we can squeeze a bit more from the exp function
MINEXP(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075
MINEXP(::Type{Float32}) = -103.97207708f0 # log 2^-150

@inline exp_kernel(x::Float64) = @horner(x, 1.66666666666666019037e-1,
-2.77777777770155933842e-3, 6.61375632143793436117e-5,
-1.65339022054652515390e-6, 4.13813679705723846039e-8)

@inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3)
Member: (This is such a low degree that I wonder if we'd be better off using a minimax polynomial rather than a minimax rational function, at least for the single-precision case — the optimal polynomial would require a higher degree, but it would avoid the division. I don't think we need to address that possibility in this PR, however, since this is no worse than what we are doing now.)

musm (Contributor Author), Jan 3, 2017: This is a good question. I have already played with many variations, and my conclusion is that the msun version is the best of everything I have tested, considering both accuracy (the critical goal of being < 1 ulp) and performance.

Here's the problem with the minimax polynomial approach for the exp function. On my test suite for Float32 (ulp errors):

    FMA system,     rational:    max 0.86076152 at x = -5.85780, mean 0.03712445
    non-FMA system, rational:    max 0.86076152 at x = -5.85780, mean 0.03712434
    FMA system,     polynomial:  max 0.84459400 at x = -48.8587, mean 0.03627487
    non-FMA system, polynomial:  max 1.09986448 at x = 5.23060,  mean 0.03798720

over the range vcat(-10:0.0002:10, -1000:0.001:1000, -120:0.0023:1000, -1000:0.02:2000) (errors may be larger, since we haven't tested all floats).

As you can see, it's not possible to stay under 1 ulp on non-FMA systems using this polynomial (below).

What about speed? I tested 3 hot branches, and the polynomial version on an FMA system is ~0.5-0.8 ns slower.

For reference, this is the best minimax polynomial I could find for Float32:

@inline exp_kernel{T<:SmallFloat}(x::T) = @horner_oftype(x, 1.0, 1.0, 0.5,
    0.1666666567325592041015625,
    4.1666455566883087158203125e-2,
    8.333526551723480224609375e-3,
    1.39357591979205608367919921875e-3,
    1.97799992747604846954345703125e-4)

Note that it's not as simple as adding more terms to this polynomial to improve the accuracy. This is the smallest minimax polynomial I was able to find that meets the < 1 ulp goal.

For Float64:

@inline exp_kernel{T<:LargeFloat}(x::T) = @horner_oftype(x, 1.0, 1.0, 0.5,
    0.16666666666666685170383743752609007060527801513672,
    4.1666666666666692109277647659837384708225727081299e-2,
    8.3333333333159547579027659480743750464171171188354e-3,
    1.38888888888693412537733706813014578074216842651367e-3,
    1.9841269898657093212653024227876130680670030415058e-4,
    2.4801587357008890921336585755341275216778740286827e-5,
    2.7557232875898009206386968239499424271343741565943e-6,
    2.7557245320026768203034231441428403286408865824342e-7,
    2.51126540120060271373185023340013355408473216812126e-8,
    2.0923712382298872819985862227861600493028504388349e-9)

I haven't tested this in a while, but the same conclusion holds.


# for values smaller than this threshold just use a Taylor expansion
exp_small_thres(::Type{Float64}) = 2.0^-28
exp_small_thres(::Type{Float32}) = 2.0f0^-13
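# e.g. for Float64, |x| < 2^-28 gives x^2/2 < 2^-57, which is below half an ulp
# of 1.0 (2^-53), so the truncated Taylor series 1 + x is already correctly rounded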

"""
exp(x)

Compute the natural base exponential of `x`, in other words ``e^x``.
"""
function exp{T<:Union{Float32,Float64}}(x::T)
xa = reinterpret(Unsigned, x) & ~sign_mask(T)
xsb = signbit(x)

# filter out non-finite arguments
if xa > reinterpret(Unsigned, MAXEXP(T))
if xa >= exponent_mask(T)
xa & significand_mask(T) != 0 && return T(NaN)
return xsb ? T(0.0) : T(Inf) # exp(+-Inf)
end
x > MAXEXP(T) && return T(Inf)
x < MINEXP(T) && return T(0.0)
end

# This implementation gives 2.7182818284590455 for exp(1.0) when T ==
# Float64, which is well within the allowable error; however,
# 2.718281828459045 is closer to the true value so we prefer that answer,
# given that 1.0 is such an important argument value.
if x == T(1.0) && T == Float64
Member: Maybe x == T(1.0) && return T(2.718281828459045235360) so that we get the exactly rounded result for Float32 too?

Contributor Author: Float32 is exactly rounded without this. So it's better to do this to remove the extra check for the Float32 version.

Member: Okay.

Contributor Author: What's the difference between T == Float64 and T === Float64? I'm leaning towards using the latter, but for no particular reason.

return 2.718281828459045235360
end

if xa > reinterpret(Unsigned, T(0.5)*T(LN2)) # |x| > 0.5 log(2)
# argument reduction
if xa < reinterpret(Unsigned, T(1.5)*T(LN2)) # |x| < 1.5 log(2)
if xsb
k = -1
hi = x + LN2U(T)
lo = -LN2L(T)
else
k = 1
hi = x - LN2U(T)
lo = LN2L(T)
end
else
n = round(T(LOG2E)*x)
k = unsafe_trunc(Int,n)
Member: It seems bad that this will behave differently on 32-bit and 64-bit machines?

Contributor Author: No, it doesn't matter in this case. k is the integer representing the exponent of the float, so there is no way it will cause issues: exp overflows long before k exceeds the Int32 range. We could just as well use Int32, but I think it might be nicer to use the machine-size Int?

stevengj (Member), Jan 3, 2017: Better to use Int32, I think. (We could even use Int16, it sounds like, but there's probably no advantage to that.)

Contributor Author: I made the change, but I don't fully understand why we should prefer Int32 over Int. I tried both, and there is no critical difference in the resulting code.

hi = muladd(n, -LN2U(T), x)
lo = n*LN2L(T)
end
r = hi - lo

# compute approximation on reduced argument
z = r*r
p = r - z*exp_kernel(z)
y = T(1.0) - ((lo - (r*p)/(T(2.0) - p)) - hi)

# scale back
if k > -significand_bits(T)
# multiply by 2.0 first to prevent overflow, which helps extend the range
k == exponent_max(T) && return y*T(2.0)*T(2.0)^(exponent_max(T) - 1)
twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
return y*twopk
else
# add significand_bits(T) + 1 to lift the range outside the subnormals
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
return y*twopk*T(2.0)^(-significand_bits(T) - 1)
end
elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
# Taylor approximation for small x
return T(1.0) + x
else
# primary range with k = 0, so compute approximation directly
z = x*x
p = x - z*exp_kernel(z)
return T(1.0) - ((x*p)/(p - T(2.0)) - x)
end
end
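To make the argument reduction concrete, a small sketch (mine, not part of the PR; constants copied from the definitions above) tracing step 1 for x = 10.0 and checking the scale-back identity:

    # x = k*log(2) + r with |r| <= 0.5*log(2), so exp(x) = 2^k * exp(r)
    x  = 10.0
    n  = round(1.442695040888963407 * x)            # round(LOG2E*x) -> 14.0
    k  = Int32(n)
    hi = muladd(n, -6.93147180369123816490e-1, x)   # x - n*LN2U(Float64)
    lo = n * 1.90821492927058770002e-10             # n*LN2L(Float64)
    r  = hi - lo                                    # reduced argument, r ≈ 0.29594
    @assert abs(r) <= 0.5*log(2)
    @assert isapprox(exp(x), 2.0^k * exp(r); rtol = 1e-14)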
1 change: 1 addition & 0 deletions contrib/add_license_to_files.jl
@@ -34,6 +34,7 @@ const skipfiles = [
# files to check - already copyright
# see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98099389
"../base/special/trig.jl",
"../base/special/exp.jl"
"../base/linalg/givens.jl",
#
"../src/abi_llvm.cpp",
102 changes: 67 additions & 35 deletions test/math.jl
@@ -23,8 +23,8 @@
end

@testset "constants" begin
@test !(pi == e)
@test !(e == 1//2)
@test pi != e
@test e != 1//2
@test 1//2 <= e
@test e <= 15//3
@test big(1//2) < e
@@ -34,8 +34,8 @@ end
@test e^2.4 == exp(2.4)
@test e^(2//3) == exp(2//3)

@test Float16(3.) < pi
@test pi < Float16(4.)
@test Float16(3.0) < pi
@test pi < Float16(4.0)
@test contains(sprint(show,π),"3.14159")
end

@@ -71,37 +71,51 @@ end
@test isnan(x)
@test y == 0

# more ldexp tests
@test ldexp(T(0.8), 4) === T(12.8)
@test ldexp(T(-0.854375), 5) === T(-27.34)
@test ldexp(T(1.0), typemax(Int)) === T(Inf)
@test ldexp(T(1.0), typemin(Int)) === T(0.0)
@test ldexp(prevfloat(realmin(T)), typemax(Int)) === T(Inf)
@test ldexp(prevfloat(realmin(T)), typemin(Int)) === T(0.0)

@test ldexp(T(0.8), Int128(4)) === T(12.8)
@test ldexp(T(-0.854375), Int128(5)) === T(-27.34)
@test ldexp(T(1.0), typemax(Int128)) === T(Inf)
@test ldexp(T(1.0), typemin(Int128)) === T(0.0)
@test ldexp(prevfloat(realmin(T)), typemax(Int128)) === T(Inf)
@test ldexp(prevfloat(realmin(T)), typemin(Int128)) === T(0.0)

@test ldexp(T(0.8), BigInt(4)) === T(12.8)
@test ldexp(T(-0.854375), BigInt(5)) === T(-27.34)
@test ldexp(T(1.0), BigInt(typemax(Int128))) === T(Inf)
@test ldexp(T(1.0), BigInt(typemin(Int128))) === T(0.0)
@test ldexp(prevfloat(realmin(T)), BigInt(typemax(Int128))) === T(Inf)
@test ldexp(prevfloat(realmin(T)), BigInt(typemin(Int128))) === T(0.0)

# Test also against BigFloat reference. Needs to be exactly rounded.
@test ldexp(realmin(T), -1) == T(ldexp(big(realmin(T)), -1))
@test ldexp(realmin(T), -2) == T(ldexp(big(realmin(T)), -2))
@test ldexp(realmin(T)/2, 0) == T(ldexp(big(realmin(T)/2), 0))
@test ldexp(realmin(T)/3, 0) == T(ldexp(big(realmin(T)/3), 0))
@test ldexp(realmin(T)/3, -1) == T(ldexp(big(realmin(T)/3), -1))
@test ldexp(realmin(T)/3, 11) == T(ldexp(big(realmin(T)/3), 11))
@test ldexp(realmin(T)/11, -10) == T(ldexp(big(realmin(T)/11), -10))
@test ldexp(-realmin(T)/11, -10) == T(ldexp(big(-realmin(T)/11), -10))
@testset "ldexp function" begin
@test ldexp(T(0.0), 0) === T(0.0)
@test ldexp(T(-0.0), 0) === T(-0.0)
@test ldexp(T(Inf), 1) === T(Inf)
@test ldexp(T(Inf), 10000) === T(Inf)
@test ldexp(T(-Inf), 1) === T(-Inf)
@test ldexp(T(NaN), 10) === T(NaN)
@test ldexp(T(1.0), 0) === T(1.0)
@test ldexp(T(0.8), 4) === T(12.8)
@test ldexp(T(-0.854375), 5) === T(-27.34)
@test ldexp(T(1.0), typemax(Int)) === T(Inf)
@test ldexp(T(1.0), typemin(Int)) === T(0.0)
@test ldexp(prevfloat(realmin(T)), typemax(Int)) === T(Inf)
@test ldexp(prevfloat(realmin(T)), typemin(Int)) === T(0.0)

@test ldexp(T(0.0), Int128(0)) === T(0.0)
@test ldexp(T(-0.0), Int128(0)) === T(-0.0)
@test ldexp(T(1.0), Int128(0)) === T(1.0)
@test ldexp(T(0.8), Int128(4)) === T(12.8)
@test ldexp(T(-0.854375), Int128(5)) === T(-27.34)
@test ldexp(T(1.0), typemax(Int128)) === T(Inf)
@test ldexp(T(1.0), typemin(Int128)) === T(0.0)
@test ldexp(prevfloat(realmin(T)), typemax(Int128)) === T(Inf)
@test ldexp(prevfloat(realmin(T)), typemin(Int128)) === T(0.0)

@test ldexp(T(0.0), BigInt(0)) === T(0.0)
@test ldexp(T(-0.0), BigInt(0)) === T(-0.0)
@test ldexp(T(1.0), BigInt(0)) === T(1.0)
@test ldexp(T(0.8), BigInt(4)) === T(12.8)
@test ldexp(T(-0.854375), BigInt(5)) === T(-27.34)
@test ldexp(T(1.0), BigInt(typemax(Int128))) === T(Inf)
@test ldexp(T(1.0), BigInt(typemin(Int128))) === T(0.0)
@test ldexp(prevfloat(realmin(T)), BigInt(typemax(Int128))) === T(Inf)
@test ldexp(prevfloat(realmin(T)), BigInt(typemin(Int128))) === T(0.0)

# Test also against BigFloat reference. Needs to be exactly rounded.
@test ldexp(realmin(T), -1) == T(ldexp(big(realmin(T)), -1))
@test ldexp(realmin(T), -2) == T(ldexp(big(realmin(T)), -2))
@test ldexp(realmin(T)/2, 0) == T(ldexp(big(realmin(T)/2), 0))
@test ldexp(realmin(T)/3, 0) == T(ldexp(big(realmin(T)/3), 0))
@test ldexp(realmin(T)/3, -1) == T(ldexp(big(realmin(T)/3), -1))
@test ldexp(realmin(T)/3, 11) == T(ldexp(big(realmin(T)/3), 11))
@test ldexp(realmin(T)/11, -10) == T(ldexp(big(realmin(T)/11), -10))
@test ldexp(-realmin(T)/11, -10) == T(ldexp(big(-realmin(T)/11), -10))
end
end
end

@@ -235,6 +249,24 @@ end
@test exp2(Float16(2.)) ≈ exp2(2.)
@test log(e) == 1

@testset "exp function" for T in (Float64, Float32)
@testset "$T accuracy" begin
X = map(T, vcat(-10:0.0002:10, -80:0.001:80, 2.0^-27, 2.0^-28, 2.0^-14, 2.0^-13))
for x in X
y, yb = exp(x), exp(big(x))
@test abs(y-yb) <= 1.0*eps(T(yb))
end
end
@testset "$T edge cases" begin
@test isnan(exp(T(NaN)))
@test exp(T(-Inf)) === T(0.0)
@test exp(T(Inf)) === T(Inf)
@test exp(T(0.0)) === T(1.0) # exact
@test exp(T(5000.0)) === T(Inf)
@test exp(T(-5000.0)) === T(0.0)
end
end

@testset "test abstractarray trig fxns" begin
TAA = rand(2,2)
TAA = (TAA + TAA.')/2.