/
exp.jl
259 lines (238 loc) · 14.8 KB
/
exp.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
# This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place.
# Values for which this trick doesn't work are going to have outputs of 0 or Inf.
MAGIC_ROUND_CONST(::Type{Float64}) = 6.755399441055744e15
MAGIC_ROUND_CONST(::Type{Float32}) = 1.048576f7
# max, min, and subnormal arguments
# max_exp = T(exponent_bias(T)*log(base, big(2)) + log(base, 2 - big(2.0)^-significand_bits(T)))
MAX_EXP(n::Val{2}, ::Type{Float32}) = 128.0f0
MAX_EXP(n::Val{2}, ::Type{Float64}) = 1024.0
MAX_EXP(n::Val{:ℯ}, ::Type{Float32}) = 88.72284f0
MAX_EXP(n::Val{:ℯ}, ::Type{Float64}) = 709.782712893384
MAX_EXP(n::Val{10}, ::Type{Float32}) = 38.53184f0
MAX_EXP(n::Val{10}, ::Type{Float64}) = 308.25471555991675
# min_exp = T(-(exponent_bias(T)+significand_bits(T)) * log(base, big(2)))
MIN_EXP(n::Val{2}, ::Type{Float32}) = -150.0f0
MIN_EXP(n::Val{2}, ::Type{Float64}) = -1075.0
MIN_EXP(n::Val{:ℯ}, ::Type{Float32}) = -103.97208f0
MIN_EXP(n::Val{:ℯ}, ::Type{Float64}) = -745.1332191019412
MIN_EXP(n::Val{10}, ::Type{Float32}) = -45.1545f0
MIN_EXP(n::Val{10}, ::Type{Float64}) = -323.60724533877976
# subnorm_exp = abs(log(base, floatmin(T)))
# these vals are positive since it's easier to take abs(x) than -abs(x)
SUBNORM_EXP(n::Val{2}, ::Type{Float32}) = 126.00001f0
SUBNORM_EXP(n::Val{2}, ::Type{Float64}) = 1022.0
SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float32}) = 87.33655f0
SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float64}) = 708.3964185322641
SUBNORM_EXP(n::Val{10}, ::Type{Float32}) = 37.92978f0
SUBNORM_EXP(n::Val{10}, ::Type{Float64}) = 307.6526555685887
# 256/log(base, 2) (For Float64 reductions)
LogBo256INV(::Val{2}, ::Type{Float64}) = 256.0
LogBo256INV(::Val{:ℯ}, ::Type{Float64}) = 369.3299304675746
LogBo256INV(::Val{10}, ::Type{Float64}) = 850.4135922911647
# -log(base, 2)/256 in upper and lower bits
# Upper is truncated to only have 34 bits of significand since N has at most
# ceil(log2(-MIN_EXP(base, Float64)*LogBo256INV(Val(2), Float64))) = 19 bits.
# This ensures no rounding when multiplying LogBo256U*N for FMAless hardware
LogBo256U(::Val{2}, ::Type{Float64}) = -0.00390625
LogBo256U(::Val{:ℯ}, ::Type{Float64}) = -0.002707606173999011
LogBo256U(::Val{10}, ::Type{Float64}) = -0.0011758984204561784
LogBo256L(::Val{2}, ::Type{Float64}) = 0.0
LogBo256L(::Val{:ℯ}, ::Type{Float64}) = -6.327543041662719e-14
LogBo256L(::Val{10}, ::Type{Float64}) = -1.0624811566412999e-13
# 1/log(base, 2) (For Float32 reductions)
LogBINV(::Val{2}, ::Type{Float32}) = 1.0f0
LogBINV(::Val{:ℯ}, ::Type{Float32}) = 1.442695f0
LogBINV(::Val{10}, ::Type{Float32}) = 3.321928f0
# -log(base, 2) in upper and lower bits
# Upper is truncated to only have 16 bits of significand since N has at most
# ceil(log2(-MIN_EXP(n, Float32)*LogBINV(Val(2), Float32))) = 8 bits.
# This ensures no rounding when multiplying LogBU*N for FMAless hardware
LogBU(::Val{2}, ::Type{Float32}) = -1.0f0
LogBU(::Val{:ℯ}, ::Type{Float32}) = -0.69314575f0
LogBU(::Val{10}, ::Type{Float32}) = -0.3010254f0
LogBL(::Val{2}, ::Type{Float32}) = 0.0f0
LogBL(::Val{:ℯ}, ::Type{Float32}) = -1.4286068f-6
LogBL(::Val{10}, ::Type{Float32}) = -4.605039f-6
# Range reduced kernels
@inline function expm1b_kernel(::Val{2}, x::Float64)
return x * evalpoly(x, (0.6931471805599393, 0.24022650695910058,
0.05550411502333161, 0.009618129548366803))
end
@inline function expm1b_kernel(::Val{:ℯ}, x::Float64)
return x * evalpoly(x, (0.9999999999999912, 0.4999999999999997,
0.1666666857598779, 0.04166666857598777))
end
@inline function expm1b_kernel(::Val{10}, x::Float64)
return x * evalpoly(x, (2.3025850929940255, 2.6509490552391974,
2.034678825384765, 1.1712552025835192))
end
@inline function expb_kernel(::Val{2}, x::Float32)
return evalpoly(x, (1.0f0, 0.6931472f0, 0.2402265f0,
0.05550411f0, 0.009618025f0,
0.0013333423f0, 0.00015469732f0, 1.5316464f-5))
end
@inline function expb_kernel(::Val{:ℯ}, x::Float32)
return evalpoly(x, (1.0f0, 1.0f0, 0.5f0, 0.16666667f0,
0.041666217f0, 0.008333249f0,
0.001394858f0, 0.00019924171f0))
end
@inline function expb_kernel(::Val{10}, x::Float32)
return evalpoly(x, (1.0f0, 2.3025851f0, 2.650949f0,
2.0346787f0, 1.1712426f0, 0.53937745f0,
0.20788547f0, 0.06837386f0))
end
# Table stores data with 60 sig figs by using the fact that the first 12 bits of all the
# values would be the same if stored as regular Float64.
# This only gains 8 bits since the least significant 4 bits of the exponent
# of the small part are not the same for all table entries
const JU_MASK = typemax(UInt64)>>12
const JL_MASK = typemax(UInt64)>>8
const JU_CONST = 0x3FF0000000000000
const JL_CONST = 0x3C00000000000000
#function make_table(size)
# t_array = zeros(UInt64, size);
# for j in 1:size
# val = 2.0^(BigFloat(j-1)/size)
# valU = Float64(val, RoundDown)
# valL = Float64(val-valU)
# valU = reinterpret(UInt64, valU) & JU_MASK
# valL = ((reinterpret(UInt64, valL) & JL_MASK)>>44)<<52
# t_array[j] = valU | valL
# end
# return Tuple(t_array)
#end
#const J_TABLE = make_table(256);
const J_TABLE = (0x0000000000000000, 0xaac00b1afa5abcbe, 0x9b60163da9fb3335, 0xab502168143b0280, 0xadc02c9a3e778060,
0x656037d42e11bbcc, 0xa7a04315e86e7f84, 0x84c04e5f72f654b1, 0x8d7059b0d3158574, 0xa510650a0e3c1f88,
0xa8d0706b29ddf6dd, 0x83207bd42b72a836, 0x6180874518759bc8, 0xa4b092bdf66607df, 0x91409e3ecac6f383,
0x85d0a9c79b1f3919, 0x98a0b5586cf9890f, 0x94f0c0f145e46c85, 0x9010cc922b7247f7, 0xa210d83b23395deb,
0x4030e3ec32d3d1a2, 0xa5b0efa55fdfa9c4, 0xae40fb66affed31a, 0x8d41073028d7233e, 0xa4911301d0125b50,
0xa1a11edbab5e2ab5, 0xaf712abdc06c31cb, 0xae8136a814f204aa, 0xa661429aaea92ddf, 0xa9114e95934f312d,
0x82415a98c8a58e51, 0x58f166a45471c3c2, 0xab9172b83c7d517a, 0x70917ed48695bbc0, 0xa7718af9388c8de9,
0x94a1972658375d2f, 0x8e51a35beb6fcb75, 0x97b1af99f8138a1c, 0xa351bbe084045cd3, 0x9001c82f95281c6b,
0x9e01d4873168b9aa, 0xa481e0e75eb44026, 0xa711ed5022fcd91c, 0xa201f9c18438ce4c, 0x8dc2063b88628cd6,
0x935212be3578a819, 0x82a21f49917ddc96, 0x8d322bdda27912d1, 0x99b2387a6e756238, 0x8ac2451ffb82140a,
0x8ac251ce4fb2a63f, 0x93e25e85711ece75, 0x82b26b4565e27cdd, 0x9e02780e341ddf29, 0xa2d284dfe1f56380,
0xab4291ba7591bb6f, 0x86129e9df51fdee1, 0xa352ab8a66d10f12, 0xafb2b87fd0dad98f, 0xa572c57e39771b2e,
0x9002d285a6e4030b, 0x9d12df961f641589, 0x71c2ecafa93e2f56, 0xaea2f9d24abd886a, 0x86f306fe0a31b715,
0x89531432edeeb2fd, 0x8a932170fc4cd831, 0xa1d32eb83ba8ea31, 0x93233c08b26416ff, 0xab23496266e3fa2c,
0xa92356c55f929ff0, 0xa8f36431a2de883a, 0xa4e371a7373aa9ca, 0xa3037f26231e7549, 0xa0b38cae6d05d865,
0xa3239a401b7140ee, 0xad43a7db34e59ff6, 0x9543b57fbfec6cf4, 0xa083c32dc313a8e4, 0x7fe3d0e544ede173,
0x8ad3dea64c123422, 0xa943ec70df1c5174, 0xa413fa4504ac801b, 0x8bd40822c367a024, 0xaf04160a21f72e29,
0xa3d423fb27094689, 0xab8431f5d950a896, 0x88843ffa3f84b9d4, 0x48944e086061892d, 0xae745c2042a7d231,
0x9c946a41ed1d0057, 0xa1e4786d668b3236, 0x73c486a2b5c13cd0, 0xab1494e1e192aed1, 0x99c4a32af0d7d3de,
0xabb4b17dea6db7d6, 0x7d44bfdad5362a27, 0x9054ce41b817c114, 0x98e4dcb299fddd0d, 0xa564eb2d81d8abfe,
0xa5a4f9b2769d2ca6, 0x7a2508417f4531ee, 0xa82516daa2cf6641, 0xac65257de83f4eee, 0xabe5342b569d4f81,
0x879542e2f4f6ad27, 0xa8a551a4ca5d920e, 0xa7856070dde910d1, 0x99b56f4736b527da, 0xa7a57e27dbe2c4ce,
0x82958d12d497c7fd, 0xa4059c0827ff07cb, 0x9635ab07dd485429, 0xa245ba11fba87a02, 0x3c45c9268a5946b7,
0xa195d84590998b92, 0x9ba5e76f15ad2148, 0xa985f6a320dceb70, 0xa60605e1b976dc08, 0x9e46152ae6cdf6f4,
0xa636247eb03a5584, 0x984633dd1d1929fd, 0xa8e6434634ccc31f, 0xa28652b9febc8fb6, 0xa226623882552224,
0xa85671c1c70833f5, 0x60368155d44ca973, 0x880690f4b19e9538, 0xa216a09e667f3bcc, 0x7a36b052fa75173e,
0xada6c012750bdabe, 0x9c76cfdcddd47645, 0xae46dfb23c651a2e, 0xa7a6ef9298593ae4, 0xa9f6ff7df9519483,
0x59d70f7466f42e87, 0xaba71f75e8ec5f73, 0xa6f72f8286ead089, 0xa7a73f9a48a58173, 0x90474fbd35d7cbfd,
0xa7e75feb564267c8, 0x9b777024b1ab6e09, 0x986780694fde5d3f, 0x934790b938ac1cf6, 0xaaf7a11473eb0186,
0xa207b17b0976cfda, 0x9f17c1ed0130c132, 0x91b7d26a62ff86f0, 0x7057e2f336cf4e62, 0xabe7f3878491c490,
0xa6c80427543e1a11, 0x946814d2add106d9, 0xa1582589994cce12, 0x9998364c1eb941f7, 0xa9c8471a4623c7ac,
0xaf2857f4179f5b20, 0xa01868d99b4492ec, 0x85d879cad931a436, 0x99988ac7d98a6699, 0x9d589bd0a478580f,
0x96e8ace5422aa0db, 0x9ec8be05bad61778, 0xade8cf3216b5448b, 0xa478e06a5e0866d8, 0x85c8f1ae99157736,
0x959902fed0282c8a, 0xa119145b0b91ffc5, 0xab2925c353aa2fe1, 0xae893737b0cdc5e4, 0xa88948b82b5f98e4,
0xad395a44cbc8520e, 0xaf296bdd9a7670b2, 0xa1797d829fde4e4f, 0x7ca98f33e47a22a2, 0xa749a0f170ca07b9,
0xa119b2bb4d53fe0c, 0x7c79c49182a3f090, 0xa579d674194bb8d4, 0x7829e86319e32323, 0xaad9fa5e8d07f29d,
0xa65a0c667b5de564, 0x9c6a1e7aed8eb8bb, 0x963a309bec4a2d33, 0xa2aa42c980460ad7, 0xa16a5503b23e255c,
0x650a674a8af46052, 0x9bca799e1330b358, 0xa58a8bfe53c12e58, 0x90fa9e6b5579fdbf, 0x889ab0e521356eba,
0xa81ac36bbfd3f379, 0x97ead5ff3a3c2774, 0x97aae89f995ad3ad, 0xa5aafb4ce622f2fe, 0xa21b0e07298db665,
0x94db20ce6c9a8952, 0xaedb33a2b84f15fa, 0xac1b468415b749b0, 0xa1cb59728de55939, 0x92ab6c6e29f1c52a,
0xad5b7f76f2fb5e46, 0xa24b928cf22749e3, 0xa08ba5b030a10649, 0xafcbb8e0b79a6f1e, 0x823bcc1e904bc1d2,
0xafcbdf69c3f3a206, 0xa08bf2c25bd71e08, 0xa89c06286141b33c, 0x811c199bdd85529c, 0xa48c2d1cd9fa652b,
0x9b4c40ab5fffd07a, 0x912c544778fafb22, 0x928c67f12e57d14b, 0xa86c7ba88988c932, 0x71ac8f6d9406e7b5,
0xaa0ca3405751c4da, 0x750cb720dcef9069, 0xac5ccb0f2e6d1674, 0xa88cdf0b555dc3f9, 0xa2fcf3155b5bab73,
0xa1ad072d4a07897b, 0x955d1b532b08c968, 0xa15d2f87080d89f1, 0x93dd43c8eacaa1d6, 0x82ed5818dcfba487,
0x5fed6c76e862e6d3, 0xa77d80e316c98397, 0x9a0d955d71ff6075, 0x9c2da9e603db3285, 0xa24dbe7cd63a8314,
0x92ddd321f301b460, 0xa1ade7d5641c0657, 0xa72dfc97337b9b5e, 0xadae11676b197d16, 0xa42e264614f5a128,
0xa30e3b333b16ee11, 0x839e502ee78b3ff6, 0xaa7e653924676d75, 0x92de7a51fbc74c83, 0xa77e8f7977cdb73f,
0xa0bea4afa2a490d9, 0x948eb9f4867cca6e, 0xa1becf482d8e67f0, 0x91cee4aaa2188510, 0x9dcefa1bee615a27,
0xa66f0f9c1cb64129, 0x93af252b376bba97, 0xacdf3ac948dd7273, 0x99df50765b6e4540, 0x9faf6632798844f8,
0xa12f7bfdad9cbe13, 0xaeef91d802243c88, 0x874fa7c1819e90d8, 0xacdfbdba3692d513, 0x62efd3c22b8f71f1, 0x74afe9d96b2a23d9)
@inline function table_unpack(ind)
j = @inbounds J_TABLE[ind]
jU = reinterpret(Float64, JU_CONST | (j&JU_MASK))
jL = reinterpret(Float64, JL_CONST | (j>>8))
return jU, jL
end
# Method for Float64
# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/512. Given x, base b,
# find r and integers k, j such that
# x = (k + j/256)*log(b, 2) + r, 0 <= j < 256, |r| <= log(b,2)/512.
#
# 2. Approximate b^r-1 by 3rd-degree minimax polynomial p_b(r) on the interval [-log(b,2)/512, log(b,2)/512].
# Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon.
#
# 3. Scale back: b^x = 2^k * 2^(j/256) * (1 + p_b(r))
# Since the range of possible j is small, 2^(j/256) is stored for all possible values in slightly extended precision.
# Method for Float32
# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/2. Given x, base b,
# find r and integer N such that
# x = N*log(b, 2) + r, |r| <= log(b,2)/2.
#
# 2. Approximate b^r by 7th-degree minimax polynomial p_b(r) on the interval [-log(b,2)/2, log(b,2)/2].
# 3. Scale back: b^x = 2^N * p_b(r)
# For both, a little extra care needs to be taken if b^r is subnormal.
# The solution is to do the scaling back in 2 steps as just messing with the exponent wouldn't work.
for (func, base) in (:exp2=>Val(2), :exp=>Val(:ℯ), :exp10=>Val(10))
@eval begin
function ($func)(x::Real)
xf = float(x)
x === xf && throw(MethodError($func, (x,)))
return ($func)(xf)
end
function ($func)(x::T) where T<:Float64
N_float = muladd(x, LogBo256INV($base, T), MAGIC_ROUND_CONST(T))
N = reinterpret(uinttype(T), N_float) % Int32
N_float -= MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV($base, T))
r = muladd(N_float, LogBo256U($base, T), x)
r = muladd(N_float, LogBo256L($base, T), r)
k = N >> 8
jU, jL = table_unpack(N&255 +1)
small_part = muladd(jU, expm1b_kernel($base, r), jL) + jU
if !(abs(x) <= SUBNORM_EXP($base, T))
x >= MAX_EXP($base, T) && return Inf
x <= MIN_EXP($base, T) && return 0.0
if k <= -53
# The UInt64 forces promotion. (Only matters for 32 bit systems.)
twopk = (k + UInt64(53)) << 52
return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
end
end
twopk = Int64(k) << 52
return reinterpret(T, twopk + reinterpret(Int64, small_part))
end
function ($func)(x::T) where T<:Float32
N_float = round(x*LogBINV($base, T))
N = unsafe_trunc(Int32, N_float)
r = muladd(N_float, LogBU($base, T), x)
r = muladd(N_float, LogBL($base, T), r)
small_part = expb_kernel($base, r)
if !(abs(x) <= SUBNORM_EXP($base, T))
x > MAX_EXP($base, T) && return Inf32
x < MIN_EXP($base, T) && return 0.0f0
if N<=Int32(-24)
twopk = reinterpret(T, (N+Int32(151)) << Int32(23))
return (twopk*small_part)*(2f0^(-24))
end
N == exponent_max(T) && return small_part * T(2.0) * T(2.0)^(exponent_max(T) - 1)
end
twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
return twopk*small_part
end
end
end
@doc """
exp(x)
Compute the natural base exponential of `x`, in other words ``e^x``.
# Examples
```jldoctest
julia> exp(1.0)
2.718281828459045
```
""" exp(x::Real)