Skip to content

Commit

Permalink
Add doc strings and explicit BigInt method for lstirling_asym. (#36)
Browse files Browse the repository at this point in the history
* Add doc strings and explicit BigInt method for lstirling_asym.

* Suppress warning on v0.7

* some minor changes.

* Changes suggested by Simon B. and Andreas

* Add link to DLMF.

* Clean up tests with testset and Compat.Test

* Add links to `lstirling_asym`

* log -> \log

* Allow failures on nightly Julia version
  • Loading branch information
dmbates committed Mar 15, 2018
1 parent 755d8b8 commit 5321025
Show file tree
Hide file tree
Showing 7 changed files with 256 additions and 158 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ julia:
- nightly
notifications:
email: false
# Avoids a Travis bug on Linux 0.4
git:
depth: 999999
matrix:
allow_failures:
- julia: nightly
# uncomment the following lines to override the default test script
#script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
Expand Down
138 changes: 108 additions & 30 deletions src/basicfuns.jl
Original file line number Diff line number Diff line change
@@ -1,45 +1,97 @@
# common facilities

# scalar functions
"""
    xlogx(x::Real)

Return `x * log(x)` for `x ≥ 0`, handling `x = 0` by taking the downward limit
(which is zero).

```jldoctest
julia> StatsFuns.xlogx(0)
0.0
```
"""
function xlogx(x::Real)
    if x > zero(x)
        return x * log(x)
    else
        # Limit as x → 0⁺; zero(log(x)) gives the right floating-point type.
        return zero(log(x))
    end
end

"""
    xlogy(x::Real, y::Real)

Return `x * log(y)` for `y > 0` with correct limit at `x = 0`.
"""
function xlogy(x::T, y::T) where {T<:Real}
    return x > zero(T) ? x * log(y) : zero(log(x))
end

# Mixed-type arguments are promoted to a common type first.
xlogy(x::Real, y::Real) = xlogy(promote(x, y)...)

# logistic: 1 / (1 + exp(-x))
#
logistic(x::Real) = one(x) / (one(x) + exp(-x))
"""
    logistic(x::Real)

The [logistic](https://en.wikipedia.org/wiki/Logistic_function) sigmoid function
mapping a real number to a value in the interval ``[0, 1]``,
```math
\\sigma(x) = \\frac{1}{e^{-x} + 1} = \\frac{e^x}{1+e^x}.
```
Its inverse is the [`logit`](@ref) function.
"""
logistic(x::Real) = inv(exp(-x) + one(x))

"""
    logit(x::Real)

The [logit](https://en.wikipedia.org/wiki/Logit) or log-odds transformation,
```math
\\log\\left(\\frac{x}{1-x}\\right)
```
defined for `0 < x < 1`. Its inverse is the [`logistic`](@ref) function.
"""
logit(x::Real) = log(x / (one(x) - x))

"""
    log1psq(x::Real)

Return `log(1+x^2)` evaluated carefully for `abs(x)` very small or very large.
"""
log1psq(x::Real) = log1p(abs2(x))

# For IEEE floats, abs2(x) loses precision / overflows once abs(x) exceeds
# maxintfloat; there log(1+x^2) ≈ log(x^2) = 2log(|x|) to full precision.
function log1psq(x::Union{Float32,Float64})
    ax = abs(x)
    ax < maxintfloat(x) ? log1p(abs2(ax)) : 2 * log(ax)
end

"""
    log1pexp(x::Real)

Return `log(1+exp(x))` evaluated carefully for largish `x`.

This is also called the ["softplus"](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))
transformation, being a smooth approximation to `max(0,x)`. Its inverse is [`logexpm1`](@ref).
"""
# Branch thresholds: below 18 the naive form is exact enough; between 18 and
# 33.3, log1p(exp(x)) ≈ x + exp(-x); above that, exp(-x) underflows and the
# answer is x itself (converted to the float type via oftype).
log1pexp(x::Real) = x < 18.0 ? log1p(exp(x)) : x < 33.3 ? x + exp(-x) : oftype(exp(-x), x)
log1pexp(x::Float32) = x < 9.0f0 ? log1p(exp(x)) : x < 16.0f0 ? x + exp(-x) : oftype(exp(-x), x)

"""
    log1mexp(x::Real)

Return `log(1 - exp(x))`.

See:
* Martin Maechler (2012) "Accurately Computing log(1 − exp(− |a|))",
  http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf

Note: different than Maechler (2012), no negation inside parentheses.
"""
# Switch at log(1/2): for x < loghalf, exp(x) is small and log1p is accurate;
# otherwise expm1 avoids cancellation in 1 - exp(x).
log1mexp(x::Real) = x < loghalf ? log1p(-exp(x)) : log(-expm1(x))

"""
    log2mexp(x::Real)

Return `log(2 - exp(x))` evaluated as `log1p(-expm1(x))`.
"""
# log1p(-expm1(x)) = log(1 - (exp(x) - 1)) = log(2 - exp(x)), without the
# cancellation the naive form suffers near x = 0.
log2mexp(x::Real) = log1p(-expm1(x))

"""
    logexpm1(x::Real)

Return `log(exp(x) - 1)` or the "invsoftplus" function.
It is the inverse of [`log1pexp`](@ref) (aka "softplus").
"""
# Same branch structure as log1pexp: naive form, then x - exp(-x), then x.
logexpm1(x::Real) = x <= 18.0 ? log(expm1(x)) : x <= 33.3 ? x - exp(-x) : oftype(exp(-x), x)
logexpm1(x::Float32) = x <= 9f0 ? log(expm1(x)) : x <= 16f0 ? x - exp(-x) : oftype(exp(-x), x)

Expand All @@ -56,10 +108,13 @@ Compat.@dep_vectorize_1arg Real logexpm1
const softplus = log1pexp
const invsoftplus = logexpm1

# log1pmx: log(1 + x) - x
#
# use naive calculation or range reduction outside kernel range.
# accurate ~2ulps for all x
"""
log1pmx(x::Float64)
Return `log(1 + x) - x`
Use naive calculation or range reduction outside kernel range. Accurate ~2ulps for all `x`.
"""
function log1pmx(x::Float64)
if !(-0.7 < x < 0.9)
return log1p(x) - x
Expand All @@ -80,8 +135,11 @@ function log1pmx(x::Float64)
end
end

# logmxp1: log(x) - x + 1
#
"""
logmxp1(x::Float64)
Return `log(x) - x + 1` carefully evaluated.
"""
function logmxp1(x::Float64)
if x <= 0.3
return (log(x) + 1.0) - x
Expand Down Expand Up @@ -115,15 +173,23 @@ function _log1pmx_ker(x::Float64)
end


## logsumexp
"""
    logsumexp(x::Real, y::Real)

Return `log(exp(x) + exp(y))`, avoiding intermediate overflow/underflow.
"""
function logsumexp(x::T, y::T) where T<:Real
    # Equal infinite arguments need special handling: the general formula
    # below would otherwise produce Inf - Inf = NaN.
    x == y && abs(x) == Inf && return x
    # Factor out the larger argument so the exponential never overflows.
    x > y ? x + log1p(exp(y - x)) : y + log1p(exp(x - y))
end

logsumexp(x::Real, y::Real) = logsumexp(promote(x, y)...)

"""
logsumexp(x::AbstractArray{T}) where T<:Real
Return `log(sum(exp, x))`, evaluated avoiding intermediate overflow/underflow.
"""
function logsumexp(x::AbstractArray{T}) where T<:Real
S = typeof(exp(zero(T))) # because of 0.4.0
isempty(x) && return -S(Inf)
Expand All @@ -136,8 +202,15 @@ function logsumexp(x::AbstractArray{T}) where T<:Real
log(s) + u
end

## softmax
"""
softmax!(r::AbstractArray, x::AbstractArray)
Overwrite `r` with the `softmax` (or _normalized exponential_) transformation of `x`
That is, `r` is overwritten with `exp.(x)`, normalized to sum to 1.
See the [Wikipedia entry](https://en.wikipedia.org/wiki/Softmax_function)
"""
function softmax!(r::AbstractArray{R}, x::AbstractArray{T}) where {R<:AbstractFloat,T<:Real}
n = length(x)
length(r) == n || throw(DimensionMismatch("Inconsistent array lengths."))
Expand All @@ -153,5 +226,10 @@ function softmax!(r::AbstractArray{R}, x::AbstractArray{T}) where {R<:AbstractFl
r
end

# In-place variant: overwrite a float array with its own softmax.
softmax!(x::AbstractArray{<:AbstractFloat}) = softmax!(x, x)

"""
    softmax(x::AbstractArray{<:Real})

Return the [softmax transformation](https://en.wikipedia.org/wiki/Softmax_function)
applied to `x`, i.e. `exp.(x)` normalized to sum to 1.
"""
softmax(x::AbstractArray{<:Real}) = softmax!(similar(x, Float64), x)
10 changes: 10 additions & 0 deletions src/distrs/pois.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ poispdf(λ::Real, x::Real) = exp(poislogpdf(λ, x))
# Log-pdf of the Poisson distribution with mean λ evaluated at x:
# log(λ^x e^{-λ} / x!) = x log(λ) - λ - lgamma(x + 1); xlogy handles λ = 0.
poislogpdf(λ::T, x::T) where {T <: Real} = xlogy(x, λ) - λ - lgamma(x + 1)

# Promote mixed numeric arguments (floating λ) to a common real type.
poislogpdf(λ::Number, x::Number) = poislogpdf(promote(float(λ), x)...)

#=
function poislogpdf(λ::Union{Float32,Float64}, x::Union{Float64,Float32,Integer})
if iszero(λ)
iszero(x) ? zero(λ) : oftype(λ, -Inf)
elseif iszero(x)
else
-lstirling_asym(x + 1)
=#
78 changes: 55 additions & 23 deletions src/misc.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Miscellaneous functions

# Logarithm of multivariate gamma function
#
# See: https://en.wikipedia.org/wiki/Multivariate_gamma_function
#
"""
logmvgamma(p::Int, a::Real)
Return the logarithm of [multivariate gamma function](https://en.wikipedia.org/wiki/Multivariate_gamma_function) ([DLMF 35.3.1](https://dlmf.nist.gov/35.3.1)).
"""
function logmvgamma(p::Int, a::Real)
# NOTE: one(a) factors are here to prevent unnecessary promotion of Float32
res = p * (p - 1) * log(pi * one(a)) / 4
Expand All @@ -13,23 +14,55 @@ function logmvgamma(p::Int, a::Real)
return res
end

# Remainder term after Stirling's approximation to the log-gamma function
# lstirling(x) = lgamma(x) + x - (x-0.5)*log(x) - 0.5*log2π
# = 1/(12x) - 1/(360x^3) + 1/(1260x^5) + ...
#
# Asymptotic expansion from:
#
# Temme, N. (1996) Special functions: An introduction to the classical
# functions of mathematical physics, Wiley, New York, ISBN: 0-471-11313-1,
# Chapter 3.6, pp 61-65.
#
# Relative error of approximation is bounded by
# (174611/125400 x^-19) / (1/12 x^-1 - 1/360 x^-3)
# which is < 1/2 ulp for x >= 10.0
# total numeric error appears to be < 2 ulps
#
"""
    lstirling_asym(x)

The remainder term after
[Stirling's approximation](https://en.wikipedia.org/wiki/Stirling%27s_approximation)
to [`lgamma`](@ref):
```math
\\log \\Gamma(x) \\approx x \\log(x) - x + \\log(2\\pi/x)/2 = \\log(x)(x-1/2) + \\log(2\\pi)/2 - x
```
In Julia syntax, this means:

    lstirling_asym(x) = lgamma(x) + x - (x-0.5)*log(x) - 0.5*log(2π)

For sufficiently large `x`, this can be approximated using the asymptotic
_Stirling's series_ ([DLMF 5.11.1](https://dlmf.nist.gov/5.11.1)):
```math
\\frac{1}{12x} - \\frac{1}{360x^3} + \\frac{1}{1260x^5} - \\frac{1}{1680x^7} + \\ldots
```
The truncation error is bounded by the first omitted term, and is of the same sign.
Relative error of the approximation is bounded by

    (174611/125400 x^-19) / (1/12 x^-1 - 1/360 x^-3)

which is < 1/2 ulp for x >= 10.0, and total numeric error appears to be < 2 ulps.

# References
* Temme, N. (1996) Special functions: An introduction to the classical functions of
  mathematical physics, Wiley, New York, ISBN: 0-471-11313-1, Chapter 3.6, pp 61-65.
* Weisstein, Eric W. ["Stirling's Series."](http://mathworld.wolfram.com/StirlingsSeries.html).
  MathWorld.
* [OEIS A046968](http://oeis.org/A046968) and [OEIS A046969](http://oeis.org/A046969)
  for the series coefficients.
"""
function lstirling_asym end

# BigFloat arguments: evaluate the remainder directly from its definition,
# keeping every intermediate in extended precision.
function lstirling_asym(x::BigFloat)
    return lgamma(x) + x - (x - big(0.5)) * log(x) - log2π / big(2)
end

# Integer arguments (including BigInt, which floats to BigFloat) are
# forwarded to the matching floating-point method.
lstirling_asym(n::Integer) = lstirling_asym(float(n))

# Tables of exact values at small positive integer arguments, computed in
# BigFloat precision and rounded to the table's element type so that integer
# inputs get a fast, fully accurate lookup. The Float32 table must hold
# Float32 values, otherwise lstirling_asym(::Float32) becomes type-unstable
# (returning Float64 for integer arguments).
const lstirlingF64 = Float64[lstirling_asym(k) for k in big(1):big(64)]
const lstirlingF32 = Float32[lstirling_asym(k) for k in big(1):big(40)]

function lstirling_asym(x::Float64)
t = 1.0/(x*x)
isinteger(x) && (0 < x length(lstirlingF64)) && return lstirlingF64[Int(x)]
t = inv(abs2(x))
@horner(t,
8.33333333333333333e-2, # 1/12 x^-1
-2.77777777777777778e-3, # -1/360 x^-3
Expand All @@ -43,13 +76,12 @@ function lstirling_asym(x::Float64)
end

function lstirling_asym(x::Float32)
    # Exact table lookup for small positive integer arguments.
    isinteger(x) && (0 < x ≤ length(lstirlingF32)) && return lstirlingF32[Int(x)]
    # Evaluate the asymptotic series as a polynomial in t = 1/x^2.
    t = inv(abs2(x))
    @horner(t,
         8.333333333333f-2, #  1/12 x^-1
        -2.777777777777f-3, # -1/360 x^-3
         7.936507936508f-4, #  1/1260 x^-5
        -5.952380952381f-4, # -1/1680 x^-7
         8.417508417508f-4)/x # 1/1188 x^-9
end

lstirling_asym(x::Integer) = lstirling_asym(float(x))

0 comments on commit 5321025

Please sign in to comment.