diff --git a/.travis.yml b/.travis.yml
index 276fdd7..1186154 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,9 @@ julia:
   - nightly
 notifications:
   email: false
-# Avoids a Travis bug on Linux 0.4
-git:
-  depth: 999999
+matrix:
+  allow_failures:
+    - julia: nightly
 # uncomment the following lines to override the default test script
 #script:
 #  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
diff --git a/src/basicfuns.jl b/src/basicfuns.jl
index 6433456..1a81703 100644
--- a/src/basicfuns.jl
+++ b/src/basicfuns.jl
@@ -1,45 +1,97 @@
 # common facilities

 # scalar functions
+"""
+    xlogx(x::Real)
+Return `x * log(x)` for `x ≥ 0`, handling `x = 0` by taking the downward limit.
+
+```jldoctest
+julia> StatsFuns.xlogx(0)
+0.0
+```
+"""
 xlogx(x::Real) = x > zero(x) ? x * log(x) : zero(log(x))

+"""
+    xlogy(x::Real, y::Real)
+
+Return `x * log(y)` for `y > 0`, with the correct limit at `x = 0`.
+"""
 xlogy(x::T, y::T) where {T<:Real} = x > zero(T) ? x * log(y) : zero(log(x))
 xlogy(x::Real, y::Real) = xlogy(promote(x, y)...)

-# logistic: 1 / (1 + exp(-x))
-#
-logistic(x::Real) = one(x) / (one(x) + exp(-x))
+"""
+    logistic(x::Real)
+
+The [logistic](https://en.wikipedia.org/wiki/Logistic_function) sigmoid function mapping a real number to a value in the interval [0,1],
+```math
+\\sigma(x) = \\frac{1}{e^{-x} + 1} = \\frac{e^x}{1+e^x}.
+```
+
+Its inverse is the [`logit`](@ref) function.
+"""
+logistic(x::Real) = inv(exp(-x) + one(x))
+
+"""
+    logit(x::Real)

-# logit: log(x / (1 - x))
-#
+The [logit](https://en.wikipedia.org/wiki/Logit) or log-odds transformation,
+```math
+\\log\\left(\\frac{x}{1-x}\\right), \\text{ where } 0 < x < 1.
+```
+Its inverse is the [`logistic`](@ref) function.
+"""
 logit(x::Real) = log(x / (one(x) - x))

-# log1psq: log(1+x^2)
-#
+"""
+    log1psq(x::Real)
+
+Return `log(1+x^2)` evaluated carefully for `abs(x)` very small or very large.
+"""
 log1psq(x::Real) = log1p(abs2(x))
-log1psq(x::Union{Float32,Float64}) = (ax = abs(x); ax < maxintfloat(x) ? log1p(abs2(ax)) : 2 * log(ax))
+function log1psq(x::Union{Float32,Float64})
+    ax = abs(x)
+    ax < maxintfloat(x) ? log1p(abs2(ax)) : 2 * log(ax)
+end
+
+"""
+    log1pexp(x::Real)
+
+Return `log(1+exp(x))` evaluated carefully for largish `x`.
-# log1pexp: log(1+exp(x))
-#
+This is also called the ["softplus"](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))
+transformation, being a smooth approximation to `max(0,x)`. Its inverse is [`logexpm1`](@ref).
+"""
 log1pexp(x::Real) = x < 18.0 ? log1p(exp(x)) : x < 33.3 ? x + exp(-x) : oftype(exp(-x), x)
 log1pexp(x::Float32) = x < 9.0f0 ? log1p(exp(x)) : x < 16.0f0 ? x + exp(-x) : oftype(exp(-x), x)

-# log1mexp: log(1 - exp(x))
-#
-# See:
-#   Martin Maechler (2012) "Accurately Computing log(1 − exp(− |a|))"
-#   http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
-#
-# Note: different than Maechler (2012), no negation inside parantheses
+"""
+    log1mexp(x::Real)
+
+Return `log(1 - exp(x))`.
+
+See:
+ * Martin Maechler (2012) "Accurately Computing log(1 − exp(− |a|))",
+   http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
+
+Note: different from Maechler (2012); no negation inside the parentheses.
+"""
 log1mexp(x::Real) = x < loghalf ? log1p(-exp(x)) : log(-expm1(x))

-# log2mexp: log(2 - exp(x))
-#
+"""
+    log2mexp(x::Real)
+
+Return `log(2 - exp(x))`, evaluated as `log1p(-expm1(x))`.
+"""
 log2mexp(x::Real) = log1p(-expm1(x))

-# logexpm1: log(exp(x) - 1)
-#
+"""
+    logexpm1(x::Real)
+
+Return `log(exp(x) - 1)`, the "invsoftplus" function.
+It is the inverse of [`log1pexp`](@ref) (aka "softplus").
+"""
 logexpm1(x::Real) = x <= 18.0 ? log(expm1(x)) : x <= 33.3 ? x - exp(-x) : oftype(exp(-x), x)
 logexpm1(x::Float32) = x <= 9f0 ? log(expm1(x)) : x <= 16f0 ? x - exp(-x) : oftype(exp(-x), x)
@@ -56,10 +108,13 @@ Compat.@dep_vectorize_1arg Real logexpm1
 const softplus = log1pexp
 const invsoftplus = logexpm1

-# log1pmx: log(1 + x) - x
-#
-# use naive calculation or range reduction outside kernel range.
-# accurate ~2ulps for all x
+"""
+    log1pmx(x::Float64)
+
+Return `log(1 + x) - x`.
+
+Uses naive calculation or range reduction outside the kernel range. Accurate to ~2 ulps for all `x`.
+"""
 function log1pmx(x::Float64)
     if !(-0.7 < x < 0.9)
         return log1p(x) - x
@@ -80,8 +135,11 @@ function log1pmx(x::Float64)
     end
 end

-# logmxp1: log(x) - x + 1
-#
+"""
+    logmxp1(x::Float64)
+
+Return `log(x) - x + 1`, evaluated carefully.
+"""
 function logmxp1(x::Float64)
     if x <= 0.3
         return (log(x) + 1.0) - x
@@ -115,8 +173,11 @@ function _log1pmx_ker(x::Float64)
 end

-## logsumexp
+"""
+    logsumexp(x::Real, y::Real)
+Return `log(exp(x) + exp(y))`, avoiding intermediate overflow/underflow.
+"""
 function logsumexp(x::T, y::T) where T<:Real
     x == y && abs(x) == Inf && return x
     x > y ? x + log1p(exp(y - x)) : y + log1p(exp(x - y))
@@ -124,6 +185,11 @@ end

 logsumexp(x::Real, y::Real) = logsumexp(promote(x, y)...)

+"""
+    logsumexp(x::AbstractArray{T}) where T<:Real
+
+Return `log(sum(exp, x))`, evaluated while avoiding intermediate overflow/underflow.
+"""
 function logsumexp(x::AbstractArray{T}) where T<:Real
     S = typeof(exp(zero(T)))    # because of 0.4.0
     isempty(x) && return -S(Inf)
@@ -136,8 +202,15 @@ function logsumexp(x::AbstractArray{T}) where T<:Real
     log(s) + u
 end

-## softmax
+"""
+    softmax!(r::AbstractArray, x::AbstractArray)
+
+Overwrite `r` with the `softmax` (or _normalized exponential_) transformation of `x`.
+That is, `r` is overwritten with `exp.(x)`, normalized to sum to 1.
+
+See the [Wikipedia entry](https://en.wikipedia.org/wiki/Softmax_function).
+"""
 function softmax!(r::AbstractArray{R}, x::AbstractArray{T}) where {R<:AbstractFloat,T<:Real}
     n = length(x)
     length(r) == n || throw(DimensionMismatch("Inconsistent array lengths."))
@@ -153,5 +226,10 @@ function softmax!(r::AbstractArray{R}, x::AbstractArray{T}) where {R<:AbstractFl
     r
 end

 softmax!(x::AbstractArray{<:AbstractFloat}) = softmax!(x, x)
-softmax(x::AbstractArray{<:Real}) = softmax!(Array{Float64}(size(x)), x)
+"""
+    softmax(x::AbstractArray{<:Real})
+
+Return the [softmax transformation](https://en.wikipedia.org/wiki/Softmax_function) applied to `x`.
+"""
+softmax(x::AbstractArray{<:Real}) = softmax!(similar(x, Float64), x)
diff --git a/src/distrs/pois.jl b/src/distrs/pois.jl
index 55500f9..adedaa6 100644
--- a/src/distrs/pois.jl
+++ b/src/distrs/pois.jl
@@ -18,3 +18,13 @@ poispdf(λ::Real, x::Real) = exp(poislogpdf(λ, x))
 poislogpdf(λ::T, x::T) where {T <: Real} = xlogy(x, λ) - λ - lgamma(x + 1)

 poislogpdf(λ::Number, x::Number) = poislogpdf(promote(float(λ), x)...)
+
+#=
+function poislogpdf(λ::Union{Float32,Float64}, x::Union{Float64,Float32,Integer})
+    if iszero(λ)
+        iszero(x) ? zero(λ) : oftype(λ, -Inf)
+    elseif iszero(x)
+        -λ
+    else
+        -lstirling_asym(x + 1)
+=#
\ No newline at end of file
diff --git a/src/misc.jl b/src/misc.jl
index 01cf04a..822a09b 100644
--- a/src/misc.jl
+++ b/src/misc.jl
@@ -1,9 +1,10 @@
 # Miscellaneous functions

-# Logarithm of multivariate gamma function
-#
-# See: https://en.wikipedia.org/wiki/Multivariate_gamma_function
-#
+"""
+    logmvgamma(p::Int, a::Real)
+
+Return the logarithm of the [multivariate gamma function](https://en.wikipedia.org/wiki/Multivariate_gamma_function) ([DLMF 35.3.1](https://dlmf.nist.gov/35.3.1)).
+"""
 function logmvgamma(p::Int, a::Real)
     # NOTE: one(a) factors are here to prevent unnecessary promotion of Float32
     res = p * (p - 1) * log(pi * one(a)) / 4
@@ -13,23 +14,55 @@ function logmvgamma(p::Int, a::Real)
     return res
 end

-# Remainder term after Stirling's approximation to the log-gamma function
-#     lstirling(x) = lgamma(x) + x - (x-0.5)*log(x) - 0.5*log2π
-#                  = 1/(12x) - 1/(360x^3) + 1/(1260x^5) + ...
-#
-# Asymptotic expansion from:
-#
-#   Temme, N. (1996) Special functions: An introduction to the classical
-#   functions of mathematical physics, Wiley, New York, ISBN: 0-471-11313-1,
-#   Chapter 3.6, pp 61-65.
-#
-# Relative error of approximation is bounded by
-#   (174611/125400 x^-19) / (1/12 x^-1 - 1/360 x^-3)
-# which is < 1/2 ulp for x >= 10.0
-# total numeric error appears to be < 2 ulps
-#
+"""
+    lstirling_asym(x)
+
+The remainder term after
+[Stirling's approximation](https://en.wikipedia.org/wiki/Stirling%27s_approximation)
+to [`lgamma`](@ref):
+
+```math
+\\log \\Gamma(x) \\approx x \\log(x) - x + \\frac{\\log(2\\pi/x)}{2} = \\left(x - \\frac{1}{2}\\right)\\log(x) - x + \\frac{\\log(2\\pi)}{2}
+```
+
+In Julia syntax, this means:
+
+    lstirling_asym(x) = lgamma(x) + x - (x-0.5)*log(x) - 0.5*log(2π)
+
+For sufficiently large `x`, this can be approximated using the asymptotic
+_Stirling's series_ ([DLMF 5.11.1](https://dlmf.nist.gov/5.11.1)):
+
+```math
+\\frac{1}{12x} - \\frac{1}{360x^3} + \\frac{1}{1260x^5} - \\frac{1}{1680x^7} + \\ldots
+```
+
+The truncation error is bounded by the first omitted term, and is of the same sign.
+
+Relative error of the approximation is bounded by
+    (174611/125400 x^-19) / (1/12 x^-1 - 1/360 x^-3)
+which is < 1/2 ulp for `x >= 10.0`, and the total numeric error appears to be < 2 ulps.
+
+# References
+
+* Temme, N. (1996) Special functions: An introduction to the classical functions of
+  mathematical physics, Wiley, New York, ISBN: 0-471-11313-1, Chapter 3.6, pp 61-65.
+* Weisstein, Eric W. ["Stirling's Series."](http://mathworld.wolfram.com/StirlingsSeries.html).
+  MathWorld.
+* [OEIS A046968](http://oeis.org/A046968) and [OEIS A046969](http://oeis.org/A046969)
+  for the series coefficients.
+"""
+function lstirling_asym end
+
+lstirling_asym(x::BigFloat) = lgamma(x) + x - log(x)*(x - big(0.5)) - log2π/big(2)
+
+lstirling_asym(x::Integer) = lstirling_asym(float(x))
+
+const lstirlingF64 = Float64[lstirling_asym(k) for k in big(1):big(64)]
+const lstirlingF32 = Float32[lstirling_asym(k) for k in big(1):big(40)]
+
 function lstirling_asym(x::Float64)
-    t = 1.0/(x*x)
+    isinteger(x) && (0 < x ≤ length(lstirlingF64)) && return lstirlingF64[Int(x)]
+    t = inv(abs2(x))
     @horner(t,
      8.33333333333333333e-2, #  1/12 x^-1
     -2.77777777777777778e-3, # -1/360 x^-3
@@ -43,7 +76,8 @@
 end

 function lstirling_asym(x::Float32)
-    t = 1f0/(x*x)
+    isinteger(x) && (0 < x ≤ length(lstirlingF32)) && return lstirlingF32[Int(x)]
+    t = inv(abs2(x))
     @horner(t,
      8.333333333333f-2, #  1/12 x^-1
     -2.777777777777f-3, # -1/360 x^-3
@@ -51,5 +85,3 @@
     -5.952380952381f-4, # -1/1680 x^-7
      8.417508417508f-4)/x #  1/1188 x^-9
 end
-
-lstirling_asym(x::Integer) = lstirling_asym(float(x))
diff --git a/test/basicfuns.jl b/test/basicfuns.jl
index ba21627..6ba2610 100644
--- a/test/basicfuns.jl
+++ b/test/basicfuns.jl
@@ -1,105 +1,91 @@
-using StatsFuns
-using Base.Test
-using Compat
+using Compat, StatsFuns
+using Compat.Test

-# xlogx & xlogy
+@testset "xlogx & xlogy" begin
+    @test iszero(xlogx(0))
+    @test xlogx(2) ≈ 2.0 * log(2.0)

-println("\ttesting xlogx & xlogy ...")
-
-@test xlogx(0) === 0.0
-@test xlogx(2) ≈ 2.0 * log(2.0)
-
-@test xlogy(0, 1) === 0.0
-@test xlogy(2, 3) ≈ 2.0 * log(3.0)
-
-# logistic & logit
-
-println("\ttesting logistic & logit ...")
-
-@test logistic(2) ≈ 1.0 / (1.0 + exp(-2.0))
-@test logit(0.5) ≈ 0.0
-@test logit(logistic(2)) ≈ 2.0
-
-# log1psq
+    @test iszero(xlogy(0, 1))
+    @test xlogy(2, 3) ≈ 2.0 * log(3.0)
+end

-println("\ttesting log1psq ...")
+@testset "logistic & logit" begin
+    @test logistic(2) ≈ 1.0 / (1.0 + exp(-2.0))
+    @test iszero(logit(0.5))
+    @test logit(logistic(2)) ≈ 2.0
+end

-@test log1psq(0.0) ≈ 0.0
-@test log1psq(1.0) ≈ log1p(1.0)
-@test log1psq(2.0) ≈ log1p(4.0)
+@testset "log1psq" begin
+    @test iszero(log1psq(0.0))
+    @test log1psq(1.0) ≈ log1p(1.0)
+    @test log1psq(2.0) ≈ log1p(4.0)
+end

 # log1pexp, log1mexp, log2mexp & logexpm1

-println("\ttesting log1pexp ...")
-
-@test log1pexp(2.0) ≈ log(1.0 + exp(2.0))
-@test log1pexp(-2.0) ≈ log(1.0 + exp(-2.0))
-@test log1pexp(10000) ≈ 10000.0
-@test log1pexp(-10000) ≈ 0.0
-
-println("\ttesting log1mexp ...")
-
-@test log1mexp(-1.0) ≈ log1p(- exp(-1.0))
-@test log1mexp(-10.0) ≈ log1p(- exp(-10.0))
-
-println("\ttesting log2mexp ...")
-
-@test log2mexp(0.0) ≈ 0.0
-@test log2mexp(-1.0) ≈ log(2.0 - exp(-1.0))
-
-println("\ttesting logexpm1 ...")
-
-@test logexpm1(2.0) ≈ log(exp(2.0) - 1.0)
-@test logexpm1(log1pexp(2.0)) ≈ 2.0
-@test logexpm1(log1pexp(-2.0)) ≈ -2.0
-
-# log1pmx
-
-println("\ttesting log1pmx ...")
-
-@test log1pmx(0.0) ≈ 0.0
-@test log1pmx(1.0) ≈ log(2.0) - 1.0
-@test log1pmx(2.0) ≈ log(3.0) - 2.0
-
-println("\ttesting logmxp1 ...")
+@testset "log1pexp" begin
+    @test log1pexp(2.0) ≈ log(1.0 + exp(2.0))
+    @test log1pexp(-2.0) ≈ log(1.0 + exp(-2.0))
+    @test log1pexp(10000) ≈ 10000.0
+    @test log1pexp(-10000) ≈ 0.0
+end

-@test logmxp1(1.0) ≈ 0.0
-@test logmxp1(2.0) ≈ log(2.0) - 1.0
-@test logmxp1(3.0) ≈ log(3.0) - 2.0
+@testset "log1mexp" begin
+    @test log1mexp(-1.0) ≈ log1p(- exp(-1.0))
+    @test log1mexp(-10.0) ≈ log1p(- exp(-10.0))
+end

-# logsumexp
+@testset "log2mexp" begin + @test log2mexp(0.0) ≈ 0.0 + @test log2mexp(-1.0) ≈ log(2.0 - exp(-1.0)) +end -println("\ttesting logsumexp ...") +@testset "logexpm1" begin + @test logexpm1(2.0) ≈ log(exp(2.0) - 1.0) + @test logexpm1(log1pexp(2.0)) ≈ 2.0 + @test logexpm1(log1pexp(-2.0)) ≈ -2.0 +end -@test logsumexp(2.0, 3.0) ≈ log(exp(2.0) + exp(3.0)) -@test logsumexp(10002, 10003) ≈ 10000 + logsumexp(2.0, 3.0) +@testset "log1pmx" begin + @test iszero(log1pmx(0.0)) + @test log1pmx(1.0) ≈ log(2.0) - 1.0 + @test log1pmx(2.0) ≈ log(3.0) - 2.0 +end -@test logsumexp([1.0, 2.0, 3.0]) ≈ 3.40760596444438 -@test logsumexp([1.0, 2.0, 3.0] .+ 1000.) ≈ 1003.40760596444438 +@testset "logmxp1" begin + @test iszero(logmxp1(1.0)) + @test logmxp1(2.0) ≈ log(2.0) - 1.0 + @test logmxp1(3.0) ≈ log(3.0) - 2.0 +end -let cases = [([-Inf, -Inf], -Inf), # correct handling of all -Inf - ([-Inf, -Inf32], -Inf), # promotion - ([-Inf32, -Inf32], -Inf32), # Float32 - ([-Inf, Inf], Inf), - ([-Inf, 9.0], 9.0), - ([Inf, 9.0], Inf), - ([NaN, 9.0], NaN), # NaN propagation - ([NaN, Inf], NaN), # NaN propagation - ([NaN, -Inf], NaN), # NaN propagation - ([0, 0], log(2.0))] # non-float arguments - for (arguments, result) in cases - @test logsumexp(arguments) ≡ result - @test logsumexp(arguments...) ≡ result +@testset "logsumexp" begin + @test logsumexp(2.0, 3.0) ≈ log(exp(2.0) + exp(3.0)) + @test logsumexp(10002, 10003) ≈ 10000 + logsumexp(2.0, 3.0) + + @test logsumexp([1.0, 2.0, 3.0]) ≈ 3.40760596444438 + @test logsumexp([1.0, 2.0, 3.0] .+ 1000.) ≈ 1003.40760596444438 + + let cases = [([-Inf, -Inf], -Inf), # correct handling of all -Inf + ([-Inf, -Inf32], -Inf), # promotion + ([-Inf32, -Inf32], -Inf32), # Float32 + ([-Inf, Inf], Inf), + ([-Inf, 9.0], 9.0), + ([Inf, 9.0], Inf), + ([NaN, 9.0], NaN), # NaN propagation + ([NaN, Inf], NaN), # NaN propagation + ([NaN, -Inf], NaN), # NaN propagation + ([0, 0], log(2.0))] # non-float arguments + for (arguments, result) in cases + @test logsumexp(arguments) ≡ result + @test logsumexp(arguments...) ≡ result + end end end -# softmax - -println("\ttesting softmax ...") - -x = [1.0, 2.0, 3.0] -# Explicit conversion to Vector{Float64} can be romved when we drop 0.4 support -r = Vector{Float64}(@compat exp.(x) ./ sum(exp.(x))) -@test softmax([1.0, 2.0, 3.0]) ≈ r -softmax!(x) -@test x ≈ r +@testset "softmax" begin + x = [1.0, 2.0, 3.0] + r = exp.(x) ./ sum(exp.(x)) + @test softmax([1.0, 2.0, 3.0]) ≈ r + softmax!(x) + @test x ≈ r +end diff --git a/test/rmath.jl b/test/rmath.jl index 18d3bee..a6b0db5 100644 --- a/test/rmath.jl +++ b/test/rmath.jl @@ -1,7 +1,6 @@ -using StatsFuns -using Base.Test +using Compat, StatsFuns +using Compat.Test import StatsFuns.RFunctions -using Compat function check_rmath(fname, statsfun, rmathfun, params, aname, a, isprob, rtol) v = statsfun(params..., a) diff --git a/test/runtests.jl b/test/runtests.jl index 4493459..c7cd840 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,7 @@ tests = ["basicfuns", "rmath", "generic"] for t in tests - to_test = true - # if VERSION < v"0.4.0-dev" - # to_test = false - # end - - if to_test - fp = "$t.jl" - println("* running $fp") - include(fp) - end + fp = "$t.jl" + println("* running $fp") + include(fp) end