Skip to content

Commit

Permalink
Use julia implementations for pdfs and some cdf-like functions (#113)
Browse files Browse the repository at this point in the history
* Use Julia implementations for (log)pdfs. Adjust Rmath tests to use
testsets to give more informative errors.

* Use Julia implementations for most cdf-like functions of the Chisq
distribution.

* Change Beta distribution to be using native implementations.

* Switch binomial cdfs to use native implementations (but not the inverses)

* Use native implementations for most cdf like gamma functions

* Just use the gamma defitions for the chisq

* Use the beta definitions for binomial

* Add native implementions for Poisson's cdf-like functions

* Rely on SpecialFunctions promotion handling in beta functions

* Rely on gamma_inc_inv promotion in gammainv(c)cdf

* Apply suggestions from code review

* Add compat for HypergeometricFunctions

* Handle negative arguments in Poisson (log)(c)cdf

* Handle negative x in Gamma (log)(c)cdf

* Handle arguments outside support for Beta and Fdist (log)(c)cdf

* Fix Fdist quantile functions

* Handle arguments out of support in Binomial (log)(c)cdf and apply
the floor function to make the results correct for non-integer arguments

* Avoid accidental promotion to Float64 in fdist(log)(c)cdf

* Fix typo in "x" calculation for fdist(log)(c)cdf

* Drop redundant low tolerance tests for Beta(1000, 2). We are now
generally more accurate than Rmath so failure are because of Rmath.

* Handle some non-finite edge cases in gamma and binomial

* Only switch to log-version of betalogcdf when the result is tiny

* Handle the degenerate cases in beta(log)(c)cdf when alpha is zero

* Handle the degenerate case of the gamma distribution when k==0.
Also, only calculate the log(c)cdf based on the hypergeometric
function when p is zero or subnormal

* Address review comments

* Avoid rational return type in gamma(log)(c)cdf

* Handle the case when alpha and beta are zero in the betacdf
  • Loading branch information
andreasnoack committed Feb 18, 2022
1 parent 13e231a commit 28d732a
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 90 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "0.9.15"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
HypergeometricFunctions = "34004b35-14d8-5ef3-9330-4cdb6864b03a"
InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
Expand All @@ -13,6 +14,7 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"

[compat]
ChainRulesCore = "1"
HypergeometricFunctions = "0.3"
InverseFunctions = "0.1"
IrrationalConstants = "0.1"
LogExpFunctions = "0.3.2"
Expand Down
74 changes: 67 additions & 7 deletions src/distrs/beta.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# functions related to beta distributions

using HypergeometricFunctions: _₂F₁

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
betacdf,
betaccdf,
betalogcdf,
betalogccdf,
betainvcdf,
betainvccdf,
# betapdf,
# betalogpdf,
# betacdf,
# betaccdf,
# betalogcdf,
# betalogccdf,
# betainvcdf,
# betainvccdf,
betainvlogcdf,
betainvlogccdf

Expand All @@ -22,3 +25,60 @@ function betalogpdf(α::T, β::T, x::T) where {T<:Real}
val = xlogy- 1, y) + xlog1py- 1, -y) - logbeta(α, β)
return x < 0 || x > 1 ? oftype(val, -Inf) : val
end

function betacdf::Real, β::Real, x::Real)
# Handle a degenerate case
if iszero(α) && β > 0
return float(last(promote(α, β, x, x >= 0)))
end

return first(beta_inc(α, β, clamp(x, 0, 1)))
end

function betaccdf::Real, β::Real, x::Real)
# Handle a degenerate case
if iszero(α) && β > 0
return float(last(promote(α, β, x, x < 0)))
end

last(beta_inc(α, β, clamp(x, 0, 1)))
end

# The log version is currently based on non-log version. When the cdf is very small we shift
# to an implementation based on the hypergeometric function ₂F₁ to avoid underflow.
function betalogcdf::T, β::T, x::T) where {T<:Real}
# Handle a degenerate case
if iszero(α) && β > 0
return log(last(promote(x, x >= 0)))
end

_x = clamp(x, 0, 1)
p, q = beta_inc(α, β, _x)
if p < floatmin(p)
# see https://dlmf.nist.gov/8.17#E7
return -log(α) + xlogy(α, _x) + log(_₂F₁(promote(α, 1 - β, α + 1, _x)...)) - logbeta(α, β)
elseif p <= 0.7
return log(p)
else
return log1p(-q)
end
end
betalogcdf::Real, β::Real, x::Real) = betalogcdf(promote(α, β, x)...)

function betalogccdf::Real, β::Real, x::Real)
# Handle a degenerate case
if iszero(α) && β > 0
return log(last(promote(α, β, x, x < 0)))
end

p, q = beta_inc(α, β, clamp(x, 0, 1))
if q < 0.7
return log(q)
else
return log1p(-p)
end
end

betainvcdf::Real, β::Real, p::Real) = first(beta_inc_inv(α, β, p))

betainvccdf::Real, β::Real, p::Real) = last(beta_inc_inv(β, α, p))
32 changes: 26 additions & 6 deletions src/distrs/binom.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# functions related to binomial distribution

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
binomcdf,
binomccdf,
binomlogcdf,
binomlogccdf,
# binompdf,
# binomlogpdf,
# binomcdf,
# binomccdf,
# binomlogcdf,
# binomlogccdf,
binominvcdf,
binominvccdf,
binominvlogcdf,
binominvlogccdf


# Julia implementations
binompdf(n::Real, p::Real, k::Real) = exp(binomlogpdf(n, p, k))

Expand All @@ -22,3 +22,23 @@ function binomlogpdf(n::T, p::T, k::T) where {T<:Real}
val = min(0, betalogpdf(m + 1, n - m + 1, p) - log(n + 1))
return 0 <= k <= n && isinteger(k) ? val : oftype(val, -Inf)
end

for l in ("", "log"), compl in (false, true)
fbinom = Symbol(string("binom", l, ifelse(compl, "c", "" ), "cdf"))
fbeta = Symbol(string("beta" , l, ifelse(compl, "", "c"), "cdf"))
@eval function ($fbinom)(n::Real, p::Real, k::Real)
if isnan(k)
return last(promote(n, p, k))
end
res = ($fbeta)(max(0, floor(k) + 1), max(0, n - floor(k)), p)

# When p == 1 the betaccdf doesn't return the correct result
# so these cases have to be special cased
if isone(p)
newres = oftype(res, $compl ? k < n : k >= n)
return $(l === "" ? :newres : :(log(newres)))
else
return res
end
end
end
29 changes: 9 additions & 20 deletions src/distrs/chisq.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
# functions related to chi-square distribution

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
chisqcdf,
chisqccdf,
chisqlogcdf,
chisqlogccdf,
chisqinvcdf,
chisqinvccdf,
chisqinvlogcdf,
chisqinvlogccdf

# Julia implementations
# promotion ensures that we do forward e.g. `chisqpdf(::Int, ::Float32)` to
# `gammapdf(::Float32, ::Int, ::Float32)` but not `gammapdf(::Float64, ::Int, ::Float32)`
chisqpdf(k::Real, x::Real) = chisqpdf(promote(k, x)...)
chisqpdf(k::T, x::T) where {T<:Real} = gammapdf(k / 2, 2, x)

chisqlogpdf(k::Real, x::Real) = chisqlogpdf(promote(k, x)...)
chisqlogpdf(k::T, x::T) where {T<:Real} = gammalogpdf(k / 2, 2, x)
# Just use the Gamma definitions
for f in ("pdf", "logpdf", "cdf", "ccdf", "logcdf", "logccdf", "invcdf", "invccdf", "invlogcdf", "invlogccdf")
_chisqf = Symbol("chisq"*f)
_gammaf = Symbol("gamma"*f)
@eval begin
$(_chisqf)(k::Real, x::Real) = $(_chisqf)(promote(k, x)...)
$(_chisqf)(k::T, x::T) where {T<:Real} = $(_gammaf)(k/2, 2, x)
end
end
28 changes: 16 additions & 12 deletions src/distrs/fdist.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
# functions related to F distribution

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
fdistcdf,
fdistccdf,
fdistlogcdf,
fdistlogccdf,
fdistinvcdf,
fdistinvccdf,
fdistinvlogcdf,
fdistinvlogccdf

# Julia implementations
fdistpdf(ν1::Real, ν2::Real, x::Real) = exp(fdistlogpdf(ν1, ν2, x))

Expand All @@ -23,3 +11,19 @@ function fdistlogpdf(ν1::T, ν2::T, x::T) where {T<:Real}
val = (xlogy(ν1, ν1ν2) + xlogy(ν1 - 2, y) - xlogy(ν1 + ν2, 1 + ν1ν2 * y)) / 2 - logbeta(ν1 / 2, ν2 / 2)
return x < 0 ? oftype(val, -Inf) : val
end

for f in ("cdf", "ccdf", "logcdf", "logccdf")
ff = Symbol("fdist"*f)
bf = Symbol("beta"*f)
@eval $ff(ν1::T, ν2::T, x::T) where {T<:Real} = $bf(ν1/2, ν2/2, inv(1 + ν2/(ν1*max(0, x))))
@eval $ff(ν1::Real, ν2::Real, x::Real) = $ff(promote(ν1, ν2, x)...)
end
for f in ("invcdf", "invccdf", "invlogcdf", "invlogccdf")
ff = Symbol("fdist"*f)
bf = Symbol("beta"*f)
@eval function $ff(ν1::T, ν2::T, y::T) where {T<:Real}
x = $bf(ν1/2, ν2/2, y)
return x/(1 - x)*ν2/ν1
end
@eval $ff(ν1::Real, ν2::Real, y::Real) = $ff(promote(ν1, ν2, y)...)
end
94 changes: 87 additions & 7 deletions src/distrs/gamma.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# functions related to gamma distribution

using HypergeometricFunctions: drummond1F1

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
gammacdf,
gammaccdf,
gammalogcdf,
gammalogccdf,
gammainvcdf,
gammainvccdf,
# gammapdf,
# gammalogpdf,
# gammacdf,
# gammaccdf,
# gammalogcdf,
# gammalogccdf,
# gammainvcdf,
# gammainvccdf,
gammainvlogcdf,
gammainvlogccdf

Expand All @@ -22,3 +25,80 @@ function gammalogpdf(k::T, θ::T, x::T) where {T<:Real}
val = -loggamma(k) + xlogy(k - 1, xθ) - log(θ) -
return x < 0 ? oftype(val, -Inf) : val
end

function gammacdf(k::T, θ::T, x::T) where {T<:Real}
# Handle the degenerate case
if iszero(k)
return float(last(promote(x, x >= 0)))
end
return first(gamma_inc(k, max(0, x)/θ))
end
gammacdf(k::Real, θ::Real, x::Real) = gammacdf(map(float, promote(k, θ, x))...)

function gammaccdf(k::T, θ::T, x::T) where {T<:Real}
# Handle the degenerate case
if iszero(k)
return float(last(promote(x, x < 0)))
end
return last(gamma_inc(k, max(0, x)/θ))
end
gammaccdf(k::Real, θ::Real, x::Real) = gammaccdf(map(float, promote(k, θ, x))...)

gammalogcdf(k::Real, θ::Real, x::Real) = _gammalogcdf(map(float, promote(k, θ, x))...)

# Implemented via the non-log version. For tiny values, we recompute the result with
# loggamma. In that situation, there is little risk of significant cancellation.
function _gammalogcdf(k::Float64, θ::Float64, x::Float64)
# Handle the degenerate case
if iszero(k)
return log(x >= 0)
end

xdθ = max(0, x)/θ
l, u = gamma_inc(k, xdθ)
if l < floatmin(Float64) && isfinite(k) && isfinite(xdθ)
return -log(k) + k*log(xdθ) - xdθ + log(drummond1F1(1.0, 1 + k, xdθ)) - loggamma(k)
elseif l < 0.7
return log(l)
else
return log1p(-u)
end
end
function _gammalogcdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}}
return T(_gammalogcdf(Float64(k), Float64(θ), Float64(x)))
end

gammalogccdf(k::Real, θ::Real, x::Real) = _gammalogccdf(map(float, promote(k, θ, x))...)

# Implemented via the non-log version. For tiny values, we recompute the result with
# loggamma. In that situation, there is little risk of significant cancellation.
function _gammalogccdf(k::Float64, θ::Float64, x::Float64)
# Handle the degenerate case
if iszero(k)
return log(x < 0)
end

xdθ = max(0, x)/θ
l, u = gamma_inc(k, xdθ)
if u < floatmin(Float64)
return loggamma(k, xdθ) - loggamma(k)
elseif u < 0.7
return log(u)
else
return log1p(-l)
end
end

function _gammalogccdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}}
return T(_gammalogccdf(Float64(k), Float64(θ), Float64(x)))
end

function gammainvcdf(k::Real, θ::Real, p::Real)
_k, _θ, _p = promote(k, θ, p)
return*gamma_inc_inv(_k, _p, 1 - _p)
end

function gammainvccdf(k::Real, θ::Real, p::Real)
_k, _θ, _p = promote(k, θ, p)
return*gamma_inc_inv(_k, 1 - _p, _p)
end
20 changes: 15 additions & 5 deletions src/distrs/pois.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# functions related to Poisson distribution

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
poiscdf,
poisccdf,
poislogcdf,
poislogccdf,
# poispdf,
# poislogpdf,
# poiscdf,
# poisccdf,
# poislogcdf,
# poislogccdf,
poisinvcdf,
poisinvccdf,
poisinvlogcdf,
Expand All @@ -20,3 +21,12 @@ function poislogpdf(λ::T, x::T) where {T <: Real}
val = xlogy(x, λ) - λ - loggamma(x + 1)
return x >= 0 && isinteger(x) ? val : oftype(val, -Inf)
end

# Just use the Gamma definitions
poiscdf::Real, x::Real) = gammaccdf(max(0, floor(x + 1)), 1, λ)

poisccdf::Real, x::Real) = gammacdf(max(0, floor(x + 1)), 1, λ)

poislogcdf::Real, x::Real) = gammalogccdf(max(0, floor(x + 1)), 1, λ)

poislogccdf::Real, x::Real) = gammalogcdf(max(0, floor(x + 1)), 1, λ)
3 changes: 2 additions & 1 deletion src/distrs/tdist.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# functions related to student's T distribution

# R implementations
# For pdf and logpdf we use the Julia implementation
using .RFunctions:
# tdistpdf,
# tdistlogpdf,
tdistcdf,
tdistccdf,
tdistlogcdf,
Expand Down

0 comments on commit 28d732a

Please sign in to comment.