matrix-variate uniformity and unit test overhaul (#1095)
* add test utils to src

* fix inverse wishart variance

* fix MatrixFDist random params

* make size(d, i) generic

* shuffle wishart around

* shuffle inversewishart around

* c0 -> logc0

* throw ArgumentErrors in wisharts

* change integrating constant sign

* make _logpdf generic and make logkernel the eval primitive (see the sketch after this list)

* wishart params only return actual params

* MatrixBeta and LKJ params also return dims

* use logc0

* use params right

* update docs

* initialize new matrixvariate tests

* rein in random covariance matrices

* archive Stan output

* add _multivariate test util

* use ScalMat in MatrixBeta

* doc update

* better LKJ univariate

* new tests

* delete old tests

* reshape vector that's already converted in json/stan stuff

* try something else to skirt this reshape issue
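
Below is a minimal sketch of the conventions these bullets describe. It assumes the generic `_logpdf` fallback adds the stored `logc0` to `logkernel` (inferred from the diffs further down, which delete the per-file `_logpdf` methods), and the parameter values are arbitrary:

```julia
using Distributions, LinearAlgebra

# Construct an inverse Wishart with an arbitrary scale matrix.
d = InverseWishart(5.0, Matrix{Float64}(I, 2, 2))

# `params` now returns only the user-facing parameters, not the cached constant.
ν, Ψ = params(d)

# `logkernel` is the evaluation primitive; the generic `_logpdf` is assumed to
# add the stored `logc0`, so this identity should hold (up to rounding).
X = rand(d)
logpdf(d, X) ≈ Distributions.logkernel(d, X) + d.logc0
```

The same identity should apply to the other matrix variates shown below, such as `LKJ` and `MatrixBeta`, which is presumably what allows their distribution-specific `_logpdf` methods to be removed.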
johnczito committed Apr 3, 2020
1 parent b5e3217 commit 2d7f203
Showing 25 changed files with 966 additions and 1,001 deletions.
2 changes: 1 addition & 1 deletion docs/src/matrix.md
@@ -32,9 +32,9 @@ vec(d::MatrixDistribution)
## Distributions

```@docs
MatrixNormal
Wishart
InverseWishart
MatrixNormal
MatrixTDist
MatrixBeta
MatrixFDist
2 changes: 1 addition & 1 deletion src/Distributions.jl
@@ -315,7 +315,7 @@ Supported distributions:
Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto,
GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric,
InverseWishart, InverseGamma, InverseGaussian, IsoNormal,
IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy,
IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, LKJ,
Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixTDist, MixtureModel,
Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon,
MvNormalKnownCov, MvTDist, NegativeBinomial, NoncentralBeta, NoncentralChisq,
110 changes: 68 additions & 42 deletions src/matrix/inversewishart.jl
@@ -6,31 +6,35 @@
```
The [inverse Wishart distribution](http://en.wikipedia.org/wiki/Inverse-Wishart_distribution)
generalizes the inverse gamma distribution to ``p\\times p`` real, positive definite
matrices ``\\boldsymbol{\\Sigma}``. If ``\\boldsymbol{\\Sigma}\\sim IW_p(\\nu,\\boldsymbol{\\Psi})``,
matrices ``\\boldsymbol{\\Sigma}``.
If ``\\boldsymbol{\\Sigma}\\sim \\textrm{IW}_p(\\nu,\\boldsymbol{\\Psi})``,
then its probability density function is
```math
f(\\boldsymbol{\\Sigma}; \\nu,\\boldsymbol{\\Psi}) =
\\frac{\\left|\\boldsymbol{\\Psi}\\right|^{\\nu/2}}{2^{\\nu p/2}\\Gamma_p(\\frac{\\nu}{2})} \\left|\\boldsymbol{\\Sigma}\\right|^{-(\\nu+p+1)/2} e^{-\\frac{1}{2}\\operatorname{tr}(\\boldsymbol{\\Psi}\\boldsymbol{\\Sigma}^{-1})}.
```
``\\mathbf{H}\\sim W_p(\\nu, \\mathbf{S})`` if and only if ``\\mathbf{H}^{-1}\\sim IW_p(\\nu, \\mathbf{S}^{-1})``.
``\\mathbf{H}\\sim \\textrm{W}_p(\\nu, \\mathbf{S})`` if and only if
``\\mathbf{H}^{-1}\\sim \\textrm{IW}_p(\\nu, \\mathbf{S}^{-1})``.
"""
struct InverseWishart{T<:Real, ST<:AbstractPDMat} <: ContinuousMatrixDistribution
df::T # degree of freedom
Ψ::ST # scale matrix
c0::T # log of normalizing constant
logc0::T # log of normalizing constant
end

#### Constructors
# -----------------------------------------------------------------------------
# Constructors
# -----------------------------------------------------------------------------

function InverseWishart(df::T, Ψ::AbstractPDMat{T}) where T<:Real
p = dim(Ψ)
df > p - 1 || error("df should be greater than dim - 1.")
c0 = _invwishart_c0(df, Ψ)
R = Base.promote_eltype(T, c0)
df > p - 1 || throw(ArgumentError("df should be greater than dim - 1."))
logc0 = invwishart_logc0(df, Ψ)
R = Base.promote_eltype(T, logc0)
prom_Ψ = convert(AbstractArray{R}, Ψ)
InverseWishart{R, typeof(prom_Ψ)}(R(df), prom_Ψ, R(c0))
InverseWishart{R, typeof(prom_Ψ)}(R(df), prom_Ψ, R(logc0))
end

function InverseWishart(df::Real, Ψ::AbstractPDMat)
@@ -42,50 +46,44 @@ InverseWishart(df::Real, Ψ::Matrix) = InverseWishart(df, PDMat(Ψ))

InverseWishart(df::Real, Ψ::Cholesky) = InverseWishart(df, PDMat(Ψ))

function _invwishart_c0(df::Real, Ψ::AbstractPDMat)
h_df = df / 2
p = dim(Ψ)
h_df * (p * typeof(df)(logtwo) - logdet(Ψ)) + logmvgamma(p, h_df)
end
# -----------------------------------------------------------------------------
# REPL display
# -----------------------------------------------------------------------------

#### Properties
show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, Matrix(d.Ψ))])

insupport(::Type{InverseWishart}, X::Matrix) = isposdef(X)
insupport(d::InverseWishart, X::Matrix) = size(X) == size(d) && isposdef(X)
# -----------------------------------------------------------------------------
# Conversion
# -----------------------------------------------------------------------------

dim(d::InverseWishart) = dim(d.Ψ)
size(d::InverseWishart) = (p = dim(d); (p, p))
size(d::InverseWishart, i) = size(d)[i]
rank(d::InverseWishart) = dim(d)
params(d::InverseWishart) = (d.df, d.Ψ, d.c0)
@inline partype(d::InverseWishart{T}) where {T<:Real} = T

### Conversion
function convert(::Type{InverseWishart{T}}, d::InverseWishart) where T<:Real
P = convert(AbstractArray{T}, d.Ψ)
InverseWishart{T, typeof(P)}(T(d.df), P, T(d.c0))
InverseWishart{T, typeof(P)}(T(d.df), P, T(d.logc0))
end
function convert(::Type{InverseWishart{T}}, df, Ψ::AbstractPDMat, c0) where T<:Real
function convert(::Type{InverseWishart{T}}, df, Ψ::AbstractPDMat, logc0) where T<:Real
P = convert(AbstractArray{T}, Ψ)
InverseWishart{T, typeof(P)}(T(df), P, T(c0))
InverseWishart{T, typeof(P)}(T(df), P, T(logc0))
end

#### Show

show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, Matrix(d.Ψ))])
# -----------------------------------------------------------------------------
# Properties
# -----------------------------------------------------------------------------

insupport(::Type{InverseWishart}, X::Matrix) = isposdef(X)
insupport(d::InverseWishart, X::Matrix) = size(X) == size(d) && isposdef(X)

#### Statistics
dim(d::InverseWishart) = dim(d.Ψ)
size(d::InverseWishart) = (p = dim(d); (p, p))
rank(d::InverseWishart) = dim(d)
params(d::InverseWishart) = (d.df, d.Ψ)
@inline partype(d::InverseWishart{T}) where {T<:Real} = T

function mean(d::InverseWishart)
df = d.df
p = dim(d)
r = df - (p + 1)
if r > 0.0
return Matrix(d.Ψ) * (1.0 / r)
else
error("mean only defined for df > p + 1")
end
r > 0.0 || throw(ArgumentError("mean only defined for df > p + 1"))
return Matrix(d.Ψ) * (1.0 / r)
end

mode(d::InverseWishart) = d.Ψ * inv(d.df + dim(d) + 1.0)
@@ -100,22 +98,50 @@ end
function var(d::InverseWishart, i::Integer, j::Integer)
p, ν, Ψ = (dim(d), d.df, Matrix(d.Ψ))
ν > p + 3 || throw(ArgumentError("var only defined for df > dim + 3"))
inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*(ν - p + 1)*Ψ[i,j]^2 +(ν - p - 1)*Ψ[i,i]*Ψ[j,j]
inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*((ν - p + 1)*Ψ[i,j]^2 +(ν - p - 1)*Ψ[i,i]*Ψ[j,j])
end

#### Evaluation
# -----------------------------------------------------------------------------
# Evaluation
# -----------------------------------------------------------------------------

function invwishart_logc0(df::Real, Ψ::AbstractPDMat)
h_df = df / 2
p = dim(Ψ)
-h_df * (p * typeof(df)(logtwo) - logdet(Ψ)) - logmvgamma(p, h_df)
end

function _logpdf(d::InverseWishart, X::AbstractMatrix)
function logkernel(d::InverseWishart, X::AbstractMatrix)
p = dim(d)
df = d.df
Xcf = cholesky(X)
# we use the fact: tr(Ψ * inv(X)) = tr(inv(X) * Ψ) = tr(X \ Ψ)
Ψ = Matrix(d.Ψ)
-0.5 * ((df + p + 1) * logdet(Xcf) + tr(Xcf \ Ψ)) - d.c0
-0.5 * ((df + p + 1) * logdet(Xcf) + tr(Xcf \ Ψ))
end


#### Sampling
# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------

_rand!(rng::AbstractRNG, d::InverseWishart, A::AbstractMatrix) =
(A .= inv(cholesky!(_rand!(rng, Wishart(d.df, inv(d.Ψ)), A))))

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::InverseWishart)
check_univariate(d)
ν, Ψ = params(d)
α = ν / 2
β = Matrix(Ψ)[1] / 2
return InverseGamma(α, β)
end

function _rand_params(::Type{InverseWishart}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for InverseWishart"))
ν = elty( n + 3 + abs(10randn()) )
Ψ = (X = 2rand(elty, n, n) .- 1 ; X * X')
return ν, Ψ
end
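
A hedged sketch of how these internal helpers might be exercised by the new tests. `_univariate` and `_rand_params` are unexported, so the qualified calls below illustrate the idea rather than a documented API:

```julia
using Distributions

# Draw admissible parameters for a 1 x 1 inverse Wishart ...
ν, Ψ = Distributions._rand_params(InverseWishart, Float64, 1, 1)
d = InverseWishart(ν, Ψ)

# ... and compare it with its scalar analogue, an inverse gamma.
g = Distributions._univariate(d)

x = rand(d)[1]
logpdf(d, fill(x, 1, 1)) ≈ logpdf(g, x)  # the densities should agree on 1 x 1 matrices
```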
21 changes: 17 additions & 4 deletions src/matrix/lkj.jl
@@ -71,7 +71,7 @@ insupport(d::LKJ, R::AbstractMatrix) = isreal(R) && size(R) == size(d) && isone(
mean(d::LKJ) = Matrix{partype(d)}(I, dim(d), dim(d))

function mode(d::LKJ; check_args = true)
η = params(d)
p, η = params(d)
if check_args
η > 1 || throw(ArgumentError("mode is defined only when η > 1."))
end
@@ -85,7 +85,7 @@ function var(lkj::LKJ)
σ² * (ones(partype(lkj), d, d) - I)
end

params(d::LKJ) = d.η
params(d::LKJ) = (d.d, d.η)

@inline partype(d::LKJ{T}) where {T <: Real} = T

@@ -109,8 +109,6 @@ end

logkernel(d::LKJ, R::AbstractMatrix) = (d.η - 1) * logdet(R)

_logpdf(d::LKJ, R::AbstractMatrix) = logkernel(d, R) + d.logc0

# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------
@@ -160,6 +158,21 @@ function _marginal(lkj::LKJ)
LocationScale(-1, 2, Beta(α, α))
end

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::LKJ)
check_univariate(d)
return DiscreteNonParametric([one(d.η)], [one(d.η)])
end

function _rand_params(::Type{LKJ}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for LKJ"))
η = abs(3randn(elty))
return n, η
end

# -----------------------------------------------------------------------------
# Several redundant implementations of the reciprocal integrating constant.
# If f(R; n) = c₀ |R|ⁿ⁻¹, these give log(1 / c₀).
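A similar sketch for the revised `LKJ` interface shown in this file's diff, again assuming the generic `_logpdf` fallback; the dimension and shape parameter are arbitrary:

```julia
using Distributions

lkj = LKJ(3, 1.5)
p, η = params(lkj)      # params now also returns the dimension: (3, 1.5)
mean(lkj)               # the 3 x 3 identity matrix
R = rand(lkj)           # a random 3 x 3 correlation matrix
logpdf(lkj, R) ≈ Distributions.logkernel(lkj, R) + lkj.logc0

# A 1 x 1 correlation matrix is always [1], so the new `_univariate`
# helper reduces the scalar case to a point mass at one.
Distributions._univariate(LKJ(1, 2.0))
```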
41 changes: 27 additions & 14 deletions src/matrix/matrixbeta.jl
@@ -8,21 +8,22 @@ n2::Real degrees of freedom (greater than p - 1)
The [matrix beta distribution](https://en.wikipedia.org/wiki/Matrix_variate_beta_distribution)
generalizes the beta distribution to ``p\\times p`` real matrices ``\\mathbf{U}``
for which ``\\mathbf{U}`` and ``\\mathbf{I}_p-\\mathbf{U}`` are both positive definite.
If ``\\mathbf{U}\\sim MB_p(n_1/2, n_2/2)``, then its probability density function is
If ``\\mathbf{U}\\sim \\textrm{MB}_p(n_1/2, n_2/2)``, then its probability density function is
```math
f(\\mathbf{U}; n_1,n_2) = \\frac{\\Gamma_p(\\frac{n_1+n_2}{2})}{\\Gamma_p(\\frac{n_1}{2})\\Gamma_p(\\frac{n_2}{2})}
|\\mathbf{U}|^{(n_1-p-1)/2}\\left|\\mathbf{I}_p-\\mathbf{U}\\right|^{(n_2-p-1)/2}.
```
If ``\\mathbf{S}_1\\sim W_p(n_1,\\mathbf{I}_p)`` and ``\\mathbf{S}_2\\sim W_p(n_2,\\mathbf{I}_p)``
If ``\\mathbf{S}_1\\sim \\textrm{W}_p(n_1,\\mathbf{I}_p)`` and
``\\mathbf{S}_2\\sim \\textrm{W}_p(n_2,\\mathbf{I}_p)``
are independent, and we use ``\\mathcal{L}(\\cdot)`` to denote the lower Cholesky factor, then
```math
\\mathbf{U}=\\mathcal{L}(\\mathbf{S}_1+\\mathbf{S}_2)^{-1}\\mathbf{S}_1\\mathcal{L}(\\mathbf{S}_1+\\mathbf{S}_2)^{-\\rm{T}}
```
has ``\\mathbf{U}\\sim MB_p(n_1/2, n_2/2)``.
has ``\\mathbf{U}\\sim \\textrm{MB}_p(n_1/2, n_2/2)``.
"""
struct MatrixBeta{T <: Real, TW <: Wishart} <: ContinuousMatrixDistribution
W1::TW
@@ -38,7 +39,7 @@ function MatrixBeta(p::Int, n1::Real, n2::Real)
p > 0 || throw(ArgumentError("dim must be positive: got $(p)."))
logc0 = matrixbeta_logc0(p, n1, n2)
T = Base.promote_eltype(n1, n2, logc0)
Ip = PDMat( Matrix{T}(I, p, p) )
Ip = ScalMat(p, one(T))
W1 = Wishart(T(n1), Ip)
W2 = Wishart(T(n2), Ip)
MatrixBeta{T, typeof(W1)}(W1, W2, T(logc0))
@@ -78,25 +79,23 @@ rank(d::MatrixBeta) = dim(d)

insupport(d::MatrixBeta, U::AbstractMatrix) = isreal(U) && size(U) == size(d) && isposdef(U) && isposdef(I - U)

params(d::MatrixBeta) = (d.W1.df, d.W2.df)
params(d::MatrixBeta) = (dim(d), d.W1.df, d.W2.df)

mean(d::MatrixBeta) = ((n1, n2) = params(d); Matrix((n1 / (n1 + n2)) * I, dim(d), dim(d)))
mean(d::MatrixBeta) = ((p, n1, n2) = params(d); Matrix((n1 / (n1 + n2)) * I, p, p))

@inline partype(d::MatrixBeta{T}) where {T <: Real} = T

# Konno (1988 JJSS) Corollary 3.3.i
function cov(d::MatrixBeta, i::Integer, j::Integer, k::Integer, l::Integer)
n1, n2 = params(d)
p, n1, n2 = params(d)
n = n1 + n2
p = dim(d)
Ω = Matrix{partype(d)}(I, p, p)
n1*n2*inv(n*(n - 1)*(n + 2))*(-(2/n)*Ω[i,j]*Ω[k,l] + Ω[j,l]*Ω[i,k] + Ω[i,l]*Ω[k,j])
end

function var(d::MatrixBeta, i::Integer, j::Integer)
n1, n2 = params(d)
p, n1, n2 = params(d)
n = n1 + n2
p = dim(d)
Ω = Matrix{partype(d)}(I, p, p)
n1*n2*inv(n*(n - 1)*(n + 2))*((1 - (2/n))*Ω[i,j]^2 + Ω[j,j]*Ω[i,i])
end
@@ -111,13 +110,10 @@ function matrixbeta_logc0(p::Int, n1::Real, n2::Real)
end

function logkernel(d::MatrixBeta, U::AbstractMatrix)
p = dim(d)
n1, n2 = params(d)
p, n1, n2 = params(d)
((n1 - p - 1) / 2) * logdet(U) + ((n2 - p - 1) / 2) * logdet(I - U)
end

_logpdf(d::MatrixBeta, U::AbstractMatrix) = logkernel(d, U) + d.logc0

# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------
@@ -131,3 +127,20 @@ function _rand!(rng::AbstractRNG, d::MatrixBeta, A::AbstractMatrix)
invL = Matrix( inv(S.chol.L) )
A .= X_A_Xt(S1, invL)
end

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::MatrixBeta)
check_univariate(d)
p, n1, n2 = params(d)
return Beta(n1 / 2, n2 / 2)
end

function _rand_params(::Type{MatrixBeta}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for MatrixBeta"))
n1 = elty( n + 1 + abs(10randn()) )
n2 = elty( n + 1 + abs(10randn()) )
return n, n1, n2
end
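
Finally, a sketch of the `MatrixBeta` behaviour shown above, restricted to properties that appear in this diff; the parameters are arbitrary:

```julia
using Distributions, LinearAlgebra

p, n1, n2 = 3, 6.0, 7.0
d = MatrixBeta(p, n1, n2)
params(d)                       # now (3, 6.0, 7.0): the dimension plus both degrees of freedom
U = rand(d)
isposdef(U) && isposdef(I - U)  # draws satisfy the support condition
mean(d) ≈ (n1 / (n1 + n2)) * Matrix(I, p, p)

# In the 1 x 1 case the distribution collapses to Beta(n1/2, n2/2),
# which is what the new `_univariate` helper returns.
Distributions._univariate(MatrixBeta(1, 4.0, 5.0))
```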
