matrix-variate uniformity and unit test overhaul (#1095)
* add test utils to src

* fix inverse wishart variance

* fix MatrixFDist random params

* make size(d, i) generic

* shuffle wishart around

* shuffle inversewishart around

* c0 -> logc0

* throw ArgumentErrors in wisharts

* change integrating constant sign

* make _logpdf generic and make logkernel the eval primitive (see the sketch after this list)

* wishart params only return actual params

* MatrixBeta and LKJ params also return dims

* use logc0

* use params right

* update docs

* initialize new matrixvariate tests

* rein in random covariance matrices

* archive Stan output

* add _multivariate test util

* use ScalMat in MatrixBeta

* doc update

* better LKJ univariate

* new tests

* delete old tests

* reshape vector that's already converted in json/stan stuff

* try something else to skirt this reshape issue
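
Below is a minimal sketch of the conventions these bullets describe. It assumes the generic `_logpdf` fallback adds the stored `logc0` to `logkernel` (inferred from the diffs further down, which delete the per-file `_logpdf` methods), and the parameter values are arbitrary:

```julia
using Distributions, LinearAlgebra

# Construct an inverse Wishart with an arbitrary scale matrix.
d = InverseWishart(5.0, Matrix{Float64}(I, 2, 2))

# `params` now returns only the user-facing parameters, not the cached constant.
ν, Ψ = params(d)

# `logkernel` is the evaluation primitive; the generic `_logpdf` is assumed to
# add the stored `logc0`, so this identity should hold (up to rounding).
X = rand(d)
logpdf(d, X) ≈ Distributions.logkernel(d, X) + d.logc0
```

The same identity should apply to the other matrix variates shown below, such as `LKJ` and `MatrixBeta`, which is presumably what allows their distribution-specific `_logpdf` methods to be removed.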
johnczito committed Apr 3, 2020
1 parent b5e3217 commit 2d7f203
Showing 25 changed files with 966 additions and 1,001 deletions.
2 changes: 1 addition & 1 deletion docs/src/matrix.md
@@ -32,9 +32,9 @@ vec(d::MatrixDistribution)
## Distributions

```@docs
MatrixNormal
Wishart
InverseWishart
MatrixNormal
MatrixTDist
MatrixBeta
MatrixFDist
2 changes: 1 addition & 1 deletion src/Distributions.jl
@@ -315,7 +315,7 @@ Supported distributions:
Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto,
GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric,
InverseWishart, InverseGamma, InverseGaussian, IsoNormal,
IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy,
IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, LKJ,
Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixTDist, MixtureModel,
Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon,
MvNormalKnownCov, MvTDist, NegativeBinomial, NoncentralBeta, NoncentralChisq,
110 changes: 68 additions & 42 deletions src/matrix/inversewishart.jl
@@ -6,31 +6,35 @@
```
The [inverse Wishart distribution](http://en.wikipedia.org/wiki/Inverse-Wishart_distribution)
generalizes the inverse gamma distribution to ``p\\times p`` real, positive definite
matrices ``\\boldsymbol{\\Sigma}``. If ``\\boldsymbol{\\Sigma}\\sim IW_p(\\nu,\\boldsymbol{\\Psi})``,
matrices ``\\boldsymbol{\\Sigma}``.
If ``\\boldsymbol{\\Sigma}\\sim \\textrm{IW}_p(\\nu,\\boldsymbol{\\Psi})``,
then its probability density function is
```math
f(\\boldsymbol{\\Sigma}; \\nu,\\boldsymbol{\\Psi}) =
\\frac{\\left|\\boldsymbol{\\Psi}\\right|^{\\nu/2}}{2^{\\nu p/2}\\Gamma_p(\\frac{\\nu}{2})} \\left|\\boldsymbol{\\Sigma}\\right|^{-(\\nu+p+1)/2} e^{-\\frac{1}{2}\\operatorname{tr}(\\boldsymbol{\\Psi}\\boldsymbol{\\Sigma}^{-1})}.
```
``\\mathbf{H}\\sim W_p(\\nu, \\mathbf{S})`` if and only if ``\\mathbf{H}^{-1}\\sim IW_p(\\nu, \\mathbf{S}^{-1})``.
``\\mathbf{H}\\sim \\textrm{W}_p(\\nu, \\mathbf{S})`` if and only if
``\\mathbf{H}^{-1}\\sim \\textrm{IW}_p(\\nu, \\mathbf{S}^{-1})``.
"""
struct InverseWishart{T<:Real, ST<:AbstractPDMat} <: ContinuousMatrixDistribution
df::T # degree of freedom
Ψ::ST # scale matrix
c0::T # log of normalizing constant
logc0::T # log of normalizing constant
end

#### Constructors
# -----------------------------------------------------------------------------
# Constructors
# -----------------------------------------------------------------------------

function InverseWishart(df::T, Ψ::AbstractPDMat{T}) where T<:Real
p = dim(Ψ)
df > p - 1 || error("df should be greater than dim - 1.")
c0 = _invwishart_c0(df, Ψ)
R = Base.promote_eltype(T, c0)
df > p - 1 || throw(ArgumentError("df should be greater than dim - 1."))
logc0 = invwishart_logc0(df, Ψ)
R = Base.promote_eltype(T, logc0)
prom_Ψ = convert(AbstractArray{R}, Ψ)
InverseWishart{R, typeof(prom_Ψ)}(R(df), prom_Ψ, R(c0))
InverseWishart{R, typeof(prom_Ψ)}(R(df), prom_Ψ, R(logc0))
end

function InverseWishart(df::Real, Ψ::AbstractPDMat)
@@ -42,50 +46,44 @@ InverseWishart(df::Real, Ψ::Matrix) = InverseWishart(df, PDMat(Ψ))

InverseWishart(df::Real, Ψ::Cholesky) = InverseWishart(df, PDMat(Ψ))

function _invwishart_c0(df::Real, Ψ::AbstractPDMat)
h_df = df / 2
p = dim(Ψ)
h_df * (p * typeof(df)(logtwo) - logdet(Ψ)) + logmvgamma(p, h_df)
end
# -----------------------------------------------------------------------------
# REPL display
# -----------------------------------------------------------------------------

#### Properties
show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, Matrix(d.Ψ))])

insupport(::Type{InverseWishart}, X::Matrix) = isposdef(X)
insupport(d::InverseWishart, X::Matrix) = size(X) == size(d) && isposdef(X)
# -----------------------------------------------------------------------------
# Conversion
# -----------------------------------------------------------------------------

dim(d::InverseWishart) = dim(d.Ψ)
size(d::InverseWishart) = (p = dim(d); (p, p))
size(d::InverseWishart, i) = size(d)[i]
rank(d::InverseWishart) = dim(d)
params(d::InverseWishart) = (d.df, d.Ψ, d.c0)
@inline partype(d::InverseWishart{T}) where {T<:Real} = T

### Conversion
function convert(::Type{InverseWishart{T}}, d::InverseWishart) where T<:Real
P = convert(AbstractArray{T}, d.Ψ)
InverseWishart{T, typeof(P)}(T(d.df), P, T(d.c0))
InverseWishart{T, typeof(P)}(T(d.df), P, T(d.logc0))
end
function convert(::Type{InverseWishart{T}}, df, Ψ::AbstractPDMat, c0) where T<:Real
function convert(::Type{InverseWishart{T}}, df, Ψ::AbstractPDMat, logc0) where T<:Real
P = convert(AbstractArray{T}, Ψ)
InverseWishart{T, typeof(P)}(T(df), P, T(c0))
InverseWishart{T, typeof(P)}(T(df), P, T(logc0))
end

#### Show

show(io::IO, d::InverseWishart) = show_multline(io, d, [(:df, d.df), (:Ψ, Matrix(d.Ψ))])
# -----------------------------------------------------------------------------
# Properties
# -----------------------------------------------------------------------------

insupport(::Type{InverseWishart}, X::Matrix) = isposdef(X)
insupport(d::InverseWishart, X::Matrix) = size(X) == size(d) && isposdef(X)

#### Statistics
dim(d::InverseWishart) = dim(d.Ψ)
size(d::InverseWishart) = (p = dim(d); (p, p))
rank(d::InverseWishart) = dim(d)
params(d::InverseWishart) = (d.df, d.Ψ)
@inline partype(d::InverseWishart{T}) where {T<:Real} = T

function mean(d::InverseWishart)
df = d.df
p = dim(d)
r = df - (p + 1)
if r > 0.0
return Matrix(d.Ψ) * (1.0 / r)
else
error("mean only defined for df > p + 1")
end
r > 0.0 || throw(ArgumentError("mean only defined for df > p + 1"))
return Matrix(d.Ψ) * (1.0 / r)
end

mode(d::InverseWishart) = d.Ψ * inv(d.df + dim(d) + 1.0)
@@ -100,22 +98,50 @@ end
function var(d::InverseWishart, i::Integer, j::Integer)
p, ν, Ψ = (dim(d), d.df, Matrix(d.Ψ))
ν > p + 3 || throw(ArgumentError("var only defined for df > dim + 3"))
inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*(ν - p + 1)*Ψ[i,j]^2 +(ν - p - 1)*Ψ[i,i]*Ψ[j,j]
inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*((ν - p + 1)*Ψ[i,j]^2 +(ν - p - 1)*Ψ[i,i]*Ψ[j,j])
end

#### Evaluation
# -----------------------------------------------------------------------------
# Evaluation
# -----------------------------------------------------------------------------

function invwishart_logc0(df::Real, Ψ::AbstractPDMat)
h_df = df / 2
p = dim(Ψ)
-h_df * (p * typeof(df)(logtwo) - logdet(Ψ)) - logmvgamma(p, h_df)
end

function _logpdf(d::InverseWishart, X::AbstractMatrix)
function logkernel(d::InverseWishart, X::AbstractMatrix)
p = dim(d)
df = d.df
Xcf = cholesky(X)
# we use the fact: tr(Ψ * inv(X)) = tr(inv(X) * Ψ) = tr(X \ Ψ)
Ψ = Matrix(d.Ψ)
-0.5 * ((df + p + 1) * logdet(Xcf) + tr(Xcf \ Ψ)) - d.c0
-0.5 * ((df + p + 1) * logdet(Xcf) + tr(Xcf \ Ψ))
end


#### Sampling
# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------

_rand!(rng::AbstractRNG, d::InverseWishart, A::AbstractMatrix) =
(A .= inv(cholesky!(_rand!(rng, Wishart(d.df, inv(d.Ψ)), A))))

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::InverseWishart)
check_univariate(d)
ν, Ψ = params(d)
α = ν / 2
β = Matrix(Ψ)[1] / 2
return InverseGamma(α, β)
end

function _rand_params(::Type{InverseWishart}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for InverseWishart"))
ν = elty( n + 3 + abs(10randn()) )
Ψ = (X = 2rand(elty, n, n) .- 1 ; X * X')
return ν, Ψ
end
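
A hedged sketch of how these internal helpers might be exercised by the new tests. `_univariate` and `_rand_params` are unexported, so the qualified calls below illustrate the idea rather than a documented API:

```julia
using Distributions

# Draw admissible parameters for a 1 x 1 inverse Wishart ...
ν, Ψ = Distributions._rand_params(InverseWishart, Float64, 1, 1)
d = InverseWishart(ν, Ψ)

# ... and compare it with its scalar analogue, an inverse gamma.
g = Distributions._univariate(d)

x = rand(d)[1]
logpdf(d, fill(x, 1, 1)) ≈ logpdf(g, x)  # the densities should agree on 1 x 1 matrices
```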
21 changes: 17 additions & 4 deletions src/matrix/lkj.jl
@@ -71,7 +71,7 @@ insupport(d::LKJ, R::AbstractMatrix) = isreal(R) && size(R) == size(d) && isone(
mean(d::LKJ) = Matrix{partype(d)}(I, dim(d), dim(d))

function mode(d::LKJ; check_args = true)
η = params(d)
p, η = params(d)
if check_args
η > 1 || throw(ArgumentError("mode is defined only when η > 1."))
end
@@ -85,7 +85,7 @@ function var(lkj::LKJ)
σ² * (ones(partype(lkj), d, d) - I)
end

params(d::LKJ) = d.η
params(d::LKJ) = (d.d, d.η)

@inline partype(d::LKJ{T}) where {T <: Real} = T

@@ -109,8 +109,6 @@ end

logkernel(d::LKJ, R::AbstractMatrix) = (d.η - 1) * logdet(R)

_logpdf(d::LKJ, R::AbstractMatrix) = logkernel(d, R) + d.logc0

# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------
@@ -160,6 +158,21 @@ function _marginal(lkj::LKJ)
LocationScale(-1, 2, Beta(α, α))
end

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::LKJ)
check_univariate(d)
return DiscreteNonParametric([one(d.η)], [one(d.η)])
end

function _rand_params(::Type{LKJ}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for LKJ"))
η = abs(3randn(elty))
return n, η
end

# -----------------------------------------------------------------------------
# Several redundant implementations of the reciprocal integrating constant.
# If f(R; n) = c₀ |R|ⁿ⁻¹, these give log(1 / c₀).
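A similar sketch for the revised `LKJ` interface shown in this file's diff, again assuming the generic `_logpdf` fallback; the dimension and shape parameter are arbitrary:

```julia
using Distributions

lkj = LKJ(3, 1.5)
p, η = params(lkj)      # params now also returns the dimension: (3, 1.5)
mean(lkj)               # the 3 x 3 identity matrix
R = rand(lkj)           # a random 3 x 3 correlation matrix
logpdf(lkj, R) ≈ Distributions.logkernel(lkj, R) + lkj.logc0

# A 1 x 1 correlation matrix is always [1], so the new `_univariate`
# helper reduces the scalar case to a point mass at one.
Distributions._univariate(LKJ(1, 2.0))
```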
41 changes: 27 additions & 14 deletions src/matrix/matrixbeta.jl
@@ -8,21 +8,22 @@ n2::Real degrees of freedom (greater than p - 1)
The [matrix beta distribution](https://en.wikipedia.org/wiki/Matrix_variate_beta_distribution)
generalizes the beta distribution to ``p\\times p`` real matrices ``\\mathbf{U}``
for which ``\\mathbf{U}`` and ``\\mathbf{I}_p-\\mathbf{U}`` are both positive definite.
If ``\\mathbf{U}\\sim MB_p(n_1/2, n_2/2)``, then its probability density function is
If ``\\mathbf{U}\\sim \\textrm{MB}_p(n_1/2, n_2/2)``, then its probability density function is
```math
f(\\mathbf{U}; n_1,n_2) = \\frac{\\Gamma_p(\\frac{n_1+n_2}{2})}{\\Gamma_p(\\frac{n_1}{2})\\Gamma_p(\\frac{n_2}{2})}
|\\mathbf{U}|^{(n_1-p-1)/2}\\left|\\mathbf{I}_p-\\mathbf{U}\\right|^{(n_2-p-1)/2}.
```
If ``\\mathbf{S}_1\\sim W_p(n_1,\\mathbf{I}_p)`` and ``\\mathbf{S}_2\\sim W_p(n_2,\\mathbf{I}_p)``
If ``\\mathbf{S}_1\\sim \\textrm{W}_p(n_1,\\mathbf{I}_p)`` and
``\\mathbf{S}_2\\sim \\textrm{W}_p(n_2,\\mathbf{I}_p)``
are independent, and we use ``\\mathcal{L}(\\cdot)`` to denote the lower Cholesky factor, then
```math
\\mathbf{U}=\\mathcal{L}(\\mathbf{S}_1+\\mathbf{S}_2)^{-1}\\mathbf{S}_1\\mathcal{L}(\\mathbf{S}_1+\\mathbf{S}_2)^{-\\rm{T}}
```
has ``\\mathbf{U}\\sim MB_p(n_1/2, n_2/2)``.
has ``\\mathbf{U}\\sim \\textrm{MB}_p(n_1/2, n_2/2)``.
"""
struct MatrixBeta{T <: Real, TW <: Wishart} <: ContinuousMatrixDistribution
W1::TW
@@ -38,7 +39,7 @@ function MatrixBeta(p::Int, n1::Real, n2::Real)
p > 0 || throw(ArgumentError("dim must be positive: got $(p)."))
logc0 = matrixbeta_logc0(p, n1, n2)
T = Base.promote_eltype(n1, n2, logc0)
Ip = PDMat( Matrix{T}(I, p, p) )
Ip = ScalMat(p, one(T))
W1 = Wishart(T(n1), Ip)
W2 = Wishart(T(n2), Ip)
MatrixBeta{T, typeof(W1)}(W1, W2, T(logc0))
@@ -78,25 +79,23 @@ rank(d::MatrixBeta) = dim(d)

insupport(d::MatrixBeta, U::AbstractMatrix) = isreal(U) && size(U) == size(d) && isposdef(U) && isposdef(I - U)

params(d::MatrixBeta) = (d.W1.df, d.W2.df)
params(d::MatrixBeta) = (dim(d), d.W1.df, d.W2.df)

mean(d::MatrixBeta) = ((n1, n2) = params(d); Matrix((n1 / (n1 + n2)) * I, dim(d), dim(d)))
mean(d::MatrixBeta) = ((p, n1, n2) = params(d); Matrix((n1 / (n1 + n2)) * I, p, p))

@inline partype(d::MatrixBeta{T}) where {T <: Real} = T

# Konno (1988 JJSS) Corollary 3.3.i
function cov(d::MatrixBeta, i::Integer, j::Integer, k::Integer, l::Integer)
n1, n2 = params(d)
p, n1, n2 = params(d)
n = n1 + n2
p = dim(d)
Ω = Matrix{partype(d)}(I, p, p)
n1*n2*inv(n*(n - 1)*(n + 2))*(-(2/n)*Ω[i,j]*Ω[k,l] + Ω[j,l]*Ω[i,k] + Ω[i,l]*Ω[k,j])
end

function var(d::MatrixBeta, i::Integer, j::Integer)
n1, n2 = params(d)
p, n1, n2 = params(d)
n = n1 + n2
p = dim(d)
Ω = Matrix{partype(d)}(I, p, p)
n1*n2*inv(n*(n - 1)*(n + 2))*((1 - (2/n))*Ω[i,j]^2 + Ω[j,j]*Ω[i,i])
end
@@ -111,13 +110,10 @@ function matrixbeta_logc0(p::Int, n1::Real, n2::Real)
end

function logkernel(d::MatrixBeta, U::AbstractMatrix)
p = dim(d)
n1, n2 = params(d)
p, n1, n2 = params(d)
((n1 - p - 1) / 2) * logdet(U) + ((n2 - p - 1) / 2) * logdet(I - U)
end

_logpdf(d::MatrixBeta, U::AbstractMatrix) = logkernel(d, U) + d.logc0

# -----------------------------------------------------------------------------
# Sampling
# -----------------------------------------------------------------------------
@@ -131,3 +127,20 @@ function _rand!(rng::AbstractRNG, d::MatrixBeta, A::AbstractMatrix)
invL = Matrix( inv(S.chol.L) )
A .= X_A_Xt(S1, invL)
end

# -----------------------------------------------------------------------------
# Test utils
# -----------------------------------------------------------------------------

function _univariate(d::MatrixBeta)
check_univariate(d)
p, n1, n2 = params(d)
return Beta(n1 / 2, n2 / 2)
end

function _rand_params(::Type{MatrixBeta}, elty, n::Int, p::Int)
n == p || throw(ArgumentError("dims must be equal for MatrixBeta"))
n1 = elty( n + 1 + abs(10randn()) )
n2 = elty( n + 1 + abs(10randn()) )
return n, n1, n2
end
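
Finally, a sketch of the `MatrixBeta` behaviour shown above, restricted to properties that appear in this diff; the parameters are arbitrary:

```julia
using Distributions, LinearAlgebra

p, n1, n2 = 3, 6.0, 7.0
d = MatrixBeta(p, n1, n2)
params(d)                       # now (3, 6.0, 7.0): the dimension plus both degrees of freedom
U = rand(d)
isposdef(U) && isposdef(I - U)  # draws satisfy the support condition
mean(d) ≈ (n1 / (n1 + n2)) * Matrix(I, p, p)

# In the 1 x 1 case the distribution collapses to Beta(n1/2, n2/2),
# which is what the new `_univariate` helper returns.
Distributions._univariate(MatrixBeta(1, 4.0, 5.0))
```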
