Fix simulate! when only the estimable coefficients are provided (#756)
* tinker

* fix simulate when only the non-pivoted coefs are provided

* tests

* patch bump and NEWS

* improve rank deficiency handling in GLMM

* more news

* readability

Co-authored-by: Alex Arslan <ararslan@comcast.net>

* add pivot accessor

* good point, @ararslan

* more fullrankx

---------

Co-authored-by: Alex Arslan <ararslan@comcast.net>
palday and ararslan committed Mar 25, 2024
1 parent e7cd789 commit beaaa36
Showing 9 changed files with 67 additions and 37 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
@@ -1,3 +1,9 @@
MixedModels v4.23.1 Release Notes
==============================
* Fix for `simulate!` when only the estimable coefficients for a rank-deficient model are provided. [#756]
* Improve handling of rank deficiency in GLMM. [#756]
* Fix display of GLMM bootstrap without a dispersion parameter. [#756]

MixedModels v4.23.0 Release Notes
==============================
* Support for rank deficiency in the parametric bootstrap. [#755]
@@ -509,3 +515,4 @@ Package dependencies
[#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744
[#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748
[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755
[#756]: https://github.com/JuliaStats/MixedModels.jl/issues/756
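To make the first entry concrete: for a rank-deficient model, `fixef(model)` returns only the estimable coefficients (pivoted and truncated), while `coef(model)` returns the full-length vector with `-0.0` in the dropped positions, and after this release `simulate`/`simulate!` accept either form. A minimal sketch of that behaviour, mirroring the collinear setup used in the test added by this commit (the RNG seeds and variable names here are arbitrary, not part of the library):

```julia
using MixedModels, StableRNGs

# x2 is an exact multiple of x, so one fixed-effects column will be pivoted out.
x = rand(StableRNG(42), 100)
data = (x = x, x2 = 1.5 .* x, y = rand(StableRNG(43), 100), z = repeat('A':'T', 5))
model = fit(MixedModel, @formula(y ~ x + x2 + (1 | z)), data; progress=false)

yc = simulate(StableRNG(1), model; β=coef(model))   # full-length β, -0.0 for the dropped column
yf = simulate(StableRNG(1), model; β=fixef(model))  # estimable coefficients only
yc ≈ yf  # both forms now produce the same simulated response
```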
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
authors = ["Phillip Alday <me@phillipalday.com>", "Douglas Bates <dmbates@gmail.com>", "Jose Bayoan Santiago Calderon <jbs3hp@virginia.edu>"]
version = "4.23.0"
version = "4.23.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
11 changes: 11 additions & 0 deletions src/Xymat.jl
@@ -61,11 +61,22 @@ Base.copyto!(A::FeTerm{T}, src::AbstractVecOrMat{T}) where {T} = copyto!(A.x, sr

Base.eltype(::FeTerm{T}) where {T} = T

"""
pivot(m::MixedModel)
pivot(A::FeTerm)
Return the pivot associated with the FeTerm.
"""
@inline pivot(m::MixedModel) = pivot(m.feterm)
@inline pivot(A::FeTerm) = A.piv

function fullrankx(A::FeTerm)
x, rnk = A.x, A.rank
return rnk == size(x, 2) ? x : view(x, :, 1:rnk) # this handles the zero-columns case
end

fullrankx(m::MixedModel) = fullrankx(m.feterm)

LinearAlgebra.rank(A::FeTerm) = A.rank

"""
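The `pivot` and `fullrankx` accessors added here are what the rest of the diff switches to in place of reaching into `m.feterm.piv` directly. A rough illustration of the invariants they satisfy; this assumes the helpers remain unexported (hence the `MixedModels.` qualification) and reuses the same collinear toy data as the sketch above:

```julia
using MixedModels, StableRNGs

x = rand(StableRNG(3), 100)
data = (x = x, x2 = 1.5 .* x, y = rand(StableRNG(4), 100), z = repeat('A':'T', 5))
model = fit(MixedModel, @formula(y ~ x + x2 + (1 | z)), data; progress=false)

piv = MixedModels.pivot(model)       # column permutation chosen during rank detection
Xfr = MixedModels.fullrankx(model)   # view containing only the estimable columns

length(piv) == size(modelmatrix(model), 2)       # one pivot entry per fixed-effects column
size(Xfr, 2) == length(fixef(model))             # pivoted-out columns are dropped
coef(model)[piv][1:size(Xfr, 2)] ≈ fixef(model)  # pivoted-but-not-truncated vs. truncated view
```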
14 changes: 8 additions & 6 deletions src/bootstrap.jl
@@ -626,11 +626,12 @@ function σρ!(v::AbstractVector, d::Diagonal, σ)
return append!(v, σ .* d.diag)
end

function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T}
"""
σρ!(v, t, σ)
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
"""
σρ!(v, t, σ)
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
function σρ!(v::AbstractVector{<:Union{T,Missing}}, t::LowerTriangular, σ) where {T}
dat = t.data
for i in axes(dat, 1)
ssqr = zero(T)
@@ -659,11 +660,12 @@ end

function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T}
(; fits, λ) = bsamp
Tfull = ismissing(first(bsamp.fits).σ) ? Union{T,Missing} : T
λcp = copy.(λ)
syms = _syms(bsamp)
m = length(syms)
n = length(fits)
v = sizehint!(T[], m * n)
v = sizehint!(Tfull[], m * n)
for f in fits
(; β, θ, σ) = f
push!(v, f.objective)
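The widened `Tfull` element type matters because families without a dispersion parameter (e.g. Bernoulli) carry `σ = missing`, so the flat vector backing the bootstrap table must admit missing values. A toy illustration of the same idea, deliberately independent of the package internals (the named tuples below are made-up stand-ins, not real bootstrap fits):

```julia
# Toy stand-ins for bootstrap fits from a model without a dispersion parameter.
T = Float64
fits = [(β = [0.3], σ = missing, objective = 12.5),
        (β = [0.4], σ = missing, objective = 11.9)]

Tfull = ismissing(first(fits).σ) ? Union{T,Missing} : T
v = sizehint!(Tfull[], 3 * length(fits))
for f in fits
    push!(v, f.objective)
    append!(v, f.β)
    push!(v, f.σ)   # would throw a MethodError if v were a plain Vector{Float64}
end
v                    # 6-element Vector{Union{Missing, Float64}}
```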
9 changes: 5 additions & 4 deletions src/generalizedlinearmixedmodel.jl
@@ -54,7 +54,7 @@ struct GeneralizedLinearMixedModel{T<:AbstractFloat,D<:Distribution} <: MixedMod
end

function StatsAPI.coef(m::GeneralizedLinearMixedModel{T}) where {T}
piv = m.LMM.feterm.piv
piv = pivot(m)
return invpermute!(copyto!(fill(T(-0.0), length(piv)), m.β), piv)
end

@@ -426,6 +426,7 @@ function GeneralizedLinearMixedModel(
LMM.optsum,
)
end
X = fullrankx(LMM.feterm)
# if the response is constant, there's no point (and this may even fail)
# we allow this instead of simply failing so that a constant response can
# be used as the starting point to simulation where the response will be
@@ -439,8 +440,8 @@
# XXX unfortunately, this means we have double-rank deficiency detection
# TODO: construct GLM by hand so that we skip collinearity checks
# TODO: extend this so that we never fit a GLM when initializing from LMM
dofit = size(LMM.X, 2) != 0 # GLM.jl kwarg
gl = glm(LMM.X, y, d, l;
dofit = size(X, 2) != 0 # GLM.jl kwarg
gl = glm(X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
@@ -793,7 +794,7 @@ function updateη!(m::GeneralizedLinearMixedModel{T}) where {T}
b = m.b
u = m.u
reterms = m.LMM.reterms
mul!(η, modelmatrix(m), m.β)
mul!(η, fullrankx(m), m.β)
for i in eachindex(b)
mul!(η, reterms[i], vec(mul!(b[i], reterms[i].λ, u[i])), one(T), one(T))
end
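Because the inner GLM used to initialize a `GeneralizedLinearMixedModel` is now built from `fullrankx(LMM.feterm)` rather than the full `LMM.X`, rank-deficient GLMMs flow through the same pivoting machinery as LMMs. A sketch mirroring the Bernoulli case exercised by the new test further down (the data construction and seed are illustrative, not part of the library):

```julia
using MixedModels, StableRNGs

rng = StableRNG(2)
x = rand(rng, 100)
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0, 1], 100), z = repeat('A':'T', 5))

# The inner GLM now sees only the estimable (full-rank) columns of X.
gm = fit(MixedModel, @formula(y ~ x + x2 + (1 | z)), data, Bernoulli(); progress=false)

coef(gm)    # full-length, with -0.0 in the dropped position
fixef(gm)   # estimable coefficients only
```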
10 changes: 6 additions & 4 deletions src/linearmixedmodel.jl
@@ -267,11 +267,11 @@ function StatsAPI.fit(
end

function StatsAPI.coef(m::LinearMixedModel{T}) where {T}
return coef!(Vector{T}(undef, length(m.feterm.piv)), m)
return coef!(Vector{T}(undef, length(pivot(m))), m)
end

function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T}
piv = m.feterm.piv
piv = pivot(m)
return invpermute!(fixef!(v, m), piv)
end

@@ -904,6 +904,8 @@ Overwrite `v` with the conditional modes of the random effects for `m`.
If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise
on the original scale
`β` is the truncated, pivoted coefficient vector.
"""
function ranef!(
v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool
@@ -1241,12 +1243,12 @@ function stderror!(v::AbstractVector{Tv}, m::LinearMixedModel{T}) where {Tv,T}
scr[i] = true
v[i] = s * norm(ldiv!(L, scr))
end
invpermute!(v, m.feterm.piv)
invpermute!(v, pivot(m))
return v
end

function StatsAPI.stderror(m::LinearMixedModel{T}) where {T}
return stderror!(similar(m.feterm.piv, T), m)
return stderror!(similar(pivot(m), T), m)
end

"""
6 changes: 3 additions & 3 deletions src/predict.jl
@@ -70,7 +70,7 @@ Similarly, offsets are also not supported for `GeneralizedLinearMixedModel`.
function StatsAPI.predict(
m::LinearMixedModel, newdata::Tables.ColumnTable; new_re_levels=:missing
)
return _predict(m, newdata, coef(m)[m.feterm.piv]; new_re_levels)
return _predict(m, newdata, coef(m)[pivot(m)]; new_re_levels)
end

function StatsAPI.predict(
@@ -81,7 +81,7 @@ function StatsAPI.predict(
)
type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)"))
# want pivoted but not truncated
y = _predict(m.LMM, newdata, coef(m)[m.feterm.piv]; new_re_levels)
y = _predict(m.LMM, newdata, coef(m)[pivot(m)]; new_re_levels)

return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(m)), y, y)
end
@@ -126,7 +126,7 @@ function _predict(m::MixedModel{T}, newdata, β; new_re_levels) where {T}
ytemp, lmm
end

pivotmatch = mnew.feterm.piv[m.feterm.piv]
pivotmatch = pivot(mnew)[pivot(m)]
grps = fnames(m)
mul!(y, view(mnew.X, :, pivotmatch), β)
# mnew.reterms for the correct Z matrices
18 changes: 9 additions & 9 deletions src/simulate.jl
@@ -149,17 +149,18 @@ end
function simulate!(
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ
) where {T}
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
throw(ArgumentError("You must specify all (non-singular) βs"))

β = convert(Vector{T}, β)
σ = T(σ)
θ = convert(Vector{T}, θ)
isempty(θ) || setθ!(m, θ)

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
if length(β) ≠ length(pivot(m))
padding = length(pivot(m)) - rank(m)
append!(β, fill(-0.0, padding))
invpermute!(β, pivot(m))
end

# initialize y to standard normal
@@ -228,7 +229,7 @@ function _simulate!(
θ,
resp=nothing,
) where {T}
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
throw(ArgumentError("You must specify all (non-singular) βs"))

dispersion_parameter(m) ||
@@ -247,11 +248,10 @@

d = m.resp.d

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
if length(β) == length(pivot(m))
# unlike LMM, GLMM stores the truncated, pivoted vector directly
β = view(β, view(pivot(m), 1:(rank(m))))
end

fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
params = fast ? θ : vcat(β, θ)
@@ -271,7 +271,7 @@
# add fixed-effects contribution
# note that unit scaling may not be correct for
# families with a dispersion parameter
mul!(η, lm.X, β, one(T), one(T))
mul!(η, fullrankx(lm), β, one(T), one(T))

μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu

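In the LMM method above, a truncated `β` is padded with `-0.0` and then un-pivoted back to the original column order, whereas the GLMM `_simulate!` instead selects the estimable entries of a full-length `β` through the pivot. A standalone sketch of the LMM padding step with made-up numbers (in the real code `piv` and the rank come from `pivot(m)` and `rank(m)`):

```julia
piv = [1, 3, 2]   # pivot: the dropped column (original column 2) sorts last
rnk = 2           # rank of the fixed-effects matrix
β = [1.0, 2.0]    # estimable coefficients only, in pivoted order

append!(β, fill(-0.0, length(piv) - rnk))  # β == [1.0, 2.0, -0.0]
invpermute!(β, piv)                        # original order: β == [1.0, -0.0, 2.0]
```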
27 changes: 17 additions & 10 deletions test/bootstrap.jl
@@ -5,6 +5,7 @@ using Random
using Statistics
using StableRNGs
using Statistics
using Suppressor
using Tables
using Test

@@ -224,16 +225,22 @@ end
@testset "Rank deficient" begin
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
end

yc = simulate(StableRNG(1), model; β=coef(model))
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end
end
end

2 comments on commit beaaa36

@palday (Member, Author) commented on beaaa36 Mar 25, 2024


@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/103601

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v4.23.1 -m "<description of version>" beaaa36be5195a11618769ae9aafcf683b8bc755
git push origin v4.23.1
