/
linearmixedmodel.jl
1347 lines (1182 loc) · 40.1 KB
/
linearmixedmodel.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
LinearMixedModel
Linear mixed-effects model representation
## Fields
* `formula`: the formula for the model
* `reterms`: a `Vector{AbstractReMat{T}}` of random-effects terms.
* `Xymat`: horizontal concatenation of a full-rank fixed-effects model matrix `X` and response `y` as an `FeMat{T}`
* `feterm`: the fixed-effects model matrix as an `FeTerm{T}`
* `sqrtwts`: vector of square roots of the case weights. Can be empty.
* `parmap` : Vector{NTuple{3,Int}} of (block, row, column) mapping of θ to λ
* `dims` : NamedTuple{(:n, :p, :nretrms),NTuple{3,Int}} of dimensions. `p` is the rank of `X`, which may be smaller than `size(X, 2)`.
* `A`: a `Vector{AbstractMatrix}` containing the row-major packed lower triangle of `hcat(Z,X,y)'hcat(Z,X,y)`
* `L`: the blocked lower Cholesky factor of `Λ'AΛ+I` in the same Vector representation as `A`
* `optsum`: an [`OptSummary`](@ref) object
## Properties
* `θ` or `theta`: the covariance parameter vector used to form λ
* `β` or `beta`: the fixed-effects coefficient vector
* `λ` or `lambda`: a vector of lower triangular matrices repeated on the diagonal blocks of `Λ`
* `σ` or `sigma`: current value of the standard deviation of the per-observation noise
* `b`: random effects on the original scale, as a vector of matrices
* `u`: random effects on the orthogonal scale, as a vector of matrices
* `lowerbd`: lower bounds on the elements of θ
* `X`: the fixed-effects model matrix
* `y`: the response vector
"""
struct LinearMixedModel{T<:AbstractFloat} <: MixedModel{T}
formula::FormulaTerm
reterms::Vector{<:AbstractReMat{T}}
Xymat::FeMat{T}
feterm::FeTerm{T}
sqrtwts::Vector{T}
parmap::Vector{NTuple{3,Int}}
dims::NamedTuple{(:n, :p, :nretrms),NTuple{3,Int}}
A::Vector{AbstractMatrix{T}} # cross-product blocks
L::Vector{AbstractMatrix{T}}
optsum::OptSummary{T}
end
function LinearMixedModel(
f::FormulaTerm, tbl; contrasts=Dict{Symbol,Any}(), wts=[], σ=nothing, amalgamate=true
)
return LinearMixedModel(
f::FormulaTerm, Tables.columntable(tbl); contrasts, wts, σ, amalgamate
)
end
const _MISSING_RE_ERROR = ArgumentError(
"Formula contains no random effects; this isn't a mixed model. Perhaps you want to use GLM.jl?",
)
function LinearMixedModel(
f::FormulaTerm, tbl::Tables.ColumnTable; contrasts=Dict{Symbol,Any}(), wts=[],
σ=nothing, amalgamate=true,
)
fvars = StatsModels.termvars(f)
tvars = Tables.columnnames(tbl)
fvars ⊆ tvars ||
throw(
ArgumentError(
"The following formula variables are not present in the table: $(setdiff(fvars, tvars))",
),
)
# TODO: perform missing_omit() after apply_schema() when improved
# missing support is in a StatsModels release
tbl, _ = StatsModels.missing_omit(tbl, f)
form = schematize(f, tbl, contrasts)
if form.rhs isa MatrixTerm || !any(x -> isa(x, AbstractReTerm), form.rhs)
throw(_MISSING_RE_ERROR)
end
y, Xs = modelcols(form, tbl)
return LinearMixedModel(y, Xs, form, wts, σ, amalgamate)
end
"""
LinearMixedModel(y, Xs, form, wts=[], σ=nothing, amalgamate=true)
Private constructor for a LinearMixedModel.
To construct a model, you only need the response (`y`), already assembled
model matrices (`Xs`), schematized formula (`form`) and weights (`wts`).
Everything else in the structure can be derived from these quantities.
!!! note
This method is internal and experimental and so may change or disappear in
a future release without being considered a breaking change.
"""
function LinearMixedModel(
y::AbstractArray,
Xs::Tuple, # can't be more specific here without stressing the compiler
form::FormulaTerm,
wts=[],
σ=nothing,
amalgamate=true,
)
T = promote_type(Float64, float(eltype(y))) # ensure eltype of model matrices is at least Float64
reterms = AbstractReMat{T}[]
feterms = FeTerm{T}[]
for (i, x) in enumerate(Xs)
if isa(x, AbstractReMat{T})
push!(reterms, x)
elseif isa(x, ReMat) # this can occur in weird situation where x is a ReMat{U}
# avoid keeping a second copy if unweighted
z = convert(Matrix{T}, x.z)
wtz = x.z === x.wtz ? z : convert(Matrix{T}, x.wtz)
S = size(z, 1)
x = ReMat{T,S}(
x.trm,
x.refs,
x.levels,
x.cnames,
z,
wtz,
convert(LowerTriangular{Float64,Matrix{Float64}}, x.λ),
x.inds,
convert(SparseMatrixCSC{T,Int32}, x.adjA),
convert(Matrix{T}, x.scratch),
)
push!(reterms, x)
else
cnames = coefnames(form.rhs[i])
push!(feterms, FeTerm(x, isa(cnames, String) ? [cnames] : collect(cnames)))
end
end
isempty(reterms) && throw(_MISSING_RE_ERROR)
return LinearMixedModel(
convert(Array{T}, y), only(feterms), reterms, form, wts, σ, amalgamate
)
end
"""
LinearMixedModel(y, feterm, reterms, form, wts=[], σ=nothing; amalgamate=true)
Private constructor for a `LinearMixedModel` given already assembled fixed and random effects.
To construct a model, you only need a vector of `FeMat`s (the fixed-effects
model matrix and response), a vector of `AbstractReMat` (the random-effects
model matrices), the formula and the weights. Everything else in the structure
can be derived from these quantities.
!!! note
This method is internal and experimental and so may change or disappear in
a future release without being considered a breaking change.
"""
function LinearMixedModel(
y::AbstractArray,
feterm::FeTerm{T},
reterms::AbstractVector{<:AbstractReMat{T}},
form::FormulaTerm,
wts=[],
σ=nothing,
amalgamate=true,
) where {T}
# detect and combine RE terms with the same grouping var
if length(reterms) > 1 && amalgamate
# okay, this looks weird, but it allows us to have the kwarg with the same name
# as the internal function
reterms = MixedModels.amalgamate(reterms)
end
sort!(reterms; by=nranef, rev=true)
Xy = FeMat(feterm, vec(y))
sqrtwts = map!(sqrt, Vector{T}(undef, length(wts)), wts)
reweight!.(reterms, Ref(sqrtwts))
reweight!(Xy, sqrtwts)
A, L = createAL(reterms, Xy)
lbd = foldl(vcat, lowerbd(c) for c in reterms)
θ = foldl(vcat, getθ(c) for c in reterms)
optsum = OptSummary(θ, lbd, :LN_BOBYQA; ftol_rel=T(1.0e-12), ftol_abs=T(1.0e-8))
optsum.sigma = isnothing(σ) ? nothing : T(σ)
fill!(optsum.xtol_abs, 1.0e-10)
return LinearMixedModel(
form,
reterms,
Xy,
feterm,
sqrtwts,
mkparmap(reterms),
(n=length(y), p=feterm.rank, nretrms=length(reterms)),
A,
L,
optsum,
)
end
function StatsAPI.fit(
::Type{LinearMixedModel},
f::FormulaTerm,
tbl;
kwargs...,
)
return fit(
LinearMixedModel,
f,
Tables.columntable(tbl);
kwargs...,
)
end
function StatsAPI.fit(
::Type{LinearMixedModel},
f::FormulaTerm,
tbl::Tables.ColumnTable;
wts=[],
contrasts=Dict{Symbol,Any}(),
progress=true,
REML=false,
σ=nothing,
thin=typemax(Int),
amalgamate=true,
)
return fit!(
LinearMixedModel(f, tbl; contrasts, wts, σ, amalgamate); progress, REML, thin
)
end
function _offseterr()
return throw(
ArgumentError(
"Offsets are not supported in linear models. You can simply shift the response by the offset.",
),
)
end
function StatsAPI.fit(
::Type{MixedModel},
f::FormulaTerm,
tbl;
offset=[],
kwargs...,
)
return if !isempty(offset)
_offseterr()
else
fit(LinearMixedModel, f, tbl; kwargs...)
end
end
function StatsAPI.fit(
::Type{MixedModel},
f::FormulaTerm,
tbl,
d::Normal,
l::IdentityLink;
offset=[],
fast=nothing,
nAGQ=nothing,
kwargs...,
)
return if !isempty(offset)
_offseterr()
else
if !isnothing(fast) || !isnothing(nAGQ)
@warn "fast and nAGQ arguments are ignored when fitting a LinearMixedModel"
end
fit(LinearMixedModel, f, tbl; kwargs...)
end
end
function StatsAPI.coef(m::LinearMixedModel{T}) where {T}
return coef!(Vector{T}(undef, length(pivot(m))), m)
end
function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T}
piv = pivot(m)
return invpermute!(fixef!(v, m), piv)
end
βs(m::LinearMixedModel) = NamedTuple{(Symbol.(coefnames(m))...,)}(coef(m))
function StatsAPI.coefnames(m::LinearMixedModel)
Xtrm = m.feterm
return invpermute!(copy(Xtrm.cnames), Xtrm.piv)
end
function StatsAPI.coeftable(m::LinearMixedModel)
co = coef(m)
se = stderror!(similar(co), m)
z = co ./ se
pvalue = ccdf.(Chisq(1), abs2.(z))
names = coefnames(m)
return CoefTable(
hcat(co, se, z, pvalue),
["Coef.", "Std. Error", "z", "Pr(>|z|)"],
names,
4, # pvalcol
3, # teststatcol
)
end
"""
condVar(m::LinearMixedModel)
Return the conditional variances matrices of the random effects.
The random effects are returned by `ranef` as a vector of length `k`,
where `k` is the number of random effects terms. The `i`th element
is a matrix of size `vᵢ × ℓᵢ` where `vᵢ` is the size of the
vector-valued random effects for each of the `ℓᵢ` levels of the grouping
factor. Technically those values are the modes of the conditional
distribution of the random effects given the observed data.
This function returns an array of `k` three dimensional arrays,
where the `i`th array is of size `vᵢ × vᵢ × ℓᵢ`. These are the
diagonal blocks from the conditional variance-covariance matrix,
s² Λ(Λ'Z'ZΛ + I)⁻¹Λ'
"""
function condVar(m::LinearMixedModel{T}) where {T}
return [condVar(m, fnm) for fnm in fnames(m)]
end
function condVar(m::LinearMixedModel{T}, fname) where {T}
Lblk = LowerTriangular(densify(sparseL(m; fname=fname)))
blk = findfirst(isequal(fname), fnames(m))
λt = Array(m.λ[blk]') .* sdest(m)
vsz = size(λt, 2)
ℓ = length(m.reterms[blk].levels)
val = Array{T}(undef, (vsz, vsz, ℓ))
scratch = Matrix{T}(undef, (size(Lblk, 1), vsz))
for b in 1:ℓ
fill!(scratch, zero(T))
copyto!(view(scratch, (b - 1) * vsz .+ (1:vsz), :), λt)
ldiv!(Lblk, scratch)
mul!(view(val, :, :, b), scratch', scratch)
end
return val
end
function _cvtbl(arr::Array{T,3}, trm) where {T}
return merge(
NamedTuple{(fname(trm),)}((trm.levels,)),
columntable([
NamedTuple{(:σ, :ρ)}(sdcorr(view(arr, :, :, i))) for i in axes(arr, 3)
]),
)
end
"""
condVartables(m::LinearMixedModel)
Return the conditional covariance matrices of the random effects as a `NamedTuple` of columntables
"""
function condVartables(m::MixedModel{T}) where {T}
return NamedTuple{_unique_fnames(m)}((map(_cvtbl, condVar(m), m.reterms)...,))
end
"""
confint(pr::MixedModelProfile; level::Real=0.95)
Compute profile confidence intervals for (fixed effects) coefficients, with confidence level `level` (by default 95%).
!!! note
The API guarantee is for a Tables.jl compatible table. The exact return type is an
implementation detail and may change in a future minor release without being considered
breaking.
"""
function StatsBase.confint(m::MixedModel{T}; level=0.95) where {T}
cutoff = sqrt.(quantile(Chisq(1), level))
β, std = m.β, m.stderror
return DictTable(;
coef=coefnames(m),
lower=β .- cutoff .* std,
upper=β .+ cutoff .* std
)
end
function _pushALblock!(A, L, blk)
push!(L, blk)
return push!(A, deepcopy(isa(blk, BlockedSparse) ? blk.cscmat : blk))
end
function createAL(reterms::Vector{<:AbstractReMat{T}}, Xy::FeMat{T}) where {T}
k = length(reterms)
vlen = kchoose2(k + 1)
A = sizehint!(AbstractMatrix{T}[], vlen)
L = sizehint!(AbstractMatrix{T}[], vlen)
for i in eachindex(reterms)
for j in 1:i
_pushALblock!(A, L, densify(reterms[i]' * reterms[j]))
end
end
for j in eachindex(reterms) # can't fold this into the previous loop b/c block order
_pushALblock!(A, L, densify(Xy' * reterms[j]))
end
_pushALblock!(A, L, densify(Xy'Xy))
for i in 2:k # check for fill-in due to non-nested grouping factors
ci = reterms[i]
for j in 1:(i - 1)
cj = reterms[j]
if !isnested(cj, ci)
for l in i:k
ind = block(l, i)
L[ind] = Matrix(L[ind])
end
break
end
end
end
return A, L
end
StatsAPI.deviance(m::LinearMixedModel) = objective(m)
GLM.dispersion(m::LinearMixedModel, sqr::Bool=false) = sqr ? varest(m) : sdest(m)
GLM.dispersion_parameter(m::LinearMixedModel) = true
"""
feL(m::LinearMixedModel)
Return the lower Cholesky factor for the fixed-effects parameters, as an `LowerTriangular`
`p × p` matrix.
"""
function feL(m::LinearMixedModel)
XyL = m.L[end]
k = size(XyL, 1)
inds = Base.OneTo(k - 1)
return LowerTriangular(view(XyL, inds, inds))
end
"""
fit!(m::LinearMixedModel; progress::Bool=true, REML::Bool=false,
σ::Union{Real, Nothing}=nothing,
thin::Int=typemax(Int))
Optimize the objective of a `LinearMixedModel`. When `progress` is `true` a
`ProgressMeter.ProgressUnknown` display is shown during the optimization of the
objective, if the optimization takes more than one second or so.
At every `thin`th iteration is recorded in `fitlog`, optimization progress is
saved in `m.optsum.fitlog`.
"""
function StatsAPI.fit!(
m::LinearMixedModel{T};
progress::Bool=true,
REML::Bool=false,
σ::Union{Real,Nothing}=nothing,
thin::Int=typemax(Int),
) where {T}
optsum = m.optsum
# this doesn't matter for LMM, but it does for GLMM, so let's be consistent
if optsum.feval > 0
throw(ArgumentError("This model has already been fitted. Use refit!() instead."))
end
if all(==(first(m.y)), m.y)
throw(
ArgumentError("The response is constant and thus model fitting has failed")
)
end
opt = Opt(optsum)
optsum.REML = REML
prog = ProgressUnknown(; desc="Minimizing", showspeed=true)
# start from zero for the initial call to obj before optimization
iter = 0
fitlog = optsum.fitlog
function obj(x, g)
isempty(g) || throw(ArgumentError("g should be empty for this objective"))
iter += 1
val = if isone(iter) && x == optsum.initial
optsum.finitial
else
try
objective(updateL!(setθ!(m, x)))
catch ex
# This can happen when the optimizer drifts into an area where
# there isn't enough shrinkage. Why finitial? Generally, it will
# be the (near) worst case scenario value, so the optimizer won't
# view it as an optimum. Using Inf messes up the quadratic
# approximation in BOBYQA.
ex isa PosDefException || rethrow()
optsum.finitial
end
end
progress && ProgressMeter.next!(prog; showvalues=[(:objective, val)])
!isone(iter) && iszero(rem(iter, thin)) && push!(fitlog, (copy(x), val))
return val
end
NLopt.min_objective!(opt, obj)
try
# use explicit evaluation w/o calling opt to avoid confusing iteration count
optsum.finitial = objective(updateL!(setθ!(m, optsum.initial)))
catch ex
ex isa PosDefException || rethrow()
# give it one more try with a massive change in scaling
@info "Initial objective evaluation failed, rescaling initial guess and trying again."
@warn """Failure of the initial evaluation is often indicative of a model specification
that is not well supported by the data and/or a poorly scaled model.
"""
optsum.initial ./=
(isempty(m.sqrtwts) ? 1.0 : maximum(m.sqrtwts)^2) *
maximum(response(m))
optsum.finitial = objective(updateL!(setθ!(m, optsum.initial)))
end
empty!(fitlog)
push!(fitlog, (copy(optsum.initial), optsum.finitial))
fmin, xmin, ret = NLopt.optimize!(opt, copyto!(optsum.final, optsum.initial))
ProgressMeter.finish!(prog)
## check if small non-negative parameter values can be set to zero
xmin_ = copy(xmin)
lb = optsum.lowerbd
for i in eachindex(xmin_)
if iszero(lb[i]) && zero(T) < xmin_[i] < T(0.001)
xmin_[i] = zero(T)
end
end
loglength = length(fitlog)
if xmin ≠ xmin_
if (zeroobj = obj(xmin_, T[])) ≤ (fmin + 1.e-5)
fmin = zeroobj
copyto!(xmin, xmin_)
elseif length(fitlog) > loglength
# remove unused extra log entry
pop!(fitlog)
end
end
## ensure that the parameter values saved in m are xmin
updateL!(setθ!(m, xmin))
optsum.feval = opt.numevals
optsum.final = xmin
optsum.fmin = fmin
optsum.returnvalue = ret
_check_nlopt_return(ret)
return m
end
"""
fitted!(v::AbstractArray{T}, m::LinearMixedModel{T})
Overwrite `v` with the fitted values from `m`.
See also `fitted`.
"""
function fitted!(v::AbstractArray{T}, m::LinearMixedModel{T}) where {T}
## FIXME: Create and use `effects(m) -> β, b` w/o calculating β twice
Xtrm = m.feterm
vv = mul!(vec(v), Xtrm.x, fixef!(similar(Xtrm.piv, T), m))
for (rt, bb) in zip(m.reterms, ranef(m))
mul!(vv, rt, bb, one(T), one(T))
end
return v
end
StatsAPI.fitted(m::LinearMixedModel{T}) where {T} = fitted!(Vector{T}(undef, nobs(m)), m)
"""
fixef!(v::Vector{T}, m::MixedModel{T})
Overwrite `v` with the pivoted fixed-effects coefficients of model `m`
For full-rank models the length of `v` must be the rank of `X`. For rank-deficient models
the length of `v` can be the rank of `X` or the number of columns of `X`. In the latter
case the calculated coefficients are padded with -0.0 out to the number of columns.
"""
function fixef!(v::AbstractVector{Tv}, m::LinearMixedModel{T}) where {Tv,T}
fill!(v, -zero(Tv))
XyL = m.L[end]
L = feL(m)
k = size(XyL, 1)
r = size(L, 1)
for j in 1:r
v[j] = XyL[k, j]
end
ldiv!(L', length(v) == r ? v : view(v, 1:r))
return v
end
"""
fixef(m::MixedModel)
Return the fixed-effects parameter vector estimate of `m`.
In the rank-deficient case the truncated parameter vector, of length `rank(m)` is returned.
This is unlike `coef` which always returns a vector whose length matches the number of
columns in `X`.
"""
fixef(m::LinearMixedModel{T}) where {T} = fixef!(Vector{T}(undef, m.feterm.rank), m)
"""
fixefnames(m::MixedModel)
Return a (permuted and truncated in the rank-deficient case) vector of coefficient names.
"""
function fixefnames(m::LinearMixedModel)
Xtrm = m.feterm
return Xtrm.cnames[1:(Xtrm.rank)]
end
"""
fnames(m::MixedModel)
Return the names of the grouping factors for the random-effects terms.
"""
fnames(m::MixedModel) = (fname.(m.reterms)...,)
function _unique_fnames(m::MixedModel)
fn = fnames(m)
ufn = unique(fn)
length(fn) == length(ufn) && return fn
fn = collect(fn)
d = Dict(ufn .=> 0)
for i in eachindex(fn)
(d[fn[i]] += 1) == 1 && continue
fn[i] = Symbol(string(fn[i], ".", d[fn[i]]))
end
return Tuple(fn)
end
"""
getθ(m::LinearMixedModel)
Return the current covariance parameter vector.
"""
getθ(m::LinearMixedModel{T}) where {T} = getθ!(Vector{T}(undef, length(m.parmap)), m)
function getθ!(v::AbstractVector{Tv}, m::LinearMixedModel{T}) where {Tv,T}
pmap = m.parmap
if length(v) ≠ length(pmap)
throw(
DimensionMismatch(
"length(v) = $(length(v)) ≠ length(m.parmap) = $(length(pmap))"
),
)
end
reind = 1
λ = first(m.reterms).λ
for (k, tp) in enumerate(pmap)
tp1 = first(tp)
if reind ≠ tp1
reind = tp1
λ = m.reterms[tp1].λ
end
v[k] = λ[tp[2], tp[3]]
end
return v
end
function Base.getproperty(m::LinearMixedModel{T}, s::Symbol) where {T}
if s == :θ || s == :theta
getθ(m)
elseif s == :β || s == :beta
coef(m)
elseif s == :βs || s == :betas
βs(m)
elseif s == :λ || s == :lambda
getproperty.(m.reterms, :λ)
elseif s == :σ || s == :sigma
sdest(m)
elseif s == :σs || s == :sigmas
σs(m)
elseif s == :σρs || s == :sigmarhos
σρs(m)
elseif s == :b
ranef(m)
elseif s == :objective
objective(m)
elseif s == :corr
vcov(m; corr=true)
elseif s == :vcov
vcov(m; corr=false)
elseif s == :PCA
PCA(m)
elseif s == :pvalues
ccdf.(Chisq(1), abs2.(coef(m) ./ stderror(m)))
elseif s == :stderror
stderror(m)
elseif s == :u
ranef(m; uscale=true)
elseif s == :lowerbd
m.optsum.lowerbd
elseif s == :X
modelmatrix(m)
elseif s == :y
let xy = m.Xymat.xy
view(xy, :, size(xy, 2))
end
elseif s == :rePCA
rePCA(m)
else
getfield(m, s)
end
end
StatsAPI.islinear(m::LinearMixedModel) = true
"""
_3blockL(::LinearMixedModel)
returns L in 3-block form:
- a Diagonal or UniformBlockDiagonal block
- a dense rectangular block
- and a dense lowertriangular block
"""
function _3blockL(m::LinearMixedModel{T}) where {T}
L = m.L
reterms = m.reterms
isone(length(reterms)) &&
return first(L), L[block(2, 1)], LowerTriangular(L[block(2, 2)])
rows = sum(k -> size(L[kp1choose2(k + 1)], 1), axes(reterms, 1))
cols = size(first(L), 2)
B2 = Matrix{T}(undef, (rows, cols))
B3 = Matrix{T}(undef, (rows, rows))
rowoffset = 0
for i in 1 .+ axes(reterms, 1)
Li1 = L[block(i, 1)]
rows = rowoffset .+ axes(Li1, 1)
copyto!(view(B2, rows, :), Li1)
coloffset = 0
for j in 2:i
Lij = L[block(i, j)]
copyto!(view(B3, rows, coloffset .+ axes(Lij, 2)), Lij)
coloffset += size(Lij, 2)
end
rowoffset += size(Li1, 1)
end
return first(L), B2, LowerTriangular(B3)
end
# use dispatch to distinguish Diagonal and UniformBlockDiagonal in first(L)
_ldivB1!(B1::Diagonal{T}, rhs::AbstractVector{T}, ind) where {T} = rhs ./= B1.diag[ind]
function _ldivB1!(B1::UniformBlockDiagonal{T}, rhs::AbstractVector{T}, ind) where {T}
return ldiv!(LowerTriangular(view(B1.data, :, :, ind)), rhs)
end
"""
leverage(::LinearMixedModel)
Return the diagonal of the hat matrix of the model.
For a linear model, the sum of the leverage values is the degrees of freedom
for the model in the sense that this sum is the dimension of the span of columns
of the model matrix. With a bit of hand waving a similar argument could be made
for linear mixed-effects models. The hat matrix is of the form ``[ZΛ X][L L']⁻¹[ZΛ X]'``.
"""
function StatsAPI.leverage(m::LinearMixedModel{T}) where {T}
# To obtain the diagonal elements solve L⁻¹[ZΛ X]'eⱼ
# where eⱼ is the j'th basis vector in Rⁿ and evaluate the squared length of the solution.
# The fact that the [1,1] block of L is always UniformBlockDiagonal
# or Diagonal makes it easy to obtain the first chunk of the solution.
B1, B2, B3 = _3blockL(m)
reterms = m.reterms
re1 = first(reterms)
re1z = re1.z
r1sz = size(re1z, 1)
re1λ = re1.λ
re1refs = re1.refs
Xy = m.Xymat
rhs1 = zeros(T, size(re1z, 1)) # for the first block only the nonzeros are stored
rhs2 = zeros(T, size(B2, 1))
value = similar(m.y)
for i in eachindex(value)
re1ind = re1refs[i]
_ldivB1!(B1, mul!(rhs1, adjoint(re1λ), view(re1z, :, i)), re1ind)
off = (re1ind - 1) * r1sz
fill!(rhs2, 0)
rhsoffset = 0
for j in 2:length(reterms)
trm = reterms[j]
z = trm.z
stride = size(z, 1)
mul!(
view(
rhs2, muladd((trm.refs[i] - 1), stride, rhsoffset) .+ Base.OneTo(stride)
),
adjoint(trm.λ),
view(z, :, i),
)
rhsoffset += length(trm.levels) * stride
end
copyto!(view(rhs2, rhsoffset .+ Base.OneTo(size(Xy, 2))), view(Xy, i, :))
ldiv!(B3, mul!(rhs2, view(B2, :, off .+ Base.OneTo(r1sz)), rhs1, 1, -1))
rhs2[end] = 0
value[i] = sum(abs2, rhs1) + sum(abs2, rhs2)
end
return value
end
function StatsAPI.loglikelihood(m::LinearMixedModel)
if m.optsum.REML
throw(ArgumentError("loglikelihood not available for models fit by REML"))
end
return -objective(m) / 2
end
lowerbd(m::LinearMixedModel) = m.optsum.lowerbd
function mkparmap(reterms::Vector{<:AbstractReMat{T}}) where {T}
parmap = NTuple{3,Int}[]
for (k, trm) in enumerate(reterms)
n = LinearAlgebra.checksquare(trm.λ)
for ind in trm.inds
d, r = divrem(ind - 1, n)
push!(parmap, (k, r + 1, d + 1))
end
end
return parmap
end
nθ(m::LinearMixedModel) = length(m.parmap)
"""
objective(m::LinearMixedModel)
Return negative twice the log-likelihood of model `m`
"""
function objective(m::LinearMixedModel{T}) where {T}
wts = m.sqrtwts
denomdf = T(ssqdenom(m))
σ = m.optsum.sigma
val = if isnothing(σ)
logdet(m) + denomdf * (one(T) + log2π + log(pwrss(m) / denomdf))
else
muladd(denomdf, muladd(2, log(σ), log2π), (logdet(m) + pwrss(m) / σ^2))
end
return isempty(wts) ? val : val - T(2.0) * sum(log, wts)
end
"""
objective!(m::LinearMixedModel, θ)
objective!(m::LinearMixedModel)
Equivalent to `objective(updateL!(setθ!(m, θ)))`.
When `m` has a single, scalar random-effects term, `θ` can be a scalar.
The one-argument method curries and returns a single-argument function of `θ`.
Note that these methods modify `m`.
The calling function is responsible for restoring the optimal `θ`.
"""
function objective! end
function objective!(m::LinearMixedModel)
return Base.Fix1(objective!, m)
end
function objective!(m::LinearMixedModel{T}, θ) where {T}
return objective(updateL!(setθ!(m, θ)))
end
function objective!(m::LinearMixedModel{T}, x::Number) where {T}
retrm = only(m.reterms)
isa(retrm, ReMat{T,1}) ||
throw(DimensionMismatch("length(m.θ) = $(length(m.θ)), should be 1"))
copyto!(retrm.λ.data, x)
return objective(updateL!(m))
end
function Base.propertynames(m::LinearMixedModel, private::Bool=false)
return (
fieldnames(LinearMixedModel)...,
:θ,
:theta,
:β,
:beta,
:βs,
:betas,
:λ,
:lambda,
:stderror,
:σ,
:sigma,
:σs,
:sigmas,
:σρs,
:sigmarhos,
:b,
:u,
:lowerbd,
:X,
:y,
:corr,
:vcov,
:PCA,
:rePCA,
:objective,
:pvalues,
)
end
"""
pwrss(m::LinearMixedModel)
The penalized, weighted residual sum-of-squares.
"""
pwrss(m::LinearMixedModel) = abs2(last(last(m.L)))
"""
ranef!(v::Vector{Matrix{T}}, m::MixedModel{T}, β, uscale::Bool) where {T}
Overwrite `v` with the conditional modes of the random effects for `m`.
If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise
on the original scale
`β` is the truncated, pivoted coefficient vector.
"""
function ranef!(
v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool
) where {T}
(k = length(v)) == length(m.reterms) || throw(DimensionMismatch(""))
L = m.L
lind = length(L)
for j in k:-1:1
lind -= 1
Ljkp1 = L[lind]
vj = v[j]
length(vj) == size(Ljkp1, 2) || throw(DimensionMismatch(""))
pp1 = size(Ljkp1, 1)
copyto!(vj, view(Ljkp1, pp1, :))
mul!(vec(vj), view(Ljkp1, 1:(pp1 - 1), :)', β, -one(T), one(T))
end
for i in k:-1:1
Lii = L[kp1choose2(i)]
vi = vec(v[i])
ldiv!(adjoint(isa(Lii, Diagonal) ? Lii : LowerTriangular(Lii)), vi)
for j in 1:(i - 1)
mul!(vec(v[j]), L[block(i, j)]', vi, -one(T), one(T))
end
end
if !uscale
for (t, vv) in zip(m.reterms, v)
lmul!(t.λ, vv)
end
end
return v
end
ranef!(v::Vector, m::LinearMixedModel, uscale::Bool) = ranef!(v, m, fixef(m), uscale)
"""
ranef(m::LinearMixedModel; uscale=false)
Return, as a `Vector{Matrix{T}}`, the conditional modes of the random effects in model `m`.
If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise on
the original scale.
For a named variant, see [`raneftables`](@ref).
"""
function ranef(m::LinearMixedModel{T}; uscale=false) where {T}
reterms = m.reterms
v = [Matrix{T}(undef, size(t.z, 1), nlevs(t)) for t in reterms]
return ranef!(v, m, uscale)
end
LinearAlgebra.rank(m::LinearMixedModel) = m.feterm.rank
"""
rePCA(m::LinearMixedModel; corr::Bool=true)
Return a named tuple of the normalized cumulative variance of a principal components
analysis of the random effects covariance matrices or correlation
matrices when `corr` is `true`.
The normalized cumulative variance is the proportion of the variance for the first
principal component, the first two principal components, etc. The last element is
always 1.0 representing the complete proportion of the variance.
"""
function rePCA(m::LinearMixedModel; corr::Bool=true)
pca = PCA.(m.reterms, corr=corr)
return NamedTuple{_unique_fnames(m)}(getproperty.(pca, :cumvar))
end
"""
PCA(m::LinearMixedModel; corr::Bool=true)
Return a named tuple of the principal components analysis of the random effects
covariance matrices or correlation matrices when `corr` is `true`.
"""
function PCA(m::LinearMixedModel; corr::Bool=true)
return NamedTuple{_unique_fnames(m)}(PCA.(m.reterms, corr=corr))
end
"""
reevaluateAend!(m::LinearMixedModel)
Reevaluate the last column of `m.A` from `m.Xymat`. This function should be called
after updating the response.
"""
function reevaluateAend!(m::LinearMixedModel)
A = m.A
reterms = m.reterms
nre = length(reterms)
trmn = reweight!(m.Xymat, m.sqrtwts)
ind = kp1choose2(nre)
for trm in m.reterms