From 8119ad231d9da83139722920e5108e020468bb89 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 31 Jul 2024 21:27:09 -0700 Subject: [PATCH 01/20] remove get_observed() does not seem to be used anywhere; also the method signature does not match Julia conventions --- src/additional_functions/helper.jl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index 138ae431e..be559b0d9 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -33,15 +33,7 @@ function semvec(observed, imply, loss, optimizer) return sem_vec end -function get_observed(rowind, data, semobserved; args = (), kwargs = NamedTuple()) - observed_vec = Vector{semobserved}(undef, length(rowind)) - for i in 1:length(rowind) - observed_vec[i] = semobserved(args...; data = Matrix(data[rowind[i], :]), kwargs...) - end - return observed_vec -end - -skipmissing_mean(mat::AbstractMatrix) = +skipmissing_mean(mat::AbstractMatrix) = [mean(skipmissing(coldata)) for coldata in eachcol(mat)] function F_one_person(imp_mean, meandiff, inverse, data, logdet) From 1c179d47df5c298643779d3fd6c6c4736962ff88 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 17 Mar 2024 00:10:30 -0700 Subject: [PATCH 02/20] fix ridge eval --- src/loss/regularization/ridge.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index a61dd2af0..66ce37428 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -62,11 +62,10 @@ function SemRidge(; which_ridge = getindex.(Ref(par2ind), which_ridge) end end - which = [CartesianIndex(x) for x in which_ridge] which_H = [CartesianIndex(x, x) for x in which_ridge] return SemRidge( α_ridge, - which, + which_ridge, which_H, zeros(parameter_type, nparams), zeros(parameter_type, nparams, nparams), @@ -77,15 +76,15 @@ end ### methods ############################################################################################ -objective!(ridge::SemRidge, par, model) = @views ridge.α * sum(x -> x^2, par[ridge.which]) +objective!(ridge::SemRidge, par, model) = @views ridge.α * sum(abs2, par[ridge.which]) function gradient!(ridge::SemRidge, par, model) - @views ridge.gradient[ridge.which] .= 2 * ridge.α * par[ridge.which] + @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] return ridge.gradient end function hessian!(ridge::SemRidge, par, model) - @views @. ridge.hessian[ridge.which_H] += ridge.α * 2.0 + @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α return ridge.hessian end From 22e76ebe3823d3757796fc99c096d772df6f11b0 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 18 Mar 2024 17:28:21 -0700 Subject: [PATCH 03/20] MeanStructure, HessianEvaluation traits * replace has_meanstrcture and approximate_hessian fields with trait-like typeparams * remove methods for has_meanstructure-based dispatch --- src/StructuralEquationModels.jl | 6 + src/imply/RAM/generic.jl | 53 ++---- src/imply/RAM/symbolic.jl | 35 ++-- src/imply/empty.jl | 2 +- src/loss/ML/FIML.jl | 2 +- src/loss/ML/ML.jl | 212 +++++++---------------- src/loss/WLS/WLS.jl | 133 ++++---------- src/loss/constant/constant.jl | 2 +- src/loss/regularization/ridge.jl | 2 +- src/types.jl | 33 +++- test/examples/multigroup/build_models.jl | 2 +- 11 files changed, 170 insertions(+), 312 deletions(-) diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index a032ab724..a171c29d0 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -95,6 +95,12 @@ export AbstractSem, Sem, SemFiniteDiff, SemEnsemble, + MeanStructure, + NoMeanStructure, + HasMeanStructure, + HessianEvaluation, + ExactHessian, + ApproximateHessian, SemImply, RAMSymbolic, RAM, diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index 9ff46bd2e..c749e3ff0 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -65,8 +65,8 @@ Additional interfaces Only available in gradient! calls: - `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` """ -mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S2, S3, B} <: - SemImply +mutable struct RAM{MS, A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S2, S3} <: + SemImply{MS, ExactHessian} Σ::A1 A::A2 S::A3 @@ -75,7 +75,6 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S M::A6 ram_matrices::V2 - has_meanstructure::B A_indices::I1 S_indices::I2 @@ -89,9 +88,10 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S ∇A::S1 ∇S::S2 ∇M::S3 -end -using StructuralEquationModels + RAM{MS}(args...) where {MS <: MeanStructure} = + new{MS, map(typeof, args)...}(args...) +end ############################################################################################ ### Constructors @@ -143,7 +143,7 @@ function RAM(; # μ if meanstructure - has_meanstructure = Val(true) + MS = HasMeanStructure !isnothing(M_indices) || throw( ArgumentError( "You set `meanstructure = true`, but your model specification contains no mean parameters.", @@ -152,14 +152,14 @@ function RAM(; ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing μ = zeros(n_obs) else - has_meanstructure = Val(false) + MS = NoMeanStructure M_indices = nothing M_pre = nothing μ = nothing ∇M = nothing end - return RAM( + return RAM{MS}( Σ, A_pre, S_pre, @@ -167,7 +167,6 @@ function RAM(; μ, M_pre, ram_matrices, - has_meanstructure, A_indices, S_indices, M_indices, @@ -185,14 +184,8 @@ end ### methods ############################################################################################ -# dispatch on meanstructure -objective!(imply::RAM, par, model::AbstractSemSingle) = - objective!(imply, par, model, imply.has_meanstructure) -gradient!(imply::RAM, par, model::AbstractSemSingle) = - gradient!(imply, par, model, imply.has_meanstructure) - # objective and gradient -function objective!(imply::RAM, params, model, has_meanstructure::Val{T}) where {T} +function objective!(imply::RAM, params, model) fill_A_S_M!( imply.A, imply.S, @@ -211,17 +204,12 @@ function objective!(imply::RAM, params, model, has_meanstructure::Val{T}) where Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) - if T + if MeanStructure(imply) === HasMeanStructure μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) end end -function gradient!( - imply::RAM, - params, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function gradient!(imply::RAM, params, model::AbstractSemSingle) fill_A_S_M!( imply.A, imply.S, @@ -240,21 +228,18 @@ function gradient!( Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) - if T + if MeanStructure(imply) === HasMeanStructure μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) end end -hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_gradient!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) +hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) +objective_gradient!(imply::RAM, par, model::AbstractSemSingle) = + gradient!(imply, par, model) +objective_hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) +gradient_hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) +objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle) = + gradient!(imply, par, model) ############################################################################################ ### Recommended methods diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index b8da20148..a0e68c298 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -62,8 +62,8 @@ and for models with a meanstructure, the model implied means are computed as \mu = F(I-A)^{-1}M ``` """ -struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5, B} <: - SemImplySymbolic +struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <: + SemImplySymbolic{MS, ExactHessian} Σ_function::F1 ∇Σ_function::F2 ∇²Σ_function::F3 @@ -78,7 +78,9 @@ struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5, B} <: μ::A4 ∇μ_function::F5 ∇μ::A5 - has_meanstructure::B + + RAMSymbolic{MS}(args...) where {MS <: MeanStructure} = + new{MS, map(typeof, args)...}(args...) end ############################################################################################ @@ -140,7 +142,7 @@ function RAMSymbolic(; ∇Σ = nothing end - if hessian & !approximate_hessian + if hessian && !approximate_hessian n_sig = length(Σ_symbolic) ∇²Σ_symbolic_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_symbolic)] @@ -161,7 +163,7 @@ function RAMSymbolic(; # μ if meanstructure - has_meanstructure = Val(true) + MS = HasMeanStructure μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F) μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2] μ = zeros(size(μ_symbolic)) @@ -175,14 +177,14 @@ function RAMSymbolic(; ∇μ = nothing end else - has_meanstructure = Val(false) + MS = NoMeanStructure μ_function = nothing μ = nothing ∇μ_function = nothing ∇μ = nothing end - return RAMSymbolic( + return RAMSymbolic{MS}( Σ_function, ∇Σ_function, ∇²Σ_function, @@ -197,7 +199,6 @@ function RAMSymbolic(; μ, ∇μ_function, ∇μ, - has_meanstructure, ) end @@ -205,23 +206,21 @@ end ### objective, gradient, hessian ############################################################################################ -# dispatch on meanstructure -objective!(imply::RAMSymbolic, par, model) = - objective!(imply, par, model, imply.has_meanstructure) -gradient!(imply::RAMSymbolic, par, model) = - gradient!(imply, par, model, imply.has_meanstructure) - # objective -function objective!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where {T} +function objective!(imply::RAMSymbolic, par, model) imply.Σ_function(imply.Σ, par) - T && imply.μ_function(imply.μ, par) + if MeanStructure(imply) === HasMeanStructure + imply.μ_function(imply.μ, par) + end end # gradient -function gradient!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where {T} +function gradient!(imply::RAMSymbolic, par, model) objective!(imply, par, model, imply.has_meanstructure) imply.∇Σ_function(imply.∇Σ, par) - T && imply.∇μ_function(imply.∇μ, par) + if MeanStructure(imply) === HasMeanStructure + imply.∇μ_function(imply.∇μ, par) + end end # other methods diff --git a/src/imply/empty.jl b/src/imply/empty.jl index f1af2ec42..cf5270599 100644 --- a/src/imply/empty.jl +++ b/src/imply/empty.jl @@ -25,7 +25,7 @@ model per group and an additional model with `ImplyEmpty` and `SemRidge` for the ## Implementation Subtype of `SemImply`. """ -struct ImplyEmpty{V2} <: SemImply +struct ImplyEmpty{V2} <: SemImply{NoMeanStructure, ExactHessian} ram_matrices::V2 end diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index cd5d0270f..4a6d6b5c3 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -24,7 +24,7 @@ Analytic gradients are available. ## Implementation Subtype of `SemLossFunction`. """ -mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction +mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction{ExactHessian} inverses::INV #preallocated inverses of imp_cov choleskys::C #preallocated choleskys logdets::L #logdets of implied covmats diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 7811cda7f..85d36ca78 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -27,26 +27,28 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemML{INV, M, M2, B, V} <: SemLossFunction +struct SemML{HE <: HessianEvaluation, INV, M, M2} <: SemLossFunction{HE} Σ⁻¹::INV Σ⁻¹Σₒ::M meandiff::M2 - approximate_hessian::B - has_meanstructure::V + + SemML{HE}(args...) where {HE <: HessianEvaluation} = + new{HE, map(typeof, args)...}(args...) end ############################################################################################ ### Constructors ############################################################################################ -function SemML(; observed, meanstructure = false, approximate_hessian = false, kwargs...) - isnothing(obs_mean(observed)) ? meandiff = nothing : meandiff = copy(obs_mean(observed)) - return SemML( - similar(obs_cov(observed)), - similar(obs_cov(observed)), +function SemML(; observed::SemObserved, approximate_hessian::Bool = false, kwargs...) + obsmean = obs_mean(observed) + obscov = obs_cov(observed) + meandiff = isnothing(obsmean) ? nothing : copy(obsmean) + + return SemML{approximate_hessian ? ApproximateHessian : ExactHessian}( + similar(obscov), + similar(obscov), meandiff, - approximate_hessian, - Val(meanstructure), ) end @@ -54,38 +56,26 @@ end ### objective, gradient, hessian methods ############################################################################################ -# first, dispatch for meanstructure +# dispatch for SemImply objective!(semml::SemML, par, model::AbstractSemSingle) = - objective!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + objective!(semml, par, model, imply(model)) gradient!(semml::SemML, par, model::AbstractSemSingle) = - gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + gradient!(semml, par, model, imply(model)) hessian!(semml::SemML, par, model::AbstractSemSingle) = - hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + hessian!(semml, par, model, imply(model)) objective_gradient!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + objective_gradient!(semml, par, model, imply(model)) objective_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + objective_hessian!(semml, par, model, imply(model)) gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - gradient_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) + gradient_hessian!(semml, par, model, imply(model)) objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient_hessian!( - semml::SemML, - par, - model, - semml.has_meanstructure, - imply(model), - ) + objective_gradient_hessian!(semml, par, model, imply(model)) ############################################################################################ ### Symbolic Imply Types -function objective!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} +function objective!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -100,7 +90,7 @@ function objective!( Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) #mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ return ld + dot(Σ⁻¹, Σₒ) + dot(μ₋, Σ⁻¹, μ₋) else @@ -109,13 +99,7 @@ function objective!( end end -function gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} +function gradient!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -131,25 +115,22 @@ function gradient!( Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - return gradient' else gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹)' * ∇Σ - return gradient' end + return gradient' end end -function hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, - imp::SemImplySymbolic, -) +function hessian!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end + let Σ = Σ(imply(model)), ∇Σ = ∇Σ(imply(model)), Σₒ = obs_cov(observed(model)), @@ -158,12 +139,7 @@ function hessian!( ∇²Σ_function! = ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return diagm(fill(one(eltype(par)), length(par))) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - - if semml.approximate_hessian + if HessianEvaluation(semml) === ApproximateHessian hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ else mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) @@ -181,23 +157,12 @@ function hessian!( end end -function hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - function objective_gradient!( semml::SemML, par, model::AbstractSemSingle, - has_meanstructure::Val{T}, imp::SemImplySymbolic, -) where {T} +) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -216,18 +181,17 @@ function objective_gradient!( Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ objective = ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) gradient = vec(Σ⁻¹ * (I - Σₒ * Σ⁻¹ - μ₋ * μ₋ᵀΣ⁻¹))' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - return objective, gradient' else objective = ld + tr(Σ⁻¹Σₒ) gradient = (vec(Σ⁻¹) - vec(Σ⁻¹Σₒ * Σ⁻¹))' * ∇Σ - return objective, gradient' end + return objective, gradient' end end end @@ -236,9 +200,11 @@ function objective_hessian!( semml::SemML, par, model::AbstractSemSingle, - has_meanstructure::Val{T}, imp::SemImplySymbolic, -) where {T} +) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -258,7 +224,7 @@ function objective_hessian!( mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) objective = ld + tr(Σ⁻¹Σₒ) - if semml.approximate_hessian + if HessianEvaluation(semml) === ApproximateHessian hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ else Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ @@ -276,23 +242,16 @@ function objective_hessian!( end end -function objective_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - function gradient_hessian!( semml::SemML, par, model::AbstractSemSingle, - has_meanstructure::Val{false}, imp::SemImplySymbolic, ) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -314,7 +273,7 @@ function gradient_hessian!( J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' gradient = J * ∇Σ - if semml.approximate_hessian + if HessianEvaluation(semml) === ApproximateHessian hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ else # inner @@ -329,23 +288,16 @@ function gradient_hessian!( end end -function gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - function objective_gradient_hessian!( semml::SemML, par, model::AbstractSemSingle, - has_meanstructure::Val{false}, imp::SemImplySymbolic, ) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -372,7 +324,7 @@ function objective_gradient_hessian!( J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' gradient = J * ∇Σ - if semml.approximate_hessian + if HessianEvaluation(semml) == ApproximateHessian hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ else Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ @@ -388,64 +340,30 @@ function objective_gradient_hessian!( end end -function objective_gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - ############################################################################################ ### Non-Symbolic Imply Types # no hessians ------------------------------------------------------------------------------ -function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) +function hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) end -function objective_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure, - imp::RAM, -) +function objective_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) end -function gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure, - imp::RAM, -) +function gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) end -function objective_gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure, - imp::RAM, -) +function objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) end # objective, gradient ---------------------------------------------------------------------- -function objective!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} +function objective!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -460,7 +378,7 @@ function objective!( Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) else @@ -469,13 +387,7 @@ function objective!( end end -function gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} +function gradient!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -499,7 +411,7 @@ function gradient!( C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ @@ -511,13 +423,7 @@ function gradient!( end end -function objective_gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} +function objective_gradient!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), @@ -547,7 +453,7 @@ function objective_gradient!( C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - if T + if MeanStructure(semml) === HasMeanStructure μ₋ = μₒ - μ objective += dot(μ₋, Σ⁻¹, μ₋) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 8fcc84a99..b75d47454 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -38,18 +38,19 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemWLS{Vt, St, B, C, B2} <: SemLossFunction +struct SemWLS{HE <: HessianEvaluation, Vt, St, C} <: SemLossFunction{HE} V::Vt σₒ::St - approximate_hessian::B V_μ::C - has_meanstructure::B2 end ############################################################################################ ### Constructors ############################################################################################ +SemWLS{HE}(args...) where {HE <: HessianEvaluation} = + SemWLS{HE, map(typeof, args)...}(args...) + function SemWLS(; observed, wls_weight_matrix = nothing, @@ -77,43 +78,16 @@ function SemWLS(; else wls_weight_matrix_mean = nothing end + HE = approximate_hessian ? ApproximateHessian : AnalyticHessian - return SemWLS( - wls_weight_matrix, - s, - approximate_hessian, - wls_weight_matrix_mean, - Val(meanstructure), - ) + return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean) end ############################################################################ ### methods ############################################################################ -objective!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective!(semwls::SemWLS, par, model, semwls.has_meanstructure) -gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = - gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) -hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) -objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) -gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -function objective!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function objective!(semwls::SemWLS, par, model::AbstractSemSingle) let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, @@ -123,7 +97,7 @@ function objective!( σ₋ = σₒ - σ - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ return dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) else @@ -132,12 +106,7 @@ function objective!( end end -function gradient!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function gradient!(semwls::SemWLS, par, model::AbstractSemSingle) let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, @@ -149,7 +118,7 @@ function gradient!( σ₋ = σₒ - σ - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ return -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' else @@ -158,12 +127,7 @@ function gradient!( end end -function hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function hessian!(semwls::SemWLS, par, model::AbstractSemSingle) let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, @@ -173,11 +137,11 @@ function hessian!( σ₋ = σₒ - σ - if T + if MeanStructure(imply(model)) === HasMeanStructure throw(DomainError(H, "hessian of WLS with meanstructure is not available")) else hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian + if HessianEvaluation(semwls) === ExactHessian J = -2 * (σ₋' * semwls.V)' ∇²Σ_function!(∇²Σ, J, par) hessian .+= ∇²Σ @@ -187,12 +151,7 @@ function hessian!( end end -function objective_gradient!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, @@ -204,9 +163,9 @@ function objective_gradient!( σ₋ = σₒ - σ - if T + if MeanStructure(imply(model)) === HasMeanStructure μ₋ = μₒ - μ - objective = dot(σ₋, V, σ₋) + dot(μ₋', V_μ, μ₋) + objective = dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) gradient = -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' return objective, gradient else @@ -217,12 +176,11 @@ function objective_gradient!( end end -function objective_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} +function objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of WLS with meanstructure is not available")) + end + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, @@ -235,7 +193,7 @@ function objective_hessian!( objective = dot(σ₋, V, σ₋) hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian + if HessianEvaluation(semwls) === ExactHessian J = -2 * (σ₋' * semwls.V)' ∇²Σ_function!(∇²Σ, J, par) hessian .+= ∇²Σ @@ -245,19 +203,11 @@ function objective_hessian!( end end -objective_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - -function gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, -) +function gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of WLS with meanstructure is not available")) + end + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, @@ -270,7 +220,7 @@ function gradient_hessian!( gradient = -2 * (σ₋' * V * ∇σ)' hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian + if HessianEvaluation(semwls) === ExactHessian J = -2 * (σ₋' * semwls.V)' ∇²Σ_function!(∇²Σ, J, par) hessian .+= ∇²Σ @@ -280,19 +230,11 @@ function gradient_hessian!( end end -gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - -function objective_gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, -) +function objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) + if MeanStructure(imply(model)) === HasMeanStructure + throw(DomainError(H, "hessian of WLS with meanstructure is not available")) + end + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, @@ -305,7 +247,7 @@ function objective_gradient_hessian!( objective = dot(σ₋, V, σ₋) gradient = -2 * (σ₋' * V * ∇σ)' hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian + if HessianEvaluation(semwls) === ExactHessian J = -2 * (σ₋' * semwls.V)' ∇²Σ_function!(∇²Σ, J, par) hessian .+= ∇²Σ @@ -314,13 +256,6 @@ function objective_gradient_hessian!( end end -objective_gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - ############################################################################################ ### Recommended methods ############################################################################################ diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index f3165b541..9b3dfcd34 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -25,7 +25,7 @@ Analytic gradients and hessians are available. ## Implementation Subtype of `SemLossFunction`. """ -struct SemConstant{C} <: SemLossFunction +struct SemConstant{C} <: SemLossFunction{ExactHessian} c::C end diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index 66ce37428..e89ceeed7 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -29,7 +29,7 @@ Analytic gradients and hessians are available. ## Implementation Subtype of `SemLossFunction`. """ -struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction +struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction{ExactHessian} α::P which::W1 which_H::W2 diff --git a/src/types.jl b/src/types.jl index 0493da8fa..6cdf9bead 100644 --- a/src/types.jl +++ b/src/types.jl @@ -10,8 +10,32 @@ abstract type AbstractSemSingle{O, I, L, D} <: AbstractSem end "Supertype for all collections of multiple SEMs" abstract type AbstractSemCollection <: AbstractSem end +"Meanstructure trait for `SemImply` subtypes" +abstract type MeanStructure end +"Indicates that `SemImply` subtype supports meanstructure" +struct HasMeanStructure <: MeanStructure end +"Indicates that `SemImply` subtype does not support meanstructure" +struct NoMeanStructure <: MeanStructure end + +# fallback implementation +MeanStructure(::Type{T}) where {T} = + error("Objects of type $T do not support MeanStructure trait") +MeanStructure(semobj) = MeanStructure(typeof(semobj)) + +"Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" +abstract type HessianEvaluation end +struct ApproximateHessian <: HessianEvaluation end +struct ExactHessian <: HessianEvaluation end + +# fallback implementation +HessianEvaluation(::Type{T}) where {T} = + error("Objects of type $T do not support HessianEvaluation trait") +HessianEvaluation(semobj) = HessianEvaluation(typeof(semobj)) + "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." -abstract type SemLossFunction end +abstract type SemLossFunction{HE <: HessianEvaluation} end + +HessianEvaluation(::Type{<:SemLossFunction{HE}}) where {HE <: HessianEvaluation} = HE """ SemLoss(args...; loss_weights = nothing, ...) @@ -73,10 +97,13 @@ Computed model-implied values that should be compared with the observed data to e. g. the model implied covariance or mean. If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImply. """ -abstract type SemImply end +abstract type SemImply{MS <: MeanStructure, HE <: HessianEvaluation} end + +MeanStructure(::Type{<:SemImply{MS}}) where {MS <: MeanStructure} = MS +HessianEvaluation(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStructure} = HE "Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." -abstract type SemImplySymbolic <: SemImply end +abstract type SemImplySymbolic{MS, HE} <: SemImply{MS, HE} end """ Sem(;observed = SemObservedData, imply = RAM, loss = SemML, optimizer = SemOptimizerOptim, kwargs...) diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 70d2bb914..265ab178a 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -114,7 +114,7 @@ end # ML estimation - user defined loss function ############################################################################################ -struct UserSemML <: SemLossFunction end +struct UserSemML <: SemLossFunction{ExactHessian} end ############################################################################################ ### functors From af09c79613856adc7a5fba9d0f09cf92f303b313 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 11 Aug 2024 14:05:22 -0700 Subject: [PATCH 04/20] obj/grad/hess: refactor evaluation API the intent of this commit is to refactor the API for objective, gradient and hessian evaluation, such that the evaluation code does not have to be duplicates across functions that calculate different combinations of those functions * introduce EvaluationTargets class that handles selection of what to evaluate * add evaluate!(EvalTargets, ...) methods for loss and imply objs that evaluate only what is required * objective!(), obj_grad!() etc calls are just a wrapper of evaluate!() with proper targets --- src/frontend/fit/standard_errors/hessian.jl | 2 +- src/imply/RAM/generic.jl | 116 ++--- src/imply/RAM/symbolic.jl | 46 +- src/imply/empty.jl | 4 +- src/loss/ML/FIML.jl | 90 ++-- src/loss/ML/ML.jl | 475 +++++--------------- src/loss/WLS/WLS.jl | 199 ++------ src/loss/constant/constant.jl | 7 +- src/loss/regularization/ridge.jl | 7 +- src/objective_gradient_hessian.jl | 442 ++++++------------ test/examples/multigroup/build_models.jl | 4 +- 11 files changed, 377 insertions(+), 1015 deletions(-) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index afcb570bc..e71e601fb 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -17,7 +17,7 @@ function se_hessian(sem_fit::SemFit; hessian = :finitediff) hessian!(H, sem_fit.model, sem_fit.solution) elseif hessian == :finitediff H = FiniteDiff.finite_difference_hessian( - Base.Fix1(objective!, sem_fit.model), + p -> evaluate!(zero(eltype(sem_fit.solution)), nothing, nothing, fit.model, p), sem_fit.solution, ) elseif hessian == :optimizer diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index c749e3ff0..85cbc0220 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -65,8 +65,26 @@ Additional interfaces Only available in gradient! calls: - `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` """ -mutable struct RAM{MS, A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S2, S3} <: - SemImply{MS, ExactHessian} +mutable struct RAM{ + MS, + A1, + A2, + A3, + A4, + A5, + A6, + V2, + I1, + I2, + I3, + M1, + M2, + M3, + M4, + S1, + S2, + S3, +} <: SemImply{MS, ExactHessian} Σ::A1 A::A2 S::A3 @@ -89,8 +107,7 @@ mutable struct RAM{MS, A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S ∇S::S2 ∇M::S3 - RAM{MS}(args...) where {MS <: MeanStructure} = - new{MS, map(typeof, args)...}(args...) + RAM{MS}(args...) where {MS <: MeanStructure} = new{MS, map(typeof, args)...}(args...) end ############################################################################################ @@ -100,7 +117,7 @@ end function RAM(; specification::SemSpecification, #vech = false, - gradient = true, + gradient_required = true, meanstructure = false, kwargs..., ) @@ -133,7 +150,7 @@ function RAM(; F⨉I_A⁻¹S = zeros(n_obs, n_var) I_A = similar(A_pre) - if gradient + if gradient_required ∇A = matrix_gradient(A_indices, n_var^2) ∇S = matrix_gradient(S_indices, n_var^2) else @@ -149,7 +166,7 @@ function RAM(; "You set `meanstructure = true`, but your model specification contains no mean parameters.", ), ) - ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing + ∇M = gradient_required ? matrix_gradient(M_indices, n_var) : nothing μ = zeros(n_obs) else MS = NoMeanStructure @@ -184,8 +201,7 @@ end ### methods ############################################################################################ -# objective and gradient -function objective!(imply::RAM, params, model) +function update!(targets::EvaluationTargets, imply::RAM, model::AbstractSemSingle, params) fill_A_S_M!( imply.A, imply.S, @@ -199,48 +215,22 @@ function objective!(imply::RAM, params, model) @. imply.I_A = -imply.A @view(imply.I_A[diagind(imply.I_A)]) .+= 1 - copyto!(imply.F⨉I_A⁻¹, imply.F) - rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A)) - - Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) - - if MeanStructure(imply) === HasMeanStructure - μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) + if is_gradient_required(targets) || is_hessian_required(targets) + imply.I_A⁻¹ = LinearAlgebra.inv!(factorize(imply.I_A)) + mul!(imply.F⨉I_A⁻¹, imply.F, imply.I_A⁻¹) + else + copyto!(imply.F⨉I_A⁻¹, imply.F) + rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A)) end -end - -function gradient!(imply::RAM, params, model::AbstractSemSingle) - fill_A_S_M!( - imply.A, - imply.S, - imply.M, - imply.A_indices, - imply.S_indices, - imply.M_indices, - params, - ) - - @. imply.I_A = -imply.A - @view(imply.I_A[diagind(imply.I_A)]) .+= 1 - - imply.I_A⁻¹ = LinearAlgebra.inv!(factorize(imply.I_A)) - mul!(imply.F⨉I_A⁻¹, imply.F, imply.I_A⁻¹) - Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) + mul!(imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹, imply.S) + mul!(imply.Σ, imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹') if MeanStructure(imply) === HasMeanStructure - μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) + mul!(imply.μ, imply.F⨉I_A⁻¹, imply.M) end end -hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) -objective_gradient!(imply::RAM, par, model::AbstractSemSingle) = - gradient!(imply, par, model) -objective_hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) -gradient_hessian!(imply::RAM, par, model::AbstractSemSingle) = gradient!(imply, par, model) -objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle) = - gradient!(imply, par, model) - ############################################################################################ ### Recommended methods ############################################################################################ @@ -253,48 +243,10 @@ function update_observed(imply::RAM, observed::SemObserved; kwargs...) end end -############################################################################################ -### additional methods -############################################################################################ - -Σ(imply::RAM) = imply.Σ -μ(imply::RAM) = imply.μ - -A(imply::RAM) = imply.A -S(imply::RAM) = imply.S -F(imply::RAM) = imply.F -M(imply::RAM) = imply.M - -∇A(imply::RAM) = imply.∇A -∇S(imply::RAM) = imply.∇S -∇M(imply::RAM) = imply.∇M - -A_indices(imply::RAM) = imply.A_indices -S_indices(imply::RAM) = imply.S_indices -M_indices(imply::RAM) = imply.M_indices - -F⨉I_A⁻¹(imply::RAM) = imply.F⨉I_A⁻¹ -F⨉I_A⁻¹S(imply::RAM) = imply.F⨉I_A⁻¹S -I_A(imply::RAM) = imply.I_A -I_A⁻¹(imply::RAM) = imply.I_A⁻¹ # only for gradient available! - -has_meanstructure(imply::RAM) = imply.has_meanstructure - -ram_matrices(imply::RAM) = imply.ram_matrices - ############################################################################################ ### additional functions ############################################################################################ -function Σ_RAM!(Σ, F⨉I_A⁻¹, S, pre2) - mul!(pre2, F⨉I_A⁻¹, S) - mul!(Σ, pre2, F⨉I_A⁻¹') -end - -function μ_RAM!(μ, F⨉I_A⁻¹, M) - mul!(μ, F⨉I_A⁻¹, M) -end - function check_acyclic(A_pre, n_par, A_indices) # fill copy of A-matrix with random parameters A_rand = copy(A_pre) diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index a0e68c298..d79454f3f 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -206,30 +206,25 @@ end ### objective, gradient, hessian ############################################################################################ -# objective -function objective!(imply::RAMSymbolic, par, model) +function update!( + targets::EvaluationTargets, + imply::RAMSymbolic, + model::AbstractSemSingle, + par, +) imply.Σ_function(imply.Σ, par) if MeanStructure(imply) === HasMeanStructure imply.μ_function(imply.μ, par) end -end -# gradient -function gradient!(imply::RAMSymbolic, par, model) - objective!(imply, par, model, imply.has_meanstructure) - imply.∇Σ_function(imply.∇Σ, par) - if MeanStructure(imply) === HasMeanStructure - imply.∇μ_function(imply.∇μ, par) + if is_gradient_required(targets) || is_hessian_required(targets) + imply.∇Σ_function(imply.∇Σ, par) + if MeanStructure(imply) === HasMeanStructure + imply.∇μ_function(imply.∇μ, par) + end end end -# other methods -hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_gradient!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) - ############################################################################################ ### Recommended methods ############################################################################################ @@ -242,25 +237,6 @@ function update_observed(imply::RAMSymbolic, observed::SemObserved; kwargs...) end end -############################################################################################ -### additional methods -############################################################################################ - -Σ(imply::RAMSymbolic) = imply.Σ -∇Σ(imply::RAMSymbolic) = imply.∇Σ -∇²Σ(imply::RAMSymbolic) = imply.∇²Σ - -μ(imply::RAMSymbolic) = imply.μ -∇μ(imply::RAMSymbolic) = imply.∇μ - -Σ_function(imply::RAMSymbolic) = imply.Σ_function -∇Σ_function(imply::RAMSymbolic) = imply.∇Σ_function -∇²Σ_function(imply::RAMSymbolic) = imply.∇²Σ_function - -has_meanstructure(imply::RAMSymbolic) = imply.has_meanstructure - -ram_matrices(imply::RAMSymbolic) = imply.ram_matrices - ############################################################################################ ### additional functions ############################################################################################ diff --git a/src/imply/empty.jl b/src/imply/empty.jl index cf5270599..8b23194ac 100644 --- a/src/imply/empty.jl +++ b/src/imply/empty.jl @@ -41,9 +41,7 @@ end ### methods ############################################################################################ -objective!(imply::ImplyEmpty, par, model) = nothing -gradient!(imply::ImplyEmpty, par, model) = nothing -hessian!(imply::ImplyEmpty, par, model) = nothing +update!(targets::EvaluationTargets, imply::ImplyEmpty, par, model) = nothing ############################################################################################ ### Recommended methods diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 4a6d6b5c3..92ecf73ca 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -82,43 +82,32 @@ end ### methods ############################################################################################ -function objective!(semfiml::SemFIML, params, model) - if !check_fiml(semfiml, model) - return non_posdef_return(params) - end - - prepare_SemFIML!(semfiml, model) - - objective = F_FIML(pattern_rows(observed(model)), semfiml, model, params) - return objective / nsamples(observed(model)) -end - -function gradient!(semfiml::SemFIML, params, model) - if !check_fiml(semfiml, model) - return ones(eltype(params), size(params)) - end - - prepare_SemFIML!(semfiml, model) - - gradient = - ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) - return gradient -end +function evaluate!( + objective, + gradient, + hessian, + semfiml::SemFIML, + implied::SemImply, + model::AbstractSemSingle, + params, +) + isnothing(hessian) || error("Hessian not implemented for FIML") -function objective_gradient!(semfiml::SemFIML, params, model) if !check_fiml(semfiml, model) - return non_posdef_return(params), ones(eltype(params), size(params)) + isnothing(objective) || (objective = non_posdef_return(params)) + isnothing(gradient) || fill!(gradient, 1) + return objective end prepare_SemFIML!(semfiml, model) - objective = - F_FIML(pattern_rows(observed(model)), semfiml, model, params) / - nsamples(observed(model)) - gradient = - ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) + scale = inv(nsamples(observed(model))) + obs_rows = pattern_rows(observed(model)) + isnothing(objective) || (objective = scale * F_FIML(obs_rows, semfiml, model, params)) + isnothing(gradient) || + (∇F_FIML!(gradient, obs_rows, semfiml, model); gradient .*= scale) - return objective, gradient + return objective end ############################################################################################ @@ -133,13 +122,11 @@ update_observed(lossfun::SemFIML, observed::SemObserved; kwargs...) = ############################################################################################ function F_one_pattern(meandiff, inverse, obs_cov, logdet, N) - F = logdet - F += meandiff' * inverse * meandiff + F = logdet + dot(meandiff, inverse, meandiff) if N > one(N) F += dot(obs_cov, inverse) end - F = N * F - return F + return F * N end function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, model) @@ -155,26 +142,23 @@ function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, mod end end -function ∇F_fiml_outer(JΣ, Jμ, imply::SemImplySymbolic, model, semfiml) - G = transpose(JΣ' * ∇Σ(imply) - Jμ' * ∇μ(imply)) - return G +function ∇F_fiml_outer!(G, JΣ, Jμ, imply::SemImplySymbolic, model, semfiml) + mul!(G, imply.∇Σ', JΣ) # should be transposed + G .-= imply.∇μ' * Jμ end -function ∇F_fiml_outer(JΣ, Jμ, imply, model, semfiml) - Iₙ = sparse(1.0I, size(A(imply))...) - P = kron(F⨉I_A⁻¹(imply), F⨉I_A⁻¹(imply)) - Q = kron(S(imply) * I_A⁻¹(imply)', Iₙ) +function ∇F_fiml_outer!(G, JΣ, Jμ, imply, model, semfiml) + Iₙ = sparse(1.0I, size(imply.A)...) + P = kron(imply.F⨉I_A⁻¹, imply.F⨉I_A⁻¹) + Q = kron(imply.S * imply.I_A⁻¹', Iₙ) Q .+= semfiml.commutator * Q - ∇Σ = P * (∇S(imply) + Q * ∇A(imply)) - - ∇μ = - F⨉I_A⁻¹(imply) * ∇M(imply) + - kron((I_A⁻¹(imply) * M(imply))', F⨉I_A⁻¹(imply)) * ∇A(imply) + ∇Σ = P * (imply.∇S + Q * imply.∇A) - G = transpose(JΣ' * ∇Σ - Jμ' * ∇μ) + ∇μ = imply.F⨉I_A⁻¹ * imply.∇M + kron((imply.I_A⁻¹ * imply.M)', imply.F⨉I_A⁻¹) * imply.∇A - return G + mul!(G, ∇Σ', JΣ) # actually transposed + G .-= ∇μ' * Jμ end function F_FIML(rows, semfiml, model, params) @@ -191,7 +175,7 @@ function F_FIML(rows, semfiml, model, params) return F end -function ∇F_FIML(rows, semfiml, model) +function ∇F_FIML!(G, rows, semfiml, model) Jμ = zeros(nobserved_vars(model)) JΣ = zeros(nobserved_vars(model)^2) @@ -208,7 +192,7 @@ function ∇F_FIML(rows, semfiml, model) model, ) end - return ∇F_fiml_outer(JΣ, Jμ, imply(model), model, semfiml) + return ∇F_fiml_outer!(G, JΣ, Jμ, imply(model), model, semfiml) end function prepare_SemFIML!(semfiml, model) @@ -233,9 +217,9 @@ end copy_per_pattern!(semfiml, model::M where {M <: AbstractSem}) = copy_per_pattern!( semfiml.inverses, - Σ(imply(model)), + imply(model).Σ, semfiml.imp_mean, - μ(imply(model)), + imply(model).μ, patterns(observed(model)), ) @@ -248,7 +232,7 @@ function batch_cholesky!(semfiml, model) end function check_fiml(semfiml, model) - copyto!(semfiml.imp_inv, Σ(imply(model))) + copyto!(semfiml.imp_inv, imply(model).Σ) a = cholesky!(Symmetric(semfiml.imp_inv); check = false) return isposdef(a) end diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 85d36ca78..445a557a7 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -56,415 +56,149 @@ end ### objective, gradient, hessian methods ############################################################################################ -# dispatch for SemImply -objective!(semml::SemML, par, model::AbstractSemSingle) = - objective!(semml, par, model, imply(model)) -gradient!(semml::SemML, par, model::AbstractSemSingle) = - gradient!(semml, par, model, imply(model)) -hessian!(semml::SemML, par, model::AbstractSemSingle) = - hessian!(semml, par, model, imply(model)) -objective_gradient!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient!(semml, par, model, imply(model)) -objective_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_hessian!(semml, par, model, imply(model)) -gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - gradient_hessian!(semml, par, model, imply(model)) -objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient_hessian!(semml, par, model, imply(model)) - ############################################################################################ ### Symbolic Imply Types -function objective!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return non_posdef_return(par) - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - #mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - return ld + dot(Σ⁻¹, Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - else - return ld + dot(Σ⁻¹, Σₒ) - end - end -end - -function gradient!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - μ = μ(imply(model)), - ∇μ = ∇μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return ones(eltype(par), size(par)) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - else - gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹)' * ∇Σ - end - return gradient' - end -end - -function hessian!(semml::SemML, par, model::AbstractSemSingle, imp::SemImplySymbolic) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of ML + meanstructure is not available")) - end - - let Σ = Σ(imply(model)), - ∇Σ = ∇Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - if HessianEvaluation(semml) === ApproximateHessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - # inner - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ - end - - return hessian - end -end - -function objective_gradient!( +function evaluate!( + objective, + gradient, + hessian, semml::SemML, - par, + implied::SemImplySymbolic, model::AbstractSemSingle, - imp::SemImplySymbolic, -) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - return non_posdef_return(par), ones(eltype(par), size(par)) - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - - objective = ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - gradient = vec(Σ⁻¹ * (I - Σₒ * Σ⁻¹ - μ₋ * μ₋ᵀΣ⁻¹))' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - else - objective = ld + tr(Σ⁻¹Σₒ) - gradient = (vec(Σ⁻¹) - vec(Σ⁻¹Σₒ * Σ⁻¹))' * ∇Σ - end - return objective, gradient' - end - end -end - -function objective_hessian!( - semml::SemML, par, - model::AbstractSemSingle, - imp::SemImplySymbolic, ) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of ML + meanstructure is not available")) - end - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - return non_posdef_return(par), diagm(fill(one(eltype(par)), length(par))) - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) + if !isnothing(hessian) + (MeanStructure(implied) === HasMeanStructure) && + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end + + Σ = implied.Σ + Σₒ = obs_cov(observed(model)) + Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ + Σ⁻¹ = semml.Σ⁻¹ + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + if !isposdef(Σ_chol) + #@warn "∑⁻¹ is not positive definite" + isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(gradient) || fill!(gradient, 1) + isnothing(hessian) || copyto!(hessian, I) + return objective + end + ld = logdet(Σ_chol) + Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ)) + + if MeanStructure(implied) === HasMeanStructure + μ = implied.μ + μₒ = obs_mean(observed(model)) + μ₋ = μₒ - μ + isnothing(objective) || (objective += dot(μ₋, Σ⁻¹, μ₋)) + if !isnothing(gradient) + ∇Σ = implied.∇Σ + ∇μ = implied.∇μ + μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ + gradient .= (vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ)' + gradient .-= (2 * μ₋ᵀΣ⁻¹ * ∇μ)' + end + elseif !isnothing(gradient) || !isnothing(hessian) + ∇Σ = implied.∇Σ + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + if !isnothing(gradient) + gradient .= (J * ∇Σ)' + end + if !isnothing(hessian) if HessianEvaluation(semml) === ApproximateHessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ + mul!(hessian, 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ) else - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + ∇²Σ_function! = implied.∇²Σ_function + ∇²Σ = implied.∇²Σ # inner - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' ∇²Σ_function!(∇²Σ, J, par) # outer H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ + mul!(hessian, ∇Σ' * H_outer, ∇Σ) hessian .+= ∇²Σ end - - return objective, hessian end end + return objective end -function gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - imp::SemImplySymbolic, -) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of ML + meanstructure is not available")) - end - - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || - return ones(eltype(par), size(par)), diagm(fill(one(eltype(par)), length(par))) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - gradient = J * ∇Σ - - if HessianEvaluation(semml) === ApproximateHessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - # inner - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ - end - - return gradient', hessian - end -end +############################################################################################ +### Non-Symbolic Imply Types -function objective_gradient_hessian!( +function evaluate!( + objective, + gradient, + hessian, semml::SemML, - par, + implied::RAM, model::AbstractSemSingle, - imp::SemImplySymbolic, + par, ) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of ML + meanstructure is not available")) + if !isnothing(hessian) + error("hessian of ML + non-symbolic imply type is not available") end - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - objective = non_posdef_return(par) - gradient = ones(eltype(par), size(par)) - hessian = diagm(fill(one(eltype(par)), length(par))) - return objective, gradient, hessian - end - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) + Σ = implied.Σ + Σₒ = obs_cov(observed(model)) + Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ + Σ⁻¹ = semml.Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - gradient = J * ∇Σ - - if HessianEvaluation(semml) == ApproximateHessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - # inner - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ - end - - return objective, gradient', hessian + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + if !isposdef(Σ_chol) + #@warn "Σ⁻¹ is not positive definite" + isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(gradient) || fill!(gradient, 1) + isnothing(hessian) || copyto!(hessian, I) + return objective end -end - -############################################################################################ -### Non-Symbolic Imply Types - -# no hessians ------------------------------------------------------------------------------ - -function hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -function objective_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -function gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end + ld = logdet(Σ_chol) + Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) -function objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -# objective, gradient ---------------------------------------------------------------------- - -function objective!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return non_posdef_return(par) - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + if !isnothing(objective) + objective = ld + tr(Σ⁻¹Σₒ) - if MeanStructure(imply(model)) === HasMeanStructure + if MeanStructure(implied) === HasMeanStructure + μ = implied.μ + μₒ = obs_mean(observed(model)) μ₋ = μₒ - μ - return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - else - return ld + tr(Σ⁻¹Σₒ) + objective += dot(μ₋, Σ⁻¹, μ₋) end end -end -function gradient!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - S = S(imply(model)), - M = M(imply(model)), - F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), - I_A⁻¹ = I_A⁻¹(imply(model)), - ∇A = ∇A(imply(model)), - ∇S = ∇S(imply(model)), - ∇M = ∇M(imply(model)), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return ones(eltype(par), size(par)) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + if !isnothing(gradient) + S = implied.S + F⨉I_A⁻¹ = implied.F⨉I_A⁻¹ + I_A⁻¹ = implied.I_A⁻¹ + ∇A = implied.∇A + ∇S = implied.∇S C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S + gradᵀ = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - if MeanStructure(imply(model)) === HasMeanStructure + if MeanStructure(implied) === HasMeanStructure + μ = implied.μ + μₒ = obs_mean(observed(model)) + ∇M = implied.∇M + M = implied.M μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ - - gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S + gradᵀ .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S end - - return gradient' + copyto!(gradient, gradᵀ') end -end - -function objective_gradient!(semml::SemML, par, model::AbstractSemSingle, imp::RAM) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - S = S(imply(model)), - M = M(imply(model)), - F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), - I_A⁻¹ = I_A⁻¹(imply(model)), - ∇A = ∇A(imply(model)), - ∇S = ∇S(imply(model)), - ∇M = ∇M(imply(model)), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - objective = non_posdef_return(par) - gradient = ones(eltype(par), size(par)) - return objective, gradient - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) - - C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - - if MeanStructure(semml) === HasMeanStructure - μ₋ = μₒ - μ - objective += dot(μ₋, Σ⁻¹, μ₋) - - μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ - gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S - end - return objective, gradient' - end - end + return objective end ############################################################################################ @@ -484,7 +218,7 @@ end ############################################################################################ update_observed(lossfun::SemML, observed::SemObservedMissing; kwargs...) = - throw(ArgumentError("ML estimation does not work with missing data - use FIML instead")) + error("ML estimation does not work with missing data - use FIML instead") function update_observed(lossfun::SemML, observed::SemObserved; kwargs...) if size(lossfun.Σ⁻¹) == size(obs_cov(observed)) @@ -493,10 +227,3 @@ function update_observed(lossfun::SemML, observed::SemObserved; kwargs...) return SemML(; observed = observed, kwargs...) end end - -############################################################################################ -### additional methods -############################################################################################ - -Σ⁻¹(semml::SemML) = semml.Σ⁻¹ -Σ⁻¹Σₒ(semml::SemML) = semml.Σ⁻¹Σₒ diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index b75d47454..60a454e37 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -78,7 +78,7 @@ function SemWLS(; else wls_weight_matrix_mean = nothing end - HE = approximate_hessian ? ApproximateHessian : AnalyticHessian + HE = approximate_hessian ? ApproximateHessian : ExactHessian return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean) end @@ -87,173 +87,58 @@ end ### methods ############################################################################ -function objective!(semwls::SemWLS, par, model::AbstractSemSingle) - let σ = Σ(imply(model)), - μ = μ(imply(model)), - σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - - σ₋ = σₒ - σ - - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - return dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) - else - return dot(σ₋, V, σ₋) - end - end -end - -function gradient!(semwls::SemWLS, par, model::AbstractSemSingle) - let σ = Σ(imply(model)), - μ = μ(imply(model)), - σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - ∇σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) - - σ₋ = σₒ - σ - - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - return -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' - else - return -2 * (σ₋' * V * ∇σ)' - end - end -end - -function hessian!(semwls::SemWLS, par, model::AbstractSemSingle) - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - else - hessian = 2 * ∇σ' * V * ∇σ - if HessianEvaluation(semwls) === ExactHessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ - end - return hessian - end +function evaluate!( + objective, + gradient, + hessian, + semwls::SemWLS, + implied::SemImplySymbolic, + model::AbstractSemSingle, + par, +) + if !isnothing(hessian) && (MeanStructure(implied) === HasMeanStructure) + error("hessian of WLS with meanstructure is not available") end -end -function objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) - let σ = Σ(imply(model)), - μ = μ(imply(model)), - σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - ∇σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) + V = semwls.V + ∇σ = implied.∇Σ - σ₋ = σₒ - σ + σ = implied.Σ + σₒ = semwls.σₒ + σ₋ = σₒ - σ - if MeanStructure(imply(model)) === HasMeanStructure - μ₋ = μₒ - μ - objective = dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) - gradient = -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' - return objective, gradient - else - objective = dot(σ₋, V, σ₋) - gradient = -2 * (σ₋' * V * ∇σ)' - return objective, gradient + isnothing(objective) || (objective = dot(σ₋, V, σ₋)) + if !isnothing(gradient) + if issparse(∇σ) + gradient .= (σ₋' * V * ∇σ)' + else # save one allocation + mul!(gradient, σ₋' * V, ∇σ) # actually transposed, but should be fine for vectors end + gradient .*= -2 end -end - -function objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of WLS with meanstructure is not available")) + isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ); + hessian .*= 2) + if !isnothing(hessian) && (HessianEvaluation(semwls) === ExactHessian) + ∇²Σ_function! = implied.∇²Σ_function + ∇²Σ = implied.∇²Σ + J = -2 * (σ₋' * semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian .+= ∇²Σ end - - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - objective = dot(σ₋, V, σ₋) - - hessian = 2 * ∇σ' * V * ∇σ - if HessianEvaluation(semwls) === ExactHessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ + if MeanStructure(implied) === HasMeanStructure + μ = implied.μ + μₒ = obs_mean(observed(model)) + μ₋ = μₒ - μ + V_μ = semwls.V_μ + if !isnothing(objective) + objective += dot(μ₋, V_μ, μ₋) end - - return objective, hessian - end -end - -function gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - end - - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - gradient = -2 * (σ₋' * V * ∇σ)' - - hessian = 2 * ∇σ' * V * ∇σ - if HessianEvaluation(semwls) === ExactHessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ + if !isnothing(gradient) + gradient .-= 2 * (μ₋' * V_μ * implied.∇μ)' end - - return gradient, hessian - end -end - -function objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) - if MeanStructure(imply(model)) === HasMeanStructure - throw(DomainError(H, "hessian of WLS with meanstructure is not available")) end - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - objective = dot(σ₋, V, σ₋) - gradient = -2 * (σ₋' * V * ∇σ)' - hessian = 2 * ∇σ' * V * ∇σ - if HessianEvaluation(semwls) === ExactHessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ - end - return objective, gradient, hessian - end + return objective end ############################################################################################ diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index 9b3dfcd34..639864610 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -41,9 +41,10 @@ end ### methods ############################################################################################ -objective!(constant::SemConstant, par, model) = constant.c -gradient!(constant::SemConstant, par, model) = zero(par) -hessian!(constant::SemConstant, par, model) = zeros(eltype(par), length(par), length(par)) +objective(constant::SemConstant, model::AbstractSem, par) = constant.c +gradient(constant::SemConstant, model::AbstractSem, par) = zero(par) +hessian(constant::SemConstant, model::AbstractSem, par) = + zeros(eltype(par), length(par), length(par)) ############################################################################################ ### Recommended methods diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index e89ceeed7..be9b14fa5 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -76,14 +76,15 @@ end ### methods ############################################################################################ -objective!(ridge::SemRidge, par, model) = @views ridge.α * sum(abs2, par[ridge.which]) +objective(ridge::SemRidge, model::AbstractSem, par) = + @views ridge.α * sum(abs2, par[ridge.which]) -function gradient!(ridge::SemRidge, par, model) +function gradient(ridge::SemRidge, model::AbstractSem, par) @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] return ridge.gradient end -function hessian!(ridge::SemRidge, par, model) +function hessian(ridge::SemRidge, model::AbstractSem, par) @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α return ridge.hessian end diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl index 2debbcd40..f07b572aa 100644 --- a/src/objective_gradient_hessian.jl +++ b/src/objective_gradient_hessian.jl @@ -1,298 +1,150 @@ -############################################################################################ -# methods for AbstractSem -############################################################################################ - -function objective!(model::AbstractSemSingle, params) - objective!(imply(model), params, model) - return objective!(loss(model), params, model) -end - -function gradient!(gradient, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - gradient!(imply(model), params, model) - gradient!(gradient, loss(model), params, model) -end - -function hessian!(hessian, model::AbstractSemSingle, params) - fill!(hessian, zero(eltype(hessian))) - hessian!(imply(model), params, model) - hessian!(hessian, loss(model), params, model) -end - -function objective_gradient!(gradient, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - objective_gradient!(imply(model), params, model) - objective_gradient!(gradient, loss(model), params, model) -end - -function objective_hessian!(hessian, model::AbstractSemSingle, params) - fill!(hessian, zero(eltype(hessian))) - objective_hessian!(imply(model), params, model) - objective_hessian!(hessian, loss(model), params, model) -end - -function gradient_hessian!(gradient, hessian, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - gradient_hessian!(imply(model), params, model) - gradient_hessian!(gradient, hessian, loss(model), params, model) -end - -function objective_gradient_hessian!(gradient, hessian, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - objective_gradient_hessian!(imply(model), params, model) - return objective_gradient_hessian!(gradient, hessian, loss(model), params, model) -end +"Specifies whether objective (O), gradient (G) or hessian (H) evaluation is required" +struct EvaluationTargets{O, G, H} end + +EvaluationTargets(objective, gradient, hessian) = + EvaluationTargets{!isnothing(objective), !isnothing(gradient), !isnothing(hessian)}() + +# convenience methods to check type params +is_objective_required(::EvaluationTargets{O}) where {O} = O +is_gradient_required(::EvaluationTargets{<:Any, G}) where {G} = G +is_hessian_required(::EvaluationTargets{<:Any, <:Any, H}) where {H} = H + +# return the tuple of the required results +(::EvaluationTargets{true, false, false})(objective, gradient, hessian) = objective +(::EvaluationTargets{false, true, false})(objective, gradient, hessian) = gradient +(::EvaluationTargets{false, false, true})(objective, gradient, hessian) = hessian +(::EvaluationTargets{true, true, false})(objective, gradient, hessian) = + (objective, gradient) +(::EvaluationTargets{true, false, true})(objective, gradient, hessian) = + (objective, hessian) +(::EvaluationTargets{false, true, true})(objective, gradient, hessian) = (gradient, hessian) +(::EvaluationTargets{true, true, true})(objective, gradient, hessian) = + (objective, gradient, hessian) + +(targets::EvaluationTargets)(arg_tuple::Tuple) = targets(arg_tuple...) + +# dispatch on SemImply +evaluate!(objective, gradient, hessian, loss::SemLossFunction, model::AbstractSem, params) = + evaluate!(objective, gradient, hessian, loss, imply(model), model, params) + +# fallback method +function evaluate!(obj, grad, hess, loss::SemLossFunction, imply::SemImply, model, params) + isnothing(obj) || (obj = objective(loss, imply, model, params)) + isnothing(grad) || copyto!(grad, gradient(loss, imply, model, params)) + isnothing(hess) || copyto!(hess, hessian(loss, imply, model, params)) + return obj +end + +# fallback methods +objective(f::SemLossFunction, imply::SemImply, model, params) = objective(f, model, params) +gradient(f::SemLossFunction, imply::SemImply, model, params) = gradient(f, model, params) +hessian(f::SemLossFunction, imply::SemImply, model, params) = hessian(f, model, params) + +# fallback method for SemImply that calls update_xxx!() methods +function update!(targets::EvaluationTargets, imply::SemImply, model, params) + is_objective_required(targets) && update_objective!(imply, model, params) + is_gradient_required(targets) && update_gradient!(imply, model, params) + is_hessian_required(targets) && update_hessian!(imply, model, params) +end + +# guess objective type +objective_type(model::AbstractSem, params::Any) = Float64 +objective_type(model::AbstractSem, params::AbstractVector{T}) where {T <: Number} = T +objective_zero(model::AbstractSem, params::Any) = zero(objective_type(model, params)) + +objective_type(objective::T, gradient, hessian) where {T <: Number} = T +objective_type( + objective::Nothing, + gradient::AbstractArray{T}, + hessian, +) where {T <: Number} = T +objective_type( + objective::Nothing, + gradient::Nothing, + hessian::AbstractArray{T}, +) where {T <: Number} = T +objective_zero(objective, gradient, hessian) = + zero(objective_type(objective, gradient, hessian)) ############################################################################################ -# methods for SemFiniteDiff +# methods for AbstractSem ############################################################################################ -gradient!(gradient, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_gradient!(gradient, x -> objective!(model, x), par) - -hessian!(hessian, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_hessian!(hessian, x -> objective!(model, x), par) - -function objective_gradient!(gradient, model::SemFiniteDiff, params) - gradient!(gradient, model, params) - return objective!(model, params) -end - -# other methods -function gradient_hessian!(gradient, hessian, model::SemFiniteDiff, params) - gradient!(gradient, model, params) - hessian!(hessian, model, params) -end - -function objective_hessian!(hessian, model::SemFiniteDiff, params) - hessian!(hessian, model, params) - return objective!(model, params) -end - -function objective_gradient_hessian!(gradient, hessian, model::SemFiniteDiff, params) - hessian!(hessian, model, params) - return objective_gradient!(gradient, model, params) +function evaluate!(objective, gradient, hessian, model::AbstractSemSingle, params) + targets = EvaluationTargets(objective, gradient, hessian) + # update imply state, its gradient and hessian (if required) + update!(targets, imply(model), model, params) + return evaluate!( + !isnothing(objective) ? zero(objective) : nothing, + gradient, + hessian, + loss(model), + model, + params, + ) end ############################################################################################ -# methods for SemLoss +# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective) ############################################################################################ -function objective!(loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> weight * objective!(fun, par, model), - +, - loss.functions, - loss.weights, - ) -end - -function gradient!(gradient, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - new_gradient = gradient!(lossfun, par, model) - gradient .+= w * new_gradient - end -end - -function hessian!(hessian, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - hessian .+= w * hessian!(lossfun, par, model) - end -end - -function objective_gradient!(gradient, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> objective_gradient_wrap_(gradient, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -function objective_hessian!(hessian, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> objective_hessian_wrap_(hessian, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -function gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - new_gradient, new_hessian = gradient_hessian!(lossfun, par, model) - gradient .+= w * new_gradient - hessian .+= w * new_hessian +function evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params) + function obj(p) + # recalculate imply state for p + update!(EvaluationTargets{true, false, false}(), imply(model), model, p) + evaluate!( + objective_zero(objective, gradient, hessian), + nothing, + nothing, + loss(model), + model, + p, + ) end + isnothing(gradient) || FiniteDiff.finite_difference_gradient!(gradient, obj, params) + isnothing(hessian) || FiniteDiff.finite_difference_hessian!(hessian, obj, params) + return !isnothing(objective) ? obj(params) : nothing end -function objective_gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> - objective_gradient_hessian_wrap_(gradient, hessian, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -# wrapper to update gradient/hessian and return objective value -function objective_gradient_wrap_(gradient, lossfun, par, model, w) - new_objective, new_gradient = objective_gradient!(lossfun, par, model) - gradient .+= w * new_gradient - return w * new_objective -end - -function objective_hessian_wrap_(hessian, lossfun, par, model, w) - new_objective, new_hessian = objective_hessian!(lossfun, par, model) - hessian .+= w * new_hessian - return w * new_objective -end - -function objective_gradient_hessian_wrap_(gradient, hessian, lossfun, par, model, w) - new_objective, new_gradient, new_hessian = - objective_gradient_hessian!(lossfun, par, model) - gradient .+= w * new_gradient - hessian .+= w * new_hessian - return w * new_objective -end +objective(model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, nothing, model, params) ############################################################################################ -# methods for SemEnsemble +# methods for SemLoss (weighted sum of individual SemLossFunctions) ############################################################################################ -function objective!(ensemble::SemEnsemble, par) - return mapreduce( - (model, weight) -> weight * objective!(model, par), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function gradient!(gradient, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - gradient_new = similar(gradient) - gradient!(gradient_new, model, par) - gradient .+= w * gradient_new - end -end - -function hessian!(hessian, ensemble::SemEnsemble, par) - fill!(hessian, zero(eltype(hessian))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - hessian_new = similar(hessian) - hessian!(hessian_new, model, par) - hessian .+= w * hessian_new - end -end - -function objective_gradient!(gradient, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - return mapreduce( - (model, weight) -> objective_gradient_wrap_(gradient, model, par, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function objective_hessian!(hessian, ensemble::SemEnsemble, par) - fill!(hessian, zero(eltype(hessian))) - return mapreduce( - (model, weight) -> objective_hessian_wrap_(hessian, model, par, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - new_gradient = similar(gradient) - new_hessian = similar(hessian) - - gradient_hessian!(new_gradient, new_hessian, model, par) - - gradient .+= w * new_gradient - hessian .+= w * new_hessian +function evaluate!(objective, gradient, hessian, loss::SemLoss, model::AbstractSem, params) + isnothing(objective) || (objective = zero(objective)) + isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) + isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) + f_grad = isnothing(gradient) ? nothing : similar(gradient) + f_hess = isnothing(hessian) ? nothing : similar(hessian) + for (f, weight) in zip(loss.functions, loss.weights) + f_obj = evaluate!(objective, f_grad, f_hess, f, model, params) + isnothing(objective) || (objective += weight * f_obj) + isnothing(gradient) || (gradient .+= weight * f_grad) + isnothing(hessian) || (hessian .+= weight * f_hess) end -end - -function objective_gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - return mapreduce( - (model, weight) -> - objective_gradient_hessian_wrap_(gradient, hessian, model, par, model, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -# wrapper to update gradient/hessian and return objective value -function objective_gradient_wrap_(gradient, model::AbstractSemSingle, par, w) - gradient_pre = similar(gradient) - new_objective = objective_gradient!(gradient_pre, model, par) - gradient .+= w * gradient_pre - return w * new_objective -end - -function objective_hessian_wrap_(hessian, model::AbstractSemSingle, par, w) - hessian_pre = similar(hessian) - new_objective = objective_hessian!(hessian_pre, model, par) - hessian .+= w * new_hessian - return w * new_objective -end - -function objective_gradient_hessian_wrap_( - gradient, - hessian, - model::AbstractSemSingle, - par, - w, -) - gradient_pre = similar(gradient) - hessian_pre = similar(hessian) - new_objective = objective_gradient_hessian!(gradient_pre, hessian_pre, model, par) - gradient .+= w * new_gradient - hessian .+= w * new_hessian - return w * new_objective + return objective end ############################################################################################ -# generic methods for loss functions +# methods for SemEnsemble (weighted sum of individual AbstractSemSingle models) ############################################################################################ -function objective_gradient!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - return objective, gradient -end - -function objective_hessian!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - hessian = hessian!(lossfun::SemLossFunction, par, model) - return objective, hessian -end - -function gradient_hessian!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - hessian = hessian!(lossfun::SemLossFunction, par, model) - return gradient, hessian -end - -function objective_gradient_hessian!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - hessian = hessian!(lossfun::SemLossFunction, par, model) - return objective, gradient, hessian +function evaluate!(objective, gradient, hessian, ensemble::SemEnsemble, params) + isnothing(objective) || (objective = zero(objective)) + isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) + isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) + sem_grad = isnothing(gradient) ? nothing : similar(gradient) + sem_hess = isnothing(hessian) ? nothing : similar(hessian) + for (sem, weight) in zip(ensemble.sems, ensemble.weights) + sem_obj = evaluate!(objective, sem_grad, sem_hess, sem, params) + isnothing(objective) || (objective += weight * sem_obj) + isnothing(gradient) || (gradient .+= weight * sem_grad) + isnothing(hessian) || (hessian .+= weight * sem_hess) + end + return objective end # throw an error by default if gradient! and hessian! are not implemented @@ -303,35 +155,6 @@ end hessian!(lossfun::SemLossFunction, par, model) = throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available")) =# -############################################################################################ -# generic methods for imply -############################################################################################ - -function objective_gradient!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - return nothing -end - -function objective_hessian!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - -function gradient_hessian!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - -function objective_gradient_hessian!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - ############################################################################################ # Documentation ############################################################################################ @@ -377,3 +200,18 @@ To implement a new `AbstractSem` subtype, you can add a method for hessian!(hessian, model::MyNewType, params) """ function hessian! end + +objective!(model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, nothing, model, params) +gradient!(gradient, model::AbstractSem, params) = + evaluate!(nothing, gradient, nothing, model, params) +hessian!(hessian, model::AbstractSem, params) = + evaluate!(nothing, nothing, hessian, model, params) +objective_gradient!(gradient, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), gradient, nothing, model, params) +objective_hessian!(hessian, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, hessian, model, params) +gradient_hessian!(gradient, hessian, model::AbstractSem, params) = + evaluate!(nothing, gradient, hessian, model, params) +objective_gradient_hessian!(gradient, hessian, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), gradient, hessian, model, params) diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 265ab178a..4790c9d36 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -70,7 +70,7 @@ end grad = similar(start_test) gradient!(grad, model_ml_multigroup, rand(36)) grad_fd = FiniteDiff.finite_difference_gradient( - x -> objective!(model_ml_multigroup, x), + Base.Fix1(SEM.objective, model_ml_multigroup), start_test, ) @@ -122,7 +122,7 @@ struct UserSemML <: SemLossFunction{ExactHessian} end using LinearAlgebra: isposdef, logdet, tr, inv -function SEM.objective!(semml::UserSemML, params, model::AbstractSem) +function SEM.objective(ml::UserSemML, model::AbstractSem, params) Σ = imply(model).Σ Σₒ = SEM.obs_cov(observed(model)) if !isposdef(Σ) From 3c533b6c75ccb46721bf18d079449a7faa96d3d2 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 19 Mar 2024 20:36:06 -0700 Subject: [PATCH 05/20] se_hessian(): rename hessian -> method for clarity --- src/frontend/fit/standard_errors/hessian.jl | 47 +++++++++----------- src/frontend/specification/ParameterTable.jl | 8 ++-- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index e71e601fb..8a4be88e3 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -1,39 +1,32 @@ """ - se_hessian(semfit::SemFit; hessian = :finitediff) + se_hessian(fit::SemFit; method = :finitediff) -Return hessian based standard errors. +Return hessian-based standard errors. # Arguments -- `hessian`: how to compute the hessian. Options are +- `method`: how to compute the hessian. Options are - `:analytic`: (only if an analytic hessian for the model can be computed) - `:finitediff`: for finite difference approximation """ -function se_hessian(sem_fit::SemFit; hessian = :finitediff) - c = H_scaling(sem_fit.model) - - if hessian == :analytic - par = solution(sem_fit) - H = zeros(eltype(par), length(par), length(par)) - hessian!(H, sem_fit.model, sem_fit.solution) - elseif hessian == :finitediff - H = FiniteDiff.finite_difference_hessian( - p -> evaluate!(zero(eltype(sem_fit.solution)), nothing, nothing, fit.model, p), - sem_fit.solution, - ) - elseif hessian == :optimizer - throw( - ArgumentError( - "standard errors from the optimizer hessian are not implemented yet", - ), - ) - elseif hessian == :expected - throw( - ArgumentError( - "standard errors based on the expected hessian are not implemented yet", - ), +function se_hessian(fit::SemFit; method = :finitediff) + c = H_scaling(fit.model) + params = solution(fit) + H = similar(params, (length(params), length(params))) + + if method == :analytic + evaluate!(nothing, nothing, H, fit.model, params) + elseif method == :finitediff + FiniteDiff.finite_difference_hessian!( + H, + p -> evaluate!(zero(eltype(H)), nothing, nothing, fit.model, p), + params, ) + elseif method == :optimizer + error("Standard errors from the optimizer hessian are not implemented yet") + elseif method == :expected + error("Standard errors based on the expected hessian are not implemented yet") else - throw(ArgumentError("I don't know how to compute `$hessian` standard-errors")) + throw(ArgumentError("Unsupported hessian calculation method :$method")) end invH = c * inv(H) diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index 8970b7430..687b712ba 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -362,12 +362,12 @@ end update_se_hessian!( partable::AbstractParameterTable, fit::SemFit; - hessian = :finitediff) + method = :finitediff) Write hessian standard errors computed for `fit` to the `:se` column of `partable` # Arguments -- `hessian::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information. +- `method::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information. # Examples @@ -375,9 +375,9 @@ Write hessian standard errors computed for `fit` to the `:se` column of `partabl function update_se_hessian!( partable::AbstractParameterTable, fit::SemFit; - hessian = :finitediff, + method = :finitediff, ) - se = se_hessian(fit; hessian = hessian) + se = se_hessian(fit; method) return update_partable!(partable, :se, params(fit), se) end From 9e33add58c7a9e7a4e1c21c26543aa089dd68584 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 23 Mar 2024 14:20:11 -0700 Subject: [PATCH 06/20] se_hessian!(): optimize calc * explicitly use Cholesky factorization --- src/frontend/fit/standard_errors/hessian.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 8a4be88e3..4de2db2f7 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -29,10 +29,9 @@ function se_hessian(fit::SemFit; method = :finitediff) throw(ArgumentError("Unsupported hessian calculation method :$method")) end - invH = c * inv(H) - se = sqrt.(diag(invH)) - - return se + H_chol = cholesky!(Symmetric(H)) + H_inv = LinearAlgebra.inv!(H_chol) + return [sqrt(c * H_inv[i]) for i in diagind(H_inv)] end # Addition functions ------------------------------------------------------------- From 5ad013e01309a9b09ce8690e5820845e8b77ec92 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 19 Mar 2024 20:38:00 -0700 Subject: [PATCH 07/20] H_scaling(): cleanup remove unnecesary arguments --- src/frontend/fit/standard_errors/hessian.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 4de2db2f7..6ae53407f 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -35,16 +35,20 @@ function se_hessian(fit::SemFit; method = :finitediff) end # Addition functions ------------------------------------------------------------- -H_scaling(model::AbstractSemSingle) = - H_scaling(model, model.observed, model.imply, model.optimizer, model.loss.functions...) +function H_scaling(model::AbstractSemSingle) + if length(model.loss.functions) > 1 + @warn "Hessian scaling for multiple loss functions is not implemented yet" + end + return H_scaling(model.loss.functions[1], model) +end -H_scaling(model, obs, imp, optimizer, lossfun::SemML) = 2 / (nsamples(model) - 1) +H_scaling(lossfun::SemML, model::AbstractSemSingle) = 2 / (nsamples(model) - 1) -function H_scaling(model, obs, imp, optimizer, lossfun::SemWLS) +function H_scaling(lossfun::SemWLS, model::AbstractSemSingle) @warn "Standard errors for WLS are only correct if a GLS weight matrix (the default) is used." return 2 / (nsamples(model) - 1) end -H_scaling(model, obs, imp, optimizer, lossfun::SemFIML) = 2 / nsamples(model) +H_scaling(lossfun::SemFIML, model::AbstractSemSingle) = 2 / nsamples(model) H_scaling(model::SemEnsemble) = 2 / nsamples(model) From a32903e7d6397aac49245ec927db8b4f11d372c8 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 18:35:50 -0700 Subject: [PATCH 08/20] SemOptOptim: remove redundant sem_fit() by dispatching over optimizer --- src/optimizer/documentation.jl | 7 +++++++ src/optimizer/optim.jl | 25 ++++--------------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/optimizer/documentation.jl b/src/optimizer/documentation.jl index 83b4f7a98..7c17e6ce2 100644 --- a/src/optimizer/documentation.jl +++ b/src/optimizer/documentation.jl @@ -20,3 +20,10 @@ sem_fit( ``` """ function sem_fit end + +# dispatch on optimizer +sem_fit(model::AbstractSem; kwargs...) = sem_fit(model.optimizer, model; kwargs...) + +# fallback method +sem_fit(optimizer::SemOptimizer, model::AbstractSem; kwargs...) = + error("Optimizer $(optimizer) support not implemented.") diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 68617fdb8..6acf665e6 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -45,29 +45,12 @@ n_iterations(res::Optim.MultivariateOptimizationResults) = Optim.iterations(res) convergence(res::Optim.MultivariateOptimizationResults) = Optim.converged(res) function sem_fit( - model::AbstractSemSingle{O, I, L, D}; + optim::SemOptimizerOptim, + model::AbstractSem; start_val = start_val, kwargs..., -) where {O, I, L, D <: SemOptimizerOptim} - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), - start_val, - model.optimizer.algorithm, - model.optimizer.options, - ) - return SemFit(result, model, start_val) -end - -function sem_fit( - model::SemEnsemble{N, T, V, D, S}; - start_val = start_val, - kwargs..., -) where {N, T, V, D <: SemOptimizerOptim, S} - if !isa(start_val, Vector) +) + if !isa(start_val, AbstractVector) start_val = start_val(model; kwargs...) end From 7ab2dcbc92b50fff9f0feb64d19976a2609883ad Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 18:37:05 -0700 Subject: [PATCH 09/20] SemOptNLopt: remove redundant sem_fit() by dispatching over optimizer --- src/optimizer/NLopt.jl | 42 ++++-------------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/src/optimizer/NLopt.jl b/src/optimizer/NLopt.jl index ffe2ffed0..1fa475ab4 100644 --- a/src/optimizer/NLopt.jl +++ b/src/optimizer/NLopt.jl @@ -34,48 +34,14 @@ end # sem_fit method function sem_fit( - model::Sem{O, I, L, D}; + optimizer::SemOptimizerNLopt, + model::AbstractSem; start_val = start_val, kwargs..., -) where {O, I, L, D <: SemOptimizerNLopt} +) # starting values - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - # construct the NLopt problem - opt = construct_NLopt_problem( - model.optimizer.algorithm, - model.optimizer.options, - length(start_val), - ) - set_NLopt_constraints!(opt, model.optimizer) - opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model) - - if !isnothing(model.optimizer.local_algorithm) - opt_local = construct_NLopt_problem( - model.optimizer.local_algorithm, - model.optimizer.local_options, - length(start_val), - ) - opt.local_optimizer = opt_local - end - - # fit - result = NLopt.optimize(opt, start_val) - - return SemFit_NLopt(result, model, start_val, opt) -end - -function sem_fit( - model::SemEnsemble{N, T, V, D, S}; - start_val = start_val, - kwargs..., -) where {N, T, V, D <: SemOptimizerNLopt, S} - - # starting values - if !isa(start_val, Vector) + if !isa(start_val, AbstractVector) start_val = start_val(model; kwargs...) end From 65d111236ec9cf83b0c7ca6a8cb44a050f6a2275 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 18:37:45 -0700 Subject: [PATCH 10/20] SemOptOptim: use evaluate!() directly no wrapper required --- src/optimizer/optim.jl | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 6acf665e6..bb1bf507e 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -1,30 +1,4 @@ ## connect to Optim.jl as backend -function sem_wrap_optim(par, F, G, H, model::AbstractSem) - if !isnothing(F) - if !isnothing(G) - if !isnothing(H) - return objective_gradient_hessian!(G, H, model, par) - else - return objective_gradient!(G, model, par) - end - else - if !isnothing(H) - return objective_hessian!(H, model, par) - else - return objective!(model, par) - end - end - else - if !isnothing(G) - if !isnothing(H) - gradient_hessian!(G, H, model, par) - else - gradient!(G, model, par) - end - end - end - return nothing -end function SemFit( optimization_result::Optim.MultivariateOptimizationResults, @@ -55,7 +29,7 @@ function sem_fit( end result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), + Optim.only_fgh!((F, G, H, par) -> evaluate!(F, G, H, model, par)), start_val, model.optimizer.algorithm, model.optimizer.options, From 9ac8f8846503d788745f670925444f0345a6dfc7 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 18:38:30 -0700 Subject: [PATCH 11/20] SemOptNLopt: use evaluate!() directly --- src/optimizer/NLopt.jl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/optimizer/NLopt.jl b/src/optimizer/NLopt.jl index 1fa475ab4..7f4f61e1e 100644 --- a/src/optimizer/NLopt.jl +++ b/src/optimizer/NLopt.jl @@ -2,16 +2,6 @@ ### connect to NLopt.jl as backend ############################################################################################ -# wrapper to define the objective -function sem_wrap_nlopt(par, G, model::AbstractSem) - need_gradient = length(G) != 0 - if need_gradient - return objective_gradient!(G, model, par) - else - return objective!(model, par) - end -end - mutable struct NLoptResult result::Any problem::Any @@ -52,7 +42,14 @@ function sem_fit( length(start_val), ) set_NLopt_constraints!(opt, model.optimizer) - opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model) + opt.min_objective = + (par, G) -> evaluate!( + eltype(par), + !isnothing(G) && !isempty(G) ? G : nothing, + nothing, + model, + par, + ) if !isnothing(model.optimizer.local_algorithm) opt_local = construct_NLopt_problem( From cc778e28c9df7e03bab3710a916c19b3b6ae636c Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 3 Apr 2024 00:46:20 -0700 Subject: [PATCH 12/20] SemWLS: dim checks --- src/loss/WLS/WLS.jl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 60a454e37..fa193a565 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -59,23 +59,34 @@ function SemWLS(; meanstructure = false, kwargs..., ) - ind = CartesianIndices(obs_cov(observed)) - ind = filter(x -> (x[1] >= x[2]), ind) - s = obs_cov(observed)[ind] + nobs_vars = nobserved_vars(observed) + tril_ind = filter(x -> (x[1] >= x[2]), CartesianIndices(obs_cov(observed))) + s = obs_cov(observed)[tril_ind] # compute V here if isnothing(wls_weight_matrix) - D = duplication_matrix(nobserved_vars(observed)) + D = duplication_matrix(nobs_vars) S = inv(obs_cov(observed)) S = kron(S, S) wls_weight_matrix = 0.5 * (D' * S * D) + else + size(wls_weight_matrix) == (length(tril_ind), length(tril_ind)) || + DimensionMismatch( + "wls_weight_matrix has to be of size $(length(tril_ind))×$(length(tril_ind))", + ) end if meanstructure if isnothing(wls_weight_matrix_mean) wls_weight_matrix_mean = inv(obs_cov(observed)) + else + size(wls_weight_matrix_mean) == (nobs_vars, nobs_vars) || DimensionMismatch( + "wls_weight_matrix_mean has to be of size $(nobs_vars)×$(nobs_vars)", + ) end else + isnothing(wls_weight_matrix_mean) || + @warn "Ignoring wls_weight_matrix_mean since meanstructure is disabled" wls_weight_matrix_mean = nothing end HE = approximate_hessian ? ApproximateHessian : ExactHessian From 0d33ba410f6b4ff38998caa768f037ea7525ddc0 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 11 Aug 2024 13:50:38 -0700 Subject: [PATCH 13/20] fixup formatting --- src/frontend/specification/StenoGraphs.jl | 9 +++------ test/unit_tests/specification.jl | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl index 035d9588b..64a33f13e 100644 --- a/src/frontend/specification/StenoGraphs.jl +++ b/src/frontend/specification/StenoGraphs.jl @@ -73,12 +73,9 @@ function ParameterTable( ) end if element isa ModifiedEdge - if any(Base.Fix2(isa, Fixed), values(element.modifiers)) & any(Base.Fix2(isa, Label), values(element.modifiers)) - throw( - ArgumentError( - "It is not allowed to label fixed parameters." - ) - ) + if any(Base.Fix2(isa, Fixed), values(element.modifiers)) && + any(Base.Fix2(isa, Label), values(element.modifiers)) + throw(ArgumentError("It is not allowed to label fixed parameters.")) end for modifier in values(element.modifiers) if isnothing(group) && diff --git a/test/unit_tests/specification.jl b/test/unit_tests/specification.jl index e307d60f2..ef9fc73a1 100644 --- a/test/unit_tests/specification.jl +++ b/test/unit_tests/specification.jl @@ -29,7 +29,7 @@ end fixed_and_labeled_graph = @StenoGraph begin # measurement model - visual → fixed(1.0)*label(:λ)*x1 + visual → fixed(1.0) * label(:λ) * x1 end @testset "ParameterTable" begin @@ -42,7 +42,7 @@ end @test_throws ArgumentError("It is not allowed to label fixed parameters.") ParameterTable( fixed_and_labeled_graph, observed_vars = obs_vars, - latent_vars = lat_vars + latent_vars = lat_vars, ) partable = @inferred( ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) From 1c376a585e656d8fe5f46bad9cd7da02cf9aa89e Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 12:24:56 -0700 Subject: [PATCH 14/20] WLS: use 5-arg mul!() to reduce allocations --- src/loss/WLS/WLS.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index fa193a565..3cac6ee12 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -127,8 +127,7 @@ function evaluate!( end gradient .*= -2 end - isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ); - hessian .*= 2) + isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0)) if !isnothing(hessian) && (HessianEvaluation(semwls) === ExactHessian) ∇²Σ_function! = implied.∇²Σ_function ∇²Σ = implied.∇²Σ @@ -145,7 +144,7 @@ function evaluate!( objective += dot(μ₋, V_μ, μ₋) end if !isnothing(gradient) - gradient .-= 2 * (μ₋' * V_μ * implied.∇μ)' + mul!(gradient, (V_μ * implied.∇μ)', μ₋, -2, 1) end end From 4a6f51b6a9a53cb52a9e6b0de68a804142d5d9f1 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 23 Mar 2024 16:10:26 -0700 Subject: [PATCH 15/20] ML: use 5-arg mul!() to reduce allocations --- src/loss/ML/ML.jl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 445a557a7..33b6319aa 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -102,19 +102,19 @@ function evaluate!( ∇Σ = implied.∇Σ ∇μ = implied.∇μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - gradient .= (vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ)' - gradient .-= (2 * μ₋ᵀΣ⁻¹ * ∇μ)' + mul!(gradient, ∇Σ', vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)) + mul!(gradient, ∇μ', μ₋ᵀΣ⁻¹', -2, 1) end elseif !isnothing(gradient) || !isnothing(hessian) ∇Σ = implied.∇Σ Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' if !isnothing(gradient) - gradient .= (J * ∇Σ)' + mul!(gradient, ∇Σ', J') end if !isnothing(hessian) if HessianEvaluation(semml) === ApproximateHessian - mul!(hessian, 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ) + mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0) else ∇²Σ_function! = implied.∇²Σ_function ∇²Σ = implied.∇²Σ @@ -183,7 +183,8 @@ function evaluate!( ∇S = implied.∇S C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - gradᵀ = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S + mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0) + mul!(gradient, ∇S', vec(C), 1, 1) if MeanStructure(implied) === HasMeanStructure μ = implied.μ @@ -193,9 +194,10 @@ function evaluate!( μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ - gradᵀ .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S + mul!(gradient, ∇M', k', -2, 1) + mul!(gradient, ∇A', vec(k' * (I_A⁻¹ * (M + S * k'))'), -2, 1) + mul!(gradient, ∇S', vec(k'k), -1, 1) end - copyto!(gradient, gradᵀ') end return objective From d2b7e8c68a86ed9bce7e8b776a3eaa834cb862cd Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 12:25:31 -0700 Subject: [PATCH 16/20] FIML: use 5-arg mul! to avoid extra allocation --- src/loss/ML/FIML.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 92ecf73ca..20e837997 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -144,7 +144,7 @@ end function ∇F_fiml_outer!(G, JΣ, Jμ, imply::SemImplySymbolic, model, semfiml) mul!(G, imply.∇Σ', JΣ) # should be transposed - G .-= imply.∇μ' * Jμ + mul!(G, imply.∇μ', Jμ, -1, 1) end function ∇F_fiml_outer!(G, JΣ, Jμ, imply, model, semfiml) @@ -158,7 +158,7 @@ function ∇F_fiml_outer!(G, JΣ, Jμ, imply, model, semfiml) ∇μ = imply.F⨉I_A⁻¹ * imply.∇M + kron((imply.I_A⁻¹ * imply.M)', imply.F⨉I_A⁻¹) * imply.∇A mul!(G, ∇Σ', JΣ) # actually transposed - G .-= ∇μ' * Jμ + mul!(G, ∇μ', Jμ, -1, 1) end function F_FIML(rows, semfiml, model, params) From d0ea4066bd36c1cbacf3b46812a16ebe3da9b8d7 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 14 Aug 2024 09:29:51 -0700 Subject: [PATCH 17/20] fix the error message Co-authored-by: Maximilian Ernst <34346372+Maximilian-Stefan-Ernst@users.noreply.github.com> --- src/types.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index 6cdf9bead..5ae337e11 100644 --- a/src/types.jl +++ b/src/types.jl @@ -19,7 +19,7 @@ struct NoMeanStructure <: MeanStructure end # fallback implementation MeanStructure(::Type{T}) where {T} = - error("Objects of type $T do not support MeanStructure trait") + error("Objects of type $T do not support the MeanStructure trait") MeanStructure(semobj) = MeanStructure(typeof(semobj)) "Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" From 56ec1c9856484bd92279c6b1ac35daaac96f04cc Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 8 Oct 2024 01:07:22 -0700 Subject: [PATCH 18/20] HessianEvaluation -> HessianEval --- src/StructuralEquationModels.jl | 4 ++-- src/loss/ML/ML.jl | 8 ++++---- src/loss/WLS/WLS.jl | 8 ++++---- src/types.jl | 20 ++++++++++---------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index a171c29d0..2a469dd91 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -98,9 +98,9 @@ export AbstractSem, MeanStructure, NoMeanStructure, HasMeanStructure, - HessianEvaluation, + HessianEval, ExactHessian, - ApproximateHessian, + ApproxHessian, SemImply, RAMSymbolic, RAM, diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 33b6319aa..261b260d6 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -27,12 +27,12 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemML{HE <: HessianEvaluation, INV, M, M2} <: SemLossFunction{HE} +struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction{HE} Σ⁻¹::INV Σ⁻¹Σₒ::M meandiff::M2 - SemML{HE}(args...) where {HE <: HessianEvaluation} = + SemML{HE}(args...) where {HE <: HessianEval} = new{HE, map(typeof, args)...}(args...) end @@ -45,7 +45,7 @@ function SemML(; observed::SemObserved, approximate_hessian::Bool = false, kwarg obscov = obs_cov(observed) meandiff = isnothing(obsmean) ? nothing : copy(obsmean) - return SemML{approximate_hessian ? ApproximateHessian : ExactHessian}( + return SemML{approximate_hessian ? ApproxHessian : ExactHessian}( similar(obscov), similar(obscov), meandiff, @@ -113,7 +113,7 @@ function evaluate!( mul!(gradient, ∇Σ', J') end if !isnothing(hessian) - if HessianEvaluation(semml) === ApproximateHessian + if HessianEval(semml) === ApproxHessian mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0) else ∇²Σ_function! = implied.∇²Σ_function diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 3cac6ee12..2345f859d 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -38,7 +38,7 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemWLS{HE <: HessianEvaluation, Vt, St, C} <: SemLossFunction{HE} +struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction{HE} V::Vt σₒ::St V_μ::C @@ -48,7 +48,7 @@ end ### Constructors ############################################################################################ -SemWLS{HE}(args...) where {HE <: HessianEvaluation} = +SemWLS{HE}(args...) where {HE <: HessianEval} = SemWLS{HE, map(typeof, args)...}(args...) function SemWLS(; @@ -89,7 +89,7 @@ function SemWLS(; @warn "Ignoring wls_weight_matrix_mean since meanstructure is disabled" wls_weight_matrix_mean = nothing end - HE = approximate_hessian ? ApproximateHessian : ExactHessian + HE = approximate_hessian ? ApproxHessian : ExactHessian return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean) end @@ -128,7 +128,7 @@ function evaluate!( gradient .*= -2 end isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0)) - if !isnothing(hessian) && (HessianEvaluation(semwls) === ExactHessian) + if !isnothing(hessian) && (HessianEval(semwls) === ExactHessian) ∇²Σ_function! = implied.∇²Σ_function ∇²Σ = implied.∇²Σ J = -2 * (σ₋' * semwls.V)' diff --git a/src/types.jl b/src/types.jl index 5ae337e11..12082be12 100644 --- a/src/types.jl +++ b/src/types.jl @@ -23,19 +23,19 @@ MeanStructure(::Type{T}) where {T} = MeanStructure(semobj) = MeanStructure(typeof(semobj)) "Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" -abstract type HessianEvaluation end -struct ApproximateHessian <: HessianEvaluation end -struct ExactHessian <: HessianEvaluation end +abstract type HessianEval end +struct ApproxHessian <: HessianEval end +struct ExactHessian <: HessianEval end # fallback implementation -HessianEvaluation(::Type{T}) where {T} = - error("Objects of type $T do not support HessianEvaluation trait") -HessianEvaluation(semobj) = HessianEvaluation(typeof(semobj)) +HessianEval(::Type{T}) where {T} = + error("Objects of type $T do not support HessianEval trait") +HessianEval(semobj) = HessianEval(typeof(semobj)) "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." -abstract type SemLossFunction{HE <: HessianEvaluation} end +abstract type SemLossFunction{HE <: HessianEval} end -HessianEvaluation(::Type{<:SemLossFunction{HE}}) where {HE <: HessianEvaluation} = HE +HessianEval(::Type{<:SemLossFunction{HE}}) where {HE <: HessianEval} = HE """ SemLoss(args...; loss_weights = nothing, ...) @@ -97,10 +97,10 @@ Computed model-implied values that should be compared with the observed data to e. g. the model implied covariance or mean. If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImply. """ -abstract type SemImply{MS <: MeanStructure, HE <: HessianEvaluation} end +abstract type SemImply{MS <: MeanStructure, HE <: HessianEval} end MeanStructure(::Type{<:SemImply{MS}}) where {MS <: MeanStructure} = MS -HessianEvaluation(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStructure} = HE +HessianEval(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStructure} = HE "Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." abstract type SemImplySymbolic{MS, HE} <: SemImply{MS, HE} end From c673cd1d8eed836ac09812547ee5220fc9251a5f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 8 Oct 2024 01:12:51 -0700 Subject: [PATCH 19/20] MeanStructure -> MeanStruct --- src/StructuralEquationModels.jl | 6 +++--- src/imply/RAM/generic.jl | 8 ++++---- src/imply/RAM/symbolic.jl | 10 +++++----- src/imply/empty.jl | 2 +- src/loss/ML/ML.jl | 8 ++++---- src/loss/WLS/WLS.jl | 4 ++-- src/types.jl | 22 +++++++++++----------- 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 2a469dd91..944542379 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -95,9 +95,9 @@ export AbstractSem, Sem, SemFiniteDiff, SemEnsemble, - MeanStructure, - NoMeanStructure, - HasMeanStructure, + MeanStruct, + NoMeanStruct, + HasMeanStruct, HessianEval, ExactHessian, ApproxHessian, diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index 85cbc0220..d7b0f8097 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -107,7 +107,7 @@ mutable struct RAM{ ∇S::S2 ∇M::S3 - RAM{MS}(args...) where {MS <: MeanStructure} = new{MS, map(typeof, args)...}(args...) + RAM{MS}(args...) where {MS <: MeanStruct} = new{MS, map(typeof, args)...}(args...) end ############################################################################################ @@ -160,7 +160,7 @@ function RAM(; # μ if meanstructure - MS = HasMeanStructure + MS = HasMeanStruct !isnothing(M_indices) || throw( ArgumentError( "You set `meanstructure = true`, but your model specification contains no mean parameters.", @@ -169,7 +169,7 @@ function RAM(; ∇M = gradient_required ? matrix_gradient(M_indices, n_var) : nothing μ = zeros(n_obs) else - MS = NoMeanStructure + MS = NoMeanStruct M_indices = nothing M_pre = nothing μ = nothing @@ -226,7 +226,7 @@ function update!(targets::EvaluationTargets, imply::RAM, model::AbstractSemSingl mul!(imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹, imply.S) mul!(imply.Σ, imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹') - if MeanStructure(imply) === HasMeanStructure + if MeanStruct(imply) === HasMeanStruct mul!(imply.μ, imply.F⨉I_A⁻¹, imply.M) end end diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index d79454f3f..3e2fc0ad3 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -79,7 +79,7 @@ struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} < ∇μ_function::F5 ∇μ::A5 - RAMSymbolic{MS}(args...) where {MS <: MeanStructure} = + RAMSymbolic{MS}(args...) where {MS <: MeanStruct} = new{MS, map(typeof, args)...}(args...) end @@ -163,7 +163,7 @@ function RAMSymbolic(; # μ if meanstructure - MS = HasMeanStructure + MS = HasMeanStruct μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F) μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2] μ = zeros(size(μ_symbolic)) @@ -177,7 +177,7 @@ function RAMSymbolic(; ∇μ = nothing end else - MS = NoMeanStructure + MS = NoMeanStruct μ_function = nothing μ = nothing ∇μ_function = nothing @@ -213,13 +213,13 @@ function update!( par, ) imply.Σ_function(imply.Σ, par) - if MeanStructure(imply) === HasMeanStructure + if MeanStruct(imply) === HasMeanStruct imply.μ_function(imply.μ, par) end if is_gradient_required(targets) || is_hessian_required(targets) imply.∇Σ_function(imply.∇Σ, par) - if MeanStructure(imply) === HasMeanStructure + if MeanStruct(imply) === HasMeanStruct imply.∇μ_function(imply.∇μ, par) end end diff --git a/src/imply/empty.jl b/src/imply/empty.jl index 8b23194ac..6716e2c05 100644 --- a/src/imply/empty.jl +++ b/src/imply/empty.jl @@ -25,7 +25,7 @@ model per group and an additional model with `ImplyEmpty` and `SemRidge` for the ## Implementation Subtype of `SemImply`. """ -struct ImplyEmpty{V2} <: SemImply{NoMeanStructure, ExactHessian} +struct ImplyEmpty{V2} <: SemImply{NoMeanStruct, ExactHessian} ram_matrices::V2 end diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 261b260d6..20028ad77 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -69,7 +69,7 @@ function evaluate!( par, ) if !isnothing(hessian) - (MeanStructure(implied) === HasMeanStructure) && + (MeanStruct(implied) === HasMeanStruct) && throw(DomainError(H, "hessian of ML + meanstructure is not available")) end @@ -92,7 +92,7 @@ function evaluate!( mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ)) - if MeanStructure(implied) === HasMeanStructure + if MeanStruct(implied) === HasMeanStruct μ = implied.μ μₒ = obs_mean(observed(model)) μ₋ = μₒ - μ @@ -167,7 +167,7 @@ function evaluate!( if !isnothing(objective) objective = ld + tr(Σ⁻¹Σₒ) - if MeanStructure(implied) === HasMeanStructure + if MeanStruct(implied) === HasMeanStruct μ = implied.μ μₒ = obs_mean(observed(model)) μ₋ = μₒ - μ @@ -186,7 +186,7 @@ function evaluate!( mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0) mul!(gradient, ∇S', vec(C), 1, 1) - if MeanStructure(implied) === HasMeanStructure + if MeanStruct(implied) === HasMeanStruct μ = implied.μ μₒ = obs_mean(observed(model)) ∇M = implied.∇M diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 2345f859d..620784620 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -107,7 +107,7 @@ function evaluate!( model::AbstractSemSingle, par, ) - if !isnothing(hessian) && (MeanStructure(implied) === HasMeanStructure) + if !isnothing(hessian) && (MeanStruct(implied) === HasMeanStruct) error("hessian of WLS with meanstructure is not available") end @@ -135,7 +135,7 @@ function evaluate!( ∇²Σ_function!(∇²Σ, J, par) hessian .+= ∇²Σ end - if MeanStructure(implied) === HasMeanStructure + if MeanStruct(implied) === HasMeanStruct μ = implied.μ μₒ = obs_mean(observed(model)) μ₋ = μₒ - μ diff --git a/src/types.jl b/src/types.jl index 12082be12..53eec1496 100644 --- a/src/types.jl +++ b/src/types.jl @@ -11,16 +11,16 @@ abstract type AbstractSemSingle{O, I, L, D} <: AbstractSem end abstract type AbstractSemCollection <: AbstractSem end "Meanstructure trait for `SemImply` subtypes" -abstract type MeanStructure end -"Indicates that `SemImply` subtype supports meanstructure" -struct HasMeanStructure <: MeanStructure end -"Indicates that `SemImply` subtype does not support meanstructure" -struct NoMeanStructure <: MeanStructure end +abstract type MeanStruct end +"Indicates that `SemImply` subtype supports mean structure" +struct HasMeanStruct <: MeanStruct end +"Indicates that `SemImply` subtype does not support mean structure" +struct NoMeanStruct <: MeanStruct end # fallback implementation -MeanStructure(::Type{T}) where {T} = - error("Objects of type $T do not support the MeanStructure trait") -MeanStructure(semobj) = MeanStructure(typeof(semobj)) +MeanStruct(::Type{T}) where {T} = + error("Objects of type $T do not support MeanStruct trait") +MeanStruct(semobj) = MeanStruct(typeof(semobj)) "Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" abstract type HessianEval end @@ -97,10 +97,10 @@ Computed model-implied values that should be compared with the observed data to e. g. the model implied covariance or mean. If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImply. """ -abstract type SemImply{MS <: MeanStructure, HE <: HessianEval} end +abstract type SemImply{MS <: MeanStruct, HE <: HessianEval} end -MeanStructure(::Type{<:SemImply{MS}}) where {MS <: MeanStructure} = MS -HessianEval(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStructure} = HE +MeanStruct(::Type{<:SemImply{MS}}) where {MS <: MeanStruct} = MS +HessianEval(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStruct} = HE "Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." abstract type SemImplySymbolic{MS, HE} <: SemImply{MS, HE} end From 0cecaa857821eba2a619419ca04392b0c3db0c9b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 8 Oct 2024 01:14:48 -0700 Subject: [PATCH 20/20] SemImply: replace common type params with fields --- src/imply/RAM/generic.jl | 8 ++++++-- src/imply/RAM/symbolic.jl | 6 ++++-- src/imply/empty.jl | 6 ++++-- src/loss/ML/FIML.jl | 4 +++- src/loss/ML/ML.jl | 5 +++-- src/loss/WLS/WLS.jl | 5 +++-- src/loss/constant/constant.jl | 5 +++-- src/loss/regularization/ridge.jl | 4 +++- src/types.jl | 19 +++++++++---------- test/examples/multigroup/build_models.jl | 6 +++++- 10 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index d7b0f8097..e7e0b36f5 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -84,7 +84,10 @@ mutable struct RAM{ S1, S2, S3, -} <: SemImply{MS, ExactHessian} +} <: SemImply + meanstruct::MS + hessianeval::ExactHessian + Σ::A1 A::A2 S::A3 @@ -107,7 +110,8 @@ mutable struct RAM{ ∇S::S2 ∇M::S3 - RAM{MS}(args...) where {MS <: MeanStruct} = new{MS, map(typeof, args)...}(args...) + RAM{MS}(args...) where {MS <: MeanStruct} = + new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...) end ############################################################################################ diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index 3e2fc0ad3..9a96942ae 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -63,7 +63,9 @@ and for models with a meanstructure, the model implied means are computed as ``` """ struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <: - SemImplySymbolic{MS, ExactHessian} + SemImplySymbolic + meanstruct::MS + hessianeval::ExactHessian Σ_function::F1 ∇Σ_function::F2 ∇²Σ_function::F3 @@ -80,7 +82,7 @@ struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} < ∇μ::A5 RAMSymbolic{MS}(args...) where {MS <: MeanStruct} = - new{MS, map(typeof, args)...}(args...) + new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...) end ############################################################################################ diff --git a/src/imply/empty.jl b/src/imply/empty.jl index 6716e2c05..66373bc1b 100644 --- a/src/imply/empty.jl +++ b/src/imply/empty.jl @@ -25,7 +25,9 @@ model per group and an additional model with `ImplyEmpty` and `SemRidge` for the ## Implementation Subtype of `SemImply`. """ -struct ImplyEmpty{V2} <: SemImply{NoMeanStruct, ExactHessian} +struct ImplyEmpty{V2} <: SemImply + hessianeval::ExactHessian + meanstruct::NoMeanStruct ram_matrices::V2 end @@ -34,7 +36,7 @@ end ############################################################################################ function ImplyEmpty(; specification, kwargs...) - return ImplyEmpty(convert(RAMMatrices, specification)) + return ImplyEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification)) end ############################################################################################ diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 20e837997..20c81b831 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -24,7 +24,8 @@ Analytic gradients are available. ## Implementation Subtype of `SemLossFunction`. """ -mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction{ExactHessian} +mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction + hessianeval::ExactHessian inverses::INV #preallocated inverses of imp_cov choleskys::C #preallocated choleskys logdets::L #logdets of implied covmats @@ -65,6 +66,7 @@ function SemFIML(; observed, specification, kwargs...) [findall(x -> !(x[1] ∈ ind || x[2] ∈ ind), ∇ind) for ind in patterns_not(observed)] return SemFIML( + ExactHessian(), inverses, choleskys, logdets, diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 20028ad77..e81d27de7 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -27,13 +27,14 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction{HE} +struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction + hessianeval::HE Σ⁻¹::INV Σ⁻¹Σₒ::M meandiff::M2 SemML{HE}(args...) where {HE <: HessianEval} = - new{HE, map(typeof, args)...}(args...) + new{HE, map(typeof, args)...}(HE(), args...) end ############################################################################################ diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 620784620..9702a9cf4 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -38,7 +38,8 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction{HE} +struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction + hessianeval::HE V::Vt σₒ::St V_μ::C @@ -49,7 +50,7 @@ end ############################################################################################ SemWLS{HE}(args...) where {HE <: HessianEval} = - SemWLS{HE, map(typeof, args)...}(args...) + SemWLS{HE, map(typeof, args)...}(HE(), args...) function SemWLS(; observed, diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index 639864610..cb5157346 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -25,7 +25,8 @@ Analytic gradients and hessians are available. ## Implementation Subtype of `SemLossFunction`. """ -struct SemConstant{C} <: SemLossFunction{ExactHessian} +struct SemConstant{C} <: SemLossFunction + hessianeval::ExactHessian c::C end @@ -34,7 +35,7 @@ end ############################################################################################ function SemConstant(; constant_loss, kwargs...) - return SemConstant(constant_loss) + return SemConstant(ExactHessian(), constant_loss) end ############################################################################################ diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index be9b14fa5..6ec59ec39 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -29,7 +29,8 @@ Analytic gradients and hessians are available. ## Implementation Subtype of `SemLossFunction`. """ -struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction{ExactHessian} +struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction + hessianeval::ExactHessian α::P which::W1 which_H::W2 @@ -64,6 +65,7 @@ function SemRidge(; end which_H = [CartesianIndex(x, x) for x in which_ridge] return SemRidge( + ExactHessian(), α_ridge, which_ridge, which_H, diff --git a/src/types.jl b/src/types.jl index 53eec1496..020f6e77d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -17,9 +17,11 @@ struct HasMeanStruct <: MeanStruct end "Indicates that `SemImply` subtype does not support mean structure" struct NoMeanStruct <: MeanStruct end -# fallback implementation +# default implementation MeanStruct(::Type{T}) where {T} = + hasfield(T, :meanstruct) ? fieldtype(T, :meanstruct) : error("Objects of type $T do not support MeanStruct trait") + MeanStruct(semobj) = MeanStruct(typeof(semobj)) "Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" @@ -27,15 +29,15 @@ abstract type HessianEval end struct ApproxHessian <: HessianEval end struct ExactHessian <: HessianEval end -# fallback implementation +# default implementation HessianEval(::Type{T}) where {T} = + hasfield(T, :hessianeval) ? fieldtype(T, :hessianeval) : error("Objects of type $T do not support HessianEval trait") + HessianEval(semobj) = HessianEval(typeof(semobj)) "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." -abstract type SemLossFunction{HE <: HessianEval} end - -HessianEval(::Type{<:SemLossFunction{HE}}) where {HE <: HessianEval} = HE +abstract type SemLossFunction end """ SemLoss(args...; loss_weights = nothing, ...) @@ -97,13 +99,10 @@ Computed model-implied values that should be compared with the observed data to e. g. the model implied covariance or mean. If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImply. """ -abstract type SemImply{MS <: MeanStruct, HE <: HessianEval} end - -MeanStruct(::Type{<:SemImply{MS}}) where {MS <: MeanStruct} = MS -HessianEval(::Type{<:SemImply{MS, HE}}) where {MS, HE <: MeanStruct} = HE +abstract type SemImply end "Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." -abstract type SemImplySymbolic{MS, HE} <: SemImply{MS, HE} end +abstract type SemImplySymbolic <: SemImply end """ Sem(;observed = SemObservedData, imply = RAM, loss = SemML, optimizer = SemOptimizerOptim, kwargs...) diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 4790c9d36..2e1af38a2 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -114,7 +114,11 @@ end # ML estimation - user defined loss function ############################################################################################ -struct UserSemML <: SemLossFunction{ExactHessian} end +struct UserSemML <: SemLossFunction + hessianeval::ExactHessian + + UserSemML() = new(ExactHessian()) +end ############################################################################################ ### functors