From 0645a4af8afc09a3444bec18a45e8f06054217dd Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 1 Jun 2024 23:09:50 -0700 Subject: [PATCH 01/26] common.jl: common vars API methods --- src/StructuralEquationModels.jl | 1 + src/frontend/common.jl | 48 +++++++++++++++++++++++++++++++++ src/types.jl | 28 ------------------- 3 files changed, 49 insertions(+), 28 deletions(-) create mode 100644 src/frontend/common.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 19c7653b..33ca5a24 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -30,6 +30,7 @@ include("additional_functions/commutation_matrix.jl") # fitted objects include("frontend/fit/SemFit.jl") # specification of models +include("frontend/common.jl") include("frontend/specification/checks.jl") include("frontend/specification/ParameterTable.jl") include("frontend/specification/RAMMatrices.jl") diff --git a/src/frontend/common.jl b/src/frontend/common.jl new file mode 100644 index 00000000..110e59a5 --- /dev/null +++ b/src/frontend/common.jl @@ -0,0 +1,48 @@ +# API methods supported by multiple SEM.jl types + +""" + nparams(semobj) + +Return the number of parameters in a SEM model associated with `semboj`. + +See also [`params`](@ref). +""" +nparams(semobj) = length(params(semobj)) + +""" + nvars(semobj) + +Return the number of variables in a SEM model associated with `semobj`. + +See also [`vars`](@ref). +""" +nvars(semobj) = length(vars(semobj)) + +""" + nobserved_vars(semobj) + +Return the number of observed variables in a SEM model associated with `semobj`. +""" +nobserved_vars(semobj) = length(observed_vars(semobj)) + +""" + nlatent_vars(semobj) + +Return the number of latent variables in a SEM model associated with `semobj`. +""" +nlatent_vars(semobj) = length(latent_vars(semobj)) + +""" + param_indices(semobj) + +Returns a dict of parameter names and their indices in `semobj`. + +# Examples +```julia +parind = param_indices(my_fitted_sem) +parind[:param_name] +``` + +See also [`params`](@ref). +""" +param_indices(semobj) = Dict(par => i for (i, par) in enumerate(params(semobj))) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index d194709b..d3e1cde2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -20,29 +20,6 @@ Return the vector of SEM model parameters. """ params(model::AbstractSem) = model.params -""" - nparams(semobj) - -Return the number of SEM model parameters. -""" -nparams(model::AbstractSem) = length(params(model)) - -params(model::AbstractSemSingle) = params(model.imply) -nparams(model::AbstractSemSingle) = nparams(model.imply) - -""" - param_indices(semobj) - -Returns a dict of parameter names and their indices in `semobj`. - -# Examples -```julia -parind = param_indices(my_fitted_sem) -parind[:param_name] -``` -""" -param_indices(semobj) = Dict(par => i for (i, par) in enumerate(params(semobj))) - """ SemLoss(args...; loss_weights = nothing, ...) @@ -105,9 +82,6 @@ If you would like to implement a different notation, e.g. LISREL, you should imp """ abstract type SemImply end -params(imply::SemImply) = params(imply.ram_matrices) -nparams(imply::SemImply) = nparams(imply.ram_matrices) - "Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." abstract type SemImplySymbolic <: SemImply end @@ -225,7 +199,6 @@ function SemEnsemble(models...; optimizer = SemOptimizerOptim, weights = nothing end params(ensemble::SemEnsemble) = ensemble.params -nparams(ensemble::SemEnsemble) = length(ensemble.params) """ n_models(ensemble::SemEnsemble) -> Integer @@ -289,6 +262,5 @@ Base type for all SEM specifications. abstract type SemSpecification end params(spec::SemSpecification) = spec.params -nparams(spec::SemSpecification) = length(params(spec)) abstract type AbstractParameterTable <: SemSpecification end From 8b4abcf57f11afcb1b69282e5086c5e715794b3f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:58:57 -0700 Subject: [PATCH 02/26] SemSpecification: vars API --- src/frontend/specification/documentation.jl | 41 +++++++++++++++++++++ src/types.jl | 2 - 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/frontend/specification/documentation.jl b/src/frontend/specification/documentation.jl index 27bedfea..464af144 100644 --- a/src/frontend/specification/documentation.jl +++ b/src/frontend/specification/documentation.jl @@ -1,3 +1,44 @@ +""" + params(semobj) -> Vector{Symbol} + +Return the vector of SEM model parameter identifiers. +""" +function params end + +params(spec::SemSpecification) = spec.params + +""" + vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model variables (both observed and latent) +in the order specified by the model. +""" +function vars end + +vars(spec::SemSpecification) = error("vars(spec::$(typeof(spec))) is not implemented") + +""" + observed_vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model observed variable in the order specified by the +model, which also should match the order of variables in [`SemObserved`](@ref). +""" +function observed_vars end + +observed_vars(spec::SemSpecification) = + error("observed_vars(spec::$(typeof(spec))) is not implemented") + +""" + latent_vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model latent variable in the order specified by the +model. +""" +function latent_vars end + +latent_vars(spec::SemSpecification) = + error("latent_vars(spec::$(typeof(spec))) is not implemented") + """ `ParameterTable`s contain the specification of a structural equation model. diff --git a/src/types.jl b/src/types.jl index d3e1cde2..99153622 100644 --- a/src/types.jl +++ b/src/types.jl @@ -261,6 +261,4 @@ Base type for all SEM specifications. """ abstract type SemSpecification end -params(spec::SemSpecification) = spec.params - abstract type AbstractParameterTable <: SemSpecification end From a90defce716b6a4d00da67a14162476ff274d589 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 20 Mar 2024 16:42:59 -0700 Subject: [PATCH 03/26] RAMMatrices: vars API --- src/frontend/specification/RAMMatrices.jl | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index b52c15a5..6ba6be3d 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -65,6 +65,30 @@ end nparams(ram::RAMMatrices) = length(ram.A_ind) +nvars(ram::RAMMatrices) = ram.size_F[2] +nobserved_vars(ram::RAMMatrices) = ram.size_F[1] +nlatent_vars(ram::RAMMatrices) = nvars(ram) - nobserved_vars(ram) + +vars(ram::RAMMatrices) = ram.colnames + +function observed_vars(ram::RAMMatrices) + if isnothing(ram.colnames) + @warn "Your RAMMatrices do not contain column names. Please make sure the order of variables in your data is correct!" + return nothing + else + return view(ram.colnames, ram.F_ind) + end +end + +function latent_vars(ram::RAMMatrices) + if isnothing(ram.colnames) + @warn "Your RAMMatrices do not contain column names. Please make sure the order of variables in your data is correct!" + return nothing + else + return view(ram.colnames, setdiff(eachindex(ram.colnames), ram.F_ind)) + end +end + ############################################################################################ ### Constructor ############################################################################################ From 74c4550d942515223eddf3f206a4adf3037357a3 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 27 May 2024 14:06:22 -0700 Subject: [PATCH 04/26] ParamTable: vars API --- src/frontend/specification/ParameterTable.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index 03b111e8..91d55ce4 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -60,6 +60,15 @@ function ParameterTable( ) end +vars(partable::ParameterTable) = + !isempty(partable.sorted_vars) ? partable.sorted_vars : + vcat(partable.latent_vars, partable.observed_vars) +observed_vars(partable::ParameterTable) = partable.observed_vars +latent_vars(partable::ParameterTable) = partable.latent_vars + +nvars(partable::ParameterTable) = + length(partable.latent_vars) + length(partable.observed_vars) + ############################################################################################ ### Convert to other types ############################################################################################ @@ -206,8 +215,7 @@ function sort_vars!(partable::ParameterTable) end copyto!(resize!(partable.sorted_vars, length(sorted_vars)), sorted_vars) - @assert length(partable.sorted_vars) == - length(partable.observed_vars) + length(partable.latent_vars) + @assert length(partable.sorted_vars) == nvars(partable) return partable end From 800198c271e4a62e61a47c687911753d0d3dee4f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 18:49:59 -0700 Subject: [PATCH 05/26] SemImply: vars and params API --- src/StructuralEquationModels.jl | 1 + src/imply/abstract.jl | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/imply/abstract.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 33ca5a24..a77bc8d9 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -49,6 +49,7 @@ include("observed/EM.jl") include("frontend/specification/Sem.jl") include("frontend/specification/documentation.jl") # imply +include("imply/abstract.jl") include("imply/RAM/symbolic.jl") include("imply/RAM/generic.jl") include("imply/empty.jl") diff --git a/src/imply/abstract.jl b/src/imply/abstract.jl new file mode 100644 index 00000000..3898a65a --- /dev/null +++ b/src/imply/abstract.jl @@ -0,0 +1,30 @@ + +vars(imply::SemImply) = vars(imply.ram_matrices) +observed_vars(imply::SemImply) = observed_vars(imply.ram_matrices) +latent_vars(imply::SemImply) = latent_vars(imply.ram_matrices) + +nvars(imply::SemImply) = nvars(imply.ram_matrices) +nobserved_vars(imply::SemImply) = nobserved_vars(imply.ram_matrices) +nlatent_vars(imply::SemImply) = nlatent_vars(imply.ram_matrices) + +params(imply::SemImply) = params(imply.ram_matrices) +nparams(imply::SemImply) = nparams(imply.ram_matrices) + +function check_acyclic(A::AbstractMatrix) + # check if the model is acyclic + acyclic = isone(det(I-A)) + + # check if A is lower or upper triangular + if istril(A) + @info "A matrix is lower triangular" + return LowerTriangular(A) + elseif istriu(A) + @info "A matrix is upper triangular" + return UpperTriangular(A) + else + if acyclic + @info "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog=1 + end + return A + end +end \ No newline at end of file From c7a58ec65b9371e4bf841758b6e5827a320b2b21 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 16 Mar 2024 23:55:42 -0700 Subject: [PATCH 06/26] RAM imply: use vars API --- src/imply/RAM/generic.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index bc81a71a..8ace8075 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -108,7 +108,8 @@ function RAM(; # get dimensions of the model n_par = nparams(ram_matrices) - n_var, n_nod = ram_matrices.size_F + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) F = zeros(ram_matrices.size_F) F[CartesianIndex.(1:n_var, ram_matrices.F_ind)] .= 1.0 @@ -118,23 +119,23 @@ function RAM(; M_indices = !isnothing(ram_matrices.M_ind) ? copy(ram_matrices.M_ind) : nothing #preallocate arrays - A_pre = zeros(n_nod, n_nod) - S_pre = zeros(n_nod, n_nod) - !isnothing(M_indices) ? M_pre = zeros(n_nod) : M_pre = nothing + A_pre = zeros(n_var, n_var) + S_pre = zeros(n_var, n_var) + M_pre = !isnothing(M_indices) ? zeros(n_var) : nothing set_RAMConstants!(A_pre, S_pre, M_pre, ram_matrices.constants) A_pre = check_acyclic(A_pre, n_par, A_indices) # pre-allocate some matrices - Σ = zeros(n_var, n_var) - F⨉I_A⁻¹ = zeros(n_var, n_nod) - F⨉I_A⁻¹S = zeros(n_var, n_nod) + Σ = zeros(n_obs, n_obs) + F⨉I_A⁻¹ = zeros(n_obs, n_var) + F⨉I_A⁻¹S = zeros(n_obs, n_var) I_A = similar(A_pre) if gradient - ∇A = matrix_gradient(A_indices, n_nod^2) - ∇S = matrix_gradient(S_indices, n_nod^2) + ∇A = matrix_gradient(A_indices, n_var^2) + ∇S = matrix_gradient(S_indices, n_var^2) else ∇A = nothing ∇S = nothing @@ -144,7 +145,7 @@ function RAM(; if meanstructure has_meanstructure = Val(true) !isnothing(M_indices) || throw(ArgumentError("You set `meanstructure = true`, but your model specification contains no mean parameters.")) - ∇M = gradient ? matrix_gradient(M_indices, n_nod) : nothing + ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing μ = zeros(n_var) else has_meanstructure = Val(false) From 534330d8c9a35b52248ebe1cd4b8b3c989deb408 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 16 Mar 2024 23:55:42 -0700 Subject: [PATCH 07/26] RAMSymbolic: use vars API --- src/imply/RAM/symbolic.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index 6eb372d4..3c99053b 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -98,15 +98,16 @@ function RAMSymbolic(; ram_matrices = convert(RAMMatrices, specification) n_par = nparams(ram_matrices) - n_var, n_nod = ram_matrices.size_F + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) par = (Symbolics.@variables θ[1:n_par])[1] - A = zeros(Num, n_nod, n_nod) - S = zeros(Num, n_nod, n_nod) - !isnothing(ram_matrices.M_ind) ? M = zeros(Num, n_nod) : M = nothing + A = zeros(Num, n_var, n_var) + S = zeros(Num, n_var, n_var) + !isnothing(ram_matrices.M_ind) ? M = zeros(Num, n_var) : M = nothing F = zeros(ram_matrices.size_F) - F[CartesianIndex.(1:n_var, ram_matrices.F_ind)] .= 1.0 + F[CartesianIndex.(1:n_obs, ram_matrices.F_ind)] .= 1.0 set_RAMConstants!(A, S, M, ram_matrices.constants) fill_A_S_M!(A, S, M, ram_matrices.A_ind, ram_matrices.S_ind, ram_matrices.M_ind, par) From 2be665ea8a6955f7b373ec1f915fe511dc80dc29 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 16 Mar 2024 23:55:42 -0700 Subject: [PATCH 08/26] start_simple(): use vars API --- src/additional_functions/start_val/start_simple.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index 2c4f661c..8e3cb32c 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -62,17 +62,17 @@ function start_simple( start_means = 0.0, kwargs..., ) - A_ind, S_ind, F_ind, M_ind, params = ram_matrices.A_ind, + A_ind, S_ind, F_ind, M_ind, n_par = ram_matrices.A_ind, ram_matrices.S_ind, ram_matrices.F_ind, ram_matrices.M_ind, - ram_matrices.params + nparams(ram_matrices) - n_par = length(params) start_val = zeros(n_par) - n_var, n_nod = ram_matrices.size_F + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) - C_indices = CartesianIndices((n_nod, n_nod)) + C_indices = CartesianIndices((n_var, n_var)) for i in 1:n_par if length(S_ind[i]) != 0 From 8cf1f244d003d38aa143df21d1988b73caaa68d5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 16 Mar 2024 23:55:42 -0700 Subject: [PATCH 09/26] starts_fabin3: use vars API --- .../start_val/start_fabin3.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/additional_functions/start_val/start_fabin3.jl b/src/additional_functions/start_val/start_fabin3.jl index b56ee60a..081af3ba 100644 --- a/src/additional_functions/start_val/start_fabin3.jl +++ b/src/additional_functions/start_val/start_fabin3.jl @@ -31,18 +31,18 @@ function start_fabin3(observed::SemObservedMissing, imply, optimizer, args...; k end function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) - A_ind, S_ind, F_ind, M_ind, params = ram_matrices.A_ind, + A_ind, S_ind, F_ind, M_ind, n_par = ram_matrices.A_ind, ram_matrices.S_ind, ram_matrices.F_ind, ram_matrices.M_ind, - ram_matrices.params + nparams(ram_matrices) - n_par = length(params) start_val = zeros(n_par) - n_var, n_nod = ram_matrices.size_F - n_latent = n_nod - n_var + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) + n_latent = nlatent_vars(ram_matrices) - C_indices = CartesianIndices((n_nod, n_nod)) + C_indices = CartesianIndices((n_var, n_var)) # check in which matrix each parameter appears @@ -50,7 +50,7 @@ function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) #= in_S = length.(S_ind) .!= 0 in_A = length.(A_ind) .!= 0 - A_ind_c = [linear2cartesian(ind, (n_nod, n_nod)) for ind in A_ind] + A_ind_c = [linear2cartesian(ind, (n_var, n_var)) for ind in A_ind] in_Λ = [any(ind[2] .∈ F_ind) for ind in A_ind_c] if !isnothing(M) @@ -77,7 +77,7 @@ function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) # set loadings constants = ram_matrices.constants - A_ind_c = [linear2cartesian(ind, (n_nod, n_nod)) for ind in A_ind] + A_ind_c = [linear2cartesian(ind, (n_var, n_var)) for ind in A_ind] # ind_Λ = findall([is_in_Λ(ind_vec, F_ind) for ind_vec in A_ind_c]) function calculate_lambda( @@ -99,7 +99,7 @@ function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) end end - for i in setdiff(1:n_nod, F_ind) + for i in setdiff(1:n_var, F_ind) reference = Int64[] indicators = Int64[] indicator2parampos = Dict{Int, Int}() From 7c51f8fbb96729a9383814e5188a0718a4b096cc Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 Mar 2024 22:03:40 -0700 Subject: [PATCH 10/26] remove get_colnames() replaced by observed_vars() --- src/StructuralEquationModels.jl | 1 - src/observed/covariance.jl | 4 ++-- src/observed/data.jl | 4 ++-- src/observed/get_colnames.jl | 21 --------------------- src/observed/missing.jl | 4 ++-- 5 files changed, 6 insertions(+), 28 deletions(-) delete mode 100644 src/observed/get_colnames.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index a77bc8d9..69038c0d 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -40,7 +40,6 @@ include("frontend/fit/summary.jl") # pretty printing include("frontend/pretty_printing.jl") # observed -include("observed/get_colnames.jl") include("observed/covariance.jl") include("observed/data.jl") include("observed/missing.jl") diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index 9be35e51..28430263 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -63,8 +63,8 @@ function SemObservedCovariance(; throw(ArgumentError("`meanstructure = true`, but no observed means were passed")) end - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) + if isnothing(spec_colnames) && !isnothing(specification) + spec_colnames = observed_vars(specification) end if !isnothing(spec_colnames) & isnothing(obs_colnames) diff --git a/src/observed/data.jl b/src/observed/data.jl index 89deefd0..8886c18b 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -64,8 +64,8 @@ function SemObservedData(; rowwise = false, kwargs..., ) - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) + if isnothing(spec_colnames) && !isnothing(specification) + spec_colnames = observed_vars(specification) end if !isnothing(spec_colnames) diff --git a/src/observed/get_colnames.jl b/src/observed/get_colnames.jl deleted file mode 100644 index b8d89c3d..00000000 --- a/src/observed/get_colnames.jl +++ /dev/null @@ -1,21 +0,0 @@ -# specification colnames (only observed) -function get_colnames(specification::ParameterTable) - colnames = - isempty(specification.sorted_vars) ? specification.observed_vars : - filter(in(Set(specification.observed_vars)), specification.sorted_vars) - return colnames -end - -function get_colnames(specification::RAMMatrices) - if isnothing(specification.colnames) - @warn "Your RAMMatrices do not contain column names. Please make sure the order of variables in your data is correct!" - return nothing - else - colnames = specification.colnames[specification.F_ind] - return colnames - end -end - -function get_colnames(specification::Nothing) - return nothing -end diff --git a/src/observed/missing.jl b/src/observed/missing.jl index 439e3d83..af673bec 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -92,8 +92,8 @@ function SemObservedMissing(; spec_colnames = nothing, kwargs..., ) - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) + if isnothing(spec_colnames) && !isnothing(specification) + spec_colnames = observed_vars(specification) end if !isnothing(spec_colnames) From 5ed430b83461d1d4355a5db88d6c761bf45a41d5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 17 Mar 2024 00:40:33 -0700 Subject: [PATCH 11/26] remove get_n_nodes() replaced by nvars() --- src/loss/ML/FIML.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index d4870ac1..135bb411 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -73,7 +73,7 @@ function SemFIML(; observed, specification, kwargs...) meandiff, imp_inv, mult, - CommutationMatrix(get_n_nodes(specification)), + CommutationMatrix(nvars(specification)), nothing, ) end @@ -249,7 +249,3 @@ function check_fiml(semfiml, model) a = cholesky!(Symmetric(semfiml.imp_inv); check = false) return isposdef(a) end - -get_n_nodes(specification::RAMMatrices) = specification.size_F[2] -get_n_nodes(specification::ParameterTable) = - length(specification.observed_vars) + length(specification.latent_vars) From 277f1460d1065f1a59709f107964766fbc763f64 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:55:29 -0700 Subject: [PATCH 12/26] get_data() -> samples() and add default implementation samples(::SemObserved) --- src/StructuralEquationModels.jl | 1 + src/frontend/fit/standard_errors/bootstrap.jl | 2 +- src/observed/abstract.jl | 10 ++++++++++ src/observed/data.jl | 3 +-- src/observed/missing.jl | 3 +-- test/unit_tests/data_input_formats.jl | 18 +++++++++--------- 6 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 src/observed/abstract.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 69038c0d..89439ee9 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -40,6 +40,7 @@ include("frontend/fit/summary.jl") # pretty printing include("frontend/pretty_printing.jl") # observed +include("observed/abstract.jl") include("observed/covariance.jl") include("observed/data.jl") include("observed/missing.jl") diff --git a/src/frontend/fit/standard_errors/bootstrap.jl b/src/frontend/fit/standard_errors/bootstrap.jl index 814f46e5..9695e4cb 100644 --- a/src/frontend/fit/standard_errors/bootstrap.jl +++ b/src/frontend/fit/standard_errors/bootstrap.jl @@ -25,7 +25,7 @@ function se_bootstrap( end if isnothing(data) - data = get_data(observed(model(semfit))) + data = samples(observed(model(semfit))) end data = prepare_data_bootstrap(data) diff --git a/src/observed/abstract.jl b/src/observed/abstract.jl new file mode 100644 index 00000000..90de8b5a --- /dev/null +++ b/src/observed/abstract.jl @@ -0,0 +1,10 @@ +""" + samples(observed::SemObservedData) + +Gets the matrix of observed data samples. +Rows are samples, columns are observed variables. + +## See Also +[`nsamples`](@ref), [`observed_vars`](@ref). +""" +samples(observed::SemObserved) = observed.data diff --git a/src/observed/data.jl b/src/observed/data.jl index 8886c18b..f1c0ff9b 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -21,7 +21,7 @@ For observed data without missings. - `n_obs(::SemObservedData)` -> number of observed data points - `n_man(::SemObservedData)` -> number of manifest variables -- `get_data(::SemObservedData)` -> observed data +- `samples(::SemObservedData)` -> observed data - `obs_cov(::SemObservedData)` -> observed.obs_cov - `obs_mean(::SemObservedData)` -> observed.obs_mean - `data_rowwise(::SemObservedData)` -> observed data, stored as vectors per observation @@ -124,7 +124,6 @@ n_man(observed::SemObservedData) = observed.n_man ### additional methods ############################################################################################ -get_data(observed::SemObservedData) = observed.data obs_cov(observed::SemObservedData) = observed.obs_cov obs_mean(observed::SemObservedData) = observed.obs_mean data_rowwise(observed::SemObservedData) = observed.data_rowwise diff --git a/src/observed/missing.jl b/src/observed/missing.jl index af673bec..159a4915 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -30,7 +30,7 @@ For observed data with missing values. - `n_obs(::SemObservedMissing)` -> number of observed data points - `n_man(::SemObservedMissing)` -> number of manifest variables -- `get_data(::SemObservedMissing)` -> observed data +- `samples(::SemObservedMissing)` -> observed data - `data_rowwise(::SemObservedMissing)` -> observed data as vector per observation, with missing values deleted - `patterns(::SemObservedMissing)` -> indices of non-missing variables per missing patterns @@ -211,7 +211,6 @@ n_man(observed::SemObservedMissing) = observed.n_man ### Additional methods ############################################################################################ -get_data(observed::SemObservedMissing) = observed.data patterns(observed::SemObservedMissing) = observed.patterns patterns_not(observed::SemObservedMissing) = observed.patterns_not rows(observed::SemObservedMissing) = observed.rows diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl index 44656c33..070c1931 100644 --- a/test/unit_tests/data_input_formats.jl +++ b/test/unit_tests/data_input_formats.jl @@ -1,5 +1,5 @@ using StructuralEquationModels, Test, Statistics -using StructuralEquationModels: obs_cov, obs_mean, get_data +using StructuralEquationModels: obs_cov, obs_mean, samples ### model specification -------------------------------------------------------------------- spec = ParameterTable( @@ -64,8 +64,8 @@ all_equal_cov = (obs_cov(observed) == obs_cov(observed_matrix)) all_equal_data = - (get_data(observed) == get_data(observed_nospec)) & - (get_data(observed) == get_data(observed_matrix)) + (samples(observed) == samples(observed_nospec)) & + (samples(observed) == samples(observed_matrix)) @testset "unit tests | SemObservedData | input formats" begin @test all_equal_cov @@ -94,8 +94,8 @@ all_equal_cov_suffled = (obs_cov(observed) == obs_cov(observed_matrix_shuffle)) all_equal_data_suffled = - (get_data(observed) == get_data(observed_shuffle)) & - (get_data(observed) == get_data(observed_matrix_shuffle)) + (samples(observed) == samples(observed_shuffle)) & + (samples(observed) == samples(observed_matrix_shuffle)) @testset "unit tests | SemObservedData | input formats shuffled " begin @test all_equal_cov_suffled @@ -396,8 +396,8 @@ observed_matrix = SemObservedMissing( ) all_equal_data = - isequal(get_data(observed), get_data(observed_nospec)) & - isequal(get_data(observed), get_data(observed_matrix)) + isequal(samples(observed), samples(observed_nospec)) & + isequal(samples(observed), samples(observed_matrix)) @testset "unit tests | SemObservedMissing | input formats" begin @test all_equal_data @@ -421,8 +421,8 @@ observed_matrix_shuffle = SemObservedMissing( ) all_equal_data_shuffled = - isequal(get_data(observed), get_data(observed_shuffle)) & - isequal(get_data(observed), get_data(observed_matrix_shuffle)) + isequal(samples(observed), samples(observed_shuffle)) & + isequal(samples(observed), samples(observed_matrix_shuffle)) @testset "unit tests | SemObservedMissing | input formats shuffled " begin @test all_equal_data_suffled From a197fa3b1c615f1e3a46ea1099faa62aafddf171 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 18 Apr 2024 21:46:58 -0700 Subject: [PATCH 13/26] SemObsData: remove rowwise * it is unused * if ever rowwise access would be required, it could be done with eachrow(data) without allocation --- src/observed/data.jl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/observed/data.jl b/src/observed/data.jl index f1c0ff9b..7573b974 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -24,7 +24,6 @@ For observed data without missings. - `samples(::SemObservedData)` -> observed data - `obs_cov(::SemObservedData)` -> observed.obs_cov - `obs_mean(::SemObservedData)` -> observed.obs_mean -- `data_rowwise(::SemObservedData)` -> observed data, stored as vectors per observation ## Implementation Subtype of `SemObserved` @@ -37,15 +36,13 @@ use this if you are sure your observed data is in the right format. ## Additional keyword arguments: - `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object - `compute_covariance::Bool ) = true`: should the covariance of `data` be computed and stored? -- `rowwise::Bool = false`: should the data be stored also as vectors per observation """ -struct SemObservedData{A, B, C, R} <: SemObserved +struct SemObservedData{A, B, C} <: SemObserved data::A obs_cov::B obs_mean::C n_man::Int n_obs::Int - data_rowwise::R end # error checks @@ -61,7 +58,6 @@ function SemObservedData(; spec_colnames = nothing, meanstructure = false, compute_covariance = true, - rowwise = false, kwargs..., ) if isnothing(spec_colnames) && !isnothing(specification) @@ -109,7 +105,6 @@ function SemObservedData(; meanstructure ? vec(Statistics.mean(data, dims = 1)) : nothing, size(data, 2), size(data, 1), - rowwise ? [data[i, :] for i in axes(data, 1)] : nothing, ) end @@ -126,7 +121,6 @@ n_man(observed::SemObservedData) = observed.n_man obs_cov(observed::SemObservedData) = observed.obs_cov obs_mean(observed::SemObservedData) = observed.obs_mean -data_rowwise(observed::SemObservedData) = observed.data_rowwise ############################################################################################ ### Additional functions From 0a152044ffcc6ef0a35bdf591dd47ce8f210d385 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:39:53 -0700 Subject: [PATCH 14/26] AbstractSemSingle: vars API --- src/frontend/specification/Sem.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 73d4e81d..14f4d5a7 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -20,6 +20,14 @@ function Sem(; return sem end +nvars(sem::AbstractSemSingle) = nvars(sem.imply) +nobserved_vars(sem::AbstractSemSingle) = nobserved_vars(sem.imply) +nlatent_vars(sem::AbstractSemSingle) = nlatent_vars(sem.imply) + +vars(sem::AbstractSemSingle) = vars(sem.imply) +observed_vars(sem::AbstractSemSingle) = observed_vars(sem.imply) +latent_vars(sem::AbstractSemSingle) = latent_vars(sem.imply) + function SemFiniteDiff(; observed::O = SemObservedData, imply::I = RAM, From 6503a3422d4b598736fdb6c876b2744053ed5c3f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:55:56 -0700 Subject: [PATCH 15/26] rename n_obs() -> nsamples() --- src/StructuralEquationModels.jl | 3 +- src/frontend/common.jl | 11 +++++++- src/frontend/fit/SemFit.jl | 1 + src/frontend/fit/fitmeasures/BIC.jl | 2 +- src/frontend/fit/fitmeasures/RMSEA.jl | 8 +++--- src/frontend/fit/fitmeasures/chi2.jl | 8 +++--- src/frontend/fit/fitmeasures/minus2ll.jl | 20 ++++++------- src/frontend/fit/fitmeasures/n_obs.jl | 16 ----------- src/frontend/fit/standard_errors/hessian.jl | 8 +++--- src/frontend/fit/summary.jl | 2 +- src/frontend/specification/Sem.jl | 5 ++++ src/loss/ML/FIML.jl | 14 +++++----- src/observed/EM.jl | 19 +++++++------ src/observed/covariance.jl | 14 +++++----- src/observed/data.jl | 6 ++-- src/observed/missing.jl | 26 ++++++++--------- src/types.jl | 4 +-- test/examples/political_democracy/by_parts.jl | 4 +-- .../political_democracy/constructor.jl | 8 +++--- test/unit_tests/data_input_formats.jl | 28 +++++++++---------- 20 files changed, 103 insertions(+), 104 deletions(-) delete mode 100644 src/frontend/fit/fitmeasures/n_obs.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 89439ee9..1f69b2e8 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -82,7 +82,6 @@ include("frontend/fit/fitmeasures/BIC.jl") include("frontend/fit/fitmeasures/chi2.jl") include("frontend/fit/fitmeasures/df.jl") include("frontend/fit/fitmeasures/minus2ll.jl") -include("frontend/fit/fitmeasures/n_obs.jl") include("frontend/fit/fitmeasures/p.jl") include("frontend/fit/fitmeasures/RMSEA.jl") include("frontend/fit/fitmeasures/n_man.jl") @@ -165,7 +164,7 @@ export AbstractSem, df, fit_measures, minus2ll, - n_obs, + nsamples, p_value, RMSEA, n_man, diff --git a/src/frontend/common.jl b/src/frontend/common.jl index 110e59a5..155a32ca 100644 --- a/src/frontend/common.jl +++ b/src/frontend/common.jl @@ -45,4 +45,13 @@ parind[:param_name] See also [`params`](@ref). """ -param_indices(semobj) = Dict(par => i for (i, par) in enumerate(params(semobj))) \ No newline at end of file +param_indices(semobj) = Dict(par => i for (i, par) in enumerate(params(semobj))) + +""" + nsamples(semobj) + +Return the number of samples (observed data points). + +For ensemble models, return the sum over all submodels. +""" +function nsamples end diff --git a/src/frontend/fit/SemFit.jl b/src/frontend/fit/SemFit.jl index ace9ed32..84d2f502 100644 --- a/src/frontend/fit/SemFit.jl +++ b/src/frontend/fit/SemFit.jl @@ -48,6 +48,7 @@ end params(fit::SemFit) = params(fit.model) nparams(fit::SemFit) = nparams(fit.model) +nsamples(fit::SemFit) = nsamples(fit.model) # access fields minimum(sem_fit::SemFit) = sem_fit.minimum diff --git a/src/frontend/fit/fitmeasures/BIC.jl b/src/frontend/fit/fitmeasures/BIC.jl index 47bd12f1..20638f4e 100644 --- a/src/frontend/fit/fitmeasures/BIC.jl +++ b/src/frontend/fit/fitmeasures/BIC.jl @@ -3,4 +3,4 @@ Return the bayesian information criterion. """ -BIC(sem_fit::SemFit) = minus2ll(sem_fit) + log(n_obs(sem_fit)) * nparams(sem_fit) +BIC(sem_fit::SemFit) = minus2ll(sem_fit) + log(nsamples(sem_fit)) * nparams(sem_fit) diff --git a/src/frontend/fit/fitmeasures/RMSEA.jl b/src/frontend/fit/fitmeasures/RMSEA.jl index 3b3eb384..b91e81d3 100644 --- a/src/frontend/fit/fitmeasures/RMSEA.jl +++ b/src/frontend/fit/fitmeasures/RMSEA.jl @@ -6,13 +6,13 @@ Return the RMSEA. function RMSEA end RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = - RMSEA(df(sem_fit), χ²(sem_fit), n_obs(sem_fit)) + RMSEA(df(sem_fit), χ²(sem_fit), nsamples(sem_fit)) RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - sqrt(length(sem_fit.model.sems)) * RMSEA(df(sem_fit), χ²(sem_fit), n_obs(sem_fit)) + sqrt(length(sem_fit.model.sems)) * RMSEA(df(sem_fit), χ²(sem_fit), nsamples(sem_fit)) -function RMSEA(df, chi2, n_obs) - rmsea = (chi2 - df) / (n_obs * df) +function RMSEA(df, chi2, nsamples) + rmsea = (chi2 - df) / (nsamples * df) rmsea > 0 ? nothing : rmsea = 0 return sqrt(rmsea) end diff --git a/src/frontend/fit/fitmeasures/chi2.jl b/src/frontend/fit/fitmeasures/chi2.jl index 51fe6f0c..2abebd96 100644 --- a/src/frontend/fit/fitmeasures/chi2.jl +++ b/src/frontend/fit/fitmeasures/chi2.jl @@ -20,11 +20,11 @@ function χ² end # RAM + SemML χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - (n_obs(sem_fit) - 1) * (sem_fit.minimum - logdet(observed.obs_cov) - observed.n_man) + (nsamples(sem_fit) - 1) * (sem_fit.minimum - logdet(observed.obs_cov) - observed.n_man) # bollen, p. 115, only correct for GLS weight matrix χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = - (n_obs(sem_fit) - 1) * sem_fit.minimum + (nsamples(sem_fit) - 1) * sem_fit.minimum # FIML function χ²(sem_fit::SemFit, observed::SemObservedMissing, imp, optimizer, loss_ml::SemFIML) @@ -45,7 +45,7 @@ end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemWLS} check_ensemble_length(model) check_lossfun_types(model, L) - return (sum(n_obs.(model.sems)) - 1) * sem_fit.minimum + return (nsamples(model) - 1) * sem_fit.minimum end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML} @@ -56,7 +56,7 @@ function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML w * (logdet(m.observed.obs_cov) + m.observed.n_man) for (w, m) in zip(model.weights, model.sems) ]) - return (sum(n_obs.(model.sems)) - 1) * F_G + return (nsamples(model) - 1) * F_G end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemFIML} diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index c984555b..67d69bbe 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -25,7 +25,7 @@ minus2ll(sem_fit::SemFit, obs, imp, optimizer, args...) = # SemML ------------------------------------------------------------------------------------ minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - n_obs(obs) * (minimum + log(2π) * n_man(obs)) + nsamples(obs) * (minimum + log(2π) * n_man(obs)) # WLS -------------------------------------------------------------------------------------- minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = @@ -41,8 +41,8 @@ function minus2ll( loss_ml::SemFIML, ) F = minimum - F *= n_obs(observed) - F += sum(log(2π) * observed.pattern_n_obs .* observed.pattern_nvar_obs) + F *= nsamples(observed) + F += sum(log(2π) * observed.pattern_nsamples .* observed.pattern_nvar_obs) return F end @@ -53,12 +53,12 @@ function minus2ll(observed::SemObservedMissing) minus2ll( observed.em_model.μ, observed.em_model.Σ, - observed.n_obs, + nsamples(observed), observed.rows, observed.patterns, observed.obs_mean, observed.obs_cov, - observed.pattern_n_obs, + observed.pattern_nsamples, observed.pattern_nvar_obs, ) else @@ -66,12 +66,12 @@ function minus2ll(observed::SemObservedMissing) minus2ll( observed.em_model.μ, observed.em_model.Σ, - observed.n_obs, + nsamples(observed), observed.rows, observed.patterns, observed.obs_mean, observed.obs_cov, - observed.pattern_n_obs, + observed.pattern_nsamples, observed.pattern_nvar_obs, ) end @@ -85,13 +85,13 @@ function minus2ll( patterns, obs_mean, obs_cov, - pattern_n_obs, + pattern_nsamples, pattern_nvar_obs, ) F = 0.0 for i in 1:length(rows) - nᵢ = pattern_n_obs[i] + nᵢ = pattern_nsamples[i] # missing pattern pattern = patterns[i] # observed data @@ -106,7 +106,7 @@ function minus2ll( F += F_one_pattern(meandiffᵢ, Σᵢ⁻¹, Sᵢ, ld, nᵢ) end - F += sum(log(2π) * pattern_n_obs .* pattern_nvar_obs) + F += sum(log(2π) * pattern_nsamples .* pattern_nvar_obs) #F *= N return F diff --git a/src/frontend/fit/fitmeasures/n_obs.jl b/src/frontend/fit/fitmeasures/n_obs.jl deleted file mode 100644 index cd4bdca3..00000000 --- a/src/frontend/fit/fitmeasures/n_obs.jl +++ /dev/null @@ -1,16 +0,0 @@ -""" - n_obs(sem_fit::SemFit) - n_obs(model::AbstractSemSingle) - n_obs(model::SemEnsemble) - -Return the number of observed data points. - -For ensemble models, return the sum over all submodels. -""" -function n_obs end - -n_obs(sem_fit::SemFit) = n_obs(sem_fit.model) - -n_obs(model::AbstractSemSingle) = n_obs(model.observed) - -n_obs(model::SemEnsemble) = sum(n_obs, model.sems) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 396d3b98..afcb570b 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -46,13 +46,13 @@ end H_scaling(model::AbstractSemSingle) = H_scaling(model, model.observed, model.imply, model.optimizer, model.loss.functions...) -H_scaling(model, obs, imp, optimizer, lossfun::SemML) = 2 / (n_obs(model) - 1) +H_scaling(model, obs, imp, optimizer, lossfun::SemML) = 2 / (nsamples(model) - 1) function H_scaling(model, obs, imp, optimizer, lossfun::SemWLS) @warn "Standard errors for WLS are only correct if a GLS weight matrix (the default) is used." - return 2 / (n_obs(model) - 1) + return 2 / (nsamples(model) - 1) end -H_scaling(model, obs, imp, optimizer, lossfun::SemFIML) = 2 / n_obs(model) +H_scaling(model, obs, imp, optimizer, lossfun::SemFIML) = 2 / nsamples(model) -H_scaling(model::SemEnsemble) = 2 / n_obs(model) +H_scaling(model::SemEnsemble) = 2 / nsamples(model) diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index 62179121..507e835f 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -17,7 +17,7 @@ function sem_summary( println("No. iterations/evaluations: $(n_iterations(sem_fit))") print("\n") println("Number of parameters: $(nparams(sem_fit))") - println("Number of observations: $(n_obs(sem_fit))") + println("Number of data samples: $(nsamples(sem_fit))") print("\n") printstyled( "----------------------------------- Model ----------------------------------- \n"; diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 14f4d5a7..1befb9aa 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -28,6 +28,11 @@ vars(sem::AbstractSemSingle) = vars(sem.imply) observed_vars(sem::AbstractSemSingle) = observed_vars(sem.imply) latent_vars(sem::AbstractSemSingle) = latent_vars(sem.imply) +nsamples(sem::AbstractSemSingle) = nsamples(sem.observed) + +# sum of samples in all sub-models +nsamples(ensemble::SemEnsemble) = sum(nsamples, ensemble.sems) + function SemFiniteDiff(; observed::O = SemObservedData, imply::I = RAM, diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 135bb411..6ff8e4f0 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -90,7 +90,7 @@ function objective!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) objective = F_FIML(rows(observed(model)), semfiml, model, params) - return objective / n_obs(observed(model)) + return objective / nsamples(observed(model)) end function gradient!(semfiml::SemFIML, params, model) @@ -100,7 +100,7 @@ function gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / n_obs(observed(model)) + gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / nsamples(observed(model)) return gradient end @@ -112,8 +112,8 @@ function objective_gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) objective = - F_FIML(rows(observed(model)), semfiml, model, params) / n_obs(observed(model)) - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / n_obs(observed(model)) + F_FIML(rows(observed(model)), semfiml, model, params) / nsamples(observed(model)) + gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / nsamples(observed(model)) return objective, gradient end @@ -182,7 +182,7 @@ function F_FIML(rows, semfiml, model, params) semfiml.inverses[i], obs_cov(observed(model))[i], semfiml.logdets[i], - pattern_n_obs(observed(model))[i], + pattern_nsamples(observed(model))[i], ) end return F @@ -199,7 +199,7 @@ function ∇F_FIML(rows, semfiml, model) obs_cov(observed(model))[i], patterns(observed(model))[i], semfiml.∇ind[i], - pattern_n_obs(observed(model))[i], + pattern_nsamples(observed(model))[i], Jμ, JΣ, model, @@ -213,7 +213,7 @@ function prepare_SemFIML!(semfiml, model) batch_cholesky!(semfiml, model) #batch_sym_inv_update!(semfiml, model) batch_inv!(semfiml, model) - for i in 1:size(pattern_n_obs(observed(model)), 1) + for i in 1:size(pattern_nsamples(observed(model)), 1) semfiml.meandiff[i] .= obs_mean(observed(model))[i] - semfiml.imp_mean[i] end end diff --git a/src/observed/EM.jl b/src/observed/EM.jl index 09dfbd82..4640a713 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -29,7 +29,8 @@ function em_mvn( rtol_em = 1e-4, kwargs..., ) - n_obs, n_man = observed.n_obs, Int(observed.n_man) + n_man = observed.n_man + nsamps = nsamples(observed) # preallocate stuff? 𝔼x_pre = zeros(n_man) @@ -44,8 +45,8 @@ function em_mvn( end end - # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, n_obs - # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, n_obs) + # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, nsamps + # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, nsamps) # initialize em_model = start_em(observed; kwargs...) @@ -57,7 +58,7 @@ function em_mvn( while !done em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xxᵀ_pre) - em_mvn_Mstep!(em_model, n_obs, 𝔼x, 𝔼xxᵀ) + em_mvn_Mstep!(em_model, nsamps, 𝔼x, 𝔼xxᵀ) if iter > max_iter_em done = true @@ -96,7 +97,7 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx Σ = em_model.Σ # Compute the expected sufficient statistics - for i in 2:length(observed.pattern_n_obs) + for i in 2:length(observed.pattern_nsamples) # observed and unobserved vars u = observed.patterns_not[i] @@ -125,9 +126,9 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx 𝔼xxᵀ .+= 𝔼xxᵀ_pre end -function em_mvn_Mstep!(em_model, n_obs, 𝔼x, 𝔼xxᵀ) - em_model.μ = 𝔼x / n_obs - Σ = Symmetric(𝔼xxᵀ / n_obs - em_model.μ * em_model.μ') +function em_mvn_Mstep!(em_model, nsamples, 𝔼x, 𝔼xxᵀ) + em_model.μ = 𝔼x / nsamples + Σ = Symmetric(𝔼xxᵀ / nsamples - em_model.μ * em_model.μ') # ridge Σ # while !isposdef(Σ) @@ -152,7 +153,7 @@ end # use μ and Σ of full cases function start_em_observed(observed::SemObservedMissing; kwargs...) - if (length(observed.patterns[1]) == observed.n_man) & (observed.pattern_n_obs[1] > 1) + if (length(observed.patterns[1]) == observed.n_man) & (observed.pattern_nsamples[1] > 1) μ = copy(observed.obs_mean[1]) Σ = copy(Symmetric(observed.obs_cov[1])) if !isposdef(Σ) diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index 28430263..a3ff822a 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -9,7 +9,7 @@ For observed covariance matrices and means. obs_colnames = nothing, meanstructure = false, obs_mean = nothing, - n_obs = nothing, + nsamples = nothing, kwargs...) # Arguments @@ -18,11 +18,11 @@ For observed covariance matrices and means. - `obs_colnames::Vector{Symbol}`: column names of the covariance matrix - `meanstructure::Bool`: does the model have a meanstructure? - `obs_mean`: observed mean vector -- `n_obs::Number`: number of observed data points (necessary for fit statistics) +- `nsamples::Number`: number of samples (observed data points); necessary for fit statistics # Extended help ## Interfaces -- `n_obs(::SemObservedCovariance)` -> number of observed data points +- `nsamples(::SemObservedCovariance)`: number of samples (observed data points) - `n_man(::SemObservedCovariance)` -> number of manifest variables - `obs_cov(::SemObservedCovariance)` -> observed covariance matrix @@ -43,7 +43,7 @@ struct SemObservedCovariance{B, C} <: SemObserved obs_cov::B obs_mean::C n_man::Int - n_obs::Int + nsamples::Int end function SemObservedCovariance(; @@ -53,7 +53,7 @@ function SemObservedCovariance(; spec_colnames = nothing, obs_mean = nothing, meanstructure = false, - n_obs::Integer, + nsamples::Integer, kwargs..., ) if !meanstructure & !isnothing(obs_mean) @@ -80,14 +80,14 @@ function SemObservedCovariance(; (obs_mean = reorder_obs_mean(obs_mean, spec_colnames, obs_colnames)) end - return SemObservedCovariance(obs_cov, obs_mean, size(obs_cov, 1), n_obs) + return SemObservedCovariance(obs_cov, obs_mean, size(obs_cov, 1), nsamples) end ############################################################################################ ### Recommended methods ############################################################################################ -n_obs(observed::SemObservedCovariance) = observed.n_obs +nsamples(observed::SemObservedCovariance) = observed.nsamples n_man(observed::SemObservedCovariance) = observed.n_man ############################################################################################ diff --git a/src/observed/data.jl b/src/observed/data.jl index 7573b974..c0a42d9c 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -18,7 +18,7 @@ For observed data without missings. # Extended help ## Interfaces -- `n_obs(::SemObservedData)` -> number of observed data points +- `nsamples(::SemObservedData)` -> number of observed data points - `n_man(::SemObservedData)` -> number of manifest variables - `samples(::SemObservedData)` -> observed data @@ -42,7 +42,7 @@ struct SemObservedData{A, B, C} <: SemObserved obs_cov::B obs_mean::C n_man::Int - n_obs::Int + nsamples::Int end # error checks @@ -112,7 +112,7 @@ end ### Recommended methods ############################################################################################ -n_obs(observed::SemObservedData) = observed.n_obs +nsamples(observed::SemObservedData) = observed.nsamples n_man(observed::SemObservedData) = observed.n_man ############################################################################################ diff --git a/src/observed/missing.jl b/src/observed/missing.jl index 159a4915..6a78b516 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -27,7 +27,7 @@ For observed data with missing values. # Extended help ## Interfaces -- `n_obs(::SemObservedMissing)` -> number of observed data points +- `nsamples(::SemObservedMissing)` -> number of observed data points - `n_man(::SemObservedMissing)` -> number of manifest variables - `samples(::SemObservedMissing)` -> observed data @@ -36,7 +36,7 @@ For observed data with missing values. - `patterns(::SemObservedMissing)` -> indices of non-missing variables per missing patterns - `patterns_not(::SemObservedMissing)` -> indices of missing variables per missing pattern - `rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern -- `pattern_n_obs(::SemObservedMissing)` -> number of data points per pattern +- `pattern_nsamples(::SemObservedMissing)` -> number of data points per pattern - `pattern_nvar_obs(::SemObservedMissing)` -> number of non-missing observed variables per pattern - `obs_mean(::SemObservedMissing)` -> observed mean per pattern - `obs_cov(::SemObservedMissing)` -> observed covariance per pattern @@ -56,7 +56,7 @@ use this if you are sure your observed data is in the right format. mutable struct SemObservedMissing{ A <: AbstractArray, D <: AbstractFloat, - O <: AbstractFloat, + O <: Number, P <: Vector, P2 <: Vector, R <: Vector, @@ -69,12 +69,12 @@ mutable struct SemObservedMissing{ } <: SemObserved data::A n_man::D - n_obs::O + nsamples::O patterns::P # missing patterns patterns_not::P2 rows::R # coresponding rows in data_rowwise data_rowwise::PD # list of data - pattern_n_obs::PO # observed rows per pattern + pattern_nsamples::PO # observed rows per pattern pattern_nvar_obs::PVO # number of non-missing variables per pattern obs_mean::A2 obs_cov::A3 @@ -140,14 +140,14 @@ function SemObservedMissing(; end data = data[keep, :] - n_obs, n_man = size(data) + nsamples, n_man = size(data) # compute and store the different missing patterns with their rowindices missings = ismissing.(data) patterns = [missings[i, :] for i in 1:size(missings, 1)] patterns_cart = findall.(!, patterns) - data_rowwise = [data[i, patterns_cart[i]] for i in 1:n_obs] + data_rowwise = [data[i, patterns_cart[i]] for i in 1:nsamples] data_rowwise = convert.(Array{Float64}, data_rowwise) remember = Vector{BitArray{1}}() @@ -175,7 +175,7 @@ function SemObservedMissing(; remember_cart_not = findall.(remember) rows = rows[sort_n_miss] - pattern_n_obs = size.(rows, 1) + pattern_nsamples = size.(rows, 1) pattern_nvar_obs = length.(remember_cart) cov_mean = [cov_and_mean(data_rowwise[rows]) for rows in rows] @@ -186,13 +186,13 @@ function SemObservedMissing(; return SemObservedMissing( data, - Float64(n_man), - Float64(n_obs), + Float64(nobs_vars), + nsamples, remember_cart, remember_cart_not, rows, data_rowwise, - Float64.(pattern_n_obs), + pattern_nsamples, Float64.(pattern_nvar_obs), obs_mean, obs_cov, @@ -204,7 +204,7 @@ end ### Recommended methods ############################################################################################ -n_obs(observed::SemObservedMissing) = observed.n_obs +nsamples(observed::SemObservedMissing) = observed.nsamples n_man(observed::SemObservedMissing) = observed.n_man ############################################################################################ @@ -215,7 +215,7 @@ patterns(observed::SemObservedMissing) = observed.patterns patterns_not(observed::SemObservedMissing) = observed.patterns_not rows(observed::SemObservedMissing) = observed.rows data_rowwise(observed::SemObservedMissing) = observed.data_rowwise -pattern_n_obs(observed::SemObservedMissing) = observed.pattern_n_obs +pattern_nsamples(observed::SemObservedMissing) = observed.pattern_nsamples pattern_nvar_obs(observed::SemObservedMissing) = observed.pattern_nvar_obs obs_mean(observed::SemObservedMissing) = observed.obs_mean obs_cov(observed::SemObservedMissing) = observed.obs_cov diff --git a/src/types.jl b/src/types.jl index 99153622..98d5f87e 100644 --- a/src/types.jl +++ b/src/types.jl @@ -176,8 +176,8 @@ function SemEnsemble(models...; optimizer = SemOptimizerOptim, weights = nothing # default weights if isnothing(weights) - nobs_total = sum(n_obs, models) - weights = [n_obs(model) / nobs_total for model in models] + nsamples_total = sum(nsamples, models) + weights = [nsamples(model) / nsamples_total for model in models] end # check parameters equality diff --git a/test/examples/political_democracy/by_parts.jl b/test/examples/political_democracy/by_parts.jl index 11953ccb..09d40cc2 100644 --- a/test/examples/political_democracy/by_parts.jl +++ b/test/examples/political_democracy/by_parts.jl @@ -41,7 +41,7 @@ model_ridge = Sem(observed, imply_ram, SemLoss(ml, ridge), optimizer_obj) model_constant = Sem(observed, imply_ram, SemLoss(ml, constant), optimizer_obj) model_ml_weighted = - Sem(observed, imply_ram, SemLoss(ml; loss_weights = [n_obs(model_ml)]), optimizer_obj) + Sem(observed, imply_ram, SemLoss(ml; loss_weights = [nsamples(model_ml)]), optimizer_obj) ############################################################################################ ### test gradients @@ -101,7 +101,7 @@ end solution_ml = sem_fit(model_ml) solution_ml_weighted = sem_fit(model_ml_weighted) @test solution(solution_ml) ≈ solution(solution_ml_weighted) rtol = 1e-3 - @test n_obs(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈ + @test nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈ StructuralEquationModels.minimum(solution_ml_weighted) rtol = 1e-6 end diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl index 5f1c838e..bf674dd7 100644 --- a/test/examples/political_democracy/constructor.jl +++ b/test/examples/political_democracy/constructor.jl @@ -12,7 +12,7 @@ model_ml_cov = Sem( obs_cov = cov(Matrix(dat)), obs_colnames = Symbol.(names(dat)), optimizer = semoptimizer, - n_obs = 75, + nsamples = 75, ) model_ls_sym = Sem( @@ -46,7 +46,7 @@ model_constant = Sem( model_ml_weighted = Sem( specification = partable, data = dat, - loss_weights = (n_obs(model_ml),), + loss_weights = (nsamples(model_ml),), optimizer = semoptimizer, ) @@ -116,7 +116,7 @@ end solution_ml_weighted = sem_fit(model_ml_weighted) @test isapprox(solution(solution_ml), solution(solution_ml_weighted), rtol = 1e-3) @test isapprox( - n_obs(model_ml) * StructuralEquationModels.minimum(solution_ml), + nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml), StructuralEquationModels.minimum(solution_ml_weighted), rtol = 1e-6, ) @@ -244,7 +244,7 @@ model_ml_cov = Sem( obs_colnames = Symbol.(names(dat)), meanstructure = true, optimizer = semoptimizer, - n_obs = 75, + nsamples = 75, ) model_ml_sym = Sem( diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl index 070c1931..f1adaf62 100644 --- a/test/unit_tests/data_input_formats.jl +++ b/test/unit_tests/data_input_formats.jl @@ -205,14 +205,14 @@ end specification = nothing, obs_cov = dat_cov, obs_mean = dat_mean, - n_obs = 75, + nsamples = 75, ) end -@test_throws UndefKeywordError(:n_obs) SemObservedCovariance(obs_cov = dat_cov) +@test_throws UndefKeywordError(:nsamples) SemObservedCovariance(obs_cov = dat_cov) @test_throws ArgumentError("no `obs_colnames` were specified") begin - SemObservedCovariance(specification = spec, obs_cov = dat_cov, n_obs = 75) + SemObservedCovariance(specification = spec, obs_cov = dat_cov, nsamples = 75) end @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin @@ -220,7 +220,7 @@ end specification = spec, obs_cov = dat_cov, obs_colnames = names(dat), - n_obs = 75, + nsamples = 75, ) end @@ -229,18 +229,18 @@ observed = SemObservedCovariance( specification = spec, obs_cov = dat_cov, obs_colnames = obs_colnames = Symbol.(names(dat)), - n_obs = 75, + nsamples = 75, ) observed_nospec = - SemObservedCovariance(specification = nothing, obs_cov = dat_cov, n_obs = 75) + SemObservedCovariance(specification = nothing, obs_cov = dat_cov, nsamples = 75) all_equal_cov = (obs_cov(observed) == obs_cov(observed_nospec)) @testset "unit tests | SemObservedCovariance | input formats" begin @test all_equal_cov - @test n_obs(observed) == 75 - @test n_obs(observed_nospec) == 75 + @test nsamples(observed) == 75 + @test nsamples(observed_nospec) == 75 end # shuffle variables @@ -256,7 +256,7 @@ observed_shuffle = SemObservedCovariance( specification = spec, obs_cov = shuffle_dat_cov, obs_colnames = shuffle_names, - n_obs = 75, + nsamples = 75, ) all_equal_cov_suffled = (obs_cov(observed) ≈ obs_cov(observed_shuffle)) @@ -273,7 +273,7 @@ end specification = spec, obs_cov = dat_cov, meanstructure = true, - n_obs = 75, + nsamples = 75, ) end @@ -293,7 +293,7 @@ end obs_cov = dat_cov, obs_colnames = Symbol.(names(dat)), meanstructure = true, - n_obs = 75, + nsamples = 75, ) end @@ -303,7 +303,7 @@ observed = SemObservedCovariance( obs_cov = dat_cov, obs_mean = dat_mean, obs_colnames = Symbol.(names(dat)), - n_obs = 75, + nsamples = 75, meanstructure = true, ) @@ -312,7 +312,7 @@ observed_nospec = SemObservedCovariance( obs_cov = dat_cov, obs_mean = dat_mean, meanstructure = true, - n_obs = 75, + nsamples = 75, ) all_equal_mean = (obs_mean(observed) == obs_mean(observed_nospec)) @@ -338,7 +338,7 @@ observed_shuffle = SemObservedCovariance( obs_cov = shuffle_dat_cov, obs_mean = shuffle_dat_mean, obs_colnames = shuffle_names, - n_obs = 75, + nsamples = 75, meanstructure = true, ) From 77a0cba005064301279f61b8f1494951c8d3e770 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 25 Jun 2024 17:43:36 -0700 Subject: [PATCH 16/26] rename n_man() -> nobserved_vars() for missing data pattern: nobserved_vars() -> nmeasured_vars(), obs_cov/obs_mean -> measured_cov/measured_mean --- src/StructuralEquationModels.jl | 2 -- src/frontend/fit/fitmeasures/chi2.jl | 5 +++-- src/frontend/fit/fitmeasures/df.jl | 6 +++--- src/frontend/fit/fitmeasures/minus2ll.jl | 12 ++++++------ src/frontend/fit/fitmeasures/n_man.jl | 11 ----------- src/imply/RAM/generic.jl | 6 +++--- src/imply/RAM/symbolic.jl | 2 +- src/loss/ML/FIML.jl | 16 ++++++++-------- src/loss/WLS/WLS.jl | 2 +- src/observed/EM.jl | 24 ++++++++++++------------ src/observed/covariance.jl | 4 ++-- src/observed/data.jl | 6 +++--- src/observed/missing.jl | 24 ++++++++++++------------ 13 files changed, 54 insertions(+), 66 deletions(-) delete mode 100644 src/frontend/fit/fitmeasures/n_man.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 1f69b2e8..6d2a8282 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -84,7 +84,6 @@ include("frontend/fit/fitmeasures/df.jl") include("frontend/fit/fitmeasures/minus2ll.jl") include("frontend/fit/fitmeasures/p.jl") include("frontend/fit/fitmeasures/RMSEA.jl") -include("frontend/fit/fitmeasures/n_man.jl") include("frontend/fit/fitmeasures/fit_measures.jl") # standard errors include("frontend/fit/standard_errors/hessian.jl") @@ -167,7 +166,6 @@ export AbstractSem, nsamples, p_value, RMSEA, - n_man, EmMVNModel, se_hessian, se_bootstrap, diff --git a/src/frontend/fit/fitmeasures/chi2.jl b/src/frontend/fit/fitmeasures/chi2.jl index 2abebd96..df1027bd 100644 --- a/src/frontend/fit/fitmeasures/chi2.jl +++ b/src/frontend/fit/fitmeasures/chi2.jl @@ -20,7 +20,8 @@ function χ² end # RAM + SemML χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - (nsamples(sem_fit) - 1) * (sem_fit.minimum - logdet(observed.obs_cov) - observed.n_man) + (nsamples(sem_fit) - 1) * + (sem_fit.minimum - logdet(observed.obs_cov) - nobserved_vars(observed)) # bollen, p. 115, only correct for GLS weight matrix χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = @@ -53,7 +54,7 @@ function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML check_lossfun_types(model, L) F_G = sem_fit.minimum F_G -= sum([ - w * (logdet(m.observed.obs_cov) + m.observed.n_man) for + w * (logdet(m.observed.obs_cov) + nobserved_vars(m.observed)) for (w, m) in zip(model.weights, model.sems) ]) return (nsamples(model) - 1) * F_G diff --git a/src/frontend/fit/fitmeasures/df.jl b/src/frontend/fit/fitmeasures/df.jl index d4a4376d..e8e72d59 100644 --- a/src/frontend/fit/fitmeasures/df.jl +++ b/src/frontend/fit/fitmeasures/df.jl @@ -11,10 +11,10 @@ df(sem_fit::SemFit) = df(sem_fit.model) df(model::AbstractSem) = n_dp(model) - nparams(model) function n_dp(model::AbstractSemSingle) - nman = n_man(model) - ndp = 0.5(nman^2 + nman) + nvars = nobserved_vars(model) + ndp = 0.5(nvars^2 + nvars) if !isnothing(model.imply.μ) - ndp += n_man(model) + ndp += nvars end return ndp end diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index 67d69bbe..bfd16114 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -25,7 +25,7 @@ minus2ll(sem_fit::SemFit, obs, imp, optimizer, args...) = # SemML ------------------------------------------------------------------------------------ minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - nsamples(obs) * (minimum + log(2π) * n_man(obs)) + nsamples(obs) * (minimum + log(2π) * nobserved_vars(obs)) # WLS -------------------------------------------------------------------------------------- minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = @@ -42,7 +42,7 @@ function minus2ll( ) F = minimum F *= nsamples(observed) - F += sum(log(2π) * observed.pattern_nsamples .* observed.pattern_nvar_obs) + F += sum(log(2π) * observed.pattern_nsamples .* observed.pattern_nobs_vars) return F end @@ -59,7 +59,7 @@ function minus2ll(observed::SemObservedMissing) observed.obs_mean, observed.obs_cov, observed.pattern_nsamples, - observed.pattern_nvar_obs, + observed.pattern_nobs_vars, ) else em_mvn(observed) @@ -72,7 +72,7 @@ function minus2ll(observed::SemObservedMissing) observed.obs_mean, observed.obs_cov, observed.pattern_nsamples, - observed.pattern_nvar_obs, + observed.pattern_nobs_vars, ) end end @@ -86,7 +86,7 @@ function minus2ll( obs_mean, obs_cov, pattern_nsamples, - pattern_nvar_obs, + pattern_nobs_vars, ) F = 0.0 @@ -106,7 +106,7 @@ function minus2ll( F += F_one_pattern(meandiffᵢ, Σᵢ⁻¹, Sᵢ, ld, nᵢ) end - F += sum(log(2π) * pattern_nsamples .* pattern_nvar_obs) + F += sum(log(2π) * pattern_nsamples .* pattern_nobs_vars) #F *= N return F diff --git a/src/frontend/fit/fitmeasures/n_man.jl b/src/frontend/fit/fitmeasures/n_man.jl deleted file mode 100644 index 45a7d99d..00000000 --- a/src/frontend/fit/fitmeasures/n_man.jl +++ /dev/null @@ -1,11 +0,0 @@ -""" - n_man(sem_fit::SemFit) - n_man(model::AbstractSemSingle) - -Return the number of manifest variables. -""" -function n_man end - -n_man(sem_fit::SemFit) = n_man(sem_fit.model) - -n_man(model::AbstractSemSingle) = n_man(model.observed) diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index 8ace8075..e93f5c5c 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -111,7 +111,7 @@ function RAM(; n_obs = nobserved_vars(ram_matrices) n_var = nvars(ram_matrices) F = zeros(ram_matrices.size_F) - F[CartesianIndex.(1:n_var, ram_matrices.F_ind)] .= 1.0 + F[CartesianIndex.(1:n_obs, ram_matrices.F_ind)] .= 1.0 # get indices A_indices = copy(ram_matrices.A_ind) @@ -146,7 +146,7 @@ function RAM(; has_meanstructure = Val(true) !isnothing(M_indices) || throw(ArgumentError("You set `meanstructure = true`, but your model specification contains no mean parameters.")) ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing - μ = zeros(n_var) + μ = zeros(n_obs) else has_meanstructure = Val(false) M_indices = nothing @@ -257,7 +257,7 @@ objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_means ############################################################################################ function update_observed(imply::RAM, observed::SemObserved; kwargs...) - if n_man(observed) == size(imply.Σ, 1) + if nobserved_vars(observed) == size(imply.Σ, 1) return imply else return RAM(; observed = observed, kwargs...) diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index 3c99053b..b8da2014 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -236,7 +236,7 @@ objective_gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, p ############################################################################################ function update_observed(imply::RAMSymbolic, observed::SemObserved; kwargs...) - if n_man(observed) == size(imply.Σ, 1) + if nobserved_vars(observed) == size(imply.Σ, 1) return imply else return RAMSymbolic(; observed = observed, kwargs...) diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 6ff8e4f0..3f39245d 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -47,20 +47,20 @@ end ############################################################################################ function SemFIML(; observed, specification, kwargs...) - inverses = broadcast(x -> zeros(x, x), Int64.(pattern_nvar_obs(observed))) + inverses = broadcast(x -> zeros(x, x), pattern_nobs_vars(observed)) choleskys = Array{Cholesky{Float64, Array{Float64, 2}}, 1}(undef, length(inverses)) n_patterns = size(rows(observed), 1) logdets = zeros(n_patterns) - imp_mean = zeros.(Int64.(pattern_nvar_obs(observed))) - meandiff = zeros.(Int64.(pattern_nvar_obs(observed))) + imp_mean = zeros.(pattern_nobs_vars(observed)) + meandiff = zeros.(pattern_nobs_vars(observed)) - nman = Int64(n_man(observed)) - imp_inv = zeros(nman, nman) + nobs_vars = nobserved_vars(observed) + imp_inv = zeros(nobs_vars, nobs_vars) mult = similar.(inverses) - ∇ind = vec(CartesianIndices(Array{Float64}(undef, nman, nman))) + ∇ind = vec(CartesianIndices(Array{Float64}(undef, nobs_vars, nobs_vars))) ∇ind = [findall(x -> !(x[1] ∈ ind || x[2] ∈ ind), ∇ind) for ind in patterns_not(observed)] @@ -189,8 +189,8 @@ function F_FIML(rows, semfiml, model, params) end function ∇F_FIML(rows, semfiml, model) - Jμ = zeros(Int64(n_man(model))) - JΣ = zeros(Int64(n_man(model)^2)) + Jμ = zeros(nobserved_vars(model)) + JΣ = zeros(nobserved_vars(model)^2) for i in 1:size(rows, 1) ∇F_one_pattern( diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 61c89fc8..8fcc84a9 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -64,7 +64,7 @@ function SemWLS(; # compute V here if isnothing(wls_weight_matrix) - D = duplication_matrix(n_man(observed)) + D = duplication_matrix(nobserved_vars(observed)) S = inv(obs_cov(observed)) S = kron(S, S) wls_weight_matrix = 0.5 * (D' * S * D) diff --git a/src/observed/EM.jl b/src/observed/EM.jl index 4640a713..2807a281 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -29,15 +29,15 @@ function em_mvn( rtol_em = 1e-4, kwargs..., ) - n_man = observed.n_man + nvars = nobserved_vars(observed) nsamps = nsamples(observed) # preallocate stuff? - 𝔼x_pre = zeros(n_man) - 𝔼xxᵀ_pre = zeros(n_man, n_man) + 𝔼x_pre = zeros(nvars) + 𝔼xxᵀ_pre = zeros(nvars, nvars) ### precompute for full cases - if length(observed.patterns[1]) == observed.n_man + if length(observed.patterns[1]) == nvars for row in observed.rows[1] row = observed.data_rowwise[row] 𝔼x_pre += row @@ -50,11 +50,11 @@ function em_mvn( # initialize em_model = start_em(observed; kwargs...) - em_model_prev = EmMVNModel(zeros(n_man, n_man), zeros(n_man), false) + em_model_prev = EmMVNModel(zeros(nvars, nvars), zeros(nvars), false) iter = 1 done = false - 𝔼x = zeros(n_man) - 𝔼xxᵀ = zeros(n_man, n_man) + 𝔼x = zeros(nvars) + 𝔼xxᵀ = zeros(nvars, nvars) while !done em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xxᵀ_pre) @@ -153,7 +153,7 @@ end # use μ and Σ of full cases function start_em_observed(observed::SemObservedMissing; kwargs...) - if (length(observed.patterns[1]) == observed.n_man) & (observed.pattern_nsamples[1] > 1) + if (length(observed.patterns[1]) == nobserved_vars(observed)) & (observed.pattern_nsamples[1] > 1) μ = copy(observed.obs_mean[1]) Σ = copy(Symmetric(observed.obs_cov[1])) if !isposdef(Σ) @@ -167,11 +167,11 @@ end # use μ = O and Σ = I function start_em_simple(observed::SemObservedMissing; kwargs...) - n_man = Int(observed.n_man) - μ = zeros(n_man) - Σ = rand(n_man, n_man) + nvars = nobserved_vars(observed) + μ = zeros(nvars) + Σ = rand(nvars, nvars) Σ = Σ * Σ' - # Σ = Matrix(1.0I, n_man, n_man) + # Σ = Matrix(1.0I, nvars, nvars) return EmMVNModel(Σ, μ, false) end diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index a3ff822a..f851fd5b 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -42,7 +42,7 @@ use this if you are sure your covariance matrix is in the right format. struct SemObservedCovariance{B, C} <: SemObserved obs_cov::B obs_mean::C - n_man::Int + nobs_vars::Int nsamples::Int end @@ -88,7 +88,7 @@ end ############################################################################################ nsamples(observed::SemObservedCovariance) = observed.nsamples -n_man(observed::SemObservedCovariance) = observed.n_man +nobserved_vars(observed::SemObservedCovariance) = observed.nobs_vars ############################################################################################ ### additional methods diff --git a/src/observed/data.jl b/src/observed/data.jl index c0a42d9c..c9b50e59 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -19,7 +19,7 @@ For observed data without missings. # Extended help ## Interfaces - `nsamples(::SemObservedData)` -> number of observed data points -- `n_man(::SemObservedData)` -> number of manifest variables +- `nobserved_vars(::SemObservedData)` -> number of observed (manifested) variables - `samples(::SemObservedData)` -> observed data - `obs_cov(::SemObservedData)` -> observed.obs_cov @@ -41,7 +41,7 @@ struct SemObservedData{A, B, C} <: SemObserved data::A obs_cov::B obs_mean::C - n_man::Int + nobs_vars::Int nsamples::Int end @@ -113,7 +113,7 @@ end ############################################################################################ nsamples(observed::SemObservedData) = observed.nsamples -n_man(observed::SemObservedData) = observed.n_man +nobserved_vars(observed::SemObservedData) = observed.nobs_vars ############################################################################################ ### additional methods diff --git a/src/observed/missing.jl b/src/observed/missing.jl index 6a78b516..859a9d19 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -28,7 +28,7 @@ For observed data with missing values. # Extended help ## Interfaces - `nsamples(::SemObservedMissing)` -> number of observed data points -- `n_man(::SemObservedMissing)` -> number of manifest variables +- `nobserved_vars(::SemObservedMissing)` -> number of manifest variables - `samples(::SemObservedMissing)` -> observed data - `data_rowwise(::SemObservedMissing)` -> observed data as vector per observation, with missing values deleted @@ -37,7 +37,7 @@ For observed data with missing values. - `patterns_not(::SemObservedMissing)` -> indices of missing variables per missing pattern - `rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern - `pattern_nsamples(::SemObservedMissing)` -> number of data points per pattern -- `pattern_nvar_obs(::SemObservedMissing)` -> number of non-missing observed variables per pattern +- `pattern_nobs_vars(::SemObservedMissing)` -> number of non-missing observed variables per pattern - `obs_mean(::SemObservedMissing)` -> observed mean per pattern - `obs_cov(::SemObservedMissing)` -> observed covariance per pattern - `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via optimization maximization @@ -55,7 +55,7 @@ use this if you are sure your observed data is in the right format. """ mutable struct SemObservedMissing{ A <: AbstractArray, - D <: AbstractFloat, + D <: Number, O <: Number, P <: Vector, P2 <: Vector, @@ -68,14 +68,14 @@ mutable struct SemObservedMissing{ S <: EmMVNModel, } <: SemObserved data::A - n_man::D + nobs_vars::D nsamples::O patterns::P # missing patterns patterns_not::P2 rows::R # coresponding rows in data_rowwise data_rowwise::PD # list of data pattern_nsamples::PO # observed rows per pattern - pattern_nvar_obs::PVO # number of non-missing variables per pattern + pattern_nobs_vars::PVO # number of non-missing variables per pattern obs_mean::A2 obs_cov::A3 em_model::S @@ -140,7 +140,7 @@ function SemObservedMissing(; end data = data[keep, :] - nsamples, n_man = size(data) + nsamples, nobs_vars = size(data) # compute and store the different missing patterns with their rowindices missings = ismissing.(data) @@ -176,24 +176,24 @@ function SemObservedMissing(; rows = rows[sort_n_miss] pattern_nsamples = size.(rows, 1) - pattern_nvar_obs = length.(remember_cart) + pattern_nobs_vars = length.(remember_cart) cov_mean = [cov_and_mean(data_rowwise[rows]) for rows in rows] obs_cov = [cov_mean[1] for cov_mean in cov_mean] obs_mean = [cov_mean[2] for cov_mean in cov_mean] - em_model = EmMVNModel(zeros(n_man, n_man), zeros(n_man), false) + em_model = EmMVNModel(zeros(nobs_vars, nobs_vars), zeros(nobs_vars), false) return SemObservedMissing( data, - Float64(nobs_vars), + nobs_vars, nsamples, remember_cart, remember_cart_not, rows, data_rowwise, pattern_nsamples, - Float64.(pattern_nvar_obs), + pattern_nobs_vars, obs_mean, obs_cov, em_model, @@ -205,7 +205,7 @@ end ############################################################################################ nsamples(observed::SemObservedMissing) = observed.nsamples -n_man(observed::SemObservedMissing) = observed.n_man +nobserved_vars(observed::SemObservedMissing) = observed.nobs_vars ############################################################################################ ### Additional methods @@ -216,7 +216,7 @@ patterns_not(observed::SemObservedMissing) = observed.patterns_not rows(observed::SemObservedMissing) = observed.rows data_rowwise(observed::SemObservedMissing) = observed.data_rowwise pattern_nsamples(observed::SemObservedMissing) = observed.pattern_nsamples -pattern_nvar_obs(observed::SemObservedMissing) = observed.pattern_nvar_obs +pattern_nobs_vars(observed::SemObservedMissing) = observed.pattern_nobs_vars obs_mean(observed::SemObservedMissing) = observed.obs_mean obs_cov(observed::SemObservedMissing) = observed.obs_cov em_model(observed::SemObservedMissing) = observed.em_model From ffc43a0af095a20aacb2ee1a0246a1e4267543d9 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:59:10 -0700 Subject: [PATCH 17/26] move Sem methods out of types.jl --- src/frontend/specification/Sem.jl | 33 +++++++++++++++++++++++++++ src/types.jl | 38 ------------------------------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 1befb9aa..758bc073 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -30,9 +30,42 @@ latent_vars(sem::AbstractSemSingle) = latent_vars(sem.imply) nsamples(sem::AbstractSemSingle) = nsamples(sem.observed) +params(model::AbstractSem) = params(model.imply) + # sum of samples in all sub-models nsamples(ensemble::SemEnsemble) = sum(nsamples, ensemble.sems) +############################################################################################ +# additional methods +############################################################################################ +""" + observed(model::AbstractSemSingle) -> SemObserved + +Returns the observed part of a model. +""" +observed(model::AbstractSemSingle) = model.observed + +""" + imply(model::AbstractSemSingle) -> SemImply + +Returns the imply part of a model. +""" +imply(model::AbstractSemSingle) = model.imply + +""" + loss(model::AbstractSemSingle) -> SemLoss + +Returns the loss part of a model. +""" +loss(model::AbstractSemSingle) = model.loss + +""" + optimizer(model::AbstractSemSingle) -> SemOptimizer + +Returns the optimizer part of a model. +""" +optimizer(model::AbstractSemSingle) = model.optimizer + function SemFiniteDiff(; observed::O = SemObservedData, imply::I = RAM, diff --git a/src/types.jl b/src/types.jl index 98d5f87e..0493da8f 100644 --- a/src/types.jl +++ b/src/types.jl @@ -13,13 +13,6 @@ abstract type AbstractSemCollection <: AbstractSem end "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." abstract type SemLossFunction end -""" - params(semobj) - -Return the vector of SEM model parameters. -""" -params(model::AbstractSem) = model.params - """ SemLoss(args...; loss_weights = nothing, ...) @@ -225,37 +218,6 @@ Returns the optimizer part of an ensemble model. """ optimizer(ensemble::SemEnsemble) = ensemble.optimizer -############################################################################################ -# additional methods -############################################################################################ -""" - observed(model::AbstractSemSingle) -> SemObserved - -Returns the observed part of a model. -""" -observed(model::AbstractSemSingle) = model.observed - -""" - imply(model::AbstractSemSingle) -> SemImply - -Returns the imply part of a model. -""" -imply(model::AbstractSemSingle) = model.imply - -""" - loss(model::AbstractSemSingle) -> SemLoss - -Returns the loss part of a model. -""" -loss(model::AbstractSemSingle) = model.loss - -""" - optimizer(model::AbstractSemSingle) -> SemOptimizer - -Returns the optimizer part of a model. -""" -optimizer(model::AbstractSemSingle) = model.optimizer - """ Base type for all SEM specifications. """ From ab47745b99d99cf1b30465ce88467914f04652fa Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:58:22 -0700 Subject: [PATCH 18/26] rows(::SemObservedMissing) -> pattern_rows() --- src/frontend/fit/fitmeasures/minus2ll.jl | 4 ++-- src/loss/ML/FIML.jl | 10 +++++----- src/observed/EM.jl | 4 ++-- src/observed/missing.jl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index bfd16114..88948d4d 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -54,7 +54,7 @@ function minus2ll(observed::SemObservedMissing) observed.em_model.μ, observed.em_model.Σ, nsamples(observed), - observed.rows, + pattern_rows(observed), observed.patterns, observed.obs_mean, observed.obs_cov, @@ -67,7 +67,7 @@ function minus2ll(observed::SemObservedMissing) observed.em_model.μ, observed.em_model.Σ, nsamples(observed), - observed.rows, + pattern_rows(observed), observed.patterns, observed.obs_mean, observed.obs_cov, diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 3f39245d..5609224e 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -50,7 +50,7 @@ function SemFIML(; observed, specification, kwargs...) inverses = broadcast(x -> zeros(x, x), pattern_nobs_vars(observed)) choleskys = Array{Cholesky{Float64, Array{Float64, 2}}, 1}(undef, length(inverses)) - n_patterns = size(rows(observed), 1) + n_patterns = size(pattern_rows(observed), 1) logdets = zeros(n_patterns) imp_mean = zeros.(pattern_nobs_vars(observed)) @@ -89,7 +89,7 @@ function objective!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) - objective = F_FIML(rows(observed(model)), semfiml, model, params) + objective = F_FIML(pattern_rows(observed(model)), semfiml, model, params) return objective / nsamples(observed(model)) end @@ -100,7 +100,7 @@ function gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / nsamples(observed(model)) + gradient = ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) return gradient end @@ -112,8 +112,8 @@ function objective_gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) objective = - F_FIML(rows(observed(model)), semfiml, model, params) / nsamples(observed(model)) - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / nsamples(observed(model)) + F_FIML(pattern_rows(observed(model)), semfiml, model, params) / nsamples(observed(model)) + gradient = ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) return objective, gradient end diff --git a/src/observed/EM.jl b/src/observed/EM.jl index 2807a281..a681e0fc 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -38,7 +38,7 @@ function em_mvn( ### precompute for full cases if length(observed.patterns[1]) == nvars - for row in observed.rows[1] + for row in pattern_rows(observed)[1] row = observed.data_rowwise[row] 𝔼x_pre += row 𝔼xxᵀ_pre += row * row' @@ -107,7 +107,7 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx V = Σ[u, u] - Σ[u, o] * (Σ[o, o] \ Σ[o, u]) # loop trough data - for row in observed.rows[i] + for row in pattern_rows(observed)[i] m = μ[u] + Σ[u, o] * (Σ[o, o] \ (observed.data_rowwise[row] - μ[o])) 𝔼xᵢ[u] = m diff --git a/src/observed/missing.jl b/src/observed/missing.jl index 859a9d19..b628a313 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -35,7 +35,7 @@ For observed data with missing values. - `patterns(::SemObservedMissing)` -> indices of non-missing variables per missing patterns - `patterns_not(::SemObservedMissing)` -> indices of missing variables per missing pattern -- `rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern +- `pattern_rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern - `pattern_nsamples(::SemObservedMissing)` -> number of data points per pattern - `pattern_nobs_vars(::SemObservedMissing)` -> number of non-missing observed variables per pattern - `obs_mean(::SemObservedMissing)` -> observed mean per pattern @@ -72,7 +72,7 @@ mutable struct SemObservedMissing{ nsamples::O patterns::P # missing patterns patterns_not::P2 - rows::R # coresponding rows in data_rowwise + pattern_rows::R # coresponding rows in data_rowwise data_rowwise::PD # list of data pattern_nsamples::PO # observed rows per pattern pattern_nobs_vars::PVO # number of non-missing variables per pattern @@ -213,7 +213,7 @@ nobserved_vars(observed::SemObservedMissing) = observed.nobs_vars patterns(observed::SemObservedMissing) = observed.patterns patterns_not(observed::SemObservedMissing) = observed.patterns_not -rows(observed::SemObservedMissing) = observed.rows +pattern_rows(observed::SemObservedMissing) = observed.pattern_rows data_rowwise(observed::SemObservedMissing) = observed.data_rowwise pattern_nsamples(observed::SemObservedMissing) = observed.pattern_nsamples pattern_nobs_vars(observed::SemObservedMissing) = observed.pattern_nobs_vars From c54fa3305c30f4dc2deb5ab8dfa9d12d772384a7 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 16 Jun 2024 20:59:58 -0700 Subject: [PATCH 19/26] fix formatting --- src/frontend/fit/summary.jl | 3 +-- src/frontend/specification/ParameterTable.jl | 7 +++++-- src/imply/RAM/generic.jl | 6 +++++- src/imply/abstract.jl | 7 ++++--- src/loss/ML/FIML.jl | 9 ++++++--- src/observed/EM.jl | 5 +++-- test/examples/political_democracy/by_parts.jl | 8 ++++++-- 7 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index 507e835f..f7055105 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -160,8 +160,7 @@ function sem_summary( var_array = reduce( hcat, - check_round(partable.columns[c][var_indices]; digits = digits) for - c in var_columns + check_round(partable.columns[c][var_indices]; digits = digits) for c in var_columns ) var_columns[2] = Symbol("") diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index 91d55ce4..8970b743 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -235,8 +235,11 @@ sort_vars(partable::ParameterTable) = sort_vars!(deepcopy(partable)) # add a row -------------------------------------------------------------------------------- function Base.push!(partable::ParameterTable, d::Union{AbstractDict{Symbol}, NamedTuple}) - issetequal(keys(partable.columns), keys(d)) || - throw(ArgumentError("The new row needs to have the same keys as the columns of the parameter table.")) + issetequal(keys(partable.columns), keys(d)) || throw( + ArgumentError( + "The new row needs to have the same keys as the columns of the parameter table.", + ), + ) for (key, val) in pairs(d) push!(partable.columns[key], val) end diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index e93f5c5c..9ff46bd2 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -144,7 +144,11 @@ function RAM(; # μ if meanstructure has_meanstructure = Val(true) - !isnothing(M_indices) || throw(ArgumentError("You set `meanstructure = true`, but your model specification contains no mean parameters.")) + !isnothing(M_indices) || throw( + ArgumentError( + "You set `meanstructure = true`, but your model specification contains no mean parameters.", + ), + ) ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing μ = zeros(n_obs) else diff --git a/src/imply/abstract.jl b/src/imply/abstract.jl index 3898a65a..afa2e0c0 100644 --- a/src/imply/abstract.jl +++ b/src/imply/abstract.jl @@ -12,7 +12,7 @@ nparams(imply::SemImply) = nparams(imply.ram_matrices) function check_acyclic(A::AbstractMatrix) # check if the model is acyclic - acyclic = isone(det(I-A)) + acyclic = isone(det(I - A)) # check if A is lower or upper triangular if istril(A) @@ -23,8 +23,9 @@ function check_acyclic(A::AbstractMatrix) return UpperTriangular(A) else if acyclic - @info "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog=1 + @info "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog = + 1 end return A end -end \ No newline at end of file +end diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 5609224e..cd5d0270 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -100,7 +100,8 @@ function gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) - gradient = ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) + gradient = + ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) return gradient end @@ -112,8 +113,10 @@ function objective_gradient!(semfiml::SemFIML, params, model) prepare_SemFIML!(semfiml, model) objective = - F_FIML(pattern_rows(observed(model)), semfiml, model, params) / nsamples(observed(model)) - gradient = ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) + F_FIML(pattern_rows(observed(model)), semfiml, model, params) / + nsamples(observed(model)) + gradient = + ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model)) return objective, gradient end diff --git a/src/observed/EM.jl b/src/observed/EM.jl index a681e0fc..ef5da317 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -62,7 +62,7 @@ function em_mvn( if iter > max_iter_em done = true - @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable. + @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable. Maybe try passing different starting values via 'start_em = ...' " elseif iter > 1 # done = isapprox(ll, ll_prev; rtol = rtol) @@ -153,7 +153,8 @@ end # use μ and Σ of full cases function start_em_observed(observed::SemObservedMissing; kwargs...) - if (length(observed.patterns[1]) == nobserved_vars(observed)) & (observed.pattern_nsamples[1] > 1) + if (length(observed.patterns[1]) == nobserved_vars(observed)) & + (observed.pattern_nsamples[1] > 1) μ = copy(observed.obs_mean[1]) Σ = copy(Symmetric(observed.obs_cov[1])) if !isposdef(Σ) diff --git a/test/examples/political_democracy/by_parts.jl b/test/examples/political_democracy/by_parts.jl index 09d40cc2..f50fb6dd 100644 --- a/test/examples/political_democracy/by_parts.jl +++ b/test/examples/political_democracy/by_parts.jl @@ -40,8 +40,12 @@ model_ridge = Sem(observed, imply_ram, SemLoss(ml, ridge), optimizer_obj) model_constant = Sem(observed, imply_ram, SemLoss(ml, constant), optimizer_obj) -model_ml_weighted = - Sem(observed, imply_ram, SemLoss(ml; loss_weights = [nsamples(model_ml)]), optimizer_obj) +model_ml_weighted = Sem( + observed, + imply_ram, + SemLoss(ml; loss_weights = [nsamples(model_ml)]), + optimizer_obj, +) ############################################################################################ ### test gradients From ee5b0bea0764012a0de64780e57d9d19e4a6c034 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 26 Jun 2024 13:47:42 -0700 Subject: [PATCH 20/26] samples(SemObsCov) throws an exception --- src/observed/covariance.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index f851fd5b..b78f4183 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -90,6 +90,9 @@ end nsamples(observed::SemObservedCovariance) = observed.nsamples nobserved_vars(observed::SemObservedCovariance) = observed.nobs_vars +samples(observed::SemObservedCovariance) = + error("$(typeof(observed)) does not store data samples") + ############################################################################################ ### additional methods ############################################################################################ From 91a149ddbc870928e92ee84925fef1596b383bb8 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 26 Jun 2024 19:48:13 -0700 Subject: [PATCH 21/26] SemObserved tests: refactor and add var API tests --- test/unit_tests/data_input_formats.jl | 755 +++++++++++++------------- test/unit_tests/unit_tests.jl | 8 +- 2 files changed, 372 insertions(+), 391 deletions(-) diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl index f1adaf62..4aafda3c 100644 --- a/test/unit_tests/data_input_formats.jl +++ b/test/unit_tests/data_input_formats.jl @@ -1,5 +1,7 @@ using StructuralEquationModels, Test, Statistics -using StructuralEquationModels: obs_cov, obs_mean, samples +using StructuralEquationModels: + samples, nsamples, observed_vars, nobserved_vars, obs_cov, obs_mean + ### model specification -------------------------------------------------------------------- spec = ParameterTable( @@ -18,412 +20,391 @@ dat_missing_matrix = Matrix(dat_missing) dat_cov = Statistics.cov(dat_matrix) dat_mean = vcat(Statistics.mean(dat_matrix, dims = 1)...) -############################################################################################ -### tests - SemObservedData -############################################################################################ - -# w.o. means ------------------------------------------------------------------------------- - -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedData(specification = spec, data = dat, obs_colnames = Symbol.(names(dat))) -end - -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedData(specification = spec, data = dat_matrix) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedData(specification = spec, data = dat_matrix, obs_colnames = names(dat)) -end - -@test_throws UndefKeywordError(:data) SemObservedData(specification = spec) - -@test_throws UndefKeywordError(:specification) SemObservedData(data = dat_matrix) - -# should work -observed = SemObservedData(specification = spec, data = dat) - -observed_nospec = SemObservedData(specification = nothing, data = dat_matrix) - -observed_matrix = SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = Symbol.(names(dat)), -) - -all_equal_cov = - (obs_cov(observed) == obs_cov(observed_nospec)) & - (obs_cov(observed) == obs_cov(observed_matrix)) - -all_equal_data = - (samples(observed) == samples(observed_nospec)) & - (samples(observed) == samples(observed_matrix)) - -@testset "unit tests | SemObservedData | input formats" begin - @test all_equal_cov - @test all_equal_data -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat = dat[:, new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -observed_shuffle = SemObservedData(specification = spec, data = shuffle_dat) - -observed_matrix_shuffle = SemObservedData( - specification = spec, - data = shuffle_dat_matrix, - obs_colnames = shuffle_names, -) - -all_equal_cov_suffled = - (obs_cov(observed) == obs_cov(observed_shuffle)) & - (obs_cov(observed) == obs_cov(observed_matrix_shuffle)) - -all_equal_data_suffled = - (samples(observed) == samples(observed_shuffle)) & - (samples(observed) == samples(observed_matrix_shuffle)) - -@testset "unit tests | SemObservedData | input formats shuffled " begin - @test all_equal_cov_suffled - @test all_equal_data_suffled -end - -# with means ------------------------------------------------------------------------------- - -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedData( - specification = spec, - data = dat, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, - ) -end - -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedData(specification = spec, data = dat_matrix, meanstructure = true) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = names(dat), - meanstructure = true, - ) -end - -@test_throws UndefKeywordError(:data) SemObservedData( - specification = spec, - meanstructure = true, -) - -@test_throws UndefKeywordError(:specification) SemObservedData( - data = dat_matrix, - meanstructure = true, -) - -# should work -observed = SemObservedData(specification = spec, data = dat, meanstructure = true) - -observed_nospec = - SemObservedData(specification = nothing, data = dat_matrix, meanstructure = true) - -observed_matrix = SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, -) - -all_equal_mean = - (obs_mean(observed) == obs_mean(observed_nospec)) & - (obs_mean(observed) == obs_mean(observed_matrix)) - -@testset "unit tests | SemObservedData | input formats - means" begin - @test all_equal_mean -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat = dat[:, new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -observed_shuffle = - SemObservedData(specification = spec, data = shuffle_dat, meanstructure = true) - -observed_matrix_shuffle = SemObservedData( - specification = spec, - data = shuffle_dat_matrix, - obs_colnames = shuffle_names, - meanstructure = true, -) - -all_equal_mean_suffled = - (obs_mean(observed) == obs_mean(observed_shuffle)) & - (obs_mean(observed) == obs_mean(observed_matrix_shuffle)) - -@testset "unit tests | SemObservedData | input formats shuffled - mean" begin - @test all_equal_mean_suffled -end - -############################################################################################ -### tests - SemObservedCovariance -############################################################################################ - -# w.o. means ------------------------------------------------------------------------------- - -# errors - -@test_throws ArgumentError("observed means were passed, but `meanstructure = false`") begin - SemObservedCovariance( - specification = nothing, - obs_cov = dat_cov, - obs_mean = dat_mean, - nsamples = 75, - ) -end - -@test_throws UndefKeywordError(:nsamples) SemObservedCovariance(obs_cov = dat_cov) - -@test_throws ArgumentError("no `obs_colnames` were specified") begin - SemObservedCovariance(specification = spec, obs_cov = dat_cov, nsamples = 75) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = names(dat), - nsamples = 75, - ) -end - -# should work -observed = SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = obs_colnames = Symbol.(names(dat)), - nsamples = 75, -) - -observed_nospec = - SemObservedCovariance(specification = nothing, obs_cov = dat_cov, nsamples = 75) - -all_equal_cov = (obs_cov(observed) == obs_cov(observed_nospec)) - -@testset "unit tests | SemObservedCovariance | input formats" begin - @test all_equal_cov - @test nsamples(observed) == 75 - @test nsamples(observed_nospec) == 75 -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -shuffle_dat_cov = Statistics.cov(shuffle_dat_matrix) - -observed_shuffle = SemObservedCovariance( - specification = spec, - obs_cov = shuffle_dat_cov, - obs_colnames = shuffle_names, - nsamples = 75, -) - -all_equal_cov_suffled = (obs_cov(observed) ≈ obs_cov(observed_shuffle)) - -@testset "unit tests | SemObservedCovariance | input formats shuffled " begin - @test all_equal_cov_suffled -end - -# with means ------------------------------------------------------------------------------- - -# errors -@test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin - SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - meanstructure = true, - nsamples = 75, - ) -end - -@test_throws UndefKeywordError SemObservedCovariance( - data = dat_matrix, - meanstructure = true, -) - -@test_throws UndefKeywordError SemObservedCovariance( - obs_cov = dat_cov, - meanstructure = true, -) - -@test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin - SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, - nsamples = 75, - ) -end - -# should work -observed = SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_mean = dat_mean, - obs_colnames = Symbol.(names(dat)), - nsamples = 75, - meanstructure = true, -) - -observed_nospec = SemObservedCovariance( - specification = nothing, - obs_cov = dat_cov, - obs_mean = dat_mean, - meanstructure = true, - nsamples = 75, -) - -all_equal_mean = (obs_mean(observed) == obs_mean(observed_nospec)) - -@testset "unit tests | SemObservedCovariance | input formats - means" begin - @test all_equal_mean -end - # shuffle variables new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] shuffle_names = Symbol.(names(dat))[new_order] shuffle_dat = dat[:, new_order] +shuffle_dat_missing = dat_missing[:, new_order] shuffle_dat_matrix = dat_matrix[:, new_order] +shuffle_dat_missing_matrix = dat_missing_matrix[:, new_order] shuffle_dat_cov = Statistics.cov(shuffle_dat_matrix) shuffle_dat_mean = vcat(Statistics.mean(shuffle_dat_matrix, dims = 1)...) -observed_shuffle = SemObservedCovariance( - specification = spec, - obs_cov = shuffle_dat_cov, - obs_mean = shuffle_dat_mean, - obs_colnames = shuffle_names, - nsamples = 75, - meanstructure = true, +# common tests for SemObserved subtypes +function test_observed( + observed::SemObserved, + dat, + dat_matrix, + dat_cov, + dat_mean; + meanstructure::Bool, + approx_cov::Bool = false, ) - -all_equal_mean_suffled = (obs_mean(observed) == obs_mean(observed_shuffle)) - -@testset "unit tests | SemObservedCovariance | input formats shuffled - mean" begin - @test all_equal_mean_suffled + @test @inferred(nobserved_vars(observed)) == size(dat, 2) + # FIXME observed should provide names of observed variables + @test @inferred(observed_vars(observed)) == names(dat) broken = true + @test @inferred(nsamples(observed)) == size(dat, 1) + + hasmissing = + !isnothing(dat_matrix) && any(ismissing, dat_matrix) || + !isnothing(dat_cov) && any(ismissing, dat_cov) + + if !isnothing(dat_matrix) + if hasmissing + @test isequal(@inferred(samples(observed)), dat_matrix) + else + @test @inferred(samples(observed)) == dat_matrix + end + end + + if !isnothing(dat_cov) + if hasmissing + @test isequal(@inferred(obs_cov(observed)), dat_cov) + else + if approx_cov + @test @inferred(obs_cov(observed)) ≈ dat_cov + else + @test @inferred(obs_cov(observed)) == dat_cov + end + end + end + + # FIXME actually, SemObserved should not use meanstructure and always provide obs_mean() + # meanstructure is a part of SEM model + if meanstructure + if !isnothing(dat_mean) + if hasmissing + @test isequal(@inferred(obs_mean(observed)), dat_mean) + else + @test isequal(@inferred(obs_mean(observed)), dat_mean) + end + else + # FIXME if meanstructure is present, obs_mean() should provide something (currently Missing don't support it) + @test (@inferred(obs_mean(observed)) isa AbstractVector{Float64}) broken = true + end + else + @test @inferred(obs_mean(observed)) === nothing skip = true + end end ############################################################################################ -### tests - SemObservedMissing +@testset "SemObservedData" begin + + # errors + @test_throws ArgumentError( + "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * + "Please make sure the column names of your data frame indicate the correct variables " * + "or pass your data in a different format.", + ) begin + SemObservedData( + specification = spec, + data = dat, + obs_colnames = Symbol.(names(dat)), + ) + end + + @test_throws ArgumentError( + "Your `data` can not be indexed by symbols. " * + "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", + ) begin + SemObservedData(specification = spec, data = dat_matrix) + end + + @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin + SemObservedData(specification = spec, data = dat_matrix, obs_colnames = names(dat)) + end + + @test_throws UndefKeywordError(:data) SemObservedData(specification = spec) + + @test_throws UndefKeywordError(:specification) SemObservedData(data = dat_matrix) + + @testset "meanstructure=$meanstructure" for meanstructure in (false, true) + observed = SemObservedData(specification = spec, data = dat; meanstructure) + + test_observed(observed, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + observed_nospec = + SemObservedData(specification = nothing, data = dat_matrix; meanstructure) + + test_observed(observed_nospec, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + observed_matrix = SemObservedData( + specification = spec, + data = dat_matrix, + obs_colnames = Symbol.(names(dat)), + meanstructure = meanstructure, + ) + + test_observed(observed_matrix, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + observed_shuffle = + SemObservedData(specification = spec, data = shuffle_dat; meanstructure) + + test_observed(observed_shuffle, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + observed_matrix_shuffle = SemObservedData( + specification = spec, + data = shuffle_dat_matrix, + obs_colnames = shuffle_names; + meanstructure, + ) + + test_observed( + observed_matrix_shuffle, + dat, + dat_matrix, + dat_cov, + dat_mean; + meanstructure, + ) + end # meanstructure +end # SemObservedData + ############################################################################################ -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedMissing( - specification = spec, - data = dat_missing, - obs_colnames = Symbol.(names(dat)), - ) -end +@testset "SemObservedCovariance" begin + + # errors + + @test_throws UndefKeywordError(:nsamples) SemObservedCovariance(obs_cov = dat_cov) + + @test_throws ArgumentError("no `obs_colnames` were specified") begin + SemObservedCovariance( + specification = spec, + obs_cov = dat_cov, + nsamples = size(dat, 1), + ) + end + + @test_throws ArgumentError("observed means were passed, but `meanstructure = false`") begin + SemObservedCovariance( + specification = nothing, + obs_cov = dat_cov, + obs_mean = dat_mean, + nsamples = size(dat, 1), + ) + end + + @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin + SemObservedCovariance( + specification = spec, + obs_cov = dat_cov, + obs_colnames = names(dat), + nsamples = size(dat, 1), + meanstructure = false, + ) + end + + @test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin + SemObservedCovariance( + specification = spec, + obs_cov = dat_cov, + obs_colnames = Symbol.(names(dat)), + meanstructure = true, + nsamples = size(dat, 1), + ) + end + + @testset "meanstructure=$meanstructure" for meanstructure in (false, true) + + # errors + @test_throws UndefKeywordError SemObservedCovariance( + obs_cov = dat_cov; + meanstructure, + ) + + @test_throws UndefKeywordError SemObservedCovariance( + data = dat_matrix; + meanstructure, + ) + + # should work + observed = SemObservedCovariance( + specification = spec, + obs_cov = dat_cov, + obs_mean = meanstructure ? dat_mean : nothing, + obs_colnames = obs_colnames = Symbol.(names(dat)), + nsamples = size(dat, 1), + meanstructure = meanstructure, + ) + + test_observed( + observed, + dat, + nothing, + dat_cov, + dat_mean; + meanstructure, + approx_cov = true, + ) + + @test_throws ErrorException samples(observed) + + observed_nospec = SemObservedCovariance( + specification = nothing, + obs_cov = dat_cov, + obs_mean = meanstructure ? dat_mean : nothing, + nsamples = size(dat, 1); + meanstructure, + ) + + test_observed( + observed_nospec, + dat, + nothing, + dat_cov, + dat_mean; + meanstructure, + approx_cov = true, + ) + + @test_throws ErrorException samples(observed_nospec) + + observed_shuffle = SemObservedCovariance( + specification = spec, + obs_cov = shuffle_dat_cov, + obs_mean = meanstructure ? dat_mean[new_order] : nothing, + obs_colnames = shuffle_names, + nsamples = size(dat, 1); + meanstructure, + ) + + test_observed( + observed_shuffle, + dat, + nothing, + dat_cov, + dat_mean; + meanstructure, + approx_cov = true, + ) + + @test_throws ErrorException samples(observed_shuffle) + + # respect specification order + @test @inferred(obs_cov(observed_shuffle)) ≈ obs_cov(observed) + @test @inferred(observed_vars(observed_shuffle)) == shuffle_names broken = true + end # meanstructure +end # SemObservedCovariance -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedMissing(specification = spec, data = dat_missing_matrix) -end +############################################################################################ -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedMissing( - specification = spec, +@testset "SemObservedMissing" begin + + # errors + @test_throws ArgumentError( + "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * + "Please make sure the column names of your data frame indicate the correct variables " * + "or pass your data in a different format.", + ) begin + SemObservedMissing( + specification = spec, + data = dat_missing, + obs_colnames = Symbol.(names(dat)), + ) + end + + @test_throws ArgumentError( + "Your `data` can not be indexed by symbols. " * + "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", + ) begin + SemObservedMissing(specification = spec, data = dat_missing_matrix) + end + + @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin + SemObservedMissing( + specification = spec, + data = dat_missing_matrix, + obs_colnames = names(dat), + ) + end + + @test_throws UndefKeywordError(:data) SemObservedMissing(specification = spec) + + @test_throws UndefKeywordError(:specification) SemObservedMissing( data = dat_missing_matrix, - obs_colnames = names(dat), ) -end - -@test_throws UndefKeywordError(:data) SemObservedMissing(specification = spec) - -@test_throws UndefKeywordError(:specification) SemObservedMissing(data = dat_missing_matrix) - -# should work -observed = SemObservedMissing(specification = spec, data = dat_missing) - -observed_nospec = SemObservedMissing(specification = nothing, data = dat_missing_matrix) -observed_matrix = SemObservedMissing( - specification = spec, - data = dat_missing_matrix, - obs_colnames = Symbol.(names(dat)), -) - -all_equal_data = - isequal(samples(observed), samples(observed_nospec)) & - isequal(samples(observed), samples(observed_matrix)) - -@testset "unit tests | SemObservedMissing | input formats" begin - @test all_equal_data -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat_missing = dat_missing[:, new_order] - -shuffle_dat_missing_matrix = dat_missing_matrix[:, new_order] - -observed_shuffle = SemObservedMissing(specification = spec, data = shuffle_dat_missing) - -observed_matrix_shuffle = SemObservedMissing( - specification = spec, - data = shuffle_dat_missing_matrix, - obs_colnames = shuffle_names, -) - -all_equal_data_shuffled = - isequal(samples(observed), samples(observed_shuffle)) & - isequal(samples(observed), samples(observed_matrix_shuffle)) - -@testset "unit tests | SemObservedMissing | input formats shuffled " begin - @test all_equal_data_suffled -end + @testset "meanstructure=$meanstructure" for meanstructure in (false, true) + observed = + SemObservedMissing(specification = spec, data = dat_missing; meanstructure) + + test_observed( + observed, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + @test @inferred(length(StructuralEquationModels.patterns(observed))) == 55 + @test sum(@inferred(StructuralEquationModels.pattern_nsamples(observed))) == + size(dat_missing, 1) + @test all( + <=(size(dat_missing, 2)), + @inferred(StructuralEquationModels.pattern_nsamples(observed)) + ) + + observed_nospec = SemObservedMissing( + specification = nothing, + data = dat_missing_matrix; + meanstructure, + ) + + test_observed( + observed_nospec, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + observed_matrix = SemObservedMissing( + specification = spec, + data = dat_missing_matrix, + obs_colnames = Symbol.(names(dat)), + ) + + test_observed( + observed_matrix, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + observed_shuffle = + SemObservedMissing(specification = spec, data = shuffle_dat_missing) + + test_observed( + observed_shuffle, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + observed_matrix_shuffle = SemObservedMissing( + specification = spec, + data = shuffle_dat_missing_matrix, + obs_colnames = shuffle_names, + ) + + test_observed( + observed_matrix_shuffle, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + end # meanstructure +end # SemObservedMissing diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index eb58650c..b8400e54 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -4,10 +4,10 @@ using Test, SafeTestsets include("multithreading.jl") end -@safetestset "SemObs" begin - include("data_input_formats.jl") -end - @safetestset "Matrix algebra helper functions" begin include("matrix_helpers.jl") end + +@safetestset "SemObserved" begin + include("data_input_formats.jl") +end From 1f1f976ed6787cb3ecd0178d07bf4b8a030b7e98 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 26 Jun 2024 19:45:39 -0700 Subject: [PATCH 22/26] ParTable(graph): group is only valid for ensemble --- src/frontend/specification/StenoGraphs.jl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl index 67bb7973..9f72f36a 100644 --- a/src/frontend/specification/StenoGraphs.jl +++ b/src/frontend/specification/StenoGraphs.jl @@ -36,7 +36,7 @@ function ParameterTable( observed_vars::AbstractVector{Symbol}, latent_vars::AbstractVector{Symbol}, params::Union{AbstractVector{Symbol}, Nothing} = nothing, - group::Integer = 1, + group::Union{Integer, Nothing} = nothing, param_prefix = :θ, ) graph = unique(graph) @@ -69,7 +69,17 @@ function ParameterTable( end if element isa ModifiedEdge for modifier in values(element.modifiers) - modval = modifier.value[group] + if isnothing(group) && + modifier.value isa Union{AbstractVector, Tuple} && + length(modifier.value) > 1 + throw( + ArgumentError( + "The graph contains a group of parameters, ParameterTable expects a single value.\n" * + "For SEM ensembles, use EnsembleParameterTable instead.", + ), + ) + end + modval = modifier.value[something(group, 1)] if modifier isa Fixed if modval == :NaN free[i] = true From 1f120879d3e3cfe204b7d5d6b842ea9070d8e939 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 26 Jun 2024 19:46:31 -0700 Subject: [PATCH 23/26] ParTable(graph): fix NaN modif detection --- src/frontend/specification/StenoGraphs.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl index 9f72f36a..69e7bc94 100644 --- a/src/frontend/specification/StenoGraphs.jl +++ b/src/frontend/specification/StenoGraphs.jl @@ -27,6 +27,11 @@ struct Label{N} <: EdgeModifier end label(args...) = Label(args) +# test whether the modifier is NaN +isnanmodval(val::Number) = isnan(val) +isnanmodval(val::Symbol) = val == :NaN +isnanmodval(val::SimpleNode{Symbol}) = val.node == :NaN + ############################################################################################ ### constructor for parameter table from graph ############################################################################################ @@ -81,7 +86,7 @@ function ParameterTable( end modval = modifier.value[something(group, 1)] if modifier isa Fixed - if modval == :NaN + if isnanmodval(modval) free[i] = true value_fixed[i] = 0.0 else @@ -89,9 +94,11 @@ function ParameterTable( value_fixed[i] = modval end elseif modifier isa Start - start[i] = modval + if !isnanmodval(modval) + start[i] = modval + end elseif modifier isa Label - if modval == :NaN + if isnanmodval(modval) throw(DomainError(NaN, "NaN is not allowed as a parameter label.")) end param_refs[i] = modval From 6f3afa8d844b73e9667015094050a8c4b0f73db6 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 26 Jun 2024 19:47:48 -0700 Subject: [PATCH 24/26] refactor SemSpec tests --- test/Project.toml | 1 + test/unit_tests/specification.jl | 133 ++++++++++++++++++++++++++++--- test/unit_tests/unit_tests.jl | 4 + 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index c5124c65..dad8fb6d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -12,4 +12,5 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StenoGraphs = "78862bba-adae-4a83-bb4d-33c106177f81" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/unit_tests/specification.jl b/test/unit_tests/specification.jl index c081dc0f..336b718b 100644 --- a/test/unit_tests/specification.jl +++ b/test/unit_tests/specification.jl @@ -1,22 +1,133 @@ -@testset "ParameterTable - RAMMatrices conversion" begin - partable = ParameterTable(ram_matrices) - @test ram_matrices == RAMMatrices(partable) -end +using StenoGraphs, StructuralEquationModels +using StructuralEquationModels: + vars, nvars, observed_vars, latent_vars, nobserved_vars, nlatent_vars, params, nparams -@testset "params()" begin - @test params(model_ml)[2, 10, 28] == [:x2, :x10, :x28] - @test params(model_ml) == params(partable) - @test params(model_ml) == params(RAMMatrices(partable)) -end +obs_vars = Symbol.("x", 1:9) +lat_vars = [:visual, :textual, :speed] graph = @StenoGraph begin + # measurement model + visual → fixed(1.0) * x1 + fixed(0.5) * x2 + fixed(0.6) * x3 + textual → fixed(1.0) * x4 + x5 + label(:a₁) * x6 + speed → fixed(1.0) * x7 + fixed(1.0) * x8 + label(:λ₉) * x9 + # variances and covariances + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) + visual ↔ textual + speed + textual ↔ speed +end + +ens_graph = @StenoGraph begin # measurement model visual → fixed(1.0, 1.0) * x1 + fixed(0.5, 0.5) * x2 + fixed(0.6, 0.8) * x3 textual → fixed(1.0, 1.0) * x4 + x5 + label(:a₁, :a₂) * x6 speed → fixed(1.0, 1.0) * x7 + fixed(1.0, NaN) * x8 + label(:λ₉, :λ₉) * x9 # variances and covariances - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ↔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) visual ↔ textual + speed textual ↔ speed end + +@testset "ParameterTable" begin + @testset "from StenoGraph" begin + @test_throws UndefKeywordError(:observed_vars) ParameterTable(graph) + @test_throws UndefKeywordError(:latent_vars) ParameterTable( + graph, + observed_vars = obs_vars, + ) + partable = @inferred( + ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) + ) + + @test partable isa ParameterTable + + # vars API + @test observed_vars(partable) == obs_vars + @test nobserved_vars(partable) == length(obs_vars) + @test latent_vars(partable) == lat_vars + @test nlatent_vars(partable) == length(lat_vars) + @test nvars(partable) == length(obs_vars) + length(lat_vars) + @test issetequal(vars(partable), [obs_vars; lat_vars]) + + # params API + @test params(partable) == [[:θ_1, :a₁, :λ₉]; Symbol.("θ_", 2:16)] + @test nparams(partable) == 18 + + # don't allow constructing ParameterTable from a graph for an ensemble + @test_throws ArgumentError ParameterTable( + ens_graph, + observed_vars = obs_vars, + latent_vars = lat_vars, + ) + end + + @testset "from RAMMatrices" begin + partable_orig = + ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) + ram_matrices = RAMMatrices(partable_orig) + + partable = @inferred(ParameterTable(ram_matrices)) + @test partable isa ParameterTable + @test issetequal(keys(partable.columns), keys(partable_orig.columns)) + # FIXME nrow()? + @test length(partable.columns[:from]) == length(partable_orig.columns[:from]) + @test partable == partable_orig broken = true + end +end + +@testset "EnsembleParameterTable" begin + groups = [:Pasteur, :Grant_White], + @test_throws UndefKeywordError(:observed_vars) EnsembleParameterTable(ens_graph) + @test_throws UndefKeywordError(:latent_vars) EnsembleParameterTable( + ens_graph, + observed_vars = obs_vars, + ) + @test_throws UndefKeywordError(:groups) EnsembleParameterTable( + ens_graph, + observed_vars = obs_vars, + latent_vars = lat_vars, + ) + + enspartable = @inferred( + EnsembleParameterTable( + ens_graph, + observed_vars = obs_vars, + latent_vars = lat_vars, + groups = [:Pasteur, :Grant_White], + ) + ) + @test enspartable isa EnsembleParameterTable + + @test nobserved_vars(enspartable) == length(obs_vars) broken = true + @test observed_vars(enspartable) == obs_vars broken = true + @test nlatent_vars(enspartable) == length(lat_vars) broken = true + @test latent_vars(enspartable) == lat_vars broken = true + @test nvars(enspartable) == length(obs_vars) + length(lat_vars) broken = true + @test issetequal(vars(enspartable), [obs_vars; lat_vars]) broken = true + + @test nparams(enspartable) == 36 + @test issetequal( + params(enspartable), + [Symbol.("gPasteur_", 1:16); Symbol.("gGrant_White_", 1:17); [:a₁, :a₂, :λ₉]], + ) +end + +@testset "RAMMatrices" begin + partable = ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) + + ram_matrices = @inferred(RAMMatrices(partable)) + @test ram_matrices isa RAMMatrices + + # vars API + @test nobserved_vars(ram_matrices) == length(obs_vars) + @test observed_vars(ram_matrices) == obs_vars + @test nlatent_vars(ram_matrices) == length(lat_vars) + @test latent_vars(ram_matrices) == lat_vars + @test nvars(ram_matrices) == length(obs_vars) + length(lat_vars) + @test issetequal(vars(ram_matrices), [obs_vars; lat_vars]) + + # params API + @test nparams(ram_matrices) == nparams(partable) + @test params(ram_matrices) == params(partable) +end diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index b8400e54..c0505148 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -11,3 +11,7 @@ end @safetestset "SemObserved" begin include("data_input_formats.jl") end + +@safetestset "SemSpecification" begin + include("specification.jl") +end From 29827749c822b91a227aace7740ccc05f5e168e4 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 27 Jun 2024 00:06:46 -0700 Subject: [PATCH 25/26] add Sem unit tests --- test/unit_tests/model.jl | 91 +++++++++++++++++++++++++++++++++++ test/unit_tests/unit_tests.jl | 4 ++ 2 files changed, 95 insertions(+) create mode 100644 test/unit_tests/model.jl diff --git a/test/unit_tests/model.jl b/test/unit_tests/model.jl new file mode 100644 index 00000000..0bf2c762 --- /dev/null +++ b/test/unit_tests/model.jl @@ -0,0 +1,91 @@ +using StructuralEquationModels, Test, Statistics +using StructuralEquationModels: + SemSpecification, + samples, + nsamples, + observed_vars, + nobserved_vars, + obs_cov, + obs_mean, + vars, + nvars, + observed_vars, + latent_vars, + nobserved_vars, + nlatent_vars, + params, + nparams + +dat = example_data("political_democracy") +dat_missing = example_data("political_democracy_missing")[:, names(dat)] + +obs_vars = [Symbol.("x", 1:3); Symbol.("y", 1:8)] +lat_vars = [:ind60, :dem60, :dem65] + +graph = @StenoGraph begin + # loadings + ind60 → fixed(1) * x1 + x2 + x3 + dem60 → fixed(1) * y1 + y2 + y3 + y4 + dem65 → fixed(1) * y5 + y6 + y7 + y8 + # latent regressions + label(:a) * dem60 ← ind60 + dem65 ← dem60 + dem65 ← ind60 + # variances + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) + # covariances + y1 ↔ y5 + y2 ↔ y4 + y6 + y3 ↔ y7 + y8 ↔ y4 + y6 +end + +ram_matrices = + RAMMatrices(ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars)) + +obs = SemObservedData(specification = ram_matrices, data = dat) + +function test_vars_api(semobj, spec::SemSpecification) + @test @inferred(nobserved_vars(semobj)) == nobserved_vars(spec) + @test observed_vars(semobj) == observed_vars(spec) + + @test @inferred(nlatent_vars(semobj)) == nlatent_vars(spec) + @test latent_vars(semobj) == latent_vars(spec) + + @test @inferred(nvars(semobj)) == nvars(spec) + @test vars(semobj) == vars(spec) +end + +function test_params_api(semobj, spec::SemSpecification) + @test @inferred(nparams(semobj)) == nparams(spec) + @test @inferred(params(semobj)) == params(spec) +end + +@testset "Sem(imply=$implytype, loss=$losstype)" for implytype in (RAM, RAMSymbolic), + losstype in (SemML, SemWLS) + + model = Sem( + specification = ram_matrices, + observed = obs, + imply = implytype, + loss = losstype, + ) + + @test model isa Sem + @test @inferred(imply(model)) isa implytype + @test @inferred(observed(model)) isa SemObserved + @test @inferred(optimizer(model)) isa SemOptimizer + + test_vars_api(model, ram_matrices) + test_params_api(model, ram_matrices) + + test_vars_api(imply(model), ram_matrices) + test_params_api(imply(model), ram_matrices) + + @test @inferred(loss(model)) isa SemLoss + semloss = loss(model).functions[1] + @test semloss isa losstype + + @test @inferred(nsamples(model)) == nsamples(obs) +end diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index c0505148..a638b991 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -15,3 +15,7 @@ end @safetestset "SemSpecification" begin include("specification.jl") end + +@safetestset "Sem model" begin + include("model.jl") +end From cf50d8cbb46fb4903229b6d07bfbca40a3f6cb83 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 27 Jun 2024 00:56:19 -0700 Subject: [PATCH 26/26] test check_acyclic() --- src/imply/abstract.jl | 2 +- test/unit_tests/matrix_helpers.jl | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/imply/abstract.jl b/src/imply/abstract.jl index afa2e0c0..6b26bdd0 100644 --- a/src/imply/abstract.jl +++ b/src/imply/abstract.jl @@ -23,7 +23,7 @@ function check_acyclic(A::AbstractMatrix) return UpperTriangular(A) else if acyclic - @info "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog = + @warn "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog = 1 end return A diff --git a/test/unit_tests/matrix_helpers.jl b/test/unit_tests/matrix_helpers.jl index b2f32f31..1556aa20 100644 --- a/test/unit_tests/matrix_helpers.jl +++ b/test/unit_tests/matrix_helpers.jl @@ -1,6 +1,10 @@ using StructuralEquationModels, Test, Random, SparseArrays, LinearAlgebra using StructuralEquationModels: - CommutationMatrix, transpose_linear_indices, duplication_matrix, elimination_matrix + CommutationMatrix, + check_acyclic, + transpose_linear_indices, + duplication_matrix, + elimination_matrix Random.seed!(73721) @@ -47,3 +51,15 @@ end E = elimination_matrix(m) @test E * vec(A) == A[tril(trues(size(A)))] end + +@testset "check_acyclic()" begin + @test check_acyclic([1 0; 0 1]) isa LowerTriangular{Int, Matrix{Int}} + @test check_acyclic([1 0; 1 1]) isa LowerTriangular{Int, Matrix{Int}} + @test check_acyclic([1.0 1.0; 0.0 1.0]) isa UpperTriangular{Float64, Matrix{Float64}} + + A = [0 1; 1 0] + @test check_acyclic(A) === A # returns the input if cyclic + + # acyclic, but not u/l triangular + @test_logs (:warn, r"^Your model is acyclic") check_acyclic([0 0 0; 1 0 1; 0 0 0]) +end