From 1fd1a6a3458c0a48442d56e5973a046a103414fd Mon Sep 17 00:00:00 2001
From: Maximilian Ernst <maximilian.stefan.ernst@gmail.com>
Date: Thu, 21 Nov 2024 11:51:24 +0100
Subject: [PATCH 1/3] add rand method for simulating data from a model

---
 src/additional_functions/simulation.jl        | 46 ++++++++++-
 .../political_democracy/constructor.jl        | 79 +++++++++++++++++++
 .../recover_parameters_twofact.jl             |  2 +-
 3 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl
index 0dda725c6..68ec62142 100644
--- a/src/additional_functions/simulation.jl
+++ b/src/additional_functions/simulation.jl
@@ -6,7 +6,7 @@
 Return a new model with swaped observed part.
 
 # Arguments
-- `model::AbstractSemSingle`: optimization algorithm.
+- `model::AbstractSemSingle`: model to swap the observed part of.
 - `kwargs`: additional keyword arguments; typically includes `data = ...`
 - `observed`: Either an object of subtype of `SemObserved` or a subtype of `SemObserved`
 
@@ -98,3 +98,47 @@ function update_observed(loss::SemLoss, new_observed; kwargs...)
     )
     return SemLoss(new_functions, loss.weights)
 end
+
+
+############################################################################################
+# simulate data
+############################################################################################
+"""
+    (1) rand(model::AbstractSemSingle, params, n)
+
+    (2) rand(model::AbstractSemSingle, n)
+
+Sample normally distributed data from the model-implied covariance matrix and mean vector.
+
+# Arguments
+- `model::AbstractSemSingle`: model to simulate from.
+- `params`: parameter values to simulate from.
+- `n::Integer`: Number of samples.
+
+# Examples
+```julia
+rand(model, start_simple(model), 100)
+```
+"""
+function Distributions.rand(
+        model::AbstractSemSingle{O, I, L, D}, 
+        params, 
+        n::Integer) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    update!(
+        EvaluationTargets{true, false, false}(),
+        model.imply,
+        model,
+        params)
+    return rand(model, n)
+end
+
+function Distributions.rand(
+        model::AbstractSemSingle{O, I, L, D},
+        n::Integer) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    if MeanStruct(model.imply) === NoMeanStruct
+        data = permutedims(rand(MvNormal(Symmetric(model.imply.Σ)), n))
+    elseif MeanStruct(model.imply) === HasMeanStruct
+        data = permutedims(rand(MvNormal(model.imply.μ, Symmetric(model.imply.Σ)), n))
+    end
+    return data
+end
\ No newline at end of file
diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
index bf674dd73..3fc99289a 100644
--- a/test/examples/political_democracy/constructor.jl
+++ b/test/examples/political_democracy/constructor.jl
@@ -1,4 +1,5 @@
 using Statistics: cov, mean
+using Random
 
 ############################################################################################
 ### models w.o. meanstructure
@@ -161,6 +162,43 @@ end
     )
 end
 
+############################################################################################
+### data simulation
+############################################################################################
+
+@testset "data_simulation_wo_mean" begin
+    # parameters to recover
+    params = start_simple(
+        model_ml;
+        start_loadings = 0.5,
+        start_regressions = 0.5,
+        start_variances_observed = 0.5,
+        start_variances_latent = 1.0,
+        start_covariances_observed = 0.2)
+    # set seed for simulation
+    Random.seed!(83472834)
+    colnames = Symbol.(names(example_data("political_democracy")))
+    # simulate data
+    model_ml_new = swap_observed(
+        model_ml,
+        data = rand(model_ml, params, 100_000),
+        specification = spec,
+        obs_colnames = colnames
+    )
+    model_ml_sym_new = swap_observed(
+        model_ml_sym,
+        data = rand(model_ml_sym, params, 100_000),
+        specification = spec,
+        obs_colnames = colnames
+    )
+    # fit models
+    sol_ml = solution(sem_fit(model_ml_new))
+    sol_ml_sym = solution(sem_fit(model_ml_sym_new))
+    # check solution
+    @test maximum(abs.(sol_ml - params)) < 0.01
+    @test maximum(abs.(sol_ml_sym - params)) < 0.01
+end
+
 ############################################################################################
 ### test hessians
 ############################################################################################
@@ -332,6 +370,47 @@ end
     )
 end
 
+
+############################################################################################
+### data simulation
+############################################################################################
+
+@testset "data_simulation_with_mean" begin
+    # parameters to recover
+    params = start_simple(
+        model_ml;
+        start_loadings = 0.5,
+        start_regressions = 0.5,
+        start_variances_observed = 0.5,
+        start_variances_latent = 1.0,
+        start_covariances_observed = 0.2,
+        start_means = 0.5)
+    # set seed for simulation
+    Random.seed!(83472834)
+    colnames = Symbol.(names(example_data("political_democracy")))
+    # simulate data
+    model_ml_new = swap_observed(
+        model_ml,
+        data = rand(model_ml, params, 100_000),
+        specification = spec,
+        obs_colnames = colnames,
+        meanstructure = true
+    )
+    model_ml_sym_new = swap_observed(
+        model_ml_sym,
+        data = rand(model_ml_sym, params, 100_000),
+        specification = spec,
+        obs_colnames = colnames,
+        meanstructure = true
+    )
+    # fit models
+    sol_ml = solution(sem_fit(model_ml_new))
+    sol_ml_sym = solution(sem_fit(model_ml_sym_new))
+    # check solution
+    @test maximum(abs.(sol_ml - params)) < 0.01
+    @test maximum(abs.(sol_ml_sym - params)) < 0.01
+end
+
 ############################################################################################
 ### fiml
 ############################################################################################
diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl
index 5aa79842c..f00187fac 100644
--- a/test/examples/recover_parameters/recover_parameters_twofact.jl
+++ b/test/examples/recover_parameters/recover_parameters_twofact.jl
@@ -60,7 +60,7 @@ imply_ml.Σ_function(imply_ml.Σ, true_val)
 true_dist = MultivariateNormal(imply_ml.Σ)
 
 Random.seed!(1234)
-x = transpose(rand(true_dist, 100000))
+x = transpose(rand(true_dist, 100_000))
 semobserved = SemObservedData(data = x, specification = nothing)
 
 loss_ml = SemLoss(SemML(; observed = semobserved, nparams = length(start)))

From 071005bb9d6b04301e9d1ff0d3fc0129ed38d4ec Mon Sep 17 00:00:00 2001
From: Maximilian Ernst <maximilian.stefan.ernst@gmail.com>
Date: Thu, 21 Nov 2024 12:14:38 +0100
Subject: [PATCH 2/3] format

---
 src/additional_functions/simulation.jl        | 21 ++++++++-----------
 .../political_democracy/constructor.jl        | 15 ++++++-------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl
index 68ec62142..f1e41f360 100644
--- a/src/additional_functions/simulation.jl
+++ b/src/additional_functions/simulation.jl
@@ -99,7 +99,6 @@ function update_observed(loss::SemLoss, new_observed; kwargs...)
     return SemLoss(new_functions, loss.weights)
 end
 
-
 ############################################################################################
 # simulate data
 ############################################################################################
@@ -121,24 +120,22 @@ rand(model, start_simple(model), 100)
 ```
 """
 function Distributions.rand(
-        model::AbstractSemSingle{O, I, L, D}, 
-        params, 
-        n::Integer) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
-    update!(
-        EvaluationTargets{true, false, false}(),
-        model.imply,
-        model,
-        params)
+    model::AbstractSemSingle{O, I, L, D},
+    params,
+    n::Integer,
+) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    update!(EvaluationTargets{true, false, false}(), model.imply, model, params)
     return rand(model, n)
 end
 
 function Distributions.rand(
-        model::AbstractSemSingle{O, I, L, D},
-        n::Integer) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    model::AbstractSemSingle{O, I, L, D},
+    n::Integer,
+) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
     if MeanStruct(model.imply) === NoMeanStruct
         data = permutedims(rand(MvNormal(Symmetric(model.imply.Σ)), n))
     elseif MeanStruct(model.imply) === HasMeanStruct
         data = permutedims(rand(MvNormal(model.imply.μ, Symmetric(model.imply.Σ)), n))
     end
     return data
-end
\ No newline at end of file
+end
diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
index 3fc99289a..6e16553f7 100644
--- a/test/examples/political_democracy/constructor.jl
+++ b/test/examples/political_democracy/constructor.jl
@@ -174,7 +174,8 @@ end
         start_regressions = 0.5,
         start_variances_observed = 0.5,
         start_variances_latent = 1.0,
-        start_covariances_observed = 0.2)
+        start_covariances_observed = 0.2,
+    )
     # set seed for simulation
     Random.seed!(83472834)
     colnames = Symbol.(names(example_data("political_democracy")))
@@ -183,13 +184,13 @@ end
         model_ml,
         data = rand(model_ml, params, 100_000),
         specification = spec,
-        obs_colnames = colnames
+        obs_colnames = colnames,
     )
     model_ml_sym_new = swap_observed(
         model_ml_sym,
         data = rand(model_ml_sym, params, 100_000),
         specification = spec,
-        obs_colnames = colnames
+        obs_colnames = colnames,
     )
     # fit models
     sol_ml = solution(sem_fit(model_ml_new))
@@ -370,7 +371,6 @@ end
     )
 end
 
-
 ############################################################################################
 ### data simulation
 ############################################################################################
@@ -384,7 +384,8 @@ end
         start_variances_observed = 0.5,
         start_variances_latent = 1.0,
         start_covariances_observed = 0.2,
-        start_means = 0.5)
+        start_means = 0.5,
+    )
     # set seed for simulation
     Random.seed!(83472834)
     colnames = Symbol.(names(example_data("political_democracy")))
@@ -394,14 +395,14 @@ end
         data = rand(model_ml, params, 100_000),
         specification = spec,
         obs_colnames = colnames,
-        meanstructure = true
+        meanstructure = true,
     )
     model_ml_sym_new = swap_observed(
         model_ml_sym,
         data = rand(model_ml_sym, params, 100_000),
         specification = spec,
         obs_colnames = colnames,
-        meanstructure = true
+        meanstructure = true,
     )
     # fit models
     sol_ml = solution(sem_fit(model_ml_new))

From b7c111df9a6f075782ff222b1d30ed3556bee22c Mon Sep 17 00:00:00 2001
From: Maximilian Ernst <maximilian.stefan.ernst@gmail.com>
Date: Thu, 21 Nov 2024 12:36:21 +0100
Subject: [PATCH 3/3] increase test sample size

---
 test/examples/political_democracy/constructor.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
index 6e16553f7..99ef06b3a 100644
--- a/test/examples/political_democracy/constructor.jl
+++ b/test/examples/political_democracy/constructor.jl
@@ -182,13 +182,13 @@ end
     # simulate data
     model_ml_new = swap_observed(
         model_ml,
-        data = rand(model_ml, params, 100_000),
+        data = rand(model_ml, params, 1_000_000),
         specification = spec,
         obs_colnames = colnames,
     )
     model_ml_sym_new = swap_observed(
         model_ml_sym,
-        data = rand(model_ml_sym, params, 100_000),
+        data = rand(model_ml_sym, params, 1_000_000),
         specification = spec,
         obs_colnames = colnames,
     )
@@ -392,14 +392,14 @@ end
     # simulate data
     model_ml_new = swap_observed(
         model_ml,
-        data = rand(model_ml, params, 100_000),
+        data = rand(model_ml, params, 1_000_000),
         specification = spec,
         obs_colnames = colnames,
         meanstructure = true,
     )
     model_ml_sym_new = swap_observed(
         model_ml_sym,
-        data = rand(model_ml_sym, params, 100_000),
+        data = rand(model_ml_sym, params, 1_000_000),
         specification = spec,
         obs_colnames = colnames,
         meanstructure = true,