diff --git a/Project.toml b/Project.toml
index 1bd335f19..b0e421f21 100644
--- a/Project.toml
+++ b/Project.toml
@@ -34,6 +34,7 @@ NLSolversBase = "7"
 NLopt = "0.6, 1"
 Optim = "1"
 PrettyTables = "2"
+ProximalAlgorithms = "0.5"
 StatsBase = "0.33, 0.34"
 Symbolics = "4, 5, 6"
 SymbolicUtils = "1.4 - 1.5, 1.7, 2, 3"
@@ -48,7 +49,7 @@ test = ["Test"]
 NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
 ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9"
 ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b"
-ProximalOperators = "f3b72e0c-5f3e-4b3e-8f3e-3f4f3e3e3e3e"
+ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
 
 [extensions]
 SEMNLOptExt = "NLopt"
diff --git a/docs/src/developer/observed.md b/docs/src/developer/observed.md
index 2b695e597..93eca6ed9 100644
--- a/docs/src/developer/observed.md
+++ b/docs/src/developer/observed.md
@@ -22,10 +22,10 @@ end
 To compute some fit indices, you need to provide methods for
 
 ```julia
-# Number of observed datapoints
-n_obs(observed::MyObserved) = ...
-# Number of manifest variables
-n_man(observed::MyObserved) = ...
+# Number of samples (observations) in the dataset
+nsamples(observed::MyObserved) = ...
+# Number of observed variables
+nobserved_vars(observed::MyObserved) = ...
 ```
 
 As always, you can add additional methods for properties that imply types and loss function want to access, for example (from the `SemObservedCommon` implementation):
diff --git a/docs/src/tutorials/inspection/inspection.md b/docs/src/tutorials/inspection/inspection.md
index b2eefadb2..88caf5812 100644
--- a/docs/src/tutorials/inspection/inspection.md
+++ b/docs/src/tutorials/inspection/inspection.md
@@ -1,7 +1,7 @@
 # Model inspection
 
 ```@setup colored
-using StructuralEquationModels 
+using StructuralEquationModels
 
 observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8]
 latent_vars = [:ind60, :dem60, :dem65]
@@ -32,7 +32,7 @@ end
 
 partable = ParameterTable(
     graph,
-    latent_vars = latent_vars, 
+    latent_vars = latent_vars,
     observed_vars = observed_vars)
 
 data = example_data("political_democracy")
@@ -128,8 +128,8 @@ BIC
 χ²
 df
 minus2ll
-n_man
-n_obs
+nobserved_vars
+nsamples
 nparams
 p_value
 RMSEA
diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl
index 9e0fc3669..a6677a4ed 100644
--- a/src/StructuralEquationModels.jl
+++ b/src/StructuralEquationModels.jl
@@ -41,8 +41,9 @@ include("frontend/fit/summary.jl")
 include("frontend/pretty_printing.jl")
 # observed
 include("observed/abstract.jl")
-include("observed/covariance.jl")
 include("observed/data.jl")
+include("observed/covariance.jl")
+include("observed/missing_pattern.jl")
 include("observed/missing.jl")
 include("observed/EM.jl")
 # constructor
diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl
index be559b0d9..71b2559a8 100644
--- a/src/additional_functions/helper.jl
+++ b/src/additional_functions/helper.jl
@@ -98,11 +98,6 @@ function sparse_outer_mul!(C, A, B::Vector, ind) #computes A*S*B -> C, where ind
     end
 end
 
-function cov_and_mean(rows; corrected = false)
-    obs_mean, obs_cov = StatsBase.mean_and_cov(reduce(hcat, rows), 2, corrected = corrected)
-    return obs_cov, vec(obs_mean)
-end
-
 # n²×(n(n+1)/2) matrix to transform a vector of lower
 # triangular entries into a vectorized form of a n×n symmetric matrix,
 # opposite of elimination_matrix()
diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl
index 54a4ce12d..1cddee71d 100644
--- a/src/frontend/fit/fitmeasures/minus2ll.jl
+++ b/src/frontend/fit/fitmeasures/minus2ll.jl
@@ -31,74 +31,33 @@ minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) =
 # compute likelihood for missing data - H0 -------------------------------------------------
 # -2ll = (∑ log(2π)*(nᵢ + mᵢ)) + F*n
 function minus2ll(minimum::Number, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemFIML)
-    F = minimum
-    F *= nsamples(observed)
-    F += sum(log(2π) * observed.pattern_nsamples .* observed.pattern_nobs_vars)
+    F = minimum * nsamples(observed)
+    F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns)
     return F
 end
 
 # compute likelihood for missing data - H1 -------------------------------------------------
 # -2ll =  ∑ log(2π)*(nᵢ + mᵢ) + ln(Σᵢ) + (mᵢ - μᵢ)ᵀ Σᵢ⁻¹ (mᵢ - μᵢ)) + tr(SᵢΣᵢ)
 function minus2ll(observed::SemObservedMissing)
-    if observed.em_model.fitted
-        minus2ll(
-            observed.em_model.μ,
-            observed.em_model.Σ,
-            nsamples(observed),
-            pattern_rows(observed),
-            observed.patterns,
-            observed.obs_mean,
-            observed.obs_cov,
-            observed.pattern_nsamples,
-            observed.pattern_nobs_vars,
-        )
-    else
-        em_mvn(observed)
-        minus2ll(
-            observed.em_model.μ,
-            observed.em_model.Σ,
-            nsamples(observed),
-            pattern_rows(observed),
-            observed.patterns,
-            observed.obs_mean,
-            observed.obs_cov,
-            observed.pattern_nsamples,
-            observed.pattern_nobs_vars,
-        )
-    end
-end
-
-function minus2ll(
-    μ,
-    Σ,
-    N,
-    rows,
-    patterns,
-    obs_mean,
-    obs_cov,
-    pattern_nsamples,
-    pattern_nobs_vars,
-)
-    F = 0.0
+    # fit EM-based mean and cov if not yet fitted
+    # FIXME EM could be very computationally expensive
+    observed.em_model.fitted || em_mvn(observed)
 
-    for i in 1:length(rows)
-        nᵢ = pattern_nsamples[i]
-        # missing pattern
-        pattern = patterns[i]
-        # observed data
-        Sᵢ = obs_cov[i]
+    Σ = observed.em_model.Σ
+    μ = observed.em_model.μ
 
+    F = sum(observed.patterns) do pat
         # implied covariance/mean
-        Σᵢ = Σ[pattern, pattern]
-        ld = logdet(Σᵢ)
-        Σᵢ⁻¹ = inv(cholesky(Σᵢ))
-        meandiffᵢ = obs_mean[i] - μ[pattern]
+        Σᵢ = Σ[pat.measured_mask, pat.measured_mask]
+        Σᵢ_chol = cholesky!(Σᵢ)
+        ld = logdet(Σᵢ_chol)
+        Σᵢ⁻¹ = LinearAlgebra.inv!(Σᵢ_chol)
+        meandiffᵢ = pat.measured_mean - μ[pat.measured_mask]
 
-        F += F_one_pattern(meandiffᵢ, Σᵢ⁻¹, Sᵢ, ld, nᵢ)
+        F_one_pattern(meandiffᵢ, Σᵢ⁻¹, pat.measured_cov, ld, nsamples(pat))
     end
 
-    F += sum(log(2π) * pattern_nsamples .* pattern_nobs_vars)
-    #F *= N
+    F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns)
 
     return F
 end
diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl
index 8b7cc0973..07c24e46e 100644
--- a/src/frontend/specification/ParameterTable.jl
+++ b/src/frontend/specification/ParameterTable.jl
@@ -27,9 +27,9 @@ empty_partable_columns(nrows::Integer = 0) = Dict{Symbol, Vector}(
     :param => fill(Symbol(), nrows),
 )
 
-# construct using the provided columns data or create and empty table
+# construct using the provided columns data or create an empty table
 function ParameterTable(
-    columns::Dict{Symbol, Vector} = empty_partable_columns();
+    columns::Dict{Symbol, Vector};
     observed_vars::Union{AbstractVector{Symbol}, Nothing} = nothing,
     latent_vars::Union{AbstractVector{Symbol}, Nothing} = nothing,
     params::Union{AbstractVector{Symbol}, Nothing} = nothing,
diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl
index 741d5f3c6..28984dbe9 100644
--- a/src/frontend/specification/Sem.jl
+++ b/src/frontend/specification/Sem.jl
@@ -3,6 +3,7 @@
 ############################################################################################
 
 function Sem(;
+    specification = ParameterTable,
     observed::O = SemObservedData,
     imply::I = RAM,
     loss::L = SemML,
@@ -12,7 +13,7 @@ function Sem(;
 
     set_field_type_kwargs!(kwdict, observed, imply, loss, O, I)
 
-    observed, imply, loss = get_fields!(kwdict, observed, imply, loss)
+    observed, imply, loss = get_fields!(kwdict, specification, observed, imply, loss)
 
     sem = Sem(observed, imply, loss)
 
@@ -59,6 +60,7 @@ Returns the loss part of a model.
 loss(model::AbstractSemSingle) = model.loss
 
 function SemFiniteDiff(;
+    specification = ParameterTable,
     observed::O = SemObservedData,
     imply::I = RAM,
     loss::L = SemML,
@@ -68,7 +70,7 @@ function SemFiniteDiff(;
 
     set_field_type_kwargs!(kwdict, observed, imply, loss, O, I)
 
-    observed, imply, loss = get_fields!(kwdict, observed, imply, loss)
+    observed, imply, loss = get_fields!(kwdict, specification, observed, imply, loss)
 
     sem = SemFiniteDiff(observed, imply, loss)
 
@@ -96,23 +98,27 @@ function set_field_type_kwargs!(kwargs, observed, imply, loss, O, I)
 end
 
 # construct Sem fields
-function get_fields!(kwargs, observed, imply, loss)
+function get_fields!(kwargs, specification, observed, imply, loss)
+    if !isa(specification, SemSpecification)
+        specification = specification(; kwargs...)
+    end
+
     # observed
     if !isa(observed, SemObserved)
-        observed = observed(; kwargs...)
+        observed = observed(; specification, kwargs...)
     end
     kwargs[:observed] = observed
 
     # imply
     if !isa(imply, SemImply)
-        imply = imply(; kwargs...)
+        imply = imply(; specification, kwargs...)
     end
 
     kwargs[:imply] = imply
     kwargs[:nparams] = nparams(imply)
 
     # loss
-    loss = get_SemLoss(loss; kwargs...)
+    loss = get_SemLoss(loss; specification, kwargs...)
     kwargs[:loss] = loss
 
     return observed, imply, loss
diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl
index 5cf87c07a..65bace302 100644
--- a/src/frontend/specification/StenoGraphs.jl
+++ b/src/frontend/specification/StenoGraphs.jl
@@ -129,6 +129,17 @@ function ParameterTable(
     return ParameterTable(columns; latent_vars, observed_vars, params)
 end
 
+############################################################################################
+### keyword only constructor (for call in `Sem` constructor)
+############################################################################################
+
+# FIXME: this kw-only ctor conflicts with the empty ParTable constructor;
+#        it is left here for compatibility with the current Sem construction API,
+#        the proper fix would be to move away from kw-only ctors in general
+ParameterTable(; graph::Union{AbstractStenoGraph, Nothing} = nothing, kwargs...) =
+    !isnothing(graph) ? ParameterTable(graph; kwargs...) :
+    ParameterTable(empty_partable_columns(); kwargs...)
+
 ############################################################################################
 ### constructor for EnsembleParameterTable from graph
 ############################################################################################
diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl
index 20c81b831..bf020d561 100644
--- a/src/loss/ML/FIML.jl
+++ b/src/loss/ML/FIML.jl
@@ -47,23 +47,27 @@ end
 ### Constructors
 ############################################################################################
 
-function SemFIML(; observed, specification, kwargs...)
-    inverses = broadcast(x -> zeros(x, x), pattern_nobs_vars(observed))
+function SemFIML(; observed::SemObservedMissing, specification, kwargs...)
+    inverses =
+        [zeros(nmeasured_vars(pat), nmeasured_vars(pat)) for pat in observed.patterns]
     choleskys = Array{Cholesky{Float64, Array{Float64, 2}}, 1}(undef, length(inverses))
 
-    n_patterns = size(pattern_rows(observed), 1)
+    n_patterns = length(observed.patterns)
     logdets = zeros(n_patterns)
 
-    imp_mean = zeros.(pattern_nobs_vars(observed))
-    meandiff = zeros.(pattern_nobs_vars(observed))
+    imp_mean = [zeros(nmeasured_vars(pat)) for pat in observed.patterns]
+    meandiff = [zeros(nmeasured_vars(pat)) for pat in observed.patterns]
 
     nobs_vars = nobserved_vars(observed)
     imp_inv = zeros(nobs_vars, nobs_vars)
     mult = similar.(inverses)
 
-    ∇ind = vec(CartesianIndices(Array{Float64}(undef, nobs_vars, nobs_vars)))
-    ∇ind =
-        [findall(x -> !(x[1] ∈ ind || x[2] ∈ ind), ∇ind) for ind in patterns_not(observed)]
+    # generate linear indicies of co-observed variable pairs for each pattern
+    Σ_linind = LinearIndices((nobs_vars, nobs_vars))
+    ∇ind = map(observed.patterns) do pat
+        pat_vars = findall(pat.measured_mask)
+        vec(Σ_linind[pat_vars, pat_vars])
+    end
 
     return SemFIML(
         ExactHessian(),
@@ -104,10 +108,10 @@ function evaluate!(
     prepare_SemFIML!(semfiml, model)
 
     scale = inv(nsamples(observed(model)))
-    obs_rows = pattern_rows(observed(model))
-    isnothing(objective) || (objective = scale * F_FIML(obs_rows, semfiml, model, params))
+    isnothing(objective) ||
+        (objective = scale * F_FIML(observed(model), semfiml, model, params))
     isnothing(gradient) ||
-        (∇F_FIML!(gradient, obs_rows, semfiml, model); gradient .*= scale)
+        (∇F_FIML!(gradient, observed(model), semfiml, model); gradient .*= scale)
 
     return objective
 end
@@ -131,16 +135,16 @@ function F_one_pattern(meandiff, inverse, obs_cov, logdet, N)
     return F * N
 end
 
-function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, model)
+function ∇F_one_pattern(μ_diff, Σ⁻¹, S, obs_mask, ∇ind, N, Jμ, JΣ, model)
     diff⨉inv = μ_diff' * Σ⁻¹
 
     if N > one(N)
         JΣ[∇ind] .+= N * vec(Σ⁻¹ * (I - S * Σ⁻¹ - μ_diff * diff⨉inv))
-        @. Jμ[pattern] += (N * 2 * diff⨉inv)'
+        @. Jμ[obs_mask] += (N * 2 * diff⨉inv)'
 
     else
         JΣ[∇ind] .+= vec(Σ⁻¹ * (I - μ_diff * diff⨉inv))
-        @. Jμ[pattern] += (2 * diff⨉inv)'
+        @. Jμ[obs_mask] += (2 * diff⨉inv)'
     end
 end
 
@@ -163,32 +167,32 @@ function ∇F_fiml_outer!(G, JΣ, Jμ, imply, model, semfiml)
     mul!(G, ∇μ', Jμ, -1, 1)
 end
 
-function F_FIML(rows, semfiml, model, params)
+function F_FIML(observed::SemObservedMissing, semfiml, model, params)
     F = zero(eltype(params))
-    for i in 1:size(rows, 1)
+    for (i, pat) in enumerate(observed.patterns)
         F += F_one_pattern(
             semfiml.meandiff[i],
             semfiml.inverses[i],
-            obs_cov(observed(model))[i],
+            pat.measured_cov,
             semfiml.logdets[i],
-            pattern_nsamples(observed(model))[i],
+            nsamples(pat),
         )
     end
     return F
 end
 
-function ∇F_FIML!(G, rows, semfiml, model)
+function ∇F_FIML!(G, observed::SemObservedMissing, semfiml, model)
     Jμ = zeros(nobserved_vars(model))
     JΣ = zeros(nobserved_vars(model)^2)
 
-    for i in 1:size(rows, 1)
+    for (i, pat) in enumerate(observed.patterns)
         ∇F_one_pattern(
             semfiml.meandiff[i],
             semfiml.inverses[i],
-            obs_cov(observed(model))[i],
-            patterns(observed(model))[i],
+            pat.measured_cov,
+            pat.measured_mask,
             semfiml.∇ind[i],
-            pattern_nsamples(observed(model))[i],
+            nsamples(pat),
             Jμ,
             JΣ,
             model,
@@ -202,29 +206,21 @@ function prepare_SemFIML!(semfiml, model)
     batch_cholesky!(semfiml, model)
     #batch_sym_inv_update!(semfiml, model)
     batch_inv!(semfiml, model)
-    for i in 1:size(pattern_nsamples(observed(model)), 1)
-        semfiml.meandiff[i] .= obs_mean(observed(model))[i] - semfiml.imp_mean[i]
+    for (i, pat) in enumerate(observed(model).patterns)
+        semfiml.meandiff[i] .= pat.measured_mean .- semfiml.imp_mean[i]
     end
 end
 
-function copy_per_pattern!(inverses, source_inverses, means, source_means, patterns)
-    @views for i in 1:size(patterns, 1)
-        inverses[i] .= source_inverses[patterns[i], patterns[i]]
-    end
-
-    @views for i in 1:size(patterns, 1)
-        means[i] .= source_means[patterns[i]]
+function copy_per_pattern!(fiml::SemFIML, model::AbstractSem)
+    Σ = imply(model).Σ
+    μ = imply(model).μ
+    data = observed(model)
+    @inbounds @views for (i, pat) in enumerate(data.patterns)
+        fiml.inverses[i] .= Σ[pat.measured_mask, pat.measured_mask]
+        fiml.imp_mean[i] .= μ[pat.measured_mask]
     end
 end
 
-copy_per_pattern!(semfiml, model::M where {M <: AbstractSem}) = copy_per_pattern!(
-    semfiml.inverses,
-    imply(model).Σ,
-    semfiml.imp_mean,
-    imply(model).μ,
-    patterns(observed(model)),
-)
-
 function batch_cholesky!(semfiml, model)
     for i in 1:size(semfiml.inverses, 1)
         semfiml.choleskys[i] = cholesky!(Symmetric(semfiml.inverses[i]))
diff --git a/src/observed/EM.jl b/src/observed/EM.jl
index ef5da317d..beac45ca8 100644
--- a/src/observed/EM.jl
+++ b/src/observed/EM.jl
@@ -37,9 +37,9 @@ function em_mvn(
     𝔼xxᵀ_pre = zeros(nvars, nvars)
 
     ### precompute for full cases
-    if length(observed.patterns[1]) == nvars
-        for row in pattern_rows(observed)[1]
-            row = observed.data_rowwise[row]
+    fullpat = observed.patterns[1]
+    if nmissed_vars(fullpat) == 0
+        for row in eachrow(fullpat.data)
             𝔼x_pre += row
             𝔼xxᵀ_pre += row * row'
         end
@@ -97,21 +97,27 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx
     Σ = em_model.Σ
 
     # Compute the expected sufficient statistics
-    for i in 2:length(observed.pattern_nsamples)
+    for pat in observed.patterns
+        (nmissed_vars(pat) == 0) && continue # skip full cases
 
         # observed and unobserved vars
-        u = observed.patterns_not[i]
-        o = observed.patterns[i]
+        u = pat.miss_mask
+        o = pat.measured_mask
 
         # precompute for pattern
-        V = Σ[u, u] - Σ[u, o] * (Σ[o, o] \ Σ[o, u])
+        Σoo = Σ[o, o]
+        Σuo = Σ[u, o]
+        μu = μ[u]
+        μo = μ[o]
+
+        V = Σ[u, u] - Σuo * (Σoo \ Σ[o, u])
 
         # loop trough data
-        for row in pattern_rows(observed)[i]
-            m = μ[u] + Σ[u, o] * (Σ[o, o] \ (observed.data_rowwise[row] - μ[o]))
+        for rowdata in eachrow(pat.data)
+            m = μu + Σuo * (Σoo \ (rowdata - μo))
 
             𝔼xᵢ[u] = m
-            𝔼xᵢ[o] = observed.data_rowwise[row]
+            𝔼xᵢ[o] = rowdata
             𝔼xxᵀᵢ[u, u] = 𝔼xᵢ[u] * 𝔼xᵢ[u]' + V
             𝔼xxᵀᵢ[o, o] = 𝔼xᵢ[o] * 𝔼xᵢ[o]'
             𝔼xxᵀᵢ[o, u] = 𝔼xᵢ[o] * 𝔼xᵢ[u]'
@@ -153,10 +159,10 @@ end
 
 # use μ and Σ of full cases
 function start_em_observed(observed::SemObservedMissing; kwargs...)
-    if (length(observed.patterns[1]) == nobserved_vars(observed)) &
-       (observed.pattern_nsamples[1] > 1)
-        μ = copy(observed.obs_mean[1])
-        Σ = copy(Symmetric(observed.obs_cov[1]))
+    fullpat = observed.patterns[1]
+    if (nmissed_vars(fullpat) == 0) && (nobserved_vars(fullpat) > 1)
+        μ = copy(fullpat.measured_mean)
+        Σ = copy(Symmetric(fullpat.measured_cov))
         if !isposdef(Σ)
             Σ = Matrix(Diagonal(Σ))
         end
diff --git a/src/observed/abstract.jl b/src/observed/abstract.jl
index 90de8b5a6..bb92ea12e 100644
--- a/src/observed/abstract.jl
+++ b/src/observed/abstract.jl
@@ -8,3 +8,146 @@ Rows are samples, columns are observed variables.
 [`nsamples`](@ref), [`observed_vars`](@ref).
 """
 samples(observed::SemObserved) = observed.data
+nsamples(observed::SemObserved) = observed.nsamples
+
+observed_vars(observed::SemObserved) = observed.observed_vars
+
+############################################################################################
+### Additional functions
+############################################################################################
+
+# generate default observed variable names if none provided
+default_observed_vars(nobserved_vars::Integer, prefix::Union{Symbol, AbstractString}) =
+    Symbol.(prefix, 1:nobserved_vars)
+
+# compute the permutation that subsets and reorders source elements
+# to match the destination order.
+# if multiple identical elements are present in the source, the last one is used.
+# if one_to_one is true, checks that the source and destination have the same length.
+function source_to_dest_perm(
+    src::AbstractVector,
+    dest::AbstractVector;
+    one_to_one::Bool = false,
+    entities::String = "elements",
+)
+    if dest == src # exact match
+        return eachindex(dest)
+    else
+        one_to_one &&
+            length(dest) != length(src) &&
+            throw(
+                DimensionMismatch(
+                    "The length of the new $entities order ($(length(dest))) " *
+                    "does not match the number of $entities ($(length(src)))",
+                ),
+            )
+        src_inds = Dict(el => i for (i, el) in enumerate(src))
+        return [src_inds[el] for el in dest]
+    end
+end
+
+# function to prepare input data shared by SemObserved implementations
+# returns tuple of
+# 1) the matrix of data
+# 2) the observed variable symbols that match matrix columns
+# 3) the permutation of the original observed_vars (nothing if no reordering)
+# If observed_vars is not specified, the vars order is taken from the specification.
+# If both observed_vars and specification are provided, the observed_vars are used to match
+# the column of the user-provided data matrix, and observed_vars(specification) is used to
+# reorder the columns of the data to match the speciation.
+# If no variable names are provided at all, generates the symbols in the form
+# Symbol(observed_var_prefix, i) for i=1:nobserved_vars.
+function prepare_data(
+    data::Union{AbstractDataFrame, AbstractMatrix, NTuple{2, Integer}, Nothing},
+    observed_vars::Union{AbstractVector, Nothing},
+    spec::Union{SemSpecification, Nothing};
+    observed_var_prefix::Union{Symbol, AbstractString},
+)
+    obs_vars = nothing
+    obs_vars_perm = nothing
+    if !isnothing(observed_vars)
+        obs_vars = Symbol.(observed_vars)
+        if !isnothing(spec)
+            obs_vars_spec = SEM.observed_vars(spec)
+            try
+                obs_vars_perm = source_to_dest_perm(
+                    obs_vars,
+                    obs_vars_spec,
+                    one_to_one = false,
+                    entities = "observed_vars",
+                )
+            catch err
+                if isa(err, KeyError)
+                    throw(
+                        ArgumentError(
+                            "observed_var \"$(err.key)\" from SEM specification is not listed in observed_vars argument",
+                        ),
+                    )
+                else
+                    rethrow(err)
+                end
+            end
+            # ignore trivial reorder
+            if obs_vars_perm == eachindex(obs_vars)
+                obs_vars_perm = nothing
+            end
+        end
+    elseif !isnothing(spec)
+        obs_vars = SEM.observed_vars(spec)
+    end
+    # observed vars in the order that matches the specification
+    obs_vars_reordered = isnothing(obs_vars_perm) ? obs_vars : obs_vars[obs_vars_perm]
+
+    # subset the data, check that obs_vars matches data or guess the obs_vars
+    if data isa AbstractDataFrame
+        if !isnothing(obs_vars_reordered) # subset/reorder columns
+            data = data[:, obs_vars_reordered]
+            if obs_vars_reordered != obs_vars
+                @warn "The order of variables in observed_vars argument does not match the order of observed_vars(specification). The specification order is used."
+            end
+        else # default symbol names
+            obs_vars = obs_vars_reordered = Symbol.(names(data))
+        end
+        data_mtx = Matrix(data)
+    elseif data isa AbstractMatrix
+        if !isnothing(obs_vars)
+            size(data, 2) == length(obs_vars) || DimensionMismatch(
+                "The number of columns in the data matrix ($(size(data, 2))) does not match the length of observed_vars ($(length(obs_vars))).",
+            )
+            # reorder columns to match the spec
+            data_ordered = !isnothing(obs_vars_perm) ? data[:, obs_vars_perm] : data
+        else
+            obs_vars =
+                obs_vars_reordered =
+                    default_observed_vars(size(data, 2), observed_var_prefix)
+            data_ordered = data
+        end
+        # make sure data_mtx is a dense matrix (required for methods like mean_and_cov())
+        data_mtx = convert(Matrix, data_ordered)
+    elseif data isa NTuple{2, Integer} # given the dimensions of the data matrix, but no data itself
+        data_mtx = nothing
+        nobs_vars = data[2]
+        if isnothing(obs_vars)
+            obs_vars =
+                obs_vars_reordered = default_observed_vars(nobs_vars, observed_var_prefix)
+        elseif length(obs_vars) != nobs_vars
+            throw(
+                DimensionMismatch(
+                    "The length of observed_vars ($(length(obs_vars))) does not match the data matrix columns ($(nobs_vars)).",
+                ),
+            )
+        end
+    elseif isnothing(data)
+        data_mtx = nothing
+        if isnothing(obs_vars)
+            throw(
+                ArgumentError(
+                    "No data, specification or observed_vars provided. Cannot infer observed_vars from provided inputs",
+                ),
+            )
+        end
+    else
+        throw(ArgumentError("Unsupported data type: $(typeof(data))"))
+    end
+    return data_mtx, obs_vars_reordered, obs_vars_perm
+end
diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl
index b78f41833..221ef5ca3 100644
--- a/src/observed/covariance.jl
+++ b/src/observed/covariance.jl
@@ -1,128 +1,80 @@
 """
-For observed covariance matrices and means.
+Type alias for [`SemObservedData`](@ref) that has mean and covariance, but no actual data.
 
-# Constructor
+For instances of `SemObservedCovariance` [`samples`](@ref) returns `nothing`.
+"""
+const SemObservedCovariance{S} = SemObservedData{Nothing, S}
 
+"""
     SemObservedCovariance(;
         specification,
         obs_cov,
         obs_colnames = nothing,
         meanstructure = false,
         obs_mean = nothing,
-        nsamples = nothing,
+        nsamples::Integer,
         kwargs...)
 
-# Arguments
-- `specification`: either a `RAMMatrices` or `ParameterTable` object (1)
-- `obs_cov`: observed covariance matrix
-- `obs_colnames::Vector{Symbol}`: column names of the covariance matrix
-- `meanstructure::Bool`: does the model have a meanstructure?
-- `obs_mean`: observed mean vector
-- `nsamples::Number`: number of samples (observed data points); necessary for fit statistics
-
-# Extended help
-## Interfaces
-- `nsamples(::SemObservedCovariance)`: number of samples (observed data points)
-- `n_man(::SemObservedCovariance)` -> number of manifest variables
+Construct [`SemObserved`](@ref) without providing the observations data,
+but with the covariations (`obs_cov`) and the means (`obs_means`) of the observed variables.
 
-- `obs_cov(::SemObservedCovariance)` -> observed covariance matrix
-- `obs_mean(::SemObservedCovariance)` -> observed means
+Returns [`SemObservedCovariance`](@ref) object.
 
-## Implementation
-Subtype of `SemObserved`
-
-## Remarks
-(1) the `specification` argument can also be `nothing`, but this turns of checking whether
-the observed data/covariance columns are in the correct order! As a result, you should only
-use this if you are sure your covariance matrix is in the right format.
-
-## Additional keyword arguments:
-- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object
+# Arguments
+- `obs_cov`: pre-computed covariations of the observed variables
+- `obs_mean`: optional pre-computed means of the observed variables
+- `observed_vars::AbstractVector`: IDs of the observed variables (rows and columns of the `obs_cov` matrix)
+- `specification`: optional SEM specification ([`SemSpecification`](@ref))
+- `nsamples::Number`: number of samples (observed data points) used to compute `obs_cov` and `obs_means`
+   necessary for calculating fit statistics
 """
-struct SemObservedCovariance{B, C} <: SemObserved
-    obs_cov::B
-    obs_mean::C
-    nobs_vars::Int
-    nsamples::Int
-end
-
 function SemObservedCovariance(;
+    obs_cov::AbstractMatrix,
+    obs_mean::Union{AbstractVector, Nothing} = nothing,
+    observed_vars::Union{AbstractVector, Nothing} = nothing,
     specification::Union{SemSpecification, Nothing} = nothing,
-    obs_cov,
-    obs_colnames = nothing,
-    spec_colnames = nothing,
-    obs_mean = nothing,
-    meanstructure = false,
     nsamples::Integer,
+    observed_var_prefix::Union{Symbol, AbstractString} = :obs,
     kwargs...,
 )
-    if !meanstructure & !isnothing(obs_mean)
-        throw(ArgumentError("observed means were passed, but `meanstructure = false`"))
-
-    elseif meanstructure & isnothing(obs_mean)
-        throw(ArgumentError("`meanstructure = true`, but no observed means were passed"))
-    end
-
-    if isnothing(spec_colnames) && !isnothing(specification)
-        spec_colnames = observed_vars(specification)
+    nvars = size(obs_cov, 1)
+    size(obs_cov, 2) == nvars || throw(
+        DimensionMismatch(
+            "The covariance matrix should be square, $(size(obs_cov)) was found.",
+        ),
+    )
+    S = eltype(obs_cov)
+
+    if isnothing(obs_mean)
+        obs_mean = zeros(S, nvars)
+    else
+        length(obs_mean) == nvars || throw(
+            DimensionMismatch(
+                "The length of the mean vector $(length(obs_mean)) does not match the size of the covariance matrix $(size(obs_cov))",
+            ),
+        )
+        S = promote_type(S, eltype(obs_mean))
     end
 
-    if !isnothing(spec_colnames) & isnothing(obs_colnames)
-        throw(ArgumentError("no `obs_colnames` were specified"))
+    obs_cov = convert(Matrix{S}, obs_cov)
+    obs_mean = convert(Vector{S}, obs_mean)
 
-    elseif !isnothing(spec_colnames) & !(eltype(obs_colnames) <: Symbol)
-        throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols"))
+    if !isnothing(observed_vars)
+        length(observed_vars) == nvars || throw(
+            DimensionMismatch(
+                "The length of the observed_vars $(length(observed_vars)) does not match the size of the covariance matrix $(size(obs_cov))",
+            ),
+        )
     end
 
-    if !isnothing(spec_colnames)
-        obs_cov = reorder_obs_cov(obs_cov, spec_colnames, obs_colnames)
-        isnothing(obs_mean) ||
-            (obs_mean = reorder_obs_mean(obs_mean, spec_colnames, obs_colnames))
-    end
+    _, obs_vars, obs_vars_perm =
+        prepare_data((nsamples, nvars), observed_vars, specification; observed_var_prefix)
 
-    return SemObservedCovariance(obs_cov, obs_mean, size(obs_cov, 1), nsamples)
-end
-
-############################################################################################
-### Recommended methods
-############################################################################################
-
-nsamples(observed::SemObservedCovariance) = observed.nsamples
-nobserved_vars(observed::SemObservedCovariance) = observed.nobs_vars
-
-samples(observed::SemObservedCovariance) =
-    error("$(typeof(observed)) does not store data samples")
-
-############################################################################################
-### additional methods
-############################################################################################
-
-obs_cov(observed::SemObservedCovariance) = observed.obs_cov
-obs_mean(observed::SemObservedCovariance) = observed.obs_mean
-
-############################################################################################
-### Additional functions
-############################################################################################
-
-# reorder covariance matrices --------------------------------------------------------------
-function reorder_obs_cov(obs_cov, spec_colnames, obs_colnames)
-    if spec_colnames == obs_colnames
-        return obs_cov
-    else
-        new_position = [findfirst(==(x), obs_colnames) for x in spec_colnames]
-        obs_cov = obs_cov[new_position, new_position]
-        return obs_cov
+    # reorder to match the specification
+    if !isnothing(obs_vars_perm)
+        obs_cov = obs_cov[obs_vars_perm, obs_vars_perm]
+        obs_mean = obs_mean[obs_vars_perm]
     end
-end
 
-# reorder means ----------------------------------------------------------------------------
-
-function reorder_obs_mean(obs_mean, spec_colnames, obs_colnames)
-    if spec_colnames == obs_colnames
-        return obs_mean
-    else
-        new_position = [findfirst(==(x), obs_colnames) for x in spec_colnames]
-        obs_mean = obs_mean[new_position]
-        return obs_mean
-    end
+    return SemObservedData(nothing, obs_vars, obs_cov, obs_mean, nsamples)
 end
diff --git a/src/observed/data.jl b/src/observed/data.jl
index c9b50e597..b6ddaa43d 100644
--- a/src/observed/data.jl
+++ b/src/observed/data.jl
@@ -4,17 +4,15 @@ For observed data without missings.
 # Constructor
 
     SemObservedData(;
-        specification,
         data,
-        meanstructure = false,
-        obs_colnames = nothing,
+        observed_vars = nothing,
+        specification = nothing,
         kwargs...)
 
 # Arguments
-- `specification`: either a `RAMMatrices` or `ParameterTable` object (1)
-- `data`: observed data
-- `meanstructure::Bool`: does the model have a meanstructure?
-- `obs_colnames::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame)
+- `specification`: optional SEM specification ([`SemSpecification`](@ref))
+- `data`: observed data -- *DataFrame* or *Matrix*
+- `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame)
 
 # Extended help
 ## Interfaces
@@ -27,112 +25,36 @@ For observed data without missings.
 
 ## Implementation
 Subtype of `SemObserved`
-
-## Remarks
-(1) the `specification` argument can also be `nothing`, but this turns of checking whether
-the observed data/covariance columns are in the correct order! As a result, you should only
-use this if you are sure your observed data is in the right format.
-
-## Additional keyword arguments:
-- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object
-- `compute_covariance::Bool ) = true`: should the covariance of `data` be computed and stored?
 """
-struct SemObservedData{A, B, C} <: SemObserved
-    data::A
-    obs_cov::B
-    obs_mean::C
-    nobs_vars::Int
+struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, S <: Number} <: SemObserved
+    data::D
+    observed_vars::Vector{Symbol}
+    obs_cov::Matrix{S}
+    obs_mean::Vector{S}
     nsamples::Int
 end
 
-# error checks
-function check_arguments_SemObservedData(kwargs...)
-    # data is a data frame,
-
-end
-
 function SemObservedData(;
-    specification::Union{SemSpecification, Nothing},
     data,
-    obs_colnames = nothing,
-    spec_colnames = nothing,
-    meanstructure = false,
-    compute_covariance = true,
+    observed_vars::Union{AbstractVector, Nothing} = nothing,
+    specification::Union{SemSpecification, Nothing} = nothing,
+    observed_var_prefix::Union{Symbol, AbstractString} = :obs,
     kwargs...,
 )
-    if isnothing(spec_colnames) && !isnothing(specification)
-        spec_colnames = observed_vars(specification)
-    end
-
-    if !isnothing(spec_colnames)
-        if isnothing(obs_colnames)
-            try
-                data = data[:, spec_colnames]
-            catch
-                throw(
-                    ArgumentError(
-                        "Your `data` can not be indexed by symbols. " *
-                        "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.",
-                    ),
-                )
-            end
-        else
-            if data isa DataFrame
-                throw(
-                    ArgumentError(
-                        "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " *
-                        "Please make sure the column names of your data frame indicate the correct variables " *
-                        "or pass your data in a different format.",
-                    ),
-                )
-            end
-
-            if !(eltype(obs_colnames) <: Symbol)
-                throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols"))
-            end
-
-            data = reorder_data(data, spec_colnames, obs_colnames)
-        end
-    end
+    data, obs_vars, _ =
+        prepare_data(data, observed_vars, specification; observed_var_prefix)
+    obs_mean, obs_cov = mean_and_cov(data, 1)
 
-    if data isa DataFrame
-        data = Matrix(data)
-    end
-
-    return SemObservedData(
-        data,
-        compute_covariance ? Statistics.cov(data) : nothing,
-        meanstructure ? vec(Statistics.mean(data, dims = 1)) : nothing,
-        size(data, 2),
-        size(data, 1),
-    )
+    return SemObservedData(data, obs_vars, obs_cov, vec(obs_mean), size(data, 1))
 end
 
 ############################################################################################
 ### Recommended methods
 ############################################################################################
 
-nsamples(observed::SemObservedData) = observed.nsamples
-nobserved_vars(observed::SemObservedData) = observed.nobs_vars
-
 ############################################################################################
 ### additional methods
 ############################################################################################
 
 obs_cov(observed::SemObservedData) = observed.obs_cov
 obs_mean(observed::SemObservedData) = observed.obs_mean
-
-############################################################################################
-### Additional functions
-############################################################################################
-
-# reorder data -----------------------------------------------------------------------------
-function reorder_data(data::AbstractArray, spec_colnames, obs_colnames)
-    if spec_colnames == obs_colnames
-        return data
-    else
-        obs_positions = Dict(col => i for (i, col) in enumerate(obs_colnames))
-        new_positions = [obs_positions[col] for col in spec_colnames]
-        return data[:, new_positions]
-    end
-end
diff --git a/src/observed/missing.jl b/src/observed/missing.jl
index b628a313b..cf699252e 100644
--- a/src/observed/missing.jl
+++ b/src/observed/missing.jl
@@ -9,76 +9,44 @@ mutable struct EmMVNModel{A, b, B}
     fitted::B
 end
 
+# FIXME type unstable
+obs_mean(em::EmMVNModel) = ifelse(em.fitted, em.μ, nothing)
+obs_cov(em::EmMVNModel) = ifelse(em.fitted, em.Σ, nothing)
+
 """
 For observed data with missing values.
 
 # Constructor
 
     SemObservedMissing(;
-        specification,
         data,
-        obs_colnames = nothing,
+        observed_vars = nothing,
+        specification = nothing,
         kwargs...)
 
 # Arguments
-- `specification`: either a `RAMMatrices` or `ParameterTable` object (1)
+- `specification`: optional SEM model specification ([`SemSpecification`](@ref))
 - `data`: observed data
-- `obs_colnames::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame)
+- `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame)
 
 # Extended help
 ## Interfaces
-- `nsamples(::SemObservedMissing)` -> number of observed data points
-- `nobserved_vars(::SemObservedMissing)` -> number of manifest variables
-
-- `samples(::SemObservedMissing)` -> observed data
-- `data_rowwise(::SemObservedMissing)` -> observed data as vector per observation, with missing values deleted
+- `nsamples(::SemObservedMissing)` -> number of samples (data points)
+- `nobserved_vars(::SemObservedMissing)` -> number of observed variables
 
-- `patterns(::SemObservedMissing)` -> indices of non-missing variables per missing patterns
-- `patterns_not(::SemObservedMissing)` -> indices of missing variables per missing pattern
-- `pattern_rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern
-- `pattern_nsamples(::SemObservedMissing)` -> number of data points per pattern
-- `pattern_nobs_vars(::SemObservedMissing)` -> number of non-missing observed variables per pattern
-- `obs_mean(::SemObservedMissing)` -> observed mean per pattern
-- `obs_cov(::SemObservedMissing)` -> observed covariance per pattern
-- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via optimization maximization
+- `samples(::SemObservedMissing)` -> data matrix (contains both measured and missing values)
+- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via expectation maximization
 
 ## Implementation
 Subtype of `SemObserved`
-
-## Remarks
-(1) the `specification` argument can also be `nothing`, but this turns of checking whether
-the observed data/covariance columns are in the correct order! As a result, you should only
-use this if you are sure your observed data is in the right format.
-
-## Additional keyword arguments:
-- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object
 """
-mutable struct SemObservedMissing{
-    A <: AbstractArray,
-    D <: Number,
-    O <: Number,
-    P <: Vector,
-    P2 <: Vector,
-    R <: Vector,
-    PD <: AbstractArray,
-    PO <: AbstractArray,
-    PVO <: AbstractArray,
-    A2 <: AbstractArray,
-    A3 <: AbstractArray,
-    S <: EmMVNModel,
-} <: SemObserved
-    data::A
-    nobs_vars::D
-    nsamples::O
-    patterns::P # missing patterns
-    patterns_not::P2
-    pattern_rows::R # coresponding rows in data_rowwise
-    data_rowwise::PD # list of data
-    pattern_nsamples::PO # observed rows per pattern
-    pattern_nobs_vars::PVO # number of non-missing variables per pattern
-    obs_mean::A2
-    obs_cov::A3
-    em_model::S
+struct SemObservedMissing{T <: Real, S <: Real, E <: EmMVNModel} <: SemObserved
+    data::Matrix{Union{T, Missing}}
+    observed_vars::Vector{Symbol}
+    nsamples::Int
+    patterns::Vector{SemObservedMissingPattern{T, S}}
+
+    em_model::E
 end
 
 ############################################################################################
@@ -86,116 +54,39 @@ end
 ############################################################################################
 
 function SemObservedMissing(;
-    specification::Union{SemSpecification, Nothing},
     data,
-    obs_colnames = nothing,
-    spec_colnames = nothing,
+    observed_vars::Union{AbstractVector, Nothing} = nothing,
+    specification::Union{SemSpecification, Nothing} = nothing,
+    observed_var_prefix::Union{Symbol, AbstractString} = :obs,
     kwargs...,
 )
-    if isnothing(spec_colnames) && !isnothing(specification)
-        spec_colnames = observed_vars(specification)
-    end
-
-    if !isnothing(spec_colnames)
-        if isnothing(obs_colnames)
-            try
-                data = data[:, spec_colnames]
-            catch
-                throw(
-                    ArgumentError(
-                        "Your `data` can not be indexed by symbols. " *
-                        "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.",
-                    ),
-                )
-            end
-        else
-            if data isa DataFrame
-                throw(
-                    ArgumentError(
-                        "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " *
-                        "Please make sure the column names of your data frame indicate the correct variables " *
-                        "or pass your data in a different format.",
-                    ),
-                )
-            end
-
-            if !(eltype(obs_colnames) <: Symbol)
-                throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols"))
-            end
-
-            data = reorder_data(data, spec_colnames, obs_colnames)
-        end
-    end
-
-    if data isa DataFrame
-        data = Matrix(data)
-    end
-
-    # remove persons with only missings
-    keep = Vector{Int64}()
-    for i in 1:size(data, 1)
-        if any(.!ismissing.(data[i, :]))
-            push!(keep, i)
-        end
-    end
-    data = data[keep, :]
-
+    data, obs_vars, _ =
+        prepare_data(data, observed_vars, specification; observed_var_prefix)
     nsamples, nobs_vars = size(data)
 
-    # compute and store the different missing patterns with their rowindices
-    missings = ismissing.(data)
-    patterns = [missings[i, :] for i in 1:size(missings, 1)]
-
-    patterns_cart = findall.(!, patterns)
-    data_rowwise = [data[i, patterns_cart[i]] for i in 1:nsamples]
-    data_rowwise = convert.(Array{Float64}, data_rowwise)
-
-    remember = Vector{BitArray{1}}()
-    rows = [Vector{Int64}(undef, 0) for i in 1:size(patterns, 1)]
-    for i in 1:size(patterns, 1)
-        unknown = true
-        for j in 1:size(remember, 1)
-            if patterns[i] == remember[j]
-                push!(rows[j], i)
-                unknown = false
-            end
-        end
-        if unknown
-            push!(remember, patterns[i])
-            push!(rows[size(remember, 1)], i)
+    # detect all different missing patterns with their row indices
+    pattern_to_rows = Dict{BitVector, Vector{Int}}()
+    for (i, datarow) in zip(axes(data, 1), eachrow(data))
+        pattern = BitVector(.!ismissing.(datarow))
+        if sum(pattern) > 0 # skip all-missing rows
+            pattern_rows = get!(() -> Vector{Int}(), pattern_to_rows, pattern)
+            push!(pattern_rows, i)
         end
     end
-    rows = rows[1:length(remember)]
-    n_patterns = size(rows, 1)
-
-    # sort by number of missings
-    sort_n_miss = sortperm(sum.(remember))
-    remember = remember[sort_n_miss]
-    remember_cart = findall.(!, remember)
-    remember_cart_not = findall.(remember)
-    rows = rows[sort_n_miss]
-
-    pattern_nsamples = size.(rows, 1)
-    pattern_nobs_vars = length.(remember_cart)
-
-    cov_mean = [cov_and_mean(data_rowwise[rows]) for rows in rows]
-    obs_cov = [cov_mean[1] for cov_mean in cov_mean]
-    obs_mean = [cov_mean[2] for cov_mean in cov_mean]
+    # process each pattern and sort from most to least number of observed vars
+    patterns = [
+        SemObservedMissingPattern(pat, rows, data) for (pat, rows) in pairs(pattern_to_rows)
+    ]
+    sort!(patterns, by = nmissed_vars)
 
+    # allocate EM model (but don't fit)
     em_model = EmMVNModel(zeros(nobs_vars, nobs_vars), zeros(nobs_vars), false)
 
     return SemObservedMissing(
-        data,
-        nobs_vars,
+        convert(Matrix{Union{nonmissingtype(eltype(data)), Missing}}, data),
+        obs_vars,
         nsamples,
-        remember_cart,
-        remember_cart_not,
-        rows,
-        data_rowwise,
-        pattern_nsamples,
-        pattern_nobs_vars,
-        obs_mean,
-        obs_cov,
+        patterns,
         em_model,
     )
 end
@@ -204,19 +95,10 @@ end
 ### Recommended methods
 ############################################################################################
 
-nsamples(observed::SemObservedMissing) = observed.nsamples
-nobserved_vars(observed::SemObservedMissing) = observed.nobs_vars
-
 ############################################################################################
 ### Additional methods
 ############################################################################################
 
-patterns(observed::SemObservedMissing) = observed.patterns
-patterns_not(observed::SemObservedMissing) = observed.patterns_not
-pattern_rows(observed::SemObservedMissing) = observed.pattern_rows
-data_rowwise(observed::SemObservedMissing) = observed.data_rowwise
-pattern_nsamples(observed::SemObservedMissing) = observed.pattern_nsamples
-pattern_nobs_vars(observed::SemObservedMissing) = observed.pattern_nobs_vars
-obs_mean(observed::SemObservedMissing) = observed.obs_mean
-obs_cov(observed::SemObservedMissing) = observed.obs_cov
 em_model(observed::SemObservedMissing) = observed.em_model
+obs_mean(observed::SemObservedMissing) = obs_mean(em_model(observed))
+obs_cov(observed::SemObservedMissing) = obs_cov(em_model(observed))
diff --git a/src/observed/missing_pattern.jl b/src/observed/missing_pattern.jl
new file mode 100644
index 000000000..6ac6a360b
--- /dev/null
+++ b/src/observed/missing_pattern.jl
@@ -0,0 +1,45 @@
+# data associated with the observed variables that all share the same missingness pattern
+# variables that have values within that pattern are termed "measured"
+# variables that have no measurements are termed "missing"
+struct SemObservedMissingPattern{T, S}
+    measured_mask::BitVector    # measured vars mask
+    miss_mask::BitVector        # missing vars mask
+    rows::Vector{Int}           # rows in original data
+    data::Matrix{T}             # non-missing submatrix of data
+
+    measured_mean::Vector{S}    # means of measured vars
+    measured_cov::Matrix{S}     # covariance of measured vars
+end
+
+function SemObservedMissingPattern(
+    measured_mask::BitVector,
+    rows::AbstractVector{<:Integer},
+    data::AbstractMatrix,
+)
+    T = nonmissingtype(eltype(data))
+
+    pat_data = convert(Matrix{T}, view(data, rows, measured_mask))
+    if size(pat_data, 1) > 1
+        pat_mean, pat_cov = mean_and_cov(pat_data, 1, corrected = false)
+        @assert size(pat_cov) == (size(pat_data, 2), size(pat_data, 2))
+    else
+        pat_mean = reshape(pat_data[1, :], 1, :)
+        # 1x1 covariance matrix since it is not meant to be used
+        pat_cov = fill(zero(T), 1, 1)
+    end
+
+    return SemObservedMissingPattern{T, eltype(pat_mean)}(
+        measured_mask,
+        .!measured_mask,
+        rows,
+        pat_data,
+        dropdims(pat_mean, dims = 1),
+        pat_cov,
+    )
+end
+
+nobserved_vars(pat::SemObservedMissingPattern) = length(pat.measured_mask)
+nsamples(pat::SemObservedMissingPattern) = length(pat.rows)
+
+nmeasured_vars(pat::SemObservedMissingPattern) = length(pat.measured_mean)
+nmissed_vars(pat::SemObservedMissingPattern) = nobserved_vars(pat) - nmeasured_vars(pat)
diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl
index 3fc255b84..cc72673a6 100644
--- a/test/unit_tests/data_input_formats.jl
+++ b/test/unit_tests/data_input_formats.jl
@@ -7,11 +7,19 @@ spec = ParameterTable(
     latent_vars = [:ind60, :dem60, :dem65],
 )
 
+# specification with non-existent observed var z1
+wrong_spec = ParameterTable(
+    observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :z1],
+    latent_vars = [:ind60, :dem60, :dem65],
+)
+
 ### data -----------------------------------------------------------------------------------
 
 dat = example_data("political_democracy")
 dat_missing = example_data("political_democracy_missing")[:, names(dat)]
 
+@assert Symbol.(names(dat)) == observed_vars(spec)
+
 dat_matrix = Matrix(dat)
 dat_missing_matrix = Matrix(dat_missing)
 
@@ -21,7 +29,12 @@ dat_mean = vcat(Statistics.mean(dat_matrix, dims = 1)...)
 # shuffle variables
 new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4]
 
-shuffle_names = Symbol.(names(dat))[new_order]
+shuffle_names = names(dat)[new_order]
+
+shuffle_spec = ParameterTable(
+    observed_vars = Symbol.(shuffle_names),
+    latent_vars = [:ind60, :dem60, :dem65],
+)
 
 shuffle_dat = dat[:, new_order]
 shuffle_dat_missing = dat_missing[:, new_order]
@@ -29,8 +42,8 @@ shuffle_dat_missing = dat_missing[:, new_order]
 shuffle_dat_matrix = dat_matrix[:, new_order]
 shuffle_dat_missing_matrix = dat_missing_matrix[:, new_order]
 
-shuffle_dat_cov = Statistics.cov(shuffle_dat_matrix)
-shuffle_dat_mean = vcat(Statistics.mean(shuffle_dat_matrix, dims = 1)...)
+shuffle_dat_cov = cov(shuffle_dat_matrix)
+shuffle_dat_mean = vec(mean(shuffle_dat_matrix, dims = 1))
 
 # common tests for SemObserved subtypes
 function test_observed(
@@ -42,17 +55,16 @@ function test_observed(
     meanstructure::Bool,
     approx_cov::Bool = false,
 )
-    @test @inferred(nobserved_vars(observed)) == size(dat, 2)
-    # FIXME observed should provide names of observed variables
-    @test @inferred(observed_vars(observed)) == names(dat) broken = true
-    @test @inferred(nsamples(observed)) == size(dat, 1)
-
-    hasmissing =
-        !isnothing(dat_matrix) && any(ismissing, dat_matrix) ||
-        !isnothing(dat_cov) && any(ismissing, dat_cov)
+    if !isnothing(dat)
+        @test @inferred(nsamples(observed)) == size(dat, 1)
+        @test @inferred(nobserved_vars(observed)) == size(dat, 2)
+        @test @inferred(observed_vars(observed)) == Symbol.(names(dat))
+    end
 
     if !isnothing(dat_matrix)
-        if hasmissing
+        @test @inferred(nsamples(observed)) == size(dat_matrix, 1)
+
+        if any(ismissing, dat_matrix)
             @test isequal(@inferred(samples(observed)), dat_matrix)
         else
             @test @inferred(samples(observed)) == dat_matrix
@@ -60,7 +72,7 @@ function test_observed(
     end
 
     if !isnothing(dat_cov)
-        if hasmissing
+        if any(ismissing, dat_cov)
             @test isequal(@inferred(obs_cov(observed)), dat_cov)
         else
             if approx_cov
@@ -72,17 +84,17 @@ function test_observed(
     end
 
     # FIXME actually, SemObserved should not use meanstructure and always provide obs_mean()
-    # meanstructure is a part of SEM model
+    # since meanstructure belongs to the implied part of a SEM model
     if meanstructure
         if !isnothing(dat_mean)
-            if hasmissing
+            if any(ismissing, dat_mean)
                 @test isequal(@inferred(obs_mean(observed)), dat_mean)
             else
                 @test @inferred(obs_mean(observed)) == dat_mean
             end
         else
-            # FIXME if meanstructure is present, obs_mean() should provide something (currently Missing don't support it)
-            @test (@inferred(obs_mean(observed)) isa AbstractVector{Float64}) broken = true
+            # FIXME @inferred is broken for EM cov/mean since it may return nothing if EM was not run
+            @test @inferred(obs_mean(observed)) isa AbstractVector{Float64} broken = true # EM-based means
         end
     else
         @test @inferred(obs_mean(observed)) === nothing skip = true
@@ -93,32 +105,29 @@ end
 @testset "SemObservedData" begin
 
     # errors
-    @test_throws ArgumentError(
-        "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " *
-        "Please make sure the column names of your data frame indicate the correct variables " *
-        "or pass your data in a different format.",
-    ) begin
-        SemObservedData(
-            specification = spec,
-            data = dat,
-            obs_colnames = Symbol.(names(dat)),
-        )
-    end
+    obs_data_redundant = SemObservedData(
+        specification = spec,
+        data = dat,
+        observed_vars = Symbol.(names(dat)),
+    )
+    @test observed_vars(obs_data_redundant) == Symbol.(names(dat))
+    @test observed_vars(obs_data_redundant) == observed_vars(spec)
 
-    @test_throws ArgumentError(
-        "Your `data` can not be indexed by symbols. " *
-        "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.",
-    ) begin
-        SemObservedData(specification = spec, data = dat_matrix)
-    end
+    obs_data_spec = SemObservedData(specification = spec, data = dat_matrix)
+    @test observed_vars(obs_data_spec) == observed_vars(spec)
 
-    @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin
-        SemObservedData(specification = spec, data = dat_matrix, obs_colnames = names(dat))
-    end
+    obs_data_strnames =
+        SemObservedData(specification = spec, data = dat_matrix, observed_vars = names(dat))
+    @test observed_vars(obs_data_strnames) == Symbol.(names(dat))
 
     @test_throws UndefKeywordError(:data) SemObservedData(specification = spec)
 
-    @test_throws UndefKeywordError(:specification) SemObservedData(data = dat_matrix)
+    obs_data_nonames = SemObservedData(data = dat_matrix)
+    @test observed_vars(obs_data_nonames) == Symbol.("obs", 1:size(dat_matrix, 2))
+
+    obs_data_nonames2 =
+        SemObservedData(data = dat_matrix, observed_var_prefix = "observed_")
+    @test observed_vars(obs_data_nonames2) == Symbol.("observed_", 1:size(dat_matrix, 2))
 
     @testset "meanstructure=$meanstructure" for meanstructure in (false, true)
         observed = SemObservedData(specification = spec, data = dat; meanstructure)
@@ -128,35 +137,98 @@ end
         observed_nospec =
             SemObservedData(specification = nothing, data = dat_matrix; meanstructure)
 
-        test_observed(observed_nospec, dat, dat_matrix, dat_cov, dat_mean; meanstructure)
+        test_observed(
+            observed_nospec,
+            nothing,
+            dat_matrix,
+            dat_cov,
+            dat_mean;
+            meanstructure,
+        )
 
         observed_matrix = SemObservedData(
             specification = spec,
             data = dat_matrix,
-            obs_colnames = Symbol.(names(dat)),
-            meanstructure = meanstructure,
+            observed_vars = Symbol.(names(dat));
+            meanstructure,
         )
 
         test_observed(observed_matrix, dat, dat_matrix, dat_cov, dat_mean; meanstructure)
 
+        # detect non-existing column
+        @test_throws "ArgumentError: column name \"z1\"" SemObservedData(
+            specification = wrong_spec,
+            data = shuffle_dat,
+        )
+
+        # detect non-existing observed_var
+        @test_throws "ArgumentError: observed_var \"z1\"" SemObservedData(
+            specification = wrong_spec,
+            data = shuffle_dat_matrix,
+            observed_vars = shuffle_names,
+        )
+
+        # cannot infer observed_vars
+        @test_throws "No data, specification or observed_vars provided" SemObservedData(
+            data = nothing,
+        )
+
+        if false # FIXME data = nothing is for simulation studies
+            # no data, just observed_vars
+            observed_nodata =
+                SemObservedData(data = nothing, observed_vars = Symbol.(names(dat)))
+            @test observed_nodata isa SemObservedData
+            @test @inferred(samples(observed_nodata)) === nothing
+            @test observed_vars(observed_nodata) == Symbol.(names(dat))
+        end
+
+        @test_warn "The order of variables in observed_vars" SemObservedData(
+            specification = spec,
+            data = shuffle_dat,
+            observed_vars = shuffle_names,
+        )
+
+        # spec takes precedence in obs_vars order
+        observed_spec = SemObservedData(
+            specification = spec,
+            data = shuffle_dat,
+            observed_vars = shuffle_names,
+        )
+
+        test_observed(
+            observed_spec,
+            dat,
+            dat_matrix,
+            dat_cov,
+            meanstructure ? dat_mean : nothing;
+            meanstructure,
+        )
+
         observed_shuffle =
-            SemObservedData(specification = spec, data = shuffle_dat; meanstructure)
+            SemObservedData(specification = shuffle_spec, data = shuffle_dat; meanstructure)
 
-        test_observed(observed_shuffle, dat, dat_matrix, dat_cov, dat_mean; meanstructure)
+        test_observed(
+            observed_shuffle,
+            shuffle_dat,
+            shuffle_dat_matrix,
+            shuffle_dat_cov,
+            meanstructure ? shuffle_dat_mean : nothing;
+            meanstructure,
+        )
 
         observed_matrix_shuffle = SemObservedData(
-            specification = spec,
+            specification = shuffle_spec,
             data = shuffle_dat_matrix,
-            obs_colnames = shuffle_names;
+            observed_vars = shuffle_names;
             meanstructure,
         )
 
         test_observed(
             observed_matrix_shuffle,
-            dat,
-            dat_matrix,
-            dat_cov,
-            dat_mean;
+            shuffle_dat,
+            shuffle_dat_matrix,
+            shuffle_dat_cov,
+            meanstructure ? shuffle_dat_mean : nothing;
             meanstructure,
         )
     end # meanstructure
@@ -170,43 +242,6 @@ end # SemObservedData
 
     @test_throws UndefKeywordError(:nsamples) SemObservedCovariance(obs_cov = dat_cov)
 
-    @test_throws ArgumentError("no `obs_colnames` were specified") begin
-        SemObservedCovariance(
-            specification = spec,
-            obs_cov = dat_cov,
-            nsamples = size(dat, 1),
-        )
-    end
-
-    @test_throws ArgumentError("observed means were passed, but `meanstructure = false`") begin
-        SemObservedCovariance(
-            specification = nothing,
-            obs_cov = dat_cov,
-            obs_mean = dat_mean,
-            nsamples = size(dat, 1),
-        )
-    end
-
-    @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin
-        SemObservedCovariance(
-            specification = spec,
-            obs_cov = dat_cov,
-            obs_colnames = names(dat),
-            nsamples = size(dat, 1),
-            meanstructure = false,
-        )
-    end
-
-    @test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin
-        SemObservedCovariance(
-            specification = spec,
-            obs_cov = dat_cov,
-            obs_colnames = Symbol.(names(dat)),
-            meanstructure = true,
-            nsamples = size(dat, 1),
-        )
-    end
-
     @testset "meanstructure=$meanstructure" for meanstructure in (false, true)
 
         # errors
@@ -220,12 +255,25 @@ end # SemObservedData
             meanstructure,
         )
 
-        # should work
+        # default vars
+        observed_nonames = SemObservedCovariance(
+            obs_cov = dat_cov,
+            obs_mean = meanstructure ? dat_mean : nothing,
+            nsamples = size(dat, 1),
+        )
+        @test observed_vars(observed_nonames) == Symbol.("obs", 1:size(dat_cov, 2))
+
+        @test_throws DimensionMismatch SemObservedCovariance(
+            obs_cov = dat_cov,
+            observed_vars = Symbol.("obs", 1:(size(dat_cov, 2)+1)),
+            nsamples = size(dat, 1),
+        )
+
         observed = SemObservedCovariance(
             specification = spec,
             obs_cov = dat_cov,
             obs_mean = meanstructure ? dat_mean : nothing,
-            obs_colnames = obs_colnames = Symbol.(names(dat)),
+            observed_vars = Symbol.(names(dat)),
             nsamples = size(dat, 1),
             meanstructure = meanstructure,
         )
@@ -240,7 +288,7 @@ end # SemObservedData
             approx_cov = true,
         )
 
-        @test_throws ErrorException samples(observed)
+        @test @inferred(samples(observed)) === nothing
 
         observed_nospec = SemObservedCovariance(
             specification = nothing,
@@ -252,7 +300,7 @@ end # SemObservedData
 
         test_observed(
             observed_nospec,
-            dat,
+            nothing,
             nothing,
             dat_cov,
             dat_mean;
@@ -260,32 +308,53 @@ end # SemObservedData
             approx_cov = true,
         )
 
-        @test_throws ErrorException samples(observed_nospec)
+        @test @inferred(samples(observed_nospec)) === nothing
 
-        observed_shuffle = SemObservedCovariance(
+        # detect non-existing observed_var
+        @test_throws "ArgumentError: observed_var \"z1\"" SemObservedCovariance(
+            specification = wrong_spec,
+            obs_cov = shuffle_dat_cov,
+            observed_vars = shuffle_names,
+            nsamples = size(dat, 1),
+        )
+
+        # spec takes precedence in obs_vars order
+        observed_spec = SemObservedCovariance(
             specification = spec,
             obs_cov = shuffle_dat_cov,
-            obs_mean = meanstructure ? dat_mean[new_order] : nothing,
-            obs_colnames = shuffle_names,
-            nsamples = size(dat, 1);
-            meanstructure,
+            obs_mean = meanstructure ? shuffle_dat_mean : nothing,
+            observed_vars = shuffle_names,
+            nsamples = size(dat, 1),
         )
 
         test_observed(
-            observed_shuffle,
+            observed_spec,
             dat,
             nothing,
             dat_cov,
-            dat_mean;
+            meanstructure ? dat_mean : nothing;
             meanstructure,
             approx_cov = true,
         )
 
-        @test_throws ErrorException samples(observed_shuffle)
+        observed_shuffle = SemObservedCovariance(
+            specification = shuffle_spec,
+            obs_cov = shuffle_dat_cov,
+            obs_mean = meanstructure ? shuffle_dat_mean : nothing,
+            observed_vars = shuffle_names,
+            nsamples = size(dat, 1);
+            meanstructure,
+        )
 
-        # respect specification order
-        @test @inferred(obs_cov(observed_shuffle)) ≈ obs_cov(observed)
-        @test @inferred(observed_vars(observed_shuffle)) == shuffle_names broken = true
+        test_observed(
+            observed_shuffle,
+            shuffle_dat,
+            nothing,
+            shuffle_dat_cov,
+            meanstructure ? shuffle_dat_mean : nothing;
+            meanstructure,
+            approx_cov = true,
+        )
     end # meanstructure
 end # SemObservedCovariance
 
@@ -294,38 +363,31 @@ end # SemObservedCovariance
 @testset "SemObservedMissing" begin
 
     # errors
-    @test_throws ArgumentError(
-        "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " *
-        "Please make sure the column names of your data frame indicate the correct variables " *
-        "or pass your data in a different format.",
-    ) begin
-        SemObservedMissing(
-            specification = spec,
-            data = dat_missing,
-            obs_colnames = Symbol.(names(dat)),
-        )
-    end
+    observed_redundant_names = SemObservedMissing(
+        specification = spec,
+        data = dat_missing,
+        observed_vars = Symbol.(names(dat)),
+    )
+    @test observed_vars(observed_redundant_names) == Symbol.(names(dat))
 
-    @test_throws ArgumentError(
-        "Your `data` can not be indexed by symbols. " *
-        "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.",
-    ) begin
-        SemObservedMissing(specification = spec, data = dat_missing_matrix)
-    end
+    observed_spec_only = SemObservedMissing(specification = spec, data = dat_missing_matrix)
+    @test observed_vars(observed_spec_only) == observed_vars(spec)
 
-    @test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin
-        SemObservedMissing(
-            specification = spec,
-            data = dat_missing_matrix,
-            obs_colnames = names(dat),
-        )
-    end
+    observed_str_colnames = SemObservedMissing(
+        specification = spec,
+        data = dat_missing_matrix,
+        observed_vars = names(dat),
+    )
+    @test observed_vars(observed_str_colnames) == Symbol.(names(dat))
 
     @test_throws UndefKeywordError(:data) SemObservedMissing(specification = spec)
 
-    @test_throws UndefKeywordError(:specification) SemObservedMissing(
-        data = dat_missing_matrix,
-    )
+    observed_no_names = SemObservedMissing(data = dat_missing_matrix)
+    @test observed_vars(observed_no_names) == Symbol.(:obs, 1:size(dat_missing_matrix, 2))
+
+    observed_no_names2 =
+        SemObservedMissing(data = dat_missing_matrix, observed_var_prefix = "observed_")
+    @test observed_vars(observed_no_names2) == Symbol.("observed_", 1:size(dat_matrix, 2))
 
     @testset "meanstructure=$meanstructure" for meanstructure in (false, true)
         observed =
@@ -340,13 +402,10 @@ end # SemObservedCovariance
             meanstructure,
         )
 
-        @test @inferred(length(StructuralEquationModels.patterns(observed))) == 55
-        @test sum(@inferred(StructuralEquationModels.pattern_nsamples(observed))) ==
+        @test @inferred(length(observed.patterns)) == 55
+        @test sum(@inferred(nsamples(pat)) for pat in observed.patterns) ==
               size(dat_missing, 1)
-        @test all(
-            <=(size(dat_missing, 2)),
-            @inferred(StructuralEquationModels.pattern_nsamples(observed))
-        )
+        @test all(nsamples(pat) <= size(dat_missing, 2) for pat in observed.patterns)
 
         observed_nospec = SemObservedMissing(
             specification = nothing,
@@ -356,7 +415,7 @@ end # SemObservedCovariance
 
         test_observed(
             observed_nospec,
-            dat_missing,
+            nothing,
             dat_missing_matrix,
             nothing,
             nothing;
@@ -366,7 +425,7 @@ end # SemObservedCovariance
         observed_matrix = SemObservedMissing(
             specification = spec,
             data = dat_missing_matrix,
-            obs_colnames = Symbol.(names(dat)),
+            observed_vars = Symbol.(names(dat)),
         )
 
         test_observed(
@@ -378,11 +437,28 @@ end # SemObservedCovariance
             meanstructure,
         )
 
-        observed_shuffle =
-            SemObservedMissing(specification = spec, data = shuffle_dat_missing)
+        # detect non-existing column
+        @test_throws "ArgumentError: column name \"z1\"" SemObservedMissing(
+            specification = wrong_spec,
+            data = shuffle_dat,
+        )
+
+        # detect non-existing observed_var
+        @test_throws "ArgumentError: observed_var \"z1\"" SemObservedMissing(
+            specification = wrong_spec,
+            data = shuffle_dat_missing_matrix,
+            observed_vars = shuffle_names,
+        )
+
+        # spec takes precedence in obs_vars order
+        observed_spec = SemObservedMissing(
+            specification = spec,
+            observed_vars = shuffle_names,
+            data = shuffle_dat_missing,
+        )
 
         test_observed(
-            observed_shuffle,
+            observed_spec,
             dat_missing,
             dat_missing_matrix,
             nothing,
@@ -390,16 +466,28 @@ end # SemObservedCovariance
             meanstructure,
         )
 
+        observed_shuffle =
+            SemObservedMissing(specification = shuffle_spec, data = shuffle_dat_missing)
+
+        test_observed(
+            observed_shuffle,
+            shuffle_dat_missing,
+            shuffle_dat_missing_matrix,
+            nothing,
+            nothing;
+            meanstructure,
+        )
+
         observed_matrix_shuffle = SemObservedMissing(
-            specification = spec,
+            specification = shuffle_spec,
             data = shuffle_dat_missing_matrix,
-            obs_colnames = shuffle_names,
+            observed_vars = shuffle_names,
         )
 
         test_observed(
             observed_matrix_shuffle,
-            dat_missing,
-            dat_missing_matrix,
+            shuffle_dat_missing,
+            shuffle_dat_missing_matrix,
             nothing,
             nothing;
             meanstructure,