From 5c7a6922a29451a47cf64d5adbc1da3384a54b7c Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Thu, 22 Oct 2015 09:56:32 -0400
Subject: [PATCH 1/2] Added parametric state space model and changed notation

---
 src/kalman_filter.jl        |  24 +--
 src/kalman_smooth.jl        |  28 +--
 src/parameter_estimation.jl | 329 +++++++++++++++++++++++++++++++++++-
 src/statespacemodel.jl      | 280 +++++++++++++++++++++++++-----
 test/kalman_filter.jl       |   6 +-
 5 files changed, 597 insertions(+), 70 deletions(-)

diff --git a/src/kalman_filter.jl b/src/kalman_filter.jl
index b1c06eb..700fa64 100644
--- a/src/kalman_filter.jl
+++ b/src/kalman_filter.jl
@@ -9,7 +9,7 @@ type KalmanFiltered{T}
     loglik::T
 end
 
-function show{T}(io::IO, filt::KalmanFiltered{T})
+function Base.show{T}(io::IO, filt::KalmanFiltered{T})
     n = size(filt.y, 1)
     dx, dy = filt.model.nx, filt.model.ny
     println("KalmanFiltered{$T}")
@@ -24,14 +24,14 @@ function kalman_filter{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=ze
     @assert size(u,2) == model.nu
 
     function kalman_recursions(y_i::Vector{T}, u_i::Vector{T},
-                                  G_i::Matrix{T}, D_i::Matrix{T}, W_i::Matrix{T},
+                                  C_i::Matrix{T}, D_i::Matrix{T}, W_i::Matrix{T},
                                   x_pred_i::Vector{T}, P_pred_i::Matrix{T})
         if !any(isnan(y_i))
-            innov =  y_i - G_i * x_pred_i - D_i * u_i
-            S = G_i * P_pred_i * G_i' + W_i  # Innovation covariance
-            K = P_pred_i * G_i' / S # Kalman gain
+            innov =  y_i - C_i * x_pred_i - D_i * u_i
+            S = C_i * P_pred_i * C_i' + W_i  # Innovation covariance
+            K = P_pred_i * C_i' / S # Kalman gain
             x_filt_i = x_pred_i + K * innov
-            P_filt_i = (I - K * G_i) * P_pred_i
+            P_filt_i = (I - K * C_i) * P_pred_i
             dll = (dot(innov,S\innov) + logdet(S))/2
         else
             x_filt_i = x_pred_i
@@ -51,20 +51,20 @@ function kalman_filter{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=ze
     log_likelihood = n*model.ny*log(2pi)/2
 
     # first iteration
-    F_1 = model.F(1)
+    A_1 = model.A(1)
     x_pred[:, 1] = model.x1
     P_pred[:, :, 1] = model.P1
     x_filt[:, 1], P_filt[:,:,1], dll = kalman_recursions(y[:, 1], u[:, 1],
-                                            model.G(1), model.D(1), model.W(1),
+                                            model.C(1), model.D(1), model.W(1),
                                             model.x1, model.P1)
     log_likelihood += dll
 
     for i=2:n
-        F_i1 = model.F(i)
-        x_pred[:, i] =  F_i1 * x_filt[:, i-1] + model.B(i-1) * u[:, i-1]
-        P_pred[:, :, i] = F_i1 * P_filt[:, :, i-1] * F_i1' + model.V(i-1)
+        A_i1 = model.A(i-1)
+        x_pred[:, i] =  A_i1 * x_filt[:, i-1] + model.B(i-1) * u[:, i-1]
+        P_pred[:, :, i] = A_i1 * P_filt[:, :, i-1] * A_i1' + model.V(i-1)
         x_filt[:, i], P_filt[:,:,i], dll = kalman_recursions(y[:, i], u[:, i],
-                                                model.G(i), model.D(i), model.W(i),
+                                                model.C(i), model.D(i), model.W(i),
                                                 x_pred[:,i], P_pred[:,:,i])
         log_likelihood += dll
     end
diff --git a/src/kalman_smooth.jl b/src/kalman_smooth.jl
index 34e2476..9b2713b 100644
--- a/src/kalman_smooth.jl
+++ b/src/kalman_smooth.jl
@@ -63,7 +63,7 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1
     innov_cov_inv_t     = copy(innov_cov_t)
     Kt                  = zeros(model.nx, model.ny)
 
-    Ft, Gt, Dt, Wt, ut  = model.F(1), model.G(1), model.D(1), model.W(1), zeros(model.nu)
+    At, Ct, Dt, Wt, ut  = model.A(1), model.C(1), model.D(1), model.W(1), zeros(model.nu)
 
     log_likelihood = n*model.ny*log(2pi)/2
     marginal_likelihood(innov::Vector, innov_cov::Matrix, innov_cov_inv::Matrix) =
@@ -74,16 +74,16 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1
 
         # Predict using last iteration's values
         if t > 1
-            x_pred_t = Ft * x_pred_t + model.B(t-1) * ut + Kt * innov_t
-            V_pred_t = Ft * V_pred_t * (Ft - Kt * Gt)' + model.V(t)
+            x_pred_t = At * x_pred_t + model.B(t-1) * ut + Kt * innov_t
+            V_pred_t = At * V_pred_t * (At - Kt * Ct)' + model.V(t)
             V_pred_t = (V_pred_t + V_pred_t')/2
         end #if
         x_pred[:, t]    = x_pred_t
         V_pred[:, :, t] = V_pred_t
 
         # Assign new values
-        Ft = model.F(t)
-        Gt = model.G(t)
+        At = model.A(t)
+        Ct = model.C(t)
         Dt = model.D(t)
         Wt = model.W(t)
         ut = u[:, t]
@@ -93,15 +93,15 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1
         if missing_obs
             ynnt = y_notnan[:, t]
             I1, I2 = diagm(ynnt), diagm(!ynnt)
-            Gt, Dt = I1 * Gt, I1 * Dt
+            Ct, Dt = I1 * Ct, I1 * Dt
             Wt = I1 * Wt * I1 + I2 * Wt * I2
         end #if
 
         # Compute nessecary filtering quantities
-        innov_t         = y[:, t] - Gt * x_pred_t - Dt * ut
-        innov_cov_t     = Gt * V_pred_t * Gt' + Wt
+        innov_t         = y[:, t] - Ct * x_pred_t - Dt * ut
+        innov_cov_t     = Ct * V_pred_t * Ct' + Wt
         innov_cov_inv_t = inv(innov_cov_t)
-        Kt              = Ft * V_pred_t * Gt' * innov_cov_inv_t
+        Kt              = At * V_pred_t * Ct' * innov_cov_inv_t
 
         innov[:, t]             = innov_t
         innov_cov_inv[:, :, t]  = innov_cov_inv_t
@@ -115,13 +115,13 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1
 
     for t = n:-1:1
 
-        Gt = model.G(t)
+        Ct = model.C(t)
         innov_cov_inv_t = innov_cov_inv[:, :, t]
         V_pred_t = V_pred[:, :, t]
-        L = model.F(t) - K[:, :, t] * Gt
+        L = model.A(t) - K[:, :, t] * Ct
 
-        r = Gt' * innov_cov_inv_t * innov[:, t] + L' * r
-        N = Gt' * innov_cov_inv_t * Gt + L' * N * L
+        r = Ct' * innov_cov_inv_t * innov[:, t] + L' * r
+        N = Ct' * innov_cov_inv_t * Ct + L' * N * L
 
         x_smooth[:, t] = x_pred[:, t] + V_pred_t * r
         V_smooth_t = V_pred_t - V_pred_t * N * V_pred_t
@@ -149,7 +149,7 @@ function kalman_smooth(filt::KalmanFiltered)
     x_smooth[:, n] = x_filt[:, n]
     P_smoov[:, :, n] = P_filt[:, :, n]
     for i = (n-1):-1:1
-        J = P_filt[:, :, i] * model.F(i)' * inv(P_pred[:,:,i+1])
+        J = P_filt[:, :, i] * model.A(i)' * inv(P_pred[:,:,i+1])
         x_smooth[:, i] = x_filt[:, i] + J * (x_smooth[:, i+1] - x_pred[:, i+1])
         P_smoov[:, :, i] = P_filt[:, :, i] + J * (P_smoov[:, :, i+1] - P_pred[:,:,i+1]) * J'
     end
diff --git a/src/parameter_estimation.jl b/src/parameter_estimation.jl
index 1c456b5..80efb43 100644
--- a/src/parameter_estimation.jl
+++ b/src/parameter_estimation.jl
@@ -1,5 +1,326 @@
-function fit{T}(y::Matrix{T}, build::Function, theta0::Vector{T})
-    objective(theta) = kalman_filter(y, build(theta)).loglik
+# ML parameter estimation with filter result log-likelihood (via Nelder-Mead)
+
+function fit{T}(y::Matrix{T}, build::Function, theta0::Vector{T};
+          u::Array{T}=zeros(size(y,1), build(theta0).nu))
+    objective(theta) = kalman_filter(y, build(theta), u=u).loglik
     kfit = Optim.optimize(objective, theta0)
-    return (kfit.minimum, build(kfit.minimum))
-end
+    return (kfit.minimum, build(kfit.minimum), filter(y, build(kfit.minimum)))
+end #fit
+
+# Expectation-Maximization (EM) parameter estimation
+
+function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
+          u::Array{T}=zeros(size(y,1), size(model.A, 2)), eps::Float64=1e-6, niter::Int=typemax(Int))
+
+    function em_kernel{T}(y::Array, u::Array, pmodel::ParametrizedSSM, params::SSMParameters{T})
+
+        function lag1_smooth(y::Array, u::Array, model::StateSpaceModel)
+            B_stack = [model.B zeros(model.B); eye(model.nx) zeros(model.B)]
+            U_stack = [model.U; zeros(model.U)]
+            G_stack = [model.G; zeros(model.G)]
+            Z_stack = [model.Z zeros(model.Z)]
+            x1_stack = [model.x1; zeros(model.x1)]
+            V1_stack = [model.V1 zeros(model.V1); zeros(model.V1) zeros(model.V1)]
+            stack_model = StateSpaceModel(B_stack, U_stack, G_stack, model.Q, Z_stack, model.A, model.H, model.R, x1_stack, V1_stack)
+            stack_smoothed = smooth(y, stack_model, u=u)
+            V     = SparseMatrixCSC[Vt[1:model.nx, 1:model.nx] for Vt in stack_smoothed.V]
+            Vlag1 = SparseMatrixCSC[Vt[1:model.nx, (model.nx+1):end] for Vt in stack_smoothed.V]
+            return stack_smoothed.x[:, 1:model.nx]', V, Vlag1, stack_smoothed.loglik
+        end #function
+
+        estimate_B   = length(params.B) > 0
+        estimate_U   = length(params.U) > 0
+        estimate_Q   = length(params.Q) > 0
+        estimate_Z   = length(params.Z) > 0
+        estimate_A   = length(params.A) > 0
+        estimate_R   = length(params.R) > 0
+        estimate_x1  = length(params.x1) > 0
+        estimate_V1  = length(params.V1) > 0
+
+        m = pmodel(params)
+        spB = sparse(m.B)
+        spU = sparse(m.U)
+
+        print("Expectations (smoothing)... ")
+        tic()
+
+        if estimate_B | estimate_Q
+            exs, V, Vlag1, loglik = lag1_smooth(y, u, m)
+        else
+            smoothed = smooth(y, m, u=u)
+            exs, V, loglik = smoothed.x', smoothed.V, smoothed.loglik
+        end
+
+        toc()
+        println("Negative log-likelihood: ", loglik)
+
+        y = y'
+        u = u'
+        n = size(y,2)
+
+        print("Maximizations... ")
+        tic()
+
+        y_notnan = !isnan(y)
+        y = y .* y_notnan
+
+        if estimate_B | estimate_U | estimate_Q
+            phi = (m.G' * m.G) \ m.G'
+            Qinv = phi' * inv(m.Q) * phi
+            spQinv = sparse(Qinv)
+        end #if BU
+
+        if estimate_U | estimate_Z | estimate_A | estimate_R
+            xi = (m.H' * m.H) \ m.H'
+            Rinv = xi' * inv(m.R) * xi
+        end #if
+
+        if estimate_x1
+            Linv = inv(m.V1) #TODO: Degenerate accomodations?
+        end #if
+
+        HRH = m.H * m.R * m.H'
+        HRH_nonzero_rows = diag(HRH) .!= 0
+
+        function get_Nt(y_notnan_t::Vector{Bool})
+            O = eye(m.ny)[find(y_notnan_t & HRH_nonzero_rows), :]
+            return I - HRH * O' * inv(O * HRH * O') * O
+        end #get_Nt
+
+        ex  = exs[:, 1]
+        exx = zeros(m.nx, m.nx)
+
+        yt = y[:, 1]
+        Nt = get_Nt(y_notnan[:, 1])
+        ey = yt - Nt * (yt - m.Z * exs[:, 1] - m.A * u[:, 1])
+
+        Uut = spU * u[:, 1]
+        Aut = m.A * u[:, 1]
+        
+        if estimate_B # B setup
+            spB_D = sparse(pmodel.B.D)
+            beta_S1 = zeros(length(params.B), length(params.B))
+            beta_S2 = zeros(params.B)
+        end #if B
+
+        if estimate_U # U setup
+            spU_f = sparse(pmodel.U.f)
+            spU_D = sparse(pmodel.U.D)
+
+            deterministic = all(m.G .== 0, 2)
+            OQ0, OQp = speye(m.nx)[find(deterministic), :], speye(m.nx)[find(!deterministic), :]
+
+            nu_kp = kron(u[:, 1]', speye(m.nx))
+            fU = nu_kp * spU_f 
+            DU = nu_kp * spU_D 
+
+            Idt = speye(m.nx)
+            B_ = speye(m.nx)
+            f_ = nu_kp * spU_f * 0
+            D_ = nu_kp * spU_D * 0
+            M = sparse((m.B .!= 0) * 1.)
+            M_t = copy(M)
+            EX0 = exs[:, 1]
+            potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:]
+
+            Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut 
+            Dt2 = m.Z * Idt * D_
+            Dt3 = zeros(model.nx) 
+            Dt4 = DU * 0
+
+            Idt1 = Idt
+            Bt1_ = B_
+            ft1_ = f_
+            Dt1_ = D_ 
+
+            nu_S1 = Dt2' * Rinv * Dt2
+            nu_S2 = Dt2' * Rinv * Dt1
+        end #if
+
+        if estimate_Q # Q setup
+            q_S1 = zeros(length(params.Q), length(params.Q))
+            q_S2 = zeros(params.Q)
+        end #if
+
+        if estimate_Z # Z setup
+            zeta_S1 = zeros(length(params.Z), length(params.Z)) 
+            zeta_S2 = zeros(params.Z)
+        end #if
+
+        if estimate_A # A setup
+            alpha_S1 = zeros(length(params.A), length(params.A)) 
+            alpha_S2 = zeros(params.A)
+        end #if
+
+        if estimate_R # R setup
+            r_S1 = zeros(length(params.R), length(params.R))
+            r_S2 = zeros(params.R)
+        end #if
+
+        for t in 1:n
+
+            ex_prev = ex
+            ex      = exs[:, t]
+            Vt      = V[t]
+            ut      = u[:, t]
+
+            println(Vt)
+
+            if estimate_B | estimate_Q | estimate_Z
+
+                exx_prev = exx
+                exx = Vt + ex * ex'
+
+                if (estimate_B | estimate_Q) && t > 1
+
+                    exx1 = Vlag1[t] + ex * ex_prev'
+                    Uut_prev = Uut
+                    Uut = spU * ut
+
+                    if estimate_B
+                        beta_kp = kron(exx_prev, spQinv)
+                        beta_S1 += spB_D' * beta_kp * spB_D
+                        beta_S2 += pmodel.B.D' * (vec(Qinv * exx1) -
+                                      beta_kp * pmodel.B.f - vec(Qinv * Uut_prev * ex_prev'))
+                    end #if B
+
+                    if estimate_Q
+                        q_S1 += pmodel.Q.D' * pmodel.Q.D
+                        q_S2 += pmodel.Q.D' * vec(phi * (exx - exx1 * spB' -
+                                    spB * exx1' - ex * Uut_prev' - Uut_prev * ex' + spB * exx_prev * spB' +
+                                    spB * ex_prev * Uut_prev' + Uut_prev * ex_prev' * spB' + Uut_prev * Uut_prev') * phi')
+                    end #if Q
+
+                end #if BQ
+
+            end #if BQZ
+
+            if estimate_Z | estimate_R | estimate_U | estimate_A
+                yt = y[:, t]
+                Nt = get_Nt(y_notnan[:, t])
+                Aut = m.A * ut
+                ey = yt - Nt * (yt - m.Z * ex - Aut)
+
+                if estimate_Z | estimate_R
+
+                    eyx = Nt * m.Z * Vt + ey * ex'
+
+                    if estimate_Z
+                        zeta_kp = kron(exx, Rinv)
+                        zeta_S1 += pmodel.Z.D' * zeta_kp * pmodel.Z.D
+                        zeta_S2 += pmodel.Z.D' * (vec(Rinv * eyx) - zeta_kp * pmodel.Z.f -
+                                      vec(Rinv * Aut * ex'))
+                    end #if Z
+
+                    if estimate_R
+                        I2 = diagm(!y_notnan[:, t])
+                        eyy = I2 * (Nt * HRH' + Nt * m.Z * Vt * m.Z' * Nt') * I2 + ey * ey'
+                        r_S1 += pmodel.R.D' * pmodel.R.D
+                        r_S2 += pmodel.R.D' * vec(xi * (eyy - eyx * m.Z' -
+                                    m.Z * eyx' - ey * Aut' - Aut * ey' + m.Z * exx * m.Z' +
+                                    m.Z * ex * Aut' + Aut * ex' * m.Z' + Aut * Aut') * xi')
+                    end #if R
+
+                end #if ZR
+
+                if estimate_U
+
+                    potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:]
+                    Idt = sparse(diagm(OQ0' * potential_deterministic_rows))
+                    nu_kp = kron(ut', speye(m.nx))
+                    B_ = spB * Bt1_
+                    fU = nu_kp * spU_f
+                    DU = nu_kp * spU_D
+                    f_ = spB * ft1_ + fU
+                    D_ = spB * Dt1_ + DU 
+
+                    Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut 
+                    Dt2 = m.Z * Idt * D_
+                    Dt3 = ex - spB * (I - Idt1) * ex_prev -
+                                spB * Idt1 * (Bt1_ * EX0 + ft1_) - fU 
+                    Dt4 = DU + spB * Idt1 * Dt1_ 
+
+                    nu_S1 += Dt4' * spQinv * Dt4 + Dt2' * Rinv * Dt2
+                    nu_S2 += Dt4' * spQinv * Dt3 + Dt2' * Rinv * Dt1 
+
+                    t <= (model.nx +1) ? M_t *= M : nothing
+                    Idt1 = Idt
+                    Bt1_ = B_
+                    ft1_ = f_
+                    Dt1_ = D_ 
+
+                end #if U
+
+                if estimate_A
+                    alpha_kp = kron(ut', eye(m.ny))
+                    alpha_S1 += pmodel.A.D' * alpha_kp' * Rinv * alpha_kp * pmodel.A.D
+                    alpha_S2 += pmodel.A.D' * alpha_kp' * Rinv * 
+                                    (ey - m.Z * ex - alpha_kp * pmodel.A.f)
+                end #if A
+
+            end #if ZRUA
+
+        end #for
+
+        beta    = estimate_B ? beta_S1 \ beta_S2    : T[]
+        nu      = estimate_U ? nu_S1 \ nu_S2        : T[]
+        q       = estimate_Q ? q_S1 \ q_S2          : T[]
+
+        zeta    = estimate_Z ? zeta_S1 \ zeta_S2    : T[]
+        alpha   = estimate_A ? alpha_S1 \ alpha_S2  : T[]
+        r       = estimate_R ? r_S1 \ r_S2          : T[]
+
+        p       = length(params.x1) == 0 ? T[] : (pmodel.x1.D' * Linv * pmodel.x1.D) \
+                        pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f)
+        lambda  = length(params.V1) == 0 ? T[] : (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' *
+                        vec(V[1] + exs[:, 1] * exs[:, 1]' -
+                        exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1')
+
+        toc()
+
+        return SSMParameters(beta, nu, q, zeta, alpha, r, p, lambda), loglik
+
+    end #em_kernel
+
+    fraction_change(ll_prev, ll_curr) = isinf(ll_prev) ?
+        1 : 2 * (ll_prev - ll_curr) / (ll_prev + ll_curr + 1e-6)
+
+    ll, ll_prev = Inf, Inf
+
+    while (niter > 0) & (fraction_change(ll_prev, ll) > eps)
+        ll_prev     = ll 
+        params, ll  = em_kernel(y, u, model, params)
+        println(params)
+        niter -= 1
+    end #while
+
+    niter > 0 ? nothing :
+        warn("Parameter estimation timed out - results may have failed to converge")
+
+    return params, model(params)
+end #fit
+
+function fit{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=zeros(size(y,1), model.nu),
+              eps::Float64=1e-6, niter::Int=typemax(Int))
+
+    # B, Z, x1 default to parametrized as fully unconstrained
+    B, B_params = parametrize_full(model.B)
+    Z, Z_params = parametrize_full(model.Z)
+    x1, x1_params = parametrize_full(model.x1)
+
+    # U, A default to fixed
+    U, U_params = parametrize_none(model.U)
+    A, A_params = parametrize_none(model.A)
+
+    # Q, R, V1 default to parametrized as diagonal with independent elements - any other values
+    #   are ignored / set to zero 
+    Q, Q_params = parametrize_diag(diag(model.Q))
+    R, R_params = parametrize_diag(diag(model.R))
+    V1, V1_params = parametrize_diag(diag(model.V1))
+
+    pmodel = ParametrizedSSM(B, U, model.G, Q, Z, A, model.H, R, x1, V1)
+    params = SSMParameters(B_params, U_params, Q_params, Z_params, A_params, R_params, x1_params, V1_params)
+    fit(y, pmodel, params, u=u, eps=eps, niter=niter)
+
+end #fit
+
diff --git a/src/statespacemodel.jl b/src/statespacemodel.jl
index 550b54e..efee706 100644
--- a/src/statespacemodel.jl
+++ b/src/statespacemodel.jl
@@ -1,12 +1,14 @@
-immutable StateSpaceModel{T <: Real}
+abstract AbstractStateSpaceModel{T <: Real}
+
+immutable StateSpaceModel{T} <: AbstractStateSpaceModel{T}
 
     # Process transition matrix, control matrix, and noise covariance
-    F::Function
+    A::Function
     B::Function
     V::Function
 
     # Observation matrix, feed-forward matrix, and noise covariance
-    G::Function
+    C::Function
     D::Function
     W::Function
 
@@ -19,48 +21,33 @@ immutable StateSpaceModel{T <: Real}
     ny::Int
     nu::Int
 
-    function StateSpaceModel(F::Function, B::Function, V::Function,
-                    G::Function, D::Function, W::Function, x1::Vector{T}, P1::Matrix{T})
+    function StateSpaceModel{T}(A::Function, B::Function, V::Function,
+                                C::Function, D::Function, W::Function,
+                                x1::Vector{T}, P1::Matrix{T})
 
         ispossemidef(x::Matrix) = issym(x) && (eigmin(x) >= 0)
         @assert ispossemidef(V(1))
         @assert ispossemidef(W(1))
         @assert ispossemidef(P1)
 
-        nx, ny, nu = confirm_matrix_sizes(F(1), B(1), V(1), G(1), D(1), W(1), x1, P1)
-        new(F, B, V, G, D, W, x1, P1, nx, ny, nu)
+        nx, ny, nu = confirm_matrix_sizes(A(1), B(1), V(1), C(1), D(1), W(1), x1, P1)
+        new(A, B, V, C, D, W, x1, P1, nx, ny, nu)
     end
 end
 
-# Time-dependent definitions
-function StateSpaceModel{T}(F::Function, B::Function, V::Function,
-                            G::Function, D::Function, W::Function,
-                            x1::Vector{T}, P1::Matrix{T})
-	  StateSpaceModel{T}(F, B, V, G, D, W, x1, P1)
-end
+# Time-dependent constructor
+StateSpaceModel{T}(A::Function, V::Function, C::Function, W::Function,
+                          x1::Vector{T}, P1::Matrix{T};
+                          B::Function=_->zeros(size(V(1), 1), 1),
+                          D::Function=_->zeros(size(W(1), 1), 1)) =
+	  StateSpaceModel{T}(A, B, V, C, D, W, x1, P1)
 
-function StateSpaceModel{T}(F::Function, V::Function,
-                            G::Function, W::Function,
-                            x1::Vector{T}, P1::Matrix{T})
-    B(_) = zeros(size(V(1), 1), 1)
-    D(_) = zeros(size(W(1), 1), 1)
-    StateSpaceModel{T}(F, B, V, G, D, W, x1, P1)
-end
-
-# Time-independent definitions
-function StateSpaceModel{T}(F::Matrix{T}, B::Matrix{T}, V::Matrix{T},
-                            G::Matrix{T}, D::Matrix{T}, W::Matrix{T},
-                            x1::Vector{T}, P1::Matrix{T})
-	StateSpaceModel{T}(_->F, _->B, _->V, _->G, _->D, _->W, x1, P1)
-end
-
-function StateSpaceModel{T}(F::Matrix{T}, V::Matrix{T},
-                            G::Matrix{T}, W::Matrix{T},
-                            x1::Vector{T}, P1::Matrix{T})
-    B(_) = zeros(size(V, 1), 1)
-    D(_) = zeros(size(W, 1), 1)
-    StateSpaceModel{T}(_->F, B, _->V, _->G, D, _->W, x1, P1)
-end
+# Time-independent constructor
+StateSpaceModel{T}(A::Matrix{T}, V::Matrix{T}, C::Matrix{T}, W::Matrix{T},
+                          x1::Vector{T}, P1::Matrix{T};
+                          B::Matrix{T}=zeros(size(A, 1), 1),
+                          D::Matrix{T}=zeros(size(C, 1), 1)) =
+	  StateSpaceModel{T}(_->A, _->B, _->V, _->C, _->D, _->W, x1, P1)
 
 function show{T}(io::IO, mod::StateSpaceModel{T})
     dx, dy = length(mod.x1), size(mod.G, 1)
@@ -79,6 +66,181 @@ function show{T}(io::IO, mod::StateSpaceModel{T})
     show(mod.W)
 end
 
+# Time-independent parametrized matrix type
+immutable ParametrizedMatrix{T<:Real}
+
+    f::Vector{T}
+    D::Matrix{T}
+    np::Int
+    dims::Tuple{Int, Int}
+
+    function ParametrizedMatrix(f::Vector{T}, D::Matrix{T}, dims::Tuple{Int, Int})
+        @assert length(f) == size(D, 1)
+        @assert length(f) == dims[1] * dims[2]
+        new(f, D, size(D, 2), dims)
+    end #ParametrizedMatrix
+
+end #ParametrizedMatrix
+
+ParametrizedMatrix{T}(f::Vector{T}, D::Matrix{T}, dims::Tuple{Int, Int}) = ParametrizedMatrix{T}(f, D, dims)
+
+function show{T}(io::IO, cpm::ParametrizedMatrix{T})
+
+    function combinestrelems(a, b) 
+        if (a != "") & (b != "")
+          a * " + " * b 
+        elseif (a == "") & (b == "")
+          "0"
+        else
+          a * b
+        end #if
+    end #combinestrelems
+
+    nx, ny = cpm.dims
+    println(nx, "x", ny, " ParametrizedMatrix{$T}")
+    conststring = map(x -> x == 0 ? "" : string(x), cpm.f)
+    paramstring = fill("", length(cpm.f))
+    paramelems = fill("", cpm.np)
+
+    for i = 1:size(cpm.D,1)
+        for j = 1:cpm.np
+            if  cpm.D[i,j] == 0
+                paramelems[j] = ""
+            elseif cpm.D[i,j] == 1
+                paramelems[j] = string(Char(96+j))
+            else
+                paramelems[j] = string(cpm.D[i,j],Char(96+j))
+            end #if
+        end #for
+        paramstring[i] = reduce(combinestrelems, paramelems)
+    end #for
+
+    finalstrings = map(combinestrelems, conststring, paramstring)
+
+    showcompact(reshape(finalstrings, cpm.dims))
+end
+
+Base.length(cpm::ParametrizedMatrix) = cpm.dims[1] * cpm.dims[2]
+Base.size(cpm::ParametrizedMatrix) = cpm.dims
+Base.size(cpm::ParametrizedMatrix, dim::Int) = cpm.dims[dim]
+Base.call(cpm::ParametrizedMatrix, params::Vector) = all(cpm.D .== 0) ?
+        reshape(cpm.f, cpm.dims) : reshape(cpm.f + cpm.D * params, cpm.dims)
+
+parametrize_full(X::Union{Vector, Matrix}) =
+        ParametrizedMatrix(zeros(length(X)), eye(length(X)), size(X'')), collect(X)
+
+function parametrize_diag(x::Vector)
+    n = length(x)
+    D = zeros(n^3)
+    D[[1 + (i-1)*(n^2 + n + 1) for i in 1:n]] = 1
+    D = reshape(D, n^2, n)
+    f = zeros(n^2)
+    return ParametrizedMatrix(f, D, (n,n)), x 
+end #parametrize_diag
+
+parametrize_none{T}(X::Union{Vector{T}, Matrix{T}}) =
+        ParametrizedMatrix(collect(X), zeros(length(X))'', size(X'')), T[] 
+
+# Time-independent parametrized state space model
+immutable ParametrizedSSM{T} <: AbstractStateSpaceModel{T}
+
+    # Transition equation and noise covariance
+
+    A1::Function
+    A2::ParametrizedMatrix{T}
+    A3::Function
+
+    B1::Function
+    B2::ParametrizedMatrix{T}
+
+    G::Function
+    Q::ParametrizedMatrix{T}
+
+    # Observation equation and noise covariance
+
+    C1::Function
+    C2::ParametrizedMatrix{T}
+    C3::Function
+
+    D1::Function
+    D2::ParametrizedMatrix{T}
+
+    H::Function
+    R::ParametrizedMatrix{T}
+
+    # Initial state and error covariance
+    x1::ParametrizedMatrix{T}
+    P1::ParametrizedMatrix{T} #TODO ... Eliminate?
+
+    nx::Int
+    ny::Int
+    nu::Int
+    nq::Int
+    nr::Int
+
+    function ParametrizedSSM{T}(A1::Function, A2::ParametrizedMatrix{T}, A3::Function,
+                                B1::Function, B2::ParametrizedMatrix{T},
+                                G::Function, Q::ParametrizedMatrix{T},
+                                C1::Function, C2::ParametrizedMatrix{T}, C3::Function,
+                                D1::Function, D2::ParametrizedMatrix{T},
+                                H::Function, R::ParametrizedMatrix{T},
+                                x1::ParametrizedMatrix{T}, P1::ParametrizedMatrix{T})
+        nx, ny, nu, nq, nr = confirm_matrix_sizes(A1(1), A2, A3(1), B1(1), B2, G(1), Q,
+                                                  C1(1), C2, C3(1), D1(1), D2, H(1), R, x1, P1)
+        new(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1, nx, ny, nu, nq, nr)
+    end
+
+end #ParametrizedSSM
+
+ParametrizedSSM{T}(A2::ParametrizedMatrix{T}, Q::ParametrizedMatrix,
+                          C2::ParametrizedMatrix{T}, R::ParametrizedMatrix,
+                          x1::ParametrizedMatrix{T}, P1::ParametrizedMatrix{T};
+                          A1::Function=_->eye(size(A2(1),1)), A3::Function=_->eye(size(A2(1),2)),
+                          B1::Function=_->eye(size(A1(1),1)),
+                          B2::ParametrizedMatrix{T}=parametrize_none(zeros(size(A1(1),1), 1))[1],
+                          G::Function=_->eye(size(A1(1),1)),
+                          C1::Function=_->eye(size(C2(1), 1)), C3::Function=_->eye(size(C2(1),2)),
+                          D1::Function=_->eye(size(C1(1),1)),
+                          D2::ParametrizedMatrix{T}=parametrize_none(zeros(size(C1(1),1), 1))[1],
+                          H::Function=_->eye(size(A1(1),1))) =
+          ParametrizedSSM(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1)
+
+# State space model parameters
+immutable SSMParameters{T}
+
+    # Process transition and noise covariance
+    A::Vector{T}
+    B::Vector{T}
+    Q::Vector{T}
+
+    # Observation and noise covariance
+    C::Vector{T}
+    D::Vector{T}
+    R::Vector{T}
+
+    # Initial state and error covariance
+    x1::Vector{T}
+    P1::Vector{T}
+
+end #SSMParameters
+
+SSMParameters{T}(A::Vector{T}, Q::Vector{T}, C::Vector{T}, R::Vector{T},
+                  x1::Vector{T}, P1::Vector{T};
+                  B::Vector{T}=T[], D::Vector{T}=T[]) =
+              SSMParameters{T}(A, B, Q, C, D, R, x1, P1)
+
+function call(m::ParametrizedSSM, p::SSMParameters)
+    A(t) = m.A1(t) * m.A2(p.A) * m.A3(t)
+    B(t) = m.B1(t) * m.B2(p.B)
+    V(t) = m.G(t) * m.Q(p.Q) * m.G(t)'
+    C(t) = m.C1(t) * m.C2(p.C) * m.C3(t)
+    D(t) = m.D1(t) * m.D2(p.D)
+    W(t) = m.H(t) * m.R(p.R) * m.H(t)'
+    x1 = m.x1(p.x1)
+    P1 = m.P1(p.P1)
+    return StateSpaceModel(A, V, C, W, x1, P1, B=B, D=D)
+end #call
+
 function confirm_matrix_sizes(F, B, V, G, D, W, x1, P1)
 
     nx = size(F, 1)
@@ -100,6 +262,48 @@ function confirm_matrix_sizes(F, B, V, G, D, W, x1, P1)
 
 end #confirm_matrix_sizes
 
+function confirm_matrix_sizes(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1)
+
+    nx = size(A1(1), 1)
+    ny = size(C1(1), 1)
+    nu = size(D2(1), 2)
+
+    na1, na2  = size(A1(1), 2), size(A2(1), 2)
+    nb        = size(B1(1), 2)
+    nc1, nc2  = size(C1(1), 2), size(C2(1), 2)
+    nd        = size(D1(1), 2)
+
+    nq = size(Q(1), 1)
+    nr = size(R(1), 1)
+
+    @assert size(A1) == (nx, na1)
+    @assert size(A2) == (na1, na2)
+    @assert size(A3) == (na2, nx)
+
+    @assert size(B1) == (nx, nb)
+    @assert size(B2) == (nb, nu)
+
+    @assert size(G)  == (nx, nq)
+    @assert size(Q)  == (nq, nq)
+
+    @assert size(C1) == (ny, nc1)
+    @assert size(C2) == (nc1, nc2)
+    @assert size(C3) == (nc2, nx)
+
+    @assert size(D1) == (ny, nd)
+    @assert size(D2) == (nd, nu)
+
+    @assert size(H)  == (ny, nr)
+    @assert size(R)  == (nr, nr)
+
+    @assert length(x1)  == nx
+    @assert size(P1)    == (nx, nx)
+
+    return nx, ny, nu, nq, nr
+
+end #confirm_matrix_sizes
+
+
 function simulate(model::StateSpaceModel, n::Int; u::Array=zeros(n, model.nu))
     # Generates a realization of a state space model.
     #
@@ -126,10 +330,10 @@ function simulate(model::StateSpaceModel, n::Int; u::Array=zeros(n, model.nu))
 
     # Generate the series
     x[:, 1] = model.x1
-    y[:, 1] = model.G(1) * model.x1 + model.D(1) * u[:, 1] + W_chol(1) * randn(model.ny)
+    y[:, 1] = model.C(1) * model.x1 + model.D(1) * u[:, 1] + W_chol(1) * randn(model.ny)
     for i=2:n
-        x[:, i] = model.F(i-1) * x[:, i-1] + model.B(i-1) * u[:, i-1] + V_chol(i-1) * randn(model.nx)
-        y[:, i] = model.G(i)   * x[:, i]   + model.D(i)   * u[:, i]   + W_chol(1) * randn(model.ny)
+        x[:, i] = model.A(i-1) * x[:, i-1] + model.B(i-1) * u[:, i-1] + V_chol(i-1) * randn(model.nx)
+        y[:, i] = model.C(i)   * x[:, i]   + model.D(i)   * u[:, i]   + W_chol(i) * randn(model.ny)
     end
 
     return x', y'
diff --git a/test/kalman_filter.jl b/test/kalman_filter.jl
index 5f1f1e8..26d8ecc 100644
--- a/test/kalman_filter.jl
+++ b/test/kalman_filter.jl
@@ -105,8 +105,10 @@ facts("Kalman Filter") do
             input = 100*[sin(t/2) sin(t/4) cos(t/2) cos(t/4)] + 10
             y_noisy = [y_true zeros(length(t)) -y_true] +
                         100*[sin(t/2)+sin(t/4) sin(t/2)+cos(t/2) cos(t/2)+cos(t/4)] + 10 + randn(length(t), 3)
-            lm = StateSpaceModel([1 dt; 0 1], zeros(2,4), zeros(2,2),
-                    [1. 0; 0 0; -1 0], [1. 1 0 0; 1 0 1 0; 0 0 1 1], s*eye(3), zeros(2), 100*eye(2))
+            lm = StateSpaceModel([1 dt; 0 1], zeros(2,2),
+                                  [1. 0; 0 0; -1 0], s*eye(3),
+                                  zeros(2), 100*eye(2),
+                                  B=zeros(2, 4), D=[1. 1 0 0; 1 0 1 0; 0 0 1 1])
             lm_filt = kalman_filter(y_noisy, lm, u=input)
             @fact lm_filt.filtered[end,1] --> roughly(y_true[end, 1], atol=3*sqrt(lm_filt.error_cov[1,1,end]))
 

From ab55ec8c13933e1d3fedcd3aa1311a03dffb881f Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Fri, 23 Oct 2015 14:20:02 -0400
Subject: [PATCH 2/2] First stage of EM estimation implementation

---
 src/kalman_smooth.jl        |  20 ++++
 src/parameter_estimation.jl | 226 +++++++++++++++++-------------------
 2 files changed, 124 insertions(+), 122 deletions(-)

diff --git a/src/kalman_smooth.jl b/src/kalman_smooth.jl
index 9b2713b..2d22f38 100644
--- a/src/kalman_smooth.jl
+++ b/src/kalman_smooth.jl
@@ -133,6 +133,26 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1
 
 end #smooth
 
+# Bad performance - will be much better with sparse matrices
+function lag1_smooth(y::Array, u::Array, m::StateSpaceModel)
+
+    A_stack(t) = [m.A(t) zeros(m.nx, m.nx); eye(m.nx) zeros(m.nx, m.nx)]
+    B_stack(t) = [m.B(t); zeros(m.nx, m.nu)]
+    V_stack(t) = [m.V(t) zeros(m.nx, m.nx); zeros(m.nx, 2*m.nx)]
+    C_stack(t) = [m.C(t) zeros(m.ny, m.nx)]
+    x1_stack   = [m.x1; zeros(model.x1)]
+    P1_stack   = [m.P1 zeros(m.nx, m.nx); zeros(m.nx, 2*m.nx)]
+    stack_model = StateSpaceModel(A_stack, B_stack, V_stack,
+                                  C_stack, model.D, model.W, x1_stack, V1_stack)
+
+    stack_smoothed = smooth(y, stack_model, u=u)
+    x     = stack_smoothed.x[:, 1:model.nx]
+    V     = stack_smoothed.V[1:model.nx, 1:model.nx, :]
+    Vlag1 = stack_smoothed.V[1:model.nx, (model.nx+1):end]
+    return x, V, Vlag1, stack_smoothed.loglik
+
+end #function
+
 
 # Rauch-Tung-Striebel Smoothing
 function kalman_smooth(filt::KalmanFiltered)
diff --git a/src/parameter_estimation.jl b/src/parameter_estimation.jl
index 80efb43..75d209d 100644
--- a/src/parameter_estimation.jl
+++ b/src/parameter_estimation.jl
@@ -9,78 +9,65 @@ end #fit
 
 # Expectation-Maximization (EM) parameter estimation
 
-function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
+function fit{T}(y::Array{T}, pmodel::ParametrizedSSM, params::SSMParameters;
           u::Array{T}=zeros(size(y,1), size(model.A, 2)), eps::Float64=1e-6, niter::Int=typemax(Int))
 
-    function em_kernel{T}(y::Array, u::Array, pmodel::ParametrizedSSM, params::SSMParameters{T})
-
-        function lag1_smooth(y::Array, u::Array, model::StateSpaceModel)
-            B_stack = [model.B zeros(model.B); eye(model.nx) zeros(model.B)]
-            U_stack = [model.U; zeros(model.U)]
-            G_stack = [model.G; zeros(model.G)]
-            Z_stack = [model.Z zeros(model.Z)]
-            x1_stack = [model.x1; zeros(model.x1)]
-            V1_stack = [model.V1 zeros(model.V1); zeros(model.V1) zeros(model.V1)]
-            stack_model = StateSpaceModel(B_stack, U_stack, G_stack, model.Q, Z_stack, model.A, model.H, model.R, x1_stack, V1_stack)
-            stack_smoothed = smooth(y, stack_model, u=u)
-            V     = SparseMatrixCSC[Vt[1:model.nx, 1:model.nx] for Vt in stack_smoothed.V]
-            Vlag1 = SparseMatrixCSC[Vt[1:model.nx, (model.nx+1):end] for Vt in stack_smoothed.V]
-            return stack_smoothed.x[:, 1:model.nx]', V, Vlag1, stack_smoothed.loglik
-        end #function
+    n = size(y, 1)
+    @assert n == size(u, 1)
 
+    y_orig = copy(y)
+    y = y'
+    y_notnan = !isnan(y)
+    y = y .* y_notnan
+
+    u_orig = copy(u)
+    u = u'
+
+    function em_kernel!{T}(params::SSMParameters{T})
+
+        estimate_A   = length(params.A) > 0
         estimate_B   = length(params.B) > 0
-        estimate_U   = length(params.U) > 0
         estimate_Q   = length(params.Q) > 0
-        estimate_Z   = length(params.Z) > 0
-        estimate_A   = length(params.A) > 0
+        estimate_C   = length(params.C) > 0
+        estimate_D   = length(params.D) > 0
         estimate_R   = length(params.R) > 0
         estimate_x1  = length(params.x1) > 0
-        estimate_V1  = length(params.V1) > 0
+        estimate_P1  = length(params.P1) > 0
 
         m = pmodel(params)
-        spB = sparse(m.B)
-        spU = sparse(m.U)
 
         print("Expectations (smoothing)... ")
         tic()
 
         if estimate_B | estimate_Q
-            exs, V, Vlag1, loglik = lag1_smooth(y, u, m)
+            exs, P, Plag1, loglik = lag1_smooth(y, u, m)
         else
             smoothed = smooth(y, m, u=u)
-            exs, V, loglik = smoothed.x', smoothed.V, smoothed.loglik
+            exs, P, loglik = smoothed.x', smoothed.V, smoothed.loglik
         end
 
         toc()
         println("Negative log-likelihood: ", loglik)
 
-        y = y'
-        u = u'
-        n = size(y,2)
-
         print("Maximizations... ")
         tic()
 
-        y_notnan = !isnan(y)
-        y = y .* y_notnan
-
         if estimate_B | estimate_U | estimate_Q
-            phi = (m.G' * m.G) \ m.G'
-            Qinv = phi' * inv(m.Q) * phi
-            spQinv = sparse(Qinv)
+            phi(t) = (pmodel.G(t)' * pmodel.G(t)) \ pmodel.G(t)'
+            Qinv(t) = phi(t)' * inv(pmodel.Q(params.Q)) * phi
         end #if BU
 
         if estimate_U | estimate_Z | estimate_A | estimate_R
-            xi = (m.H' * m.H) \ m.H'
-            Rinv = xi' * inv(m.R) * xi
+            xi(t) = (pmodel.H(t)' * pmodel.H(t)) \ pmodel.H(t)'
+            Rinv(t) = xi(t)' * inv(pmodel.R(params.R)) * xi(t)
         end #if
 
         if estimate_x1
-            Linv = inv(m.V1) #TODO: Degenerate accomodations?
+            Linv = inv(m.P1) #TODO: Degenerate accomodations?
         end #if
 
-        HRH = m.H * m.R * m.H'
-        HRH_nonzero_rows = diag(HRH) .!= 0
+        HRH(t) = pmodel.H(t) * pmodel.R(t) * pmodel.H(t)'
+        HRH_nonzero_rows(t) = diag(HRH(t)) .!= 0
 
         function get_Nt(y_notnan_t::Vector{Bool})
             O = eye(m.ny)[find(y_notnan_t & HRH_nonzero_rows), :]
@@ -92,40 +79,36 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
 
         yt = y[:, 1]
         Nt = get_Nt(y_notnan[:, 1])
-        ey = yt - Nt * (yt - m.Z * exs[:, 1] - m.A * u[:, 1])
+        ey = yt - Nt * (yt - m.C(1) * exs[:, 1] - m.D(1) * u[:, 1])
 
-        Uut = spU * u[:, 1]
-        Aut = m.A * u[:, 1]
+        Uut = m.B(1) * u[:, 1]
+        Aut = m.D(1) * u[:, 1]
         
-        if estimate_B # B setup
-            spB_D = sparse(pmodel.B.D)
+        if estimate_A # B setup
             beta_S1 = zeros(length(params.B), length(params.B))
             beta_S2 = zeros(params.B)
         end #if B
 
-        if estimate_U # U setup
-            spU_f = sparse(pmodel.U.f)
-            spU_D = sparse(pmodel.U.D)
-
-            deterministic = all(m.G .== 0, 2)
+        if estimate_B # U setup
+            deterministic = all(model.G(1) .== 0, 2)
             OQ0, OQp = speye(m.nx)[find(deterministic), :], speye(m.nx)[find(!deterministic), :]
 
-            nu_kp = kron(u[:, 1]', speye(m.nx))
-            fU = nu_kp * spU_f 
-            DU = nu_kp * spU_D 
+            nu_kp = kron(u[:, 1]', eye(m.nx))
+            fU = nu_kp * pmodel.A.f 
+            DU = nu_kp * pmodel.A.D 
 
-            Idt = speye(m.nx)
-            B_ = speye(m.nx)
-            f_ = nu_kp * spU_f * 0
-            D_ = nu_kp * spU_D * 0
-            M = sparse((m.B .!= 0) * 1.)
+            Idt = eye(m.nx)
+            B_  = eye(m.nx)
+            f_  = nu_kp * pmodel.A.f * 0
+            D_  = nu_kp * pmodel.D.f * 0
+            M   = m.B(1) .!= 0
             M_t = copy(M)
             EX0 = exs[:, 1]
             potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:]
 
-            Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut 
-            Dt2 = m.Z * Idt * D_
-            Dt3 = zeros(model.nx) 
+            Dt1 = ey - m.C(1) * (I - Idt) * ex - m.C(1) * Idt * (B_ * EX0 + f_) - Aut 
+            Dt2 = m.C(1) * Idt * D_
+            Dt3 = zeros(pmodel.nx) 
             Dt4 = DU * 0
 
             Idt1 = Idt
@@ -142,14 +125,14 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
             q_S2 = zeros(params.Q)
         end #if
 
-        if estimate_Z # Z setup
-            zeta_S1 = zeros(length(params.Z), length(params.Z)) 
-            zeta_S2 = zeros(params.Z)
+        if estimate_C # Z setup
+            zeta_S1 = zeros(length(params.C), length(params.C)) 
+            zeta_S2 = zeros(params.C)
         end #if
 
-        if estimate_A # A setup
-            alpha_S1 = zeros(length(params.A), length(params.A)) 
-            alpha_S2 = zeros(params.A)
+        if estimate_D # A setup
+            alpha_S1 = zeros(length(params.D), length(params.D)) 
+            alpha_S2 = zeros(params.D)
         end #if
 
         if estimate_R # R setup
@@ -161,34 +144,33 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
 
             ex_prev = ex
             ex      = exs[:, t]
-            Vt      = V[t]
+            At      = m.A(t)
+            Vt      = V[:, :, t]
             ut      = u[:, t]
 
-            println(Vt)
-
-            if estimate_B | estimate_Q | estimate_Z
+            if estimate_A | estimate_Q | estimate_C
 
                 exx_prev = exx
                 exx = Vt + ex * ex'
 
-                if (estimate_B | estimate_Q) && t > 1
+                if (estimate_A | estimate_Q) && t > 1
 
-                    exx1 = Vlag1[t] + ex * ex_prev'
+                    exx1 = Vlag1[:, :, t] + ex * ex_prev'
                     Uut_prev = Uut
-                    Uut = spU * ut
+                    Uut = m.B(t) * ut
 
-                    if estimate_B
-                        beta_kp = kron(exx_prev, spQinv)
-                        beta_S1 += spB_D' * beta_kp * spB_D
+                    if estimate_A
+                        beta_kp = kron(exx_prev, Qinv)
+                        beta_S1 += pmodel.B.D' * beta_kp * pmodel.B.D 
                         beta_S2 += pmodel.B.D' * (vec(Qinv * exx1) -
                                       beta_kp * pmodel.B.f - vec(Qinv * Uut_prev * ex_prev'))
                     end #if B
 
                     if estimate_Q
                         q_S1 += pmodel.Q.D' * pmodel.Q.D
-                        q_S2 += pmodel.Q.D' * vec(phi * (exx - exx1 * spB' -
-                                    spB * exx1' - ex * Uut_prev' - Uut_prev * ex' + spB * exx_prev * spB' +
-                                    spB * ex_prev * Uut_prev' + Uut_prev * ex_prev' * spB' + Uut_prev * Uut_prev') * phi')
+                        q_S2 += pmodel.Q.D' * vec(phi(t) * (exx - exx1 * At' -
+                                    At * exx1' - ex * Uut_prev' - Uut_prev * ex' + At * exx_prev * At' +
+                                    At * ex_prev * Uut_prev' + Uut_prev * ex_prev' * At' + Uut_prev * Uut_prev') * phi(t)')
                     end #if Q
 
                 end #if BQ
@@ -197,13 +179,14 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
 
             if estimate_Z | estimate_R | estimate_U | estimate_A
                 yt = y[:, t]
+                Ct = m.C(t)
                 Nt = get_Nt(y_notnan[:, t])
-                Aut = m.A * ut
-                ey = yt - Nt * (yt - m.Z * ex - Aut)
+                Aut = m.D(t) * ut
+                ey = yt - Nt * (yt - Ct * ex - Aut)
 
                 if estimate_Z | estimate_R
 
-                    eyx = Nt * m.Z * Vt + ey * ex'
+                    eyx = Nt * Ct * Vt + ey * ex'
 
                     if estimate_Z
                         zeta_kp = kron(exx, Rinv)
@@ -214,11 +197,11 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
 
                     if estimate_R
                         I2 = diagm(!y_notnan[:, t])
-                        eyy = I2 * (Nt * HRH' + Nt * m.Z * Vt * m.Z' * Nt') * I2 + ey * ey'
+                        eyy = I2 * (Nt * HRH(t)' + Nt * Ct * Vt * Ct' * Nt') * I2 + ey * ey'
                         r_S1 += pmodel.R.D' * pmodel.R.D
-                        r_S2 += pmodel.R.D' * vec(xi * (eyy - eyx * m.Z' -
-                                    m.Z * eyx' - ey * Aut' - Aut * ey' + m.Z * exx * m.Z' +
-                                    m.Z * ex * Aut' + Aut * ex' * m.Z' + Aut * Aut') * xi')
+                        r_S2 += pmodel.R.D' * vec(xi(t) * (eyy - eyx * Ct' -
+                                    Ct * eyx' - ey * Aut' - Aut * ey' + Ct * exx * Ct' +
+                                    Ct * ex * Aut' + Aut * ex' * Ct' + Aut * Aut') * xi(t)')
                     end #if R
 
                 end #if ZR
@@ -226,24 +209,24 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
                 if estimate_U
 
                     potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:]
-                    Idt = sparse(diagm(OQ0' * potential_deterministic_rows))
-                    nu_kp = kron(ut', speye(m.nx))
-                    B_ = spB * Bt1_
+                    Idt = diagm(OQ0' * potential_deterministic_rows)
+                    nu_kp = kron(ut', eye(m.nx))
+                    B_ = At * Bt1_
                     fU = nu_kp * spU_f
                     DU = nu_kp * spU_D
-                    f_ = spB * ft1_ + fU
-                    D_ = spB * Dt1_ + DU 
+                    f_ = At * ft1_ + fU
+                    D_ = At * Dt1_ + DU 
 
-                    Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut 
-                    Dt2 = m.Z * Idt * D_
-                    Dt3 = ex - spB * (I - Idt1) * ex_prev -
-                                spB * Idt1 * (Bt1_ * EX0 + ft1_) - fU 
-                    Dt4 = DU + spB * Idt1 * Dt1_ 
+                    Dt1 = ey - Ct * (I - Idt) * ex - Ct * Idt * (B_ * EX0 + f_) - Aut 
+                    Dt2 = Ct * Idt * D_
+                    Dt3 = ex - At * (I - Idt1) * ex_prev -
+                                At * Idt1 * (Bt1_ * EX0 + ft1_) - fU 
+                    Dt4 = DU + At * Idt1 * Dt1_ 
 
                     nu_S1 += Dt4' * spQinv * Dt4 + Dt2' * Rinv * Dt2
                     nu_S2 += Dt4' * spQinv * Dt3 + Dt2' * Rinv * Dt1 
 
-                    t <= (model.nx +1) ? M_t *= M : nothing
+                    t <= (pmodel.nx +1) ? M_t *= M : nothing
                     Idt1 = Idt
                     Bt1_ = B_
                     ft1_ = f_
@@ -255,30 +238,30 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
                     alpha_kp = kron(ut', eye(m.ny))
                     alpha_S1 += pmodel.A.D' * alpha_kp' * Rinv * alpha_kp * pmodel.A.D
                     alpha_S2 += pmodel.A.D' * alpha_kp' * Rinv * 
-                                    (ey - m.Z * ex - alpha_kp * pmodel.A.f)
+                                    (ey - Ct * ex - alpha_kp * pmodel.A.f)
                 end #if A
 
             end #if ZRUA
 
         end #for
 
-        beta    = estimate_B ? beta_S1 \ beta_S2    : T[]
-        nu      = estimate_U ? nu_S1 \ nu_S2        : T[]
-        q       = estimate_Q ? q_S1 \ q_S2          : T[]
+        params.A[:]  = estimate_A ? beta_S1 \ beta_S2    : T[]
+        params.B[:]  = estimate_B ? nu_S1 \ nu_S2        : T[]
+        params.Q[:]  = estimate_Q ? q_S1 \ q_S2          : T[]
 
-        zeta    = estimate_Z ? zeta_S1 \ zeta_S2    : T[]
-        alpha   = estimate_A ? alpha_S1 \ alpha_S2  : T[]
-        r       = estimate_R ? r_S1 \ r_S2          : T[]
+        params.C[:]  = estimate_C ? zeta_S1 \ zeta_S2    : T[]
+        params.D[:]  = estimate_D ? alpha_S1 \ alpha_S2  : T[]
+        params.R[:]  = estimate_R ? r_S1 \ r_S2          : T[]
 
-        p       = length(params.x1) == 0 ? T[] : (pmodel.x1.D' * Linv * pmodel.x1.D) \
-                        pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f)
-        lambda  = length(params.V1) == 0 ? T[] : (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' *
+        params.x1[:] = estimate_x1 ? (pmodel.x1.D' * Linv * pmodel.x1.D) \
+                        pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f) : T[]
+        params.P1[:] = estimate_P1 ? (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' *
                         vec(V[1] + exs[:, 1] * exs[:, 1]' -
-                        exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1')
+                        exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1') : T[]
 
         toc()
 
-        return SSMParameters(beta, nu, q, zeta, alpha, r, p, lambda), loglik
+        return loglik
 
     end #em_kernel
 
@@ -288,38 +271,37 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters;
     ll, ll_prev = Inf, Inf
 
     while (niter > 0) & (fraction_change(ll_prev, ll) > eps)
-        ll_prev     = ll 
-        params, ll  = em_kernel(y, u, model, params)
-        println(params)
+        ll_prev = ll 
+        ll      = em_kernel!(params)
         niter -= 1
     end #while
 
     niter > 0 ? nothing :
         warn("Parameter estimation timed out - results may have failed to converge")
 
-    return params, model(params)
+    return params, pmodel(params)
 end #fit
 
 function fit{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=zeros(size(y,1), model.nu),
               eps::Float64=1e-6, niter::Int=typemax(Int))
 
     # B, Z, x1 default to parametrized as fully unconstrained
-    B, B_params = parametrize_full(model.B)
-    Z, Z_params = parametrize_full(model.Z)
+    A, A_params = parametrize_full(model.A(1))
+    C, C_params = parametrize_full(model.C(1))
     x1, x1_params = parametrize_full(model.x1)
 
     # U, A default to fixed
-    U, U_params = parametrize_none(model.U)
-    A, A_params = parametrize_none(model.A)
+    B, B_params = parametrize_none(model.B(1))
+    D, D_params = parametrize_none(model.D(1))
 
     # Q, R, V1 default to parametrized as diagonal with independent elements - any other values
     #   are ignored / set to zero 
-    Q, Q_params = parametrize_diag(diag(model.Q))
-    R, R_params = parametrize_diag(diag(model.R))
-    V1, V1_params = parametrize_diag(diag(model.V1))
+    Q, Q_params = parametrize_diag(diag(model.V(1)))
+    R, R_params = parametrize_diag(diag(model.W(1)))
+    P1, P1_params = parametrize_diag(diag(model.P1))
 
-    pmodel = ParametrizedSSM(B, U, model.G, Q, Z, A, model.H, R, x1, V1)
-    params = SSMParameters(B_params, U_params, Q_params, Z_params, A_params, R_params, x1_params, V1_params)
+    pmodel = ParametrizedSSM(A, Q, C, R, x1, P1, B=B, D=D)
+    params = SSMParameters(A_params, B_params, Q_params, C_params, D_params, R_params, x1_params, V1_params)
     fit(y, pmodel, params, u=u, eps=eps, niter=niter)
 
 end #fit