From 5c7a6922a29451a47cf64d5adbc1da3384a54b7c Mon Sep 17 00:00:00 2001 From: Gord Stephen Date: Thu, 22 Oct 2015 09:56:32 -0400 Subject: [PATCH 1/2] Added parametric state space model and changed notation --- src/kalman_filter.jl | 24 +-- src/kalman_smooth.jl | 28 +-- src/parameter_estimation.jl | 329 +++++++++++++++++++++++++++++++++++- src/statespacemodel.jl | 280 +++++++++++++++++++++++++----- test/kalman_filter.jl | 6 +- 5 files changed, 597 insertions(+), 70 deletions(-) diff --git a/src/kalman_filter.jl b/src/kalman_filter.jl index b1c06eb..700fa64 100644 --- a/src/kalman_filter.jl +++ b/src/kalman_filter.jl @@ -9,7 +9,7 @@ type KalmanFiltered{T} loglik::T end -function show{T}(io::IO, filt::KalmanFiltered{T}) +function Base.show{T}(io::IO, filt::KalmanFiltered{T}) n = size(filt.y, 1) dx, dy = filt.model.nx, filt.model.ny println("KalmanFiltered{$T}") @@ -24,14 +24,14 @@ function kalman_filter{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=ze @assert size(u,2) == model.nu function kalman_recursions(y_i::Vector{T}, u_i::Vector{T}, - G_i::Matrix{T}, D_i::Matrix{T}, W_i::Matrix{T}, + C_i::Matrix{T}, D_i::Matrix{T}, W_i::Matrix{T}, x_pred_i::Vector{T}, P_pred_i::Matrix{T}) if !any(isnan(y_i)) - innov = y_i - G_i * x_pred_i - D_i * u_i - S = G_i * P_pred_i * G_i' + W_i # Innovation covariance - K = P_pred_i * G_i' / S # Kalman gain + innov = y_i - C_i * x_pred_i - D_i * u_i + S = C_i * P_pred_i * C_i' + W_i # Innovation covariance + K = P_pred_i * C_i' / S # Kalman gain x_filt_i = x_pred_i + K * innov - P_filt_i = (I - K * G_i) * P_pred_i + P_filt_i = (I - K * C_i) * P_pred_i dll = (dot(innov,S\innov) + logdet(S))/2 else x_filt_i = x_pred_i @@ -51,20 +51,20 @@ function kalman_filter{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=ze log_likelihood = n*model.ny*log(2pi)/2 # first iteration - F_1 = model.F(1) + A_1 = model.A(1) x_pred[:, 1] = model.x1 P_pred[:, :, 1] = model.P1 x_filt[:, 1], P_filt[:,:,1], dll = kalman_recursions(y[:, 1], u[:, 1], - model.G(1), model.D(1), model.W(1), + model.C(1), model.D(1), model.W(1), model.x1, model.P1) log_likelihood += dll for i=2:n - F_i1 = model.F(i) - x_pred[:, i] = F_i1 * x_filt[:, i-1] + model.B(i-1) * u[:, i-1] - P_pred[:, :, i] = F_i1 * P_filt[:, :, i-1] * F_i1' + model.V(i-1) + A_i1 = model.A(i-1) + x_pred[:, i] = A_i1 * x_filt[:, i-1] + model.B(i-1) * u[:, i-1] + P_pred[:, :, i] = A_i1 * P_filt[:, :, i-1] * A_i1' + model.V(i-1) x_filt[:, i], P_filt[:,:,i], dll = kalman_recursions(y[:, i], u[:, i], - model.G(i), model.D(i), model.W(i), + model.C(i), model.D(i), model.W(i), x_pred[:,i], P_pred[:,:,i]) log_likelihood += dll end diff --git a/src/kalman_smooth.jl b/src/kalman_smooth.jl index 34e2476..9b2713b 100644 --- a/src/kalman_smooth.jl +++ b/src/kalman_smooth.jl @@ -63,7 +63,7 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1 innov_cov_inv_t = copy(innov_cov_t) Kt = zeros(model.nx, model.ny) - Ft, Gt, Dt, Wt, ut = model.F(1), model.G(1), model.D(1), model.W(1), zeros(model.nu) + At, Ct, Dt, Wt, ut = model.A(1), model.C(1), model.D(1), model.W(1), zeros(model.nu) log_likelihood = n*model.ny*log(2pi)/2 marginal_likelihood(innov::Vector, innov_cov::Matrix, innov_cov_inv::Matrix) = @@ -74,16 +74,16 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1 # Predict using last iteration's values if t > 1 - x_pred_t = Ft * x_pred_t + model.B(t-1) * ut + Kt * innov_t - V_pred_t = Ft * V_pred_t * (Ft - Kt * Gt)' + model.V(t) + x_pred_t = At * x_pred_t + model.B(t-1) * ut + Kt * innov_t + V_pred_t = At * V_pred_t * (At - Kt * Ct)' + model.V(t) V_pred_t = (V_pred_t + V_pred_t')/2 end #if x_pred[:, t] = x_pred_t V_pred[:, :, t] = V_pred_t # Assign new values - Ft = model.F(t) - Gt = model.G(t) + At = model.A(t) + Ct = model.C(t) Dt = model.D(t) Wt = model.W(t) ut = u[:, t] @@ -93,15 +93,15 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1 if missing_obs ynnt = y_notnan[:, t] I1, I2 = diagm(ynnt), diagm(!ynnt) - Gt, Dt = I1 * Gt, I1 * Dt + Ct, Dt = I1 * Ct, I1 * Dt Wt = I1 * Wt * I1 + I2 * Wt * I2 end #if # Compute nessecary filtering quantities - innov_t = y[:, t] - Gt * x_pred_t - Dt * ut - innov_cov_t = Gt * V_pred_t * Gt' + Wt + innov_t = y[:, t] - Ct * x_pred_t - Dt * ut + innov_cov_t = Ct * V_pred_t * Ct' + Wt innov_cov_inv_t = inv(innov_cov_t) - Kt = Ft * V_pred_t * Gt' * innov_cov_inv_t + Kt = At * V_pred_t * Ct' * innov_cov_inv_t innov[:, t] = innov_t innov_cov_inv[:, :, t] = innov_cov_inv_t @@ -115,13 +115,13 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1 for t = n:-1:1 - Gt = model.G(t) + Ct = model.C(t) innov_cov_inv_t = innov_cov_inv[:, :, t] V_pred_t = V_pred[:, :, t] - L = model.F(t) - K[:, :, t] * Gt + L = model.A(t) - K[:, :, t] * Ct - r = Gt' * innov_cov_inv_t * innov[:, t] + L' * r - N = Gt' * innov_cov_inv_t * Gt + L' * N * L + r = Ct' * innov_cov_inv_t * innov[:, t] + L' * r + N = Ct' * innov_cov_inv_t * Ct + L' * N * L x_smooth[:, t] = x_pred[:, t] + V_pred_t * r V_smooth_t = V_pred_t - V_pred_t * N * V_pred_t @@ -149,7 +149,7 @@ function kalman_smooth(filt::KalmanFiltered) x_smooth[:, n] = x_filt[:, n] P_smoov[:, :, n] = P_filt[:, :, n] for i = (n-1):-1:1 - J = P_filt[:, :, i] * model.F(i)' * inv(P_pred[:,:,i+1]) + J = P_filt[:, :, i] * model.A(i)' * inv(P_pred[:,:,i+1]) x_smooth[:, i] = x_filt[:, i] + J * (x_smooth[:, i+1] - x_pred[:, i+1]) P_smoov[:, :, i] = P_filt[:, :, i] + J * (P_smoov[:, :, i+1] - P_pred[:,:,i+1]) * J' end diff --git a/src/parameter_estimation.jl b/src/parameter_estimation.jl index 1c456b5..80efb43 100644 --- a/src/parameter_estimation.jl +++ b/src/parameter_estimation.jl @@ -1,5 +1,326 @@ -function fit{T}(y::Matrix{T}, build::Function, theta0::Vector{T}) - objective(theta) = kalman_filter(y, build(theta)).loglik +# ML parameter estimation with filter result log-likelihood (via Nelder-Mead) + +function fit{T}(y::Matrix{T}, build::Function, theta0::Vector{T}; + u::Array{T}=zeros(size(y,1), build(theta0).nu)) + objective(theta) = kalman_filter(y, build(theta), u=u).loglik kfit = Optim.optimize(objective, theta0) - return (kfit.minimum, build(kfit.minimum)) -end + return (kfit.minimum, build(kfit.minimum), filter(y, build(kfit.minimum))) +end #fit + +# Expectation-Maximization (EM) parameter estimation + +function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; + u::Array{T}=zeros(size(y,1), size(model.A, 2)), eps::Float64=1e-6, niter::Int=typemax(Int)) + + function em_kernel{T}(y::Array, u::Array, pmodel::ParametrizedSSM, params::SSMParameters{T}) + + function lag1_smooth(y::Array, u::Array, model::StateSpaceModel) + B_stack = [model.B zeros(model.B); eye(model.nx) zeros(model.B)] + U_stack = [model.U; zeros(model.U)] + G_stack = [model.G; zeros(model.G)] + Z_stack = [model.Z zeros(model.Z)] + x1_stack = [model.x1; zeros(model.x1)] + V1_stack = [model.V1 zeros(model.V1); zeros(model.V1) zeros(model.V1)] + stack_model = StateSpaceModel(B_stack, U_stack, G_stack, model.Q, Z_stack, model.A, model.H, model.R, x1_stack, V1_stack) + stack_smoothed = smooth(y, stack_model, u=u) + V = SparseMatrixCSC[Vt[1:model.nx, 1:model.nx] for Vt in stack_smoothed.V] + Vlag1 = SparseMatrixCSC[Vt[1:model.nx, (model.nx+1):end] for Vt in stack_smoothed.V] + return stack_smoothed.x[:, 1:model.nx]', V, Vlag1, stack_smoothed.loglik + end #function + + estimate_B = length(params.B) > 0 + estimate_U = length(params.U) > 0 + estimate_Q = length(params.Q) > 0 + estimate_Z = length(params.Z) > 0 + estimate_A = length(params.A) > 0 + estimate_R = length(params.R) > 0 + estimate_x1 = length(params.x1) > 0 + estimate_V1 = length(params.V1) > 0 + + m = pmodel(params) + spB = sparse(m.B) + spU = sparse(m.U) + + print("Expectations (smoothing)... ") + tic() + + if estimate_B | estimate_Q + exs, V, Vlag1, loglik = lag1_smooth(y, u, m) + else + smoothed = smooth(y, m, u=u) + exs, V, loglik = smoothed.x', smoothed.V, smoothed.loglik + end + + toc() + println("Negative log-likelihood: ", loglik) + + y = y' + u = u' + n = size(y,2) + + print("Maximizations... ") + tic() + + y_notnan = !isnan(y) + y = y .* y_notnan + + if estimate_B | estimate_U | estimate_Q + phi = (m.G' * m.G) \ m.G' + Qinv = phi' * inv(m.Q) * phi + spQinv = sparse(Qinv) + end #if BU + + if estimate_U | estimate_Z | estimate_A | estimate_R + xi = (m.H' * m.H) \ m.H' + Rinv = xi' * inv(m.R) * xi + end #if + + if estimate_x1 + Linv = inv(m.V1) #TODO: Degenerate accomodations? + end #if + + HRH = m.H * m.R * m.H' + HRH_nonzero_rows = diag(HRH) .!= 0 + + function get_Nt(y_notnan_t::Vector{Bool}) + O = eye(m.ny)[find(y_notnan_t & HRH_nonzero_rows), :] + return I - HRH * O' * inv(O * HRH * O') * O + end #get_Nt + + ex = exs[:, 1] + exx = zeros(m.nx, m.nx) + + yt = y[:, 1] + Nt = get_Nt(y_notnan[:, 1]) + ey = yt - Nt * (yt - m.Z * exs[:, 1] - m.A * u[:, 1]) + + Uut = spU * u[:, 1] + Aut = m.A * u[:, 1] + + if estimate_B # B setup + spB_D = sparse(pmodel.B.D) + beta_S1 = zeros(length(params.B), length(params.B)) + beta_S2 = zeros(params.B) + end #if B + + if estimate_U # U setup + spU_f = sparse(pmodel.U.f) + spU_D = sparse(pmodel.U.D) + + deterministic = all(m.G .== 0, 2) + OQ0, OQp = speye(m.nx)[find(deterministic), :], speye(m.nx)[find(!deterministic), :] + + nu_kp = kron(u[:, 1]', speye(m.nx)) + fU = nu_kp * spU_f + DU = nu_kp * spU_D + + Idt = speye(m.nx) + B_ = speye(m.nx) + f_ = nu_kp * spU_f * 0 + D_ = nu_kp * spU_D * 0 + M = sparse((m.B .!= 0) * 1.) + M_t = copy(M) + EX0 = exs[:, 1] + potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:] + + Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut + Dt2 = m.Z * Idt * D_ + Dt3 = zeros(model.nx) + Dt4 = DU * 0 + + Idt1 = Idt + Bt1_ = B_ + ft1_ = f_ + Dt1_ = D_ + + nu_S1 = Dt2' * Rinv * Dt2 + nu_S2 = Dt2' * Rinv * Dt1 + end #if + + if estimate_Q # Q setup + q_S1 = zeros(length(params.Q), length(params.Q)) + q_S2 = zeros(params.Q) + end #if + + if estimate_Z # Z setup + zeta_S1 = zeros(length(params.Z), length(params.Z)) + zeta_S2 = zeros(params.Z) + end #if + + if estimate_A # A setup + alpha_S1 = zeros(length(params.A), length(params.A)) + alpha_S2 = zeros(params.A) + end #if + + if estimate_R # R setup + r_S1 = zeros(length(params.R), length(params.R)) + r_S2 = zeros(params.R) + end #if + + for t in 1:n + + ex_prev = ex + ex = exs[:, t] + Vt = V[t] + ut = u[:, t] + + println(Vt) + + if estimate_B | estimate_Q | estimate_Z + + exx_prev = exx + exx = Vt + ex * ex' + + if (estimate_B | estimate_Q) && t > 1 + + exx1 = Vlag1[t] + ex * ex_prev' + Uut_prev = Uut + Uut = spU * ut + + if estimate_B + beta_kp = kron(exx_prev, spQinv) + beta_S1 += spB_D' * beta_kp * spB_D + beta_S2 += pmodel.B.D' * (vec(Qinv * exx1) - + beta_kp * pmodel.B.f - vec(Qinv * Uut_prev * ex_prev')) + end #if B + + if estimate_Q + q_S1 += pmodel.Q.D' * pmodel.Q.D + q_S2 += pmodel.Q.D' * vec(phi * (exx - exx1 * spB' - + spB * exx1' - ex * Uut_prev' - Uut_prev * ex' + spB * exx_prev * spB' + + spB * ex_prev * Uut_prev' + Uut_prev * ex_prev' * spB' + Uut_prev * Uut_prev') * phi') + end #if Q + + end #if BQ + + end #if BQZ + + if estimate_Z | estimate_R | estimate_U | estimate_A + yt = y[:, t] + Nt = get_Nt(y_notnan[:, t]) + Aut = m.A * ut + ey = yt - Nt * (yt - m.Z * ex - Aut) + + if estimate_Z | estimate_R + + eyx = Nt * m.Z * Vt + ey * ex' + + if estimate_Z + zeta_kp = kron(exx, Rinv) + zeta_S1 += pmodel.Z.D' * zeta_kp * pmodel.Z.D + zeta_S2 += pmodel.Z.D' * (vec(Rinv * eyx) - zeta_kp * pmodel.Z.f - + vec(Rinv * Aut * ex')) + end #if Z + + if estimate_R + I2 = diagm(!y_notnan[:, t]) + eyy = I2 * (Nt * HRH' + Nt * m.Z * Vt * m.Z' * Nt') * I2 + ey * ey' + r_S1 += pmodel.R.D' * pmodel.R.D + r_S2 += pmodel.R.D' * vec(xi * (eyy - eyx * m.Z' - + m.Z * eyx' - ey * Aut' - Aut * ey' + m.Z * exx * m.Z' + + m.Z * ex * Aut' + Aut * ex' * m.Z' + Aut * Aut') * xi') + end #if R + + end #if ZR + + if estimate_U + + potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:] + Idt = sparse(diagm(OQ0' * potential_deterministic_rows)) + nu_kp = kron(ut', speye(m.nx)) + B_ = spB * Bt1_ + fU = nu_kp * spU_f + DU = nu_kp * spU_D + f_ = spB * ft1_ + fU + D_ = spB * Dt1_ + DU + + Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut + Dt2 = m.Z * Idt * D_ + Dt3 = ex - spB * (I - Idt1) * ex_prev - + spB * Idt1 * (Bt1_ * EX0 + ft1_) - fU + Dt4 = DU + spB * Idt1 * Dt1_ + + nu_S1 += Dt4' * spQinv * Dt4 + Dt2' * Rinv * Dt2 + nu_S2 += Dt4' * spQinv * Dt3 + Dt2' * Rinv * Dt1 + + t <= (model.nx +1) ? M_t *= M : nothing + Idt1 = Idt + Bt1_ = B_ + ft1_ = f_ + Dt1_ = D_ + + end #if U + + if estimate_A + alpha_kp = kron(ut', eye(m.ny)) + alpha_S1 += pmodel.A.D' * alpha_kp' * Rinv * alpha_kp * pmodel.A.D + alpha_S2 += pmodel.A.D' * alpha_kp' * Rinv * + (ey - m.Z * ex - alpha_kp * pmodel.A.f) + end #if A + + end #if ZRUA + + end #for + + beta = estimate_B ? beta_S1 \ beta_S2 : T[] + nu = estimate_U ? nu_S1 \ nu_S2 : T[] + q = estimate_Q ? q_S1 \ q_S2 : T[] + + zeta = estimate_Z ? zeta_S1 \ zeta_S2 : T[] + alpha = estimate_A ? alpha_S1 \ alpha_S2 : T[] + r = estimate_R ? r_S1 \ r_S2 : T[] + + p = length(params.x1) == 0 ? T[] : (pmodel.x1.D' * Linv * pmodel.x1.D) \ + pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f) + lambda = length(params.V1) == 0 ? T[] : (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' * + vec(V[1] + exs[:, 1] * exs[:, 1]' - + exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1') + + toc() + + return SSMParameters(beta, nu, q, zeta, alpha, r, p, lambda), loglik + + end #em_kernel + + fraction_change(ll_prev, ll_curr) = isinf(ll_prev) ? + 1 : 2 * (ll_prev - ll_curr) / (ll_prev + ll_curr + 1e-6) + + ll, ll_prev = Inf, Inf + + while (niter > 0) & (fraction_change(ll_prev, ll) > eps) + ll_prev = ll + params, ll = em_kernel(y, u, model, params) + println(params) + niter -= 1 + end #while + + niter > 0 ? nothing : + warn("Parameter estimation timed out - results may have failed to converge") + + return params, model(params) +end #fit + +function fit{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=zeros(size(y,1), model.nu), + eps::Float64=1e-6, niter::Int=typemax(Int)) + + # B, Z, x1 default to parametrized as fully unconstrained + B, B_params = parametrize_full(model.B) + Z, Z_params = parametrize_full(model.Z) + x1, x1_params = parametrize_full(model.x1) + + # U, A default to fixed + U, U_params = parametrize_none(model.U) + A, A_params = parametrize_none(model.A) + + # Q, R, V1 default to parametrized as diagonal with independent elements - any other values + # are ignored / set to zero + Q, Q_params = parametrize_diag(diag(model.Q)) + R, R_params = parametrize_diag(diag(model.R)) + V1, V1_params = parametrize_diag(diag(model.V1)) + + pmodel = ParametrizedSSM(B, U, model.G, Q, Z, A, model.H, R, x1, V1) + params = SSMParameters(B_params, U_params, Q_params, Z_params, A_params, R_params, x1_params, V1_params) + fit(y, pmodel, params, u=u, eps=eps, niter=niter) + +end #fit + diff --git a/src/statespacemodel.jl b/src/statespacemodel.jl index 550b54e..efee706 100644 --- a/src/statespacemodel.jl +++ b/src/statespacemodel.jl @@ -1,12 +1,14 @@ -immutable StateSpaceModel{T <: Real} +abstract AbstractStateSpaceModel{T <: Real} + +immutable StateSpaceModel{T} <: AbstractStateSpaceModel{T} # Process transition matrix, control matrix, and noise covariance - F::Function + A::Function B::Function V::Function # Observation matrix, feed-forward matrix, and noise covariance - G::Function + C::Function D::Function W::Function @@ -19,48 +21,33 @@ immutable StateSpaceModel{T <: Real} ny::Int nu::Int - function StateSpaceModel(F::Function, B::Function, V::Function, - G::Function, D::Function, W::Function, x1::Vector{T}, P1::Matrix{T}) + function StateSpaceModel{T}(A::Function, B::Function, V::Function, + C::Function, D::Function, W::Function, + x1::Vector{T}, P1::Matrix{T}) ispossemidef(x::Matrix) = issym(x) && (eigmin(x) >= 0) @assert ispossemidef(V(1)) @assert ispossemidef(W(1)) @assert ispossemidef(P1) - nx, ny, nu = confirm_matrix_sizes(F(1), B(1), V(1), G(1), D(1), W(1), x1, P1) - new(F, B, V, G, D, W, x1, P1, nx, ny, nu) + nx, ny, nu = confirm_matrix_sizes(A(1), B(1), V(1), C(1), D(1), W(1), x1, P1) + new(A, B, V, C, D, W, x1, P1, nx, ny, nu) end end -# Time-dependent definitions -function StateSpaceModel{T}(F::Function, B::Function, V::Function, - G::Function, D::Function, W::Function, - x1::Vector{T}, P1::Matrix{T}) - StateSpaceModel{T}(F, B, V, G, D, W, x1, P1) -end +# Time-dependent constructor +StateSpaceModel{T}(A::Function, V::Function, C::Function, W::Function, + x1::Vector{T}, P1::Matrix{T}; + B::Function=_->zeros(size(V(1), 1), 1), + D::Function=_->zeros(size(W(1), 1), 1)) = + StateSpaceModel{T}(A, B, V, C, D, W, x1, P1) -function StateSpaceModel{T}(F::Function, V::Function, - G::Function, W::Function, - x1::Vector{T}, P1::Matrix{T}) - B(_) = zeros(size(V(1), 1), 1) - D(_) = zeros(size(W(1), 1), 1) - StateSpaceModel{T}(F, B, V, G, D, W, x1, P1) -end - -# Time-independent definitions -function StateSpaceModel{T}(F::Matrix{T}, B::Matrix{T}, V::Matrix{T}, - G::Matrix{T}, D::Matrix{T}, W::Matrix{T}, - x1::Vector{T}, P1::Matrix{T}) - StateSpaceModel{T}(_->F, _->B, _->V, _->G, _->D, _->W, x1, P1) -end - -function StateSpaceModel{T}(F::Matrix{T}, V::Matrix{T}, - G::Matrix{T}, W::Matrix{T}, - x1::Vector{T}, P1::Matrix{T}) - B(_) = zeros(size(V, 1), 1) - D(_) = zeros(size(W, 1), 1) - StateSpaceModel{T}(_->F, B, _->V, _->G, D, _->W, x1, P1) -end +# Time-independent constructor +StateSpaceModel{T}(A::Matrix{T}, V::Matrix{T}, C::Matrix{T}, W::Matrix{T}, + x1::Vector{T}, P1::Matrix{T}; + B::Matrix{T}=zeros(size(A, 1), 1), + D::Matrix{T}=zeros(size(C, 1), 1)) = + StateSpaceModel{T}(_->A, _->B, _->V, _->C, _->D, _->W, x1, P1) function show{T}(io::IO, mod::StateSpaceModel{T}) dx, dy = length(mod.x1), size(mod.G, 1) @@ -79,6 +66,181 @@ function show{T}(io::IO, mod::StateSpaceModel{T}) show(mod.W) end +# Time-independent parametrized matrix type +immutable ParametrizedMatrix{T<:Real} + + f::Vector{T} + D::Matrix{T} + np::Int + dims::Tuple{Int, Int} + + function ParametrizedMatrix(f::Vector{T}, D::Matrix{T}, dims::Tuple{Int, Int}) + @assert length(f) == size(D, 1) + @assert length(f) == dims[1] * dims[2] + new(f, D, size(D, 2), dims) + end #ParametrizedMatrix + +end #ParametrizedMatrix + +ParametrizedMatrix{T}(f::Vector{T}, D::Matrix{T}, dims::Tuple{Int, Int}) = ParametrizedMatrix{T}(f, D, dims) + +function show{T}(io::IO, cpm::ParametrizedMatrix{T}) + + function combinestrelems(a, b) + if (a != "") & (b != "") + a * " + " * b + elseif (a == "") & (b == "") + "0" + else + a * b + end #if + end #combinestrelems + + nx, ny = cpm.dims + println(nx, "x", ny, " ParametrizedMatrix{$T}") + conststring = map(x -> x == 0 ? "" : string(x), cpm.f) + paramstring = fill("", length(cpm.f)) + paramelems = fill("", cpm.np) + + for i = 1:size(cpm.D,1) + for j = 1:cpm.np + if cpm.D[i,j] == 0 + paramelems[j] = "" + elseif cpm.D[i,j] == 1 + paramelems[j] = string(Char(96+j)) + else + paramelems[j] = string(cpm.D[i,j],Char(96+j)) + end #if + end #for + paramstring[i] = reduce(combinestrelems, paramelems) + end #for + + finalstrings = map(combinestrelems, conststring, paramstring) + + showcompact(reshape(finalstrings, cpm.dims)) +end + +Base.length(cpm::ParametrizedMatrix) = cpm.dims[1] * cpm.dims[2] +Base.size(cpm::ParametrizedMatrix) = cpm.dims +Base.size(cpm::ParametrizedMatrix, dim::Int) = cpm.dims[dim] +Base.call(cpm::ParametrizedMatrix, params::Vector) = all(cpm.D .== 0) ? + reshape(cpm.f, cpm.dims) : reshape(cpm.f + cpm.D * params, cpm.dims) + +parametrize_full(X::Union{Vector, Matrix}) = + ParametrizedMatrix(zeros(length(X)), eye(length(X)), size(X'')), collect(X) + +function parametrize_diag(x::Vector) + n = length(x) + D = zeros(n^3) + D[[1 + (i-1)*(n^2 + n + 1) for i in 1:n]] = 1 + D = reshape(D, n^2, n) + f = zeros(n^2) + return ParametrizedMatrix(f, D, (n,n)), x +end #parametrize_diag + +parametrize_none{T}(X::Union{Vector{T}, Matrix{T}}) = + ParametrizedMatrix(collect(X), zeros(length(X))'', size(X'')), T[] + +# Time-independent parametrized state space model +immutable ParametrizedSSM{T} <: AbstractStateSpaceModel{T} + + # Transition equation and noise covariance + + A1::Function + A2::ParametrizedMatrix{T} + A3::Function + + B1::Function + B2::ParametrizedMatrix{T} + + G::Function + Q::ParametrizedMatrix{T} + + # Observation equation and noise covariance + + C1::Function + C2::ParametrizedMatrix{T} + C3::Function + + D1::Function + D2::ParametrizedMatrix{T} + + H::Function + R::ParametrizedMatrix{T} + + # Initial state and error covariance + x1::ParametrizedMatrix{T} + P1::ParametrizedMatrix{T} #TODO ... Eliminate? + + nx::Int + ny::Int + nu::Int + nq::Int + nr::Int + + function ParametrizedSSM{T}(A1::Function, A2::ParametrizedMatrix{T}, A3::Function, + B1::Function, B2::ParametrizedMatrix{T}, + G::Function, Q::ParametrizedMatrix{T}, + C1::Function, C2::ParametrizedMatrix{T}, C3::Function, + D1::Function, D2::ParametrizedMatrix{T}, + H::Function, R::ParametrizedMatrix{T}, + x1::ParametrizedMatrix{T}, P1::ParametrizedMatrix{T}) + nx, ny, nu, nq, nr = confirm_matrix_sizes(A1(1), A2, A3(1), B1(1), B2, G(1), Q, + C1(1), C2, C3(1), D1(1), D2, H(1), R, x1, P1) + new(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1, nx, ny, nu, nq, nr) + end + +end #ParametrizedSSM + +ParametrizedSSM{T}(A2::ParametrizedMatrix{T}, Q::ParametrizedMatrix, + C2::ParametrizedMatrix{T}, R::ParametrizedMatrix, + x1::ParametrizedMatrix{T}, P1::ParametrizedMatrix{T}; + A1::Function=_->eye(size(A2(1),1)), A3::Function=_->eye(size(A2(1),2)), + B1::Function=_->eye(size(A1(1),1)), + B2::ParametrizedMatrix{T}=parametrize_none(zeros(size(A1(1),1), 1))[1], + G::Function=_->eye(size(A1(1),1)), + C1::Function=_->eye(size(C2(1), 1)), C3::Function=_->eye(size(C2(1),2)), + D1::Function=_->eye(size(C1(1),1)), + D2::ParametrizedMatrix{T}=parametrize_none(zeros(size(C1(1),1), 1))[1], + H::Function=_->eye(size(A1(1),1))) = + ParametrizedSSM(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1) + +# State space model parameters +immutable SSMParameters{T} + + # Process transition and noise covariance + A::Vector{T} + B::Vector{T} + Q::Vector{T} + + # Observation and noise covariance + C::Vector{T} + D::Vector{T} + R::Vector{T} + + # Initial state and error covariance + x1::Vector{T} + P1::Vector{T} + +end #SSMParameters + +SSMParameters{T}(A::Vector{T}, Q::Vector{T}, C::Vector{T}, R::Vector{T}, + x1::Vector{T}, P1::Vector{T}; + B::Vector{T}=T[], D::Vector{T}=T[]) = + SSMParameters{T}(A, B, Q, C, D, R, x1, P1) + +function call(m::ParametrizedSSM, p::SSMParameters) + A(t) = m.A1(t) * m.A2(p.A) * m.A3(t) + B(t) = m.B1(t) * m.B2(p.B) + V(t) = m.G(t) * m.Q(p.Q) * m.G(t)' + C(t) = m.C1(t) * m.C2(p.C) * m.C3(t) + D(t) = m.D1(t) * m.D2(p.D) + W(t) = m.H(t) * m.R(p.R) * m.H(t)' + x1 = m.x1(p.x1) + P1 = m.P1(p.P1) + return StateSpaceModel(A, V, C, W, x1, P1, B=B, D=D) +end #call + function confirm_matrix_sizes(F, B, V, G, D, W, x1, P1) nx = size(F, 1) @@ -100,6 +262,48 @@ function confirm_matrix_sizes(F, B, V, G, D, W, x1, P1) end #confirm_matrix_sizes +function confirm_matrix_sizes(A1, A2, A3, B1, B2, G, Q, C1, C2, C3, D1, D2, H, R, x1, P1) + + nx = size(A1(1), 1) + ny = size(C1(1), 1) + nu = size(D2(1), 2) + + na1, na2 = size(A1(1), 2), size(A2(1), 2) + nb = size(B1(1), 2) + nc1, nc2 = size(C1(1), 2), size(C2(1), 2) + nd = size(D1(1), 2) + + nq = size(Q(1), 1) + nr = size(R(1), 1) + + @assert size(A1) == (nx, na1) + @assert size(A2) == (na1, na2) + @assert size(A3) == (na2, nx) + + @assert size(B1) == (nx, nb) + @assert size(B2) == (nb, nu) + + @assert size(G) == (nx, nq) + @assert size(Q) == (nq, nq) + + @assert size(C1) == (ny, nc1) + @assert size(C2) == (nc1, nc2) + @assert size(C3) == (nc2, nx) + + @assert size(D1) == (ny, nd) + @assert size(D2) == (nd, nu) + + @assert size(H) == (ny, nr) + @assert size(R) == (nr, nr) + + @assert length(x1) == nx + @assert size(P1) == (nx, nx) + + return nx, ny, nu, nq, nr + +end #confirm_matrix_sizes + + function simulate(model::StateSpaceModel, n::Int; u::Array=zeros(n, model.nu)) # Generates a realization of a state space model. # @@ -126,10 +330,10 @@ function simulate(model::StateSpaceModel, n::Int; u::Array=zeros(n, model.nu)) # Generate the series x[:, 1] = model.x1 - y[:, 1] = model.G(1) * model.x1 + model.D(1) * u[:, 1] + W_chol(1) * randn(model.ny) + y[:, 1] = model.C(1) * model.x1 + model.D(1) * u[:, 1] + W_chol(1) * randn(model.ny) for i=2:n - x[:, i] = model.F(i-1) * x[:, i-1] + model.B(i-1) * u[:, i-1] + V_chol(i-1) * randn(model.nx) - y[:, i] = model.G(i) * x[:, i] + model.D(i) * u[:, i] + W_chol(1) * randn(model.ny) + x[:, i] = model.A(i-1) * x[:, i-1] + model.B(i-1) * u[:, i-1] + V_chol(i-1) * randn(model.nx) + y[:, i] = model.C(i) * x[:, i] + model.D(i) * u[:, i] + W_chol(i) * randn(model.ny) end return x', y' diff --git a/test/kalman_filter.jl b/test/kalman_filter.jl index 5f1f1e8..26d8ecc 100644 --- a/test/kalman_filter.jl +++ b/test/kalman_filter.jl @@ -105,8 +105,10 @@ facts("Kalman Filter") do input = 100*[sin(t/2) sin(t/4) cos(t/2) cos(t/4)] + 10 y_noisy = [y_true zeros(length(t)) -y_true] + 100*[sin(t/2)+sin(t/4) sin(t/2)+cos(t/2) cos(t/2)+cos(t/4)] + 10 + randn(length(t), 3) - lm = StateSpaceModel([1 dt; 0 1], zeros(2,4), zeros(2,2), - [1. 0; 0 0; -1 0], [1. 1 0 0; 1 0 1 0; 0 0 1 1], s*eye(3), zeros(2), 100*eye(2)) + lm = StateSpaceModel([1 dt; 0 1], zeros(2,2), + [1. 0; 0 0; -1 0], s*eye(3), + zeros(2), 100*eye(2), + B=zeros(2, 4), D=[1. 1 0 0; 1 0 1 0; 0 0 1 1]) lm_filt = kalman_filter(y_noisy, lm, u=input) @fact lm_filt.filtered[end,1] --> roughly(y_true[end, 1], atol=3*sqrt(lm_filt.error_cov[1,1,end])) From ab55ec8c13933e1d3fedcd3aa1311a03dffb881f Mon Sep 17 00:00:00 2001 From: Gord Stephen Date: Fri, 23 Oct 2015 14:20:02 -0400 Subject: [PATCH 2/2] First stage of EM estimation implementation --- src/kalman_smooth.jl | 20 ++++ src/parameter_estimation.jl | 226 +++++++++++++++++------------------- 2 files changed, 124 insertions(+), 122 deletions(-) diff --git a/src/kalman_smooth.jl b/src/kalman_smooth.jl index 9b2713b..2d22f38 100644 --- a/src/kalman_smooth.jl +++ b/src/kalman_smooth.jl @@ -133,6 +133,26 @@ function kalman_smooth(y::Array, model::StateSpaceModel; u::Array=zeros(size(y,1 end #smooth +# Bad performance - will be much better with sparse matrices +function lag1_smooth(y::Array, u::Array, m::StateSpaceModel) + + A_stack(t) = [m.A(t) zeros(m.nx, m.nx); eye(m.nx) zeros(m.nx, m.nx)] + B_stack(t) = [m.B(t); zeros(m.nx, m.nu)] + V_stack(t) = [m.V(t) zeros(m.nx, m.nx); zeros(m.nx, 2*m.nx)] + C_stack(t) = [m.C(t) zeros(m.ny, m.nx)] + x1_stack = [m.x1; zeros(model.x1)] + P1_stack = [m.P1 zeros(m.nx, m.nx); zeros(m.nx, 2*m.nx)] + stack_model = StateSpaceModel(A_stack, B_stack, V_stack, + C_stack, model.D, model.W, x1_stack, V1_stack) + + stack_smoothed = smooth(y, stack_model, u=u) + x = stack_smoothed.x[:, 1:model.nx] + V = stack_smoothed.V[1:model.nx, 1:model.nx, :] + Vlag1 = stack_smoothed.V[1:model.nx, (model.nx+1):end] + return x, V, Vlag1, stack_smoothed.loglik + +end #function + # Rauch-Tung-Striebel Smoothing function kalman_smooth(filt::KalmanFiltered) diff --git a/src/parameter_estimation.jl b/src/parameter_estimation.jl index 80efb43..75d209d 100644 --- a/src/parameter_estimation.jl +++ b/src/parameter_estimation.jl @@ -9,78 +9,65 @@ end #fit # Expectation-Maximization (EM) parameter estimation -function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; +function fit{T}(y::Array{T}, pmodel::ParametrizedSSM, params::SSMParameters; u::Array{T}=zeros(size(y,1), size(model.A, 2)), eps::Float64=1e-6, niter::Int=typemax(Int)) - function em_kernel{T}(y::Array, u::Array, pmodel::ParametrizedSSM, params::SSMParameters{T}) - - function lag1_smooth(y::Array, u::Array, model::StateSpaceModel) - B_stack = [model.B zeros(model.B); eye(model.nx) zeros(model.B)] - U_stack = [model.U; zeros(model.U)] - G_stack = [model.G; zeros(model.G)] - Z_stack = [model.Z zeros(model.Z)] - x1_stack = [model.x1; zeros(model.x1)] - V1_stack = [model.V1 zeros(model.V1); zeros(model.V1) zeros(model.V1)] - stack_model = StateSpaceModel(B_stack, U_stack, G_stack, model.Q, Z_stack, model.A, model.H, model.R, x1_stack, V1_stack) - stack_smoothed = smooth(y, stack_model, u=u) - V = SparseMatrixCSC[Vt[1:model.nx, 1:model.nx] for Vt in stack_smoothed.V] - Vlag1 = SparseMatrixCSC[Vt[1:model.nx, (model.nx+1):end] for Vt in stack_smoothed.V] - return stack_smoothed.x[:, 1:model.nx]', V, Vlag1, stack_smoothed.loglik - end #function + n = size(y, 1) + @assert n == size(u, 1) + y_orig = copy(y) + y = y' + y_notnan = !isnan(y) + y = y .* y_notnan + + u_orig = copy(u) + u = u' + + function em_kernel!{T}(params::SSMParameters{T}) + + estimate_A = length(params.A) > 0 estimate_B = length(params.B) > 0 - estimate_U = length(params.U) > 0 estimate_Q = length(params.Q) > 0 - estimate_Z = length(params.Z) > 0 - estimate_A = length(params.A) > 0 + estimate_C = length(params.C) > 0 + estimate_D = length(params.D) > 0 estimate_R = length(params.R) > 0 estimate_x1 = length(params.x1) > 0 - estimate_V1 = length(params.V1) > 0 + estimate_P1 = length(params.P1) > 0 m = pmodel(params) - spB = sparse(m.B) - spU = sparse(m.U) print("Expectations (smoothing)... ") tic() if estimate_B | estimate_Q - exs, V, Vlag1, loglik = lag1_smooth(y, u, m) + exs, P, Plag1, loglik = lag1_smooth(y, u, m) else smoothed = smooth(y, m, u=u) - exs, V, loglik = smoothed.x', smoothed.V, smoothed.loglik + exs, P, loglik = smoothed.x', smoothed.V, smoothed.loglik end toc() println("Negative log-likelihood: ", loglik) - y = y' - u = u' - n = size(y,2) - print("Maximizations... ") tic() - y_notnan = !isnan(y) - y = y .* y_notnan - if estimate_B | estimate_U | estimate_Q - phi = (m.G' * m.G) \ m.G' - Qinv = phi' * inv(m.Q) * phi - spQinv = sparse(Qinv) + phi(t) = (pmodel.G(t)' * pmodel.G(t)) \ pmodel.G(t)' + Qinv(t) = phi(t)' * inv(pmodel.Q(params.Q)) * phi end #if BU if estimate_U | estimate_Z | estimate_A | estimate_R - xi = (m.H' * m.H) \ m.H' - Rinv = xi' * inv(m.R) * xi + xi(t) = (pmodel.H(t)' * pmodel.H(t)) \ pmodel.H(t)' + Rinv(t) = xi(t)' * inv(pmodel.R(params.R)) * xi(t) end #if if estimate_x1 - Linv = inv(m.V1) #TODO: Degenerate accomodations? + Linv = inv(m.P1) #TODO: Degenerate accomodations? end #if - HRH = m.H * m.R * m.H' - HRH_nonzero_rows = diag(HRH) .!= 0 + HRH(t) = pmodel.H(t) * pmodel.R(t) * pmodel.H(t)' + HRH_nonzero_rows(t) = diag(HRH(t)) .!= 0 function get_Nt(y_notnan_t::Vector{Bool}) O = eye(m.ny)[find(y_notnan_t & HRH_nonzero_rows), :] @@ -92,40 +79,36 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; yt = y[:, 1] Nt = get_Nt(y_notnan[:, 1]) - ey = yt - Nt * (yt - m.Z * exs[:, 1] - m.A * u[:, 1]) + ey = yt - Nt * (yt - m.C(1) * exs[:, 1] - m.D(1) * u[:, 1]) - Uut = spU * u[:, 1] - Aut = m.A * u[:, 1] + Uut = m.B(1) * u[:, 1] + Aut = m.D(1) * u[:, 1] - if estimate_B # B setup - spB_D = sparse(pmodel.B.D) + if estimate_A # B setup beta_S1 = zeros(length(params.B), length(params.B)) beta_S2 = zeros(params.B) end #if B - if estimate_U # U setup - spU_f = sparse(pmodel.U.f) - spU_D = sparse(pmodel.U.D) - - deterministic = all(m.G .== 0, 2) + if estimate_B # U setup + deterministic = all(model.G(1) .== 0, 2) OQ0, OQp = speye(m.nx)[find(deterministic), :], speye(m.nx)[find(!deterministic), :] - nu_kp = kron(u[:, 1]', speye(m.nx)) - fU = nu_kp * spU_f - DU = nu_kp * spU_D + nu_kp = kron(u[:, 1]', eye(m.nx)) + fU = nu_kp * pmodel.A.f + DU = nu_kp * pmodel.A.D - Idt = speye(m.nx) - B_ = speye(m.nx) - f_ = nu_kp * spU_f * 0 - D_ = nu_kp * spU_D * 0 - M = sparse((m.B .!= 0) * 1.) + Idt = eye(m.nx) + B_ = eye(m.nx) + f_ = nu_kp * pmodel.A.f * 0 + D_ = nu_kp * pmodel.D.f * 0 + M = m.B(1) .!= 0 M_t = copy(M) EX0 = exs[:, 1] potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:] - Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut - Dt2 = m.Z * Idt * D_ - Dt3 = zeros(model.nx) + Dt1 = ey - m.C(1) * (I - Idt) * ex - m.C(1) * Idt * (B_ * EX0 + f_) - Aut + Dt2 = m.C(1) * Idt * D_ + Dt3 = zeros(pmodel.nx) Dt4 = DU * 0 Idt1 = Idt @@ -142,14 +125,14 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; q_S2 = zeros(params.Q) end #if - if estimate_Z # Z setup - zeta_S1 = zeros(length(params.Z), length(params.Z)) - zeta_S2 = zeros(params.Z) + if estimate_C # Z setup + zeta_S1 = zeros(length(params.C), length(params.C)) + zeta_S2 = zeros(params.C) end #if - if estimate_A # A setup - alpha_S1 = zeros(length(params.A), length(params.A)) - alpha_S2 = zeros(params.A) + if estimate_D # A setup + alpha_S1 = zeros(length(params.D), length(params.D)) + alpha_S2 = zeros(params.D) end #if if estimate_R # R setup @@ -161,34 +144,33 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; ex_prev = ex ex = exs[:, t] - Vt = V[t] + At = m.A(t) + Vt = V[:, :, t] ut = u[:, t] - println(Vt) - - if estimate_B | estimate_Q | estimate_Z + if estimate_A | estimate_Q | estimate_C exx_prev = exx exx = Vt + ex * ex' - if (estimate_B | estimate_Q) && t > 1 + if (estimate_A | estimate_Q) && t > 1 - exx1 = Vlag1[t] + ex * ex_prev' + exx1 = Vlag1[:, :, t] + ex * ex_prev' Uut_prev = Uut - Uut = spU * ut + Uut = m.B(t) * ut - if estimate_B - beta_kp = kron(exx_prev, spQinv) - beta_S1 += spB_D' * beta_kp * spB_D + if estimate_A + beta_kp = kron(exx_prev, Qinv) + beta_S1 += pmodel.B.D' * beta_kp * pmodel.B.D beta_S2 += pmodel.B.D' * (vec(Qinv * exx1) - beta_kp * pmodel.B.f - vec(Qinv * Uut_prev * ex_prev')) end #if B if estimate_Q q_S1 += pmodel.Q.D' * pmodel.Q.D - q_S2 += pmodel.Q.D' * vec(phi * (exx - exx1 * spB' - - spB * exx1' - ex * Uut_prev' - Uut_prev * ex' + spB * exx_prev * spB' + - spB * ex_prev * Uut_prev' + Uut_prev * ex_prev' * spB' + Uut_prev * Uut_prev') * phi') + q_S2 += pmodel.Q.D' * vec(phi(t) * (exx - exx1 * At' - + At * exx1' - ex * Uut_prev' - Uut_prev * ex' + At * exx_prev * At' + + At * ex_prev * Uut_prev' + Uut_prev * ex_prev' * At' + Uut_prev * Uut_prev') * phi(t)') end #if Q end #if BQ @@ -197,13 +179,14 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; if estimate_Z | estimate_R | estimate_U | estimate_A yt = y[:, t] + Ct = m.C(t) Nt = get_Nt(y_notnan[:, t]) - Aut = m.A * ut - ey = yt - Nt * (yt - m.Z * ex - Aut) + Aut = m.D(t) * ut + ey = yt - Nt * (yt - Ct * ex - Aut) if estimate_Z | estimate_R - eyx = Nt * m.Z * Vt + ey * ex' + eyx = Nt * Ct * Vt + ey * ex' if estimate_Z zeta_kp = kron(exx, Rinv) @@ -214,11 +197,11 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; if estimate_R I2 = diagm(!y_notnan[:, t]) - eyy = I2 * (Nt * HRH' + Nt * m.Z * Vt * m.Z' * Nt') * I2 + ey * ey' + eyy = I2 * (Nt * HRH(t)' + Nt * Ct * Vt * Ct' * Nt') * I2 + ey * ey' r_S1 += pmodel.R.D' * pmodel.R.D - r_S2 += pmodel.R.D' * vec(xi * (eyy - eyx * m.Z' - - m.Z * eyx' - ey * Aut' - Aut * ey' + m.Z * exx * m.Z' + - m.Z * ex * Aut' + Aut * ex' * m.Z' + Aut * Aut') * xi') + r_S2 += pmodel.R.D' * vec(xi(t) * (eyy - eyx * Ct' - + Ct * eyx' - ey * Aut' - Aut * ey' + Ct * exx * Ct' + + Ct * ex * Aut' + Aut * ex' * Ct' + Aut * Aut') * xi(t)') end #if R end #if ZR @@ -226,24 +209,24 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; if estimate_U potential_deterministic_rows = all((OQ0 * M_t * OQp') .== 0, 2)[:] - Idt = sparse(diagm(OQ0' * potential_deterministic_rows)) - nu_kp = kron(ut', speye(m.nx)) - B_ = spB * Bt1_ + Idt = diagm(OQ0' * potential_deterministic_rows) + nu_kp = kron(ut', eye(m.nx)) + B_ = At * Bt1_ fU = nu_kp * spU_f DU = nu_kp * spU_D - f_ = spB * ft1_ + fU - D_ = spB * Dt1_ + DU + f_ = At * ft1_ + fU + D_ = At * Dt1_ + DU - Dt1 = ey - m.Z * (I - Idt) * ex - m.Z * Idt * (B_ * EX0 + f_) - Aut - Dt2 = m.Z * Idt * D_ - Dt3 = ex - spB * (I - Idt1) * ex_prev - - spB * Idt1 * (Bt1_ * EX0 + ft1_) - fU - Dt4 = DU + spB * Idt1 * Dt1_ + Dt1 = ey - Ct * (I - Idt) * ex - Ct * Idt * (B_ * EX0 + f_) - Aut + Dt2 = Ct * Idt * D_ + Dt3 = ex - At * (I - Idt1) * ex_prev - + At * Idt1 * (Bt1_ * EX0 + ft1_) - fU + Dt4 = DU + At * Idt1 * Dt1_ nu_S1 += Dt4' * spQinv * Dt4 + Dt2' * Rinv * Dt2 nu_S2 += Dt4' * spQinv * Dt3 + Dt2' * Rinv * Dt1 - t <= (model.nx +1) ? M_t *= M : nothing + t <= (pmodel.nx +1) ? M_t *= M : nothing Idt1 = Idt Bt1_ = B_ ft1_ = f_ @@ -255,30 +238,30 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; alpha_kp = kron(ut', eye(m.ny)) alpha_S1 += pmodel.A.D' * alpha_kp' * Rinv * alpha_kp * pmodel.A.D alpha_S2 += pmodel.A.D' * alpha_kp' * Rinv * - (ey - m.Z * ex - alpha_kp * pmodel.A.f) + (ey - Ct * ex - alpha_kp * pmodel.A.f) end #if A end #if ZRUA end #for - beta = estimate_B ? beta_S1 \ beta_S2 : T[] - nu = estimate_U ? nu_S1 \ nu_S2 : T[] - q = estimate_Q ? q_S1 \ q_S2 : T[] + params.A[:] = estimate_A ? beta_S1 \ beta_S2 : T[] + params.B[:] = estimate_B ? nu_S1 \ nu_S2 : T[] + params.Q[:] = estimate_Q ? q_S1 \ q_S2 : T[] - zeta = estimate_Z ? zeta_S1 \ zeta_S2 : T[] - alpha = estimate_A ? alpha_S1 \ alpha_S2 : T[] - r = estimate_R ? r_S1 \ r_S2 : T[] + params.C[:] = estimate_C ? zeta_S1 \ zeta_S2 : T[] + params.D[:] = estimate_D ? alpha_S1 \ alpha_S2 : T[] + params.R[:] = estimate_R ? r_S1 \ r_S2 : T[] - p = length(params.x1) == 0 ? T[] : (pmodel.x1.D' * Linv * pmodel.x1.D) \ - pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f) - lambda = length(params.V1) == 0 ? T[] : (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' * + params.x1[:] = estimate_x1 ? (pmodel.x1.D' * Linv * pmodel.x1.D) \ + pmodel.x1.D' * Linv * (exs[:, 1] - pmodel.x1.f) : T[] + params.P1[:] = estimate_P1 ? (pmodel.V1.D' * pmodel.V1.D) \ pmodel.V1.D' * vec(V[1] + exs[:, 1] * exs[:, 1]' - - exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1') + exs[:, 1]*m.x1' - m.x1*exs[:, 1]' + m.x1*m.x1') : T[] toc() - return SSMParameters(beta, nu, q, zeta, alpha, r, p, lambda), loglik + return loglik end #em_kernel @@ -288,38 +271,37 @@ function fit{T}(y::Array{T}, model::ParametrizedSSM, params::SSMParameters; ll, ll_prev = Inf, Inf while (niter > 0) & (fraction_change(ll_prev, ll) > eps) - ll_prev = ll - params, ll = em_kernel(y, u, model, params) - println(params) + ll_prev = ll + ll = em_kernel!(params) niter -= 1 end #while niter > 0 ? nothing : warn("Parameter estimation timed out - results may have failed to converge") - return params, model(params) + return params, pmodel(params) end #fit function fit{T}(y::Array{T}, model::StateSpaceModel{T}; u::Array{T}=zeros(size(y,1), model.nu), eps::Float64=1e-6, niter::Int=typemax(Int)) # B, Z, x1 default to parametrized as fully unconstrained - B, B_params = parametrize_full(model.B) - Z, Z_params = parametrize_full(model.Z) + A, A_params = parametrize_full(model.A(1)) + C, C_params = parametrize_full(model.C(1)) x1, x1_params = parametrize_full(model.x1) # U, A default to fixed - U, U_params = parametrize_none(model.U) - A, A_params = parametrize_none(model.A) + B, B_params = parametrize_none(model.B(1)) + D, D_params = parametrize_none(model.D(1)) # Q, R, V1 default to parametrized as diagonal with independent elements - any other values # are ignored / set to zero - Q, Q_params = parametrize_diag(diag(model.Q)) - R, R_params = parametrize_diag(diag(model.R)) - V1, V1_params = parametrize_diag(diag(model.V1)) + Q, Q_params = parametrize_diag(diag(model.V(1))) + R, R_params = parametrize_diag(diag(model.W(1))) + P1, P1_params = parametrize_diag(diag(model.P1)) - pmodel = ParametrizedSSM(B, U, model.G, Q, Z, A, model.H, R, x1, V1) - params = SSMParameters(B_params, U_params, Q_params, Z_params, A_params, R_params, x1_params, V1_params) + pmodel = ParametrizedSSM(A, Q, C, R, x1, P1, B=B, D=D) + params = SSMParameters(A_params, B_params, Q_params, C_params, D_params, R_params, x1_params, V1_params) fit(y, pmodel, params, u=u, eps=eps, niter=niter) end #fit