In [1]:
from __future__ import absolute_import
from __future__ import print_function

import autograd.numpy as np
from autograd import grad
from autograd.extend import notrace_primitive

@notrace_primitive
def resampling(w, rs):
    """
    Stratified resampling of particle indices.

    Decorated with ``notrace_primitive`` so that autograd does not trace
    the discrete ancestor draw and takes no derivatives through it.

    Args:
        w: 1-D array of N particle weights; the callers in this file
           normalize it to sum to one before calling.
        rs: random state exposing ``rand(n)``.

    Returns:
        Integer array of N ancestor indices, each in ``[0, N-1]``.
    """
    N = w.shape[0]
    bins = np.cumsum(w)
    # One uniform draw per stratum [i/N, (i+1)/N).
    u = (np.arange(N) + rs.rand(N))/N

    # Round-off can leave bins[-1] slightly below 1; np.digitize then
    # returns N for a draw that lands above it, which is not a valid
    # particle index. Clamp to the last particle.
    return np.minimum(np.digitize(u, bins), N - 1)

In [2]:
def vsmc_lower_bound(prop_params, model_params, y, smc_obj, rs, verbose=False, adapt_resamp=False):
    r"""
    Estimate the VSMC lower bound. Amenable to (biased) reparameterization
    gradients
    .. math::
        ELBO(\theta,\lambda) =
        \mathbb{E}_{\phi}\left[\log \hat p(y_{1:T}) \right]
    Requires an SMC object with 2 member functions:
    -- sim_prop(t, x_{t-1}, y, prop_params, model_params, rs)
    -- log_weights(t, x_t, x_{t-1}, y, prop_params, model_params)
    and two attributes, ``Dx`` (state dimension) and ``N`` (number of
    particles).

    Args:
        prop_params: proposal parameters, forwarded to ``smc_obj``.
        model_params: model parameters, forwarded to ``smc_obj``.
        y: observations; ``y.shape[0]`` is the number of time steps T.
        smc_obj: object providing ``sim_prop``, ``log_weights``, ``Dx``, ``N``.
        rs: random state passed to ``resampling`` and ``sim_prop``.
        verbose: if True, print the final normalized ESS.
        adapt_resamp: if True, resample only when the normalized ESS drops
            below 0.5 and accumulate log-weights in between; otherwise
            resample at every step after the first.

    Returns:
        The estimate ``log \hat p(y_{1:T})`` (``0.`` when T == 0).
    """
    # Extract constants
    T = y.shape[0]
    Dx = smc_obj.Dx
    N = smc_obj.N

    # Initialize SMC
    X = np.zeros((N,Dx))
    Xp = np.zeros((N,Dx))
    logW = np.zeros(N)
    W = np.exp(logW)
    W = W / np.sum(W)
    logZ = 0.
    ESS = 1./np.sum(W**2)/N
    # Log of the mean *unnormalized* weight of the current particle set.
    # It must be captured before W is normalized: once normalized,
    # log(sum(W)) is identically zero and the increment would be lost.
    log_mean_W = 0.

    for t in range(T):
        # Resampling
        if adapt_resamp:
            if ESS < 0.5:
                ancestors = resampling(W, rs)
                Xp = X[ancestors]
                # Bank the evidence carried by the weights accumulated
                # since the previous resampling step, then reset them.
                logZ = logZ + log_mean_W
                logW = np.zeros(N)
            else:
                Xp = X
        else:
            if t > 0:
                ancestors = resampling(W, rs)
                Xp = X[ancestors]
            else:
                Xp = X

        # Propagation
        X = smc_obj.sim_prop(t, Xp, y, prop_params, model_params, rs)

        # Weighting
        if adapt_resamp:
            logW = logW + smc_obj.log_weights(t, X, Xp, y, prop_params, model_params)
        else:
            logW = smc_obj.log_weights(t, X, Xp, y, prop_params, model_params)
        # Subtract the max before exponentiating for numerical stability.
        max_logW = np.max(logW)
        W = np.exp(logW-max_logW)
        log_mean_W = max_logW + np.log(np.sum(W)) - np.log(N)
        # Without adaptive resampling every step contributes; with it, only
        # resampling steps (above) and the final step do.
        if (not adapt_resamp) or t == T-1:
            logZ = logZ + log_mean_W
        W = W / np.sum(W)
        ESS = 1./np.sum(W**2)/N
    if verbose:
        print('ESS: '+str(ESS))
    return logZ

In [3]:
def sim_q(prop_params, model_params, y, smc_obj, rs, verbose=False):
    """
    Simulates a single sample from the VSMC approximation.
    Requires an SMC object with 2 member functions:
    -- sim_prop(t, x_{t-1}, y, prop_params, model_params, rs)
    -- log_weights(t, x_t, x_{t-1}, y, prop_params, model_params)
    and two attributes, ``Dx`` (state dimension) and ``N`` (number of
    particles).

    Args:
        prop_params: proposal parameters, forwarded to ``smc_obj``.
        model_params: model parameters, forwarded to ``smc_obj``.
        y: observations; ``y.shape[0]`` is the number of time steps T.
        smc_obj: object providing ``sim_prop``, ``log_weights``, ``Dx``, ``N``.
        rs: random state; ``rs.rand()`` is used for the final draw and the
            object is passed on to ``resampling`` and ``sim_prop``.
        verbose: if True, print ESS summaries.

    Returns:
        Array of shape (T, Dx): one full trajectory picked according to
        the final normalized weights.
    """
    # Extract constants
    T  = y.shape[0]
    Dx = smc_obj.Dx
    N  = smc_obj.N

    # Initialize SMC: full particle trajectories and per-step weights
    X = np.zeros((N,T,Dx))
    W = np.zeros((N,T))
    ESS = np.zeros(T)

    for t in range(T):
        # Resampling: reorder the whole history so trajectories stay coherent
        if t > 0:
            ancestors = resampling(W[:,t-1], rs)
            X[:,:t,:] = X[ancestors,:t,:]

        # Propagation
        # NOTE: at t == 0 the index t-1 wraps around to the last time slice;
        # the lgss_smc model in this file ignores the previous state at t == 0.
        X[:,t,:] = smc_obj.sim_prop(t, X[:,t-1,:], y, prop_params, model_params, rs)

        # Weighting
        logW = smc_obj.log_weights(t, X[:,t,:], X[:,t-1,:], y, prop_params, model_params)
        max_logW = np.max(logW)
        W[:,t] = np.exp(logW-max_logW)
        W[:,t] /= np.sum(W[:,t])
        ESS[t] = 1./np.sum(W[:,t]**2)

    # Sample from the empirical approximation
    bins = np.cumsum(W[:,-1])
    u = rs.rand()
    # Round-off can leave bins[-1] slightly below 1, in which case
    # np.digitize returns N for a large enough u; clamp to a valid index.
    B = np.minimum(np.digitize(u,bins), N-1)

    if verbose:
        print('Mean ESS', np.mean(ESS)/N)
        print('Min ESS', np.min(ESS))

    return X[B,:,:]

In [4]:
import autograd.numpy.random as npr

def init_model_params(Dx, Dy, alpha, r, obs, rs = None):
    """
    Build the parameters of the linear-Gaussian state-space model.

    Args:
        Dx: latent state dimension.
        Dy: observation dimension.
        alpha: base of the transition matrix, A[i,j] = alpha**(|i-j|+1).
        r: observation noise scale, R = r * I.
        obs: 'sparse' for an emission matrix with ones on the main diagonal
             and zeros elsewhere; any other value draws C from a standard
             normal using ``rs``.
        rs: random state used for the dense emission matrix. When omitted a
            fresh ``RandomState(0)`` is created per call, so repeated calls
            do not share (and advance) one hidden generator.

    Returns:
        Tuple ``(mu0, Sigma0, A, Q, C, R)``.
    """
    if rs is None:
        rs = npr.RandomState(0)

    mu0    = np.zeros(Dx)
    Sigma0 = np.eye(Dx)
    A      = np.zeros((Dx,Dx))
    for i in range(Dx):
        for j in range(Dx):
            A[i,j] = alpha**(abs(i-j)+1)

    Q = np.eye(Dx)
    if obs == 'sparse':
        # Rectangular identity: same result as filling the leading Dy x Dy
        # block when Dy <= Dx, and still well defined when Dy > Dx.
        C = np.eye(Dy, Dx)
    else:
        C = rs.normal(size=(Dy,Dx))
    R = r * np.eye(Dy)

    return (mu0, Sigma0, A, Q, C, R)

In [5]:

def init_prop_params(T, Dx, scale = 0.5, rs = None):
    """
    Draw initial proposal parameters, one triple per time step.

    Args:
        T: number of time steps.
        Dx: latent state dimension.
        scale: standard deviation multiplier applied to the normal draws.
        rs: random state. When omitted a fresh ``RandomState(0)`` is created
            per call, so repeated calls do not share one hidden generator.

    Returns:
        List of T tuples ``(bias, linear, log_var)``, each entry of shape
        (Dx,).
    """
    if rs is None:
        rs = npr.RandomState(0)

    return [(scale * rs.randn(Dx), # Bias
             1. + scale * rs.randn(Dx), # Linear times A/mu0
             scale * rs.randn(Dx)) # Log-var
            for t in range(T)]

def generate_data(model_params, T = 5, rs = None):
    """
    Simulate latent states and observations from the linear-Gaussian model.

    Args:
        model_params: tuple ``(mu0, Sigma0, A, Q, C, R)``.
        T: number of time steps to simulate.
        rs: random state. When omitted a fresh ``RandomState(0)`` is created
            per call, so repeated calls do not share one hidden generator.

    Returns:
        ``(x_true, y_true)`` with shapes (T, Dx) and (T, Dy).
    """
    if rs is None:
        rs = npr.RandomState(0)

    mu0, Sigma0, A, Q, C, R = model_params
    Dx = mu0.shape[0]
    Dy = R.shape[0]

    x_true = np.zeros((T,Dx))
    y_true = np.zeros((T,Dy))

    for t in range(T):
        # State: initial distribution at t == 0, linear dynamics afterwards
        if t > 0:
            x_true[t,:] = rs.multivariate_normal(np.dot(A,x_true[t-1,:]),Q)
        else:
            x_true[0,:] = rs.multivariate_normal(mu0,Sigma0)
        # Observation emitted from the current state
        y_true[t,:] = rs.multivariate_normal(np.dot(C,x_true[t,:]),R)

    return x_true, y_true

In [6]:
def log_marginal_likelihood(model_params, T, y_true):
    """
    Log-likelihood of the first T observations under the linear-Gaussian
    model, accumulated with a Kalman filter (predict / update recursion).

    Args:
        model_params: tuple ``(mu0, Sigma0, A, Q, C, R)``.
        T: number of time steps to process.
        y_true: observations, indexed as ``y_true[t,:]``.

    Returns:
        Sum over t of the Gaussian log-density of each innovation.
    """
    mu0, Sigma0, A, Q, C, R = model_params
    obs_dim = R.shape[1]

    total = 0.
    mean_filt = None
    cov_filt = None

    for step in range(T):
        # Predict: the prior at the first step, the dynamics afterwards
        if step == 0:
            mean_pred, cov_pred = mu0, Sigma0
        else:
            mean_pred = np.dot(A,mean_filt)
            cov_pred = np.dot(A,np.dot(cov_filt,A.T)) + Q

        # Update: innovation, its covariance S, and the Kalman gain
        innov = y_true[step,:] - np.dot(C,mean_pred)
        S = np.dot(C,np.dot(cov_pred,C.T)) + R
        C_cov = np.dot(C,cov_pred)
        gain = np.linalg.solve(S, C_cov).T
        mean_filt = mean_pred + np.dot(gain,innov)
        cov_filt = cov_pred - np.dot(gain,C_cov)

        # Gaussian log-density of the innovation under N(0, S)
        _, logdet = np.linalg.slogdet(S)
        maha = np.sum(innov*np.linalg.solve(S,innov))
        total += -0.5*(maha + logdet + obs_dim*np.log(2.*np.pi))

    return total

In [7]:
class lgss_smc:
    """
    Class for defining functions used in variational SMC for the
    linear-Gaussian state-space model.

    The proposal at step t is Gaussian with diagonal covariance
    ``diag(exp(log_s2t))`` and mean ``mut + lint * (A x_{t-1})`` for t > 0
    (``mut + lint * mu0`` at t == 0), where ``(mut, lint, log_s2t)`` is
    ``prop_params[t]``.
    """
    def __init__(self, T, Dx, Dy, N):
        self.T = T    # number of time steps
        self.Dx = Dx  # latent state dimension
        self.Dy = Dy  # observation dimension
        self.N = N    # number of particles

    def log_normal(self, x, mu, Sigma):
        """
        Row-wise Gaussian log-density.

        Args:
            x: array of shape (n, dim), one point per row.
            mu: mean, broadcastable against ``x``.
            Sigma: (dim, dim) covariance matrix.

        Returns:
            Array of shape (n,) with ``log N(x[i]; mu, Sigma)``.
        """
        dim = Sigma.shape[0]
        sign, logdet = np.linalg.slogdet(Sigma)
        log_norm = -0.5*dim*np.log(2.*np.pi) - 0.5*logdet
        diff = x - mu
        # Solve Sigma z = diff^T instead of forming inv(Sigma) explicitly:
        # same quadratic form, better conditioned.
        return log_norm - 0.5*np.sum(diff*np.linalg.solve(Sigma, diff.T).T,axis=1)

    def _prop_mean(self, t, Xp, prop_params, model_params):
        """Mean of the proposal at step t, shared by log_prop and sim_prop."""
        mu0, Sigma0, A, Q, C, R = model_params
        mut, lint, log_s2t = prop_params[t]
        if t > 0:
            return mut + np.dot(A, Xp.T).T*lint
        return mut + lint*mu0

    def log_prop(self, t, Xc, Xp, y, prop_params, model_params):
        """Log-density of the proposal at the current particles ``Xc``."""
        mut, lint, log_s2t = prop_params[t]
        s2t = np.exp(log_s2t)
        mu = self._prop_mean(t, Xp, prop_params, model_params)
        return self.log_normal(Xc, mu, np.diag(s2t))

    def log_target(self, t, Xc, Xp, y, prop_params, model_params):
        """Log transition (or initial) density plus log observation density."""
        mu0, Sigma0, A, Q, C, R = model_params
        if t > 0:
            logF = self.log_normal(Xc,np.dot(A,Xp.T).T, Q)
        else:
            logF = self.log_normal(Xc, mu0, Sigma0)
        logG = self.log_normal(np.dot(C,Xc.T).T, y[t], R)
        return logF + logG

    # These following 2 are the only ones needed by variational-smc.py
    def log_weights(self, t, Xc, Xp, y, prop_params, model_params):
        """Log importance weights: log target minus log proposal."""
        return self.log_target(t, Xc, Xp, y, prop_params, model_params) - \
               self.log_prop(t, Xc, Xp, y, prop_params, model_params)

    def sim_prop(self, t, Xp, y, prop_params, model_params, rs = None):
        """
        Draw new particles from the proposal (reparameterized as
        mean + noise * std).

        Args:
            Xp: previous particles, shape (N, Dx); only its shape is used
                at t == 0.
            rs: random state. When omitted a fresh ``RandomState(0)`` is
                created per call, so calls do not share one hidden
                generator.

        Returns:
            Array with the same shape as ``Xp``.
        """
        if rs is None:
            rs = npr.RandomState(0)
        mut, lint, log_s2t = prop_params[t]
        s2t = np.exp(log_s2t)
        mu = self._prop_mean(t, Xp, prop_params, model_params)
        return mu + rs.randn(*Xp.shape)*np.sqrt(s2t)

In [8]:
# Model hyper-parameters
T      = 10        # number of time steps
Dx     = 5         # latent state dimension
Dy     = 3         # observation dimension
alpha  = 0.42      # transition matrix entries are alpha**(|i-j|+1)
r      = .1        # observation noise covariance is r * I
obs    = 'sparse'  # 'sparse' selects the identity-block emission matrix

# Training parameters
param_scale = 0.5  # scale of the random proposal-parameter initialization
num_epochs = 1000  # number of optimizer iterations
step_size = 0.001  # optimizer step size

N = 4  # number of particles

# One generator is shared by model construction and data simulation, so the
# order of the next two calls determines the simulated data.
data_seed = npr.RandomState(0)
model_params = init_model_params(Dx, Dy, alpha, r, obs, data_seed)

print("Generating data...")
x_true, y_true = generate_data(model_params, T, data_seed)

# Exact value from the Kalman filter, for comparison with the learned bound
lml = log_marginal_likelihood(model_params, T, y_true)
print("True log-marginal likelihood: "+str(lml))

# Despite its name this is a RandomState, not an integer seed. It is used
# here for the proposal initialization and later by the training objective.
seed = npr.RandomState(0)

# Initialize proposal parameters
prop_params = init_prop_params(T, Dx, param_scale, seed)
# Model and proposal parameters are optimized jointly as one tuple
combined_init_params = (model_params, prop_params)

lgss_smc_obj = lgss_smc(T, Dx, Dy, N)

Generating data...
True log-marginal likelihood: -46.89174359319463


In [9]:
# Training objective: the negated VSMC bound, so that minimizing it
# maximizes the bound.
def objective(combined_params, iter):
    """
    Negative stochastic VSMC lower bound for the given parameters.

    ``combined_params`` is the pair ``(model_params, prop_params)``;
    ``iter`` is accepted for the optimizer's calling convention and unused.
    Randomness comes from the module-level ``seed`` generator.
    """
    theta, lam = combined_params
    bound = vsmc_lower_bound(lam, theta, y_true, lgss_smc_obj, seed)
    return -bound

# Gradient of the objective with respect to combined_params, via autograd.
objective_grad = grad(objective)


In [10]:

from autograd.misc.optimizers import adam


def print_perf(combined_params, iter, grad):
    """
    Optimizer callback: print the iteration and current bound estimate at
    regular intervals (roughly ten reports per run).

    Args:
        combined_params: current ``(model_params, prop_params)``.
        iter: current iteration number.
        grad: current gradient (unused; part of the callback signature).

    Note: the bound is obtained by calling ``objective`` again, i.e. it is
    a fresh stochastic estimate rather than the value used for the step.
    """
    # Integer interval of at least 1. A float interval (num_epochs/10)
    # silently skips most reports when num_epochs is not a multiple of 10.
    report_every = max(1, num_epochs // 10)
    if iter % report_every == 0:
        bound = -objective(combined_params, iter)
        message = "{:15}|{:20}".format(iter, bound)
        print(message)
        #with open(f_head+'_ELBO.csv', 'a') as f_handle:
        #    np.savetxt(f_handle, [[iter,bound]], fmt='%i,%f')

# SGD with adaptive step-size "adam"
# The optimized variable is the whole (model_params, prop_params) tuple, so
# the model parameters are updated together with the proposal parameters.
optimized_params = adam(objective_grad, combined_init_params, step_size=step_size,
                        num_iters=num_epochs, callback=print_perf)
# Split the result back into its two components
opt_model_params, opt_prop_params = optimized_params


              0| -136.52003692786687
            100|  -98.77446240248345
            200|   -57.6303750970804
            300|  -54.45751571342868
            400|  -65.08079520031357
            500|  -54.67798149962506
            600| -43.741817571752065
            700|  -50.18870619980928
            800|  -55.92935048354515
            900| -37.706093153539875


In [19]:
# Shapes of the first five optimized model parameters (mu0, Sigma0, A, Q, C)
tuple(param.shape for param in opt_model_params[:5])

((5,), (5, 5), (5, 5), (5, 5), (3, 5))