In [1]:
import autograd.numpy as np
import autograd.scipy as sp
from autograd.scipy import special
from autograd import grad, hessian, hessian_vector_product, hessian, jacobian
import matplotlib.pyplot as plt
from copy import deepcopy

from scipy import optimize

from valez_finite_VI_lib import initialize_parameters, generate_data, compute_elbo, cavi_updates
from generic_optimization_lib import unpack_params, pack_params

In [2]:
def check_approx_eq(x, y, tol=1e-12):
    """Return whether x and y agree elementwise within an absolute tolerance.

    The largest absolute elementwise difference between x and y is compared
    strictly against tol.
    """
    largest_gap = np.max(np.abs(x - y))
    return largest_gap < tol

In [3]:
# Problem setup: model hyperparameters, a generated data set, and the starting
# variational parameters used by the rest of the notebook.

# NOTE(review): the seed below is disabled, so any random draws made by
# generate_data / initialize_parameters presumably differ between runs, and the
# saved outputs will not reproduce exactly — confirm and consider re-enabling.
#np.random.seed(12321)

alpha = 10 # IBP parameter

Num_samples = 20 # sample size
D = 2 # dimension
# so X will be a N\times D matrix

sigma_A = 100

# NOTE(review): described as a variance here; confirm whether the library
# treats it as a variance or a standard deviation.
sigma_eps = .1 # variance of noise

K_inf = 3 # take to be large for a good approximation to the IBP

Pi, Z, mu, A, X = generate_data(Num_samples, D, K_inf, sigma_A, sigma_eps, alpha)

# The truncation level is set equal to the generating K_inf.
K_approx = deepcopy(K_inf) # variational truncation

tau, nu, phi_mu, phi_var = initialize_parameters(Num_samples, D, K_approx)
# Rounds entries of nu that are >= 0.9 or <= 0.1 and keeps entries in between
# unchanged. An entry exactly equal to 0.9 satisfies both masks and would come
# out as 1.9. nu_init is not referenced again in the visible notebook.
nu_init = np.round(nu * (nu >= 0.9) + nu * (nu <= 0.1)) + nu * (nu >= 0.1) * (nu <= 0.9)
# Pack copies of the initial values into one parameter vector, and keep a second
# copy of that vector as the starting point for later cells.
params = pack_params(deepcopy(tau), deepcopy(phi_mu), deepcopy(phi_var), deepcopy(nu))
params_init = deepcopy(params)


In [4]:
def pack_hyperparameters(alpha, sigma_A, sigma_eps):
    """Collect the three hyperparameters into a single 1-D array.

    The entries are stored in the order (alpha, sigma_A, sigma_eps).
    """
    ordered_values = [alpha, sigma_A, sigma_eps]
    return np.array(ordered_values)

def unpack_hyperparameters(hyper_params):
    """Split a packed hyperparameter vector into (alpha, sigma_A, sigma_eps).

    Reads positions 0, 1 and 2 of hyper_params, in that order.
    """
    return hyper_params[0], hyper_params[1], hyper_params[2]

# Round-trip check: packing and then unpacking should give back the original
# hyperparameters, so each printed absolute difference is expected to be 0.
hyper_params = pack_hyperparameters(alpha, sigma_A, sigma_eps)
alpha0, sigma_A0, sigma_eps0 = unpack_hyperparameters(hyper_params)
# print() with a single argument behaves the same under Python 2 and Python 3.
print(np.abs(alpha0 - alpha))
print(np.abs(sigma_A0 - sigma_A))
print(np.abs(sigma_eps0 - sigma_eps))


0.0
0.0
0.0


In [5]:
# Build a packed parameter vector from the generating quantities (Pi, Z, A) so it
# can be used as an alternative starting point for the optimizers.

# tau: first column is Pi scaled by a constant, second column is the constant.
# NOTE(review): the second column is the scale itself rather than
# (1 - Pi) * scale — confirm this is the intended parameterization.
tau_true_scale = 15.
tau_true = np.zeros_like(tau)
tau_true[:, 0] = tau_true_scale * deepcopy(Pi)
tau_true[:, 1] = tau_true_scale

# nu: near-one where Z is 1 and near-zero where Z is 0.
nu_true = np.zeros_like(nu)
nu_true[Z == 0] = 0.001
nu_true[Z == 1] = 0.999

# phi_mu: the transpose of A; phi_var: a small constant everywhere.
phi_mu_true = np.zeros_like(phi_mu)
phi_mu_true[:] = A.T
phi_var_true = np.zeros_like(phi_var)
phi_var_true[:] = 0.01

params_true = pack_params(
    deepcopy(tau_true),
    deepcopy(phi_mu_true),
    deepcopy(phi_var_true),
    deepcopy(nu_true))

In [6]:
class DataSet(object):
    """Bundle the data and hyperparameters with autograd-differentiable objectives.

    Every objective returns -1 times the first element returned by
    compute_elbo, evaluated at a packed variational parameter vector.

    Attributes set in __init__:
        X, K_approx, alpha: stored as passed in.
        data_shape: dict with 'D' (X.shape[1]), 'N' (X.shape[0]) and 'K' (K_approx).
        sigmas: dict with 'eps' (sigma_eps) and 'A' (sigma_A).
        x_rows: slice of rows used by wrapped_sample_kl; initially every row of X.
        get_kl_grad, get_kl_hvp, get_kl_hessian: autograd derivatives of wrapped_kl.
        get_sample_kl_grad, get_sample_kl_hvp: the same for wrapped_sample_kl.
        get_wrapped_kl_hyperparams_paramgrad: gradient of wrapped_kl_hyperparams
            with respect to its first argument (params).
        get_kl_sens_hess: jacobian of that gradient with respect to its second
            argument (hyper_params).
    """

    def __init__(self, X, K_approx, alpha, sigma_eps, sigma_A):
        self.X = X
        self.K_approx = K_approx
        self.alpha = alpha
        self.data_shape = {'D': X.shape[1], 'N': X.shape[0], 'K': K_approx}
        self.sigmas = {'eps': sigma_eps, 'A': sigma_A}
        self.x_rows = slice(0, X.shape[0])

        self.get_kl_grad = grad(self.wrapped_kl)
        self.get_kl_hvp = hessian_vector_product(self.wrapped_kl)
        self.get_kl_hessian = hessian(self.wrapped_kl)

        self.get_sample_kl_grad = grad(self.wrapped_sample_kl)
        self.get_sample_kl_hvp = hessian_vector_product(self.wrapped_sample_kl)

        self.get_wrapped_kl_hyperparams_paramgrad = grad(self.wrapped_kl_hyperparams, argnum=0)
        self.get_kl_sens_hess = jacobian(self.get_wrapped_kl_hyperparams_paramgrad, argnum=1)

    def unpack_params(self, params):
        """Split a packed vector into (tau, phi_mu, phi_var, nu) using the stored K, D, N."""
        return unpack_params(params, self.data_shape['K'], self.data_shape['D'], self.data_shape['N'])

    def cavi_updates(self, tau, nu, phi_mu, phi_var):
        """Run the library cavi_updates on the given arrays with the stored data.

        Nothing is returned; callers in this notebook read tau, nu, phi_mu and
        phi_var again after the call, so the library routine presumably updates
        them in place — TODO confirm.
        """
        cavi_updates(tau, nu, phi_mu, phi_var, self.X, self.alpha, self.sigmas)

    def _negative_elbo(self, params, sigmas, alpha, rows=None, verbose=False):
        """Shared implementation of the objectives: unpack, evaluate, negate.

        When rows is given, both nu and X are restricted to those rows before
        compute_elbo is called.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        X = self.X
        if rows is not None:
            nu = nu[rows, :]
            X = X[rows, :]
        kl = -1 * compute_elbo(tau, nu, phi_mu, phi_var, X, sigmas, alpha)[0]
        if verbose:
            # print() with one argument works under both Python 2 and Python 3.
            print(kl)
        return kl

    def wrapped_kl(self, params, verbose=False):
        """Objective on all of X with the stored hyperparameters.

        If verbose is true the value is also printed.
        """
        return self._negative_elbo(params, self.sigmas, self.alpha, verbose=verbose)

    def wrapped_sample_kl(self, params, verbose=False):
        """Objective restricted to the rows selected by self.x_rows.

        self.x_rows is read at call time, so changing it after construction
        changes which rows are used.
        """
        return self._negative_elbo(params, self.sigmas, self.alpha,
                                   rows=self.x_rows, verbose=verbose)

    def wrapped_kl_hyperparams(self, params, hyper_params):
        """Objective on all of X with hyperparameters taken from hyper_params.

        hyper_params is unpacked as (alpha, sigma_A, sigma_eps) and replaces the
        stored values for this evaluation only.
        """
        alpha, sigma_A, sigma_eps = unpack_hyperparameters(hyper_params)
        sigmas = {'eps': sigma_eps, 'A': sigma_A}
        return self._negative_elbo(params, sigmas, alpha)



In [7]:
# Build the objective wrapper, then smoke-test the value, the gradient and a
# Hessian-vector product at the initial packed parameters.
data_set = DataSet(X, K_approx, alpha, sigma_eps, sigma_A)
data_set.wrapped_kl(params)
kl_grad = data_set.get_kl_grad(params)
kl_hvp = data_set.get_kl_hvp(params, kl_grad)

# x_rows starts out covering every row, so the sample objective is expected to
# equal the full objective and the printed difference to be 0.
sample_minus_full_kl = data_set.wrapped_sample_kl(params) - data_set.wrapped_kl(params)
print(sample_minus_full_kl)

0.0


In [14]:
# NOTE(review): this cell carries execution count 14, and its saved output matches
# the transposed "Sample TR" values printed by the later comparison cell, which
# reassigns phi_mu. On a fresh top-to-bottom run this prints the phi_mu assigned
# earlier in the notebook instead.
print(phi_mu)

[[ -7.62382702  -7.28007832  14.36049881]
 [  9.61654343   1.10475998  -3.49914647]]


In [8]:
# Cross derivative of the objective: jacobian, with respect to the hyperparameters,
# of the gradient with respect to the packed parameters. One row per packed
# parameter, one column per entry of hyper_params.
print(data_set.get_kl_sens_hess(params, hyper_params))

[[ -7.12185044e-03   0.00000000e+00   0.00000000e+00]
 [  4.60444285e-03   0.00000000e+00   0.00000000e+00]
 [ -6.01141185e-01   0.00000000e+00   0.00000000e+00]
 [  2.92638105e-01   0.00000000e+00   0.00000000e+00]
 [ -1.02294948e+00   0.00000000e+00   0.00000000e+00]
 [  3.96374740e-01   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   6.90510656e-05   5.60304505e+02]
 [  0.00000000e+00   1.78178160e-04   9.34791356e+02]
 [  0.00000000e+00  -3.42785670e-05   3.31198616e+02]
 [  0.00000000e+00  -8.78010340e-05   3.95001354e+03]
 [  0.00000000e+00  -1.29947577e-04   3.15603030e+03]
 [  0.00000000e+00  -3.06335112e-05   4.79146955e+03]
 [  0.00000000e+00  -1.00000000e-04  -9.22147435e+02]
 [  0.00000000e+00  -1.00000000e-04  -1.00543899e+03]
 [  0.00000000e+00  -1.00000000e-04  -1.24750269e+03]
 [  0.00000000e+00   0.00000000e+00   6.93507065e+01]
 [  0.00000000e+00   0.00000000e+00   1.19988344e+02]
 [  0.00000000e+00   0.00000000e+00   1.60399062e+01]
 [  0.00000000e+00   0.00000

In [9]:
def flatten_params(tau, nu, phi_mu, phi_var):
    """Concatenate the flattened arrays into one 1-D vector.

    The pieces appear in the order tau, nu, phi_mu, phi_var.
    """
    pieces = [block.flatten() for block in (tau, nu, phi_mu, phi_var)]
    return np.hstack(pieces)

# Coordinate-ascent (CAVI) iteration: repeat the update until the largest change
# in any flattened parameter falls to tol or below, or max_iter steps have run.

# Choose the starting point: the hand-built "true" parameters or the initial ones.
true_init = False
if true_init:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_true)
else:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_init)

params = flatten_params(tau, nu, phi_mu, phi_var)

max_iter = 1000
tol = 1e-8
step = 0
# Builtin float is used because the np.float alias has been removed from NumPy.
diff = float('inf')

while diff > tol and step < max_iter:
    # The arrays are re-read after this call, so the update is relied on to
    # modify them in place.
    data_set.cavi_updates(tau, nu, phi_mu, phi_var)
    new_params = flatten_params(tau, nu, phi_mu, phi_var)
    diff = np.max(np.abs(new_params - params))
    print('Diff / tol: %f' % (diff / tol))
    # Stop immediately if the updates produced inf or NaN.
    if not np.isfinite(diff):
        break
    params = new_params
    step = step + 1

print('Done at step %d' % step)

# Snapshot the CAVI result so later cells can reuse it after tau, nu, phi_mu and
# phi_var are reassigned.
cavi_tau = deepcopy(tau)
cavi_phi_mu = deepcopy(phi_mu)
cavi_phi_var = deepcopy(phi_var)
cavi_nu = deepcopy(nu)

Diff / tol: 1925122651.709091
Diff / tol: 394372939.624312
Diff / tol: 175810103.151832
Diff / tol: 114701149.572601
Diff / tol: 99997207.057716
Diff / tol: 100002790.188495
Diff / tol: 32411187.890719
Diff / tol: 31839917.319714
Diff / tol: 29004788.096605
Diff / tol: 26011948.411070
Diff / tol: 22653436.202133
Diff / tol: 19400516.421901
Diff / tol: 16480726.813244
Diff / tol: 13927881.438425
Diff / tol: 11730651.042548
Diff / tol: 9858063.171379
Diff / tol: 8314994.909968
Diff / tol: 7004440.334301
Diff / tol: 5886971.952624
Diff / tol: 4940316.511263
Diff / tol: 4141743.335036
Diff / tol: 3469949.945267
Diff / tol: 2905838.867328
Diff / tol: 2432720.890538
Diff / tol: 2036235.992608
Diff / tol: 1704148.218553
Diff / tol: 1426096.552654
Diff / tol: 1193343.246431
Diff / tol: 998539.135729
Diff / tol: 835513.835377
Diff / tol: 699092.695991
Diff / tol: 584939.469567
Diff / tol: 489422.335545
Diff / tol: 409500.504323
Diff / tol: 342628.613329
Diff / tol: 286676.331014
Diff / tol: 239

In [10]:
# Keep every entry of nu inside [nu_tol, 1 - nu_tol] before packing, then check
# that the packed vector contains only finite values.
nu_tol = 1e-8
# np.clip returns a new array, so cavi_nu itself is left unchanged.
cavi_nu_trim = np.clip(cavi_nu, nu_tol, 1 - nu_tol)

cavi_params = pack_params(cavi_tau, cavi_phi_mu, cavi_phi_var, cavi_nu_trim)
print(np.all(np.isfinite(cavi_params)))

True


In [11]:
# Refine the CAVI solution with trust-region Newton-CG on the full objective,
# supplying the autograd gradient and Hessian-vector product. Each objective
# evaluation prints its value because verbose=True.
vb_opt = optimize.minimize(
    fun=lambda packed: data_set.wrapped_kl(packed, verbose=True),
    x0=cavi_params,
    method='trust-ncg',
    jac=data_set.get_kl_grad,
    hessp=data_set.get_kl_hvp,
    tol=1e-6,
    options=dict(maxiter=20, disp=True, gtol=1e-6))

414.37281802
414.372403918
414.372330657
414.372285401
414.372274905
414.372199155
414.372186833
414.372076461
414.372072889
414.371973761
414.371937246
414.371923813
414.371918872
Optimization terminated successfully.
         Current function value: 414.371919
         Iterations: 12
         Function evaluations: 13
         Gradient evaluations: 13
         Hessian evaluations: 0


In [12]:
# Same trust-region Newton-CG refinement, but on the row-restricted objective.
# NOTE(review): Num_samples is 20, so slice(0, 100) presumably still selects every
# row of X, which would make this run identical to the full fit — confirm whether
# a smaller subsample was intended.
data_set.x_rows = slice(0, 100)
vb_sample_opt = optimize.minimize(
    fun=lambda packed: data_set.wrapped_sample_kl(packed, verbose=True),
    x0=cavi_params,
    method='trust-ncg',
    jac=data_set.get_sample_kl_grad,
    hessp=data_set.get_sample_kl_hvp,
    tol=1e-6,
    options=dict(maxiter=20, disp=True, gtol=1e-6))

414.37281802
414.372403918
414.372330657
414.372285401
414.372274905
414.372199155
414.372186833
414.372076461
414.372072889
414.371973761
414.371937246
414.371923813
414.371918872
Optimization terminated successfully.
         Current function value: 414.371919
         Iterations: 12
         Function evaluations: 13
         Gradient evaluations: 13
         Hessian evaluations: 0


In [13]:
# Compare the fitted phi_mu (transposed) from CAVI, the full trust-region fit and
# the sample trust-region fit against the generating matrix A.
# print() with a single argument behaves the same under Python 2 and Python 3.
print('CAVI:')
print(cavi_phi_mu.transpose())

# Note: the two unpack calls below reassign the notebook-level tau, phi_mu,
# phi_var and nu.
print('Full TR:')
tau, phi_mu, phi_var, nu = unpack_params(vb_opt.x, D=D, K_approx=K_approx, Num_samples=Num_samples)
print(phi_mu.transpose())

print('Sample TR:')
tau, phi_mu, phi_var, nu = unpack_params(vb_sample_opt.x, D=D, K_approx=K_approx, Num_samples=Num_samples)
print(phi_mu.transpose())

print('Truth:')
print(A)



CAVI:
[[ -7.62368013   9.61960407]
 [ -7.28314861   1.10250065]
 [ 14.36381448  -3.4996127 ]]
Full TR:
[[ -7.62382702   9.61654343]
 [ -7.28007832   1.10475998]
 [ 14.36049881  -3.49914647]]
Sample TR:
[[ -7.62382702   9.61654343]
 [ -7.28007832   1.10475998]
 [ 14.36049881  -3.49914647]]
Truth:
[[ 11.3310086   -1.3911172 ]
 [  4.5413794   -2.95429355]
 [-17.16640122  11.88621862]]
