In [1]:
import autograd.numpy as np
import autograd.scipy as sp
from autograd.scipy import special
from autograd import grad, hessian, hessian_vector_product, hessian, jacobian
import matplotlib.pyplot as plt
from copy import deepcopy

from scipy import optimize

from valez_finite_VI_lib import initialize_parameters, generate_data, compute_elbo, cavi_updates
from generic_optimization_lib import unpack_params, pack_params

In [2]:
def check_approx_eq(x, y, tol=1e-12):
    """Tell whether ``x`` and ``y`` agree to within ``tol``.

    The largest absolute elementwise difference is compared against ``tol``
    with a strict inequality, so a gap exactly equal to ``tol`` is rejected.
    """
    largest_gap = np.max(np.abs(x - y))
    return largest_gap < tol

In [3]:
# Simulate a data set and build the initial variational parameter vector.
# NOTE: the seed below is disabled, so each run draws a different data set.
#np.random.seed(12321)

alpha = 10 # IBP parameter

Num_samples = 2000 # sample size
D = 2 # dimension
# so X will be a N\times D matrix

sigma_A = 100

sigma_eps = .1 # variance of noise

K_inf = 3 # take to be large for a good approximation to the IBP

Pi, Z, mu, A, X = generate_data(Num_samples, D, K_inf, sigma_A, sigma_eps, alpha)

K_approx = deepcopy(K_inf) # variational truncation

tau, nu, phi_mu, phi_var = initialize_parameters(Num_samples, D, K_approx)
# Snap entries of nu that are at or beyond 0.1 / 0.9 to the nearest integer and
# leave the entries strictly in between untouched.  The previous mask
# arithmetic counted entries exactly equal to 0.1 or 0.9 in both the "rounded"
# and the "kept" term, so 0.9 became 1.9 and 0.1 stayed 0.1 instead of 0.
nu_init = np.where((nu >= 0.9) | (nu <= 0.1), np.round(nu), nu)
# The packed vector is built from the unrounded nu; nu_init is not used here.
params = pack_params(deepcopy(tau), deepcopy(phi_mu), deepcopy(phi_var), deepcopy(nu))
params_init = deepcopy(params)


In [4]:
def pack_hyperparameters(alpha, sigma_A, sigma_eps):
    """Bundle the three hyperparameters into a single 1-D array.

    The layout of the result is ``[alpha, sigma_A, sigma_eps]``.
    """
    ordered_values = [alpha, sigma_A, sigma_eps]
    return np.array(ordered_values)

def unpack_hyperparameters(hyper_params):
    """Split a packed hyperparameter vector into its three components.

    Returns the tuple ``(alpha, sigma_A, sigma_eps)`` read from positions
    0, 1 and 2 of ``hyper_params``.
    """
    return hyper_params[0], hyper_params[1], hyper_params[2]

# Round-trip sanity check: packing and then unpacking the hyperparameters
# should give back the original values, so each printed difference should be 0.
hyper_params = pack_hyperparameters(alpha, sigma_A, sigma_eps)
alpha0, sigma_A0, sigma_eps0 = unpack_hyperparameters(hyper_params)
# Single-argument print(...) behaves the same on Python 2 and Python 3 and
# matches the call form already used in the timing cell of this notebook.
print(np.abs(alpha0 - alpha))
print(np.abs(sigma_A0 - sigma_A))
print(np.abs(sigma_eps0 - sigma_eps))


0.0
0.0
0.0


In [5]:
# Assemble a variational parameter vector built from the simulated quantities
# (Pi, Z, A) so it can serve as an alternative starting point.

# tau: column 0 is Pi times a common scale, column 1 is the scale itself.
tau_true_scale = 15.
tau_true = np.zeros_like(tau)
tau_true[:, 1] = tau_true_scale
tau_true[:, 0] = tau_true_scale * deepcopy(Pi)

# nu: close to 1 where Z is 1 and close to 0 where Z is 0.
nu_true = np.zeros_like(nu)
nu_true[ Z == 0] = 0.001
nu_true[ Z == 1] = 0.999

# phi_mu: the transpose of A; phi_var: a constant 0.01 everywhere.
phi_mu_true = np.zeros_like(phi_mu)
phi_mu_true[:] = A.T
phi_var_true = np.zeros_like(phi_var)
phi_var_true.fill(0.01)

# Pack copies so the *_true arrays stay independent of the packed vector.
params_true = pack_params(*map(deepcopy, (tau_true, phi_mu_true, phi_var_true, nu_true)))

In [6]:
class DataSet(object):
    """Bundle the data matrix and hyperparameters with the VI objective.

    The objective returned by ``wrapped_kl`` is ``-1 * compute_elbo(...)``.
    Derivative functions of that objective are built once in ``__init__`` with
    autograd and stored as attributes:

    * ``get_kl_grad``, ``get_kl_hvp``, ``get_kl_hessian`` -- derivatives of
      ``wrapped_kl`` with respect to the packed parameter vector.
    * ``get_kl_sens_hess`` -- Jacobian, with respect to the parameters, of the
      hyperparameter gradient of ``wrapped_kl_hyperparams``.

    ``x_rows`` is a slice selecting which rows of ``X`` are used; it may be
    reassigned after construction to work on a subset of the rows.
    """

    def __init__(self, X, K_approx, alpha, sigma_eps, sigma_A):
        self.X = X
        self.K_approx = K_approx
        self.alpha = alpha
        self.data_shape = {'D': X.shape[1], 'N': X.shape[0] , 'K':K_approx}
        self.sigmas = {'eps': sigma_eps, 'A': sigma_A}
        # Start with every row of X selected.
        self.x_rows = slice(0, X.shape[0])

        self.get_kl_grad = grad(self.wrapped_kl)
        self.get_kl_hvp = hessian_vector_product(self.wrapped_kl)
        self.get_kl_hessian = hessian(self.wrapped_kl)

        # It turns out to be much faster to take the gradient wrt the small vector first.
        self.get_wrapped_kl_hyperparams_hyperparamgrad = grad(self.wrapped_kl_hyperparams, argnum=1)
        self.get_kl_sens_hess = jacobian(self.get_wrapped_kl_hyperparams_hyperparamgrad, argnum=0)

    def _selected_rows(self):
        """Return the rows of ``X`` picked out by the current ``x_rows``."""
        return self.X[self.x_rows, :]

    def unpack_params(self, params):
        """Unpack ``params`` using K, D and the number of selected rows."""
        return unpack_params(params, self.data_shape['K'], self.data_shape['D'],
                             self._selected_rows().shape[0])

    def cavi_updates(self, tau, nu, phi_mu, phi_var):
        """Run the library ``cavi_updates`` on the selected rows of ``X``.

        Nothing is returned; the CAVI loop in this notebook re-reads the
        arrays it passed in, i.e. it relies on them being updated in place.
        """
        cavi_updates(tau, nu, phi_mu, phi_var, self._selected_rows(), self.alpha, self.sigmas)

    def wrapped_kl(self, params, verbose=False):
        """Return ``-1 * ELBO`` at ``params`` using the stored hyperparameters.

        When ``verbose`` is true the value is also printed.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        elbo = compute_elbo(tau, nu, phi_mu, phi_var, self._selected_rows(), self.sigmas, self.alpha)
        kl = -1 * elbo
        if verbose:
            # print(...) with one argument works on Python 2 and 3; the old
            # `print -1 * elbo` statement is a TypeError on Python 3.
            print(kl)
        return kl

    def wrapped_kl_hyperparams(self, params, hyper_params):
        """Return ``-1 * ELBO`` with hyperparameters taken from ``hyper_params``.

        ``hyper_params`` is a packed ``[alpha, sigma_A, sigma_eps]`` vector; the
        values stored on the instance are not used here.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        alpha, sigma_A, sigma_eps = unpack_hyperparameters(hyper_params)
        sigmas = {'eps': sigma_eps, 'A': sigma_A}
        elbo = compute_elbo(tau, nu, phi_mu, phi_var, self._selected_rows(), sigmas, alpha)
        return -1 * elbo



In [7]:
# Build the DataSet wrapper and exercise the objective, its gradient and its
# Hessian-vector product once at the initial parameters.
data_set = DataSet(X=X, K_approx=K_approx, alpha=alpha,
                   sigma_eps=sigma_eps, sigma_A=sigma_A)
data_set.wrapped_kl(params)
kl_grad = data_set.get_kl_grad(params)
# The gradient itself is used as the direction for the Hessian-vector product.
kl_hvp = data_set.get_kl_hvp(params, kl_grad)

In [8]:
import time

# Time one evaluation of the parameter / hyperparameter cross derivative.
hess_start = time.time()
par_hp_hess = data_set.get_kl_sens_hess(params, hyper_params)
par_hp_hess_time = time.time() - hess_start

print('Num samples: %d' % Num_samples)
print('Hessian time:')
print(par_hp_hess_time)

Num samples: 2000
Hessian time:
0.0154309272766


In [9]:
def flatten_params(tau, nu, phi_mu, phi_var):
    """Concatenate the four parameter arrays into one flat vector.

    Each array is flattened and the pieces are joined in the order
    ``tau``, ``nu``, ``phi_mu``, ``phi_var``.
    """
    pieces = [block.flatten() for block in (tau, nu, phi_mu, phi_var)]
    return np.hstack(pieces)

# Pick the CAVI starting point: the truth-centred parameters when true_init is
# set, otherwise the initial parameters saved at the top of the notebook.
true_init = False
if true_init:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_true)
else:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_init)

# NOTE: from here on `params` is built by flatten_params (tau, nu, phi_mu,
# phi_var), whose argument order differs from pack_params (tau, phi_mu,
# phi_var, nu).  It is only used to measure the change between sweeps.
params = flatten_params(tau, nu, phi_mu, phi_var)

max_iter = 1000
tol = 1e-8
step = 0
# Plain float('inf'): the np.float alias was removed in NumPy 1.24.
diff = float('inf')

# Iterate until the largest parameter change drops below tol, the iteration
# budget is spent, or the change stops being finite.
while diff > tol and step < max_iter:
    # No reassignment here: the loop relies on the arrays being updated in place.
    data_set.cavi_updates(tau, nu, phi_mu, phi_var)
    new_params = flatten_params(tau, nu, phi_mu, phi_var)
    diff = np.max(np.abs(new_params - params))
    print('Diff / tol: %f' % (diff  / tol))
    if not np.isfinite(diff):
        break
    params = new_params
    step = step + 1

print('Done at step %d' % step)

# Snapshot the CAVI result so later cells can reuse tau / nu / phi_* freely.
cavi_tau = deepcopy(tau)
cavi_phi_mu = deepcopy(phi_mu)
cavi_phi_var = deepcopy(phi_var)
cavi_nu = deepcopy(nu)

Diff / tol: 167379366173.977051
Diff / tol: 66918297332.837257
Diff / tol: 23874047185.762764
Diff / tol: 247755580.851436
Diff / tol: 5791450889.653265
Diff / tol: 608549110.346735
Diff / tol: 44760393.074283
Diff / tol: 19334051.723008
Diff / tol: 8351257.224393
Diff / tol: 3607288.777766
Diff / tol: 1558265.907289
Diff / tol: 1285529.939514
Diff / tol: 2585856.889596
Diff / tol: 3072365.297339
Diff / tol: 3372211.123360
Diff / tol: 3983575.606276
Diff / tol: 5325852.992291
Diff / tol: 8261928.410411
Diff / tol: 14871316.345238
Diff / tol: 26708789.809027
Diff / tol: 24739487.012721
Diff / tol: 5845501.430895
Diff / tol: 2452461.879125
Diff / tol: 2700590.915197
Diff / tol: 2851181.763072
Diff / tol: 3070978.478036
Diff / tol: 3619944.965317
Diff / tol: 4802163.657769
Diff / tol: 7314548.629483
Diff / tol: 12881381.661214
Diff / tol: 23816519.342358
Diff / tol: 27657266.282517
Diff / tol: 11451828.386112
Diff / tol: 12604236.506786
Diff / tol: 28589667.711913
Diff / tol: 65657486.053

In [10]:
# Clamp the CAVI nu values into [nu_tol, 1 - nu_tol] before packing them as the
# starting point for the trust-region optimiser (presumably so the objective
# stays finite at nu values of exactly 0 or 1 -- TODO confirm).
nu_tol = 1e-8
# np.clip returns a new array, so cavi_nu itself is left untouched.
cavi_nu_trim = np.clip(cavi_nu, nu_tol, 1 - nu_tol)

cavi_params = pack_params(cavi_tau, cavi_phi_mu, cavi_phi_var, cavi_nu_trim)
# Sanity check: every packed value should be finite.
print(np.all(np.isfinite(cavi_params)))

True


In [18]:
# Full-data fit: select every row again, then refine the CAVI solution with a
# trust-region Newton-CG run that uses autograd gradients and
# Hessian-vector products.  Each objective evaluation prints its value.
data_set.x_rows = slice(0, data_set.X.shape[0])
vb_opt = optimize.minimize(
    fun=lambda params: data_set.wrapped_kl(params, verbose=True),
    x0=cavi_params,
    method='trust-ncg',
    jac=data_set.get_kl_grad,
    hessp=data_set.get_kl_hvp,
    tol=1e-6,
    options=dict(maxiter=200, disp=True, gtol=1e-6))

-2822695.31403
-2822695.31426
-2822695.31429
-2822695.3143
-2822695.31597
-2822695.31614
-2822695.31692
-2822695.32132
-2822695.32204
-2822695.33146
-2822695.33146
-2822695.34456
-2822695.34859
-2822695.349
-2822695.37711
-2822695.37723
-2822695.41562
-2822695.41562
-2822695.45394
-2822695.45413
-2822695.45414
-2822695.4542
-2822695.47242
-2822695.47912
-2822695.48159
-2822695.4825
-2822695.48283
-2822695.48295
-2822695.483
Optimization terminated successfully.
         Current function value: -2822695.483000
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
         Hessian evaluations: 0


In [21]:
# Sub-sample fit: restrict the data set to the first 200 rows and rerun the
# trust-region optimiser from the CAVI solution, keeping only the matching
# rows of nu.  Note that data_set.x_rows stays set to the sub-sample afterwards.
sample_slice = slice(0, 200)
data_set.x_rows = sample_slice
cavi_params_trim = pack_params(
    cavi_tau, cavi_phi_mu, cavi_phi_var, cavi_nu_trim[data_set.x_rows, :])

# This run is capped at 20 iterations, versus 200 for the full-data fit.
vb_sample_opt = optimize.minimize(
    fun=lambda params: data_set.wrapped_kl(params, verbose=True),
    x0=cavi_params_trim,
    method='trust-ncg',
    jac=data_set.get_kl_grad,
    hessp=data_set.get_kl_hvp,
    tol=1e-6,
    options=dict(maxiter=20, disp=True, gtol=1e-6))

-300480.251541
-300492.746202
-300494.553257
-300496.880917
-300497.193065
-300498.52369
-300498.821633
-300501.035959
-300501.269892
-300501.290363
-300501.825334
-300502.385677
-300502.731021
-300502.745528
-300503.011899
-300503.059282
-300503.077424
-300503.080437
-300503.084549
-300503.084559
-300503.08476
         Current function value: -300503.084760
         Iterations: 20
         Function evaluations: 21
         Gradient evaluations: 21
         Hessian evaluations: 0


In [20]:
# Compare the fitted phi_mu from CAVI, the full-data trust-region run and the
# sub-sample trust-region run against the simulated A.
# NOTE: this cell reads vb_opt, vb_sample_opt and sample_slice, so both
# trust-region cells must have been run (in their current form) beforehand.
# Single-argument print(...) is used so the cell runs on Python 2 and 3.
print('CAVI:')
print(cavi_phi_mu.transpose())

print('Full TR:')
tau, phi_mu, phi_var, nu = unpack_params(vb_opt.x, D=D, K_approx=K_approx, Num_samples=Num_samples)
print(phi_mu.transpose())

print('Sample TR:')
# The sub-sample solution only carries nu rows for the rows in sample_slice.
tau, phi_mu, phi_var, nu = \
    unpack_params(vb_sample_opt.x, D=D, K_approx=K_approx, Num_samples=data_set.X[sample_slice, :].shape[0])
print(phi_mu.transpose())

print('Truth:')
print(A)



CAVI:
[[ -9.76382705  -7.65672736]
 [-19.85548388  -9.57560872]
 [ 30.73506246  10.74426103]]
Full TR:
[[ -9.76383271  -7.65672714]
 [-19.85536547  -9.5755616 ]
 [ 30.7348768   10.74419117]]
Sample TR:


AssertionError: 