In [1]:
import autograd.numpy as np
import autograd.scipy as sp
from autograd.scipy import special
from autograd import grad, hessian, hessian_vector_product, hessian, jacobian
import matplotlib.pyplot as plt
from copy import deepcopy

from scipy import optimize

from valez_finite_VI_lib import initialize_parameters, generate_data, compute_elbo, cavi_updates
from generic_optimization_lib import unpack_params, pack_params

In [2]:
def check_approx_eq(x, y, tol=1e-12):
    """Return whether x and y agree elementwise to within tol.

    The largest absolute elementwise difference must be strictly below tol.
    """
    largest_gap = np.max(np.abs(x - y))
    return largest_gap < tol

In [48]:
# Simulation settings, synthetic data, and the initial variational parameters.
#
# The seed below is left disabled, as in the original run.
# NOTE(review): if generate_data / initialize_parameters draw from numpy's
# global RNG, results will differ between runs until it is enabled -- confirm.
# np.random.seed(12321)

alpha = 10  # IBP parameter

Num_samples = 300  # sample size
D = 2  # dimension
# so X will be a Num_samples x D matrix

sigma_A = 100

sigma_eps = .1  # variance of noise

K_inf = 3  # take to be large for a good approximation to the IBP

Pi, Z, mu, A, X = generate_data(Num_samples, D, K_inf, sigma_A, sigma_eps, alpha)

# Variational truncation. K_inf is a plain int, so no copy is needed.
K_approx = K_inf

tau, nu, phi_mu, phi_var = initialize_parameters(Num_samples, D, K_approx)

# Snap entries of nu that are already near 0 or 1 (<= 0.1 or >= 0.9) to the
# nearest integer and leave the others unchanged. The previous arithmetic form
# counted the boundary values 0.1 and 0.9 in both the "rounded" and the "kept"
# term, so an entry exactly equal to 0.9 came out as 1.9.
nu_init = np.where((nu >= 0.9) | (nu <= 0.1), np.round(nu), nu)

# Packed parameter vector; params_init keeps an untouched copy of the start.
params = pack_params(deepcopy(tau), deepcopy(phi_mu), deepcopy(phi_var), deepcopy(nu))
params_init = deepcopy(params)


In [49]:
def pack_hyperparameters(alpha, sigma_A, sigma_eps):
    """Stack the hyperparameters into one array, ordered (alpha, sigma_A, sigma_eps)."""
    ordered_values = [alpha, sigma_A, sigma_eps]
    return np.array(ordered_values)

def unpack_hyperparameters(hyper_params):
    """Split a packed hyperparameter vector back into its three entries.

    Returns the tuple (alpha, sigma_A, sigma_eps), read from positions
    0, 1 and 2 of hyper_params respectively.
    """
    return hyper_params[0], hyper_params[1], hyper_params[2]

# Round-trip check: packing and then unpacking the hyperparameters should
# return the values that went in, so each printed difference should be 0.
hyper_params = pack_hyperparameters(alpha, sigma_A, sigma_eps)
alpha0, sigma_A0, sigma_eps0 = unpack_hyperparameters(hyper_params)
# print(...) with a single argument gives the same output under Python 2 and
# also runs under Python 3, matching the call form used elsewhere in this file.
print(np.abs(alpha0 - alpha))
print(np.abs(sigma_A0 - sigma_A))
print(np.abs(sigma_eps0 - sigma_eps))


0.0
0.0
0.0


In [50]:
# Build a "ground truth" parameter vector from the simulated quantities
# (Pi, Z, A), using the shapes of the initialized variational parameters.

# First column of tau is Pi scaled by tau_true_scale; second column is the
# scale itself.
# NOTE(review): if the two columns of tau are Beta(a, b) parameters, this gives
# a mean of Pi / (1 + Pi) rather than Pi -- confirm whether (1 - Pi) * scale
# was intended for the second column.
tau_true = np.zeros_like(tau)
tau_true_scale = 15.
tau_true[:, 0] = deepcopy(Pi) * tau_true_scale
tau_true[:, 1] = tau_true_scale

# Near-deterministic values where Z is 1 or 0; 0.999 / 0.001 keep the entries
# strictly inside (0, 1). Entries where Z is neither 0 nor 1 would stay at 0.
nu_true = np.zeros_like(nu)
nu_true[ Z == 1] = 0.999
nu_true[ Z == 0] = 0.001

# phi_mu is filled with A transposed; phi_var is set to a constant 0.01.
phi_mu_true = np.zeros_like(phi_mu)
phi_mu_true[:] = A.transpose()
phi_var_true = np.zeros_like(phi_var)
phi_var_true[:] = 0.01

# Same argument order as the pack_params call used for the initial parameters.
params_true = pack_params(deepcopy(tau_true), deepcopy(phi_mu_true), deepcopy(phi_var_true), deepcopy(nu_true))

In [51]:
class DataSet(object):
    """Data, model settings, and autograd derivatives of the variational objective.

    The objective is the negated value of ``compute_elbo``. The ``wrapped_*``
    methods take the packed parameter vector so they can be passed directly to
    autograd and to ``scipy.optimize``.
    """

    def __init__(self, X, K_approx, alpha, sigma_eps, sigma_A):
        """Store the data and settings and build the derivative functions.

        Args:
            X: data matrix; its dimensions are recorded as 'N' (rows) and
                'D' (columns) in ``data_shape``.
            K_approx: recorded as 'K' in ``data_shape``.
            alpha: passed through to ``compute_elbo`` and ``cavi_updates``.
            sigma_eps: stored as ``sigmas['eps']``.
            sigma_A: stored as ``sigmas['A']``.
        """
        self.X = X
        self.K_approx = K_approx
        self.alpha = alpha
        self.data_shape = {'D': X.shape[1], 'N': X.shape[0], 'K': K_approx}
        self.sigmas = {'eps': sigma_eps, 'A': sigma_A}
        # Rows used by wrapped_sample_kl; defaults to every row of X.
        self.x_rows = slice(0, X.shape[0])

        # Derivatives of the full-data objective with respect to params.
        self.get_kl_grad = grad(self.wrapped_kl)
        self.get_kl_hvp = hessian_vector_product(self.wrapped_kl)
        self.get_kl_hessian = hessian(self.wrapped_kl)

        # Derivatives of the objective restricted to self.x_rows.
        self.get_sample_kl_grad = grad(self.wrapped_sample_kl)
        self.get_sample_kl_hvp = hessian_vector_product(self.wrapped_sample_kl)

        # Gradient with respect to params (argument 0), then the Jacobian of
        # that gradient with respect to hyper_params (argument 1).
        self.get_wrapped_kl_hyperparams_paramgrad = grad(self.wrapped_kl_hyperparams, argnum=0)
        self.get_kl_sens_hess = jacobian(self.get_wrapped_kl_hyperparams_paramgrad, argnum=1)

    def unpack_params(self, params):
        """Unpack a parameter vector using the stored K, D and N."""
        return unpack_params(params, self.data_shape['K'], self.data_shape['D'], self.data_shape['N'])

    def cavi_updates(self, tau, nu, phi_mu, phi_var):
        """Run the library ``cavi_updates`` on the given arrays with this object's data.

        The library call's result is discarded and nothing is returned, so
        callers rely on the arrays being updated in place.
        NOTE(review): presumably the library mutates its arguments -- confirm.
        """
        cavi_updates(tau, nu, phi_mu, phi_var, self.X, self.alpha, self.sigmas)

    def wrapped_kl(self, params, verbose=False):
        """Return the negated ELBO on all of X for a packed parameter vector.

        Args:
            params: packed parameter vector accepted by ``unpack_params``.
            verbose: if True, print the objective value before returning it.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        elbo = compute_elbo(tau, nu, phi_mu, phi_var, self.X, self.sigmas, self.alpha)
        kl = -1 * elbo
        if verbose:
            # Call form: `print -1 * elbo` parses as a subtraction under
            # Python 3 and raises TypeError; output is unchanged on Python 2.
            print(kl)
        return kl

    def wrapped_sample_kl(self, params, verbose=False):
        """Return the negated ELBO using only the rows selected by ``self.x_rows``.

        Both nu and X are restricted to ``self.x_rows``; the other parameters
        are used whole.

        Args:
            params: packed parameter vector accepted by ``unpack_params``.
            verbose: if True, print the objective value before returning it.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        elbo = compute_elbo(tau, nu[self.x_rows, :], phi_mu, phi_var,
                            self.X[self.x_rows, :], self.sigmas, self.alpha)
        kl = -1 * elbo
        if verbose:
            print(kl)
        return kl

    def wrapped_kl_hyperparams(self, params, hyper_params):
        """Return the negated ELBO with hyperparameters taken from ``hyper_params``.

        The stored ``self.alpha`` and ``self.sigmas`` are ignored here; alpha,
        sigma_A and sigma_eps come from ``unpack_hyperparameters(hyper_params)``
        so the objective can be differentiated with respect to them.
        """
        tau, phi_mu, phi_var, nu = self.unpack_params(params)
        alpha, sigma_A, sigma_eps = unpack_hyperparameters(hyper_params)
        sigmas = {'eps': sigma_eps, 'A': sigma_A}
        elbo = compute_elbo(tau, nu, phi_mu, phi_var, self.X, sigmas, alpha)
        return -1 * elbo



In [52]:
# Smoke test: build the DataSet and exercise the objective, its gradient and
# its Hessian-vector product once at the initial parameters.
data_set = DataSet(X, K_approx, alpha, sigma_eps, sigma_A)
data_set.wrapped_kl(params)
kl_grad = data_set.get_kl_grad(params)
kl_hvp = data_set.get_kl_hvp(params, kl_grad)
# With the default x_rows (all rows) the sampled and full objectives should
# agree, so this difference is expected to print 0.
print(data_set.wrapped_sample_kl(params) - data_set.wrapped_kl(params))

0.0


In [53]:
# Show the current phi_mu. The call form runs on both Python 2 and Python 3.
print(phi_mu)

[[-1.73249514  0.9957421   2.40433344]
 [-0.21027156 -1.66476011  0.02958094]]


In [54]:
import time

# Time one evaluation of the cross derivative of the objective: the Jacobian,
# with respect to hyper_params, of the gradient with respect to params.
par_hp_hess_start = time.time()
par_hp_hess = data_set.get_kl_sens_hess(params, hyper_params)
par_hp_hess_time = time.time() - par_hp_hess_start

print('Num samples: %d' % Num_samples)
print(par_hp_hess_time)

Num samples: 300
2.37894701958


In [55]:
def flatten_params(tau, nu, phi_mu, phi_var):
    """Concatenate the four parameter arrays into a single 1-D vector.

    Each array is flattened and the pieces are joined in the order
    tau, nu, phi_mu, phi_var.
    """
    pieces = [block.flatten() for block in (tau, nu, phi_mu, phi_var)]
    return np.hstack(pieces)

# Run coordinate-ascent (CAVI) updates until the largest change in any
# parameter drops below tol, the iteration cap is reached, or the change
# becomes non-finite.

# Starting point: the "true" parameters or the initial ones.
true_init = False
if true_init:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_true)
else:
    tau, phi_mu, phi_var, nu = data_set.unpack_params(params_init)

# NOTE(review): this rebinds the notebook-level name `params` to a vector in
# flatten_params order (tau, nu, phi_mu, phi_var). Earlier cells pack with
# pack_params(tau, phi_mu, phi_var, nu), so the two are not obviously
# interchangeable -- confirm before reusing `params` after this cell.
params = flatten_params(tau, nu, phi_mu, phi_var)

max_iter = 1000
tol = 1e-8
step = 0
# Built-in float: the np.float alias was removed in NumPy 1.24.
diff = float('inf')

while diff > tol and step < max_iter:
    # The arrays are read again right after this call, so the loop relies on
    # cavi_updates modifying them in place.
    data_set.cavi_updates(tau, nu, phi_mu, phi_var)
    new_params = flatten_params(tau, nu, phi_mu, phi_var)
    diff = np.max(np.abs(new_params - params))
    print('Diff / tol: %f' % (diff / tol))
    if not np.isfinite(diff):
        # Stop on nan/inf rather than keep iterating on unusable values.
        break
    params = new_params
    step = step + 1

print('Done at step %d' % step)

# Snapshot the CAVI solution so later cells can rebind tau / nu / phi_*.
cavi_tau = deepcopy(tau)
cavi_phi_mu = deepcopy(phi_mu)
cavi_phi_var = deepcopy(phi_var)
cavi_nu = deepcopy(nu)

Diff / tol: 29692119576.845249
Diff / tol: 9431212402.814554
Diff / tol: 1082819284.651538
Diff / tol: 800000000.000000
Diff / tol: 121935540.902059
Diff / tol: 76139213.791759
Diff / tol: 47542987.335278
Diff / tol: 29686879.233408
Diff / tol: 18537135.506521
Diff / tol: 11574992.106293
Diff / tol: 7227677.772199
Diff / tol: 4513119.793005
Diff / tol: 2818090.527578
Diff / tol: 1759677.249063
Diff / tol: 1098780.891173
Diff / tol: 686102.776920
Diff / tol: 428417.552937
Diff / tol: 267513.273344
Diff / tol: 167041.128306
Diff / tol: 104304.127417
Diff / tol: 65129.774372
Diff / tol: 40668.453061
Diff / tol: 25394.269984
Diff / tol: 15856.736599
Diff / tol: 9901.292524
Diff / tol: 6182.583223
Diff / tol: 3860.539946
Diff / tol: 2410.605428
Diff / tol: 1505.234658
Diff / tol: 939.901383
Diff / tol: 586.894944
Diff / tol: 366.470017
Diff / tol: 228.831879
Diff / tol: 142.887621
Diff / tol: 89.222151
Diff / tol: 55.712258
Diff / tol: 34.787951
Diff / tol: 21.722356
Diff / tol: 13.563914
D

In [56]:
# Keep every entry of nu inside [nu_tol, 1 - nu_tol] before packing.
# np.clip returns a new array, so cavi_nu itself is left untouched.
nu_tol = 1e-8
cavi_nu_trim = np.clip(cavi_nu, nu_tol, 1 - nu_tol)

cavi_params = pack_params(cavi_tau, cavi_phi_mu, cavi_phi_var, cavi_nu_trim)
# Sanity check: the packed starting point should contain no nan/inf.
print(np.all(np.isfinite(cavi_params)))

True


In [57]:
# Refine the CAVI solution with trust-region Newton-CG on the full-data
# objective, using the autograd gradient and Hessian-vector product.
# verbose=True prints the objective at every function evaluation.
vb_opt = optimize.minimize(
    lambda par: data_set.wrapped_kl(par, verbose=True),
    cavi_params,
    method='trust-ncg',
    jac=data_set.get_kl_grad,
    hessp=data_set.get_kl_hvp,
    tol=1e-6,
    options={'maxiter': 20, 'disp': True, 'gtol': 1e-6})

-1613943.31935
-1613943.32339
-1613943.3234
-1613943.32344
-1613943.32374
-1613943.32382
-1613943.32397
-1613943.32491
-1613943.32496
-1613943.32594
-1613943.32669
-1613943.32901
-1613943.32937
-1613943.33246
-1613943.3327
-1613943.3354
-1613943.33641
-1613943.33678
-1613943.33692
-1613943.33697
-1613943.33699
         Current function value: -1613943.336985
         Iterations: 20
         Function evaluations: 21
         Gradient evaluations: 21
         Hessian evaluations: 0


In [58]:
# Repeat the trust-region refinement using only the first 100 rows.
# This changes data_set.x_rows for every later use of the sampled objective
# and its derivatives; it is not restored afterwards.
data_set.x_rows = slice(0, 100)
vb_sample_opt = optimize.minimize(
    lambda par: data_set.wrapped_sample_kl(par, verbose=True),
    cavi_params,
    method='trust-ncg',
    jac=data_set.get_sample_kl_grad,
    hessp=data_set.get_sample_kl_hvp,
    tol=1e-6,
    options={'maxiter': 20, 'disp': True, 'gtol': 1e-6})

-505763.589408
-505853.906502
-505855.373389
-505857.037908
-505857.24476
-505857.47894
-505857.593677
-505857.615145
-505857.615356
-505857.616322
-505857.616406
-505857.617647
-505857.617776
-505857.619008
-505857.619508
-505857.619511
-505873.591947
nan
nan
nan
nan
         Current function value: -505873.591947
         Iterations: 20
         Function evaluations: 21
         Gradient evaluations: 17
         Hessian evaluations: 0


  result_value = self.fun(*argvals, **kwargs)
  return -1 * np.sum(nu * log_nu + (1 - nu) * log_1mnu)


In [59]:
# Compare phi_mu from CAVI, the full-data trust-region fit and the subsample
# trust-region fit against the simulated A. print(...) with a single argument
# gives the same output on Python 2 and also runs on Python 3.
print('CAVI:')
print(cavi_phi_mu.transpose())

# The unpacking below rebinds the notebook-level tau, phi_mu, phi_var and nu.
print('Full TR:')
tau, phi_mu, phi_var, nu = unpack_params(vb_opt.x, D=D, K_approx=K_approx, Num_samples=Num_samples)
print(phi_mu.transpose())

print('Sample TR:')
tau, phi_mu, phi_var, nu = unpack_params(vb_sample_opt.x, D=D, K_approx=K_approx, Num_samples=Num_samples)
print(phi_mu.transpose())

print('Truth:')
print(A)



CAVI:
[[ -5.66801338  -8.28963591]
 [  3.87425832 -22.81115863]
 [  9.88813789 -13.25042291]]
Full TR:
[[ -5.66749816  -8.28888238]
 [  3.87431226 -22.81106095]
 [  9.88803364 -13.25044596]]
Sample TR:
[[ -5.36394836  -8.27025417]
 [  4.09406891 -22.32375524]
 [  9.50658387 -13.99904227]]
Truth:
[[ 10.93428832 -10.94470047]
 [ -5.75164307  -8.33297181]
 [  7.57701316 -18.24148303]]
