In [26]:
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from tqdm.notebook import tqdm, trange
import torch
import autograd.numpy as np
import autograd.scipy.stats as stats
import scipy.optimize as optimize
from autograd.scipy.linalg import logm
from autograd import grad, jacobian, hessian
import numpy
import scipy

In [92]:
def decode_q_params(encoded_q):
        shape = len(encoded_q)
        mean_shape = int(shape/3)
        A_shape = (int(np.sqrt(shape - mean_shape)), int(np.sqrt(shape - mean_shape)))
        mean = encoded_q[0:mean_shape]
        A = encoded_q[mean_shape:shape].reshape(A_shape)
        return mean, A
def encode_q_params(q_params):
    mean, A = q_params
    return np.array(list(mean) + list(A.flatten()))

In [93]:
def optimizer(x0, gradient, callback=lambda x: x, rate=0.5, iters=1e1):
    x = x0
    running_average = []
    for i in range(int(iters)):
        r = rate/(1+i*1e-1)
        g = gradient(x)
        x = -g * r + x
        if i % 1 == 0:
            if not callback(x):
                break
    return x

In [94]:
def log_posterior(theta_i, q_params):
    mean, A = q_params
    return scipy.stats.multivariate_normal.logpdf(theta_i, mean=mean, cov=A @ A.T)

In [95]:
def elbo(q_params, d, y_i):
    def stable_multivariate_gaussian_logpdf(x, mu, cov):
        n = len(x)
        x_mu = x - mu
        try:
            _, log_det = np.linalg.slogdet(cov)
            cov_inv = np.linalg.inv(cov)
            log_prob = -0.5 * (n * np.log(2 * np.pi) + log_det + np.dot(x_mu, np.dot(cov_inv, x_mu)))
        except np.linalg.LinAlgError:
            cov_modified = cov + np.eye(n) * 1e-8
            _, log_det = np.linalg.slogdet(cov_modified)
            cov_inv = np.linalg.inv(cov_modified)
            log_prob = -0.5 * (n * np.log(2 * np.pi) + log_det + np.dot(x_mu, np.dot(cov_inv, x_mu)))
        return log_prob
    def log_likelihood(y, theta, d):
        likelihood_cov = np.mean(np.square(y - theta.T @ d.T)) * np.eye(len(d))
        likelihood_mean = d @ theta
        return stable_multivariate_gaussian_logpdf(y, likelihood_mean, likelihood_cov)
    def KLD(mean_q, A_q, mean_p, A_p):
        sigma_q = (A_q @ A_q.T)
        sigma_p = (A_p @ A_p.T)
        bar_sigma_q = np.linalg.norm(sigma_q)
        bar_sigma_p = np.linalg.norm(sigma_p)
        k = len(mean_q)
        return 0.5 * (np.trace(np.linalg.inv(sigma_p) @ sigma_q) + (mean_p - mean_q).T @ np.linalg.inv(sigma_p) @ (mean_p - mean_q) - k + np.log(bar_sigma_p/bar_sigma_q))
    mean, A = q_params
    if len(d.shape) == 1:
        shape = len(d)
    else:
        shape = d.shape[1]
    values = []
    while len(values) < 100:
        sample = np.random.multivariate_normal(np.zeros(shape), np.eye(shape))
        theta = mean + A @ sample
        lik = log_likelihood(y_i, theta, d)
        values.append(lik)
    return 1/(len(values))*np.sum(values) - KLD(mean, A, mean_prior, A_prior)

In [114]:
def log_posterior_grad(theta_i, y_i, d, q_params):
    def training_hessian_inner(q_params):
        return elbo(decode_q_params(q_params), d, y_i)
    def training_mixed_partials_inner(encoded_q):
        print("inner result: %s" % str(grad(lambda d: elbo(decode_q_params(encoded_q), d, y_i))(d)))
        return grad(lambda d: elbo(decode_q_params(encoded_q), d, y_i))(d)
    training_hessian = hessian(training_hessian_inner)
    training_mixed_partials = jacobian(training_mixed_partials_inner)
    encoded_q = encode_q_params(q_params)
    latest_hessian = training_hessian(encoded_q)
    for i in range(10):
        if np.linalg.det(latest_hessian) != 0:
            print("Encoded q: %s" % str(encoded_q))
            print("Mixed Partials: %s" % str(training_mixed_partials(encoded_q)))
            return - np.linalg.inv(latest_hessian) @ training_mixed_partials(encoded_q)
        latest_hessian = latest_hessian + 1e-8 * np.eye(latest_hessian.shape)
    raise ValueError("Was not able to invert hessian")

In [97]:
# Get returns mean, A for log q to approximate log_posterior
def optimal_q(d, y_i):
    thetas = []
    def objective_f(encoded_q):
        q_params = decode_q_params(encoded_q)
        return elbo(q_params, y_i, d)
    def callback(qi):
        mean, A = decode_q_params(qi)
        print("\t New ELBO iteration. mean=%s, A=%s" % (str(mean), str(A)))
    results = optimizer(encode_q_params((mean_prior, A_prior)), grad(objective_f), callback=callback)
    return decode_q_params(results)

In [110]:
noise = 0.2
def MI_grad(d):
    size = d.shape[0]
    N = 10
    theta_samples = np.random.multivariate_normal(mean_prior, A_prior, size=10)
    sample_results = []
    for i in range(N):
        theta_i = theta_samples[i]
        z_i = np.random.multivariate_normal(np.zeros(size), noise * np.eye(size))
        y_i = theta_i.T @ d + z_i
        q_params = optimal_q(d, y_i)
        result = log_posterior_grad(theta_i, y_i, d, q_params) * (log_posterior(theta_i, q_params) + 1)
        samples_results.append(result)
    return np.mean(sample_results)


In [115]:
d0 = np.array([[1,1], [2,2]])
A_prior = np.array([[3.2, 1.6], [1.1, 2.9]])
mean_prior = np.array([2, 5])
def callback(di):
    print("New MI iteration. di=%s" % str(di))
optimizer(d0, MI_grad, callback=callback)

	 New ELBO iteration. mean=[2.451713   5.44658235], A=[[3.4488981  1.5552849 ]
 [1.06532732 3.14431718]]
Encoded q: [2.451713   5.44658235 3.4488981  1.5552849  1.06532732 3.14431718]
inner result: Autograd ArrayBox with value [[ 0.30096467  0.52133202]
 [-0.15433996 -0.11083063]]
Mixed Partials: [[[ 0.07926791 -0.01747789 -0.00346955 -0.03300282 -0.01990339
   -0.02601042]
  [-0.00447041  0.09227538 -0.03850272 -0.03499761 -0.02206888
   -0.04199001]]

 [[-0.02950488 -0.03594928 -0.06395816 -0.02976289 -0.02749807
    0.00964318]
  [-0.04753503 -0.04109063 -0.0193198  -0.0074582  -0.05577989
   -0.04686427]]]
inner result: Autograd ArrayBox with value [[ 0.31458068  0.59370235]
 [-0.27466459 -0.26361711]]


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 6)