# Zappa

Recall that in Graham's paper the target distribution is
$$
\pi(u) = \left|\frac{\partial G_y}{\partial u}\frac{\partial G_y}{\partial u}^\top\right|^{-1/2} \rho(u)
$$

where $u$ is the set of latents for both the prior and for the simulator, $\rho(u)$ is the prior for these latents and $G_y: u\mapsto y$.

In our case $u = [u_\theta, u_y]$ and $G_\theta(u) = u_\theta$. This means that $\theta = u_\theta$. Therefore $G_y(u) = G_y(u_\theta, u_y) = y$. This transformation is simply the simulator, which in our case is given by
$$
G_y(u) = \log N(u\mid 0, \Sigma) = -\log(2\pi) - \frac{1}{2}\log\det \Sigma - \frac{1}{2}u^\top\Sigma^{-1} u
$$

The manifold we are interested in, is
$$
\left\{u\in\mathbb{R}^2:\, G_y(u) - z_0 = 0\right\}
$$

Since $G_y:\mathbb{R}^2\to\mathbb{R}$, the Jacobian of this function must be a matrix of shape $\mathbb{R}^{1\times 2}$.
$$
\frac{\partial G_y}{\partial u} = (-\Sigma^{-1} u)^\top = - u^\top \Sigma^{-1} \in\mathbb{R}^{1\times 2}
$$
Then the Gram term becomes
$$
\frac{\partial G_y}{\partial u}\frac{\partial G_y}{\partial u}^\top = u^\top \Sigma^{-1}\Sigma^{-1} u = (\Sigma^{-1}u)^\top(\Sigma^{-1} u) = \|\Sigma^{-1} u\|^2
$$

Therefore we have
$$
\left|\frac{\partial G_y}{\partial u}\frac{\partial G_y}{\partial u}^\top\right| = \frac{1}{\|\Sigma^{-1}u\|}
$$

The distribution of the latent variables in our case is $\rho(u) = \rho(u_\theta, u_y) = \rho(\theta, u_y) = \mathcal{U}([-50, 50]\times[-50, 50])$. The log density is thus

$$
\log \pi(u) \propto - \log \|\Sigma^{-1} u\|
$$

Recall in HUG we sample $v\sim N(0, I)$ and then, using $\delta = T/B$ we move $x + \frac{\delta}{2} v$. This means that equivalently we have sampled $v\sim N(0, \frac{\delta^2}{4} I)$ and then moved $x + v$. For this reason, I should choose the scale for zappa $\sigma \approx \frac{\delta}{2}$ 

In [1]:
import numpy as np
from numpy import load, arange, linspace
import matplotlib.pyplot as plt
from utils import box_plot, num_grad_hug_hop, num_grad_thug_hop

In [2]:
folder1 = "experiment27/"

# HUG
ESS_ZAPPA       = load(folder1 + "ESS_ZAPPA.npy")
ESS_J_ZAPPA     = load(folder1 + "ESS_JOINT_ZAPPA.npy")
A_ZAPPA         = load(folder1 + "A_ZAPPA.npy")
EJSD_ZAPPA      = load(folder1 + "EJSD_ZAPPA.npy") 

N_GRAD_ZAPPA    = load(folder1 + "N_GRAD.npy")

folder2 = "experiment13full4/"

# HUG
THETA_ESS_HUG = load(folder2 + "THETA_ESS_HUG.npy")
U_ESS_HUG     = load(folder2 + "U_ESS_HUG.npy")
ESS_HUG       = load(folder2 + "ESS_JOINT_HUG.npy")
A_HUG         = load(folder2 + "A_HUG.npy")
EJSD_HUG      = load(folder2 + "EJSD_HUG.npy")

# THUG
THETA_ESS_THUG = load(folder2 + "THETA_ESS_THUG.npy")
U_ESS_THUG     = load(folder2 + "U_ESS_THUG.npy")
ESS_THUG       = load(folder2 + "ESS_JOINT_THUG.npy")
A_THUG         = load(folder2 + "A_THUG.npy")
EJSD_THUG      = load(folder2 + "EJSD_THUG.npy")

# COMMON
EPSILONS       = load(folder2 + "EPSILONS.npy")
ALPHAS         = load(folder2 + "ALPHAS.npy")
TS             = load(folder2 + "TS.npy")
time           = load(folder2 + "TIME.npy")

### Gradients

In [16]:
avg_zappa_grads = N_GRAD_ZAPPA.mean(axis=0); avg_zappa_grads

array([922052.5, 286954.3, 205620.6, 186004.9])

In [17]:
N = 50000
B = 5

In [18]:
N_GRAD_ZAPPA.mean(axis=0) / num_grad_hug_hop(N, B) 

array([5.26887143, 1.63973886, 1.17497486, 1.06288514])

In [19]:
N_GRAD_ZAPPA.mean(axis=0) / num_grad_thug_hop(N, B) 

array([4.09801111, 1.27535244, 0.91386933, 0.82668844])

### Zappa

In [73]:
# Univariate ESS on theta and u as T changes
ESS_ZAPPA[:, :, 0].mean(axis=0), ESS_ZAPPA[:, :, 1].mean(axis=0)

(array([1064.09967736,  103.19882906,    7.22766851,    7.0543261 ]),
 array([3062.66066967,   60.39704781,    8.63964866,    6.00078204]))

In [50]:
ESS_J_ZAPPA[:, :].mean(axis=0)

array([1972.75755921,  278.12864772,  229.09232736,  228.79363086])

In [51]:
A_ZAPPA.mean(axis=0)

array([67.15729471, 97.13586595, 92.33946652, 79.25190523])

In [52]:
EJSD_ZAPPA.mean(axis=0)

array([4.22577468e-01, 9.74421205e-03, 9.86147137e-05, 8.00176912e-07])

### Hug

In [102]:
THETA_ESS_HUG[:, -1, :].mean(axis=0), U_ESS_HUG[:, -1, :].mean(axis=0)

(array([6.5363205 , 9.27562768, 5.52066951, 6.58784597]),
 array([6.7233777 , 7.80717772, 5.50943213, 6.10063469]))

In [80]:
ESS_HUG[:, -1, :].mean(axis=0)

array([227.8766808 , 229.41951762, 228.66012558, 228.84446591])

In [86]:
EJSD_HUG[:, -1, :].mean(axis=0)

array([5.14781331e-06, 8.44895527e-05, 6.12643326e-05, 2.23549963e-05])

### THUG

In [92]:
THETA_ESS_THUG[:, -1, :, :].mean(axis=0)

array([[ 7.17883013,  5.58530422,  6.4738642 ,  6.08905548],
       [ 9.17766026,  7.36308562,  6.60402867,  7.14318124],
       [ 7.66642297,  7.81313991, 12.3565901 , 19.87442853],
       [ 6.69305771,  7.11879569,  9.06712403, 10.13314349]])

In [93]:
U_ESS_THUG[:, -1, :, :].mean(axis=0)

array([[ 5.73322188,  6.05904317,  6.96061345,  5.76075086],
       [ 8.56666051,  7.68434667,  6.5969976 ,  7.43724721],
       [ 6.64961439,  8.01682884,  9.43039169, 14.40530356],
       [ 6.75732775,  6.86223326,  7.10674458,  7.97128209]])

In [95]:
ESS_THUG[:, -1, :, :].mean(axis=0)

array([[226.88888229, 226.18397237, 227.41580589, 228.41315948],
       [234.80057374, 233.0115679 , 229.68352849, 230.43860517],
       [229.7121864 , 228.55413558, 231.03757313, 236.3596721 ],
       [228.40460804, 227.9035054 , 230.38829979, 231.21534337]])

In [98]:
EJSD_THUG[:, -1, :, :].mean(axis=0)

array([[3.54867209e-05, 2.09834523e-05, 3.32484292e-05, 2.40750484e-05],
       [1.04169667e-04, 2.29449902e-04, 1.99969325e-04, 2.24519337e-04],
       [8.90760755e-05, 2.95902899e-04, 8.87978755e-04, 1.02323666e-03],
       [3.44731453e-05, 7.23478203e-05, 9.46236972e-05, 9.54316942e-05]])

In [105]:
-np.inf - (-np.inf)

nan

# project multivariate

In [4]:
import numpy as np
from numpy.random import randn, rand, exponential
from numpy.linalg import svd, solve
from numpy import log, zeros
import matplotlib.pyplot as plt
from numpy import pi
from scipy.optimize import root
from scipy.stats import multivariate_normal, norm
from utils import normalize
from utils import logp as logp_scale
from utils import angle_between

In [5]:
import numpy as np
from numpy import diag, zeros, exp, log, eye, vstack, save
from scipy.stats import multivariate_normal as MVN
from numpy.linalg import norm, solve
from numpy.random import rand
from time import time

from Manifolds.GeneralizedEllipse import GeneralizedEllipse
from utils import ESS, ESS_univariate

In [6]:
# MVN defining the manifold
Sigma = diag([1.0, 5.0]) 
d = Sigma.shape[0]
mu = zeros(d)
target = MVN(mu, Sigma)

# Manifold
z0 = -2.9513586307684885
manifold = GeneralizedEllipse(mu, Sigma, exp(z0))

# Settings
N = 50000  
n_runs = 10
n_cores = 8
tol = 1.48e-08
a_guess = 1.0

# Target on the manifold
logf = lambda xi: - log(norm(solve(Sigma, xi)))


Ts = [10, 1, 0.1, 0.01] 
n_T = len(Ts)

In [7]:
x0 = manifold.sample()

In [8]:
v0 = MVN(zeros(d-1), eye(d-1)).rvs()

In [10]:
log_uniforms = log(rand(N))

In [12]:
sigma = 1.0

In [15]:
v0 = np.array([v0])

In [17]:
maxiter = 50

In [16]:
a = 0.0
# Do First Step
Qx = manifold.Q(x0)                       # Gradient at x.                             Size: (d + m, )
tx_basis = manifold.tangent_basis(Qx)    # ON basis for tangent space at x using SVD  Size: (d + m, d)

# Sample along tangent 
v_sample = sigma*v0  # Isotropic MVN with scaling sigma         Size: (d, )
v = tx_basis @ v_sample    # Linear combination of the basis vectors  Size: (d + m, )

In [19]:
opt_output = root(lambda a: manifold.q(x0 + v0 + Qx @ a), a_guess, tol=tol, options={'maxfev':maxiter})

In [20]:
opt_output

    fjac: array([[-1.]])
     fun: 1.8203652381866609
 message: 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.'
    nfev: 22
     qtf: array([-1.82036604])
       r: array([-0.00147377])
  status: 5
 success: False
       x: array([-4.63996868])

In [39]:
def project_zappa(q, z, Q, grad_q, tol = 1.48e-08 , maxiter = 50):
    '''
    Project given z back onto manifold using to solve Newton's Method by solving:
        g(a) = q(z + Q @ a) = 0
    where q is the set of m constraints defining the manifold with dimension d and 
    codimension m.
    '''
    a, flag, i = np.zeros(Q.shape[1]), 1, 0
    

    #Newton's method to solve q(z + Q @ a)
    while la.norm(q(z + Q @ a)) > tol:
        delta_a = la.solve(grad_q(z + Q @ a).transpose() @ Q, -q(z + Q @ a))
        a += delta_a
        i += 1
        #print(a, q(z + Q @ a), i) #for debugging
        if i > maxiter: 
            flag = 0
            return a, flag, i
            

    return a, flag, i


In [40]:
from scipy import linalg as la

In [41]:
project_zappa(manifold.q, x0 + v0, Qx, manifold.Q, tol, maxiter)

(array([1.39786512]), 0, 51)

In [30]:
manifold.q(np.array([1.39786512]))

1.7273070561709516

In [44]:
manifold.z

0.0522686438979923

In [46]:
manifold.q(x0)

-1.5432100042289676e-14

In [47]:
manifold.q(np.array([1.39786512]))

1.7273070561709516

In [43]:
root(lambda a: manifold.q(x0 + v0 + Qx @ a), a_guess, tol=tol, options={'maxfev':200})

    fjac: array([[-1.]])
     fun: 1.8203652381866609
 message: 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.'
    nfev: 22
     qtf: array([-1.82036604])
       r: array([-0.00147377])
  status: 5
 success: False
       x: array([-4.63996868])