**Create Data**

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import scipy
import ipdb

# From nsc lib
import nsc
from nsc import distributions as nsd
# from nsc.math import function as nsc_func
nsd = nsc.distributions

**Input data**

In [64]:
n_sample = 10000
scale = 1

In [65]:
# n_sample of linearly spaced numbers, starting from CLOSE to 0
X = np.linspace(1e-6, 5, n_sample)
X

array([1.00000000e-06, 5.01049905e-04, 1.00109981e-03, ...,
       4.99899990e+00, 4.99949995e+00, 5.00000000e+00])

In [66]:
X2 = np.linspace(-12*scale, 12*scale, n_sample)
X2

array([-12.        , -11.99759976, -11.99519952, ...,  11.99519952,
        11.99759976,  12.        ])

In [67]:
X_2D = np.column_stack((X, X2))
X_2D

array([[ 1.00000000e-06, -1.20000000e+01],
       [ 5.01049905e-04, -1.19975998e+01],
       [ 1.00109981e-03, -1.19951995e+01],
       ...,
       [ 4.99899990e+00,  1.19951995e+01],
       [ 4.99949995e+00,  1.19975998e+01],
       [ 5.00000000e+00,  1.20000000e+01]])

**ELBO formula:**
    $$\Large ELBO = L(\theta, \phi, x^{(i)}) =\\\\\Large-D_{KL}(q_\phi(z| x^{(i)}) || p_\theta(z)) + E_{q_\phi(z|x^{(i)})}[log_\kappa p_\theta(x^{(i)} | z)]$$

- Both encoding_dist and prior are **tfp.distributions.MultivariateNormalDiag**, although likely with different batch_shapes.
- Output of tfd.kl_divergence is a 1D tensor.
- Input of x is a nD tensor.
- Output of sampled_decoding_dist.log_prob(x) is a 1D tensor of same dim as tfd.kl_divergence.

In [None]:
# Define the prior, p(z) - a standard bivariate Gaussian
# prior = tfd.MultivariateNormalDiag(loc=tf.zeros(latent_size))

In [None]:
#elbo = -tfd.kl_divergence(encoding_dist, prior) + sampled_decoding_dist.log_prob(x)

**Coupled ELBO formula:**
    $$\Large Coupled ELBO = L(\theta, \phi, \kappa, \alpha, d, x^{(i)}) = -D_{KL}(q_\phi^{\frac{\alpha}{1+d\kappa}}(z| x^{(i)}) || p_\theta^{\frac{\alpha}{1+d\kappa}}(z)) + E_{q_\phi^{(a, d, \kappa)}(z|x^{(i)})}[log_\kappa p_\theta^{\frac{\alpha}{1+d\kappa}}(x^{(i)} | z)]$$

In [57]:
kappa = 0.1
alpha = 2
dim = 2
z_dim = 2
sample_n = 10000

In [80]:
# Using dummy prior, enncoding_dist, and sampled_decoding_dist

# Coupled Prior: p(z)
coupled_prior = nsd.MultivariateCoupledNormal(loc=np.zeros(z_dim),
                                              scale=np.ones(z_dim),
                                              kappa=kappa, alpha=alpha
                                              )

# Coupled Encoding Distribution: q(z|x)
coupled_encoding_dist = nsd.MultivariateCoupledNormal(loc=np.zeros(z_dim),
                                                      scale=np.ones(z_dim),
                                                      kappa=0.1
                                                      )

# Coupled Sampled Decoding Distribution: p(x|z)
coupled_sampled_decoding_dist = nsd.MultivariateCoupledNormal(loc=np.zeros(z_dim),
                                                              scale=np.ones(z_dim),
                                                              kappa=0.1
                                                              )

In [81]:
# Coupled KL-Divergence
coupled_encoding_dist.kl_divergence(coupled_prior, root=False)

0.0

In [82]:
# Coupled Cross-entropy
coupled_sampled_decoding_dist.prob(X_2D)

array([1.19315209e-08, 1.19583321e-08, 1.19852079e-08, ...,
       4.85875496e-09, 4.84856698e-09, 4.83840196e-09])

In [83]:
# Coupled ELBO
coupled_elbo = -coupled_encoding_dist.kl_divergence(coupled_prior) + \
                coupled_sampled_decoding_dist.prob(X_2D)

In [77]:
coupled_elbo

array([1.19315209e-08, 1.19583321e-08, 1.19852079e-08, ...,
       4.85875496e-09, 4.84856698e-09, 4.83840196e-09])

In [85]:
tf.reduce_sum(coupled_elbo)

<tf.Tensor: shape=(), dtype=float64, numpy=12.006604843648898>

In [86]:
# Loss function
-tf.reduce_sum(coupled_elbo)

<tf.Tensor: shape=(), dtype=float64, numpy=-12.006604843648898>