**Create Data**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import scipy
import ipdb

# From nsc lib
import nsc
from nsc import distributions as nsd
# from nsc.math import function as nsc_func
nsd = nsc.distributions

Importing NSC lib v0.0.4.1.


**Input data**

In [2]:
# Shared settings for the synthetic inputs built in the next cells:
#   n_sample - number of points generated along each input axis
#   scale    - multiplier applied to the +/-12 bounds of the second axis
n_sample, scale = 10_000, 1

In [3]:
# First input axis: n_sample evenly spaced points on [1e-6, 5]. The lower
# bound is a small positive number, so the grid starts close to 0 without
# containing 0 itself.
X = np.linspace(start=1e-6, stop=5, num=n_sample)
X

array([1.00000000e-06, 5.01049905e-04, 1.00109981e-03, ...,
       4.99899990e+00, 4.99949995e+00, 5.00000000e+00])

In [4]:
# Second input axis: n_sample evenly spaced points on the symmetric
# interval [-12*scale, 12*scale].
X2 = np.linspace(start=-12 * scale, stop=12 * scale, num=n_sample)
X2

array([-12.        , -11.99759976, -11.99519952, ...,  11.99519952,
        11.99759976,  12.        ])

In [5]:
# Pair the two 1-D axes element-wise into an (n_sample, 2) array:
# column 0 holds X, column 1 holds X2.
X_2D = np.stack((X, X2), axis=1)
X_2D

array([[ 1.00000000e-06, -1.20000000e+01],
       [ 5.01049905e-04, -1.19975998e+01],
       [ 1.00109981e-03, -1.19951995e+01],
       ...,
       [ 4.99899990e+00,  1.19951995e+01],
       [ 4.99949995e+00,  1.19975998e+01],
       [ 5.00000000e+00,  1.20000000e+01]])

**ELBO formula:**
    $$\Large ELBO = L(\theta, \phi, x^{(i)}) =\\\\\Large-D_{KL}(q_\phi(z| x^{(i)}) || p_\theta(z)) + E_{q_\phi(z|x^{(i)})}[\log p_\theta(x^{(i)} | z)]$$

- Both encoding_dist and prior are **tfp.distributions.MultivariateNormalDiag** instances, although likely with different batch_shapes.
- The output of tfd.kl_divergence is a 1-D tensor.
- The input x is an n-D tensor.
- The output of sampled_decoding_dist.log_prob(x) is a 1-D tensor of the same dimension as the output of tfd.kl_divergence.

In [7]:
# Define the prior, p(z) - a standard bivariate Gaussian
# prior = tfd.MultivariateNormalDiag(loc=tf.zeros(latent_size))

In [8]:
#elbo = -tfd.kl_divergence(encoding_dist, prior) + sampled_decoding_dist.log_prob(x)

**Coupled ELBO formula:**
    $$\Large \text{Coupled ELBO} = L(\theta, \phi, \kappa, \alpha, d, x^{(i)}) = -D_{KL}(q_\phi^{\frac{\alpha}{1+d\kappa}}(z| x^{(i)}) || p_\theta^{\frac{\alpha}{1+d\kappa}}(z)) + E_{q_\phi^{(\alpha, d, \kappa)}(z|x^{(i)})}[\log_\kappa p_\theta^{\frac{\alpha}{1+d\kappa}}(x^{(i)} | z)]$$

In [22]:
# Hyper-parameters for the coupled-ELBO cells below.
kappa, alpha = 0.1, 2   # forwarded as kappa= / alpha= to nsd.MultivariateCoupledNormal
dim = z_dim = 2         # z_dim sizes the loc/scale vectors of the prior and decoding dist;
                        # dim is not referenced in the visible cells
batch_size = 64         # number of (loc, scale) rows built for the encoding distribution
sample_n = 10_000       # not referenced in the visible cells

In [26]:
# Single-row example of the encoding parameters: one loc vector and one
# scale vector, each wrapped in an outer list.
encoding_loc = [[0., 1.]]
encoding_scale = [[1., 2.]]
# Show the container type that will be handed to the distribution.
type(encoding_loc)

list

In [33]:
# Batched encoding parameters: one [loc_0, loc_1] row and one
# [scale_0, scale_1] row per batch element. Row i is [i, i+1] for loc and
# [i+1, i+2] for scale.
encoding_loc = [[i, i + 1] for i in range(batch_size)]
encoding_scale = [[i + 1, i + 2] for i in range(batch_size)]
encoding_loc, encoding_scale

([[0, 1],
  [1, 2],
  [2, 3],
  [3, 4],
  [4, 5],
  [5, 6],
  [6, 7],
  [7, 8],
  [8, 9],
  [9, 10],
  [10, 11],
  [11, 12],
  [12, 13],
  [13, 14],
  [14, 15],
  [15, 16],
  [16, 17],
  [17, 18],
  [18, 19],
  [19, 20],
  [20, 21],
  [21, 22],
  [22, 23],
  [23, 24],
  [24, 25],
  [25, 26],
  [26, 27],
  [27, 28],
  [28, 29],
  [29, 30],
  [30, 31],
  [31, 32],
  [32, 33],
  [33, 34],
  [34, 35],
  [35, 36],
  [36, 37],
  [37, 38],
  [38, 39],
  [39, 40],
  [40, 41],
  [41, 42],
  [42, 43],
  [43, 44],
  [44, 45],
  [45, 46],
  [46, 47],
  [47, 48],
  [48, 49],
  [49, 50],
  [50, 51],
  [51, 52],
  [52, 53],
  [53, 54],
  [54, 55],
  [55, 56],
  [56, 57],
  [57, 58],
  [58, 59],
  [59, 60],
  [60, 61],
  [61, 62],
  [62, 63],
  [63, 64]],
 [[1, 2],
  [2, 3],
  [3, 4],
  [4, 5],
  [5, 6],
  [6, 7],
  [7, 8],
  [8, 9],
  [9, 10],
  [10, 11],
  [11, 12],
  [12, 13],
  [13, 14],
  [14, 15],
  [15, 16],
  [16, 17],
  [17, 18],
  [18, 19],
  [19, 20],
  [20, 21],
  [21, 22],
  [22, 23],
  [

In [34]:
# Build the three distributions used by the coupled ELBO from dummy
# parameters: the prior p(z), the encoding distribution q(z|x), and the
# sampled decoding distribution p(x|z). All three take the coupling from the
# shared `kappa` setting so they cannot drift apart when it is changed.

# Coupled Prior: p(z)
coupled_prior = nsd.MultivariateCoupledNormal(loc=np.zeros(z_dim),
                                              scale=np.ones(z_dim),
                                              kappa=kappa, alpha=alpha
                                              )

# Coupled Encoding Distribution: q(z|x)
# Intended to have batch_shape=[batch_size] and event_shape=[2]:
# row i is loc=[i, i+1], scale=[i+1, i+2].
encoding_loc = [[i, i + 1] for i in range(batch_size)]
encoding_scale = [[i + 1, i + 2] for i in range(batch_size)]
# NOTE(review): the recorded run of this cell stopped with
# "AssertionError: scale must be positive definite, but not necessarily symmetric."
# The traceback is not shown, so which constructor raised is unconfirmed;
# presumably it is this one, since it is the only call given a batched
# list-of-lists scale. Confirm the scale format MultivariateCoupledNormal
# expects for batched input before relying on the cells below.
coupled_encoding_dist = nsd.MultivariateCoupledNormal(loc=encoding_loc,
                                                      scale=encoding_scale,
                                                      kappa=kappa
                                                      )

# Coupled Sampled Decoding Distribution: p(x|z)
# NOTE(review): unlike the prior, this and the encoding distribution do not
# pass alpha= and so use the library default - confirm that is intended.
coupled_sampled_decoding_dist = nsd.MultivariateCoupledNormal(loc=np.zeros(z_dim),
                                                              scale=np.ones(z_dim),
                                                              kappa=kappa
                                                              )

AssertionError: scale must be positive definite, but not necessarily symmetric.

In [11]:
# Coupled KL-Divergence
# Divergence of the encoding distribution q(z|x) from the prior p(z).
# NOTE(review): the cell that builds coupled_encoding_dist (In [34]) is recorded
# as ending in an AssertionError, and this cell carries an earlier execution
# count (In [11]). The 0.0 shown below was presumably computed from objects
# built in an earlier run - re-run top to bottom and confirm.
coupled_encoding_dist.kl_divergence(coupled_prior, root=False)

0.0

In [12]:
# Coupled Cross-entropy
# Evaluates the decoding distribution's density at every row of X_2D.
# NOTE(review): this is .prob(X_2D), i.e. a density, not a (coupled)
# log-probability, whereas the Coupled ELBO formula in the markdown takes
# log_kappa of p(x|z) - confirm which quantity is intended here.
coupled_sampled_decoding_dist.prob(X_2D)

array([1.19315209e-08, 1.19583321e-08, 1.19852079e-08, ...,
       4.85875496e-09, 4.84856698e-09, 4.83840196e-09])

In [13]:
# Coupled ELBO
# Negated KL term plus the decoding density evaluated at every row of X_2D.
# NOTE(review): two things to confirm against the Coupled ELBO formula:
#   1. the second term uses .prob(X_2D), not a coupled logarithm of it;
#   2. kl_divergence is called without root=False here, unlike the
#      stand-alone KL cell - the default value of `root` is not visible.
coupled_elbo = -coupled_encoding_dist.kl_divergence(coupled_prior) + \
                coupled_sampled_decoding_dist.prob(X_2D)

In [77]:
# Display the coupled ELBO array.
coupled_elbo

array([1.19315209e-08, 1.19583321e-08, 1.19852079e-08, ...,
       4.85875496e-09, 4.84856698e-09, 4.83840196e-09])

In [85]:
# Sum every entry of coupled_elbo into a single scalar.
# NOTE(review): `tf` is not imported in any visible cell. Presumably
# `import tensorflow as tf` ran in a cell that is no longer shown - confirm
# and add it to the import cell so the notebook survives Restart & Run All.
tf.reduce_sum(coupled_elbo)

<tf.Tensor: shape=(), dtype=float64, numpy=12.006604843648898>

In [86]:
# Loss function
# The loss is the negated sum of the coupled ELBO values.
# NOTE(review): `tf` is not imported in any visible cell - confirm that
# tensorflow is imported somewhere before this cell runs.
-tf.reduce_sum(coupled_elbo)

<tf.Tensor: shape=(), dtype=float64, numpy=-12.006604843648898>