# Vector Quantization (VQ) Layer Function

Related links:
* https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
* https://www.mathworks.com/help/stats/pdist.html
* https://stackoverflow.com/questions/37009647/compute-pairwise-distance-in-a-batch-without-replicating-tensor-in-tensorflow

In [1]:
import tensorflow as tf
from typing import List

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
def vector_quantization(x: tf.Tensor, n: int, alpha: float = 0.1, beta: float = 1e-4, gamma: float = 1e-6, lookup_ord='euclidean', embedding_initializer=tf.random_normal_initializer) -> tf.Tensor:
    """Replace every vector along the last axis of ``x`` by its nearest codebook vector.

    ``x`` is indexed as ``[batch, m, q]``: quantization happens along ``q``, and the
    middle axis ``m`` lets one sample be quantized several times (e.g. once per
    channel of a conv layer output).

    Three auxiliary losses are added to the ``tf.GraphKeys.LOSSES`` collection; all of
    them only move the codebook, because ``x`` is wrapped in ``tf.stop_gradient``
    wherever a distance to the codebook is computed:

    * alpha-loss: pulls the selected (nearest) embedding towards ``x``.
    * beta-loss: pulls every embedding towards ``x``.
    * coulomb-loss: rewards a large mean pairwise distance between embeddings.

    Args:
        x: rank-3 input tensor ``[batch, m, q]``.
        n: number of vectors in the embedding space (codebook size).
        alpha: weight of the nearest-embedding loss.
        beta: weight of the all-embeddings loss.
        gamma: weight of the pairwise (coulomb) loss.
        lookup_ord: ``ord`` argument handed to ``tf.norm`` for every distance.
        embedding_initializer: initializer passed to ``tf.get_variable`` for the codebook.

    Returns:
        A tensor shaped like ``x`` whose forward value is the selected embedding and
        whose gradient with respect to ``x`` is the identity (straight-through).
    """
    vec_size = x.shape[2]
    with tf.variable_scope('vq'):
        # embedding space (the codebook), one row per code vector
        emb_space = tf.get_variable('emb_space', shape=[n, vec_size], dtype=x.dtype, initializer=embedding_initializer, trainable=True)

        # map x to y, where y is the vector from emb_space that is closest to x
        diff = tf.expand_dims(tf.stop_gradient(x), axis=2) - emb_space  # [batch, m, n, q]
        dist = tf.norm(diff, lookup_ord, axis=3)  # [batch, m, n]
        emb_index = tf.argmin(dist, axis=2)  # [batch, m]
        y = tf.gather(emb_space, emb_index, axis=0)  # [batch, m, q]

        # access counter: how often each embedding was selected in this batch.
        # Nothing below consumes it; it is kept as a named op so it can be fetched
        # from the graph by name (e.g. 'vq/access_count:0' on first use).
        one_hot_access = tf.one_hot(emb_index, depth=n)
        tf.reduce_sum(one_hot_access, axis=[0, 1], name='access_count')

        # closest embedding update loss (alpha-loss)
        nearest_loss = tf.reduce_mean(alpha * tf.norm(y - tf.stop_gradient(x), lookup_ord, axis=2), axis=[0, 1])
        tf.add_to_collection(tf.GraphKeys.LOSSES, nearest_loss)

        # all embeddings update loss (beta-loss)
        all_loss = tf.reduce_mean(beta * tf.reduce_sum(dist, axis=2), axis=[0, 1])
        tf.add_to_collection(tf.GraphKeys.LOSSES, all_loss)

        # all embeddings distance from each other (coulomb-loss)
        pdiff = tf.expand_dims(emb_space, axis=0) - tf.expand_dims(emb_space, axis=1)  # pair-wise diff vectors (n x n x vec_size)
        # The diagonal of pdiff is exactly zero (each embedding minus itself). The
        # gradient of a norm at the zero vector evaluates to inf * 0 = NaN, which
        # would poison every gradient of emb_space. Offset the diagonal vectors to
        # all-ones before taking the norm, then mask the diagonal distances back to
        # zero: forward values are unchanged and the diagonal gradient becomes 0.
        # The helper ops are named explicitly so the auto-generated names of the
        # other ops in this scope (including the returned 'add') stay the same.
        eye = tf.eye(n, dtype=pdiff.dtype)
        safe_pdiff = tf.add(pdiff, tf.expand_dims(eye, axis=2), name='safe_pdiff')
        off_diag_mask = tf.subtract(tf.ones_like(eye), eye, name='off_diag_mask')
        pdist = tf.multiply(tf.norm(safe_pdiff, lookup_ord, axis=2), off_diag_mask, name='pdist')  # pair-wise distance scalars (n x n)
        coulomb_loss = tf.reduce_sum(-gamma * tf.reduce_mean(pdist, axis=1), axis=0)
        tf.add_to_collection(tf.GraphKeys.LOSSES, coulomb_loss)

        # return selection
        return tf.stop_gradient(y - x) + x  # skip this layer when doing back-prop

In [3]:
# Build the VQ layer once on a fresh default graph with a 200-entry codebook.
tf.reset_default_graph()
x = tf.placeholder(tf.float32, shape=(None, 5, 10), name='test')
y = vector_quantization(x, n=200)

In [4]:
y  # notebook echo: shows the symbolic output tensor's repr (name, shape, dtype)

<tf.Tensor 'vq/add:0' shape=(?, 5, 10) dtype=float32>