In [1]:
import util
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_probability as tfp

In [2]:
Normal = tfp.distributions.Normal
Bernoulli = tfp.distributions.Bernoulli

In [3]:
class DenseLayer(object) :
    def __init__(self, M1, M2, f=tf.nn.relu) :
        
        self.W = tf.Variable(tf.random.normal(shape=(M1, M2)) * 2 / np.sqrt(M1 * M2))
        self.b = tf.Variable(np.zeros(M2).astype(np.float32))
        self.f = f
        
    def forward(self, X) :
        return self.f(tf.matmul(X, self.W) + self.b)

In [51]:
class VariationalAutoencoder :
    def __init__(self, D, hidden_layer_sizes) :
        '''
        initializes the params : 

        Parameter
        ---------
        D : int
            The input data dimension. eg-for a input flattened image of 28x28 , D = 784
        
        hidden_layer_sizes : list
            specifies the size of every layer in the encoder upto the final hidden layer Z. 
            The decoder will have the reverse shape.

        Returns
        -------
            
        '''
        #represents the batch of training data
        self.X = tf.keras.layers.Input(shape=(D), dtype=tf.float32)
        
        #encoder
        self.encoder_layers = []
        M_in = D #initial input data dimension
        for M_out in hidden_layer_sizes[:-1] :
            h = DenseLayer(M_in, M_out)
            self.encoder_layers.append(h)
            M_in = M_out
        
        #for convenience , we will refer to the final encoder size as M
        #also the input to the decoder size
        
        M = hidden_layer_sizes[-1]
        # the encoder's final layer output is unbounded
        #so there is no activation function
        #we also need 2 times as many units as specified by M_out
        #since there need to be (M_out x mean) + (M_out x variance)
        h = DenseLayer(M_in, M * 2, f = lambda x : x)
        self.encoder_layers.append(h)
        
        
        #getting the output of the encoder
        #get the mean/std dev of Z
        #note that the variance must be > 0
        current_layer_value = self.X
        for layer in self.encoder_layers :
            current_layer_value = layer.forward(current_layer_value)
        self.means = current_layer_value[:, :M]
        self.stddev = tf.nn.softplus(current_layer_value[:, M:]) + 1e-6 #smoothing factor 
        #this is added so that we dont get a number too close to 0 causing singularity
        
        #get a sample of Z
        print(f'self.means = {self.means} self.stddev = {self.stddev}')
        self.Z = Normal(loc = self.means, scale=self.stddev)
        
        #decoder
        self.decoder_layers = []
        M_in = M
        for M_out in reversed(hidden_layer_sizes[:-1]):
            h = DenseLayer(M_in, M_out)
            self.decoder_layers.append(h)
            M_in = M_out
        
        #the final layer will have shape D, i.e. the input dimensionality
        
        #the activation function for the last layer is nothing as we have fn in tf that will
        #accept logits
        h = DenseLayer(M_in, D, f = lambda x : x)
        self.decoder_layers.append(h)
        
        #getting the logits
        current_layer_value = self.Z
        for layer in self.decoder_layers :
            current_layer_value = layer.forward(current_layer_value)
        logits = current_layer_value
        posterior_predictive_logits = logits
        
        #get the output 
        self.X_hat_distribution = Bernoulli(logits=logits)
        
        #take samples from X_hat
        #This is called posterior predictive sample
        self.posterior_predictive = self.X_hat_distribution.sample()
        self.posterior_predictive_prob = tf.nn.sigmoid(logits) #for the mean output image
        
        #for prior predictive sample
        #first draw a sample from Standard Normal of size M, the size of the latent vector
        #sampling from a Z ~ N(0, 1)
        standard_normal = Normal(
            loc = np.zeros(M, dtype = np.float32),
            scale= np.ones(M, dtype = np.float32)
        )
        
        Z_std = standard_normal.sample(1)
        current_layer_value = Z_std
        for layer in self.decoder_layers :
            current_layer_value = layer.forward(current_layer_value)
        logits = current_layer_value
        
        #get the output 
        self.prior_predictive_dist = Bernoulli(logits=logits)
        
        #take samples from prior_predictive_dist
        #This is called prior predictive sample
        self.prior_predictive = self.prior_predictive_dist.sample()
        self.prior_predictive_prob = tf.nn.sigmoid(logits) #for the mean output image
        
        #getting the output for the given z
        self.Z_input = tf.keras.Input(shape=(None, M), dtype=tf.float32)
        current_layer_value = self.Z_input
        for layer in self.decoder_layers :
            current_layer_value = layer.forward(current_layer_value)
        logits = current_layer_value
        self.prior_predictive_from_input_probs = tf.nn.sigmoid(logits)
        
        
        #COST
        #KL-divergence
        kl = tf.reduce_sum(
            #or self.Z.kl_divergence(standard_normal)
            tfp.distributions.kl_divergence(
                self.Z, standard_normal 
            ),
            axis=1
        )
        
        expected_log_likelihood = tf.reduce_sum(
            self.X_hat_distribution.log_prob(self.X),
            axis = 1
        )
        
        self.elbo = tf.reduce_sum(expected_log_likelihood - kl)
        self.train_op = tf.keras.optimizers.RMSprop(learning_rate=0.001).minimize(self.elbo)
        
        model = tf.keras.models.Model(self.X)
        model.compile(optimizer=self.train_op)
        

In [52]:
vae = VariationalAutoencoder(784, [200, 100])

self.means = KerasTensor(type_spec=TensorSpec(shape=(None, 100), dtype=tf.float32, name=None), name='tf.__operators__.getitem_36/strided_slice:0', description="created by layer 'tf.__operators__.getitem_36'") self.stddev = KerasTensor(type_spec=TensorSpec(shape=(None, 100), dtype=tf.float32, name=None), name='tf.__operators__.add_56/AddV2:0', description="created by layer 'tf.__operators__.add_56'")


ValueError: TypeError: object of type 'Normal' has no len()


In [20]:
Normal(0, 1).sample()

<tf.Tensor: shape=(), dtype=float32, numpy=0.6180783>