# Training a Bayesian Neural Network to classify the Fashion-MNIST data set.
'''
The architecture is LeNet-5 [1].
#### References
[1]: Yann LeCun, Leon Bottou, Yoshua Bengio, and Patrick Haffner.
     Gradient-based learning applied to document recognition.
     _Proceedings of the IEEE_, 1998.
     http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
'''

In [7]:
import os 
import warnings
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.examples.tutorials.mnist import input_data
from IPython.core.debugger import  set_trace

tfd = tfp.distributions

## Loading Data and Pre-processing for Training and Testing

In [8]:
# Load Data
# x_train are the training images.
# y_train are the labels associated with the training images.
# x_test are the test images.
# y_test are the labels associated with the test images.
'''
Another way of loading the data using Keras.
fashionData = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test,y_test) = fashionData.load_data()
'''
#fashion_mnist = input_data.read_data_sets('input/fashion',one_hot=True)

'\nAnother way of loading the data using Keras.\nfashionData = tf.keras.datasets.fashion_mnist\n(x_train, y_train), (x_test,y_test) = fashionData.load_data()\n'

## Visualizing the data

In [9]:
# Load the data set with one-hot encoded labels (used only for the preview below).
fashion_mnist = input_data.read_data_sets('input/fashion',one_hot=True)
print ("Number of Images for testing : {test} ". format(test=fashion_mnist.test.num_examples))
# The 10 class names of the data set; the position in this list is the
# index of the 1 in the corresponding one-hot label.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

flag = False # Change it to True to see the data

if flag:
    # Preview the first 64 training images in an 8x8 grid, each captioned
    # with its class name.
    plt.figure(figsize=(20,20))
    for i in range(64):
        plt.subplot(8,8,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(fashion_mnist.train.images[i].reshape(28,28), cmap=plt.cm.binary)
        # For a one-hot row, argmax is the position of the single 1.
        plt.xlabel(class_names[np.argmax(fashion_mnist.train.labels[i])])

Extracting input/fashion/train-images-idx3-ubyte.gz
Extracting input/fashion/train-labels-idx1-ubyte.gz
Extracting input/fashion/t10k-images-idx3-ubyte.gz
Extracting input/fashion/t10k-labels-idx1-ubyte.gz
Number of Images for testing : 10000 


In [13]:
def build_input_pipeline(mnist_data, batch_size, validation_datasize):
    """Build a feedable tf.data pipeline over the training and validation splits.

    Args:
        mnist_data: Data-set object exposing `train` and `validation` splits,
            each with `images` and `labels`; `train.num_examples` is read to
            size the shuffle buffer.
        batch_size: Number of examples in each training batch.
        validation_datasize: Number of validation examples to take; they are
            served together as one batch.

    Returns:
        A tuple `(images, labels, handle, training_iterator,
        validation_iterator)`. `images` and `labels` are the next-element
        tensors of an iterator selected at run time by feeding `handle`
        (a scalar string placeholder) with the string handle of either
        returned iterator.
    """
    # Reshape each split's images to [-1, 28, 28, 1] for the conv layers.
    train_images = tf.reshape(mnist_data.train.images, [-1, 28, 28, 1])
    validation_images = tf.reshape(mnist_data.validation.images, [-1, 28, 28, 1])

    training_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, np.int32(mnist_data.train.labels)))

    # Size the shuffle buffer to the whole training split: a buffer smaller
    # than the data set only shuffles within a sliding window, so the order
    # would not be uniformly random.
    training_batches = (
        training_dataset
        .shuffle(mnist_data.train.num_examples, reshuffle_each_iteration=True)
        .repeat()
        .batch(batch_size))

    training_iterator = training_batches.make_one_shot_iterator()

    # Build an iterator over the validation set with
    # batch_size=validation_datasize, i.e. every batch is the same
    # `validation_datasize` examples.
    validation_dataset = tf.data.Dataset.from_tensor_slices(
        (validation_images, np.int32(mnist_data.validation.labels)))

    validation_frozen = (
        validation_dataset
        .take(validation_datasize)
        .repeat()
        .batch(validation_datasize))

    validation_iterator = validation_frozen.make_one_shot_iterator()

    # Combine these into a feedable iterator that can switch between training
    # and validation inputs.
    handle = tf.placeholder(tf.string, shape=[])

    feedable_iterator = tf.data.Iterator.from_string_handle(
        handle,
        training_batches.output_types,
        training_batches.output_shapes)

    images, labels = feedable_iterator.get_next()

    return images, labels, handle, training_iterator, validation_iterator

In [20]:
def main(batch_size, learning_rate, max_steps):
    """Train a Bayesian LeNet-5 on the data under 'input/fashion'.

    Builds the input pipeline, the Flipout-based network, the negative-ELBO
    loss and a streaming accuracy metric, then runs `max_steps` Adam updates,
    printing loss and accuracy every 100 steps.

    Args:
        batch_size: Number of examples per training batch.
        learning_rate: Learning rate passed to the Adam optimizer.
        max_steps: Number of training steps to run.
    """
    # Labels are loaded as class indices here (one_hot=False).
    data = input_data.read_data_sets('input/fashion', one_hot=False)

    pipeline = build_input_pipeline(
        data, batch_size, data.validation.num_examples)
    images, labels, handle, training_iterator, heldout_iterator = pipeline

    # Bayesian LeNet-5. The convolution and fully-connected layers use the
    # Flipout Monte Carlo estimator, which gives lower-variance stochastic
    # gradients than naive reparameterization.
    with tf.name_scope("bayesian_neural_net", values=[images]):
        conv_args = dict(kernel_size=5, padding="SAME", activation=tf.nn.relu)
        pool_args = dict(pool_size=[2, 2], strides=[2, 2], padding="SAME")
        model = tf.keras.Sequential([
            tfp.layers.Convolution2DFlipout(6, **conv_args),
            tf.keras.layers.MaxPooling2D(**pool_args),
            tfp.layers.Convolution2DFlipout(16, **conv_args),
            tf.keras.layers.MaxPooling2D(**pool_args),
            tfp.layers.Convolution2DFlipout(120, **conv_args),
            tf.keras.layers.Flatten(),
            tfp.layers.DenseFlipout(84, activation=tf.nn.relu),
            tfp.layers.DenseFlipout(10),
        ])

    logits = model(images)
    labels_distribution = tfd.Categorical(logits=logits)

    # The loss is the negative ELBO: batch-mean negative log-likelihood plus
    # the summed layer losses divided by the training-set size.
    # To further understand the ELBO (variational lower bound), see:
    # http://legacydirs.umiacs.umd.edu/~xyang35/files/understanding-variational-lower.pdf
    nll = -tf.reduce_mean(labels_distribution.log_prob(labels))
    kl = sum(model.losses) / data.train.num_examples
    elbo_loss = nll + kl

    # Evaluation metric. Predictions come from a single forward pass of the
    # probabilistic layers: cheap but noisy.
    predictions = tf.argmax(logits, axis=1)
    accuracy, accuracy_update_op = tf.metrics.accuracy(
        labels=labels, predictions=predictions)

    # Collect posterior mean/stddev tensors for every layer that has a
    # kernel posterior, for later visualization.
    names, qmeans, qstds = [], [], []
    for index, layer in enumerate(model.layers):
        if not hasattr(layer, "kernel_posterior"):
            continue
        posterior = layer.kernel_posterior
        names.append("Layer {}".format(index))
        qmeans.append(posterior.mean())
        qstds.append(posterior.stddev())

    with tf.name_scope("train"):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(elbo_loss)

    # Created after the optimizer and the metric so their variables are
    # covered by the initializers.
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())

    with tf.Session() as sess:
        sess.run(init_op)

        # String handles select which iterator feeds `images`/`labels`.
        train_handle = sess.run(training_iterator.string_handle())
        # Fetched but not used by the training loop below.
        heldout_handle = sess.run(heldout_iterator.string_handle())

        train_feed = {handle: train_handle}
        for step in range(max_steps):
            sess.run([train_op, accuracy_update_op], feed_dict=train_feed)

            if step % 100:
                continue
            loss_value, accuracy_value = sess.run(
                [elbo_loss, accuracy], feed_dict=train_feed)
            print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f}".format(
                step, loss_value, accuracy_value))
        
        
        


In [21]:
main(batch_size= 128,learning_rate=0.001,max_steps= 400)

Extracting input/fashion/train-images-idx3-ubyte.gz
Extracting input/fashion/train-labels-idx1-ubyte.gz
Extracting input/fashion/t10k-images-idx3-ubyte.gz
Extracting input/fashion/t10k-labels-idx1-ubyte.gz
Step:   0 Loss: 27.807 Accuracy: 0.133
Step: 100 Loss: 25.492 Accuracy: 0.521
Step: 200 Loss: 24.583 Accuracy: 0.626
Step: 300 Loss: 23.797 Accuracy: 0.674


In [None]:
# NOTE(review): this cell is an unfinished fragment and does not parse as-is:
#   * `num_monte_carlo` is assigned no value;
#   * the indented statements read `sess`, `labels_distribution`, `handle`,
#     `heldout_handle`, `images`, `labels`, `qmeans`, `qstds`, `names` and
#     `step`, which are locals of `main()` and not visible at cell level;
#   * `HAS_SEABORN`, `FLAGS`, `plot_weight_posteriors` and
#     `plot_heldout_prediction` are not defined in the visible part of this
#     file.
# Presumably it was meant to run inside the `with tf.Session()` block of
# `main()` after training -- TODO confirm before wiring it in.
num_monte_carlo = 
        # Compute log prob of heldout set by averaging draws from the model:
        # p(heldout | train) = int_model p(heldout|model) p(model|train)
        #                   ~= 1/n * sum_{i=1}^n p(heldout | model_i)
        # where model_i is a draw from the posterior p(model|train).
        # One run of `labels_distribution.probs` per Monte Carlo draw.
        probs = np.asarray([sess.run((labels_distribution.probs),
                                     feed_dict={handle: heldout_handle})
                            for _ in range(num_monte_carlo)])
        # Average the class probabilities over the draws (axis 0).
        mean_probs = np.mean(probs, axis=0)

        image_vals, label_vals = sess.run((images, labels),
                                          feed_dict={handle: heldout_handle})
        # Mean log of the averaged probability assigned to each true label.
        heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]),
                                               label_vals.flatten()]))
        print(" ... Held-out nats: {:.3f}".format(heldout_lp))

        # Evaluate the posterior mean/stddev tensors for plotting.
        qm_vals, qs_vals = sess.run((qmeans, qstds))

    # Plots are written only when `HAS_SEABORN` is truthy.
    if HAS_SEABORN:
        plot_weight_posteriors(names, qm_vals, qs_vals,
                                 fname=os.path.join(
                                     FLAGS.model_dir,
                                     "step{:05d}_weights.png".format(step)))
        plot_heldout_prediction(image_vals, probs,
                                  fname=os.path.join(
                                      FLAGS.model_dir,
                                      "step{:05d}_pred.png".format(step)),
                                  title="mean heldout logprob {:.2f}"
                                  .format(heldout_lp))