In [None]:
import tensorflow as tf
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# MNIST loader from the TensorFlow 1.x tutorials package.
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets, using "datasets/" as the data directory.
# The result exposes .train, .validation and .test splits (used below).
mnist = input_data.read_data_sets("datasets/")

In [None]:
# Pull the image arrays of the three MNIST splits out of the dataset object.
train_data, test_data, validation_data = (
    mnist.train.images,
    mnist.test.images,
    mnist.validation.images,
)

# Report how many examples each split holds.
for split_label, split_images in (("Training -", train_data),
                                  ("Testing -", test_data),
                                  ("Validating -", validation_data)):
    print(split_label, len(split_images))

In [None]:
# Input placeholder: a batch of 28x28 single-channel float32 images.
# The leading (batch) dimension is None so any batch size can be fed.
X = tf.placeholder(shape=[None, 28, 28, 1], dtype=tf.float32, name="X")
X

In [None]:
# First convolutional layer: 256 filters, 9x9 kernel, stride 1, no padding, ReLU.
# With a 28x28 input and "valid" padding this yields 20x20 feature maps (28 - 9 + 1).
conv1 = tf.layers.conv2d(X, filters=256, kernel_size=9, strides=1, padding="valid", activation=tf.nn.relu, name="conv1")

In [None]:
# Second convolutional layer: 256 filters, 9x9 kernel, stride 2, no padding, ReLU.
# On the 20x20 maps from conv1 this yields 6x6 maps ((20 - 9) // 2 + 1); the 256
# channels are later regrouped into 32 maps of 8-dimensional capsules.
conv2 = tf.layers.conv2d(conv1, filters=256, kernel_size=9, strides=2, padding="valid", activation=tf.nn.relu, name="conv2")

In [None]:
# Primary capsule layer hyperparameters
caps1_dimension = 8                    # length of each primary capsule vector
caps1_maps = 32                        # number of capsule feature maps
caps1_capsules = caps1_maps * 6 * 6    # one capsule per map per cell of the 6x6 grid = 1152

In [None]:
# Regroup the output of the second convolutional layer into primary capsules:
# each example's activations become caps1_capsules (1152) vectors of
# caps1_dimension (8) values, i.e. shape (batch_size, 1152, 8).
caps1 = tf.reshape(conv2, [-1, caps1_capsules, caps1_dimension], name="caps1") 
caps1

In [None]:
# Squashing non-linearity: keeps a vector's direction, maps its length into [0, 1).
def squash(s, axis=-1, epsilon=1e-7, name=None):
    """Squash the vectors of `s` that lie along `axis`.

    Each vector is scaled to ``|s|^2 / (1 + |s|^2)`` times its unit vector,
    so short vectors shrink towards zero and long ones approach length 1.

    Args:
        s: tensor holding the vectors to squash.
        axis: axis along which each vector lies (default: last axis).
        epsilon: small constant added under the square root so that the
            division stays finite for all-zero vectors.
        name: optional name scope for the created ops (default "squash").

    Returns:
        A tensor with the same shape as `s`.
    """
    with tf.name_scope(name, default_name="squash"):
        norm_sq = tf.reduce_sum(tf.square(s), axis=axis, keepdims=True)
        # Only the denominator of the unit vector uses the epsilon-padded norm.
        norm = tf.sqrt(norm_sq + epsilon)
        return (norm_sq / (1. + norm_sq)) * (s / norm)

In [None]:
# Squash each 8-d primary capsule vector (last axis) to obtain the layer's output.
caps1_output = squash(caps1, name="caps1_output")

In [None]:
# Display the tensor to check its static shape.
caps1_output

In [None]:
# Digit capsule layer hyperparameters
caps2_capsules = 10     # one capsule per digit class
caps2_dimension = 16    # length of each digit capsule vector

In [None]:
# Transformation matrices: one (caps1_dimension x caps2_dimension) = 8x16 matrix
# for every (primary capsule, digit capsule) pair. The leading axis of size 1
# is a placeholder for the batch dimension and is tiled below.
W_init = tf.random_normal(
                          shape=(1, caps1_capsules, caps2_capsules, caps1_dimension, caps2_dimension),
                          stddev=0.01, dtype=tf.float32)
W = tf.Variable(W_init, name="W")
W

# Dynamic batch size taken from the fed input.
# NOTE: the training cell later rebinds `batch_size` to a Python int (50).
batch_size = tf.shape(X)[0]

# Repeat W once per example so it can be batch-multiplied with the capsule outputs.
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name="W_tiled")
W_tiled

In [None]:
# Turn every 8-d capsule vector into a 1x8 row: (batch, 1152, 8) -> (batch, 1152, 1, 8).
caps1_output_expanded1 = tf.expand_dims(caps1_output, -2, name="caps1_output_expanded1")
caps1_output_expanded1

In [None]:
# Insert an axis for the digit capsules: (batch, 1152, 1, 8) -> (batch, 1152, 1, 1, 8).
caps1_output_expanded2 = tf.expand_dims(caps1_output_expanded1, -3, name="caps1_output_expanded2")
caps1_output_expanded2

In [None]:
# Repeat each primary capsule output once per digit capsule:
# (batch, 1152, 1, 1, 8) -> (batch, 1152, 10, 1, 8).
caps1_output_tiled = tf.tile(caps1_output_expanded2, [1, 1, caps2_capsules, 1, 1], name="caps1_output_tiled")
caps1_output_tiled

In [None]:
# Predicted digit capsule vectors: for every (primary, digit) pair multiply the
# 1x8 primary output by its 8x16 matrix, giving shape (batch, 1152, 10, 1, 16).
# NOTE(review): the op name is spelled "caps2_perdicted"; left unchanged here.
caps2_predicted = tf.matmul(caps1_output_tiled, W_tiled, name="caps2_perdicted")
caps2_predicted

In [None]:
# Routing by agreement


In [None]:
routing_iterations = 3

# Take the dynamic batch size straight from X instead of the global
# `batch_size`, which the training cell rebinds to a Python int.
routing_batch_size = tf.shape(X)[0]

# Routing logits, one per (primary capsule, digit capsule) pair, start at zero.
b = tf.zeros([routing_batch_size, caps1_capsules, caps2_capsules, 1, 1], dtype=tf.float32, name="b")

for i in range(routing_iterations):
    # Coupling coefficients: softmax over the digit capsules (axis 2).
    c = tf.nn.softmax(b, axis=2, name="c")
    # Weight every prediction by its coupling coefficient and sum over the
    # primary capsules (axis 1): (batch, 1, 10, 1, 16).
    t = tf.multiply(c, caps2_predicted)
    s = tf.reduce_sum(t, axis=1, keepdims=True, name="s")
    # Squash along the 16-d capsule vector, which lives on the LAST axis in
    # this layout (axis=2 would normalise across the 10 digit capsules).
    v = squash(s, axis=-1, name="v")
    # Agreement = dot product between each prediction and the current output:
    # (batch, 1152, 10, 1, 16) x (batch, 1152, 10, 16, 1) -> (batch, 1152, 10, 1, 1).
    v_tiled = tf.tile(v, [1, caps1_capsules, 1, 1, 1])
    agreement = tf.matmul(caps2_predicted, v_tiled, transpose_b=True, name="agreement")
    b = tf.add(b, agreement)

# Output of the digit capsule layer after the last routing round.
caps2_output = v

In [None]:
# Estimated class probabilities are the lengths of the digit capsule vectors;
# this helper computes those lengths.
def safe_norm(s, axis=-1, epsilon=1e-7, keepdims=False, name=None):
    """Return the Euclidean norm of `s` along `axis`, padded by `epsilon`.

    Args:
        s: tensor holding the vectors to measure.
        axis: axis along which each vector lies (default: last axis).
        epsilon: small constant added to the squared norm before the square
            root is taken.
        keepdims: whether the reduced axis is kept with size 1.
        name: optional name scope for the created ops (default "safe_norm").

    Returns:
        A tensor of norms, i.e. ``sqrt(sum(s ** 2) + epsilon)``.
    """
    with tf.name_scope(name, default_name="safe_norm"):
        return tf.sqrt(
            tf.reduce_sum(tf.square(s), axis=axis, keepdims=keepdims) + epsilon)

In [None]:
# Length of every digit capsule vector (norm over the last, 16-d axis);
# the longest capsule is taken as the predicted class below.
y_prob = safe_norm(caps2_output, axis=-1, name="y_prob")
y_prob

In [None]:
# Index of the longest capsule along the digit-capsule axis (axis 2).
y_prob_argmax = tf.argmax(y_prob, axis=2, name="t_prob_argmax")
y_prob_argmax

In [25]:
# Drop the two singleton axes so predictions have shape (batch,).
y_pred = tf.squeeze(y_prob_argmax, axis=[1,2], name="y_pred")
y_pred

<tf.Tensor 'y_pred:0' shape=(?,) dtype=int64>

In [26]:
# Label placeholder: one integer class id per example (same dtype as y_pred).
y = tf.placeholder(shape=[None], dtype = tf.int64, name="y")
y

<tf.Tensor 'y:0' shape=(?,) dtype=int64>

In [27]:
# Margin loss constants (used by present_error, absent_error and L below).
m_plus = 0.9     # capsule length below which a present class is penalised
m_minus = 0.1    # capsule length above which an absent class is penalised
lambda_ = 0.5    # weight of the absent-class term

In [28]:
# One-hot encoding of the labels: T[i, k] is 1 when example i belongs to class k.
T = tf.one_hot(y, depth=caps2_capsules, name="T")
T

<tf.Tensor 'T:0' shape=(?, 10) dtype=float32>

In [29]:
# Length of every digit capsule vector, keeping the reduced axis so the
# result has shape (batch, 1, 10, 1, 1).
caps2_output_norm = safe_norm(caps2_output, axis=-1, keepdims=True, name="caps2output_norm")
caps2_output_norm

<tf.Tensor 'caps2output_norm/Sqrt:0' shape=(?, 1, 10, 1, 1) dtype=float32>

In [30]:
# "Class present" term of the margin loss: max(0, m_plus - ||v_k||)^2
# for every digit capsule k.

present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm), name="present_error_raw")
# Collapse the singleton axes to (batch, caps2_capsules); the class count comes
# from the hyperparameter rather than a hard-coded 10.
present_error = tf.reshape(present_error_raw, shape=(-1, caps2_capsules), name="present_error")
present_error

<tf.Tensor 'present_error:0' shape=(?, 10) dtype=float32>

In [31]:
# "Class absent" term of the margin loss: max(0, ||v_k|| - m_minus)^2
# for every digit capsule k.

absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus), name="absent_error_raw")
# Collapse the singleton axes to (batch, caps2_capsules); the class count comes
# from the hyperparameter rather than a hard-coded 10.
absent_error = tf.reshape(absent_error_raw, shape=(-1, caps2_capsules), name="absent_error")
absent_error

<tf.Tensor 'absent_error:0' shape=(?, 10) dtype=float32>

In [32]:
# Per-example, per-class margin loss: the "present" term applies to the true
# class (T == 1), the lambda_-weighted "absent" term to every other class.
L = tf.add(T * present_error, lambda_ * (1 - T) * absent_error, name="L")
L

<tf.Tensor 'L:0' shape=(?, 10) dtype=float32>

In [33]:
# Sum the per-class losses of each example, then average over the batch.
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")
margin_loss

<tf.Tensor 'margin_loss:0' shape=() dtype=float32>

In [35]:
# Reconstruction

In [36]:
# Scalar boolean switch, False unless fed: selects whether the reconstruction
# mask is built from the true labels (True) or from the predictions (False).
mask_with_labels = tf.placeholder_with_default(False, shape=(), name="mask_with_labels")
mask_with_labels

<tf.Tensor 'mask_with_labels:0' shape=() dtype=bool>

In [37]:
# Class ids whose capsule is kept for reconstruction: the labels y when
# mask_with_labels is True, otherwise the model's own predictions y_pred.
reconstruction_targets = tf.cond(mask_with_labels, # condition
                                 lambda: y, # if True
                                 lambda: y_pred, # if False
                                 name="reconstruction_targets")
reconstruction_targets

<tf.Tensor 'reconstruction_targets/Merge:0' shape=(?,) dtype=int64>

In [38]:
# One-hot mask over the digit capsules: 1 for the target class, 0 elsewhere.
# NOTE(review): the op name is spelled "resruction_mask"; left unchanged here.
reconstruction_mask = tf.one_hot(reconstruction_targets, depth=caps2_capsules, name="resruction_mask")
reconstruction_mask

<tf.Tensor 'resruction_mask:0' shape=(?, 10) dtype=float32>

In [39]:
# Display the digit capsule output to check the shape the mask must broadcast against.
caps2_output

<tf.Tensor 'caps2_output_round1_2/mul:0' shape=(?, 1, 10, 1, 16) dtype=float32>

In [40]:
# Reshape the mask to (batch, 1, 10, 1, 1) so it broadcasts over the 16-d
# capsule vectors of caps2_output.
reconstruction_mask_reshaped = tf.reshape(reconstruction_mask, [-1, 1, caps2_capsules, 1, 1], name="reconstruction_mask_reshaped")
reconstruction_mask_reshaped

<tf.Tensor 'reconstruction_mask_reshaped:0' shape=(?, 1, 10, 1, 1) dtype=float32>

In [41]:
# Zero out every digit capsule except the one selected by the mask.
# NOTE(review): the op name is spelled "casp2_output_masked"; left unchanged here.
caps2_output_masked = tf.multiply(reconstruction_mask_reshaped, caps2_output, name="casp2_output_masked")
caps2_output_masked

<tf.Tensor 'casp2_output_masked:0' shape=(?, 1, 10, 1, 16) dtype=float32>

In [42]:
# Flatten the masked capsules into one vector of 10 * 16 = 160 values per example.
decoder_input = tf.reshape(caps2_output_masked, [-1, caps2_capsules * caps2_dimension], name="decoder_input")
decoder_input

<tf.Tensor 'decoder_input:0' shape=(?, 160) dtype=float32>

In [43]:
# Decoder

In [44]:
# Layer widths of the fully connected decoder.
n_hidden1 = 512
n_hidden2 = 1024
n_output = 28 * 28    # one output per pixel of the 28x28 input image

In [45]:
# Decoder: three fully connected layers (160 -> 512 -> 1024 -> 784) that
# rebuild the input image from the masked digit capsules.
with tf.name_scope("decoder"):
    hidden1 = tf.layers.dense(decoder_input, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    # NOTE(review): the output layer also uses ReLU, so reconstructed pixels are
    # unbounded above; a sigmoid is the usual choice for pixel intensities - confirm intent.
    decoder_output = tf.layers.dense(hidden2, n_output, activation=tf.nn.relu, name="decoder_output")

In [46]:
# Reconstruction Loss

In [47]:
# Flatten the input images to (batch, 784) so they line up with decoder_output.
X_flat = tf.reshape(X, [-1, n_output], name="X_flat")

In [48]:
# Reconstruction loss: squared pixel error averaged over all pixels and examples.
squared_difference = tf.square(X_flat - decoder_output, name="squared_difference")
reconstruction_loss = tf.reduce_mean(squared_difference, name="reconstruction_loss")

In [49]:
# Final loss

In [50]:
# Weight of the reconstruction loss in the total loss; kept small so the
# margin loss dominates.
alpha = 0.0005

# Total loss = margin loss + alpha * reconstruction loss.
loss = tf.add(margin_loss, alpha * reconstruction_loss, name="loss")

In [51]:
# Accuracy: fraction of examples in the batch whose prediction equals the label.

correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [52]:
# Training operations: Adam with its default hyperparameters, minimising the total loss.

optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss, name="training_op")

In [53]:
# Op that initialises the graph's variables, and a Saver used to write and
# restore checkpoints in the training and test cells.
init =tf.global_variables_initializer()
saver = tf.train.Saver()

In [54]:
# TensorBoard summaries: scalar summaries for the accuracy and the three losses.

tf.summary.scalar("Accuracy", accuracy)
tf.summary.scalar("Loss", loss)
tf.summary.scalar("Margin_Loss", margin_loss)
tf.summary.scalar("Reconstruction_Loss", reconstruction_loss)

# Single op that evaluates every summary registered so far. Because it includes
# the accuracy, running it requires feeding the labels y.
merged_summary = tf.summary.merge_all()

In [55]:
# Training

In [56]:
# Training configuration.
n_epochs = 1
batch_size = 50  # NOTE: rebinds the name used earlier for the dynamic batch-size tensor
restore_checkpoint = True

n_iterations_per_epoch = mnist.train.num_examples // batch_size
n_iterations_validation = mnist.validation.num_examples // batch_size
best_loss_val = np.inf  # np.infty was removed in NumPy 2.0
checkpoint_path = "capsule_network_summary/capsule.ckpt"
summary_path = "capsule_network_summary/"

with tf.Session() as sess:

    # One writer for the whole session; closed in `finally` so that buffered
    # events are flushed even if training is interrupted.
    writer = tf.summary.FileWriter(summary_path, sess.graph)

    try:
        if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
            # NOTE(review): when a checkpoint exists it is only restored and no
            # training happens, because the loop sits in the `else` branch.
            # Confirm this is intended rather than resuming training.
            saver.restore(sess, checkpoint_path)
        else:
            init.run()
            for epoch in range(n_epochs):
                for iteration in range(1, n_iterations_per_epoch + 1):
                    X_batch, y_batch = mnist.train.next_batch(batch_size)

                    # Run the training operation and measure the loss;
                    # the true labels are used for the reconstruction mask.
                    _, loss_train, summary_train = sess.run(
                        [training_op, loss, merged_summary],
                        feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                                   y: y_batch,
                                   mask_with_labels: True})

                    # Use a step that keeps increasing across epochs; the bare
                    # `iteration` restarts at 1 each epoch and would overwrite
                    # earlier points in TensorBoard.
                    global_step = epoch * n_iterations_per_epoch + iteration
                    writer.add_summary(summary_train, global_step)

                    print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
                              iteration, n_iterations_per_epoch,
                              iteration * 100 / n_iterations_per_epoch,
                              loss_train),
                          end="")

                # At the end of each epoch,
                # measure the validation loss and accuracy:
                loss_vals = []
                acc_vals = []
                for iteration in range(1, n_iterations_validation + 1):
                    X_batch, y_batch = mnist.validation.next_batch(batch_size)
                    loss_val, acc_val = sess.run([loss, accuracy],
                                                 feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                                                            y: y_batch})
                    loss_vals.append(loss_val)
                    acc_vals.append(acc_val)
                    print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                              iteration, n_iterations_validation,
                              iteration * 100 / n_iterations_validation),
                          end=" " * 10)
                loss_val = np.mean(loss_vals)
                acc_val = np.mean(acc_vals)
                improved = loss_val < best_loss_val
                print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}{}".format(
                            epoch + 1, acc_val * 100, loss_val, " (improved)" if improved else ""))

                # And save the model if it improved:
                if improved:
                    save_path = saver.save(sess, checkpoint_path)
                    best_loss_val = loss_val
    finally:
        writer.close()

Epoch: 1  Val accuracy: 98.7600%  Loss: 0.015980 (improved)


In [58]:
# Evaluate the saved model on the test split.
n_iterations_test = mnist.test.num_examples // batch_size

with tf.Session() as sess:

    saver.restore(sess, checkpoint_path)

    loss_tests = []
    acc_tests = []

    # Exactly n_iterations_test batches, counted from 1 as in the validation
    # loop; range(n_iterations_test + 1) ran one batch too many.
    for iteration in range(1, n_iterations_test + 1):
        X_batch, y_batch = mnist.test.next_batch(batch_size)

        loss_test, acc_test = sess.run([loss, accuracy],
                                       feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                                                  y: y_batch})
        loss_tests.append(loss_test)
        acc_tests.append(acc_test)

        # "\r" rewrites the progress line in place instead of printing one
        # line per batch.
        print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                    iteration, n_iterations_test, iteration * 100 / n_iterations_test), end=" " * 10)

    loss_test = np.mean(loss_tests)
    acc_test = np.mean(acc_tests)

    print("\rFinal test accuracy: {:.4f}%  Loss: {:.6f}".format(acc_test * 100, loss_test))

INFO:tensorflow:Restoring parameters from capsule_network_summary/capsule.ckpt

Evaluating the model: 0/200 (0.0%)          
Evaluating the model: 1/200 (0.5%)          
Evaluating the model: 2/200 (1.0%)          
Evaluating the model: 3/200 (1.5%)          
Evaluating the model: 4/200 (2.0%)          
Evaluating the model: 5/200 (2.5%)          
Evaluating the model: 6/200 (3.0%)          
Evaluating the model: 7/200 (3.5%)          
Evaluating the model: 8/200 (4.0%)          
Evaluating the model: 9/200 (4.5%)          
Evaluating the model: 10/200 (5.0%)          
Evaluating the model: 11/200 (5.5%)          
Evaluating the model: 12/200 (6.0%)          
Evaluating the model: 13/200 (6.5%)          
Evaluating the model: 14/200 (7.0%)          
Evaluating the model: 15/200 (7.5%)          
Evaluating the model: 16/200 (8.0%)          
Evaluating the model: 17/200 (8.5%)          
Evaluating the model: 18/200 (9.0%)          
Evaluating the model: 19/200 (9.5%)          
Evaluating

In [58]:
# To inspect the logged summaries, run from the notebook's directory:
#   tensorboard --logdir="capsule_network_summary/"