In [1]:
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


In [2]:
# Functions:
def reset_graph(seed=42):
    """Replace the default graph and re-seed the random number generators.

    Args:
        seed: Integer passed to both NumPy's global generator and
            TensorFlow's `tf.set_random_seed`.
    """
    # The graph is reset first so the TensorFlow seed is applied to the
    # fresh default graph rather than the one being thrown away.
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)
    
# def squash(vector):
#     epsilon = 0.001
#     norm_vector = tf.sqrt(tf.reduce_sum(tf.square(vector)))
#     norm_vector_epsilon = tf.sqrt(tf.reduce_sum(tf.square(vector))+epsilon)
    
#     return norm_vector/(norm_vector+1)*(tf.norm(vector)/tf.norm(vector))


def squash(vector, axis=-2, epsilon=1e-9):
    '''Squashing non-linearity, applied to every capsule vector on its own.

    Each capsule s is mapped to (|s|^2 / (1 + |s|^2)) * (s / |s|), where the
    norm |s| is taken along `axis` only. The previous version reduced over the
    whole tensor, so all capsules of all examples were scaled by one shared
    scalar, and an all-zero input produced 0/0 = NaN.

    Args:
        vector: A tensor of capsule vectors, e.g. shape
            [batch_size, num_caps, vec_len, 1]. Extra leading axes are fine
            as long as `axis` still selects the vec_len axis.
        axis: Axis that holds the components of one capsule vector. The
            default of -2 matches the trailing [..., vec_len, 1] layout.
        epsilon: Small constant added under the square root so that a
            zero-length capsule yields zeros instead of NaN.
    Returns:
        A tensor with the same shape as vector; every capsule keeps its
        direction and has its length rescaled by the factor above.
    '''
    # keepdims keeps the reduced axis so the norm broadcasts against `vector`.
    squared_norm = tf.reduce_sum(tf.square(vector), axis=axis, keepdims=True)
    safe_norm = tf.sqrt(squared_norm + epsilon)
    scalar_factor = squared_norm / (1.0 + squared_norm)
    vec_squashed = scalar_factor * tf.divide(vector, safe_norm)  # element-wise
    return vec_squashed

In [3]:
# Load MNIST through tf.keras.datasets.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Images: add a trailing channel axis, cast to float32 and divide by 255.
X_train = X_train.reshape(-1, 28, 28, 1).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype(np.float32) / 255.0

# Labels as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# The first 55000 examples go to the validation arrays; whatever is left
# after index 55000 is what the rest of the notebook calls the training set.
# NOTE(review): this leaves the smaller part for training - confirm intended.
X_valid, y_valid = X_train[:55000], y_train[:55000]
X_train, y_train = X_train[55000:], y_train[55000:]

In [89]:
# Notebook-wide constants.
height, width = 28, 28
channels = 1
n_inputs = height * width
# Size of the first axis of X_train.
dim1 = X_train.shape[0]
#n_output_conv1 = (20,20,256)
# Number of passes made by the routing loop.
MAX_ITER = 5

In [97]:
# Start from a clean default graph before declaring any ops.
reset_graph()

# Graph inputs. Both placeholders fix their leading axis to dim1.
X = tf.placeholder(tf.float32, shape=(dim1, height, width, channels))
y = tf.placeholder(tf.int32, shape=(dim1,), name="y")

In [98]:
# First Conv layer:
with tf.name_scope('cnn'):
    # ReLU is applied here because the capsule layer that consumes `conv` is
    # itself a plain convolution: without a non-linearity in between, the two
    # convolutions compose into a single affine map.
    conv = tf.layers.conv2d(X, filters=256, kernel_size=9, strides=[1,1],
                            padding='VALID', activation=tf.nn.relu)
    print(conv.shape)

(5000, 20, 20, 256)


In [99]:
# Problems with the implementation?
# First capsule layer:
with tf.name_scope('caps'):
    # 9x9, stride-2, VALID convolution with 256 output channels.
    caps = tf.layers.conv2d(conv, filters=256, kernel_size=9,
                            strides=[2, 2], padding='VALID')
    print('caps shape: ',caps.shape)
    # Regroup the activations into 32*6*6 vectors of length 8 (with a
    # trailing singleton axis) and squash them.
    u_i = squash(tf.reshape(caps, shape=[-1, 32 * 6 * 6, 8, 1]))
    print('u_i shape: ',u_i.shape)

caps shape:  (5000, 6, 6, 256)
u_i shape:  (5000, 1152, 8, 1)


In [100]:
# # First Capsule LAyer:
# with tf.name_scope('caps'):
#     caps_list = []
#     for i in range(8):
#         caps_i = tf.layers.conv2d(conv,filters=32,kernel_size=9,strides=[2,2],padding='VALID')
#         caps_i = tf.reshape(caps_i, [5000,6,6,32,1])
#         caps_list.append(caps_i)
        
#     u_i = tf.concat(caps_list,axis = -1)
#     print('caps shape: ',u_i.shape)
#     u_i = tf.reshape(u_i, shape=[-1,32*6*6,8,1])
#     #caps2 = tf.layers.conv2d(caps1,filters=8,kernel_size=9,strides=[2,2],padding='VALID')
#     u_i = squash(u_i)
#     print('u_i shape: ',u_i.shape)
#     #a_caps1 = squash(caps1)

In [101]:
# Routing:

with tf.variable_scope('final_layer'):
    # One 8x16 matrix per (input capsule, output capsule) pair; the leading
    # axis of size 1 is the batch axis that gets tiled below.
    w_initializer = np.random.normal(size=[1, 1152, 10, 8, 16], scale=0.01)
    W = tf.Variable(w_initializer, dtype=tf.float32)

    # Static batch size, read from the capsule tensor instead of a literal.
    batch_size = u_i.shape[0].value

    # Repeat W along the batch axis -> [batch_size, 1152, 10, 8, 16].
    # NOTE(review): the tiled tensor holds batch_size * 1152 * 10 * 8 * 16
    # float32 values, which is very large for a batch of several thousand.
    W = tf.tile(W, [batch_size, 1, 1, 1, 1])
    # The label is kept as before; the tensor being printed is W (the name
    # W_Ij used here previously is not defined anywhere).
    print('W_Ij shape: ',W.shape)

    # Compute u_hat:
    # [8, 16].T x [8, 1] => [16, 1] for every (input, output) capsule pair,
    # i.e. u_hat has shape [batch_size, 1152, 10, 16, 1].
    # NOTE(review): u_i is rebound to its tiled 5-D form here, so running
    # this cell a second time starts from the already-tiled tensor.
    u_i = tf.reshape(u_i, shape=(batch_size, -1, 1, u_i.shape[-2].value, 1))
    u_i = tf.tile(u_i, [1,1,10,1,1])
    print('u_i shape: ',u_i.shape)
    u_hat = tf.matmul(W, u_i, transpose_a=True)
    print('u_hat shape: ',u_hat.shape)

    with tf.variable_scope('routing'):
        # Routing logits start at zero: one scalar per (input capsule,
        # output capsule) pair and per example.
        b_IJ = tf.zeros([batch_size, u_i.shape[1].value, 10, 1, 1], dtype=np.float32)
        print('b_IJ shape: ',b_IJ.shape)
        # `routing` must already be defined in the kernel when this runs.
        v,b_IJ = routing(u_hat,b_IJ)

W_Ij shape:  (5000, 1152, 10, 8, 16)
u_i shape:  (5000, 1152, 10, 8, 1)
u_hat shape:  (5000, 1152, 10, 16, 1)
b_IJ shape:  (5000, 1152, 10, 1, 1)
u_hat shape:  (5000, 1152, 10, 16, 1)
c shape:  (5000, 1152, 10, 1, 1)
s_j shape:  (5000, 1, 10, 16, 1)
v shape:  (5000, 1, 10, 16, 1)
a shape:  (5000, 1152, 10, 1, 1)
b_IJ shape:  (5000, 1152, 10, 1, 1)


In [95]:
def routing(u_hat, b_IJ, num_iters=None):
    '''Iteratively refine the routing logits and return the output capsules.

    Every iteration turns the logits into coupling coefficients with a
    softmax over axis 2, forms the weighted sum of `u_hat` over axis 1 and
    squashes it. All iterations but the last then add the agreement between
    `u_hat` and the squashed output to the logits. Only the last iteration
    uses the un-stopped `u_hat`, so gradients reach `u_hat` through the final
    weighted sum only.

    Args:
        u_hat: Prediction vectors. In this notebook the shape is
            [batch_size, 1152, 10, 16, 1]; axis 1 is tiled over and the last
            two axes are multiplied with `transpose_a=True`.
        b_IJ: Initial routing logits whose first three axes match `u_hat`
            and whose last two axes have size 1.
        num_iters: Number of routing iterations; defaults to the module-level
            MAX_ITER when left as None.
    Returns:
        A tuple (v, b_IJ): the squashed outputs of the last iteration and the
        updated logits.
    Raises:
        ValueError: if fewer than one iteration is requested.
    '''
    if num_iters is None:
        num_iters = MAX_ITER
    if num_iters < 1:
        raise ValueError('routing needs at least one iteration, got %r' % (num_iters,))

    # Size of axis 1, taken from the static shape so the tile below does not
    # depend on a hard-coded literal.
    num_in = u_hat.get_shape().as_list()[1]

    # Stopping the routing:
    u_hat_stopped = tf.stop_gradient(u_hat, name='u_hat_stopped')
    print('u_hat shape: ',u_hat_stopped.shape)
    # Stays None when only one iteration runs (no logit update happens then).
    a = None
    # Routing
    with tf.name_scope('routing'):
        for r_iter in range(num_iters):
            c = tf.nn.softmax(b_IJ,axis=2)
            # The coefficients must line up with u_hat on the first three axes.
            assert c.get_shape().as_list()[:3] == u_hat.get_shape().as_list()[:3]
            if r_iter == num_iters - 1:
                s_j = tf.reduce_sum(tf.multiply(c,u_hat),axis = 1, keepdims = True)
                v = squash(s_j)
            else:
                s_j = tf.reduce_sum(tf.multiply(c,u_hat_stopped),axis = 1,keepdims=True)
                v = squash(s_j)
                # Repeat v along axis 1 so matmul sees matching batch axes.
                v_tiled = tf.tile(v,[1, num_in,1,1,1])
                a = tf.matmul(u_hat_stopped, v_tiled, transpose_a=True)
                b_IJ = b_IJ + a
        print('c shape: ',c.shape)
        print('s_j shape: ',s_j.shape)
        print('v shape: ',v.shape)
        if a is not None:
            print('a shape: ',a.shape)
        print('b_IJ shape: ',b_IJ.shape)
    return v,b_IJ

In [None]:
def routing(u_hat, b):
    '''Routing with logits that are shared by every example in the batch.

    NOTE(review): this cell previously could not run. Its loop header had a
    syntax error, and it read `j`, `c_ij`, `b_ij`, `b_il` and `b_ir`, none of
    which were assigned. It is completed here from its live statements:
    softmax over axis 2, weighted sum over axis 1, squash, tile over axis 1,
    `matmul(..., transpose_a=True)` and a sum of the agreement over axis 0.
    Confirm this is the intended variant before relying on it. Running the
    cell rebinds the name `routing`.

    Args:
        u_hat: Prediction vectors; presumably
            [batch_size, num_in, num_out, vec_len, 1] - TODO confirm.
        b: Routing logits with a leading axis of size 1 so they broadcast
            over the batch axis of `u_hat`, e.g. [1, num_in, num_out, 1, 1].
    Returns:
        A tuple (v, b): the squashed outputs of the last iteration and the
        updated logits.
    Raises:
        ValueError: if MAX_ITER is smaller than 1.
    '''
    if MAX_ITER < 1:
        raise ValueError('routing needs MAX_ITER >= 1, got %r' % (MAX_ITER,))

    static_shape = u_hat.get_shape().as_list()
    num_in = static_shape[1]
    # Tile multiples that repeat a tensor along axis 1 only.
    tile_over_inputs = [1, num_in] + [1] * (len(static_shape) - 2)

    u_hat_stopped = tf.stop_gradient(u_hat, name='u_hat_stopped')
    v_j_tiled = None
    a_ij = None

    for r_iter in range(MAX_ITER):
        c = tf.nn.softmax(b,axis=2)
        is_last = (r_iter == MAX_ITER - 1)

        # Computing s_j. Broadcasting in tf.multiply stands in for the
        # explicit tile of c over the batch axis. Only the final pass uses
        # the un-stopped u_hat.
        routed = u_hat if is_last else u_hat_stopped
        s_j = tf.reduce_sum(tf.multiply(c, routed),axis=1, keepdims=True)

        # computing v_j
        v_j = squash(s_j)

        if not is_last:
            # updating b: agreement between predictions and outputs, summed
            # over the batch axis so it matches the shared logits.
            v_j_tiled = tf.tile(v_j, tile_over_inputs)
            a_ij = tf.reduce_sum(tf.matmul(u_hat_stopped,v_j_tiled, transpose_a=True),axis = 0, keepdims=True)
            b = b + a_ij

    v = v_j

    print('c_ij shape: ',c.shape)
    print('s_j shape: ',s_j.shape)
    print('v_j shape: ',v_j.shape)
    if v_j_tiled is not None:
        print('v_j_tiled shape: ',v_j_tiled.shape)
    if a_ij is not None:
        print('a_ij shape: ',a_ij.shape)
    print('v shape: ',v.shape)
    return v, b

In [None]:
def routing(u_hat, b):
    '''Draft of a per-class routing loop (outer loop over 10 classes).

    NOTE(review): as written this function cannot run to completion:
      * `b_shape` is read when building `size_splits`, but its only
        assignment is commented out.
      * `c_ij`, `b_ij`, `b_il` and `b_ir` are read, but the `tf.split` calls
        that would produce them are commented out.
      * `s_j` is assigned only inside `if j == MAX_ITER-1`, yet it is
        asserted on and squashed on every pass; the condition compares the
        class index `j`, not the iteration index `r_iter`.
      * `u_hat_stopped` is computed but never used.
      * There is no return statement, so callers receive None.
    Defining it also rebinds the name `routing`.

    Args:
        u_hat: Prediction tensor; expected shape is not determinable from
            this cell - TODO confirm.
        b: Routing logits; the assert below expects their softmax to have
            shape [1, 1152, 10, 1].
    '''
    v = []
    #b_shape = b.get_shape().as_list()
    u_hat_stopped = tf.stop_gradient(u_hat, name='u_hat_stopped')
    for j in range(10):   
        # Sizes that would split axis 2 into [left, class j, right].
        size_splits = [j, 1, b_shape[2] - j - 1]
        
        for r_iter in range(MAX_ITER):
            c = tf.nn.softmax(b,axis=2)

            assert c.get_shape() == [1, 1152, 10, 1]
            
            # Repeat the coefficients 5000 times along axis 0.
            c = tf.tile(c,[5000,1,1,1])
#             b_il, b_ij, b_ir = tf.split(b, size_splits, axis=2)
#             c_il, c_ij, c_ir = tf.split(c, size_splits, axis=2)
        
            # Calculating c_ij
            #c_ij = c[:,:,j,:]
            #new_shape_c_ij = c_ij.get_shape().as_list() + [1]
            #c_ij = tf.reshape(c_ij, new_shape_c_ij)
            
            #assert c_ij.get_shape() == [1, 1152, 1, 1]
            
            # Computing s_j
            if j == MAX_ITER-1:
                # This first value is overwritten by the assignment below.
                s_j = tf.reduce_sum(tf.multiply(c,u_hat))
                
                
                
                
                s_j = tf.reduce_sum(tf.multiply(c_ij, u_hat),axis=1, keepdims=True)
            assert s_j.get_shape() == [5000, 1, 16, 1]
            
            # computing v_j
            v_j = squash(s_j)
            # updating b_ij:
            
            v_j_tiled = tf.tile(v_j, [1,1152,1,1])
            # Agreement, summed over axis 0.
            a_ij = tf.reduce_sum(tf.matmul(u_hat,v_j_tiled, transpose_a=True),axis = 0, keepdims=True)
            #print(v_j.shape)
            #print(a_ij.shape)
            
            b_ij += a_ij
            #print('b_ij shape: ',b_ij.shape)
            b = tf.concat([b_il, b_ij, b_ir] ,axis =2)
        # One squashed output per class, concatenated along axis 1 below.
        v.append(v_j)
    v = tf.concat(v,axis=1)
    
    print('c_ij shape: ',c_ij.shape)
    print('s_j shape: ',s_j.shape)
    print('v_j shape: ',v_j.shape)
    print('v_j_tiled shape: ',v_j_tiled.shape)
    print('a_ij shape: ',a_ij.shape)
    print('v shape: ',v.shape)

In [None]:
# Evaluate the agreement tensor once, feeding the training arrays.
# NOTE(review): in the cells visible here `a_ij` is only assigned inside the
# routing functions, so it has to be exposed at module level (for example by
# returning it) before this cell can run on a fresh kernel.
# The `with` block closes the session even when initialisation or the run
# raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    a_test = sess.run(a_ij, feed_dict={X:X_train, y: y_train})

In [None]:
# Display the array fetched by the session run that assigned `a_test`.
a_test

In [None]:
# Draft of a digit-capsule layer that applies one shared 8x16 matrix to u_i.
# NOTE(review): `num_iter` is not defined in any cell visible here, and the
# visible `routing` definitions take logits as their second argument.
# NOTE(review): the reshape expects u_i to hold 5000*1152*8 values; after the
# 'final_layer' cell u_i has been tiled to (5000, 1152, 10, 8, 1), which
# looks incompatible - confirm which u_i this cell is meant to see.
with tf.name_scope('digit_caps'):
    # Rebinds the module-level name W to a new (8, 16) variable.
    W = tf.Variable( tf.random_uniform((8,16),seed = 1), name="theta",dtype=tf.float32)
    # Flatten to rows of length 8, multiply by W, restore [5000, 1152, 16].
    u_ji = tf.reshape(tf.matmul(tf.reshape(u_i,[5000*1152,8]),W),[5000,1152,16])
    v_j = routing(u_ji, num_iter)
    
    
    
    
    
    

In [None]:
# Testing:
sess = tf.InteractiveSession()
try:
    sess.run(tf.global_variables_initializer())

    # We can just use 'c.eval()' without passing 'sess'
    # NOTE(review): in the cells visible here `c` is only assigned inside the
    # routing functions; it must be exposed at module level for this to run.
    #print(a.eval(feed_dict=X_test))
    print(c.eval())
finally:
    # Close the session even when the initialisation or the eval raises.
    sess.close()

In [None]:
# Testing class Caps Layer:

In [None]:
# Draft wrapper around a capsule layer: a convolution followed by a squash
# for a primary layer, or routing for a final layer.
# NOTE(review): this class does not parse as written. `routing` has no body
# and the `for` line in `create_list_capsules` lacks its colon. The notes
# below list the remaining unresolved names.
class CapsLayer(object):
    # Stores the input tensor and the capsule count.
    # NOTE(review): `digit_layer` is accepted but never stored on self.
    def __init__(self, input_tensor, num_capsules, digit_layer = False):
        self.input = input_tensor
        self.num_capsules = num_capsules
        
    # Builds the layer output from self.input.
    def __call__(self, num_filters, kernel_size, strides):
        # In case it is a primary layer:
        # NOTE(review): `digit_layer` is not a local or global name here, and
        # `self.squash` is not defined on the class (the method below is
        # named `squashing`).
        if digit_layer == False:
            conv = tf.layers.conv2d(inputs=self.input, filters=num_filters, \
                                    kernel_size=kernel_size, activation=tf.nn.relu, \
                                    strides=strides, padding = 'SAME' )
            caps = self.squash(conv)
            return caps
        # In case it is a final layer:
        else:
            fcaps = self.routing(self.input)
            return fcaps
            
        
    # NOTE(review): body missing - this is where the class stops parsing.
    def routing(self,capsule):
    
        
    
    # NOTE(review): no `self` parameter, so `num_capsules` would receive the
    # instance. `self.list_capsules` is never initialised; `input_tensor`,
    # `num_filters` and `kernel_size` are not defined in this scope; the else
    # branch appends the bare name `create_capsule` rather than a call.
    def create_list_capsules(num_capsules):
        for i in range(num_capsules)
            if i == 0:
                self.list_capsules.append(tf.layers.conv2d(input_tensor, \
                                                           filters=num_filters, \
                                                           kernel_size=kernel_size, \
                                                           strides=[2,2], \
                                                           padding='VALID'))
            else:
                self.list_capsules.append(create_capsule)
        
    # NOTE(review): no `self` parameter; `input_capsule` and `num_filters`
    # are undefined here, and the `strides` argument is ignored in favour of
    # the literal [2,2].
    def create_capsule(input_tensor, kernel_size, strides):
        return tf.layers.conv2d(input_capsule, filters=num_filters, kernel_size=kernel_size, strides=[2,2], padding='VALID')
    
    # NOTE(review): no `self` parameter. The second factor is norm / norm, so
    # the expression reduces to the scalar norm / (norm + 1) and the
    # direction of `vector` is not part of the result.
    def squashing(vector):
        return tf.norm(vector)/(tf.norm(vector)+1)*(tf.norm(vector)/tf.norm(vector))
    