In [None]:
"""
Some key layers used for constructing a Capsule Network. These layers can be used to construct CapsNet on other datasets,
not just on MNIST.
*NOTE*: some functions can be implemented in multiple ways, I keep all of them. You can try them for yourself just by
uncommenting them and commenting their counterparts.
Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras`
"""

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import initializers, layers


class Length(layers.Layer):
    """
    Replace every vector by its Euclidean length.

    The result has the same shape as `y_true` in margin_loss, so when this layer is the model's output the labels
    can be predicted directly with `y_pred = np.argmax(model.predict(x), 1)`.
    inputs: shape=[None, num_vectors, dim_vector]
    output: shape=[None, num_vectors]
    """
    def call(self, inputs, **kwargs):
        # Sum of squares over the last axis; epsilon is added before taking the root.
        squared_norm = tf.reduce_sum(tf.square(inputs), -1)
        return tf.sqrt(squared_norm + K.epsilon())

    def compute_output_shape(self, input_shape):
        # The last (vector) axis is reduced away.
        return input_shape[:-1]

    def get_config(self):
        # The layer takes no constructor arguments of its own, so the base config is returned unchanged.
        return super(Length, self).get_config()


class Mask(layers.Layer):
    """
    Mask a Tensor with shape=[None, num_capsule, dim_vector] either by the capsule with max length or by an additional
    input mask. Except the max-length capsule (or specified capsule), all vectors are masked to zeros. Then flatten the
    masked Tensor.
    For example:
        ```
        x = keras.layers.Input(shape=[3, 2])  # each sample contains 3 capsules with dim_vector=2
        y = keras.layers.Input(shape=[3])  # True labels, 3 classes, one-hot coding.
        out = Mask()(x)  # out.shape=[None, 6]
        # or
        out2 = Mask()([x, y])  # out2.shape=[None, 6]. Masked with true labels y. Of course y can also be manipulated.
        ```
    """
    def call(self, inputs, **kwargs):
        # A list OR tuple means `[capsules, mask]`; isinstance is used so that a tuple is not mistaken for a
        # single tensor (an exact `type(...) is list` check would send it to the wrong branch).
        if isinstance(inputs, (list, tuple)):  # true label is provided with shape = [None, n_classes]
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of capsules. Mainly used for prediction
            # compute lengths of capsules (only fed to argmax, so no epsilon is needed here)
            lengths = tf.sqrt(tf.reduce_sum(tf.square(inputs), -1))
            # generate the mask which is a one-hot code.
            # mask.shape=[None, n_classes]=[None, num_capsule]
            mask = tf.one_hot(indices=tf.argmax(lengths, 1), depth=lengths.shape[1])

        # inputs.shape=[None, num_capsule, dim_capsule]
        # mask.shape=[None, num_capsule]
        # masked.shape=[None, num_capsule * dim_capsule]
        masked = K.batch_flatten(inputs * tf.expand_dims(mask, -1))
        return masked

    def compute_output_shape(self, input_shape):
        # With a true label the layer receives a collection of two shapes, whose first entry is itself a shape
        # (tuple, list or TensorShape); without a label, `input_shape[0]` is just the batch dimension.
        if isinstance(input_shape[0], (list, tuple, tf.TensorShape)):  # true label provided
            capsule_shape = input_shape[0]
        else:  # no true label provided
            capsule_shape = input_shape
        return tuple([None, capsule_shape[1] * capsule_shape[2]])

    def get_config(self):
        config = super(Mask, self).get_config()
        return config


def squash(vectors, axis=-1):
    """
    Capsule non-linearity: long vectors are shrunk to a length close to 1, short vectors to a length close to 0.
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    squared_norm = tf.reduce_sum(tf.square(vectors), axis, keepdims=True)
    # ||s||^2 / (1 + ||s||^2): the length the squashed vector should end up with.
    target_length = squared_norm / (1 + squared_norm)
    # Epsilon is added under the root before dividing by the norm.
    norm = tf.sqrt(squared_norm + K.epsilon())
    return (target_length / norm) * vectors


class CapsuleLayer(layers.Layer):
    """
    The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the
    neuron from the former layer, and it has `out_num` output neurons. CapsuleLayer just expand the output of the neuron
    from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_capsule] and output shape = \
    [None, num_capsule, dim_capsule]. For Dense Layer, input_dim_capsule = dim_capsule = 1.
    :param num_capsule: number of capsules in this layer
    :param dim_capsule: dimension of the output vectors of the capsules in this layer
    :param routings: number of iterations for the routing algorithm
    :param kernel_initializer: initializer (name, config or instance) for the transform matrix `W`
    """
    def __init__(self, num_capsule, dim_capsule, routings=3,
                 kernel_initializer='glorot_uniform',
                 **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_initializer = initializers.get(kernel_initializer)

    def build(self, input_shape):
        assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]"
        self.input_num_capsule = input_shape[1]
        self.input_dim_capsule = input_shape[2]

        # Transform matrix, from each input capsule to each output capsule, there's a unique weight as in Dense layer.
        self.W = self.add_weight(shape=[self.num_capsule, self.input_num_capsule,
                                        self.dim_capsule, self.input_dim_capsule],
                                 initializer=self.kernel_initializer,
                                 name='W')

        self.built = True

    def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_capsule]
        # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule, 1]
        inputs_expand = tf.expand_dims(tf.expand_dims(inputs, 1), -1)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule, 1]
        inputs_tiled = tf.tile(inputs_expand, [1, self.num_capsule, 1, 1, 1])

        # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
        # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
        # x.shape=[num_capsule, input_num_capsule, input_dim_capsule, 1]
        # Regard the first two dimensions as `batch` dimension, then
        # matmul(W, x): [..., dim_capsule, input_dim_capsule] x [..., input_dim_capsule, 1] -> [..., dim_capsule, 1].
        # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule]
        # Only the trailing singleton axis is squeezed: an axis-less squeeze would also drop any other
        # dimension that happens to be 1 (e.g. a batch of one sample or dim_capsule == 1).
        inputs_hat = tf.squeeze(
            tf.map_fn(lambda x: tf.matmul(self.W, x), elems=inputs_tiled),
            axis=-1
        )

        # Begin: Routing algorithm ---------------------------------------------------------------------#
        # The prior for coupling coefficient, initialized as zeros.
        # b.shape = [None, self.num_capsule, 1, self.input_num_capsule].
        # The batch size is read dynamically so the layer also works when the static batch size is unknown.
        b = tf.zeros(shape=[tf.shape(inputs)[0], self.num_capsule, 1, self.input_num_capsule])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):
            # c.shape=[batch_size, num_capsule, 1, input_num_capsule]
            c = tf.nn.softmax(b, axis=1)

            # c.shape = [batch_size, num_capsule, 1, input_num_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # The first two dimensions as `batch` dimension,
            # then matmul: [..., 1, input_num_capsule] x [..., input_num_capsule, dim_capsule] -> [..., 1, dim_capsule].
            # outputs.shape=[None, num_capsule, 1, dim_capsule]
            outputs = squash(tf.matmul(c, inputs_hat))

            if i < self.routings - 1:
                # outputs.shape =  [None, num_capsule, 1, dim_capsule]
                # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
                # The first two dimensions as `batch` dimension, then
                # matmul:[..., 1, dim_capsule] x [..., input_num_capsule, dim_capsule]^T -> [..., 1, input_num_capsule].
                # b.shape=[batch_size, num_capsule, 1, input_num_capsule]
                b += tf.matmul(outputs, inputs_hat, transpose_b=True)
        # End: Routing algorithm -----------------------------------------------------------------------#

        # Drop only the singleton axis introduced by the routing matmul -> [None, num_capsule, dim_capsule].
        return tf.squeeze(outputs, axis=-2)

    def compute_output_shape(self, input_shape):
        return tuple([None, self.num_capsule, self.dim_capsule])

    def get_config(self):
        # Every constructor argument is included so the layer can be rebuilt from its config.
        config = {
            'num_capsule': self.num_capsule,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings,
            'kernel_initializer': initializers.serialize(self.kernel_initializer)
        }
        base_config = super(CapsuleLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding):
    """
    Primary capsules: a single Conv2D with `dim_capsule * n_channels` filters whose output is regrouped into
    vectors of length `dim_capsule` and then squashed.
    :param inputs: 4D tensor, shape=[None, width, height, channels]
    :param dim_capsule: the dim of the output vector of capsule
    :param n_channels: the number of types of capsules
    :param kernel_size: forwarded to Conv2D
    :param strides: forwarded to Conv2D
    :param padding: forwarded to Conv2D
    :return: output tensor, shape=[None, num_capsule, dim_capsule]
    """
    conv = layers.Conv2D(
        filters=dim_capsule * n_channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        name='primarycap_conv2d'
    )
    feature_maps = conv(inputs)

    # Regroup the feature maps into capsule vectors of length `dim_capsule`.
    to_capsules = layers.Reshape(target_shape=[-1, dim_capsule], name='primarycap_reshape')
    capsules = to_capsules(feature_maps)

    squash_layer = layers.Lambda(squash, name='primarycap_squash')
    return squash_layer(capsules)


class ViewPoolingLayer(layers.Layer):
    """
    Element-wise max-pooling over groups of `num_views` consecutive rows of the batch.

    The input is regrouped to [-1, num_views, features], reduced with a max over the view axis, and every pooled
    row is then repeated `num_views` times so the output has as many rows as the input.
    NOTE(review): presumably each group of `num_views` consecutive rows holds the views of one object -- confirm
    against the batch generator.
    :param num_views: number of consecutive rows pooled together
    """

    def __init__(self, num_views, **kwargs):
        super(ViewPoolingLayer, self).__init__(**kwargs)
        self.num_views = num_views

    def call(self, inputs, **kwargs):
        # `-1` lets TensorFlow infer the number of groups. The former `tf.divide(batch, num_views)` produced a
        # float, which is not a valid reshape dimension, and required a static batch size.
        grouped = K.reshape(inputs, (-1, self.num_views, inputs.shape[-1]))
        pooled = K.max(grouped, axis=1)
        # Give every view of a group the pooled values again.
        return tf.repeat(pooled, repeats=self.num_views, axis=0)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        # `num_views` is a required constructor argument, so it must be part of the config.
        config = super(ViewPoolingLayer, self).get_config()
        config['num_views'] = self.num_views
        return config


class ViewCapsReshapeLayer(layers.Layer):
    """
    Regroup capsule vectors so that every output sample holds `num_caps` capsules.

    The input is reshaped to [-1, num_caps, dim_vector]; the last axis is left untouched.
    :param num_caps: number of capsules per output sample
    """

    def __init__(self, num_caps, **kwargs):
        super(ViewCapsReshapeLayer, self).__init__(**kwargs)
        self.num_caps = num_caps

    def call(self, inputs, **kwargs):
        return K.reshape(
            inputs, (-1, self.num_caps, inputs.shape[-1])
        )

    def compute_output_shape(self, input_shape):
        # Normalise tuple / list / TensorShape to a plain list of ints and Nones.
        dims = tf.TensorShape(input_shape).as_list()
        leading = dims[:-1]
        if any(dim is None for dim in leading):
            # Unknown batch (or capsule count): the regrouped batch size is unknown as well.
            out_batch = None
        else:
            # The reshape keeps the total number of vectors, so the new batch size is that total / num_caps.
            n_vectors = 1
            for dim in leading:
                n_vectors *= dim
            out_batch = n_vectors // self.num_caps
        return (out_batch, self.num_caps, dims[-1])

    def get_config(self):
        # `num_caps` is a required constructor argument, so it must be part of the config.
        config = super(ViewCapsReshapeLayer, self).get_config()
        config['num_caps'] = self.num_caps
        return config

    
class ViewCapsRepeatLayer(layers.Layer):
    """
    Repeat every sample of the batch `num_views` times along axis 0.

    :param num_views: number of copies made of each sample
    """

    def __init__(self, num_views, **kwargs):
        super(ViewCapsRepeatLayer, self).__init__(**kwargs)
        self.num_views = num_views

    def call(self, inputs, **kwargs):
        return tf.repeat(inputs, repeats=self.num_views, axis=0)

    def compute_output_shape(self, input_shape):
        # Normalise tuple / list / TensorShape to a plain list of ints and Nones.
        dims = tf.TensorShape(input_shape).as_list()
        batch = dims[0]
        # Only the batch axis grows; an unknown batch size stays unknown.
        out_batch = None if batch is None else batch * self.num_views
        return (out_batch,) + tuple(dims[1:])

    def get_config(self):
        # `num_views` is a required constructor argument, so it must be part of the config.
        config = super(ViewCapsRepeatLayer, self).get_config()
        config['num_views'] = self.num_views
        return config


def ViewCapsuleLayer(inputs, num_views, dim_capsule, routings):
    """
    Stack three layers on top of `inputs`: regroup the capsules into samples of `num_views * n_class` capsules,
    route them through a CapsuleLayer with `n_class` output capsules, and repeat each result `num_views` times.
    :param inputs: capsule tensor; `inputs.shape[1]` is taken as the number of classes
    :param num_views: number of views, used for the regrouping and for the final repeat
    :param dim_capsule: dimension of the output vectors of the routed capsules
    :param routings: number of routing iterations
    :return: output tensor of the final repeat layer
    """
    n_class = inputs.shape[1]

    regroup = ViewCapsReshapeLayer(num_caps=num_views * n_class, name='viewcaps_reshape')
    route = CapsuleLayer(
        num_capsule=n_class, dim_capsule=dim_capsule, routings=routings, name='viewcaps'
    )
    repeat = ViewCapsRepeatLayer(num_views=num_views, name='viewcaps_rep')

    return repeat(route(regroup(inputs)))


"""
# The following is another way to implement primary capsule layer. This is much slower.
# Apply Conv2D `n_channels` times and concatenate all capsules
def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding):
    outputs = []
    for _ in range(n_channels):
        output = layers.Conv2D(filters=dim_capsule, kernel_size=kernel_size, strides=strides, padding=padding)(inputs)
        outputs.append(layers.Reshape([output.get_shape().as_list()[1] ** 2, dim_capsule])(output))
    outputs = layers.Concatenate(axis=1)(outputs)
    return layers.Lambda(squash)(outputs)
"""

In [None]:
"""
Keras implementation of CapsNet in Hinton's paper Dynamic Routing Between Capsules.
The current version may only work with the TensorFlow backend. Actually it will be straightforward to re-write to TF code.
Adapting to other backends should be easy, but I have not tested this.

Usage:
       python capsulenet.py
       python capsulenet.py --epochs 50
       python capsulenet.py --epochs 50 --routings 3
       ... ...
       
Result:
    Validation accuracy > 99.5% after 20 epochs. Converge to 99.66% after 50 epochs.
    About 110 seconds per epoch on a single GTX1070 GPU card
    
Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras`
"""

import numpy as np
from keras import layers, models, optimizers
from keras import backend as K
import tensorflow as tf
from keras.utils import to_categorical
from PIL import Image
import os
import argparse
from keras.preprocessing.image import ImageDataGenerator
from keras import callbacks
import pandas as pd

K.set_image_data_format('channels_last')
WORK_DIR = './../input/views-rendered-from-3d-models/cleaned.dataset/'
FILE_PATH_COLUMN = 'file_path'
CLASS_COLUMN = 'label'
# image_width, image_height = 224, 224
# image_width, image_height = 30, 30
image_width, image_height = 45, 45


def CapsNet(input_shape, n_class, routings, batch_size):
    """
    Build the Capsule Network together with its reconstruction decoder.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :param batch_size: size of batch (fixed on the image input)
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
            `eval_model` can also be used for training.
    """
    image_in = layers.Input(shape=input_shape, batch_size=batch_size)

    # Layer 1: a conventional Conv2D layer.
    conv1 = layers.Conv2D(
        filters=256, kernel_size=9, strides=1, padding='valid', activation='relu', name='conv1'
    )(image_in)

    # Layer 2: Conv2D with `squash` activation, reshaped to [None, num_capsule, dim_capsule].
    primary_caps = PrimaryCap(conv1, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid')

    # Layer 3: capsule layer; the routing algorithm runs here.
    class_caps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings, name='caps1')(primary_caps)

    # Layer 4: each capsule is replaced by its length so the output matches the shape of the true labels.
    capsule_lengths = Length(name='capsnet')(class_caps)

    # Decoder inputs: capsules masked by the true label (training) or by the longest capsule (prediction).
    label_in = layers.Input(shape=(n_class,), name='recon_input')
    masked_by_label = Mask(name='mask')([class_caps, label_in])
    masked_by_length = Mask()(class_caps)

    # One decoder instance is shared by the training and the evaluation model.
    decoder = models.Sequential(
        [
            layers.Dense(512, activation='relu', input_dim=16 * n_class),
            layers.Dense(1024, activation='relu'),
            layers.Dense(np.prod(input_shape), activation='sigmoid'),
            layers.Reshape(target_shape=input_shape, name='out_recon'),
        ],
        name='decoder'
    )

    recon_train = decoder(masked_by_label)
    train_model = models.Model([image_in, label_in], [capsule_lengths, recon_train])

    recon_eval = decoder(masked_by_length)
    eval_model = models.Model(image_in, [capsule_lengths, recon_eval])

    return train_model, eval_model


def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq.(4). Classes marked in `y_true` are penalised when their predicted length falls below 0.9,
    all other classes (down-weighted by 0.5) when their predicted length exceeds 0.1.
    According to the original author this should also work for multi-hot `y_true`, but that was not tested.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    present_term = y_true * tf.square(tf.maximum(0., 0.9 - y_pred))
    absent_term = 0.5 * (1 - y_true) * tf.square(tf.maximum(0., y_pred - 0.1))
    per_sample = tf.reduce_sum(present_term + absent_term, 1)
    return tf.reduce_mean(per_sample)


def train(model, train_set, test_set, args, model_type='sv'):
    """
    Compile and train a CapsuleNet, write logs / checkpoints / final weights below `args.save_dir`.
    :param model: the CapsuleNet model (inputs `[image, label]`, outputs `[capsule lengths, reconstruction]`)
    :param train_set: data frame with the FILE_PATH_COLUMN and CLASS_COLUMN columns
    :param test_set: data frame with the same columns, used as validation data
    :param args: settings; reads save_dir, batch_size, debug, lr, lam_recon, epochs, multi_view, mv_function
    :param model_type: 'sv' (default) or 'mv'; 'mv' keeps the rows of one object together in every batch.
            It is also used as prefix of all files written.
    :return: The trained model
    :raises ValueError: if a data frame is smaller than one batch, or (for 'mv') its row count is not a multiple
            of `args.multi_view`
    """
    # Name of the first model output. Only the model built by `add_view_pooling` ends in a layer called
    # 'mv_capsnet'; every other model ends in 'capsnet'. (`mv_function` holds a builder function, so it has to
    # be compared with that function, not with the ViewPoolingLayer class.)
    output_layer_name = 'capsnet'
    if model_type == 'mv' and args.mv_function == add_view_pooling:
        output_layer_name = 'mv_capsnet'

    # callbacks
    run_prefix = args.save_dir + '/' + model_type
    log = callbacks.CSVLogger(run_prefix + '-log.csv')
    tb = callbacks.TensorBoard(
        log_dir=run_prefix + '-tensorboard-logs',
        batch_size=args.batch_size, histogram_freq=int(args.debug)
    )
    checkpoint = callbacks.ModelCheckpoint(
        run_prefix + '-weights-{epoch:02d}.h5',
        # Must match the metric key produced by `compile` below: '<output name>_acc'.
        monitor='val_' + output_layer_name + '_acc',
        save_best_only=True, save_weights_only=True, verbose=1
    )

    # compile the model
    model.compile(
        optimizer=optimizers.Adam(lr=args.lr),
        loss=[margin_loss, 'mse'],
        loss_weights=[1., args.lam_recon],
        # 'acc' is reported as '..._acc' by old and new Keras alike, whereas 'accuracy' is reported as
        # '..._accuracy' from Keras 2.3 on and would never match the checkpoint monitor.
        metrics={output_layer_name: 'acc'}
    )

    def create_generator(df, batch_size, model_type='sv', n_views=None):
        """Endlessly yield `([images, labels], [labels, images])` batches of exactly `batch_size` samples."""
        data_gen = ImageDataGenerator(rescale=1. / 255)
        if model_type == 'mv':
            # Shuffle whole objects while keeping the `n_views` rows of every object adjacent.
            df = df.sort_values(FILE_PATH_COLUMN)
            n_objects, leftover = divmod(df.shape[0], n_views)
            if leftover:
                raise ValueError(
                    'Number of rows (%d) is not a multiple of the number of views (%d).'
                    % (df.shape[0], n_views)
                )
            random_order = []
            for obj_index in np.random.permutation(n_objects):
                random_order += range(obj_index * n_views, (obj_index + 1) * n_views)
            df['order'] = random_order
            df = df.sort_values('order').drop('order', axis='columns')
        generator = data_gen.flow_from_dataframe(
            df, directory=WORK_DIR, x_col=FILE_PATH_COLUMN, y_col=CLASS_COLUMN,
            target_size=(image_width, image_height), batch_size=batch_size,
            shuffle=model_type != 'mv'
        )
        if generator.n < batch_size:
            # Without this guard the loop below would skip every batch forever.
            raise ValueError(
                'Only %d usable images, fewer than one batch of %d.' % (generator.n, batch_size)
            )
        while True:
            x_batch, y_batch = next(generator)
            if len(x_batch) != batch_size:
                # The last batch of a pass over the data may be short; the model is built for a fixed batch
                # size (and, for 'mv', whole groups of views), so such a batch is skipped.
                continue
            yield ([x_batch, y_batch], [y_batch, x_batch])

    # The generators only rescale the images; no augmentation is applied.
    model.fit_generator(
        create_generator(
            train_set, args.batch_size, model_type, args.multi_view
        ),
        steps_per_epoch=int(len(train_set) / args.batch_size),
        epochs=args.epochs,
        validation_data=create_generator(
            test_set, args.batch_size, model_type, args.multi_view
        ),
        validation_steps=int(test_set.shape[0] / args.batch_size),
        callbacks=[log, tb, checkpoint]
    )

    model.save_weights(run_prefix + '-trained_model.h5')
    print(
        'Trained model saved to \'%s/%s-trained_model.h5\'' % (args.save_dir, model_type)
    )

    return model


def add_view_pooling(model, args):
    """
    Build a new model whose first output is the 'capsnet' output pooled over views.
    :param model: model containing layers named 'capsnet', 'decoder' and 'mask'
    :param args: settings; `args.multi_view` is the number of views
    :return: a Keras Model with the inputs of `model` and the outputs [pooled lengths, reconstruction]
    """
    def first_layer_named(name):
        # First layer of `model` carrying this name.
        return [layer for layer in model.layers if layer.name == name][0]

    lengths = first_layer_named('capsnet').output
    pooled = ViewPoolingLayer(name='mv_capsnet', num_views=args.multi_view)(lengths)

    decoder = first_layer_named('decoder')
    masked = first_layer_named('mask').output
    reconstruction = decoder(masked)

    return models.Model(model.inputs, [pooled, reconstruction])


def add_view_capsule(model, args):
    """
    Build a new model in which a view-capsule stage (capsule dimension 32) is inserted between the 'caps1'
    output and the existing 'capsnet' layer.
    :param model: model containing layers named 'caps1', 'capsnet', 'decoder' and 'mask'
    :param args: settings; reads `args.multi_view` and `args.routings`
    :return: a Keras Model with the inputs of `model` and the outputs [capsule lengths, reconstruction]
    """
    def first_layer_named(name):
        # First layer of `model` carrying this name.
        return [layer for layer in model.layers if layer.name == name][0]

    class_caps = first_layer_named('caps1').output
    view_caps = ViewCapsuleLayer(class_caps, args.multi_view, 32, args.routings)
    # The existing 'capsnet' layer is applied again, this time to the view capsules.
    lengths = first_layer_named('capsnet')(view_caps)

    decoder = first_layer_named('decoder')
    masked = first_layer_named('mask').output
    reconstruction = decoder(masked)

    return models.Model(model.inputs, [lengths, reconstruction])


def test(model, data):
    """
    Print the accuracy of `model` for every class in `data` and the overall accuracy.
    :param model: model taking an image batch and returning `[class scores, reconstruction]`
    :param data: data frame with the FILE_PATH_COLUMN and CLASS_COLUMN columns
    """
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    total_tp = 0
    # The classes are taken from `data` itself and visited in sorted order, which is how
    # `flow_from_dataframe` numbers the classes when no explicit `classes` list is passed.
    # NOTE(review): this assumes `data` contains the same set of classes the model was trained on.
    for label_no, label in enumerate(sorted(data[CLASS_COLUMN].unique())):
        label_data = data.query(f'{CLASS_COLUMN} == "{label}"')
        test_generator = test_datagen.flow_from_dataframe(
            label_data, directory=WORK_DIR, x_col=FILE_PATH_COLUMN, y_col=CLASS_COLUMN,
            target_size=(image_width, image_height), shuffle=False
        )
        # One pass over the generator predicts every image exactly once, so no de-duplication of the
        # predictions is needed afterwards.
        y_pred, _ = model.predict_generator(test_generator, steps=len(test_generator))
        tp = np.sum(np.argmax(y_pred, 1) == label_no)
        print(f'Class {label} acc: {tp / label_data.shape[0]}')

        total_tp += tp
    print('Test acc:', total_tp / data.shape[0])


def load_data():
    """
    List the image files below WORK_DIR, which is expected to contain `<class>/train` and `<class>/test`.
    :return: `(train, test)` data frames with the columns CLASS_COLUMN and FILE_PATH_COLUMN; the file paths are
            relative to WORK_DIR.
    """
    # Every directory directly below WORK_DIR is treated as one class.
    classes = []
    for entry in os.listdir(WORK_DIR):
        if os.path.isdir(WORK_DIR + entry):
            classes.append(entry)

    rows_by_split = {'/train': [], '/test': []}
    for class_name in classes:
        for split, rows in rows_by_split.items():
            rows.extend(
                (class_name, class_name + split + '/' + file)
                for file in os.listdir(WORK_DIR + class_name + split)
            )

    columns = [CLASS_COLUMN, FILE_PATH_COLUMN]
    train_frame = pd.DataFrame(rows_by_split['/train'], columns=columns)
    test_frame = pd.DataFrame(rows_by_split['/test'], columns=columns)
    return (train_frame, test_frame)

train_set, test_set = load_data()

# Training data: drop the rows of class "chair" whose path matches one of the listed ids.
train_flagged = train_set.assign(
    to_drop=train_set[FILE_PATH_COLUMN].str.contains(r'0565|0357|0536')
)
train_kept = train_flagged.query(f'{CLASS_COLUMN} != "chair" or not to_drop')
train_set = train_kept.drop('to_drop', axis=1)

# Test data: drop every row whose path matches one of the listed objects.
test_flagged = test_set.assign(
    to_drop=test_set[FILE_PATH_COLUMN].str.contains(
        r'chair_0905|car_0242|guitar_0205|airplane_0669.'
    )
)
test_kept = test_flagged.query('not to_drop')
test_set = test_kept.drop('to_drop', axis=1)


In [None]:
class args:
    # Settings of the run, read as class attributes (`args.epochs`, ...); the class is never instantiated here.
    epochs = 5
    batch_size = 8 * 12
    lr = 0.001
    lr_decay = 0.9
    lam_recon = 0.392  # weight of the reconstruction ('mse') loss
    routings = 3
    shift_fraction = 0.
    debug = False
    save_dir = './experiment'
    testing = False  # True: skip training and only evaluate
    weights = None  # optional path of a weights file to load before training / testing
    multi_view = 12  # number of views per object
    mv_function = add_view_capsule  # builder turning the trained model into the multi-view model
    # Indented with spaces like the rest of the body: a tab here mixes tabs and spaces and raises TabError.
    two_stage = True
    
print(vars(args))

if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)

# Build the training and the evaluation model; both share the same layers.
model, eval_model = CapsNet(
    input_shape=(image_width, image_height, 3),
    n_class=len(train_set[CLASS_COLUMN].unique()),
    routings=args.routings,
    batch_size=args.batch_size
)
model.summary()

if args.weights is not None:  # init the model weights with provided one
    eval_model.load_weights(args.weights)
    model.load_weights(args.weights)
if not args.testing:
    # Stage 1 (optional): train the plain model first. `two_stage` lives on `args`; the bare name is undefined.
    if args.two_stage:
        model = train(model=model, train_set=train_set, test_set=test_set, args=args)
    # Stage 2: extend the model with the multi-view head and train it as 'mv'.
    if args.multi_view:
        model = args.mv_function(model, args)
        model.summary()
        train(
            model=model, train_set=train_set, test_set=test_set,
            args=args, model_type='mv'
        )
else:  # testing only; the weights loaded above are used if any were given
    if args.weights is None:
        print('No weights are provided. Will test using random initialized weights.')
    test(model=eval_model, data=test_set)
