In [0]:
!pip uninstall keras
!pip install keras==2.2.4

In [0]:
#@title outputs - plots/images
import numpy as np
from matplotlib import pyplot as plt
import csv
import math
import pandas

def plot_log(filename, show=True):

    data = pandas.read_csv(filename)

    fig = plt.figure(figsize=(4,6))
    fig.subplots_adjust(top=0.95, bottom=0.05, right=0.95)
    fig.add_subplot(211)
    for key in data.keys():
        if key.find('loss') >= 0 and not key.find('val') >= 0:  # training loss
            plt.plot(data['epoch'].values, data[key].values, label=key)
    plt.legend()
    plt.title('Training loss')

    fig.add_subplot(212)
    for key in data.keys():
        if key.find('acc') >= 0:  # acc
            plt.plot(data['epoch'].values, data[key].values, label=key)
    plt.legend()
    plt.title('Training and validation accuracy')

    # fig.savefig('result/log.png')
    if show:
        plt.show()


def combine_images(generated_images, height=None, width=None):
    num = generated_images.shape[0]
    if width is None and height is None:
        width = int(math.sqrt(num))
        height = int(math.ceil(float(num)/width))
    elif width is not None and height is None:  # height not given
        height = int(math.ceil(float(num)/width))
    elif height is not None and width is None:  # width not given
        width = int(math.ceil(float(num)/height))

    shape = generated_images.shape[1:4]
    image = np.zeros((height*shape[0], width*shape[1],shape[2]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, :]
    print(num)
    print(width)
    print(height)
    print(shape)
    return image


def combine_imagesOld(generated_images, height=None, width=None):
    num = generated_images.shape[0]
    if width is None and height is None:
        width = int(math.sqrt(num))
        height = int(math.ceil(float(num)/width))
    elif width is not None and height is None:  # height not given
        height = int(math.ceil(float(num)/width))
    elif height is not None and width is None:  # width not given
        width = int(math.ceil(float(num)/height))

    shape = generated_images.shape[1:3]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, 1]
    print(num)
    print(width)
    print(height)
    print(shape)
    return image


#if __name__=="__main__":
#   plot_log('result/log.csv')


In [0]:
#@title layer construction
"""
Some key layers used for constructing a Capsule Network. These layers can used to construct CapsNet on other dataset, 
not just on MNIST.
*NOTE*: some functions can be implemented in multiple ways, I keep all of them. You can try them for yourself just by
uncommenting them and commenting their counterparts.
Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras`
"""

import keras.backend as K
import tensorflow as tf
from keras import initializers, layers


class Length(layers.Layer):
    """
    Compute the length of vectors. This is used to compute a Tensor that has the same shape with y_true in margin_loss.
    Using this layer as model's output can directly predict labels by using `y_pred = np.argmax(model.predict(x), 1)`
    inputs: shape=[None, num_vectors, dim_vector]
    output: shape=[None, num_vectors]
    """
    def call(self, inputs, **kwargs):
        return K.sqrt(K.sum(K.square(inputs), -1))

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

    def get_config(self):
        config = super(Length, self).get_config()
        return config


class Mask(layers.Layer):
    """
    Mask a Tensor with shape=[None, num_capsule, dim_vector] either by the capsule with max length or by an additional 
    input mask. Except the max-length capsule (or specified capsule), all vectors are masked to zeros. Then flatten the
    masked Tensor.
    For example:
        ```
        x = keras.layers.Input(shape=[8, 3, 2])  # batch_size=8, each sample contains 3 capsules with dim_vector=2
        y = keras.layers.Input(shape=[8, 3])  # True labels. 8 samples, 3 classes, one-hot coding.
        out = Mask()(x)  # out.shape=[8, 6]
        # or
        out2 = Mask()([x, y])  # out2.shape=[8,6]. Masked with true labels y. Of course y can also be manipulated.
        ```
    """
    def call(self, inputs, **kwargs):
        if type(inputs) is list:  # true label is provided with shape = [None, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of capsules. Mainly used for prediction
            # compute lengths of capsules
            x = K.sqrt(K.sum(K.square(inputs), -1))
            # generate the mask which is a one-hot code.
            # mask.shape=[None, n_classes]=[None, num_capsule]
            mask = K.one_hot(indices=K.argmax(x, 1), num_classes=x.get_shape().as_list()[1])

        # inputs.shape=[None, num_capsule, dim_capsule]
        # mask.shape=[None, num_capsule]
        # masked.shape=[None, num_capsule * dim_capsule]
        masked = K.batch_flatten(inputs * K.expand_dims(mask, -1))
        return masked

    def compute_output_shape(self, input_shape):
        if type(input_shape[0]) is tuple:  # true label provided
            return tuple([None, input_shape[0][1] * input_shape[0][2]])
        else:  # no true label provided
            return tuple([None, input_shape[1] * input_shape[2]])

    def get_config(self):
        config = super(Mask, self).get_config()
        return config


def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors


class CapsuleLayer(layers.Layer):
    """
    The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the 
    neuron from the former layer, and it has `out_num` output neurons. CapsuleLayer just expand the output of the neuron
    from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_capsule] and output shape = \
    [None, num_capsule, dim_capsule]. For Dense Layer, input_dim_capsule = dim_capsule = 1.
    
    :param num_capsule: number of capsules in this layer
    :param dim_capsule: dimension of the output vectors of the capsules in this layer
    :param routings: number of iterations for the routing algorithm
    """
    print("fine")
    def __init__(self, num_capsule, dim_capsule, routings=3,
                 kernel_initializer='glorot_uniform',
                 **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_initializer = initializers.get(kernel_initializer)
    
    def build(self, input_shape):
        assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]"
        self.input_num_capsule = input_shape[1]
        self.input_dim_capsule = input_shape[2]

        # Transform matrix
        self.W = self.add_weight(shape=[self.num_capsule, self.input_num_capsule,
                                        self.dim_capsule, self.input_dim_capsule],
                                 initializer=self.kernel_initializer,
                                 name='W')

        self.built = True
    
    def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_capsule]
        # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule]
        inputs_expand = K.expand_dims(inputs, 1)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule]
        inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])

        # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
        # x.shape=[num_capsule, input_num_capsule, input_dim_capsule]
        # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
        # Regard the first two dimensions as `batch` dimension,
        # then matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule].
        # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule]
        inputs_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, [2, 3]), elems=inputs_tiled)

        # Begin: Routing algorithm ---------------------------------------------------------------------#
        # The prior for coupling coefficient, initialized as zeros.
        # b.shape = [None, self.num_capsule, self.input_num_capsule].
        b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):
            # c.shape=[batch_size, num_capsule, input_num_capsule]
            c = tf.nn.softmax(b, dim=1)

            # c.shape =  [batch_size, num_capsule, input_num_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # The first two dimensions as `batch` dimension,
            # then matmal: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
            # outputs.shape=[None, num_capsule, dim_capsule]
            outputs = squash(K.batch_dot(c, inputs_hat, [2, 2]))  # [None, 10, 16]

            if i < self.routings - 1:
                # outputs.shape =  [None, num_capsule, dim_capsule]
                # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
                # The first two dimensions as `batch` dimension,
                # then matmal: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
                # b.shape=[batch_size, num_capsule, input_num_capsule]
                b += K.batch_dot(outputs, inputs_hat, [2, 3])
        # End: Routing algorithm -----------------------------------------------------------------------#

        return outputs

    def compute_output_shape(self, input_shape):
        return tuple([None, self.num_capsule, self.dim_capsule])

    def get_config(self):
        config = {
            'num_capsule': self.num_capsule,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings
        }
        base_config = super(CapsuleLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding):
    """
    Apply Conv2D `n_channels` times and concatenate all capsules
    :param inputs: 4D tensor, shape=[None, width, height, channels]
    :param dim_capsule: the dim of the output vector of capsule
    :param n_channels: the number of types of capsules
    :return: output tensor, shape=[None, num_capsule, dim_capsule]
    """
    output = layers.Conv2D(filters=dim_capsule*n_channels, kernel_size=kernel_size, strides=strides, padding=padding,
                           name='primarycap_conv2d')(inputs)
    outputs = layers.Reshape(target_shape=[-1, dim_capsule], name='primarycap_reshape')(output)
    return layers.Lambda(squash, name='primarycap_squash')(outputs)


"""
# The following is another way to implement primary capsule layer. This is much slower.
# Apply Conv2D `n_channels` times and concatenate all capsules
def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding):
    outputs = []
    for _ in range(n_channels):
        output = layers.Conv2D(filters=dim_capsule, kernel_size=kernel_size, strides=strides, padding=padding)(inputs)
        outputs.append(layers.Reshape([output.get_shape().as_list()[1] ** 2, dim_capsule])(output))
    outputs = layers.Concatenate(axis=1)(outputs)
    return layers.Lambda(squash)(outputs)
"""

In [0]:
#@title architecture setup and def train { form-width: "10%" }
"""
Keras implementation of CapsNet in Hinton's paper Dynamic Routing Between Capsules.
The current version maybe only works for TensorFlow backend. Actually it will be straightforward to re-write to TF code.
Adopting to other backends should be easy, but I have not tested this. 
Usage:
       python capsulenet.py
       python capsulenet.py --epochs 50
       python capsulenet.py --epochs 50 --routings 3
       ... ...
       
Result:
    Validation accuracy > 99.5% after 20 epochs. Converge to 99.66% after 50 epochs.
    About 110 seconds per epoch on a single GTX1070 GPU card
    
Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras`
"""

import numpy as np
from keras import layers, models, optimizers
from keras import backend as K
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix

K.set_image_data_format('channels_last')


def CapsNet(input_shape, n_class, routings):
    """
    A Capsule Network on MNIST.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
            `eval_model` can also be used for training.
    """
    x = layers.Input(shape=input_shape)
    
    # Layer 1: Just a conventional Conv2D layer
    conv1 = layers.Conv2D(filters=256, kernel_size=9, strides=1, padding='valid', activation='relu', name='conv1')(x)
    # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_capsule]
    primarycaps = PrimaryCap(conv1, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid')
    print(primarycaps.shape)
    
    
    # Layer 3: Capsule layer. Routing algorithm works here.
    digitcaps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings,
                             name='digitcaps')(primarycaps)
    
    print("notfine")
    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    # If using tensorflow, this will not be necessary. :)
    out_caps = Length(name='capsnet')(digitcaps)

    # Decoder network.
    y = layers.Input(shape=(n_class,))
    masked_by_y = Mask()([digitcaps, y])  # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(digitcaps)  # Mask using the capsule with maximal length. For prediction
    
    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder.add(layers.Dense(512, activation='relu', input_dim=16*n_class))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))
    
    # Models for training and evaluation (prediction)
    train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = models.Model(x, [out_caps, decoder(masked)])

    # manipulate model
    noise = layers.Input(shape=(n_class, 16))
    noised_digitcaps = layers.Add()([digitcaps, noise])
    masked_noised_y = Mask()([noised_digitcaps, y])
    manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y))
    return train_model, eval_model, manipulate_model


def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq.(4). When y_true[i, :] contains not just one `1`, this loss should work too. Not test it.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))

    return K.mean(K.sum(L, 1))


def train(model, data, args, model_num):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))`
    :param args: arguments
    :return: The trained model
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size, histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5', monitor='val_capsnet_acc',
                                           save_best_only=True, save_weights_only=True, verbose=1)
    lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: args.lr * (args.lr_decay ** epoch))

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})

    """
    # Training without data augmentation:
    model.fit([x_train, y_train], [y_train, x_train], batch_size=args.batch_size, epochs=args.epochs,
              validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay])
    """

    # Begin: Training with data augmentation ---------------------------------------------------------------------#
    def train_generator(x, y, batch_size, shift_fraction=0.):
        train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                           height_shift_range=shift_fraction)  # shift up to 2 pixel for MNIST
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while 1:
            x_batch, y_batch = generator.next()
            yield ([x_batch, y_batch], [y_batch, x_batch])

    # Training with data augmentation. If shift_fraction=0., also no augmentation.
    model.fit_generator(generator=train_generator(x_train, y_train, args.batch_size, args.shift_fraction),
                        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
                        epochs=args.epochs*args.ensemble,
                        validation_data=[[x_test, y_test], [y_test, x_test]],
                        callbacks=[log, tb, checkpoint, lr_decay])
    # End: Training with data augmentation -----------------------------------------------------------------------#

    model.save_weights(args.save_dir + '/trained_model' + str(model_num) +'.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)


    plot_log(args.save_dir + '/log.csv', show=True)

    return model

def test(model, data, args):
    x_test, y_test = data
    y_pred, x_recon = model.predict(x_test, batch_size=100)
    print(np.sum(np.argmax(y_pred, 1) == np.argmax(y_test, 1)))
    print(y_test.shape[0])
    print('-'*30 + 'Begin: test' + '-'*30)
    print('Test acc:', np.sum(np.argmax(y_pred, 1) == np.argmax(y_test, 1))/y_test.shape[0])
    print(y_test.shape[0])
    print(y_pred[1])
    print(confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1)))
    img = combine_images(np.concatenate([x_test[:50],x_recon[:50]]))
    plt.imshow(img)
    plt.show()
    plt.imshow(x_recon[5])
    plt.show()
    image = img * 255
    Image.fromarray(image.astype(np.uint8),'RGB').save(args.save_dir + "/real_and_recon.png")
    print()
    print('Reconstructed images are saved to %s/real_and_recon.png' % args.save_dir)
    print('-' * 30 + 'End: test' + '-' * 30)
    plt.imshow(plt.imread(args.save_dir + "/real_and_recon.png"))
    plt.show()

def testForEnsemble(model, data, args):
    x_test, y_test = data
    y_pred, x_recon = model.predict(x_test, batch_size=100)
    print(np.sum(np.argmax(y_pred, 1) == np.argmax(y_test, 1)))
    print(y_test.shape[0])
    print('-'*30 + 'Begin: test' + '-'*30)
    print(f'%d. Test accuracy run:  {np.mean(np.equal(np.argmax(y_test,axis=1),np.argmax(y_pred,axis=1)))}'%(i+1))
    #print(y_test.shape[0])
    #print(y_pred[1])
    #print(confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1)))
    #img = combine_images(np.concatenate([x_test[:50],x_recon[:50]]))
    #plt.imshow(img)
    #plt.show()
    #plt.imshow(x_recon[5])
    #plt.show()
    #image = img * 255
    #Image.fromarray(image.astype(np.uint8),'RGB').save(args.save_dir + "/real_and_recon.png")
    #print()
    #print('Reconstructed images are saved to %s/real_and_recon.png' % args.save_dir)
    #print('-' * 30 + 'End: test' + '-' * 30)
    #plt.imshow(plt.imread(args.save_dir + "/real_and_recon.png"))
    #plt.show()
    return y_pred


def manipulate_latent(model, data, args):
    print('-'*30 + 'Begin: manipulate' + '-'*30)
    x_test, y_test = data
    index = np.argmax(y_test, 1) == args.digit
    number = np.random.randint(low=0, high=sum(index) - 1)
    x, y = x_test[index][number], y_test[index][number]
    x, y = np.expand_dims(x, 0), np.expand_dims(y, 0)
    noise = np.zeros([1, 10, 16])
    x_recons = []
    for dim in range(16):
        for r in [-0.25, -0.2, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.2, 0.25]:
            tmp = np.copy(noise)
            tmp[:,:,dim] = r
            x_recon = model.predict([x, y, tmp])
            x_recons.append(x_recon)

    x_recons = np.concatenate(x_recons)

    img = combine_images(x_recons, height=16)
    image = img*255
    Image.fromarray(image.astype(np.uint8)).save(args.save_dir + '/manipulate-%d.png' % args.digit)
    print('manipulated result saved to %s/manipulate-%d.png' % (args.save_dir, args.digit))
    print('-' * 30 + 'End: manipulate' + '-' * 30)


def load_mnist():
    # the data, shuffled and split between train and test sets
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))
    return (x_train, y_train), (x_test, y_test)


In [0]:
#@title ModifiedMnistLoader
from keras.datasets import mnist
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import random
def load_modified_mnist():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_trainModified=np.zeros((60000,28,28,3),dtype=int)
    for x in range(x_train.shape[0]):
      x_trainModified[x] = cv2.cvtColor(x_train[x],cv2.COLOR_GRAY2RGB)
    x_testModified=np.zeros((10000,28,28,3),dtype=int)
    for x in range(x_test.shape[0]):
      x_testModified[x] = cv2.cvtColor(x_test[x],cv2.COLOR_GRAY2RGB)

    !wget "https://images2.alphacoders.com/101/1011957.jpg"
    sourceImage = mpimg.imread('1011957.jpg')

    sourceWidth = sourceImage.shape[1]
    sourceHeight = sourceImage.shape[0]
    width = 28
    height = 28

    for x in range(60000):
     x1 = random.randint(0, sourceWidth-width-1)
     y1 = random.randint(0, sourceHeight-height-1)
     croppedImage= sourceImage[y1:y1+height,x1:x1+width,:]
     for row in range(x_trainModified[x].shape[0]):
        for pixel in range(x_trainModified[x].shape[1]):
         if all(i<140 for i in x_trainModified[x][row][pixel]):
           x_trainModified[x][row][pixel]=croppedImage[row][pixel]

    for x in range(10000):
      x1 = random.randint(0, sourceWidth-width-1)
      y1 = random.randint(0, sourceHeight-height-1)
      croppedImage= sourceImage[y1:y1+height,x1:x1+width,:]
      for row in range(x_testModified[x].shape[0]):
        for pixel in range(x_testModified[x].shape[1]):
          if all(i<140 for i in x_testModified[x][row][pixel]):
            x_testModified[x][row][pixel]=croppedImage[row][pixel]
    
    x_trainModified = x_trainModified.reshape(-1, 28, 28, 3).astype('float32') / 255.
    x_testModified = x_testModified.reshape(-1, 28, 28, 3).astype('float32') / 255.
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))
    return (x_trainModified, y_train), (x_testModified, y_test)
    


In [0]:
#@title Cifar10Loader

from keras.datasets import cifar10
def load_cifar10():
  (x_train, y_train), (x_test, y_test) = cifar10.load_data()
  x_train = x_train.reshape(-1, 32, 32, 3).astype('float32') / 255.
  x_test = x_test.reshape(-1, 32, 32, 3).astype('float32') / 255.
  y_train = to_categorical(y_train.astype('float32'))
  y_test = to_categorical(y_test.astype('float32'))
  return (x_train, y_train), (x_test, y_test)


In [0]:
#@title Run
!pip install easydict
import os
import argparse
import easydict
from keras.preprocessing.image import ImageDataGenerator
from keras import callbacks


# setting the hyper parameters
#parser = argparse.ArgumentParser(description="Capsule Network on MNIST.")
#parser.add_argument('--epochs', default=50, type=int)
#parser.add_argument('--batch_size', default=100, type=int)
#parser.add_argument('--lr', default=0.001, type=float,
#                    help="Initial learning rate")
#parser.add_argument('--lr_decay', default=0.9, type=float,
#                    help="The value multiplied by lr at each epoch. Set a larger value for larger epochs")
#parser.add_argument('--lam_recon', default=0.392, type=float,
#                    help="The coefficient for the loss of decoder")
#parser.add_argument('-r', '--routings', default=3, type=int,
#                    help="Number of iterations used in routing algorithm. should > 0")
#parser.add_argument('--shift_fraction', default=0.1, type=float,
#                    help="Fraction of pixels to shift at most in each direction.")
#parser.add_argument('--debug', action='store_true',
#                    help="Save weights by TensorBoard")
#parser.add_argument('--save_dir', default='./result')
#parser.add_argument('-t', '--testing', action='store_true',
#                    help="Test the trained model on testing dataset")
#parser.add_argument('--digit', default=5, type=int,
#                    help="Digit to manipulate")
#parser.add_argument('-w', '--weights', default=None,
#                    help="The path of the saved weights. Should be specified when testing")
args = easydict.EasyDict({
    "epochs": 50,
    "batch_size": 100,
    'train_steps': 1000, 
    "lr": 0.001,
    "lr_decay": 0.9,
    "lam_recon": 0.392,
    "routings": 3,
    "shift_fraction": 0.1,
    "save_dir": 'result',
    "digit": 5,
    "debug": False,
    "testing": True,
    "weights": "trained_model.h5",
    "ensemble": 5    
})

#args = parser.parse_args()
print(args)


if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)

# load data
(x_train, y_train), (x_test, y_test) = load_mnist()

#(x_train, y_train), (x_test, y_test) = load_modified_mnist()

#(x_train,y_train), (x_test, y_test) = load_cifar10()
# define models
partialXStartingPoint=0
if args.testing==False:
  for i in range(args.ensemble):
    train_size = int(x_train.shape[0]/args.ensemble)
    x_trainPartial = x_train[partialXStartingPoint:partialXStartingPoint+train_size]
    y_trainPartial = y_train[partialXStartingPoint:partialXStartingPoint+train_size]
    partialXStartingPoint+=train_size
    model, eval_model, manipulate_model = CapsNet(input_shape=x_train.shape[1:],
                                                n_class=len(np.unique(np.argmax(y_train, 1))),
                                                routings=args.routings)
    train(model=model, data=((x_trainPartial, y_trainPartial), (x_test, y_test)), args=args, model_num=i)
    
else:
  y_summed = []
  for i in range(args.ensemble):

    model, eval_model, manipulate_model = CapsNet(input_shape=x_train.shape[1:],
                                                n_class=len(np.unique(np.argmax(y_train, 1))),
                                                routings=args.routings)
    model.load_weights(str('trained_model' + str(i) + '.h5'))
    y_pred = testForEnsemble(model=eval_model, data=(x_test, y_test), args=args)
    if(i==0):
      y_summed= np.array(y_pred)
    else:
      y_summed += np.array(y_pred)
  print(np.sum(np.argmax(y_summed, 1) == np.argmax(y_test, 1)))
  print(y_test.shape[0])
  print('-'*30 + 'Ensemble results: ' + '-'*30)
  print(f'%d. Ensemble accuracy:  {np.mean(np.equal(np.argmax(y_test,axis=1),np.argmax(y_pred,axis=1)))}'%(i+1))
  print('Test acc:', np.sum(np.argmax(y_summed, 1) == np.argmax(y_test, 1))/y_test.shape[0])
#  
# train or test
#if args.weights is not None:  # init the model weights with provided one
#    model.load_weights(args.weights)
#if not args.testing:
#    
#else:  # as long as weights are given, will run testing
#    if args.weights is None:
#        print('No weights are provided. Will test using random initialized weights.')
#    #manipulate_latent(manipulate_model, (x_test, y_test), args)
#    test(model=eval_model, data=(x_test, y_test), args=args)


{'epochs': 50, 'batch_size': 100, 'train_steps': 1000, 'lr': 0.001, 'lr_decay': 0.9, 'lam_recon': 0.392, 'routings': 3, 'shift_fraction': 0.1, 'save_dir': 'result', 'digit': 5, 'debug': False, 'testing': True, 'weights': 'trained_model.h5', 'ensemble': 5}
(?, ?, 8)
notfine
9964
10000
------------------------------Begin: test------------------------------
1. Test accuracy run:  0.9964
(?, ?, 8)
notfine
9963
10000
------------------------------Begin: test------------------------------
2. Test accuracy run:  0.9963
(?, ?, 8)
notfine
9957
10000
------------------------------Begin: test------------------------------
3. Test accuracy run:  0.9957
(?, ?, 8)
notfine
9964
10000
------------------------------Begin: test------------------------------
4. Test accuracy run:  0.9964
(?, ?, 8)
notfine
9960
10000
------------------------------Begin: test------------------------------
5. Test accuracy run:  0.996
9964
10000
------------------------------Ensemble results: ------------------------------


In [0]:
#@title check gpu
from tensorflow.python.client import device_lib
device_lib.list_local_devices()