In [1]:
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
import seaborn as sns
import numpy as np
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from matplotlib import pyplot as plt
import keras
from keras import initializers
import tensorflow as tf
from keras import backend as K
from keras.backend import *
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Layer, Lambda, Input, Flatten, Dropout, Conv2D, MaxPooling2D, Conv1D, MaxPooling1D, LSTM, TimeDistributed, ConvLSTM2D, Permute, Reshape, Conv2D
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

def own_batch_dot(x, y, axes=None):
	"""Batchwise dot product.
	`batch_dot` is used to compute dot product of `x` and `y` when
	`x` and `y` are data in batch, i.e. in a shape of
	`(batch_size, :)`.
	`batch_dot` results in a tensor or variable with less dimensions
	than the input. If the number of dimensions is reduced to 1,
	we use `expand_dims` to make sure that ndim is at least 2.
	Arguments:
		x: Keras tensor or variable with `ndim >= 2`.
		y: Keras tensor or variable with `ndim >= 2`.
		axes: list of (or single) int with target dimensions.
			The lengths of `axes[0]` and `axes[1]` should be the same.
	Returns:
		A tensor with shape equal to the concatenation of `x`'s shape
		(less the dimension that was summed over) and `y`'s shape
		(less the batch dimension and the dimension that was summed over).
		If the final rank is 1, we reshape it to `(batch_size, 1)`.
	Examples:
		Assume `x = [[1, 2], [3, 4]]` and `y = [[5, 6], [7, 8]]`
		`batch_dot(x, y, axes=1) = [[17, 53]]` which is the main diagonal
		of `x.dot(y.T)`, although we never have to calculate the off-diagonal
		elements.
		Shape inference:
		Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`.
		If `axes` is (1, 2), to find the output shape of resultant tensor,
			loop through each dimension in `x`'s shape and `y`'s shape:
		* `x.shape[0]` : 100 : append to output shape
		* `x.shape[1]` : 20 : do not append to output shape,
			dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1)
		* `y.shape[0]` : 100 : do not append to output shape,
			always ignore first dimension of `y`
		* `y.shape[1]` : 30 : append to output shape
		* `y.shape[2]` : 20 : do not append to output shape,
			dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2)
		`output_shape` = `(100, 30)`
	```python
		>>> x_batch = K.ones(shape=(32, 20, 1))
		>>> y_batch = K.ones(shape=(32, 30, 20))
		>>> xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=[1, 2])
		>>> K.int_shape(xy_batch_dot)
		(32, 1, 30)
	```
	"""
	if isinstance(axes, int):
		axes = (axes, axes)
	x_ndim = ndim(x)
	y_ndim = ndim(y)
	if axes is None:
		# behaves like tf.batch_matmul as default
		axes = [x_ndim - 1, y_ndim - 2]
	if x_ndim > y_ndim:
		diff = x_ndim - y_ndim
		y = array_ops.reshape(y,
							array_ops.concat(
								[array_ops.shape(y), [1] * (diff)], axis=0))
	elif y_ndim > x_ndim:
		diff = y_ndim - x_ndim
		x = array_ops.reshape(x,
							array_ops.concat(
								[array_ops.shape(x), [1] * (diff)], axis=0))
	else:
		diff = 0
	if ndim(x) == 2 and ndim(y) == 2:
		if axes[0] == axes[1]:
			out = math_ops.reduce_sum(math_ops.multiply(x, y), axes[0])
		else:
			out = math_ops.reduce_sum(
			math_ops.multiply(array_ops.transpose(x, [1, 0]), y), axes[1])
	else:
		adj_x = None if axes[0] == ndim(x) - 1 else True
		adj_y = True if axes[1] == ndim(y) - 1 else None
		out = math_ops.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y)
	if diff:
		if x_ndim > y_ndim:
			idx = x_ndim + y_ndim - 3
		else:
			idx = x_ndim - 1
			out = array_ops.squeeze(out, list(range(idx, idx + diff)))
	if ndim(out) == 1:
		out = expand_dims(out, 1)
	return out

In [2]:
# load a single file as a numpy array
def load_file(filepath):
    dataframe = read_csv(filepath, header=None, delim_whitespace=True)
    return dataframe.values

In [3]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
    loaded = list()
    for name in filenames:
        data = load_file(prefix + name)
        loaded.append(data)
    # stack group so that features are the 3rd dimension
    loaded = dstack(loaded)
    return loaded

In [4]:
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    filepath = prefix + group + '/Inertial Signals/'
    print('File Path : ',filepath)
    # load all 9 files as a single array
    filenames = list()
    # total acceleration
    filenames += ['total_acc_x_'+group+'.txt', 'total_acc_y_'+group+'.txt', 'total_acc_z_'+group+'.txt']
    # body acceleration
    filenames += ['body_acc_x_'+group+'.txt', 'body_acc_y_'+group+'.txt', 'body_acc_z_'+group+'.txt']
    # body gyroscope
    filenames += ['body_gyro_x_'+group+'.txt', 'body_gyro_y_'+group+'.txt', 'body_gyro_z_'+group+'.txt']
    # load input data
    X = load_group(filenames, filepath)
    # load class output
    y = load_file(prefix + group + '/y_'+group+'.txt')
    return X, y

In [5]:
# load all train
X_train, Y_train = load_dataset_group('train', 'UCIDataset/')
# load all test
X_test, Y_test = load_dataset_group('test', 'UCIDataset/')

# zero-offset class values
Y_train = Y_train - 1
Y_test = Y_test - 1
# one hot encode y
Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)

print('X_train.shape : ', X_train.shape)
print('Y_train.shape : ', Y_train.shape)
print('X_test.shape : ', X_test.shape)
print('Y_test.shape : ', Y_test.shape)

File Path :  UCIDataset/train/Inertial Signals/
File Path :  UCIDataset/test/Inertial Signals/
X_train.shape :  (7352, 128, 9)
Y_train.shape :  (7352, 6)
X_test.shape :  (2947, 128, 9)
Y_test.shape :  (2947, 6)


In [30]:
verbose = 1
epochs = 50
batch_size = 128

n_timesteps = X_train.shape[1]
n_features = X_train.shape[2]
n_outputs = Y_train.shape[1]

In [31]:
checkpoint = ModelCheckpoint("CNN_Capsule_weights.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')

In [32]:
inputs = layers.Input(shape=(128,9))

# input_shape = Input(shape=(n_timesteps,n_features))
conv1 = Conv1D(filters=128, kernel_size=4, activation='relu')(inputs)
max_pooling = MaxPooling1D(pool_size=2)(conv1)
conv2 = Conv1D(filters=128, kernel_size=4, activation='relu', padding='same')(max_pooling)
dropout = Dropout(0.5)(conv2)
max_pooling = MaxPooling1D(pool_size=2)(dropout)
reshaped = Reshape((496,8))(max_pooling)

def squash(inputs):
    # take norm of input vectors
    squared_norm = K.sum(K.square(inputs), axis = -1, keepdims = True)
    # use the formula for non-linear function to return squashed output
    return ((squared_norm/(1+squared_norm))/(K.sqrt(squared_norm+K.epsilon())))*inputs

# squash the reshaped output to make length of vector b/w 0 and 1
squashed_output = Lambda(squash)(reshaped)

class DigitCapsuleLayer(Layer):
    # creating a layer class in keras
    def __init__(self, **kwargs):
        super(DigitCapsuleLayer, self).__init__(**kwargs)
        self.kernel_initializer = initializers.get('glorot_uniform')

    def build(self, input_shape):
        # initialize weight matrix for each capsule in lower layer
        self.W = self.add_weight(shape = [6, 496, 16, 8], initializer = self.kernel_initializer, name = 'weights')
        self.built = True

    def call(self, inputs):
        inputs = K.expand_dims(inputs, 1)
        inputs = K.tile(inputs, [1, 6, 1, 1])
        # matrix multiplication b/w previous layer output and weight matrix
        inputs = K.map_fn(lambda x: own_batch_dot(x, self.W, [2, 3]), elems=inputs)
        b = tf.zeros(shape = [K.shape(inputs)[0], 6, 496])

		# routing algorithm with updating coupling coefficient c, using scalar product b/w input capsule and output capsule
        for i in range(3-1):
            # print(b)
            c = tf.nn.softmax(b, axis=1)
            print(c)
            s = own_batch_dot(c, inputs, [2, 2])
            print(s)
            v = squash(s)
            print(v)
            b = b + own_batch_dot(v, inputs, [2,3])
            
        return v
    
    def compute_output_shape(self, input_shape):
        return tuple([None, 6, 16])

def output_layer(inputs):
    return K.sqrt(K.sum(K.square(inputs), -1) + K.epsilon())

digit_caps = DigitCapsuleLayer()(squashed_output)
outputs = Lambda(output_layer)(digit_caps)

model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                    epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks=[checkpoint])

Tensor("digit_capsule_layer_12/transpose_1:0", shape=(None, 6, 496), dtype=float32)
Tensor("digit_capsule_layer_12/Squeeze:0", shape=(None, 6, 16), dtype=float32)
Tensor("digit_capsule_layer_12/mul:0", shape=(None, 6, 16), dtype=float32)
Tensor("digit_capsule_layer_12/transpose_3:0", shape=(None, 6, 496), dtype=float32)
Tensor("digit_capsule_layer_12/Squeeze_2:0", shape=(None, 6, 16), dtype=float32)
Tensor("digit_capsule_layer_12/mul_1:0", shape=(None, 6, 16), dtype=float32)
Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_23 (InputLayer)       [(None, 128, 9)]          0         
                                                                 
 conv1d_24 (Conv1D)          (None, 125, 128)          4736      
                                                                 
 max_pooling1d_24 (MaxPoolin  (None, 62, 128)          0         
 g1D)                                   

In [33]:
# Maximum validation Accuracy
val_accuracy_tensor = history.history['val_accuracy']
best_val_accuracy = np.max(val_accuracy_tensor)
print("Best Validation Accuracy:", best_val_accuracy)

Best Validation Accuracy: 0.9311164021492004
