In [1]:
from numpy.random import seed
seed(123)
#from tensorflow import set_random_seed
#set_random_seed(234)

import sklearn
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import decomposition
import scipy

import tensorflow as tf
from keras.models import Model, load_model
from keras.layers import Input, Dense, Layer, InputSpec
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers, activations, initializers, constraints, Sequential
from keras import backend as K
from keras.constraints import UnitNorm, Constraint

Using TensorFlow backend.


In [2]:
tf.__version__

'2.1.0'

# Generate random multi-dimensional correlated data

**Step 1**. Set the dimension of the data.

We set the dim small to clear understanding.

In [3]:
n_dim = 5

**Step 2.1.** Generate a positive definite symmetric matrix to be used as covariance to generate a random data.

This is a matrix of size n_dim x n_dim.

In [4]:
cov = sklearn.datasets.make_spd_matrix(n_dim, random_state=None)

**Step 2.2.** Generate a vector of mean for generating the random data.

This is an np array of size n_dim.

In [5]:
mu = np.random.normal(0, 0.1, n_dim)

**Step 3**. Generate the random data, `X`.

The number of samples for `X` is set as `n`.

In [6]:
n = 1000

X = np.random.multivariate_normal(mu, cov, n)

**Step 4.** Split the data into train and test.

We split the data into train and test. The test will be used to measure the improvement in Autoencoder after tuning.

In [7]:
X_train, X_test = train_test_split(X, test_size=0.5, random_state=123)

# Data preprocessing

In [8]:
scaler = MinMaxScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)

X_test_scaled = scaler.transform(X_test)

In [9]:
X_train_scaled

array([[0.52681732, 0.62810095, 0.59284649, 0.52236607, 0.40995414],
       [0.41440755, 0.55001423, 0.63635182, 0.57413981, 0.30587165],
       [0.38378646, 0.14857921, 0.39851437, 0.34588993, 0.63374338],
       ...,
       [0.28371994, 0.4794476 , 0.48410551, 0.54245635, 0.59818998],
       [0.50510169, 0.76450508, 0.73271896, 0.69792451, 0.34080318],
       [0.3978237 , 0.50478323, 0.40141433, 0.67971132, 0.60012005]])

-----------

# PCA vs Single Layer Linear Autoencoder

### Fit Principal Component Analysis (PCA)

In [10]:
pca = decomposition.PCA(n_components=2)

pca.fit(X_train_scaled)

PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

### Fit Single Layer Linear Autoencoder

In [11]:
nb_epoch = 100
batch_size = 16
input_dim = X_train_scaled.shape[1] #num of predictor variables,
encoding_dim = 2
learning_rate = 1e-3

encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True)
decoder = Dense(input_dim, activation="linear", use_bias = True)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder                    
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/

Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x7f528eebf790>

Compare and contrast the outputs.

### 1. Tied Weights

The weights on Encoder and Decoder are not the same.

In [12]:
w_encoder = np.round(autoencoder.layers[0].get_weights()[0], 2).T  # W in Figure 2.
w_decoder = np.round(autoencoder.layers[1].get_weights()[0], 2)  # W' in Figure 2.
print('Encoder weights \n', w_encoder)
print('Decoder weights \n', w_decoder)

Encoder weights 
 [[-0.47  0.35 -0.51 -0.46 -0.32]
 [-0.68 -0.07  0.8  -0.55 -0.67]]
Decoder weights 
 [[ 0.23 -0.79  0.26 -0.59  0.5 ]
 [ 0.2  -0.06 -0.08 -0.3  -0.7 ]]


### 2. Weight Orthogonality
Unlike PCA weights, the weights on Encoder and Decoder are not orthogonal.

In [13]:
w_pca = pca.components_
np.round(np.dot(w_pca, w_pca.T), 3)

array([[1., 0.],
       [0., 1.]])

In [14]:
np.round(np.dot(w_encoder, w_encoder.T), 3)

array([[0.918, 0.355],
       [0.355, 1.859]], dtype=float32)

In [15]:
np.round(np.dot(w_decoder, w_decoder.T), 3)

array([[ 1.343, -0.1  ],
       [-0.1  ,  0.63 ]], dtype=float32)

### 3. Uncorrelated Features
Unlike PCA features, i.e. Principal Scores, the Encoded features are correlated.

In [16]:
pca_features = pca.fit_transform(X_train_scaled)
np.round(np.cov(pca_features.T), 5)

array([[ 0.09899, -0.     ],
       [-0.     ,  0.01549]])

In [17]:
encoder_layer = Model(inputs=autoencoder.inputs, outputs=autoencoder.layers[0].output)
encoded_features = np.array(encoder_layer.predict(X_train_scaled))
print('Encoded feature covariance\n', np.cov(encoded_features.T))

Encoded feature covariance
 [[0.01636493 0.0002863 ]
 [0.0002863  0.01895543]]


### 4. Unit Norm

In [18]:
print('PCA weights norm, \n', np.sum(w_pca ** 2, axis = 1))
print('Encoder weights norm, \n', np.sum(w_encoder ** 2, axis = 1))
print('Decoder weights norm, \n', np.sum(w_decoder ** 2, axis = 1))

PCA weights norm, 
 [1. 1.]
Encoder weights norm, 
 [0.9175 1.8587]
Decoder weights norm, 
 [1.3427 0.63  ]


### Train Test Reconstruction Accuracy

In [19]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.02325372908429263
Test reconstrunction error
 0.022643016308100682


--------

# Well-posed Autoencoder
### Constraints for Autoencoder
Optimizing Autoencoder using PCA principles

In [20]:
nb_epoch = 100
batch_size = 16
input_dim = X_train_scaled.shape[1] #num of predictor variables,
encoding_dim = 2
learning_rate = 1e-3

### 1. Constraint: Tied weights

Make decoder weights equal to encoder.

In [21]:
class DenseTied(Layer):
    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 tied_to=None,
                 **kwargs):
        self.tied_to = tied_to
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super().__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        if self.tied_to is not None:
            self.kernel = K.transpose(self.tied_to.kernel)
            self._non_trainable_weights.append(self.kernel)
        else:
            self.kernel = self.add_weight(shape=(input_dim, self.units),
                                          initializer=self.kernel_initializer,
                                          name='kernel',
                                          regularizer=self.kernel_regularizer,
                                          constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def call(self, inputs):
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output

#### 1.1 Bias=False for Decoder

In [22]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True)
decoder = DenseTied(input_dim, activation="linear", tied_to=encoder, use_bias = False)

In [23]:
autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=3,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_tied_1 (DenseTied)     (None, 5)                 22        
Total params: 22
Trainable params: 12
Non-trainable params: 10
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f528eeed210>

In [24]:
#w_encoder = np.round(np.transpose(autoencoder.layers[0].get_weights()[0]), 3)
#w_decoder = np.round(autoencoder.layers[1].get_weights()[0], 3)
#get_weights() not working for class DenseTied because DenseTied.weights contains a tensor element
#which
w_encoder = np.round(np.transpose(autoencoder.layers[0].weights[0].numpy()), 3)
w_decoder = np.round(np.transpose(autoencoder.layers[1].weights[0].numpy()), 3)
print('Encoder weights\n', w_encoder)
print('Decoder weights\n', w_decoder)

Encoder weights
 [[ 0.62  -0.761 -0.824  0.049  0.521]
 [ 0.086 -0.727  0.036  0.47  -0.73 ]]
Decoder weights
 [[ 0.62  -0.761 -0.824  0.049  0.521]
 [ 0.086 -0.727  0.036  0.47  -0.73 ]]


In [25]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.21581950338286945
Test reconstrunction error
 0.21446727007950078


#### 1.2 Bias=True for Decoder

In [26]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True)
decoder = DenseTied(input_dim, activation="linear", tied_to=encoder, use_bias = True)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_tied_2 (DenseTied)     (None, 5)                 27        
Total params: 27
Trainable params: 17
Non-trainable params: 10
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f528e99dc90>

In [27]:
#w_encoder = np.round(np.transpose(autoencoder.layers[0].get_weights()[0]), 3)
#w_decoder = np.round(autoencoder.layers[1].get_weights()[1], 3)
w_encoder = np.round(autoencoder.layers[0].weights[0].numpy().transpose(), 3)
w_decoder = np.round(autoencoder.layers[1].weights[1].numpy().transpose(), 3)

print('Encoder weights\n', w_encoder)
print('Decoder weights\n', w_decoder)
print('PCA weights\n', w_pca)

Encoder weights
 [[ 0.384  0.414  0.606  0.233 -0.48 ]
 [ 0.038  0.419 -0.732  0.184 -0.466]]
Decoder weights
 [[ 0.384  0.414  0.606  0.233 -0.48 ]
 [ 0.038  0.419 -0.732  0.184 -0.466]]
PCA weights
 [[ 0.41991315  0.44792337  0.47767246  0.4164908  -0.47053377]
 [ 0.31253406 -0.13919604  0.32638195 -0.84008385 -0.26585876]]


In [28]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.006114087527982598
Test reconstrunction error
 0.006267197898881717


### 2. Constraint: Weights orthogonality.

In [29]:
class WeightsOrthogonalityConstraint (Constraint):
    def __init__(self, encoding_dim, weightage = 1.0, axis = 0):
        self.encoding_dim = encoding_dim
        self.weightage = weightage
        self.axis = axis

    def weights_orthogonality(self, w):
        if(self.axis==1):
            w = K.transpose(w)
        if(self.encoding_dim > 1):
            m = K.dot(K.transpose(w), w) - K.eye(self.encoding_dim)
            return self.weightage * K.sqrt(K.sum(K.square(m)))
        else:
            m = K.sum(w ** 2) - 1.
            return m

    def __call__(self, w):
        return self.weights_orthogonality(w)

#### 2.1 Encoder weight orthogonality

In [30]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias=True, kernel_regularizer=WeightsOrthogonalityConstraint(encoding_dim, weightage=1., axis=0))
decoder = Dense(input_dim, activation="linear", use_bias = True)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_6 (Dense)              (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f24b8518150>

In [31]:
w_encoder = autoencoder.layers[0].get_weights()[0]
print('Encoder weights dot product\n', np.round(np.dot(w_encoder.T, w_encoder), 2))

Encoder weights dot product
 [[0.99 0.02]
 [0.02 0.98]]


In [32]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.022781762567340248
Test reconstrunction error
 0.021670269457052582


#### 2.2 Encoder and Decoder Weight orthogonality

In [33]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias=True, kernel_regularizer=WeightsOrthogonalityConstraint(encoding_dim, weightage=1., axis=0))
decoder = Dense(input_dim, activation="linear", use_bias = True, kernel_regularizer=WeightsOrthogonalityConstraint(encoding_dim, weightage=1., axis=1))

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_8 (Dense)              (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f24b8423510>

In [34]:
w_encoder = autoencoder.layers[0].get_weights()[0]
print('Encoder weights dot product\n', np.round(np.dot(w_encoder.T, w_encoder), 2))

w_decoder = autoencoder.layers[1].get_weights()[0]
print('Decoder weights dot product\n', np.round(np.dot(w_decoder, w_decoder.T), 2))

Encoder weights dot product
 [[ 1.01 -0.  ]
 [-0.    1.  ]]
Decoder weights dot product
 [[ 0.97 -0.01]
 [-0.01  1.  ]]


### 3. Constraint: Uncorrelated Encoded features

In [35]:
class UncorrelatedFeaturesConstraint (Constraint):

    def __init__(self, encoding_dim, weightage=1.0):
        self.encoding_dim = encoding_dim
        self.weightage = weightage

    def get_covariance(self, x):
        x_centered_list = []

        for i in range(self.encoding_dim):
            x_centered_list.append(x[:, i] - K.mean(x[:, i]))

        x_centered = tf.stack(x_centered_list)
        covariance = K.dot(x_centered, K.transpose(x_centered)) / \
            tf.cast(x_centered.get_shape()[0], tf.float32)

        return covariance

    # Constraint penalty
    def uncorrelated_feature(self, x):
        if(self.encoding_dim <= 1):
            return 0.0
        else:
            output = K.sum(K.square(
                self.covariance - tf.math.multiply(self.covariance, K.eye(self.encoding_dim))))
            return output

    def __call__(self, x):
        self.covariance = self.get_covariance(x)
        return self.weightage * self.uncorrelated_feature(x)

In [36]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias=True,
                activity_regularizer=UncorrelatedFeaturesConstraint(encoding_dim, weightage=1.))
decoder = Dense(input_dim, activation="linear", use_bias=True)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_10 (Dense)             (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f24b84c5390>

In [37]:
encoder_layer = Model(inputs=autoencoder.inputs, outputs=autoencoder.layers[0].output)
encoded_features = np.array(encoder_layer.predict(X_train_scaled))
print('Encoded feature covariance\n', np.round(np.cov(encoded_features.T), 3))

Encoded feature covariance
 [[ 0.006 -0.   ]
 [-0.     0.019]]


In [38]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.024166302454890175
Test reconstrunction error
 0.022729708452168602


### 4. Constraint: Unit Norm

#### 4.1 Unit Norm constraint on Encoding Layer

In [39]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True, kernel_constraint=UnitNorm(axis=0))
decoder = Dense(input_dim, activation="linear", use_bias = True)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 2)                 12        
_________________________________________________________________
dense_12 (Dense)             (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f24b8156dd0>

In [40]:
w_encoder = np.round(autoencoder.layers[0].get_weights()[0], 2).T  # W in Figure 2.
print('Encoder weights norm, \n', np.round(np.sum(w_encoder ** 2, axis = 1),3))

Encoder weights norm, 
 [1.006 1.006]


In [41]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.02233539208002045
Test reconstrunction error
 0.02182826257448169


#### 4.2 Unit Norm constraint on both Encoding and Decoding Layer

In [42]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True, kernel_constraint=UnitNorm(axis=0))
decoder = Dense(input_dim, activation="linear", use_bias = True, kernel_constraint=UnitNorm(axis=1))

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 2)                 12        
_________________________________________________________________
dense_14 (Dense)             (None, 5)                 15        
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f24b806a9d0>

In [43]:
w_encoder = np.round(autoencoder.layers[0].get_weights()[0], 2).T  # W in Figure 2.
w_decoder = np.round(autoencoder.layers[1].get_weights()[0], 2)  # W' in Figure 2.

print('Encoder weights norm, \n', np.round(np.sum(w_encoder ** 2, axis = 1),3))
print('Decoder weights norm, \n', np.round(np.sum(w_decoder ** 2, axis = 1),3))

Encoder weights norm, 
 [0.999 1.004]
Decoder weights norm, 
 [0.996 0.993]


In [44]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.015216757583960159
Test reconstrunction error
 0.014341794905161442


----------

## Constraints put together

In [45]:
encoder = Dense(encoding_dim, activation="linear", input_shape=(input_dim,), use_bias = True, kernel_regularizer=WeightsOrthogonalityConstraint(encoding_dim, weightage=1., axis=0), kernel_constraint=UnitNorm(axis=0))
decoder = DenseTied(input_dim, activation="linear", tied_to=encoder, use_bias = False)

autoencoder = Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)

autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()

autoencoder.fit(X_train_scaled, X_train_scaled,
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                verbose=0)

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 2)                 12        
_________________________________________________________________
dense_tied_3 (DenseTied)     (None, 5)                 22        
Total params: 22
Trainable params: 12
Non-trainable params: 10
_________________________________________________________________


<keras.callbacks.callbacks.History at 0x7f249051b1d0>

In [46]:
train_predictions = autoencoder.predict(X_train_scaled)
print('Train reconstrunction error\n', sklearn.metrics.mean_squared_error(X_train_scaled, train_predictions))
test_predictions = autoencoder.predict(X_test_scaled)
print('Test reconstrunction error\n', sklearn.metrics.mean_squared_error(X_test_scaled, test_predictions))

Train reconstrunction error
 0.00848629044050973
Test reconstrunction error
 0.008613352028043192


-------------