In [13]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import MeanSquaredError
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.activations import sigmoid
from datetime import datetime
from matplotlib import pyplot as plt
from datetime import datetime
import os
import pandas as pd
import numpy as np

# Load and check data

In [14]:
FILE_NAME = "GEO_data_batch_corr_final"

N_LAYERS = 2
N_NODES = [100, 50]
DROPOUT = [0.1]
BATCH_SIZE = 10
EPOCHS =1
TEST_RATIO = 0.15

In [15]:
now = datetime.now().strftime("%Y%m%d-%H%M%S")
output = os.path.join('output', now)
print(output)

output/20200723-233156


In [16]:
assert len(N_NODES) == N_LAYERS or len(N_NODES) == 1
assert len(DROPOUT) == N_LAYERS or len(DROPOUT) == 1

In [17]:
dataframe = pd.read_pickle('data/pd/'+FILE_NAME)
print(dataframe.shape)
data = dataframe.values
data = normalize(data)

(954, 6785)


In [18]:
classes = np.random.randint(0,3, dataframe.shape[0])
classes = to_categorical(classes)

In [19]:
x_train, x_test, y_train, y_test = train_test_split(data, classes, test_size=TEST_RATIO)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

810 train samples
144 test samples


# Define basic two-layer autoencoder

In [45]:
class DenoisingAutoencoder(object):

    def __init__(self, inputs, output_path, num_hidden=500, dropout_rate=0.05,
        encoder_act='sigmoid', decoder_act='linear', bias=True, loss_fn='mse',
        batch_size=32, num_epochs=300, optimizer='rmsprop', verbose=1):

        self.inputs = inputs
        self.num_hidden = num_hidden
        self.dropout_rate = dropout_rate
        self.encoder_act = encoder_act
        self.decoder_act = decoder_act
        self.bias = bias
        self.loss_fn = loss_fn
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.optimizer = optimizer
        self.verbose = verbose

        self.num_inputs = inputs.shape[1]

        now = datetime.now().strftime("%Y%m%d-%H%M%S")
        self.log = os.path.join(now, output_path)

        self.tsb = TensorBoard(log_dir=self.log, write_graph=True,
            update_freq='batch')

        self.mse_obj = MeanSquaredError()

        # Build layers #########################################################

        dropout_layer = Dropout(rate=dropout_rate)
        dropout_output = dropout_layer(self.inputs)

        self.encoder_layer = Dense(units=self.num_hidden,
            kernel_initializer='glorot_uniform', activation=self.encoder_act,
            name="encoder{}{}".format(self.num_inputs, self.num_hidden),
            use_bias=self.bias)
        self.encoder_ouput = self.encoder_layer(dropout_output)

        self.decoder_layer = Dense(units=inputs.shape[1],
            kernel_initializer='glorot_uniform', activation=self.decoder_act,
            name="decdoer{}{}".format(self.num_hidden, self.num_inputs),
            use_bias=self.bias)
        self.decoder_ouput = self.decoder_layer(self.encoder_ouput)

        # Build model ##########################################################

        self.autoencoder_model = Model(self.inputs, self.decoder_ouput)
        self.autoencoder_model.compile(loss=self.loss_fn, optimizer=self.optimizer)

        self.encoder_model = Model(self.inputs, self.encoder_ouput)

    def _mse(self, real):
        recon = self.autoencoder_model.predict(real)
        return self.mse_obj(real, recon)

    def fit_unsupervised(self, data_train, data_val, data_test):

        early_stop = EarlyStopping(monitor='val_loss', patience=1, verbose=0)

        self.autoencoder_model.fit(x=data_train, y=data_train,
            callbacks=[early_stop, self.tsb], epochs=self.num_epochs,
            batch_size=self.batch_size, shuffle=True,
            validation_data=(data_val, data_val))

        self.autoencoder_model.save(os.path.join(self.log, 'model'))

        return self._mse(data_train), self._mse(data_val), self._mse(data_test)


In [55]:
class AutoencoderStack(object):

    def __init__(self, num_features, num_stacks=2, hidden_nodes=[500, 100], output_path="/"):

        assert num_stacks == len(hidden_nodes) or len(hidden_nodes) == 1

        self.num_features = num_features
        self.num_stacks = num_stacks
        self.hidden_nodes = hidden_nodes
        self.output_path = output_path

        self.stack = []
        self.inputs = Input(shape=(num_features,))

        for i in range(self.num_stacks):
            input_layer = self.inputs if not i else self.stack[-1].encoder_ouput

            model = DenoisingAutoencoder(input_layer, output_path=self.output_path, num_hidden=hidden_nodes[i])

            if i:
                self.stack[-1].decoder_layer(model.decoder_output)

            self.stack.append(model)

    def unsupervised_fit(self, data_train, data_val, data_test, output_dir):

        mse_per_layer = []

        for layer in self.stack:
            mse_tuple = layer.fit_unsupervised(data_train, data_val, data_test)
            mse_per_layer.append(mse_tuple)

        return mse_per_layer

In [None]:
model = AutoencoderStack(x_train.shape[1], num_stacks=N_LAYERS, hidden_nodes=N_NODES)

encoders, data, recon_mse = model.unsupervised_fit(x_train, x_test, x_test, dir_out=output)

Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: encoder6785100_6/Sigmoid:0
