In [1]:
%load_ext tensorboard
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm_notebook
%matplotlib inline
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from sklearn import preprocessing, model_selection
from datetime import datetime
from sklearn.manifold import TSNE
import seaborn as sns

In [2]:
class Autoencoder(layers.Layer):
    """Encoder half of the autoencoder.

    Stacks two dense layers, both ``encoding_layer_size`` units wide: a ReLU
    layer followed by a sigmoid layer whose output is the encoded
    representation.
    """

    def __init__(self, encoding_layer_size):
        """Create the two dense layers.

        Args:
            encoding_layer_size: number of units in both the hidden and the
                output layer of the encoder.
        """
        super().__init__()
        # Hidden layer: ReLU with a uniformly initialised kernel and zero bias.
        self.encoding_layer = layers.Dense(
            encoding_layer_size,
            activation=tf.nn.relu,
            kernel_initializer='random_uniform',
            bias_initializer='zeros',
        )
        # Output layer: sigmoid, Keras default initialisers.
        self.output_layer = layers.Dense(
            encoding_layer_size,
            activation=tf.nn.sigmoid,
        )

    def call(self, input_attributes):
        """Return the encoding of ``input_attributes``."""
        hidden = self.encoding_layer(input_attributes)
        encoded = self.output_layer(hidden)
        return encoded

class Autodecoder(layers.Layer):
    """Decoder half of the autoencoder.

    A ReLU dense layer of ``encoding_layer_size`` units followed by a sigmoid
    dense layer of ``attributes_size`` units that produces the reconstruction.
    """

    def __init__(self, encoding_layer_size, attributes_size):
        """Create the two dense layers.

        Args:
            encoding_layer_size: number of units in the hidden decoding layer.
            attributes_size: number of units in the output (reconstruction)
                layer.
        """
        super().__init__()
        # Hidden layer: ReLU with He-normal kernel initialisation and zero bias.
        self.decoding_layer = layers.Dense(
            encoding_layer_size,
            activation=tf.nn.relu,
            kernel_initializer='he_normal',
            bias_initializer='zeros',
        )
        # Output layer: sigmoid, Keras default initialisers.
        self.output_layer = layers.Dense(
            attributes_size,
            activation=tf.nn.sigmoid,
        )

    def call(self, encoded):
        """Return the reconstruction computed from ``encoded``."""
        hidden = self.decoding_layer(encoded)
        reconstructed = self.output_layer(hidden)
        return reconstructed



In [3]:
class AutoencoderModel(tf.keras.Model):
    """Full autoencoder: an ``Autoencoder`` (encoder) feeding an ``Autodecoder``."""

    def __init__(self, encoding_layer_size, attributes_size):
        """Build the encoder and decoder sub-layers.

        Args:
            encoding_layer_size: width passed to both the encoder and the
                decoder.
            attributes_size: width of the decoder's output layer.
        """
        super().__init__()
        self.encoder = Autoencoder(encoding_layer_size=encoding_layer_size)
        self.decoder = Autodecoder(
            encoding_layer_size=encoding_layer_size,
            attributes_size=attributes_size,
        )

    def call(self, input_attributes):
        """Encode ``input_attributes`` and return the decoded reconstruction."""
        return self.decoder(self.encoder(input_attributes))

In [4]:
def loss(model, original):
    """Mean squared reconstruction error of ``model`` on ``original``.

    Args:
        model: callable mapping a batch to its reconstruction.
        original: the batch to reconstruct; also used as the target.

    Returns:
        The mean over all elements of ``(model(original) - original) ** 2``.
    """
    reconstructed = model(original)
    squared_residual = tf.square(tf.subtract(reconstructed, original))
    return tf.reduce_mean(squared_residual)

In [5]:
def grad_step(loss, model, optamizer, target):
    """Apply one gradient-descent update to ``model`` using the batch ``target``.

    Args:
        loss: callable ``loss(model, batch)`` returning the scalar to minimise.
        model: the model to update; must expose ``trainable_variables``.
        optamizer: optimizer exposing ``apply_gradients``.
        target: the batch the loss is evaluated on.

    Returns:
        The loss value computed on ``target`` before the update was applied.
    """
    # Only the forward pass has to be recorded; differentiating and applying
    # the update happen after the tape context is closed.
    with tf.GradientTape() as tape:
        loss_value = loss(model, target)

    # Read the variables after the forward pass so that any variables the
    # model creates on its first call are included.
    variables = model.trainable_variables
    gradients = tape.gradient(loss_value, variables)
    optamizer.apply_gradients(zip(gradients, variables))
    return loss_value

In [6]:
# Make float64 the default float type for Keras; the training cell casts its
# input data to np.float64 before feeding it to the model.
tf.keras.backend.set_floatx('float64')

In [None]:
# Hyperparameters for this training run.
learning_rate = 0.00001
encoding_layer_size = 100
epochs = 100
batch_size = 256

# One log directory per run, named after the start time.
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
training_logs = os.path.join('logs/train/', current_time)
# Create the directory explicitly so that writing parameters.txt below does
# not rely on the summary writer having created it already.
os.makedirs(training_logs, exist_ok=True)
writer = tf.summary.create_file_writer(training_logs)

# Record the hyperparameters next to the TensorBoard event files.
with open(os.path.join(training_logs, 'parameters.txt'), 'w') as f:
    f.write('learning_rate: {}\n'.format(learning_rate))
    f.write('encoding_layer_size: {}\n'.format(encoding_layer_size))
    f.write('epochs: {}\n'.format(epochs))
    f.write('batch_size: {}\n'.format(batch_size))

# NOTE(review): df_numerical, X_train, X_test and X_cv are not defined in the
# cells shown here; this cell assumes they exist in the kernel and that
# len(df_numerical.columns) - 1 equals the number of columns in X_train --
# TODO confirm.
autoencoder = AutoencoderModel(encoding_layer_size=encoding_layer_size, attributes_size=len(df_numerical.columns) - 1)
optamizer = tf.optimizers.Adam(learning_rate=learning_rate)

# Already scaled so just setting it
scaled_train = pd.DataFrame(X_train).astype(np.float64)
scaled_test = pd.DataFrame(X_test).astype(np.float64)
scaled_cv = pd.DataFrame(X_cv).astype(np.float64)

train_dataset = tf.data.Dataset.from_tensor_slices(scaled_train.values)
train_dataset_batches = train_dataset.shuffle(len(scaled_train)).batch(batch_size)

# DataFrame.sample cannot draw more rows than exist (without replacement), so
# cap the CV sample at the size of the CV set.
cv_sample_size = min(batch_size, len(scaled_cv))

with writer.as_default():
    with tf.summary.record_if(True):
        cumulative_step = 0
        for epoch in tqdm_notebook(range(epochs)):
            for step, cur_batch in enumerate(train_dataset_batches):
                grad_step(loss, autoencoder, optamizer, cur_batch)

                # Both losses are evaluated after the update and logged under
                # the same step so the two curves line up in TensorBoard.
                loss_values = loss(autoencoder, cur_batch)
                cv_loss_values = loss(autoencoder, scaled_cv.sample(cv_sample_size).values)

                tf.summary.scalar('loss', loss_values, step=cumulative_step)
                tf.summary.scalar('CV-loss', cv_loss_values, step=cumulative_step)

                cumulative_step += 1
