In [None]:
import numpy as np
import pickle

# Load the feature and target arrays from the processed-data directory.
padded_features = np.load("../../data/processed_data/rtp_features.npy")
padded_target = np.load("../../data/processed_data/rtp_target.npy")

# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load this alphabet file if it comes from a trusted source.
with open("../../data/processed_data/alphabet", "rb") as f:
    alphabet = pickle.load(f)

# Quick sanity check of the loaded array shapes.
print(padded_features.shape, padded_target.shape)

In [None]:
# Width of the encoder's output layer: one unit per alphabet entry plus one extra.
# NOTE(review): the extra unit looks like a CTC-style blank label — confirm.
LATENT_SIZE = len(alphabet) + 1

# Number of stacked bidirectional LSTM layers, and units per LSTM.
N_BLSTM_LAYERS = 5
N_CELLS = 64

# NOTE(review): not referenced by any cell visible here (compile uses the
# string 'adam') — confirm whether it should be applied.
LEARNING_RATE = 10**-4

BATCH_SIZE = 8
# Size of the last (feature) axis of the model input.
N_FEATURES = 5
# NOTE(review): not referenced in the visible cells; presumably the padded
# sequence length — confirm.
N_TIMESTEPS = 1713
N_EPOCHS = 1

In [None]:
import tensorflow as tf

def split_data(features, train_size=0.9, seed=None):
    """Randomly split ``features`` along its first axis into train/validation parts.

    Args:
        features: Array supporting ``len()`` and integer-array indexing
            (e.g. a NumPy array); samples are indexed along axis 0.
        train_size: Fraction of samples assigned to the training part,
            in the closed interval [0, 1].
        seed: Optional seed for a reproducible split. When ``None`` (the
            default) the global NumPy random state is used, exactly as before.

    Returns:
        Tuple ``(x_train, x_valid)`` with ``int(len(features) * train_size)``
        samples in ``x_train`` and the remaining samples in ``x_valid``.

    Raises:
        ValueError: If ``train_size`` is outside [0, 1].
    """
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(f"train_size must be within [0, 1], got {train_size!r}")

    size = len(features)
    indices = np.arange(size)
    if seed is None:
        # Legacy behaviour: consume the global NumPy random state.
        np.random.shuffle(indices)
    else:
        # Dedicated generator so the split is repeatable across kernel restarts.
        np.random.default_rng(seed).shuffle(indices)

    train_samples = int(size * train_size)
    x_train = features[indices[:train_samples]]
    x_valid = features[indices[train_samples:]]
    return x_train, x_valid

# Randomly split the feature array into training and validation parts using
# split_data's default train_size (90% / 10%).
x_train, x_valid = split_data(padded_features)

def create_dataset(train_data, n_epochs, batch_size, shuffle_buffer=1000):
    """Build a batched ``tf.data`` pipeline for autoencoder training.

    Every element is an ``(input, target)`` pair in which both members are the
    same sample, because the autoencoder is trained to reconstruct its input.

    Args:
        train_data: Samples to slice along the first axis. This argument is
            now actually used; previously the global ``x_train`` was read
            instead, so every dataset contained the training samples.
        n_epochs: Number of times the data is repeated.
        batch_size: Number of samples per batch.
        shuffle_buffer: Size of the shuffle buffer (default 1000, the value
            that was previously hard-coded).

    Returns:
        The shuffled, repeated, batched and prefetched dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices((train_data, train_data))
    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.repeat(n_epochs)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(1)
    return dataset

# Build the two input pipelines with the same epoch count and batch size;
# x_train / x_valid are passed as the data argument of create_dataset.
train_dataset = create_dataset(x_train, N_EPOCHS, BATCH_SIZE)
validation_dataset = create_dataset(x_valid, N_EPOCHS, BATCH_SIZE)

In [None]:
from keras.layers import Bidirectional, LSTM, Dense, Input, Masking

def build_encoder_model(n_blstm_layers, n_cells, n_features, output_size):
    """Build the encoder: masking, stacked bidirectional LSTMs, softmax head.

    The function arguments are now used throughout; the previous version read
    the module-level ``N_CELLS`` / ``N_FEATURES`` constants and hard-coded a
    five-element mask, silently ignoring ``n_cells`` and ``n_features``.

    Args:
        n_blstm_layers: Number of stacked bidirectional LSTM layers.
        n_cells: Number of units in each LSTM.
        n_features: Size of the feature axis of the input sequences.
        output_size: Number of units in the final softmax layer.

    Returns:
        A ``Sequential`` model accepting variable-length sequences of
        ``n_features``-wide vectors.
    """
    model = tf.keras.models.Sequential()

    # A timestep is masked when all of its features equal 0. A scalar mask
    # value is compared against every feature, so it is equivalent to the
    # former five-zero tensor while working for any n_features.
    model.add(Masking(mask_value=0.0, input_shape=(None, n_features)))

    # The Masking layer already declares the input shape, so the recurrent
    # layers need no shape hint of their own.
    for _ in range(n_blstm_layers):
        model.add(Bidirectional(LSTM(n_cells,
                                     return_sequences=True,
                                     dropout=0.5),
                                merge_mode='sum'))

    model.add(Dense(output_size, activation='softmax'))
    return model

def build_decoder_model(n_blstm_layers, n_cells, n_features, output_size):
    """Build the decoder: stacked bidirectional LSTMs with a sigmoid head.

    The function arguments are now used throughout; the previous version read
    the module-level ``N_CELLS`` / ``N_FEATURES`` constants, so the input-shape
    hint described the raw feature width even when the decoder was fed a
    different width.

    Args:
        n_blstm_layers: Number of stacked bidirectional LSTM layers.
        n_cells: Number of units in each LSTM.
        n_features: Size of the feature axis of the decoder's input sequences.
        output_size: Number of units in the final sigmoid layer.

    Returns:
        A ``Sequential`` model mapping sequences of ``n_features``-wide vectors
        to sequences of ``output_size``-wide vectors.
    """
    model = tf.keras.models.Sequential()

    for layer_index in range(n_blstm_layers):
        lstm_kwargs = {"return_sequences": True, "dropout": 0.5}
        if layer_index == 0:
            # Only the first layer sees the decoder input; deeper layers
            # receive the n_cells-wide output of the layer below.
            lstm_kwargs["input_shape"] = (None, n_features)
        model.add(Bidirectional(LSTM(n_cells, **lstm_kwargs), merge_mode='sum'))

    model.add(Dense(output_size, activation='sigmoid'))
    return model

In [None]:
from tensorflow.keras.models import Model
import os

class Autoencoder(Model):
    """Sequence autoencoder: a checkpoint-initialised encoder plus a fresh decoder.

    The encoder is built with ``build_encoder_model`` and its weights are
    restored from the latest checkpoint in ``encoder_checkpoint_dir``; the
    decoder is built with ``build_decoder_model`` and starts untrained.

    Args:
        latent_dim: Stored as ``self.latent_dim`` only. The layer widths come
            from the module-level ``LATENT_SIZE`` / ``N_FEATURES`` constants,
            not from this argument.
            NOTE(review): switching to ``latent_dim`` would change the encoder
            width and could break checkpoint loading — confirm before changing.
        encoder_checkpoint_dir: Directory searched for the encoder checkpoint
            (default ``"training_1"``, the directory of the previously
            hard-coded ``training_1/cp.ckpt`` path).

    Raises:
        FileNotFoundError: If no checkpoint is found in
            ``encoder_checkpoint_dir``.
    """

    def __init__(self, latent_dim, encoder_checkpoint_dir="training_1"):
        super().__init__()
        self.latent_dim = latent_dim

        self.encoder = build_encoder_model(N_BLSTM_LAYERS, N_CELLS, N_FEATURES, LATENT_SIZE)

        # latest_checkpoint returns None when the directory holds no
        # checkpoint; fail with a clear message instead of passing None on.
        latest = tf.train.latest_checkpoint(encoder_checkpoint_dir)
        if latest is None:
            raise FileNotFoundError(
                f"No encoder checkpoint found in {encoder_checkpoint_dir!r}"
            )
        self.encoder.load_weights(latest)

        # The decoder consumes the encoder output (LATENT_SIZE wide) and emits
        # N_FEATURES values per timestep.
        self.decoder = build_decoder_model(N_BLSTM_LAYERS, N_CELLS, LATENT_SIZE, N_FEATURES)

    def call(self, x):
        """Encode ``x`` and decode it again, returning the decoder output."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# NOTE(review): the argument is only stored as `autoencoder.latent_dim`; the
# layer widths inside Autoencoder come from the module-level LATENT_SIZE
# (len(alphabet) + 1), not from this value.
autoencoder = Autoencoder(len(alphabet))

In [None]:
from tensorflow.keras import losses

# Train with mean squared error between the model output and the target.
# NOTE(review): optimizer='adam' uses the optimizer's default settings, so the
# LEARNING_RATE constant defined earlier is not applied — confirm this is intended.
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [None]:
import keras

# Checkpoints of this run are written under training_2/.
checkpoint_path = "training_2/cp.ckpt"
# NOTE(review): checkpoint_dir is not used by any statement visible in this cell.
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
# (save_weights_only=True: only weights are written, not the full model).
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

# Write TensorBoard logs to ./logs.
# NOTE(review): update_freq=1 presumably logs after every batch — confirm
# against the Keras TensorBoard callback documentation.
tb_callback = tf.keras.callbacks.TensorBoard('./logs', update_freq=1)

# NOTE(review): epochs is hard-coded to 1 rather than N_EPOCHS; the datasets
# are already repeated n_epochs times inside create_dataset, so changing
# either value alone changes how much data one fit() call consumes.
autoencoder.fit(train_dataset,
                epochs=1,
                validation_data=validation_dataset,
                callbacks=[cp_callback, tb_callback]
)