# Import packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras
import os, sys
import random
from datetime import datetime

from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Reshape, Conv1DTranspose, BatchNormalization, Layer

# Make the sibling "Timestamp2Vec" directory importable. os.path.join keeps
# this working on every OS, unlike a hard-coded "\\" separator.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.insert(1, os.path.join(PROJECT_ROOT, "Timestamp2Vec"))
# NOTE(review): star import hides where names come from; normalize(), used
# further below, presumably comes from this module.
from helper_functions import *

# Seed every random number generator the notebook relies on. Seeding only
# Python's `random` leaves the NumPy and TensorFlow generators (latent
# sampling noise, batch shuffling) unseeded, so runs would not be repeatable.
SEED = 123
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# When True, the per-feature min/max of the training set are written to disk.
SAVE_MIN_MAX_VAL = False

# Data preparation

In [2]:
# Folder that holds the vectorized timestamps. USERPROFILE only exists on
# Windows, so fall back to the generic home directory on other systems
# instead of raising KeyError.
home_dir = os.environ.get('USERPROFILE', os.path.expanduser('~'))
data_location = os.path.join(home_dir, 'Desktop', 'data_thesis')
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code; only load this file from a trusted source.
data = np.load(os.path.join(data_location, "vectorized_dates.npy"), allow_pickle=True)

In [20]:
# NOTE(review): this cell carries execution count 20, i.e. it was run out of
# order. `x` is not read anywhere before the encoder cell rebinds it, so this
# looks like leftover exploration. If `data` is a NumPy array this slice is a
# view, which presumably keeps the originally loaded array alive even after
# `data` is rebound and deleted below — TODO confirm and remove the cell.
x = data[:,3]

In [None]:
# Cast the whole array to float64, so any Boolean entries become 0.0 / 1.0,
# then display the resulting shape.
data = np.asarray(data, dtype=np.float64)
data.shape

### Train and Test set

In [None]:
# Split the samples 80/20 into train and test partitions, reproducibly via SEED.
# The autoencoder reconstructs its own input, so there is no separate target
# array: splitting `data` once yields the same partitions as splitting
# (data, data) without materialising a second, immediately discarded copy.
train_data, test_data = train_test_split(
    data, test_size=0.2, random_state=SEED
)
# Every sample must land in exactly one partition.
assert len(train_data) + len(test_data) == len(data)
print(train_data.shape)
print(test_data.shape)

In [None]:
# Drop the notebook-level reference to the full array; only the train/test
# partitions are used from here on.
# NOTE(review): re-running this cell on its own raises NameError because
# `data` no longer exists.
del data

In [None]:
if SAVE_MIN_MAX_VAL:
    # Per-feature minimum and maximum of the (not yet normalized) training set.
    min_val = np.amin(train_data, axis=0)
    max_val = np.amax(train_data, axis=0)

    # Build <parent of cwd>/Data/important_variables with os.path.join rather
    # than "\D..."-style literals: those contain invalid escape sequences and
    # only form a real path on Windows.
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    data_dir = os.path.join(parent_dir, "Data")
    variables_dir = os.path.join(data_dir, "important_variables")

    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
        print('Created Data folder as it was not present')

    if not os.path.isdir(variables_dir):
        os.makedirs(variables_dir)
        print('Created important_variables folder as it was not present')

    np.save(os.path.join(variables_dir, "min_val.npy"), min_val)
    np.save(os.path.join(variables_dir, "max_val.npy"), max_val)

In [None]:
# Scale both partitions with the normalize() helper.
train_data, test_data = normalize(train_data), normalize(test_data)

# Convert both partitions to float32 tensors placed on the CPU, so that GPU
# memory stays available.
with tf.device("cpu:0"):
    train_data, test_data = (
        tf.cast(partition, tf.float32) for partition in (train_data, test_data)
    )

# Variational Autoencoder model

### Model parameters

In [None]:
# --- Architecture and optimisation hyper-parameters ---
X_SHAPE = train_data.shape[1]  # size of axis 1 of the training tensor
LATENT_DIM = 8
BATCH_SIZE = 256
EPOCHS = 25

# --- Workflow switches ---
TRAIN = True
LOAD_ENCODER, LOAD_DECODER = False, False
SAVE_LATENT_VAR = False
SAVE_ENCODER_DECODER = True

# --- Names and locations used when loading stored models ---
NAME_ENC, NAME_DEC = "encoder_VAE1", "decoder_VAE1"
ENCODER_LOCATION = "Models/Encoder/" + NAME_ENC
DECODER_LOCATION = "Models/Decoder/" + NAME_DEC

# Seeded weight initializer shared by the encoder and decoder layers.
initializer = tf.keras.initializers.GlorotUniform(seed=SEED)

In [None]:
if SAVE_LATENT_VAR:
    # Target folder: <parent of cwd>/Data/important_variables, built with
    # os.path.join so it is valid on every OS and free of invalid "\D"-style
    # escape sequences.
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    data_dir = os.path.join(parent_dir, "Data")
    variables_dir = os.path.join(data_dir, "important_variables")

    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
        print('Created Data folder as it was not present')

    if not os.path.isdir(variables_dir):
        os.makedirs(variables_dir)
        print('Created important_variables folder as it was not present')

    # Write the latent dimensionality INSIDE important_variables. The previous
    # string concatenation lacked a separator and produced a file named
    # "important_variableslatent_dim.txt" next to the folder instead.
    with open(os.path.join(variables_dir, 'latent_dim.txt'), 'w') as f:
        f.write('%d' % LATENT_DIM)

### Sampling layer

In [None]:
class Sampling(Layer):
    """Sampling layer of the VAE: draws the latent variable z.

    Given a mean and a log-variance per latent dimension, returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is drawn
    from a normal distribution.
    """

    def call(self, inputs):
        """Sample one latent vector per batch element.

        Args:
            inputs: pair ``(z_mean, z_log_var)``; both are indexed here as
                2-D tensors (batch, latent dimension).

        Returns:
            Tensor holding the sampled latent variable z.
        """
        z_mean, z_log_var = inputs
        # The noise takes the dynamic (batch, dim) shape of the mean tensor.
        mean_shape = tf.shape(z_mean)
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log(variance)) is the standard deviation.
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * noise

### Encoder network

In [None]:
# Build the encoder network, or restore a stored one when LOAD_ENCODER is set.
# The encoder maps an input vector of length X_SHAPE to three outputs:
# z_mean, z_log_var and a sampled latent vector z (each of size LATENT_DIM).
if LOAD_ENCODER:
    # NOTE(review): the stored model contains the custom Sampling layer;
    # load_model may need custom_objects={"Sampling": Sampling} — TODO confirm.
    encoder = keras.models.load_model(ENCODER_LOCATION)
else:
    # define the input of the encoder
    input_encoder = keras.Input(shape = (X_SHAPE,))

    # define the layers of the model
    # NOTE(review): `x` is a notebook-level name that this cell rebinds; it
    # also shadows the `x` assigned in the data-preparation section.
    # Add a trailing channel axis so Conv1D can slide over the feature axis.
    x = Reshape((X_SHAPE, 1))(input_encoder)
    x = Conv1D(filters=16, kernel_size=3, padding="same", activation="elu", kernel_initializer = initializer)(x)
    x = BatchNormalization()(x)
    # "valid" padding with kernel_size=2 shortens the sequence by one step.
    x = Conv1D(filters=8, kernel_size=2, padding="valid", activation="elu", kernel_initializer = initializer)(x)
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(30, activation="tanh", kernel_initializer = initializer)(x)

    # obtain the mean and variance to sample from
    # NOTE(review): every layer shares the one seeded `initializer` instance.
    # Depending on the Keras version, layers with identical weight shapes
    # (z_mean and z_log_var here) may start from identical weights — TODO confirm.
    z_mean = Dense(LATENT_DIM, name ="z_mean", kernel_initializer = initializer)(x)
    z_log_var = Dense(LATENT_DIM, name ="z_log_var", kernel_initializer = initializer)(x)

    # sample, using the z_mean and z_log_var
    z = Sampling()([z_mean, z_log_var])

    # define the encoder model
    encoder = Model(input_encoder, [z_mean, z_log_var, z], name ="encoder")
# Print the layer overview of whichever encoder is in use.
encoder.summary()

### Decoder network

In [None]:
# Build the decoder network, or restore a stored one when LOAD_DECODER is set.
# The decoder maps a latent vector of size LATENT_DIM back to a vector of
# length X_SHAPE with values in (0, 1) (sigmoid output).
if LOAD_DECODER:
    decoder = keras.models.load_model(DECODER_LOCATION)
else:
    # define the input of the decoder
    input_decoder = keras.Input(shape = (LATENT_DIM,))

    # define the layers of the model
    # NOTE(review): 20 and 8 are hard-coded; 8 matches the filter count of the
    # encoder's last Conv1D, and 20 presumably equals X_SHAPE - 1 (the encoder's
    # sequence length after its "valid" convolution) — TODO confirm.
    x = Dense(20 * 8, activation="elu", kernel_initializer = initializer)(input_decoder)
    x = Reshape((20, 8))(x)
    # "valid" transposed convolution with kernel size 2 lengthens the sequence by one.
    x = Conv1DTranspose(8, 2, padding="valid", activation="elu", kernel_initializer = initializer)(x)
    x = BatchNormalization()(x)
    x = Conv1DTranspose(16, 3, padding="same", activation="elu", kernel_initializer = initializer)(x)
    x = BatchNormalization()(x)
    # Collapse the 16 channels into a single channel before flattening.
    x = Conv1DTranspose(1, 1, activation="elu", kernel_initializer = initializer)(x)
    x = Flatten()(x)
    x = Dense(X_SHAPE, activation="sigmoid", kernel_initializer = initializer)(x)
    # NOTE(review): the Dense layer above already outputs X_SHAPE units, so
    # this Reshape looks like a no-op — TODO confirm before removing.
    decoder_output = Reshape((X_SHAPE,))(x)
    # define the decoder model
    decoder = Model(input_decoder, decoder_output, name ="decoder")
# Print the layer overview of whichever decoder is in use.
decoder.summary()

### VAE

#### VAE Class

In [None]:
class VariationalAutoEncoder(Model):
    """Variational autoencoder that trains an encoder/decoder pair jointly.

    The loss is the mean squared reconstruction error scaled by the input
    width, plus the KL divergence between the encoder's Gaussian and a
    standard normal prior.

    Args:
        encoder: model returning ``(z_mean, z_log_var, z)`` for a batch.
        decoder: model mapping ``z`` back to the input space.
        X_SHAPE: input width; scales the reconstruction loss.
    """

    def __init__(self, encoder, decoder, X_SHAPE):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.X_shape = X_SHAPE
        # Running means of the losses. Returning raw per-batch tensors makes
        # the epoch logs (and the val_loss watched by callbacks) reflect only
        # the last batch; trackers report the average over all batches.
        self.total_loss_tracker = keras.metrics.Mean(name="loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers Keras resets at the start of every epoch / evaluation."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, x, training):
        """Return ``(total_loss, reconstruction_loss, kl_loss)`` for batch ``x``.

        ``training`` is passed on to both sub-models so that their
        BatchNormalization layers use batch statistics while training and
        moving averages while evaluating. Without it they run in inference
        mode during training as well.
        """
        # map to latent space and obtain z_mean, z_log_var, z
        z_mean, z_log_var, z = self.encoder(x, training=training)
        # decode z to obtain the reconstruction
        decoded = self.decoder(z, training=training)
        # Mean squared error per sample, averaged over the batch and scaled by
        # the input width stored on the instance (not the notebook global).
        reconstruction_loss = tf.reduce_mean(
            keras.losses.mean_squared_error(x, decoded)
        )
        reconstruction_loss *= self.X_shape
        # KL divergence between N(z_mean, exp(z_log_var)) and N(0, 1).
        # NOTE(review): this averages over the latent dimensions as well as the
        # batch; the usual ELBO sums over latent dimensions. Kept as is because
        # it changes the loss weighting — confirm that this is intended.
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(kl_loss)
        total_loss = reconstruction_loss + kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _update_trackers(self, total_loss, reconstruction_loss, kl_loss):
        """Feed the batch losses to the trackers and return their running means."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, x):
        """Run one optimisation step; returns the running mean of each loss."""
        # fit() passes (inputs, targets); only the inputs are needed.
        if isinstance(x, tuple):
            x = x[0]
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(
                x, training=True
            )
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, x):
        """Evaluate one batch without updating weights; returns running means."""
        if isinstance(x, tuple):
            x = x[0]
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(
            x, training=False
        )
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

In [None]:
# Initialize the VAE
# Wraps the encoder and decoder objects defined in the cells above; X_SHAPE is
# stored on the instance as `X_shape`.
variational_autoencoder = VariationalAutoEncoder(encoder, decoder, X_SHAPE)

#### Training

In [None]:
def train(variational_autoencoder):
    """Fit the given model on the training tensor with early stopping.

    Reads the notebook-level ``train_data``, ``test_data``, ``EPOCHS`` and
    ``BATCH_SIZE``. Training stops once ``val_loss`` has not improved for 5
    epochs, and the best weights seen are restored.

    NOTE(review): ``test_data`` doubles as the validation set that drives
    early stopping — confirm that this is intended.

    Args:
        variational_autoencoder: compiled model exposing ``fit``.

    Returns:
        Whatever ``fit`` returns (the training history object).
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=5,
        restore_best_weights=True,
    )
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(test_data, test_data),
        shuffle=True,
        verbose=1,
        callbacks=[early_stopping],
    )
    # Inputs double as targets: the model reconstructs its own input.
    return variational_autoencoder.fit(train_data, train_data, **fit_options)

In [None]:
if TRAIN:
    # The loss is computed inside the model's train_step/test_step, so only an
    # optimizer is passed to compile().
    variational_autoencoder.compile(optimizer="adam")
    history = train(variational_autoencoder)
    # Plot the train and val loss on an explicit, labelled figure so the plot
    # can be read on its own.
    fig, ax = plt.subplots()
    ax.plot(history.history["loss"], label="Training Loss")
    ax.plot(history.history["val_loss"], label="Validation Loss")
    ax.set(title="VAE training history", xlabel="Epoch", ylabel="Loss")
    ax.legend()

In [None]:
if SAVE_ENCODER_DECODER:
    # Create the Models, Models/Encoder and Models/Decoder folders when missing.
    # Each folder is checked right before it is created. The earlier version
    # called os.makedirs on ENCODER_LOCATION / DECODER_LOCATION, which created
    # empty "<folder>/<model name>" directories while reporting that the parent
    # folder had been created.
    for folder in ("Models", "Models/Encoder", "Models/Decoder"):
        if not os.path.isdir(folder):
            os.makedirs(folder)
            print('Created {} folder as it was not present'.format(folder))

    # Timestamp that makes every saved model pair unique.
    # NOTE(review): models are stored under this timestamp, whereas the
    # LOAD_ENCODER / LOAD_DECODER branches read ENCODER_LOCATION and
    # DECODER_LOCATION — confirm which naming scheme is intended.
    date = datetime.now().strftime("%Y%m%d%H%M%S")
    # Store the encoder in the Encoder folder
    variational_autoencoder.encoder.save("Models/Encoder" + "/{}".format(date))
    print("Saved encoder")
    # Store the decoder in the Decoder folder
    variational_autoencoder.decoder.save("Models/Decoder" + "/{}".format(date))
    print("Saved decoder")