In [None]:
# Training parameters

window = 672       # Window length in time steps (1 week — presumably 15-minute sampling; TODO confirm)
stride = 8         # Stride between windows in time steps (1 hour)
                   # NOTE(review): if 672 steps are one week, 8 steps would be 2 hours, not 1 — confirm.
latent_dim = 10    # Size of the latent vector z
epochs = 500       # Maximum number of epochs; the fit call also installs an EarlyStopping callback
batch_size = 8     # Batch size
M = 100            # Number of Monte Carlo samples drawn from the decoder distribution and averaged
alpha = 0.5        # Multiplier applied to the log-variance inside exp() by sample_z1 / sample_z2
noise_factor = [0.2, 0.2, 0.2]  # Per-feature scale of the Gaussian noise added by AddNoise

In [None]:
# Reparametrization trick
def sample_z1(args):
    """Draw a latent sample with the reparametrization trick.

    Args:
        args: pair ``(z_mean, z_log_var)`` of tensors with the same shape.

    Returns:
        ``z_mean + exp(alpha * z_log_var) * eps`` where ``eps`` is standard
        normal noise with the same shape as ``z_mean``. ``alpha`` is the
        module-level constant defined with the training parameters.
    """
    z_mean, z_log_var = args
    # Take the noise shape from the tensor itself: this works for any rank and
    # avoids depending on the standalone-Keras backend alias `K`, which is only
    # imported in a later cell.
    eps = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
    return z_mean + tf.exp(alpha * z_log_var) * eps

# Reparametrization trick
def sample_z2(args):
    """Draw a sample with the reparametrization trick (used for the sequence-shaped `c`).

    Args:
        args: pair ``(z_mean, z_log_var)`` of tensors with the same shape.

    Returns:
        ``z_mean + exp(alpha * z_log_var) * eps`` where ``eps`` is standard
        normal noise with the same shape as ``z_mean``. ``alpha`` is the
        module-level constant defined with the training parameters.
    """
    z_mean, z_log_var = args
    # Shape is read from the input, so no per-axis bookkeeping is needed and the
    # function no longer relies on the `K` backend alias imported in a later cell.
    eps = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
    return z_mean + tf.exp(alpha * z_log_var) * eps

In [None]:
class AddNoise(tfk.layers.Layer):
    """Layer that corrupts its input with additive zero-mean Gaussian noise."""

    def call(self, inputs):
        """Return ``data`` plus scaled standard-normal noise.

        Args:
            inputs: pair ``(data, noise_factor)``. ``noise_factor`` is
                multiplied element-wise with the noise, so it must broadcast
                against ``data`` (e.g. one factor per feature on the last axis).

        Returns:
            Tensor with the same shape as ``data``.
        """
        data, noise_factor = inputs
        # Draw the noise for the whole batch at once with the shape of the
        # actual input, instead of looping over samples with a hard-coded
        # (672, 3) shape.
        noise = tf.random.normal(tf.shape(data), mean=0.0, stddev=1.0, dtype=data.dtype) * noise_factor
        # NOTE(review): there is no `training` switch, so noise is also added
        # whenever the layer is called for validation / inference.
        return data + noise

In [None]:
class SimScore(tfk.layers.Layer):
    """Scaled dot-product similarity over the last axis of a sequence, applied back to the sequence."""

    def call(self, inputs):
        """Compute ``seq @ softmax(seq^T @ seq / sqrt(2 * h_dim))``.

        Args:
            inputs: pair ``(seq, h_dim)``. ``seq`` is a rank-3 tensor
                ``(batch, steps, features)``; ``h_dim`` is the number whose
                double is used as the scaling denominator.

        Returns:
            Tensor with the same shape as ``seq``.
        """
        seq, h_dim = inputs  # seq: batch x x_dim x h_dim*2

        # NOTE(review): the encoder passes `window` as h_dim, so the scale is
        # sqrt(2 * window) — confirm this is the intended normalisation.
        scale = tf.math.sqrt(tf.cast(h_dim * 2, dtype=seq.dtype))

        # Batched ops replace the per-sample tf.map_fn loops; results are the same.
        S = tf.matmul(seq, seq, transpose_a=True) / scale  # batch x features x features
        A = tf.nn.softmax(S, axis=-1)                      # rows normalised over the last axis
        C = tf.matmul(seq, A)                              # batch x steps x features

        return C

In [None]:
# Building the model

from keras import backend as K
from tensorflow.keras import Input
attention_dim = 10  # Size of the per-step attention/context vector c

# Per-sample shape of the training windows (everything but the batch axis)
input_shape = X_train.shape[1:]
output_shape = X_train.shape[1:]

###########
# ENCODER #
###########

encoder_input = tf.keras.Input(shape=input_shape)

# Corrupt the input with Gaussian noise scaled by `noise_factor`
noisy_input = AddNoise()([encoder_input, noise_factor])

# Bidirectional LSTM: full hidden sequence plus the final (h, c) state of each direction
h_seq, forward_h, forward_c, backward_h, backward_c = tfkl.Bidirectional(tfkl.LSTM(72, activation='tanh', return_sequences=True, return_state=True))(noisy_input)

# Final hidden states of both directions summarise the whole window
state_h = tfkl.Concatenate()([forward_h, backward_h])

# Deterministic context computed from the hidden sequence.
# NOTE(review): `window` is passed as SimScore's `h_dim` scaling argument — confirm intended.
Cdet = SimScore()([h_seq, window])

# Per-step parameters of the distribution over the context c
c_mean = tfkl.Dense(attention_dim , activation='linear', name="c_mean")(Cdet)
c_log_var = tfkl.Dense(attention_dim , activation='softplus', name="c_var")(Cdet)

# Latent representation: mean + log-variance (the VAE loss exponentiates it as a variance).
# NOTE(review): softplus keeps these "log-variance" outputs positive, i.e. variance > 1 — confirm intended.
z_mean = tfkl.Dense(latent_dim, activation='linear', name="z_mean")(state_h)
z_log_var = tfkl.Dense(latent_dim, activation='softplus', name="z_var")(state_h)

# Sampling a vector from the latent distribution
z = tfkl.Lambda(sample_z1, name='z')([z_mean, z_log_var])
c = tfkl.Lambda(sample_z2, name='c')([c_mean, c_log_var])

encoder = tfk.Model(encoder_input, [z_mean, z_log_var, z, c_mean, c_log_var, c], name='encoder')
# summary() prints by itself and returns None, so it must not be wrapped in print()
encoder.summary()

In [None]:
###########
# DECODER #
###########

# The decoder takes the sampled latent vector z and the sampled per-step context c
z_inputs = Input(shape=(latent_dim, ), name='decoder_input_1')
c_inputs = Input(shape=(window, attention_dim), name='decoder_input_2')

# Repeat z along the time axis so it can be concatenated with c at every step
repeated = tfkl.RepeatVector(window)(z_inputs)
concat = tfkl.Concatenate(axis=-1)([repeated, c_inputs])

x = tfkl.Bidirectional(tfkl.LSTM(72, return_sequences=True))(concat)

# OPTION 1 - Dense layers as in the paper
# Two heads with 3 units each; the VAE uses them as location and (exponentiated) scale
# of a Laplace distribution over the reconstructed window.
# NOTE(review): the softplus output is exponentiated again by the VAE, so the scale is > 1 — confirm intended.
mu = tfkl.Dense(3, activation='linear', name="mu")(x)
log_sigma = tfkl.Dense(3, activation='softplus', name="sigma")(x)

# Define and summarize decoder model
decoder = tfk.Model([z_inputs, c_inputs], [mu, log_sigma], name='decoder')
decoder.summary()

In [None]:
class VAE(tfk.Model):
    """Keras model that trains an encoder/decoder pair with a custom loss.

    The loss combines four terms: the negative Laplace log-probability of the
    input under the decoder output ("likelihood"), a KL term for the latent
    vector z, a KL term for the context c, and the MSE between the input and
    the mean of ``M`` samples drawn from the decoder distribution
    ("reconstruction_loss"). ``M`` is a module-level constant.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the sub-models and create one running-mean tracker per loss term.

        Args:
            encoder: model returning ``(z_mean, z_log_var, z, c_mean, c_log_var, c)``.
            decoder: model mapping ``[z, c]`` to ``(mu, log_sigma)``.
            **kwargs: forwarded to the Keras ``Model`` constructor.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = tfk.metrics.Mean(name="total_loss")
        self.likelihood_tracker = tfk.metrics.Mean(name="likelihood")
        self.kl_loss_z_tracker = tfk.metrics.Mean(name="kl_loss_z")
        self.kl_loss_c_tracker = tfk.metrics.Mean(name="kl_loss_c")
        self.reconstruction_loss_tracker = tfk.metrics.Mean(name="reconstruction_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras as the model's metrics."""
        return [
            self.total_loss_tracker,
            self.likelihood_tracker,
            self.kl_loss_z_tracker,
            self.kl_loss_c_tracker,
            self.reconstruction_loss_tracker
        ]

    def _compute_losses(self, data):
        """Run encoder and decoder on one batch and return every loss term as a scalar.

        Returns:
            dict with keys ``loss``, ``likelihood``, ``reconstruction_loss``,
            ``kl_z`` and ``kl_c``.
        """
        # Depending on the Keras version and on how fit()/evaluate() were
        # called, the batch may arrive wrapped in a tuple; only x is used.
        if isinstance(data, tuple):
            data = data[0]

        encoder_mu, encoder_log_var, z, c_mean, c_log_var, c = self.encoder(data)
        decoder_mu, decoder_log_sigma = self.decoder([z, c])
        decoder_sigma = tf.exp(decoder_log_sigma)

        pdf_laplace = tfp.distributions.Laplace(decoder_mu, decoder_sigma, validate_args=True, name='Laplace')

        # Negative log-probability of the input, averaged over features, steps and batch
        likelihood = tf.reduce_mean(-pdf_laplace.log_prob(data))

        # Monte Carlo reconstruction: mean of M samples, drawn in a single call
        decoder_output = tf.reduce_mean(pdf_laplace.sample(M), axis=0)
        reconstruction_loss = tf.reduce_mean(tf.reduce_sum(tfk.losses.mse(data, decoder_output), axis=1))

        # KL term for z: summed over latent dimensions, averaged over the batch
        kl_loss_z = -0.5 * (1 + encoder_log_var - tf.square(encoder_mu) - tf.exp(encoder_log_var))
        kl_loss_z = tf.reduce_mean(tf.reduce_sum(kl_loss_z, axis=1))

        # KL term for c: summed over axis 1, averaged over the last axis and the batch
        kl_loss_c = -0.5 * (1 + c_log_var - tf.square(c_mean) - tf.exp(c_log_var))
        kl_loss_c = tf.reduce_sum(kl_loss_c, axis=1)
        kl_loss_c = tf.reduce_mean(tf.reduce_mean(kl_loss_c, axis=1))

        # Every term is a scalar batch mean, so the total is a scalar too.
        # A per-sample vector here would make tape.gradient sum it implicitly,
        # tying the gradient scale to the batch size.
        total_loss = 4*likelihood + 2*(kl_loss_z + 6*kl_loss_c) + reconstruction_loss

        return {
            "loss": total_loss,
            "likelihood": likelihood,
            "reconstruction_loss": reconstruction_loss,
            "kl_z": kl_loss_z,
            "kl_c": kl_loss_c,
        }

    def _update_trackers(self, losses):
        """Feed one batch of loss values into the trackers and return the running means."""
        self.total_loss_tracker.update_state(losses["loss"])
        self.likelihood_tracker.update_state(losses["likelihood"])
        self.reconstruction_loss_tracker.update_state(losses["reconstruction_loss"])
        self.kl_loss_z_tracker.update_state(losses["kl_z"])
        self.kl_loss_c_tracker.update_state(losses["kl_c"])

        return {
            "loss": self.total_loss_tracker.result(),
            "likelihood": self.likelihood_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_c": self.kl_loss_c_tracker.result(),
            "kl_z": self.kl_loss_z_tracker.result(),
        }

    def train_step(self, data):
        """One optimisation step: compute the loss, apply gradients, report running means."""
        with tf.GradientTape() as tape:
            losses = self._compute_losses(data)

        grads = tape.gradient(losses["loss"], self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        # Reports the same keys as test_step, so train and validation logs line up.
        return self._update_trackers(losses)

    def test_step(self, data): # https://github.com/keras-team/keras-io/issues/38
        """One evaluation step: same loss as train_step, without a gradient update."""
        return self._update_trackers(self._compute_losses(data))

In [None]:
vae = VAE(encoder, decoder)

# Seed every random source used during training.
# NOTE(review): the encoder and decoder were already built in earlier cells, so
# this seed presumably does not govern their initial weights — move the seeding
# above the model-building cells if reproducible initialisation is required.
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up; setting it here
# only affects subprocesses.
seed = 42
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

vae.compile(optimizer=tfk.optimizers.Adam())

# `epochs` is an upper bound: training stops after 20 epochs without val_loss
# improvement and the best weights are restored; the learning rate is halved
# after 5 stagnant epochs, down to 1e-5.
training_callbacks = [
    tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
    tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.5, min_lr=1e-5),
]

# Keep the History object so the loss curves can be inspected after training.
history = vae.fit(x = X_train,
                  validation_data = (X_val, None),
                  epochs=epochs,
                  batch_size=batch_size,
                  callbacks=training_callbacks
                 )