# Replication

I cant find model weights, only the layers of the final quanitzed model

Instead I will replicate the architecture and the way training data was split etc

Training Input is everything except classification

Data is scaled in the following ways:
- pT ([:,:,0]) Is z-scaled (x - mean)/std : They say they bring all to O(1) but not how so this seems reasonable
- eta and phi are tanh scaled : tanh(x / x_b) where x_b is the max(abs(x)) for non padded entries : This is only done before the MSE loss, implementing this for KL is very non trivial

Models are constructed as classes with the encoder and decoder created as submodels

So if we have a full model say CNN_VAE we access the encoder with either CNN_VAE.layers[0] or CNN_VAE.get_layer("cnn_encoder"), but check what the actual name is, the submodel extracted then has the same methods available

The MSE loss and masked MSE loss is handled as functions that require a partial to work due to the way tensorflow works

The beta parameter managing the balance between MSE and KL is not defined in the paper -> A number of values are trialed and the best is selected -> Note that this is non exhaustive of a search and only done to order of magnitude not to exact value

Also note that if you run this and an error occurs during any step whatsoever you will likely have to restart the kernel as I am now convinced that this causes Memory leaks either due to jupyter loosing the reference or tensorflow handling the reference in an odd fashion -> I would like to blame tensorflow rather than jupyter

Misc Notes:

When you inevitably run into nan errors dont bother printing things use tf.debugging.enable_check_numerics() instead, this will tell the compiler to raise an exception on nan and inf with a full backtrace on which layer this occured

In [1]:
# Loss Functions --- Reasoning written out where data is loaded -> Partial needs to be called on these
import tensorflow as tf
@tf.function
def scale_for_MSE(y_true, y_pred, eta_b, phi_b):
    """
    Loss scaling helper
    """
    # Extract
    eta_true = y_true[:, :, 1]
    phi_true = y_true[:, :, 2]
    eta_pred = y_pred[:, :, 1]
    phi_pred = y_pred[:, :, 2]
    # Apply tanh scaling
    eta_true_scaled = tf.tanh(eta_true / eta_b)
    phi_true_scaled = tf.tanh(phi_true / phi_b)
    eta_pred_scaled = tf.tanh(eta_pred / eta_b)
    phi_pred_scaled = tf.tanh(phi_pred / phi_b)
    # Replace and hope compilation makes this efficient
    y_true_scaled = tf.concat([
        y_true[:, :, :1],
        eta_true_scaled[:, :, tf.newaxis],
        phi_true_scaled[:, :, tf.newaxis],
        y_true[:, :, 3:],
    ], axis=-1)

    y_pred_scaled = tf.concat([
        y_pred[:, :, :1],
        eta_pred_scaled[:, :, tf.newaxis],
        phi_pred_scaled[:, :, tf.newaxis],
        y_pred[:, :, 3:],
    ], axis=-1)
    return y_true_scaled, y_pred_scaled

@tf.function
def full_MSE(y_true, y_pred, eta_b, phi_b):
    "Full Loss Function overwrite"
    y_true_scaled, y_pred_scaled = scale_for_MSE(y_true, y_pred, eta_b, phi_b)
    # Compute MSE
    mse = tf.reduce_mean(
        tf.reduce_sum(tf.square(y_true_scaled - y_pred_scaled), axis=(1, 2, 3))
    )

    return mse

@tf.function
def masked_mse(y_true_og, y_pred_og, eta_b, phi_b, epsilon=1e-12):
    """
    Computes MSE per event, ignoring any zero-padded rows.
    Assumes y_true, y_pred each have shape (batch, n_features)
    or possibly (batch, H, W, C). Adjust logic as needed.
    """
    y_true, y_pred = scale_for_MSE(y_true_og, y_pred_og, eta_b, phi_b)
    # Sum over feature-dims to see whether row is all zeros
    row_sum = tf.reduce_sum(tf.abs(y_true), axis=-1)  # shape = (batch,)
    # A mask: 1 if non-padded, 0 if padded
    mask = tf.cast(row_sum > epsilon, tf.float32)
    # Squared error, summation over features
    sq_error = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1)  # shape=(batch,)
    # Apply mask -> I hate this
    sq_error_masked = sq_error * mask
    return tf.reduce_sum(sq_error_masked) / tf.reduce_sum(mask)

def tmp():
    """For VAE implementation a lot of code is taken from
    https://keras.io/examples/generative/vae/
    """
    import numpy as np
    import tensorflow as tf
    import keras
    from keras import ops
    from keras import layers
    class Sampling(layers.Layer):
        """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.seed_generator = keras.random.SeedGenerator(1337)

        def call(self, inputs):
            z_mean, z_log_var = inputs
            batch = ops.shape(z_mean)[0]
            dim = ops.shape(z_mean)[1]
            epsilon = keras.random.normal(shape=(batch, dim), seed=self.seed_generator)
            return z_mean + ops.exp(0.5 * z_log_var) * epsilon


    return Sampling
Sampling = tmp()

In [2]:
from tensorflow.keras import layers, Model

def dnn_reshape():
    """Input Space for DNN is flattened"""
    return layers.Flatten()

def dnn_encoder(input_dim = 57, latent_dim = 3, name='DNN_Encoder', vae=False):
    inputs = tf.keras.Input(shape=(19,3,), name="encoder_in")
    x = inputs#layers.BatchNormalization()(inputs)
    x = layers.Flatten()(x)

    x = layers.Dense(32)(x) # Activation is None By Default
    x = layers.LeakyReLU()(x)
    x = layers.Dense(16)(x)
    x = layers.LeakyReLU()(x)

    if vae:
        z_mean = layers.Dense(latent_dim, name="z_mean")(x)
        # Kernel initializer as this term is exponentiated and default init makes the exp -> ooo
        z_log_var = layers.Dense(latent_dim, name="z_log_var", kernel_initializer="zeros")(x)
        z = Sampling()([z_mean, z_log_var])
        return Model(inputs, [z_mean, z_log_var, z], name=name)

    latent = layers.Dense(latent_dim, name="latent")(x)

    return Model(inputs, latent, name=name)

def dnn_decoder(output_dim=57, latent_dim=3, name='DNN_Decoder'):
    """
    BN between Fully connected and activation, last doesnt have activation & BN
    """
    latent_in = tf.keras.Input(shape=(latent_dim,), name="decoder_in")

    y = layers.Dense(16)(latent_in)
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)

    y = layers.Dense(32)(y)
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)

    outputs = layers.Dense(output_dim, name="reconstruction")(y)

    # And reshape so that the loss works right
    outputs = layers.Reshape((19,3,))(outputs)

    return Model(latent_in, outputs, name=name)

dnn_encoder().summary()
dnn_decoder().summary()

In [3]:
@tf.keras.utils.register_keras_serializable()
class DNN_AE(tf.keras.Model):
    """
    AE = Encoder -> Decoder, no sampling.
    """
    def __init__(self, input_dim=57, latent_dim=3):
        super().__init__()
        self.encoder = dnn_encoder(input_dim=input_dim, latent_dim=latent_dim)
        self.decoder = dnn_decoder(output_dim=input_dim, latent_dim=latent_dim)

    def call(self, x):
        z = self.encoder(x)            # shape=(batch, latent_dim)
        x_recon = self.decoder(z)      # shape=(batch, input_dim)
        return x_recon


@tf.keras.utils.register_keras_serializable()
class DNN_VAE(tf.keras.Model):
    def __init__(self, eta_b, phi_b, input_dim=57, latent_dim=3, beta=1.0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = dnn_encoder(input_dim=input_dim, latent_dim=latent_dim, vae=True)
        self.decoder = dnn_decoder(output_dim=input_dim, latent_dim=latent_dim)
        self.beta = beta
        self.eta_b = eta_b
        self.phi_b = phi_b

        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)

            reconstruction_loss = tf.reduce_mean(
                masked_mse(data, reconstruction, self.eta_b, self.phi_b)
            )

            # ✅ KL divergence
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

            # ✅ Total loss with β weighting
            total_loss = (1.0 - self.beta) * reconstruction_loss + self.beta * kl_loss

        # ✅ Backpropagation
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        # ✅ Update metrics
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)
        return reconstruction

In [4]:
def build_cnn_encoder(name='cnn_encoder', vae=False):
    """
    Build CNN Encoder
    vae : Variational Autoencoder varitation (second final layer)
    """

    inputs = tf.keras.Input(shape=(19,3,1), name="cnn_encoder_input")
    #      Zeropad to (20,3,1)  - Syntax padding = ((top, bottom), (left, right))
    x = layers.ZeroPadding2D(padding=((0,1),(0,0)))(inputs)
    x = layers.BatchNormalization()(x)
    #      Block 1  -> Padding valid means no padding
    x = layers.Conv2D(16, kernel_size=(3,3), strides=(1,1), padding='valid', use_bias=False)(x)
    x = layers.ReLU()(x)
    x = layers.AveragePooling2D(pool_size=(3,1), strides=(3,1))(x)
    #      Block 2
    x = layers.Conv2D(32, kernel_size=(3,1), strides=(1,1), padding='valid', use_bias=False)(x)
    x = layers.ReLU()(x)
    x = layers.AveragePooling2D(pool_size=(3,1), strides=(3,1))(x)
    x = layers.Flatten()(x)
    #      Block 3 - started
    if vae:
        latent_mean = layers.Dense(8, activation=None, name="latent_mean")(x)
        latent_log_var = layers.Dense(8, activation=None, name="latent_log_var", kernel_initializer='zeros')(x)
        z = Sampling()([latent_mean, latent_log_var])
        return tf.keras.Model(inputs, [latent_mean, latent_log_var, z], name=name)

    latent = layers.Dense(8, activation=None, name="latent")(x)
    return tf.keras.Model(inputs, latent, name=name)


def build_cnn_decoder(name='cnn_decoder'):
    """
    CNN decoder from the paper's bottom figure.
    Takes a latent dimension (8) -> Dense -> Reshape -> Conv2DTranspose or Upsampling + Conv2D
    to go back to shape (20,3,1) then maybe slice off padding if you want 19,3,1 final.
    """
    #      Block 3 - remainder > Forming encoder input
    latent_inputs = tf.keras.Input(shape=(8,), name="cnn_decoder_input")
    y = layers.Dense(64, activation=None)(latent_inputs)
    y = layers.ReLU()(y)
    y = layers.Reshape((2,1,32))(y)
    #      Block 4
    y = layers.Conv2D(32, kernel_size=(3,1), strides=(3,1), padding='same')(y)
    y = layers.ReLU()(y)
    y = layers.UpSampling2D(size=(3,1))(y)
    y = layers.ZeroPadding2D(padding=((0,0),(1,1)))(y)
    #      Block 5
    y = layers.Conv2D(16, kernel_size=(3,1), strides=(1,1), padding='same')(y)
    y = layers.ReLU()(y)
    y = layers.UpSampling2D(size=(3,1))(y)
    y = layers.ZeroPadding2D(padding=((1,0),(0,0)))(y)
    #      Output
    output = layers.Conv2D(1, kernel_size=(3,3), strides=(1,1), padding='valid')(y)

    return tf.keras.Model(latent_inputs, output, name=name)

build_cnn_encoder().summary()
build_cnn_decoder().summary()

In [5]:
@tf.keras.utils.register_keras_serializable()
class CNN_AE(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder = build_cnn_encoder(name="cnn_encoder")
        self.decoder = build_cnn_decoder(name="cnn_decoder")

    def call(self, x):
        z = self.encoder(x)
        recon = self.decoder(z)
        return recon


@tf.keras.utils.register_keras_serializable()
class CNN_VAE(tf.keras.Model):
    def __init__(self,eta_b, phi_b, input_shape=(19, 3, 1), latent_dim=3, beta=1.0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = build_cnn_encoder(input_shape=input_shape, latent_dim=latent_dim, vae=True)
        self.decoder = build_cnn_decoder(output_shape=input_shape, latent_dim=latent_dim)
        self.beta = beta
        self.eta_b = eta_b
        self.phi_b = phi_b

        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)


            # MSE over all spatial and channel dimensions
            #reconstruction_loss = tf.reduce_mean(
            #    tf.reduce_sum(tf.square(data - reconstruction), axis=(1, 2, 3))
            #)
            reconstruction_loss = full_MSE(data , reconstruction, self.eta_b, self.phi_b)

            # KL divergence term
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

            # Total loss with β weighting
            total_loss = (1.0 - self.beta) * reconstruction_loss + self.beta * kl_loss

        # Backprop
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        # Update
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)
        return reconstruction

## Load Data + Define training Loops

In [9]:
import h5py
import numpy as np
from tensorflow.keras.mixed_precision import set_dtype_policy

#setting the default evaluation for keras to 16 bit floats, unless more accuracy is necesarry. Is supposed to speed up learning for modern GPU's and TPU's.
#All the hls4ml seem to set it to 16bit and i'm guessing there is some built in safety here. See "https://keras.io/api/mixed_precision/". Might need to be moved up?
set_dtype_policy('mixed_float16')

# If you use the dockerfile this should just work
#dset = h5py.File('/Code/Dataset/background_for_training.h5', 'r')
dset = h5py.File('background_for_training.h5', 'r')
dset = {key: dset[key][()] for key in dset.keys()}
"""
Contains keys:
    Particles_Classes : 4 classes of Particles
    Particles_Names : Names of the Particles
    Particles : The data (n, 19,4)
        19 : Indexes are
            - 0 : Missing Transverse Energy
            - 1:4 Up to 4 electrons
            - 4:8 Up to 4 muons
            - 8-18 Up to 10 jets
        Subdimension 4 by idx:
            - 0 : pT (transverse momentum)
            - 1 : eta (pseudorapidity)
            - 2 : phi (azimuthal angle)
            - 3 : class (0=Nothing, 1=Met,2=electron,3=muon,4=jet)
And when something doesnt make sense (ie [:,0,1:4]) its just zero
"""
data = dset['Particles']
del dset
# Do z score norm to aid in training : They dont specify how they made O(1) : And I assume they mean across all defined objects
detected_bmap = (data[:,:,3] != 0) # Select defined entries
mean_pt = tf.reduce_mean(data[detected_bmap, 0])
std_pt = tf.math.reduce_std(data[detected_bmap, 0])
data[:,:,0] = ((data[:,:,0] - mean_pt) / std_pt)

"""
They mention in the paper:
To account for physical boundaries of η and φ, for those features a re-scaled tanh activation is used in the loss computation.

So I assume this means we need to find the extent of the parameters (call it b ) and do tanh(x/b) prior to the loss function,
I am not sure how one would go about implementing this for KL loss so Im going to do MSE only
"""
eta_b = np.maximum(np.max(data[detected_bmap, 1]), np.abs(np.min(data[detected_bmap, 1]))).astype("float32")
phi_b = np.maximum(np.max(data[detected_bmap, 2]), np.abs(np.min(data[detected_bmap, 2]))).astype("float32")

In [10]:
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

train_split = int(0.5*len(data))
val_split = int((0.4+0.5)*len(data))

n = np.arange(len(data))
np.random.shuffle(n)
# Do splitting - Dont grab class indices
train = data[n[:train_split],          :, :3]
val   = data[n[train_split:val_split], :, :3]
test  = data[n[val_split:],            :, :3]

In [11]:
from functools import partial
def train_dnn_ae(x_train, x_val, epochs=100, batch_size=1024):
    """
    Create and train
    """
    # Build
    dnn_ae = DNN_AE(
        input_dim=57, latent_dim=3
        )

    # Compile with Adam and masked MSE
    loss_func = tf.function(partial(masked_mse, eta_b=eta_b, phi_b=phi_b))
    dnn_ae.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=loss_func
    )
    # Make Callback
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True # restore???
        )
    ]
    # Fit
    dnn_ae.fit(
        x_train, x_train,
        validation_data=(x_val, x_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=1
    )
    return dnn_ae

def train_dnn_vae(x_train, x_val, epochs=100, batch_size=1024, beta=1.0):
    """
    Create and train
    """
    # Build
    dnn_vae = DNN_VAE(eta_b=eta_b, phi_b=phi_b, input_dim=57, latent_dim=3, beta=beta)

    # Compile with Adam and masked MSE
    dnn_vae.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0),
        loss=masked_mse
    )
    # Make Callback
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True # restore???
        )
    ]
    # Fit
    dnn_vae.fit(
        x_train,
        validation_data=(x_val,),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=1
    )
    return dnn_vae

In [12]:
from functools import partial

def train_cnn_ae(x_train, x_val,eta_b, phi_b, epochs=100, batch_size=1024):
    """
    Create and train
    """
    cnn_ae_model = CNN_AE(
        # No Args
    )
    loss_func = tf.function(partial(full_MSE, eta_b=eta_b, phi_b=phi_b))
    cnn_ae_model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=loss_func
    )
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
    ]
    cnn_ae_model.fit(
        x_train, x_train,
        validation_data=(x_val, x_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks
    )
    return cnn_ae_model

def train_cnn_vae(x_train, x_val, epochs=100, batch_size=1024, beta=1.0):
    """
    Create and train
    """
    cnn_vae_model = CNN_VAE( eta_b=eta_b, phi_b=phi_b, beta=beta )
    cnn_vae_model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='mse' # I think this is just ignored since we have a custom train loop
    )

    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
    ]
    cnn_vae_model.fit(
        x_train, x_train,
        validation_data=(x_val, x_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks
    )

    return cnn_vae_model

# Actual Training

Note that there is a lot of duplicate code as the kernel often doesnt survive past more than one model trainings

In [None]:
import os
path = '/Code/Replicate/Models'
if not os.path.exists(path):
    os.makedirs(path)
epochs = 100
batch_size = 1024

dnn_ae  = train_dnn_ae( train, val, epochs=epochs, batch_size=batch_size)

dnn_ae.save(f'{path}/full_dnn_ae.keras')

Epoch 1/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 26ms/step - loss: 0.2965 - val_loss: 0.0577
Epoch 2/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - loss: 0.0561 - val_loss: 0.0543
Epoch 3/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 27ms/step - loss: 0.0541 - val_loss: 0.0527
Epoch 4/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 22ms/step - loss: 0.0524 - val_loss: 0.0474
Epoch 5/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 26ms/step - loss: 0.0459 - val_loss: 0.0414
Epoch 6/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 26ms/step - loss: 0.0409 - val_loss: 0.0399
Epoch 7/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 26ms/step - loss: 0.0399 - val_loss: 0.0402
Epoch 8/100
[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 26ms/step - loss: 0.0394 - val_loss: 0.0388


In [None]:
import os
path = '/Code/Replicate/Models'
if not os.path.exists(path):
    os.makedirs(path)
epochs = 100
batch_size = 1024

cnn_ae  = train_dnn_ae( train, val, epochs=epochs, batch_size=batch_size)

cnn_ae.save(f'{path}/full_cnn_ae.keras')

# Beta testing

In [None]:
import os
# Iterate e-3 to e0
for i in [1]:#np.logspace(-3, 0, 4):
    #The source sited in the paper "https://openreview.net/pdf?id=Sy2fzU9gl" talks about beta 1 for vae (and higher?). I think it is sufficient to use 1 for the replica and maybe
    #experiment with it in the try things state.
    path = '/Code/Replicate/Models'
    if not os.path.exists(path):
        os.makedirs(path)
    epochs = 100
    batch_size = 1024

    dnn_vae = train_dnn_vae(train, val, epochs=epochs, batch_size=batch_size, beta=i)

    dnn_vae.save(f'{path}/full_dnn_vae_beta{i}.keras')

In [None]:
import os
# Iterate e-3 to e0
for i in [1]:#np.logspace(-3, 0, 4):
    path = '/Code/Replicate/Models'
    if not os.path.exists(path):
        os.makedirs(path)
    epochs = 100
    batch_size = 1024

    cnn_vae = train_cnn_vae(train, val, epochs=epochs, batch_size=batch_size, beta=i)

    cnn_vae.save(f'{path}/full_cnn_vae_beta{i}.keras')