## Training

In [32]:
import tensorflow as tf
from tensorflow.keras import layers, models, losses, backend as K
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd

# Load and preprocess your dataset
scaled_df = pd.read_csv('../../data/kdd_train_scaled.csv')
X_train = scaled_df.to_numpy()  # Convert DataFrame to NumPy array

# Check for NaN values in the dataset
if np.any(np.isnan(X_train)):
    print("Warning: NaN values found in the dataset. Consider handling them before training.")
    # Optional: Handle NaN values if necessary (e.g., using imputation)

# Define the latent space dimension
latent_dim = 2

# Encoder
input_data = layers.Input(shape=(X_train.shape[1],))  # Use preprocessed data's feature shape
h = layers.Dense(64, activation='relu')(input_data)
h = layers.Dense(32, activation='relu')(h)

# Latent variables
z_mean = layers.Dense(latent_dim)(h)
z_log_var = layers.Dense(latent_dim)(h)

# Sampling function for the latent space
def sampling(args):
    z_mean, z_log_var = args
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = layers.Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder
decoder_h = layers.Dense(32, activation='relu')
decoder_mean = layers.Dense(X_train.shape[1], activation='sigmoid')  # Match the number of features in the output

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# Define custom layer for KL divergence loss
class KLDivergenceLayer(layers.Layer):
    """ Custom layer to compute the KL divergence loss """
    def call(self, inputs):
        z_mean, z_log_var = inputs
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        self.add_loss(K.mean(kl_loss))  # Add KL divergence as a loss to the model
        return inputs

# Apply the custom KL divergence layer
z_mean, z_log_var = KLDivergenceLayer()([z_mean, z_log_var])

# Define the VAE model
vae = models.Model(input_data, x_decoded_mean)

# Custom VAE loss function (reconstruction loss)
def vae_loss(input_data, x_decoded_mean):
    # Binary Crossentropy reconstruction loss if data is scaled to [0, 1]
    reconstruction_loss = losses.binary_crossentropy(input_data, x_decoded_mean)
    reconstruction_loss *= X_train.shape[1]  # Scale by the number of features
    return K.mean(reconstruction_loss)  # KL loss is added by the custom layer

# Compile the model with a lower learning rate and gradient clipping
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0)  # Reduced learning rate and clipping
vae.compile(optimizer=optimizer, loss=vae_loss)

# Train VAE with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5)  # Stop early if no improvement
vae.fit(X_train, X_train, epochs=100, batch_size=128, validation_split=0.2, callbacks=[early_stopping])

# Save the trained model
vae.save('../../models/vae_model.h5')

print("Model training completed!")


Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 32.1272 - val_loss: 30.0547
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 26.1678 - val_loss: 19.9620
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 17.0130 - val_loss: 9.7803
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.9664 - val_loss: -5.1501
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -10.8507 - val_loss: -27.2699
Epoch 6/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: -37.4805 - val_loss: -61.0047
Epoch 7/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: -73.0837 - val_loss: -112.3236
Epoch 8/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: -135.0452 - val_loss: -189.6085
Epoch 9



Model training completed!


## Evaluation

In [33]:
# Load model
vae = tf.keras.models.load_model('../../models/vae_model.h5', custom_objects={'sampling': sampling}, compile=False)

X_test = pd.read_csv('../../data/kdd_test_scaled.csv')

# Generate reconstructions on the validation/test set (assuming X_test is available)
X_test_reconstructions = vae.predict(X_test)

X_test_reconstructions = vae.predict(X_test)

# Check for NaNs in the reconstructed data
print(f"NaNs in X_test_reconstructions: {np.isnan(X_test_reconstructions).sum()}")

# Check reconstruction output range
print(f"Reconstruction range: Min={X_test_reconstructions.min()}, Max={X_test_reconstructions.max()}")


# Calculate reconstruction error (Mean Squared Error)
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(X_test, X_test_reconstructions)
print(f'Reconstruction MSE: {mse}')


[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
NaNs in X_test_reconstructions: 0
Reconstruction range: Min=0.0, Max=1.0
Reconstruction MSE: 2.7615975264846275
