In [1]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import pickle

# Load data
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at a param.pkl from a trusted source.
with open('param.pkl', 'rb') as f:
    data = pickle.load(f)

# Cast to float32, repeat the array 108 times with np.tile, then fold the
# result into rows of 356 values, i.e. (n_samples, n_features)
data = np.array(data, dtype=np.float32)
data = np.tile(data, 108)
data = data.reshape(-1, 356)

# Manual normalization to [-1, 1] range (for tanh activation)
def manual_normalize(data):
    """Linearly rescale every column of ``data`` into the [-1, 1] range.

    Parameters
    ----------
    data : array-like
        Values to normalize. Minimum and maximum are taken along axis 0.

    Returns
    -------
    normalized : np.ndarray
        ``-1 + scale * (data - data_min)``; columns whose minimum equals
        their maximum come out as a constant -1.0.
    data_min, data_max : np.ndarray
        Per-column minimum and maximum, needed by ``manual_denormalize``.
    """
    data_min = np.min(data, axis=0)
    data_max = np.max(data, axis=0)
    # np.where evaluates both branches eagerly, so guarding the *result* of
    # 2.0 / (max - min) still divides by zero for constant columns. Guard the
    # denominator instead: a substitute range of 2.0 yields the same scale of
    # 1.0 for those columns without ever computing 2.0 / 0.
    safe_range = np.where(data_max != data_min, data_max - data_min, 2.0)
    scale = 2.0 / safe_range
    normalized = -1.0 + scale * (data - data_min)
    return normalized, data_min, data_max

def manual_denormalize(normalized_data, data_min, data_max):
    """Map values from the [-1, 1] range back to the original scale.

    Inverse of ``manual_normalize``.

    Parameters
    ----------
    normalized_data : array-like
        Values in normalized space.
    data_min, data_max : np.ndarray
        Per-column minimum and maximum returned by ``manual_normalize``.

    Returns
    -------
    np.ndarray
        ``data_min + (normalized_data + 1) / scale``. For columns where
        ``data_min == data_max`` the scale is 1.0, matching the forward
        transform.
    """
    # Guard the denominator rather than the quotient: np.where evaluates both
    # branches, so dividing first would still hit 2.0 / 0 on constant columns.
    safe_range = np.where(data_max != data_min, data_max - data_min, 2.0)
    scale = 2.0 / safe_range
    original = data_min + (normalized_data + 1.0) / scale
    return original

# Normalize data to [-1, 1]; data_min / data_max are kept so generated
# samples can later be mapped back with manual_denormalize
normalized_data, data_min, data_max = manual_normalize(data)

# Parameters
latent_dim = 100  # size of the latent vector z
input_dim = normalized_data.shape[1]  # number of feature columns (356, per the reshape applied to data)
batch_size = 64
epochs = 3
# NOTE(review): patience (5) is larger than epochs (3), so the early-stopping
# wait counter cannot reach the patience threshold in this run; confirm intended.
patience = 5

# Custom VAE loss layer
class VAELossLayer(layers.Layer):
    """Pass-through layer that registers the VAE loss on the model.

    Called with ``[x, z_mean, z_log_var, x_decoded]``. It adds the sum of a
    mean-squared reconstruction error and a mean-reduced KL term via
    ``add_loss`` and returns ``x_decoded`` unchanged.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        original, latent_mean, latent_log_var, reconstruction = inputs

        # Mean squared error between the input and its reconstruction
        reconstruction_error = tf.reduce_mean(tf.square(original - reconstruction))

        # KL divergence term, averaged over every element
        kl_terms = 1 + latent_log_var - tf.square(latent_mean) - tf.exp(latent_log_var)
        kl_divergence = -0.5 * tf.reduce_mean(kl_terms)

        # Register the combined objective; the layer output is the reconstruction
        self.add_loss(reconstruction_error + kl_divergence)
        return reconstruction

# Encoder
def build_encoder():
    """Build the encoder: input -> Dense(256, relu) -> (z_mean, z_log_var).

    Reads the module-level ``input_dim`` and ``latent_dim``.

    Returns
    -------
    Model
        Keras model named "encoder" with two outputs, ``z_mean`` and
        ``z_log_var``, each of width ``latent_dim``.
    """
    encoder_input = layers.Input(shape=(input_dim,))
    hidden = layers.Dense(256, activation='relu')(encoder_input)

    # Two linear heads share the same hidden representation
    latent_mean = layers.Dense(latent_dim, name="z_mean")(hidden)
    latent_log_var = layers.Dense(latent_dim, name="z_log_var")(hidden)

    return Model(
        inputs=encoder_input,
        outputs=[latent_mean, latent_log_var],
        name="encoder",
    )

# Sampling layer
class Sampling(layers.Layer):
    """Draw a latent vector z from (z_mean, z_log_var).

    Computes ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
    standard-normal noise with the same shape as ``z_mean``.
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        # Use tf.random.normal (as generate_samples does) instead of the
        # legacy backend helper, and draw the noise in z_mean's dtype so the
        # multiplication below does not mix dtypes under a non-float32 policy.
        epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Decoder
def build_decoder():
    """Build the decoder: latent vector -> Dense(256, relu) -> Dense(input_dim, tanh).

    Reads the module-level ``latent_dim`` and ``input_dim``.

    Returns
    -------
    Model
        Keras model named "decoder" whose tanh output has width ``input_dim``.
    """
    decoder_input = layers.Input(shape=(latent_dim,))
    hidden = layers.Dense(256, activation='relu')(decoder_input)
    # tanh keeps the reconstruction inside [-1, 1]
    reconstruction = layers.Dense(input_dim, activation='tanh')(hidden)
    return Model(inputs=decoder_input, outputs=reconstruction, name="decoder")

# Build VAE: the encoder and decoder are kept as separate models so each can
# be used on its own (the decoder is reused in generate_samples)
encoder = build_encoder()
decoder = build_decoder()

# Input layer
inputs = layers.Input(shape=(input_dim,))
# Get latent variables
z_mean, z_log_var = encoder(inputs)
# Sample from latent space
z = Sampling()([z_mean, z_log_var])
# Reconstruct input
outputs = decoder(z)

# Add VAE loss: VAELossLayer registers the loss through add_loss and returns
# the reconstruction, so `outputs` is rebound to the layer's pass-through result
outputs = VAELossLayer()([inputs, z_mean, z_log_var, outputs])

# Create VAE model
vae = Model(inputs, outputs, name="vae")

# Compile VAE: no `loss` argument is given here; the training objective is
# the one added inside VAELossLayer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
vae.compile(optimizer=optimizer)

# Early stopping callback: monitors the training loss ('loss'); no validation
# data is passed to fit below
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=patience,
    restore_best_weights=True
)

# Train VAE: only inputs are supplied (no targets), since the loss is computed
# inside the model from the input itself
history = vae.fit(
    normalized_data,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping],
    shuffle=True
)

# Generate samples
def generate_samples(num_samples):
    """Generate ``num_samples`` samples by decoding random latent vectors.

    Draws standard-normal noise of shape ``(num_samples, latent_dim)``, runs
    it through the module-level ``decoder`` and maps the result back to the
    original scale with ``manual_denormalize`` using ``data_min`` / ``data_max``.

    Returns
    -------
    np.ndarray
        Denormalized decoder output, one row per sample.
    """
    latent_points = tf.random.normal(shape=(num_samples, latent_dim))
    decoded = decoder.predict(latent_points)
    return manual_denormalize(decoded, data_min, data_max)

# Save models: one HDF5 file each for the encoder, the decoder and the full VAE
for trained_model, h5_path in (
    (encoder, 'vae_encoder.h5'),
    (decoder, 'vae_decoder.h5'),
    (vae, 'vae_model.h5'),
):
    trained_model.save(h5_path)

# Save normalization parameters so samples can be denormalized later
np.savez('normalization_params.npz', data_min=data_min, data_max=data_max)

# Generate a handful of samples and print a short summary
samples = generate_samples(5)
print("Generated samples shape:", samples.shape)
print("Sample data (first sample):", samples[0])
print("Min value in generated samples:", samples.min())
print("Max value in generated samples:", samples.max())

2025-04-27 11:12:30.928712: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/3
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.1155
Epoch 2/3
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0526
Epoch 3/3
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0465




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Generated samples shape: (5, 356)
Sample data (first sample): [ 3.478313   -0.00759876  0.71235394 -0.35856748 -0.3304314  -0.3253756
 -1.8023881   0.99238604  2.1537256  -0.09488481  0.50220263 -0.29566836
 -0.2656945  -0.29924646 -1.4601655   1.0285158   1.7709928  -0.07546836
  0.42515346 -0.2777443  -0.22112675 -0.28213775 -1.2700822   1.0037655
  1.5476444  -0.11144415  0.3644592  -0.26855633 -0.20167167 -0.26689726
 -1.1270921   0.98767704  1.410085   -0.09194291  0.3444108  -0.2536827
 -0.17770547 -0.25152123 -1.0354369   0.97090113  1.2900827  -0.12244916
  0.29794914 -0.26185483 -0.17873727 -0.2603454  -0.9701748   0.9453529
  1.2112689  -0.11741716  0.29704893 -0.23617136 -0.14631009 -0.23246369
 -0.8932205   0.93430185  1.1429648  -0.10811397  0.27869004 -0.22678994
 -0.13658082 -0.23597941 -0.840661    0.9201855   1.0822029  -0.11743
  0.2647385  -0.22100428 -0.12385696 -0.2238409  -0.78118366  0.912821

In [2]:
# Draw a fresh batch of five samples from the trained decoder
samples = generate_samples(num_samples=5)

# Report how many samples came back, then display the first one
print(samples.shape[0])
samples[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
5


array([ 5.83508759e+01, -1.48726654e+00,  1.86574707e+01, -4.08523560e-01,
        1.10439091e+01, -3.20655823e-01,  8.08969307e+00,  4.39474106e-01,
        5.63818169e+00, -1.29783630e-01,  4.69741821e+00,  6.36335373e-01,
        4.76300621e+00,  2.67868042e-02,  4.28443527e+00, -1.59519196e-01,
        3.37932777e+00,  2.66742706e-02,  2.97634411e+00, -3.62815857e-01,
        2.69639492e+00, -1.19199753e-01,  2.41815662e+00,  2.81211853e-01,
        2.79145432e+00,  2.40846634e-01,  2.27065468e+00,  9.01985168e-02,
        2.30745316e+00,  2.52185822e-01,  1.72310066e+00,  1.38048172e-01,
        2.00636959e+00, -9.98344421e-02,  1.32189178e+00,  3.95956039e-01,
        1.38926125e+00,  4.05349731e-02,  9.96501923e-01, -2.99383163e-01,
        1.87197304e+00,  2.47028351e-01,  1.24649239e+00,  2.59674072e-01,
        1.87097645e+00,  4.45842743e-02,  1.76911068e+00, -1.75168991e-01,
        1.24607658e+00, -1.54285431e-01,  1.10260201e+00, -6.72130585e-02,
        1.41532993e+00,  