In [None]:
import os
import sys
import glob
import tqdm
import math
import librosa
import librosa.display as display
import pickle

import numpy as np
np.random.seed(42)
import pandas as pd

import IPython.display as i_disn
%matplotlib inline
import matplotlib as mlp
import matplotlib.pyplot as plt
mlp.rc("xtick",labelsize=12)
mlp.rc("ytick",labelsize=12)
mlp.rc("axes",labelsize=14)

import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split


In [None]:
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at `path`."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Feature arrays for each split.
X_train_features = _read_pickle(r"X_train_features.pkl")
X_test_features = _read_pickle(r"X_test_features.pkl")
X_val_features = _read_pickle(r"X_val_features.pkl")

# Labels for each split.
y_train = _read_pickle(r"y_train.pkl")
y_test = _read_pickle(r"y_test.pkl")
y_val = _read_pickle(r"y_val.pkl")

In [None]:
# Report the array shape of every split as loaded.
for label, split in (
    ("Shape of training data: ", X_train_features),
    ("Shape of test data: ", X_test_features),
    ("Shape of validation data: ", X_val_features),
):
    print(label, split.shape)

In [None]:
# Preview the first validation sample as a 257 x 69 spectrogram on a log-frequency axis.
first_val_spec = X_val_features[0].reshape(257, 69)
plt.figure(figsize=(12, 4))
librosa.display.specshow(first_val_spec, x_axis='time', y_axis='log')

## VAE with MLP

In [None]:
import tensorflow as tf
import keras
from tensorflow.keras.layers import Input, Dense, Lambda, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras import backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.callbacks import Callback

from tensorflow.keras.regularizers import l1
from tensorflow.keras.regularizers import l2
from tensorflow.keras.regularizers import l1_l2

from tensorflow.python.framework.ops import disable_eager_execution, enable_eager_execution

disable_eager_execution()
# enable_eager_execution()

In [None]:
def MinMaxScaler(features):
    """Scale each sample independently to the [0, 1] range.

    Parameters
    ----------
    features : iterable of array-like
        One array per sample; each is normalised with its own min and max.

    Returns
    -------
    scaled : np.ndarray
        The normalised samples, stacked in input order.
    min_max_values : np.ndarray
        One ``(min, max)`` pair per sample, needed to undo the scaling later.
    """
    scaled = []
    min_max_values = []
    for f in features:
        f = np.asarray(f)
        min_val = np.min(f)
        max_val = np.max(f)
        value_range = max_val - min_val
        # A constant sample has zero range; dividing by it would fill the sample
        # with NaNs (0/0). Dividing by 1 instead maps such a sample to all zeros.
        divisor = value_range if value_range != 0 else 1
        f_normalized = (f - min_val) / divisor
        min_max_values.append((min_val, max_val))
        scaled.append(f_normalized)
    return np.array(scaled), np.array(min_max_values)

In [None]:
# Min-max scale every split sample by sample and keep each sample's (min, max) pair.
# NOTE(review): the X_*_features names are rebound to the scaled arrays, so running
# this cell a second time rescales already-scaled data and overwrites the stored
# ranges with the ranges of the scaled data.
X_train_features, X_train_min_max  = MinMaxScaler(X_train_features)
X_val_features , X_val_min_max = MinMaxScaler(X_val_features)
X_test_features , X_test_min_max = MinMaxScaler(X_test_features)

In [None]:
# Report the array shape of every split after scaling.
for label, split in (
    ("Shape of training data: ", X_train_features),
    ("Shape of test data: ", X_test_features),
    ("Shape of validation data: ", X_val_features),
):
    print(label, split.shape)

In [None]:
def _flatten_samples(batch):
    """Collapse every axis after the first so each sample becomes one row."""
    return batch.reshape(len(batch), np.prod(batch.shape[1:]))


# Flattened views of each split for the dense (MLP) models.
X_train = _flatten_samples(X_train_features)
X_test = _flatten_samples(X_test_features)
X_val = _flatten_samples(X_val_features)

In [None]:
# Report the array shape of every flattened split.
for label, split in (
    ("Shape of training data: ", X_train),
    ("Shape of test data: ", X_test),
    ("Shape of validation data: ", X_val),
):
    print(label, split.shape)

In [None]:

# Width of one flattened sample; also used to scale the reconstruction loss.
original_dim = X_train.shape[1]
latent_dim = 3  # Size of the latent space



In [None]:
# Encoder network: three ReLU layers of decreasing width, then two linear heads.
inputs = Input(shape=(X_train.shape[1],))

x = inputs
for hidden_units in (1024, 512, 256):
    x = Dense(hidden_units, activation='relu')(x)

# Mean and log-variance of the approximate posterior over the latent code.
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)


def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Parameters
    ----------
    args : sequence of two tensors
        ``(z_mean, z_log_var)`` as produced by the encoder's two Dense heads.

    Returns
    -------
    tensor
        ``z_mean + exp(0.5 * z_log_var) * epsilon`` with ``epsilon`` drawn from
        a standard normal distribution.
    """
    z_mean, z_log_var = args
    batch_size = K.shape(z_mean)[0]
    # Take the latent width from the tensor itself rather than the notebook-level
    # `latent_dim`, which a later cell rebinds to a different value.
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch_size, dim), mean=0., stddev=1.)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Sample z from the encoder's (z_mean, z_log_var) heads.
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# Decoder network: the encoder's hidden widths in reverse order.
decoder_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(256, activation='relu')(decoder_inputs)
x = Dense(512, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
# Output width follows the flattened input size instead of a hard-coded 17733,
# so the decoder stays in sync with X_train if the feature shape changes.
outputs = Dense(original_dim, activation='sigmoid')(x)

# Define the encoder and decoder models
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
decoder = Model(decoder_inputs, outputs, name='decoder')


In [None]:
# see encoder summary
encoder.summary()

In [None]:
# see decoder summary
decoder.summary()

In [None]:
# Define the loss function for VAE
def vae_loss(inputs, x_decoded_mean):
    """Return the batch-mean VAE loss: reconstruction term plus KL divergence.

    The reconstruction term is the binary cross-entropy between `inputs` and
    `x_decoded_mean`, multiplied by `original_dim`. The KL term is computed from
    the notebook-level `z_mean` and `z_log_var` tensors of the encoder.
    """
    reconstruction = binary_crossentropy(inputs, x_decoded_mean) * original_dim
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_divergence = -0.5 * K.sum(kl_terms, axis=-1)
    return K.mean(reconstruction + kl_divergence)

In [None]:
# VAE model: feed the encoder's sampled z (its third output) through the decoder.
latent_sample = encoder(inputs)[2]
outputs = decoder(latent_sample)
dense_vae = Model(inputs, outputs, name='vae')
dense_vae.compile(loss=vae_loss, optimizer='adam')
dense_vae.summary()

In [None]:
# class TestLossCallback(Callback):
#     def __init__(self, test_data):
#         self.test_data = test_data
#         self.test_losses = []

#     def on_epoch_end(self, epoch, logs=None):
#         test_loss = self.model.evaluate(self.test_data, self.test_data, verbose=0)
#         print(f"\nTest Loss after Epoch {epoch + 1}: {test_loss}")
#         self.test_losses.append(test_loss)

# test_loss_callback = TestLossCallback(X_test)



In [None]:
# Train the VAE as an autoencoder: each split serves as both input and target.
dense_vae_history = dense_vae.fit(
    X_train,
    X_train,
    epochs=10,
    batch_size=128,
    shuffle=True,
    validation_data=(X_val, X_val),
)

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(10, 8))
for history_key in ("loss", "val_loss"):
    plt.plot(dense_vae_history.history[history_key])
plt.legend(["Training Loss", "Validation Loss"])
plt.title("Loss plot for Variational AutoEncoder (with MLP)")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.tight_layout()
plt.show()

In [None]:
# Evaluate the model on the test data
test_loss = dense_vae.evaluate(X_test, X_test)

In [None]:
# Reference values noted from an earlier run (presumably training loss, validation
# loss and test loss, in that order — TODO confirm). Kept as a comment because the
# raw text is not valid Python and would stop Run All:
#   loss: 7673.7411 - val_loss: 7667.3369  7692.695394226507

In [None]:
test_loss

In [None]:
# Compare the final training / validation losses with the test loss.
# barh takes (y positions, bar widths); the original call passed three scalars,
# which drew a single bar. Values are read from the history instead of being
# hard-coded, so the chart always reflects the current run.
final_losses = {
    "Training": dense_vae_history.history["loss"][-1],
    "Validation": dense_vae_history.history["val_loss"][-1],
    "Test": test_loss,
}
plt.barh(list(final_losses), list(final_losses.values()))
plt.xlabel("Loss")
plt.show()

In [None]:
# Encode the training split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_train)[0]
y_train = list(map(int, y_train))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_train, s=9, cmap='viridis', alpha=1)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


In [None]:
!pip3 install ipympl

In [None]:

# Encode the validation split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_val)[0]
# Cast the labels to plain ints so they can be used as scatter colours.
y_val = [int(y) for y in y_val]

# Figure with a single 3D axes for the latent-space scatter plot.
fig = plt.figure(figsize=(10, 9))
ax = fig.add_subplot(111, projection='3d')

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

%matplotlib widget

# fig = plt.figure()

scatter = ax.scatter(latent_space[:, 0], latent_space[:, 1], latent_space[:, 2], c=y_val, s=3**2, cmap='viridis', alpha=1)
plt.colorbar(scatter)
ax.set_xlabel('Latent Variable 1')
ax.set_ylabel('Latent Variable 2')
ax.set_zlabel('Latent Variable 3')
ax.set_title('3D Latent Space Visualization')

# Use `%matplotlib notebook` or `%matplotlib widget` in Jupyter Notebook/Lab for interactive plot
# plt.tight_layout()
plt.show()


In [None]:
# Encode the validation split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_val)[0]
y_val = list(map(int, y_val))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_val, s=9, cmap='viridis', alpha=1)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


In [None]:
# Encode the test split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_test)[0]
y_test = list(map(int, y_test))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_test, s=9, cmap='viridis', alpha=1)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


2023-11-15 14:06:58.386379: W tensorflow/c/c_api.cc:305] Operation '{name:'loss/mul' id:342 op device:{requested: '', assigned: ''} def:{{{node loss/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss/mul/x, loss/decoder_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
22840/22840 [==============================] - 64s 3ms/sample - loss: 9949928.5254 - val_loss: 8427.6236
Epoch 2/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 8685.8543 - val_loss: 8569.1816
Epoch 3/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8522.9825 - val_loss: 8377.5673
Epoch 4/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8506.5232 - val_loss: 8205.4903
Epoch 5/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8261.0748 - val_loss: 8104.8371
Epoch 6/100
22840/22840 [==============================] - 204s 9ms/sample - loss: 12601.6746 - val_loss: 9356.3297
Epoch 7/100
22840/22840 [==============================] - 151s 7ms/sample - loss: 17122.5691 - val_loss: 10473.3295
Epoch 8/100
22840/22840 [==============================] - 95s 4ms/sample - loss: 9971.8597 - val_loss: 9464.1363
Epoch 9/100
22840/22840 [==============================] - 123s 5ms/sample - loss: 9107.4455 - val_loss: 8522.2567
Epoch 10/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 11377.3423 - val_loss: 11114.9137
Epoch 11/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 11209.2646 - val_loss: 10870.3877
Epoch 12/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 10961.6752 - val_loss: 10756.3944
Epoch 13/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 10430.8617 - val_loss: 10006.0978

In [None]:
y_val

In [None]:
# Generate and visualize reconstructed samples

decoded_stft = dense_vae.predict(X_test)
# decoded_stft = MinMaxUnScaler(dense_vae.predict(X_val),X_val_min_max)

In [None]:
len(X_val)

In [None]:
len(decoded_stft)

In [None]:
decoded_stft = decoded_stft.reshape(len(decoded_stft),257,69,1)

In [None]:
decoded_stft.shape

In [None]:
def MinMaxUnScaler(features, min_max_values):
    """Undo per-sample min-max scaling and convert the result from dB to amplitude.

    Parameters
    ----------
    features : iterable of np.ndarray
        Scaled samples; each is reshaped to (257, 69) before unscaling.
    min_max_values : iterable of (min, max)
        The range recorded for each sample, paired with `features` by position.

    Returns
    -------
    np.ndarray
        One (257, 69) array per sample, after `librosa.db_to_amplitude`.
    """
    def _restore(feature, bounds):
        # Map the scaled values back onto [low, high], then leave the dB scale.
        low, high = bounds[0], bounds[1]
        db_values = (feature.reshape(257, 69) * (high - low)) + low
        return librosa.db_to_amplitude(db_values)

    return np.array([_restore(f, mm) for f, mm in zip(features, min_max_values)])

In [None]:
# def db_to_amplitude():
#     librosa.db_to_amplitude()
#     pass

In [None]:
decoded_stft[0]

In [None]:
decoded_stft[0]

In [None]:
# Undo the scaling of the reconstructions using the ranges recorded for the test split.
# NOTE(review): decoded_stft is rebound to its own unscaled version, so running this
# cell twice applies the unscaling twice.
decoded_stft = MinMaxUnScaler(decoded_stft,X_test_min_max)

In [None]:
y_test[1000]

In [None]:
plt.figure(figsize=(12,4))
librosa.display.specshow(X_test[5].reshape(257, 69), x_axis='time', y_axis='log')

In [None]:
# Use a single index for both the title and the plotted spectrogram. The original
# titled the plot with y_test[n] (n is not defined yet at this point in the
# notebook) while drawing sample 5.
sample_idx = 5
plt.figure(figsize=(12,4))
plt.title("Generated Audio for Digit " + str(y_test[sample_idx]))
librosa.display.specshow(decoded_stft[sample_idx].reshape(257, 69), x_axis='time', y_axis='log')

In [None]:
n = 256#900 #256 #500 #400
abc = librosa.istft(decoded_stft[n],hop_length=256)

## Inverse STFT

In [None]:
def ISTFT(feature):
    """Invert every spectrogram in `feature` back to a waveform.

    Each item is passed to `librosa.istft` with ``hop_length=256`` while a tqdm
    progress bar tracks the loop. The waveforms are returned stacked in a
    NumPy array, in input order.
    """
    progress = tqdm.tqdm(feature,desc="Appling inverse STFT ...")
    return np.array([librosa.istft(spec,hop_length=256) for spec in progress])

In [None]:
decoded_istft = ISTFT(decoded_stft)

In [None]:
# Imported here because the notebook's other Audio import sits in a later cell;
# without it this cell raises NameError on a fresh Run All.
from IPython.display import Audio

# Index of the test sample to listen to (other values tried: 456, 900, 500, 400).
n = 776

# Reconstruct the waveform from the decoded spectrogram.
abc = librosa.istft(decoded_stft[n],hop_length=256)

plt.figure(figsize=(12,4))
# Time axis in seconds, using the same 22050 Hz rate as the playback below.
time_values = np.linspace(0, len(abc)/22050, len(abc))
plt.title("Generated Audio for Digit " + str(y_test[n]))
plt.plot(time_values,abc,color="purple")
plt.xlabel("Time (in seconds)")
plt.ylabel("Amplitude")
plt.tight_layout()
plt.show()
Audio(data=abc,rate=22050)

In [None]:
# Imported here because the notebook's other Audio import sits in a later cell.
from IPython.display import Audio

# Reconstruct the waveform from the n-th decoded spectrogram.
abc = librosa.istft(decoded_stft[n], hop_length=256)

plt.figure(figsize=(12, 4))

# Left panel: magnitude spectrogram. The colorbar is labelled in dB, so convert
# the magnitudes to dB first (the original plotted raw magnitudes under that label).
plt.subplot(1, 2, 1)
plt.title("STFT")
stft_db = librosa.amplitude_to_db(np.abs(decoded_stft[n]), ref=np.max)
plt.imshow(stft_db, aspect='auto', origin='lower', cmap='viridis')
plt.colorbar(format='%+2.0f dB')
plt.xlabel('Time')
plt.ylabel('Frequency')

# Right panel: reconstructed waveform.
plt.subplot(1, 2, 2)
time_values = np.linspace(0, len(abc) / 22050, len(abc))
# decoded_stft was predicted from X_test, so the matching label comes from y_test
# (the original looked it up in y_val).
plt.title("Generated Audio for Digit " + str(y_test[n]))
plt.plot(time_values, abc, color="purple")
plt.xlabel("Time (in seconds)")
plt.ylabel("Amplitude")

plt.tight_layout()
plt.show()

# Display Audio
Audio(data=abc, rate=22050)

In [None]:
from IPython.display import Audio

Audio(data=abc,rate=22050)

## MLP-VAE with Regularization

In [None]:
# Encoder network: same widths as the plain MLP encoder, with 30% dropout
# between the hidden layers.
inputs = Input(shape=(X_train.shape[1],))

x = Dense(1024, activation='relu')(inputs)
for hidden_units in (512, 256):
    x = Dropout(0.3)(x)
    x = Dense(hidden_units, activation='relu')(x)

# Mean and log-variance of the approximate posterior over the latent code.
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)


def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Parameters
    ----------
    args : sequence of two tensors
        ``(z_mean, z_log_var)`` as produced by the encoder's two Dense heads.

    Returns
    -------
    tensor
        ``z_mean + exp(0.5 * z_log_var) * epsilon`` with ``epsilon`` drawn from
        a standard normal distribution.
    """
    z_mean, z_log_var = args
    batch_size = K.shape(z_mean)[0]
    # Take the latent width from the tensor itself rather than the notebook-level
    # `latent_dim`, which a later cell rebinds to a different value.
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch_size, dim), mean=0., stddev=1.)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Sample z from the encoder's (z_mean, z_log_var) heads.
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# Decoder network: the encoder's hidden widths in reverse order, with dropout.
decoder_inputs = Input(shape=(latent_dim,), name='decoder_input')
y = Dense(256, activation='relu')(decoder_inputs)
y = Dropout(0.3)(y)
y = Dense(512, activation='relu')(y)
y = Dropout(0.3)(y)
y = Dense(1024, activation='relu')(y)
# Output width follows the flattened input size instead of a hard-coded 17733,
# so the decoder stays in sync with X_train if the feature shape changes.
outputs = Dense(original_dim, activation='sigmoid')(y)

# Define the encoder and decoder models
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
decoder = Model(decoder_inputs, outputs, name='decoder')


In [None]:
# see encoder summary
encoder.summary()

In [None]:
# see decoder summary
decoder.summary()

In [None]:
# Define the loss function for VAE
def vae_loss(inputs, x_decoded_mean):
    """Return the batch-mean VAE loss: reconstruction term plus KL divergence.

    The reconstruction term is the binary cross-entropy between `inputs` and
    `x_decoded_mean`, multiplied by `original_dim`. The KL term is computed from
    the notebook-level `z_mean` and `z_log_var` tensors of the encoder.
    """
    reconstruction = binary_crossentropy(inputs, x_decoded_mean) * original_dim
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_divergence = -0.5 * K.sum(kl_terms, axis=-1)
    return K.mean(reconstruction + kl_divergence)

In [None]:
# VAE model: feed the encoder's sampled z (its third output) through the decoder.
latent_sample = encoder(inputs)[2]
outputs = decoder(latent_sample)
dense_vae = Model(inputs, outputs, name='vae')
dense_vae.compile(loss=vae_loss, optimizer='adam')
dense_vae.summary()

In [None]:
# class TestLossCallback(Callback):
#     def __init__(self, test_data):
#         self.test_data = test_data
#         self.test_losses = []

#     def on_epoch_end(self, epoch, logs=None):
#         test_loss = self.model.evaluate(self.test_data, self.test_data, verbose=0)
#         print(f"\nTest Loss after Epoch {epoch + 1}: {test_loss}")
#         self.test_losses.append(test_loss)

# test_loss_callback = TestLossCallback(X_test)



In [None]:
# Train the VAE as an autoencoder: each split serves as both input and target.
dense_vae_history = dense_vae.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=128,
    shuffle=True,
    validation_data=(X_val, X_val),
)

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(10, 8))
for history_key in ("loss", "val_loss"):
    plt.plot(dense_vae_history.history[history_key])
plt.legend(["Training Loss", "Validation Loss"])
plt.title("Loss plot for Variational AutoEncoder (with MLP)")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.tight_layout()
plt.show()

In [None]:
# Evaluate the model on the test data
test_loss = dense_vae.evaluate(X_test, X_test)

In [None]:
test_loss

In [None]:
# Compare the final training / validation losses with the test loss.
# barh takes (y positions, bar widths); the original call passed three scalars,
# which drew a single bar, and its hard-coded numbers came from a different run.
final_losses = {
    "Training": dense_vae_history.history["loss"][-1],
    "Validation": dense_vae_history.history["val_loss"][-1],
    "Test": test_loss,
}
plt.barh(list(final_losses), list(final_losses.values()))
plt.xlabel("Loss")
plt.show()

In [None]:
# Encode the training split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_train)[0]
y_train = list(map(int, y_train))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_train, s=9, cmap='viridis', alpha=0.7)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


In [None]:
# Encode the validation split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_val)[0]
y_val = list(map(int, y_val))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_val, s=9, cmap='viridis', alpha=1)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


In [None]:
# Encode the test split; index 0 selects the encoder's z_mean output.
latent_space = encoder.predict(X_test)[0]
y_test = list(map(int, y_test))

# Scatter the first two latent dimensions, coloured by label.
plt.figure(figsize=(10, 9))
first_dim, second_dim = latent_space[:, 0], latent_space[:, 1]
plt.scatter(first_dim, second_dim, c=y_test, s=9, cmap='viridis', alpha=1)
plt.colorbar()
plt.xlabel('Latent Variable 1 ')
plt.ylabel('Latent Variable 2 ')
plt.title('2D Latent Space Visualization')
plt.tight_layout()
plt.show()


In [None]:
import soundfile as sf

In [None]:
# sf.output.write_wav("Val_set_900_1.wav", abc, 22050)
sf.write("Val_set_900_1.wav", abc, 22050, format='wav')

In [None]:
from IPython.display import Audio

# `reduced_noise` is never assigned anywhere in this notebook, so the original
# call raised NameError on a fresh run. Play `abc`, the waveform this notebook
# writes to the .wav file, instead.
# NOTE(review): if a noise-reduction step was intended, restore it and assign its
# output before this cell.
Audio(data=abc,rate=22050)

In [None]:

n = 5 # Number of samples to visualize
# One column per sample: original on the top row, reconstruction underneath.
# The original cell indexed an Axes object (`ax[2, n, i + 1]`), which raises
# TypeError, and reshaped to (69, 257), which scrambles the 257 x 69 layout.
fig, axes = plt.subplots(2, n, figsize=(4 * n, 6), squeeze=False)
for i in range(n):
    # Original spectrogram. decoded_stft was predicted from X_test, so the
    # matching originals come from X_test as well.
    librosa.display.specshow(X_test[i].reshape(257, 69), sr=22050, x_axis='time', y_axis='log', ax=axes[0, i])
    axes[0, i].get_xaxis().set_visible(False)
    axes[0, i].get_yaxis().set_visible(False)

    # Reconstructed spectrogram for the same sample.
    librosa.display.specshow(decoded_stft[i].reshape(257, 69), sr=22050, x_axis='time', y_axis='log', ax=axes[1, i])
    axes[1, i].get_xaxis().set_visible(False)
    axes[1, i].get_yaxis().set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
# Reconstruct test samples: the following cells compare decoded_imgs[5] against
# X_test[5], so the reconstructions must come from the same split (the original
# predicted on X_train).
decoded_imgs = dense_vae.predict(X_test[:20])

In [None]:
librosa.display.specshow(X_test[5].reshape(257, 69), sr=22050, x_axis='time', y_axis='log')

In [None]:
print(y_test)

In [None]:
librosa.display.specshow(decoded_imgs[5].reshape(257, 69), sr=22050, x_axis='time', y_axis='log')

In [None]:
# Pass hop_length=256 to match every other istft call in this notebook; without
# it librosa falls back to its default of win_length // 4.
# NOTE(review): decoded_imgs is raw model output in the scaled [0, 1] range, not
# an amplitude spectrogram — confirm whether it should be unscaled first.
abc = librosa.istft(decoded_imgs[5].reshape(257, 69), hop_length=256)

In [None]:
plt.plot(abc)

In [None]:
from IPython.display import Audio

Audio(data=abc,rate=22050)

### Unscale Min Max 

In [None]:
def MinMaxScaler(features):
    """Scale each sample independently to the [0, 1] range.

    Parameters
    ----------
    features : iterable of array-like
        One array per sample; each is normalised with its own min and max.

    Returns
    -------
    scaled : np.ndarray
        The normalised samples, stacked in input order.
    min_max_values : np.ndarray
        One ``(min, max)`` pair per sample, needed to undo the scaling later.
    """
    scaled = []
    min_max_values = []
    for f in features:
        f = np.asarray(f)
        min_val = np.min(f)
        max_val = np.max(f)
        value_range = max_val - min_val
        # A constant sample has zero range; dividing by it would fill the sample
        # with NaNs (0/0). Dividing by 1 instead maps such a sample to all zeros.
        divisor = value_range if value_range != 0 else 1
        f_normalized = (f - min_val) / divisor
        min_max_values.append((min_val, max_val))
        scaled.append(f_normalized)
    return np.array(scaled), np.array(min_max_values)

In [None]:
# X_*_features already hold the scaled arrays (they were rebound when first
# scaled), so calling MinMaxScaler on them again would return the range of the
# scaled data and clobber the real per-sample ranges. Keep the ranges captured at
# the original scaling step and only check that they are still aligned.
assert len(X_train_min_max) == len(X_train_features)
assert len(X_val_min_max) == len(X_val_features)
assert len(X_test_min_max) == len(X_test_features)

In [None]:
def MinMaxUnScaler(features, min_max_values):
    """Undo per-sample min-max scaling.

    Parameters
    ----------
    features : iterable of array-like
        Scaled samples.
    min_max_values : iterable of (min, max)
        The range recorded for each sample, paired with `features` by position.

    Returns
    -------
    np.ndarray
        Every unscaled sample, stacked in input order (empty for empty input).
    """
    unscaled = []
    for f, min_max in zip(features, min_max_values):
        unscaled_feature = (f * (min_max[1] - min_max[0])) + min_max[0]
        unscaled.append(unscaled_feature)
    # Return the whole collection; the original returned only the last sample
    # (`unscaled_feature`) and raised NameError on empty input.
    return np.array(unscaled)

In [None]:
# Generate and visualize reconstructed samples
generated_audio = dense_vae.predict(X_val)
n = 1 # Number of samples to visualize
# One column per sample: original on the top row, reconstruction underneath.
# The original cell referenced undefined names (`axs`, `j`), passed the whole
# X_val matrix to specshow, and never used generated_audio.
fig, axs = plt.subplots(2, n, figsize=(6 * n, 6), squeeze=False)
for i in range(n):
    # Original spectrogram from the validation split.
    librosa.display.specshow(X_val[i].reshape(257, 69), sr=22050, x_axis='time', y_axis='log', ax=axs[0, i])
    axs[0, i].get_xaxis().set_visible(False)
    axs[0, i].get_yaxis().set_visible(False)

    # Model reconstruction of the same validation sample.
    librosa.display.specshow(generated_audio[i].reshape(257, 69), sr=22050, x_axis='time', y_axis='log', ax=axs[1, i])
    axs[1, i].get_xaxis().set_visible(False)
    axs[1, i].get_yaxis().set_visible(False)
plt.tight_layout()
plt.show()


## VAE with CNN

In [None]:
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Lambda, Reshape, Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras import backend as K

In [None]:
# Define the Convolutional VAE architecture
# Spectrograms in this notebook are 257 x 69 (every reshape uses (257, 69), and
# 257 * 69 = 17733 is the flattened width of the MLP models); the previous
# (1025, 81, 1) does not match the data passed to fit.
input_shape = (257, 69, 1)
latent_dim = 2  # Size of the latent space

In [None]:
# Encoder network: two same-padded 3x3 convolutions, then a dense bottleneck.
inputs = Input(shape=input_shape)
x = inputs
for n_filters in (32, 64):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)

# Mean and log-variance of the approximate posterior over the latent code.
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

In [None]:
def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    `args` is the pair ``(z_mean, z_log_var)``. Returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is standard
    normal noise shaped (batch, latent width), both read from ``z_mean``.
    """
    mu, log_var = args
    n_samples = K.shape(mu)[0]
    n_latent = K.int_shape(mu)[1]
    noise = K.random_normal(shape=(n_samples, n_latent), mean=0.0, stddev=1.0)
    std = K.exp(0.5 * log_var)
    return mu + std * noise

In [None]:
z = Lambda(sampling)([z_mean, z_log_var])

In [None]:
encoder = Model(inputs, z)
encoder.summary()

In [None]:
decoder_input = Input(shape=(latent_dim,))
# Derive the feature-map size from input_shape instead of repeating the literals
# 1025 and 81, so the decoder output always matches the encoder input.
height, width = input_shape[0], input_shape[1]
y = Dense(height * width * 64, activation='relu')(decoder_input)
y = Reshape((height, width, 64))(y)
# Same-padded transposed convolutions keep the spatial size; the final
# single-channel sigmoid layer mirrors the (height, width, 1) input.
y = Conv2DTranspose(32, (3, 3), activation='relu', padding='same')(y)
y = Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')(y)

In [None]:
decoder_output = y
decoder = Model(decoder_input,y)

decoder.summary()

In [None]:
outputs = decoder(encoder(inputs))

In [None]:
# Define the loss function for VAE
def vae_loss(inputs, outputs):
    """Return the batch-mean VAE loss for the convolutional model.

    The reconstruction term sums the element-wise binary cross-entropy over
    axes 1-3 of each sample. The KL term is computed from the notebook-level
    `z_mean` and `z_log_var` tensors of the encoder.
    """
    per_element_bce = K.binary_crossentropy(inputs, outputs)
    reconstruction = K.sum(per_element_bce, axis=(1, 2, 3))
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_divergence = -0.5 * K.sum(kl_terms, axis=-1)
    return K.mean(reconstruction + kl_divergence)

In [None]:
cnn_vae = Model(inputs, outputs)
cnn_vae.compile(optimizer='adam', loss=vae_loss)
cnn_vae.summary()

In [None]:
# Train the VAE
# NOTE(review): X_*_features are fed to the conv model as-is, so each sample must
# already have the (height, width, channels) shape declared in input_shape — confirm.
cnn_vae_history = cnn_vae.fit(X_train_features, X_train_features, epochs=2, batch_size=128, shuffle=True, validation_data=(X_val_features, X_val_features))