In [2]:
import os
import sys
import glob
import tqdm
import math
import librosa
import librosa.display as display
import pickle

import numpy as np
np.random.seed(42)
import pandas as pd

import IPython.display as i_disn
%matplotlib inline
import matplotlib as mlp
import matplotlib.pyplot as plt
mlp.rc("xtick",labelsize=12)
mlp.rc("ytick",labelsize=12)
mlp.rc("axes",labelsize=14)

import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split


In [3]:
import tensorflow as tf
import keras
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import Callback

from tensorflow.keras.regularizers import l1
from tensorflow.keras.regularizers import l2
from tensorflow.keras.regularizers import l1_l2

from tensorflow.python.framework.ops import disable_eager_execution, enable_eager_execution

disable_eager_execution()
# enable_eager_execution()

In [17]:
with open(r"X_train_features.pkl","rb") as file:
    X_train_features = pickle.load(file)
    
with open(r"X_test_features.pkl","rb") as file:
    X_test_features = pickle.load(file)
    
with open(r"X_val_features.pkl","rb") as file:
    X_val_features = pickle.load(file)

with open(r"y_train.pkl","rb") as file:
    y_train = pickle.load(file)

with open(r"y_test.pkl","rb") as file:
    y_test = pickle.load(file)

with open(r"y_val.pkl","rb") as file:
    y_val = pickle.load(file)

In [18]:
print("Shape of training data: ", X_train_features.shape)
print("Shape of test data: ", X_test_features.shape)
print("Shape of validation data: ", X_val_features.shape)

Shape of training data:  (22840, 257, 69, 1)
Shape of test data:  (2820, 257, 69, 1)
Shape of validation data:  (2538, 257, 69, 1)


In [27]:
X_train_features[5]

array([[[0.6829035 ],
        [0.7642675 ],
        [0.67142975],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.6792315 ],
        [0.7478292 ],
        [0.7216569 ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.6563965 ],
        [0.6257569 ],
        [0.5951136 ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       ...,

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]]], dtype=float32)

In [20]:
def MinMaxScaler(features):
    scaled = []
    min_max_values = []
    for f in features:
        min_val = np.min(f)
        max_val = np.max(f)
        f_normalized = (f - min_val) / (max_val - min_val)
        min_max_values.append((min_val,max_val))
        scaled.append(f_normalized)
    return np.array(scaled),np.array(min_max_values)

In [21]:
# X_train_features, _  = MinMaxScaler(X_train_features)
# X_val_features , _ = MinMaxScaler(X_val_features)
# X_test_features , _= MinMaxScaler(X_test_features)

In [22]:
X_train_features, X_train_min_max  = MinMaxScaler(X_train_features)
X_val_features , X_val_min_max = MinMaxScaler(X_val_features)
X_test_features , X_test_min_max = MinMaxScaler(X_test_features)

In [23]:
def MinMaxUnScaler(features, min_max_values):
    unscaled = []
    for f,min_max in zip(features,min_max_values):
        unscaled_feature = (f * (min_max[1] - min_max[0])) + min_max[0]
        unscaled.append(unscaled_feature)
    return np.array(unscaled_feature)

In [24]:
X_train_unscaled = MinMaxUnScaler(X_train_features,X_train_min_max)

In [28]:
X_train_features[5]

array([[[0.6829035 ],
        [0.7642675 ],
        [0.67142975],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.6792315 ],
        [0.7478292 ],
        [0.7216569 ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.6563965 ],
        [0.6257569 ],
        [0.5951136 ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       ...,

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.        ],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]]], dtype=float32)

In [29]:
X_train_unscaled[5]

array([[-45.258553 ],
       [-50.930992 ],
       [-56.96792  ],
       [-59.533783 ],
       [-58.934425 ],
       [-62.117554 ],
       [-58.834568 ],
       [-51.464764 ],
       [-49.882156 ],
       [-37.038    ],
       [-47.772446 ],
       [-44.34912  ],
       [-50.97335  ],
       [-40.37213  ],
       [-43.60937  ],
       [-68.1235   ],
       [-34.536587 ],
       [-16.81588  ],
       [ -4.2324905],
       [ -4.2877045],
       [ -2.368332 ],
       [ -2.5424194],
       [ -5.567108 ],
       [ -7.2952576],
       [ -5.4086075],
       [ -4.6268005],
       [ -5.474991 ],
       [ -7.591919 ],
       [ -8.63797  ],
       [ -8.500275 ],
       [ -7.799553 ],
       [ -6.982849 ],
       [ -6.3534164],
       [ -6.073807 ],
       [ -5.983444 ],
       [ -6.2961884],
       [ -7.132103 ],
       [ -7.529915 ],
       [ -7.4950867],
       [ -8.007965 ],
       [-10.673752 ],
       [-14.893188 ],
       [-17.6651   ],
       [-17.865585 ],
       [-27.68695  ],
       [-3

In [10]:
print("Shape of training data: ", X_train_features.shape)
print("Shape of test data: ", X_test_features.shape)
print("Shape of validation data: ", X_val_features.shape)

Shape of training data:  (22840, 257, 69, 1)
Shape of test data:  (2820, 257, 69, 1)
Shape of validation data:  (2538, 257, 69, 1)


In [11]:
X_train =  X_train_features.reshape(len(X_train_features),np.prod(X_train_features.shape[1:]))
X_test = X_test_features.reshape(len(X_test_features),np.prod(X_test_features.shape[1:]))
X_val = X_val_features.reshape(len(X_val_features),np.prod(X_val_features.shape[1:]))

In [12]:
print("Shape of training data: ", X_train.shape)
print("Shape of test data: ", X_test.shape)
print("Shape of validation data: ", X_val.shape)

Shape of training data:  (22840, 17733)
Shape of test data:  (2820, 17733)
Shape of validation data:  (2538, 17733)


In [16]:
os.getcwd()

'/Users/rabinnepal/gitHub/Neural Network Project/codes'

In [15]:
vae = load_model("50_epoch.h5")

XXX lineno: 14, opcode: 47


SystemError: unknown opcode

In [None]:

original_dim = X_train.shape[1]
latent_dim = 2  # Size of the latent space



In [None]:
# Encoder network
inputs = Input(shape=(X_train.shape[1],))

x = Dense(4096, activation='relu')(inputs)
x = Dense(1024, activation='relu')(x)
x = Dense(256, activation='relu')(x)
# x = Dense(128, activation='relu')(x)

z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# Reparameterization trick to sample from the latent space
def sampling(args):
    z_mean, z_log_var = args
    batch_size = K.shape(z_mean)[0]
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., stddev=1.)
    return z_mean #+ K.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# Decoder network
decoder_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(256, activation='relu')(decoder_inputs)
x = Dense(1024, activation='relu')(x)
x = Dense(4096, activation='relu')(x)
outputs = Dense(17733, activation='sigmoid')(x)  # Output layer with sigmoid activation for MNIST

# Define the encoder and decoder models
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
decoder = Model(decoder_inputs, outputs, name='decoder')



In [None]:
# see encoder summary
encoder.summary()

In [None]:
# see decoder summary
decoder.summary()

In [None]:
# Define the loss function for VAE
def vae_loss(inputs, x_decoded_mean):
    recon_loss = original_dim * binary_crossentropy(inputs, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return K.mean(recon_loss + kl_loss)

In [None]:
# VAE model
outputs = decoder(encoder(inputs)[2])
dense_vae = Model(inputs, outputs, name='vae')
dense_vae.compile(optimizer='adam', loss=vae_loss)
dense_vae.summary()

In [None]:
# class TestLossCallback(Callback):
#     def __init__(self, test_data):
#         self.test_data = test_data
#         self.test_losses = []

#     def on_epoch_end(self, epoch, logs=None):
#         test_loss = self.model.evaluate(self.test_data, self.test_data, verbose=0)
#         print(f"\nTest Loss after Epoch {epoch + 1}: {test_loss}")
#         self.test_losses.append(test_loss)

# test_loss_callback = TestLossCallback(X_test)



In [None]:
#  Train the VAE

# dense_vae_history = dense_vae.fit(X_train, X_train, epochs=10, batch_size=128, shuffle=True, validation_data=(X_val, X_val),callbacks=[test_loss_callback])

#  Train the VAE

dense_vae_history = dense_vae.fit(X_train, X_train, epochs=100, batch_size=128, shuffle=True, validation_data=(X_val, X_val))

In [None]:
plt.plot(dense_vae_history.history["loss"])
plt.plot(dense_vae_history.history["val_loss"])
plt.legend(["Loss", "Validation Loss"])
plt.title("Loss plot for VAE with MLP")
plt.xlabel("Number of Epochs")
plt.ylabel("")
plt.show()

In [None]:
# Evaluate the model on the test data
test_loss = dense_vae.evaluate(X_test, X_test)

In [None]:
test_loss

2023-11-15 14:06:58.386379: W tensorflow/c/c_api.cc:305] Operation '{name:'loss/mul' id:342 op device:{requested: '', assigned: ''} def:{{{node loss/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss/mul/x, loss/decoder_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
22840/22840 [==============================] - 64s 3ms/sample - loss: 9949928.5254 - val_loss: 8427.6236
Epoch 2/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 8685.8543 - val_loss: 8569.1816
Epoch 3/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8522.9825 - val_loss: 8377.5673
Epoch 4/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8506.5232 - val_loss: 8205.4903
Epoch 5/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 8261.0748 - val_loss: 8104.8371
Epoch 6/100
22840/22840 [==============================] - 204s 9ms/sample - loss: 12601.6746 - val_loss: 9356.3297
Epoch 7/100
22840/22840 [==============================] - 151s 7ms/sample - loss: 17122.5691 - val_loss: 10473.3295
Epoch 8/100
22840/22840 [==============================] - 95s 4ms/sample - loss: 9971.8597 - val_loss: 9464.1363
Epoch 9/100
22840/22840 [==============================] - 123s 5ms/sample - loss: 9107.4455 - val_loss: 8522.2567
Epoch 10/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 11377.3423 - val_loss: 11114.9137
Epoch 11/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 11209.2646 - val_loss: 10870.3877
Epoch 12/100
22840/22840 [==============================] - 62s 3ms/sample - loss: 10961.6752 - val_loss: 10756.3944
Epoch 13/100
22840/22840 [==============================] - 61s 3ms/sample - loss: 10430.8617 - val_loss: 10006.0978

In [None]:
# Generate and visualize reconstructed samples
decoded_imgs = dense_vae.predict(X_test)
n = 1 # Number of samples to visualize
# plt.figure(figsize=(20, 4))
for i in range(n):
    # Original images
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(X_test[i].reshape(69, 257))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Reconstructed images
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(69, 257))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
decoded_imgs = dense_vae.predict(X_train[:20])

In [None]:
librosa.display.specshow(X_test[0].reshape(257, 69), sr=22050, x_axis='time', y_axis='log')

In [None]:
librosa.display.specshow(decoded_imgs[0].reshape(257, 69), sr=22050, x_axis='time', y_axis='log')

In [None]:
abc = librosa.istft(decoded_imgs[0].reshape(257, 69))

In [None]:
plt.plot(abc)

In [None]:
from IPython.display import Audio

Audio(data=abc,rate=22050)

## VAE with CNN

In [None]:
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Lambda, Reshape, Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras import backend as K

In [None]:
# Define the Convolutional VAE architecture
input_shape = (1025,81,1)
latent_dim = 2  # Size of the latent space

In [None]:
# Encoder network
inputs = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
# x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = Flatten()(x)
x = Dense(256,activation='relu')(x)
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

In [None]:
def sampling(args):
    z_mean, z_log_var = args
    # return the z mean
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim), mean=0.0, stddev=1.0)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

In [None]:
z = Lambda(sampling)([z_mean, z_log_var])

In [None]:
encoder = Model(inputs, z)
encoder.summary()

In [None]:
decoder_input = Input(shape=(latent_dim,))
y = Dense(1025 * 81 * 64, activation='relu')(decoder_input)
y = Reshape((1025, 81, 64))(y)
# y = Conv2DTranspose(64, (3, 3), activation='relu', padding='same')(y)
y = Conv2DTranspose(32, (3, 3), activation='relu', padding='same')(y)
y = Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')(y)

In [None]:
decoder_output = y
decoder = Model(decoder_input,y)

decoder.summary()

In [None]:
outputs = decoder(encoder(inputs))

In [None]:
# Define the loss function for VAE
def vae_loss(inputs, outputs):
    xent_loss = K.sum(K.binary_crossentropy(inputs, outputs), axis=(1, 2, 3))
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return K.mean(xent_loss + kl_loss)

In [None]:
cnn_vae = Model(inputs, outputs)
cnn_vae.compile(optimizer='adam', loss=vae_loss)
cnn_vae.summary()

In [None]:
# Train the VAE
cnn_vae_history = cnn_vae.fit(X_train_features, X_train_features, epochs=2, batch_size=128, shuffle=True, validation_data=(X_val_features, X_val_features))