# Load a saved TensorFlow model and run inference (autoencoder on ToyADMOS)

In [26]:
# Preprocessing and decision constants that go with the saved autoencoder.
# NOTE(review): presumably derived from the training data in the original
# notebook -- confirm before reusing them with a different model.

# Subtracted from the input batch before inference (the "shift").
MIN_VALUE_FOR_MODEL = -62.62027
# Divisor applied after the shift (the "scale").
MAX_VALUE_FOR_MODEL = 21.5335
# Per-sample reconstruction MSE is compared against this value.
# Spelling ("TRESHOLD") is kept because later cells reference this exact name.
TRESHOLD_FOR_MODEL = 0.017349888

In [1]:
import tensorflow as tf
import librosa

We first load the model from the `saved_tf_models` directory.

In [5]:
# Location of the exported model on disk (relative to the notebook).
save_path = "./saved_tf_models/test/"
# Restore the Keras model stored at `save_path`.
conv_autoencoder = tf.keras.models.load_model(filepath=save_path)

## We now have to prepare the ToyADMOS audio to input to the saved model

This is the same preprocessing as in the original notebook.

In [6]:
# The dataset has 1800 normal and 400 anomalous sound files;
# this notebook only uses a subset of each class.
NUMBER_OF_ANOMALOUS_FILES = 40
NUMBER_OF_NORMAL_FILES = 40

In [7]:
# Directories holding the normal and anomalous recordings.
# NOTE(review): these are absolute, machine-specific paths -- point them at
# your local copy of the dataset before running.
normal_path = "/Users/emjn/Documents/DTU/Datasets/ToyConveyor/case1/NormalSound_IND/"
anomalous_path = "/Users/emjn/Documents/DTU/Datasets/ToyConveyor/case1/AnomalousSound_IND/"

# Only files whose name matches "*ch1*.wav" are used. The matches are sorted
# because the order returned by the glob is not guaranteed, and the slices
# below must pick the same files on every run.
normal_files_path = sorted(tf.io.gfile.glob(normal_path + "*ch1*.wav"))
anomalous_files_path = sorted(tf.io.gfile.glob(anomalous_path + "*ch1*.wav"))

# Fail early with a readable message instead of an indexing error further down.
if not normal_files_path or not anomalous_files_path:
    raise FileNotFoundError(
        "No '*ch1*.wav' files found; check normal_path and anomalous_path."
    )

# Keep the first N paths of each class and turn them into string tensors.
normal_files_path = tf.convert_to_tensor(normal_files_path[:NUMBER_OF_NORMAL_FILES])
anomalous_files_path = tf.convert_to_tensor(anomalous_files_path[:NUMBER_OF_ANOMALOUS_FILES])

# Load one clip only to obtain the sample rate that `librosa.load` produces
# with its default arguments; the decoded samples are discarded here.
audio_file = normal_files_path[0].numpy()
_, sr = librosa.load(audio_file)

def custom_librosa_load(audio_file):
    """Decode one audio file with librosa and return only its samples.

    Args:
        audio_file: object exposing `.numpy()` that yields the file path
            (here, one element of the file-path tensor).

    Returns:
        The first item returned by `librosa.load`; the sample rate that
        comes with it is dropped.
    """
    samples = librosa.load(audio_file.numpy())[0]
    return samples

# Decode every selected file into a float32 waveform, normal clips first and
# anomalous clips second.
normal_audio, anomalous_audio = (
    tf.map_fn(fn=custom_librosa_load, elems=file_paths, fn_output_signature=tf.float32)
    for file_paths in (normal_files_path, anomalous_files_path)
)

# Window length and hop, in samples; same values as the n_fft / hop_length
# used for the mel-spectrogram computation.
HOP_SIZE = 512
FRAME_SIZE = 2048

def apply_stft(audio_sample):
    """Return the log-scaled (dB) mel spectrogram of one waveform.

    Despite the name, this does not return a raw STFT: it calls
    `librosa.feature.melspectrogram` (256 mel bands, window `FRAME_SIZE`,
    hop `HOP_SIZE`, module-level sample rate `sr`) and converts the result
    to decibels with `librosa.power_to_db`.

    Args:
        audio_sample: object exposing `.numpy()`; one element of the
            audio batch.

    Returns:
        The array produced by `librosa.power_to_db`.
    """
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword,
    # and older releases accept the keyword form as well.
    mel_spectrogram = librosa.feature.melspectrogram(
        y=audio_sample.numpy(),
        sr=sr,
        n_fft=FRAME_SIZE,
        hop_length=HOP_SIZE,
        n_mels=256,
    )
    return librosa.power_to_db(mel_spectrogram)

# Turn every waveform into its dB mel spectrogram, normal clips first and
# anomalous clips second.
normal_magnitudes, anomalous_magnitudes = (
    tf.map_fn(fn=apply_stft, elems=waveforms)
    for waveforms in (normal_audio, anomalous_audio)
)

We now have to create a dataset to run inference on.

In [8]:
# Stack both classes along axis 0: normal spectrograms first, anomalous after.
magnitudes = tf.concat(values=[normal_magnitudes, anomalous_magnitudes], axis=0)

In [27]:
# Append a trailing axis so the batch is 4-D, which ZeroPadding2D requires.
# Fixed: this cell used to read `test_magnitudes`, which no visible cell
# defines (stale kernel state); the batch built in the previous cell is
# `magnitudes`.
magnitudes_4D = magnitudes[..., tf.newaxis]

# Pad one zero column at the start of the second spatial axis; the first
# spatial axis is left untouched.
x = tf.keras.layers.ZeroPadding2D(padding=((0, 0), (1, 0)))(magnitudes_4D)

# Apply shift
x = tf.math.subtract(x, MIN_VALUE_FOR_MODEL)

# Apply scale
# NOTE(review): this divides by MAX rather than (MAX - MIN); presumably it
# mirrors the preprocessing the model was trained with -- confirm.
x = x / MAX_VALUE_FOR_MODEL

## Running inference using the loaded model

In [28]:
# Run the preprocessed batch through the loaded autoencoder.
predictions = conv_autoencoder.predict(x=x)

In [29]:
# Display the shape of the model output, for comparison with the input shape.
predictions.shape

(80, 256, 432, 1)

In [30]:
# Display the shape of the preprocessed batch that was fed to the model.
x.shape

TensorShape([80, 256, 432, 1])

In [31]:
# Flatten everything but the batch axis so the MSE is a single value per sample.
reshaped_x = tf.reshape(x, (x.shape[0], -1))
reshaped_predictions = tf.reshape(predictions, (predictions.shape[0], -1))
# Mean squared reconstruction error between each input and its reconstruction.
loss = tf.keras.losses.mse(reshaped_x, reshaped_predictions)

In [32]:
# True where a sample's reconstruction error is below the model threshold.
# The first NUMBER_OF_NORMAL_FILES entries belong to the normal clips.
loss < TRESHOLD_FOR_MODEL

<tf.Tensor: shape=(80,), dtype=bool, numpy=
array([ True,  True, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False])>