# Urban Noise Monitoring

In this notebook we implement a deep learning model for the urban noise monitoring task. In particular, our model has to recognize three different vehicles (car, bus, and motorcycle) based only on audio data. In the first section we set up the notebook by importing all the dependencies. Then we load and preprocess the data. In the third section we build and train the model. In the last section we test the selected model and finally save it.

## Notebook Setup

In [11]:
import tensorflow as tf
from tensorflow.keras.metrics import *
import tensorflow_addons as tfa
import tensorflow_model_optimization as tfmot

from glob import glob

import numpy as np


from MAVD_Preprocessing import *
from functools import partial
tf.compat.v1.enable_eager_execution()

In [12]:
import os
import random

import numpy as np

# Single seed value shared by every random number generator used below.
seed = 42

# Export the seed-related environment variables in one go.
os.environ.update({
    'PYTHONHASHSEED': str(seed),
    'TF_DETERMINISTIC_OPS': '1',
})

# Seed the Python, TensorFlow and NumPy generators.
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)


In [13]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

import datetime

MODEL_NAME = "hmw_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

log_dir = f"logs/fit/{MODEL_NAME}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
MODEL_NAME

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


'hmw_20230411-154544'

## Data import and preprocessing

In [14]:
# Target labels
LABELS = ['car', 'bus', 'motorcycle']  # ,'other','truck','chatter']
num_classes = len(LABELS)

# Task selection and the variables that depend on it
task = 'multilabel'  # Select 'multilabel' or 'singlelabel'
if task == 'singlelabel':
    last_activation, threshold = 'softmax', None
else:
    last_activation, threshold = 'sigmoid', 0.5

# Training parameters
TRAINING_ARGS = dict(
    batch_size=32,
    initial_learning_rate=0.01,
    end_learning_rate=1.e-6,
    epochs=10,
)

# Preprocessing parameters (forwarded as keyword arguments to get_mfccs)
PREPROCESSING_MFCCS_ARGS = dict(
    downsampling_rate=44100,
    frame_length_in_s=0.032,
    frame_step_in_s=0.008,
    num_mel_bins=128,
    lower_frequency=0,
    upper_frequency=2000,
    num_coefficients=10,
)

### Optimization parameters
# Width multiplier applied to the number of convolution filters
alpha = 0.25
# Weight pruning switch and its target sparsity
weight_pruning = True
final_sparsity = 0.7
# Selects the depthwise-separable variant of the alternative CNN
depthwise = False

# Shortcuts for the two training parameters used most often
batch_size, epochs = TRAINING_ARGS['batch_size'], TRAINING_ARGS['epochs']

Prepare the preprocessing functions

In [15]:
# Bind the MFCC hyperparameters once, so the mapping functions below only
# have to pass the filename.
get_frozen_mfccs = partial(get_mfccs, **PREPROCESSING_MFCCS_ARGS)

def preprocess_mfccs_single(filename):
    """Map one audio file to a (features, label indicator vector) pair.

    Args:
        filename: path of the audio file, forwarded to `get_frozen_mfccs`.

    Returns:
        A tuple `(signal, label_id)`. `signal` is the first output of
        `get_frozen_mfccs` with a trailing axis of size 1 appended.
        `label_id` is a flat int32 tensor with one entry per element of
        `LABELS`, equal to 1 where the file's label matches that element
        and 0 elsewhere.
    """
    signal, label = get_frozen_mfccs(filename)
    signal = tf.expand_dims(signal, -1)
    # Compare the label with every target class. The leftover debug prints
    # were removed: inside Dataset.map they only fire while the function is
    # traced and just add noise to the cell output.
    label = [label == elem for elem in LABELS]
    label_id = tf.cast(label, dtype=tf.int32)
    label_id = tf.reshape(label_id, shape=(-1,))
    return signal, label_id

def preprocess_mfccs_multi(filename):
    """Map one audio file to a (features, numeric label) pair.

    Args:
        filename: path of the audio file, forwarded to `get_frozen_mfccs`.

    Returns:
        A tuple `(features, label_id)`. `features` is the first output of
        `get_frozen_mfccs` with a trailing axis of size 1 appended, and
        `label_id` is the second output parsed from strings into int32.
    """
    features, raw_label = get_frozen_mfccs(filename)
    features = tf.expand_dims(features, axis=-1)
    return features, tf.strings.to_number(raw_label, out_type=tf.int32)

In [16]:
# List the audio files of each split
train_ds = tf.data.Dataset.list_files(f'MAVD-traffic/{task}_train/*.flac')
val_ds = tf.data.Dataset.list_files(f'MAVD-traffic/{task}_validate/*.flac')
test_ds = tf.data.Dataset.list_files(f'MAVD-traffic/{task}_test/*.flac')

#train_ds = tf.data.Dataset.list_files([f'MAVD-traffic/{task}_train/car*',f'MAVD-traffic/{task}_train/motorcycle*',f'MAVD-traffic/{task}_train/bus*'])
#val_ds = tf.data.Dataset.list_files([f'MAVD-traffic/{task}_validate/car*.flac',f'MAVD-traffic/{task}_validate/motorcycle*.flac',f'MAVD-traffic/{task}_validate/bus*.flac'])
#test_ds = tf.data.Dataset.list_files([f'MAVD-traffic/{task}_test/car*.flac',f'MAVD-traffic/{task}_test/motorcycle*.flac',f'MAVD-traffic/{task}_test/bus*.flac'])

# Make sure the number of files in every split is a multiple of the batch size
train_len, val_len, test_len = len(train_ds), len(val_ds), len(test_ds)

# Largest multiple of batch_size that fits in each split
train_len_adj = (train_len // batch_size) * batch_size
val_len_adj = (val_len // batch_size) * batch_size
test_len_adj = (test_len // batch_size) * batch_size

# Keep only that many files, then shuffle them with a buffer covering the split
train_ds_adj = train_ds.take(train_len_adj)
train_ds_adj = train_ds_adj.shuffle(train_len_adj)
val_ds_adj = val_ds.take(val_len_adj)
val_ds_adj = val_ds_adj.shuffle(val_len_adj)
test_ds_adj = test_ds.take(test_len_adj)
test_ds_adj = test_ds_adj.shuffle(test_len_adj)

print(f'Number of files in training set: {train_len}, Number of files in validation set: {val_len}, Number of files in test set: {test_len}')
print(f'Number of files in training set after adjusting for batch size: {train_len_adj}, Number of files in validation set after adjusting for batch size: {val_len_adj}, Number of files in test set after adjusting for batch size: {test_len_adj}')

# Preprocess the data
# Pick the mapping function that matches the task. An unknown task now fails
# here instead of silently leaving the raw filename datasets in place.
if task == 'singlelabel':
    preprocess_fn = preprocess_mfccs_single
elif task == 'multilabel':
    preprocess_fn = preprocess_mfccs_multi
else:
    raise ValueError(f"Unknown task {task!r}: expected 'singlelabel' or 'multilabel'")

# Only the training split is cached after batching.
train_ds = train_ds_adj.map(preprocess_fn).batch(batch_size).cache()
val_ds = val_ds_adj.map(preprocess_fn).batch(batch_size)
# Bug fix: the 'singlelabel' branch used to assign the test split to `val_ds`,
# overwriting the validation set and leaving `test_ds` as raw filenames.
test_ds = test_ds_adj.map(preprocess_fn).batch(batch_size)

print(f'Number of batches in training set: {tf.data.experimental.cardinality(train_ds)}')
print(f'Number of batches in validation set: {tf.data.experimental.cardinality(val_ds)}')
print(f'Number of batches in test set: {tf.data.experimental.cardinality(test_ds)}')


# Pull one training batch: `example_batch` is reused later to define the model
# input shape, and its first dimension is checked against `batch_size`.
for example_batch, example_labels in train_ds.take(1):
    print(f'Batch Shape: {example_batch.shape}, Data Shape: {example_batch.shape[1:]}, Labels: {example_labels}')
    batch_size_check = example_batch.shape[0]
    if batch_size_check == batch_size:
        continue
    print(f'Warning: batch size is {batch_size_check}, which is different from the desired batch size of {batch_size}.')


Number of files in training set: 6709, Number of files in validation set: 1995, Number of files in test set: 4990
Number of files in training set after adjusting for batch size: 6688, Number of files in validation set after adjusting for batch size: 1984, Number of files in test set after adjusting for batch size: 4960
2023-04-11 15:45:45.910686: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2023-04-11 15:45:45.912776: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2023-04-11 15:45:45.913087: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2023-04-11 15:45:46.336196: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2023-04-11 15:45:46.338274: W tensorflow/core/framework/op_kernel.cc:1780] 

## Build the model

Homework model

In [19]:
# Homework architecture: one strided convolution followed by two
# depthwise + pointwise blocks, global average pooling and the output layer.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=example_batch.shape[1:]),

    # Strided 3x3 convolution
    tf.keras.layers.Conv2D(256*alpha, (3, 3), strides=(2, 2),
                           padding='valid', use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),

    # First depthwise 3x3 + pointwise 1x1 block
    tf.keras.layers.DepthwiseConv2D((3, 3), strides=(1, 1),
                                    padding='same', use_bias=False),
    tf.keras.layers.Conv2D(256*alpha, (1, 1), strides=(1, 1),
                           use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),

    # Second depthwise 3x3 + pointwise 1x1 block
    tf.keras.layers.DepthwiseConv2D((3, 3), strides=(1, 1),
                                    padding='same', use_bias=False),
    tf.keras.layers.Conv2D(256*alpha, (1, 1), strides=(1, 1),
                           use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),

    # Head: one output unit per target label
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(len(LABELS), activation=last_activation),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 60, 4, 64)         576       
                                                                 
 batch_normalization_13 (Bat  (None, 60, 4, 64)        256       
 chNormalization)                                                
                                                                 
 re_lu_6 (ReLU)              (None, 60, 4, 64)         0         
                                                                 
 depthwise_conv2d_4 (Depthwi  (None, 60, 4, 64)        576       
 seConv2D)                                                       
                                                                 
 conv2d_13 (Conv2D)          (None, 60, 4, 64)         4096      
                                                                 
 batch_normalization_14 (Bat  (None, 60, 4, 64)       

In [8]:
# Alternative CNN: six convolution stages with max-pooling/dropout after every
# second stage, then a dense head. `depthwise` selects between a
# depthwise-separable variant (first branch) and standard convolutions.
# NOTE(review): this cell rebinds `model`, so when the notebook is run in
# document order it replaces the model built in the previous cell. The
# recorded execution counters (In [8] here, In [19] above) show the cells
# were run in a different order; confirm which model is intended.
if depthwise:
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=example_batch.shape[1:]),

        # Conv1: standard 3x3 convolution
        tf.keras.layers.Conv2D(64*alpha, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        
        # Conv2 (depthwise 3x3 + pointwise 1x1, no activation) + pooling + dropout
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
            use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(filters=64*alpha, kernel_size=[1, 1], strides=[1, 1],
            use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.2),
        
        # Conv3 (depthwise 3x3 + pointwise 1x1 with ReLU)
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
            use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(128*alpha, kernel_size=[1, 1], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),

        # Conv4 (depthwise 3x3 + pointwise 1x1 with ReLU) + pooling + dropout
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
            use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(128*alpha, kernel_size=[1, 1], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.3),
        
        # Conv5 (depthwise 3x3 + pointwise 1x1 with ReLU)
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
            use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(256*alpha, kernel_size=[1, 1], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        
        # Conv6 (depthwise 3x3 + pointwise 1x1 with ReLU) + pooling + dropout
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
            use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(256*alpha, kernel_size=[1, 1], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.4),

        # Dense head: the hidden width (256) is not scaled by alpha
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(LABELS), activation=last_activation)
    ])
else:
    # Same stage layout as above, built from standard 3x3 convolutions
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=example_batch.shape[1:]),

        # Conv1
        tf.keras.layers.Conv2D(64*alpha, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),

        # Conv2 (no activation) + pooling + dropout
        tf.keras.layers.Conv2D(filters=64*alpha, kernel_size=[3, 3], strides=[1, 1],
            use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.2),

        # Conv3, Conv4 + pooling + dropout
        tf.keras.layers.Conv2D(128*alpha, kernel_size=[3, 3], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(128*alpha, kernel_size=[3, 3], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.3),

        # Conv5, Conv6 + pooling + dropout
        tf.keras.layers.Conv2D(256*alpha, kernel_size=[3, 3], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(256*alpha, kernel_size=[3, 3], strides=[1, 1], padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2,2)),
        tf.keras.layers.Dropout(0.4),

        # Dense head: the hidden width (256) is not scaled by alpha
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(LABELS), activation=last_activation)
    ])
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 122, 10, 16)       160       
                                                                 
 batch_normalization_3 (Batc  (None, 122, 10, 16)      64        
 hNormalization)                                                 
                                                                 
 conv2d_4 (Conv2D)           (None, 122, 10, 16)       2304      
                                                                 
 batch_normalization_4 (Batc  (None, 122, 10, 16)      64        
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 61, 5, 16)        0         
 )                                                               
                                                      

Prepare the model for weight pruning if required

In [20]:
if weight_pruning:
    prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

    # Total number of training steps (batches per epoch times epochs);
    # pruning starts after the first 20% of them and ends on the last one.
    total_steps = len(train_ds) * epochs
    end_step = int(total_steps)
    begin_step = int(total_steps * 0.2)

    # Sparsity grows from 0 to `final_sparsity` between the two steps above.
    sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,
        final_sparsity=final_sparsity,
        begin_step=begin_step,
        end_step=end_step,
    )
    pruning_params = {'pruning_schedule': sparsity_schedule}

    # Wrap the model so that it can be pruned during training.
    model = prune_low_magnitude(model, **pruning_params)
    print("Pruning added")

Pruning added


In [21]:
# Loss function: binary cross-entropy for the multilabel task,
# categorical cross-entropy otherwise
if task == 'multilabel':
    loss = tf.losses.BinaryCrossentropy(from_logits=False)
else:
    loss = tf.losses.CategoricalCrossentropy(from_logits=False)

# Learning rate schedule: decays from the initial to the end learning rate
# over all the training steps
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']

linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=len(train_ds) * epochs,
    end_learning_rate=end_learning_rate,
)

# Optimizer
optimizer = tf.optimizers.SGD(learning_rate=linear_decay)

# Metrics: micro- and weighted-averaged F1 with shared settings
f1_common = dict(num_classes=num_classes, threshold=threshold)
metrics = [
    tfa.metrics.F1Score(average='micro', name='f1_micro', **f1_common),
    tfa.metrics.F1Score(average='weighted', name='f1_weighted', **f1_common),
]

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

# Callbacks: TensorBoard always, plus the pruning step updater when pruning
callbacks = [tensorboard_callback]
if weight_pruning:
    callbacks += [tfmot.sparsity.keras.UpdatePruningStep()]

print(f"Task: {task}, Loss: {loss}, last_activation: {last_activation}")

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=callbacks,
)

#ATTENTION, if a Graph execution error is raised, please re-run the cell


Task: multilabel, Loss: <keras.losses.BinaryCrossentropy object at 0x7fed992c0550>, last_activation: sigmoid
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Final Sparsity Check

In [22]:
import numpy as np


if not weight_pruning:
    print("Weight pruning not used")
else:
    for layer in model.layers:
        # Wrapper layers report only their trainable weights;
        # any other layer reports all of its weights.
        is_wrapper = isinstance(layer, tf.keras.layers.Wrapper)
        weights = layer.trainable_weights if is_wrapper else layer.weights
        for weight in weights:
            # Convert to NumPy once and reuse it for both the size and the count
            values = weight.numpy()
            weight_size = values.size
            zero_num = np.count_nonzero(values == 0)
            print(
                f'{weight.name}: {zero_num/weight_size:.2%} sparsity ',
                f'({zero_num}/{weight_size})',
            )

conv2d_12/kernel:0: 69.97% sparsity  (403/576)
batch_normalization_13/gamma:0: 0.00% sparsity  (0/64)
batch_normalization_13/beta:0: 0.00% sparsity  (0/64)
depthwise_conv2d_4/depthwise_kernel:0: 0.00% sparsity  (0/576)
conv2d_13/kernel:0: 70.00% sparsity  (2867/4096)
batch_normalization_14/gamma:0: 0.00% sparsity  (0/64)
batch_normalization_14/beta:0: 0.00% sparsity  (0/64)
depthwise_conv2d_5/depthwise_kernel:0: 0.00% sparsity  (0/576)
conv2d_14/kernel:0: 70.00% sparsity  (2867/4096)
batch_normalization_15/gamma:0: 0.00% sparsity  (0/64)
batch_normalization_15/beta:0: 0.00% sparsity  (0/64)
dense_4/kernel:0: 69.79% sparsity  (134/192)
dense_4/bias:0: 0.00% sparsity  (0/3)


## Model Testing

In [23]:
# Evaluate on the test split; the returned list holds the loss followed by
# the metrics passed to `model.compile`.
results = model.evaluate(x=test_ds)
results



[0.5454511046409607, 0.6088300347328186, 0.454133003950119]

## Save the model

In [24]:
# Show the run identifier; it names the saved model folder and the tflite file
MODEL_NAME

'hmw_20230411-154544'

In [25]:
from time import time
import os


# Strip the pruning wrappers before saving the model
if weight_pruning:
    model_for_export = tfmot.sparsity.keras.strip_pruning(model)
else:
    model_for_export = model

# exist_ok=True replaces the separate "exists?" check: one call, and no gap
# between checking for the folder and creating it.
saved_model_dir = f'./saved_models/{MODEL_NAME}'
os.makedirs(saved_model_dir, exist_ok=True)
model_for_export.save(saved_model_dir)

# Prepare the folder for the tflite models
tflite_models_dir = './tflite_models'
os.makedirs(tflite_models_dir, exist_ok=True)

INFO:tensorflow:Assets written to: ./saved_models/hmw_20230411-154544/assets
INFO:tensorflow:Assets written to: ./saved_models/hmw_20230411-154544/assets


## Tflite model conversion

In [26]:
# Convert the SavedModel exported above into a serialized TFLite model
converter = tf.lite.TFLiteConverter.from_saved_model(
    saved_model_dir=f'./saved_models/{MODEL_NAME}'
)
tflite_model = converter.convert()

2023-04-11 16:32:08.891275: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-04-11 16:32:08.891327: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-04-11 16:32:08.892443: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./saved_models/hmw_20230411-154544
2023-04-11 16:32:08.898415: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-04-11 16:32:08.898456: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: ./saved_models/hmw_20230411-154544
2023-04-11 16:32:08.925431: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-04-11 16:32:08.927279: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-04-11 16:32:08.979600: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bund

In [27]:
# Folder that receives the converted tflite models. exist_ok=True makes the
# call safe to re-run and removes the separate existence check.
tflite_models_dir = './tflite_models'
os.makedirs(tflite_models_dir, exist_ok=True)

In [28]:
# Destination of the converted model: <tflite_models_dir>/<MODEL_NAME>.tflite
tflite_filename = f'{MODEL_NAME}.tflite'
tflite_model_name = os.path.join(tflite_models_dir, tflite_filename)
tflite_model_name

'./tflite_models/hmw_20230411-154544.tflite'

In [29]:
# Write the serialized TFLite model to disk
with open(tflite_model_name, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

In [30]:
# Compress the tflite file into a sibling "<name>.zip" archive
import zipfile

zip_path = f'{tflite_model_name}.zip'
with zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(tflite_model_name)

In [31]:
# Compare the on-disk sizes (in KB) of the tflite file and of its zip archive
tflite_size, zipped_size = (
    os.path.getsize(path) / 1024.0
    for path in (tflite_model_name, f'{tflite_model_name}.zip')
)

print(f'Original tflite size: {tflite_size:.3f} KB')
print(f'Zipped tflite size: {zipped_size:.3f} KB')

Original tflite size: 44.359 KB
Zipped tflite size: 21.095 KB


## Test the tflite model latency

In [32]:
# Load the saved tflite model into an interpreter and allocate its tensors
model_path = tflite_model_name
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Input/output tensor descriptions; only the first entry of each is used
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
first_input, first_output = input_details[0], output_details[0]

print("Input name:", first_input['name'])
print("Input shape:", first_input['shape'])
print("Output name:", first_output['name'])
print("Output shape:", first_output['shape'])

Input name: serving_default_input_4:0
Input shape: [  1 122  10   1]
Output name: StatefulPartitionedCall:0
Output shape: [1 3]
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [33]:
from time import perf_counter

from tqdm import tqdm

# Measures, for every test file, the time spent on MFCC preprocessing plus
# TFLite inference, and reports the median over all files.
#
# Changes with respect to the previous version of this cell:
#   * perf_counter() replaces time(): it is the monotonic, high-resolution
#     clock meant for measuring short durations, and importing it here removes
#     the dependency on the `time` import of the model-saving cell.
#   * Dead code removed: `spectrogram_width` (computed from a hard-coded 16000
#     unrelated to the configured sampling rate), `sampling_rate_int64`, the
#     per-file `sampling_rate`, the never-updated `avg_*` accumulators, the
#     unused `end_preprocess` timestamp and the unused label parsing, which
#     could raise on file names that are not made of integers.

#------------File preparation------------------
# NOTE(review): the folder is hard-coded to the multilabel test split,
# independently of `task` - confirm this is intended.
filenames = glob('MAVD-traffic/multilabel_test/*')

#PREPROCESSING_MFCCS_ARGS is the one defined in the Hyperparameters section

#-----------Compute the linear to mel weight matrix--------------------
downsampling_rate = PREPROCESSING_MFCCS_ARGS['downsampling_rate']
frame_length = int(downsampling_rate * PREPROCESSING_MFCCS_ARGS['frame_length_in_s'])
frame_step = int(downsampling_rate * PREPROCESSING_MFCCS_ARGS['frame_step_in_s'])
num_spectrogram_bins = frame_length // 2 + 1
num_mfccs_coefficients = PREPROCESSING_MFCCS_ARGS['num_coefficients']

# Computed once outside the loop, so it is not part of the measured latency
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    PREPROCESSING_MFCCS_ARGS['num_mel_bins'],
    num_spectrogram_bins,
    downsampling_rate,
    PREPROCESSING_MFCCS_ARGS['lower_frequency'],
    PREPROCESSING_MFCCS_ARGS['upper_frequency']
)

# Per-file latency in seconds (preprocessing + inference)
latencies = []

#------------Test the model----------------
for filename in tqdm(filenames):
    # Reading the file from disk is excluded from the measurement
    audio_binary = tf.io.read_file(filename)

    start_preprocess = perf_counter()

    # NOTE(review): `tfio` is not imported by any cell of this notebook;
    # presumably it comes from `from MAVD_Preprocessing import *` - confirm.
    audio = tfio.audio.decode_flac(audio_binary, dtype=tf.int32)
    audio = tf.squeeze(audio)
    audio = tf.cast(audio, tf.float32)
    stft = tf.signal.stft(
        audio,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )

    spectrogram = tf.abs(stft)

    mel_spectrogram = tf.matmul(spectrogram, linear_to_mel_weight_matrix)
    # Small constant avoids log(0)
    log_mel_spectrogram = tf.math.log(mel_spectrogram+1.e-6)
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    mfccs = mfccs[:,:num_mfccs_coefficients]
    # Add a leading and a trailing axis of size 1 before feeding the interpreter
    mfccs = tf.expand_dims(mfccs, 0)
    mfccs = tf.expand_dims(mfccs, -1)

    interpreter.set_tensor(input_details[0]['index'], mfccs)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    end_inference = perf_counter()

    latencies.append(end_inference - start_preprocess)

#-----------------Compute the latency----------------------------
median_total_latency = np.median(latencies)
median_total_latency


100%|██████████| 4990/4990 [06:36<00:00, 12.58it/s]


0.06420135498046875

In [0]:
# Print the layer summary of the object currently bound to `model`
model.summary()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=251b70cc-64ed-4269-9639-75a0d233eb14' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>