# HW2: Reference Latency

In [None]:
import tensorflow as tf
import os


# Preprocessing settings for the reference pipeline. The latency benchmark
# unpacks this dict into `MelSpectrogram(**REF_PREPROCESSING_ARGS)`.
REF_PREPROCESSING_ARGS = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.04,
    'frame_step_in_s': 0.02,
    'num_mel_bins': 40,
    'lower_frequency': 20,
    'upper_frequency': 4000,
}

# Output location of the converted model. `exist_ok=True` replaces the
# check-then-create pattern and is safe to re-run.
tflite_models_dir = './tflite_models'
os.makedirs(tflite_models_dir, exist_ok=True)
tflite_model_name = os.path.join(tflite_models_dir, 'ref_model.tflite')

# The export is skipped when the .tflite file already exists, so re-running
# this cell does not rebuild or overwrite the reference model.
if not os.path.exists(tflite_model_name):
    # Reference architecture: three Conv-BN-ReLU stages, global average
    # pooling and a 2-way softmax classifier. No training or weight loading
    # happens in this cell, so the weights keep their initial values.
    # NOTE(review): the 49x40x1 input presumably matches the
    # (frames, mel bins, channel) layout produced by MelSpectrogram with the
    # settings above for 1 s of audio - confirm against `preprocessing`.
    ref_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=[49, 40, 1]),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(units=2),
        tf.keras.layers.Softmax()
    ])

    ref_model.build()

    # Save the Keras model to disk first; the TFLite converter below reads it
    # back from this same directory.
    saved_model_dir = './saved_models/ref_model'
    os.makedirs(saved_model_dir, exist_ok=True)
    ref_model.save(saved_model_dir)

    # Reuse `saved_model_dir` so the save and convert steps cannot drift apart.
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    tflite_model = converter.convert()

    # `convert()` returns the serialized model, written out in binary mode.
    with open(tflite_model_name, 'wb') as fp:
        fp.write(tflite_model)

2023-12-01 13:44:10.225047: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-01 13:44:10.259387: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-01 13:44:10.259988: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import numpy as np
from time import time
from preprocessing import MelSpectrogram

# Interval timing uses a monotonic, high-resolution clock. `time.time()` is
# wall-clock time and can jump if the system clock is adjusted mid-benchmark.
from time import perf_counter

mel_spec_processor = MelSpectrogram(**REF_PREPROCESSING_ARGS)

# Load the file written by the export cell rather than repeating its path.
interpreter = tf.lite.Interpreter(model_path=tflite_model_name)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# One second of synthetic audio: the sample count equals the configured
# sampling rate. Random noise is used as the benchmark input.
audio = tf.random.normal((REF_PREPROCESSING_ARGS['sampling_rate'],))

# Per-iteration end-to-end latency in seconds (preprocessing + inference).
ref_latencies = []

for i in range(100):
    start_preprocess = perf_counter()

    # Preprocessing: compute the spectrogram, then add a leading and a
    # trailing singleton axis before feeding it to the interpreter.
    log_mel_spectrogram = mel_spec_processor.get_mel_spec(audio)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)

    # Inference: reading the output tensor is part of the timed region.
    interpreter.set_tensor(input_details[0]['index'], log_mel_spectrogram)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    end_inference = perf_counter()

    ref_latencies.append(end_inference - start_preprocess)

# Summarize the 100 timed runs by their median.
median_ref_latency = np.median(ref_latencies)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [None]:
# The median latency is a difference of timestamps in seconds; scaling by 1000
# reports it in milliseconds with one decimal place.
latency_report = f'Reference Tot Latency: {1000*median_ref_latency:.1f}ms'
print(latency_report)

Reference Tot Latency: 10.4ms


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6f1fd91f-a434-4542-983d-3ce5ae14ac33' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>