In [21]:
import os
import random
from time import perf_counter, time
from zipfile import ZipFile

import numpy as np
import tensorflow as tf

from preprocessing import compute_linear_matrix, get_mfcc, LABELS


# Seed every random number generator used by the notebook so that repeated
# runs are comparable.
seed = 42
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here
# only affects processes launched from this kernel, not the kernel itself.
os.environ['PYTHONHASHSEED'] = str(seed)
# Ask TensorFlow for deterministic op implementations where it honours this flag.
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(seed)         # Python's stdlib generator (requires `import random`)
tf.random.set_seed(seed)  # TensorFlow global seed
np.random.seed(seed)      # NumPy legacy global generator


# Root folder for the artefacts this notebook reads and writes.
SAVING_FOLDER = os.path.join('.', 'SAVING_FOLDER')

# Zipped TFLite model, the folder it gets extracted into, and the name of the
# model file expected inside that folder after extraction.
ZIP_PATH = os.path.join(SAVING_FOLDER, 'tflite_model', 'model10.tflite.zip')
UNZIPPED_PATH = os.path.join(SAVING_FOLDER, 'unzipped_model')
MODEL_NAME = 'model10.tflite'

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(UNZIPPED_PATH, exist_ok=True)


In [22]:
# Dataset of test file paths: only the files whose names start with
# "go" or "stop" inside the msc-test folder.
test_ds_pure = tf.data.Dataset.list_files(['msc-test/go*', 'msc-test/stop*'])

# Extract the zipped TFLite model into the working folder.
with ZipFile(ZIP_PATH, mode='r') as model_archive:
    model_archive.extractall(path=UNZIPPED_PATH)

# Create the TFLite interpreter for the extracted model, allocate its tensors
# and keep the input/output descriptors needed to feed and read it later.
tflite_model_path = os.path.join(UNZIPPED_PATH, MODEL_NAME)
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [23]:
# Feature-extraction settings (rates/frequencies presumably in Hz, durations
# in seconds as the `_in_s` suffix indicates).
downsampling_rate = 16000
frame_length_in_s = 0.016
frame_step_in_s = 0.016
num_mel_bins = 20
num_coefficients = 20
lower_frequency = 20
upper_frequency = 8000

# Frame geometry expressed in samples.
frame_length = int(downsampling_rate * frame_length_in_s)
frame_step = int(downsampling_rate * frame_step_in_s)
# Number of frames per clip. Uses the configured sampling rate as the clip
# length in samples instead of a hard-coded 16000, so it follows
# `downsampling_rate` if that is changed.
# NOTE(review): assumes every clip lasts one second — confirm against the data.
spectrogram_width = (downsampling_rate - frame_length) // frame_step + 1
num_spectrogram_bins = frame_length // 2 + 1

# Get the weight matrix that get_mfcc() applies during preprocessing.
weight_matrix = compute_linear_matrix(downsampling_rate, num_mel_bins, lower_frequency, upper_frequency, frame_length)


In [31]:
# Evaluate the TFLite model on every test file, timing preprocessing and
# inference separately. All latencies are in seconds.
preprocessing_latencies = []
model_latencies = []
latencies = []  # per-sample latency from start of preprocessing to end of inference
num_correct = 0


for filename in test_ds_pure:
    audio_binary = tf.io.read_file(filename)
    # The ground-truth label is the part of the file name before the first '_'.
    label = tf.strings.split(tf.strings.split(filename, '/')[-1], '_')[0]
    label = label.numpy().decode()

    # perf_counter() is monotonic and high-resolution, which makes it the right
    # clock for millisecond-scale intervals (wall-clock time() is neither).
    start_preprocess = perf_counter()
    mfccs = get_mfcc(audio_binary, downsampling_rate, frame_length, frame_step, weight_matrix)

    mfccs = tf.expand_dims(mfccs, 0)  # batch axis
    mfccs = tf.expand_dims(mfccs, -1)  # channel axis
    mfccs = tf.image.resize(mfccs, [32,32])
    end_preprocess = perf_counter()

    start_inference = perf_counter()
    interpreter.set_tensor(input_details[0]['index'], mfccs)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    end_inference = perf_counter()

    # The predicted class is the highest-scoring entry of the first (only) batch row.
    top_index = np.argmax(output[0])
    predicted_label = LABELS[top_index]

    num_correct += int(label == predicted_label)
    preprocessing_latencies.append(end_preprocess - start_preprocess)
    model_latencies.append(end_inference - start_inference)
    latencies.append(end_inference - start_preprocess)

# Normalise by the number of samples actually processed rather than by
# len(test_ds_pure), which only works when the dataset cardinality is known.
num_samples = len(latencies)
if num_samples == 0:
    raise ValueError("test_ds_pure yielded no files; cannot compute accuracy or latency")

accuracy = num_correct / num_samples
avg_preprocessing_latency = sum(preprocessing_latencies) / num_samples
avg_model_latency = sum(model_latencies) / num_samples
median_total_latency = np.median(latencies)


In [33]:
# Report accuracy as a percentage and the median end-to-end latency in
# milliseconds (the stored value is in seconds).
print(
    f"Accuracy = {accuracy*100:.2f}%",
    f"Median latencies = {median_total_latency*1000:.2f} ms",
    sep="\n",
)

Accuracy = 99.00%
Median latencies = 4.87 ms


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=da3e1e27-78dd-422e-a631-d2ca46001cf1' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>