In [8]:
from tensorflow import keras
model = keras.models.load_model('model_4.hdf5')


OSError: SavedModel file does not exist at: model.tflite/{saved_model.pbtxt|saved_model.pb}

In [1]:
import tensorflow as tf
import tensorflow.lite as lite
import tensorflow_datasets as tfds
dataset_name = "speech_commands"


speech_builder = tfds.builder(dataset_name)
for ele in tfds.list_builders():
    if "speech" in ele:
        print(ele)

dataset_name = "speech_commands"

# instantiate a dataset builder (see tensorflow dataset builder)
speech_builder = tfds.builder(dataset_name)
print(speech_builder.info)

# download data into existing data folder
speech_builder.download_and_prepare()
data = speech_builder.as_dataset(as_supervised=True)
test_set = data['test']
test_set_size = 4890


def make_mfccs(audio, labels):
    
    FRAME_RATE = 16000
    
    stfts = tf.signal.stft(tf.cast(audio, tf.float32), frame_length=1024, frame_step=256,
                       fft_length=1024)
    
    spectrograms = tf.abs(stfts)

    # Warp the linear scale spectrograms into the mel-scale.
    num_spectrogram_bins = stfts.shape[-1]
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(num_mel_bins, num_spectrogram_bins,
                                                                        FRAME_RATE, lower_edge_hertz,
                                                                        upper_edge_hertz)
    
    mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
    
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(linear_to_mel_weight_matrix.shape[-1:]))

    # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    # Compute MFCCs from log_mel_spectrograms and take the first 13.
    # You can use other parts, or even all of the MFCCs, to test around how it affect the accuracy
    mfcc = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)[..., :13]
    
    # Finally add depth to the mfcc tensors, our Conv2D-Model requires this.
    mfcc = tf.expand_dims(mfcc, -1)
    
    return mfcc, labels

dataset_test = test_set.map(make_mfccs
                           ).padded_batch(128
                           ).prefetch(tf.data.experimental.AUTOTUNE)



gtzan_music_speech
librispeech
librispeech_lm
ljspeech
speech_commands
tfds.core.DatasetInfo(
    name='speech_commands',
    version=0.0.2,
    description='An audio dataset of spoken words designed to help train and evaluate keyword
spotting systems. Its primary goal is to provide a way to build and test small
models that detect when a single word is spoken, from a set of ten target words,
with as few false positives as possible from background noise or unrelated
speech. Note that in the train and validation set, the label "unknown" is much
more prevalent than the labels of the target words or background noise.
One difference from the release version is the handling of silent segments.
While in the test set the silence segments are regular 1 second files, in the
training they are provided as long segments under "background_noise" folder.
Here we split these background noise into 1 second clips, and also keep one of
the files for the validation set.',
    homepage='https://arxiv.org/a

In [2]:
def calc_acc(predictions, dataset):
    total, correct = 0, 0
    index = 0

    for batch in dataset:
        for b_ele in batch[1]:
            #print(b_ele)
            total += 1
            if b_ele == np.argmax(predictions[index]):
                correct += 1
            index += 1


    print("{} out of {} correct, resulting in an accuracy of {}\n".format(correct, total, (correct/total)*100))  

In [11]:
preds = model.predict(dataset_test, verbose=1)



In [14]:
calc_acc(preds, dataset_test)

4242 out of 4890 correct, resulting in an accuracy of 86.74846625766871



In [12]:
import numpy as np

total, correct = 0, 0
index = 0

for batch in dataset_test:
    for b_ele in batch[1]:
        #print(b_ele)
        total += 1
        if b_ele == np.argmax(preds[index]):
            correct += 1
        index += 1
        
        
print("{} out of {} correct, resulting in an accuracy of {}".format(correct, total, (correct/total)*100))    

4242 out of 4890 correct, resulting in an accuracy of 86.74846625766871


In [5]:
validation_set = data['validation']
validation_set_size = 10102
dataset_validation = validation_set.map(make_mfccs
                                       ).padded_batch(128
                                       ).prefetch(tf.data.experimental.AUTOTUNE)

In [6]:
preds = model.predict(dataset_validation, verbose=1)



In [7]:
total, correct = 0, 0
index = 0

for batch in dataset_validation:
    for b_ele in batch[1]:
        #print(b_ele)
        total += 1
        if b_ele == np.argmax(preds[index]):
            correct += 1
        index += 1
        
        
print("{} out of {} correct, resulting in an accuracy of {}".format(correct, total, (correct/total)*100)) 

9493 out of 10102 correct, resulting in an accuracy of 93.9714907939022


## TFLite

In [4]:
"""Convert batched tfds into numpy for tflite inference"""

import numpy as np

data = np.zeros((test_set_size, 59,13,1))
label_arr = np.zeros((test_set_size,1))
#print(data.shape)

index = 0
for batch in dataset_test:
    audio = batch[0]
    labels = batch[1]
    for audio, label in zip(audio, labels):
        
        #print(ele)
        #print(audio.shape)
        #print(label.numpy())
        data[index] = audio.numpy()
        label_arr[index] = label.numpy()
        #print(label_arr[index])
        
        index += 1
        #break
    #break

In [22]:
test_arr = data[1]
test_arr = test_arr.astype('float32')
#test_arr = np.expand_dims(test_arr, 0)
print(type(test_arr), test_arr.dtype, " shape: ", test_arr.shape)

for i in range(5):
    #print(data[i])
    print("#######################")
    print(label_arr[i])
    #break

<class 'numpy.ndarray'> float32  shape:  (59, 13, 1)
#######################
[7.]
#######################
[2.]
#######################
[6.]
#######################
[8.]
#######################
[5.]


In [40]:
""" Load the model and make some quick tests"""

import tensorflow as tf

full_int_quanti = True

# Load the TFLite model and allocate tensors.

if full_int_quanti:
    interpreter = tf.lite.Interpreter(model_path="model_only_ints.tflite")
else:
    interpreter = tf.lite.Interpreter(model_path="model.tflite")

interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input Details:\t\t\t\tOutput Details:")
for in_items, out_items in zip(input_details[0].items(), output_details[0].items()):
    print("{}: {}\t\t | \t\t{}: {}".format(in_items[0],in_items[1],out_items[0],out_items[1]))
print("################################")
# Test the model on random input data.

input_shape = input_details[0]['shape']
print(input_shape)
#input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)

if full_int_quanti:
    input_data = test_arr.astype('uint8')
    input_data = np.expand_dims(input_data, 0)
else:
    input_data = test_arr

print(input_data.dtype, input_data.shape)
interpreter.set_tensor(input_details[0]['index'], input_data)

interpreter.invoke()

# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.

output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data, output_data.shape)

Input Details:				Output Details:
name: conv2d_input		 | 		name: Identity
index: 20		 | 		index: 21
shape: [ 1 59 13  1]		 | 		shape: [ 1 12]
shape_signature: [-1 59 13  1]		 | 		shape_signature: [-1 12]
dtype: <class 'numpy.uint8'>		 | 		dtype: <class 'numpy.uint8'>
quantization: (1.3954178094863892, 125)		 | 		quantization: (0.00390625, 0)
quantization_parameters: {'scales': array([1.3954178], dtype=float32), 'zero_points': array([125], dtype=int32), 'quantized_dimension': 0}		 | 		quantization_parameters: {'scales': array([0.00390625], dtype=float32), 'zero_points': array([0], dtype=int32), 'quantized_dimension': 0}
sparsity_parameters: {}		 | 		sparsity_parameters: {}
################################
[ 1 59 13  1]
uint8 (1, 59, 13, 1)
[[  0   0 255   0   0   0   0   0   0   0   0   0]] (1, 12)


### Maybe it always predicts the last label (unknown) because it's so overrepresented in the dataset...

In [41]:
"""inference loop for lite model"""
preds = np.zeros((test_set_size, 12)) # dataset size and number of labels

for i, input_data in enumerate(data):
    # Some data adjustment
    
    #input_data = input_data.astype('float32')
    input_data = input_data.astype('uint8')
    #print(input_data.shape)
    input_data = np.expand_dims(input_data, 0)
    
    #print(input_data.shape)
    #break
    
    # Inference
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    preds[i] = interpreter.get_tensor(output_details[0]['index'])
    
    #print(output_data.shape, preds.shape)
    
    #preds[i] = output_data

In [42]:
total, correct = 0, 0
index = 0
print(preds[6])
not_null = 0
predicted_label_count = {}

for i, label in enumerate(label_arr):
    prediction = np.argmax(preds[i])
    if prediction in predicted_label_count:
        predicted_label_count[prediction] += 1
    else:
        predicted_label_count[prediction] = 1
        
    if prediction == label:
        
        correct += 1
    total += 1
    if prediction != 0:
        not_null += 1
        
print(not_null)
print("{} out of {} correct, resulting in an accuracy of {}".format(correct, total, (correct/total)*100))
for key, val in predicted_label_count.items():
    print(key, val)

[  0.   0. 255.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
4885
445 out of 4890 correct, resulting in an accuracy of 9.100204498977506
2 4559
1 146
8 43
11 104
3 19
7 12
9 2
0 5


In [87]:
print(preds.shape)
for i in range(10):
    print(preds[i])

(4890, 12)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
