# Lab 5: Google Speech Commands

## Imports

In [1]:
import copy
import wave
from pathlib import Path
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Input, Conv1D, AvgPool1D, MaxPool1D, ZeroPadding1D, BatchNormalization, Flatten, Dense, Activation
from keras.activations import softmax
from keras.utils import get_file
from keras.utils import to_categorical

2024-04-05 14:02:31.847635: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-05 14:02:31.908015: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-05 14:02:31.908073: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-05 14:02:31.908130: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-05 14:02:31.918973: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-05 14:02:31.919720: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

## Download, cache and extract Google Speech Commands

In [24]:
dataset_dir = Path('datasets')

# The presence of the testing list is taken as proof that the archive has
# already been downloaded and extracted, so the download is skipped then.
if not (dataset_dir/'testing_list.txt').exists():
    get_file(
        fname=None,  # let Keras derive the file name from the URL
        origin="http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz",
        file_hash="6b74f3901214cb2c2934e98196829835",
        extract=True,
        cache_dir='.',
        cache_subdir=dataset_dir,
    )

## Load raw spoken digits data from Google Speech Commands

In [25]:
# Classes to handle, ordered by label
CLASSES = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']

# Official test split: one relative path per line, written with '/' separators
with (dataset_dir/'testing_list.txt').open() as f:
    testing_list = f.read().splitlines()
# Set for O(1) membership tests (the list is scanned once per recording otherwise)
testing_set = set(testing_list)

x_train = []
y_train = []
x_test = []
y_test = []

# Sorted so that the sample order (and therefore any later index-based
# selection) does not depend on the filesystem's directory enumeration order
for recording in sorted(dataset_dir.glob('**/*.wav')):
    if recording.parent.name not in CLASSES: # Ignore unused classes
        continue
    label = CLASSES.index(recording.parent.name) # Assign class number

    with wave.open(str(recording)) as f: # Read wave file
        # Raw frames interpreted as 16-bit signed integers, then converted to
        # 32-bit floating-point (astype returns a new, writable array)
        data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16).astype(np.float32)

    data.resize((16000, 1)) # Resize to 1s (16kHz) with zero-padding, 1 channel

    # as_posix() keeps '/' separators on every platform so the path matches
    # the entries of testing_list.txt (str() would use '\\' on Windows)
    if recording.relative_to(dataset_dir).as_posix() in testing_set: # Assign to test set if file in test list
        x_test.append(data)
        y_test.append(label)
    else:
        x_train.append(data)
        y_train.append(label)

x_train = np.array(x_train)
# Explicit num_classes keeps the one-hot width at len(CLASSES) even if the
# highest label happens to be absent from one of the splits
y_train = to_categorical(np.array(y_train), num_classes=len(CLASSES))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test), num_classes=len(CLASSES))

## Prepare for inference with fixed-point Q7.9 samples by scaling input data accordingly

In [26]:
FIXED_POINT = 9  # exponent of the power-of-two divisor applied to the samples

# Both arrays are divided in place by 2**FIXED_POINT.
# NOTE: because the division is in place, re-running this cell without
# reloading the data first divides the samples a second time.
for samples in (x_train, x_test):
    samples /= 2**FIXED_POINT

## Export small dataset (250 random vectors)

In [27]:
# Fixed seed: for a given ordering of the test set, the same 250 vectors are
# exported on every run, so results computed from the CSV files stay comparable
# across kernel restarts.
SUBSET_SEED = 0
perms = np.random.default_rng(SUBSET_SEED).permutation(len(y_test))[0:250]
x_test_250 = x_test[perms]
y_test_250 = y_test[perms]
# One row per vector: inputs are flattened to 1D, labels stay one-hot encoded
np.savetxt('x_test_gsc_250.csv', x_test_250.reshape((x_test_250.shape[0], -1)), delimiter=',', fmt='%s')
np.savetxt('y_test_gsc_250.csv', y_test_250, delimiter=',', fmt='%s')

## Build model M5

In [31]:
model = Sequential()

# Layers are created in stacking order and appended one by one.
m5_layers = [
    Input(shape=(16000, 1)),
    MaxPool1D(pool_size=20, padding='valid'),
    Conv1D(filters=8, kernel_size=40, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    Conv1D(filters=16, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    AvgPool1D(pool_size=8),
    Flatten(),
    Dense(units=10),
    # SoftMax activation needs to be separate from Dense to remove it later on
    Activation('softmax'),
]
for layer in m5_layers:
    model.add(layer)

# EXPLORE Learning Rate
opt = tf.keras.optimizers.Adam(learning_rate=10e-3)  # 10e-3 == 0.01
model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 max_pooling1d_4 (MaxPoolin  (None, 800, 1)            0         
 g1D)                                                            
                                                                 
 conv1d_3 (Conv1D)           (None, 761, 8)            328       
                                                                 
 max_pooling1d_5 (MaxPoolin  (None, 190, 8)            0         
 g1D)                                                            
                                                                 
 conv1d_4 (Conv1D)           (None, 188, 16)           400       
                                                                 
 max_pooling1d_6 (MaxPoolin  (None, 47, 16)            0         
 g1D)                                                            
                                                      

## Train model

In [32]:
# Train for 50 epochs with mini-batches of 384 samples.
# NOTE(review): the test set is also passed as validation data, so the
# per-epoch validation metrics are computed on the same samples as the final
# evaluation — confirm this is intended.
model.fit(x_train, y_train, epochs=50, batch_size=384, validation_data=(x_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f0399633d90>

## Evaluate model on test dataset

In [33]:
# Loss and accuracy on the full test set
model.evaluate(x=x_test, y=y_test, verbose=2)

# Class scores for every test vector; argmax turns one-hot labels and
# predicted scores into class indices for the confusion matrix
pred_test = model.predict(x_test)
print(tf.math.confusion_matrix(labels=np.argmax(y_test, axis=1),
                               predictions=np.argmax(pred_test, axis=1)))

129/129 - 1s - loss: 1.1991 - categorical_accuracy: 0.5931 - 785ms/epoch - 6ms/step
tf.Tensor(
[[207  37   9  62  29   7  15  18   7  27]
 [ 26 246   2  25   4   9   4   4  28  51]
 [  1   4 276  53  21   2  20  10  36   1]
 [ 20  17  53 193  37  13  24  19  27   2]
 [ 15  12  14  62 160  64  30  26  13   4]
 [  7   7  12  24  55 257  21  24  34   4]
 [  1   1   7  15  13   7 328  15   7   0]
 [  4   8  11  25  12  14  53 277   2   0]
 [  4  22  13  41  16  24  26   5 250   7]
 [ 24  92   2  22   5   6   6   0   9 242]], shape=(10, 10), dtype=int32)


## Evaluate model on small dataset

In [34]:
# Loss and accuracy on the exported 250-vector subset
model.evaluate(x=x_test_250, y=y_test_250, verbose=2)

# Class scores for the subset; argmax turns one-hot labels and predicted
# scores into class indices for the confusion matrix
pred_test_250 = model.predict(x_test_250)
print(tf.math.confusion_matrix(labels=np.argmax(y_test_250, axis=1),
                               predictions=np.argmax(pred_test_250, axis=1)))

8/8 - 0s - loss: 1.3492 - categorical_accuracy: 0.5360 - 86ms/epoch - 11ms/step
tf.Tensor(
[[ 7  5  2  5  0  1  1  1  1  1]
 [ 2 13  0  0  0  1  1  0  6  4]
 [ 0  1 20  5  2  0  0  0  5  0]
 [ 1  2  4 12  2  1  0  0  2  0]
 [ 1  0  0  4 12  5  0  2  1  0]
 [ 0  2  1  1  2 17  1  0  1  0]
 [ 0  0  0  0  1  0 18  2  0  0]
 [ 1  0  1  2  3  2  2 15  0  0]
 [ 1  2  0  2  2  2  1  0 12  1]
 [ 2  9  0  0  1  1  0  0  1  8]], shape=(10, 10), dtype=int32)


## Save trained model

In [35]:
# Save the trained model to 'lab_gsc.h5'. At this point the model still ends
# with its SoftMax activation layer.
model.save('lab_gsc.h5')

## Remove SoftMax layer

In [36]:
# Rebuild the model so that it ends at the layer preceding the final SoftMax
# Activation; if the last layer is anything else, only report it.
last_layer = model.layers[-1]
if not isinstance(last_layer, Activation) or last_layer.activation != softmax:
    print('Error: last layer is not SoftMax Activation')
else:
    model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)

## Install Qualia-CodeGen for C inference code generation

In [37]:
# Install Qualia-CodeGen into the kernel's environment (%pip rather than !pip).
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
%pip install qualia_codegen_core
import qualia_codegen_core
from qualia_codegen_core.graph.KerasModelGraph import KerasModelGraph
from qualia_codegen_core.graph.Quantization import Quantization
from qualia_codegen_core.graph.RoundMode import RoundMode

from importlib.resources import files
# Absolute path of the Linux example main.cpp shipped in the package's
# 'examples' resources; it is interpolated into the g++ command lines below.
main_path = str((files('qualia_codegen_core.examples')/'Linux'/'main.cpp').resolve())

Note: you may need to restart the kernel to use updated packages.


## Convert Keras Model to Qualia-CodeGen's internal representation

In [38]:
# Convert the Keras model (SoftMax already removed) and print the resulting
# graph as a table of layers with their inputs, outputs and shapes.
modelgraph = KerasModelGraph(model).convert()
print(modelgraph)

—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Inputs                                           | Layer                                            | Outputs                                          | Input shape                                      | Output shape                                    
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
                                                 | input_4                                          | max_pooling1d_4                                  | (1, 16000, 1)                                    | ((1, 16000, 1),)                   

## Generate C code for the trained model with 32-bit floating-point representation

In [39]:
# Work on a private copy so the annotations below do not touch `modelgraph`
float_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for float32
# No scale factor if not fixed-point quantization on integers
float32_settings = dict(
    number_type=float,
    width=32,
    long_width=32,
    weights_scale_factor=0,
    output_scale_factor=0,
    weights_round_mode=RoundMode.NONE,
    output_round_mode=RoundMode.NONE,
)
for node in float_modelgraph.nodes:
    # A fresh Quantization object per node
    node.q = Quantization(**float32_settings)

float_converter = qualia_codegen_core.Converter(output_path=Path('gsc_output_floating'))
float_res = float_converter.convert_model(float_modelgraph)

# Also keep the returned text as a single header file
with open('gsc_model_floating.h', mode='w') as header:
    header.write(float_res)

Graphviz not available


## Compile the 32-bit floating-point C code for x86 and evaluate on small dataset

In [40]:
# Build the generated floating-point model.c together with the example
# main.cpp ({main_path} is interpolated from the Python variable), then run the
# resulting binary on the exported 250-vector CSV files.
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_floating -include gsc_output_floating/include/defines.h -Igsc_output_floating/include gsc_output_floating/model.c {main_path}
!./gsc_floating x_test_gsc_250.csv y_test_gsc_250.csv

In file included from [01m[Kgsc_output_floating/model.c:15[m[K:
[01m[Kgsc_output_floating/include/number.h:[m[K In function ‘[01m[Kfloat scale_number_t_float(float, int, round_mode_t)[m[K’:
  143 |   float number, [01;35m[Kint scale_factor[m[K, round_mode_t round_mode) {
      |                 [01;35m[K~~~~^~~~~~~~~~~~[m[K
  143 |   float number, int scale_factor, [01;35m[Kround_mode_t round_mode[m[K) {
      |                                   [01;35m[K~~~~~~~~~~~~~^~~~~~~~~~[m[K
[01m[Kgsc_output_floating/include/number.h:[m[K In function ‘[01m[Kfloat scale_and_clamp_to_number_t_float(float, int, round_mode_t)[m[K’:
  151 |   float number, [01;35m[Kint scale_factor[m[K, round_mode_t round_mode) {
      |                 [01;35m[K~~~~^~~~~~~~~~~~[m[K
  151 |   float number, int scale_factor, [01;35m[Kround_mode_t round_mode[m[K) {
      |                                   [01;35m[K~~~~~~~~~~~~~^~~~~~~~~~[m[K
In file included from [0

## Generate C code for the trained model with 16-bit fixed-point representation

In [43]:
# Work on a private copy so the annotations below do not touch `modelgraph`
fixed_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for int16 Q9.7
int16_settings = dict(
    number_type=int,
    width=16,
    long_width=32,
    weights_scale_factor=7,
    output_scale_factor=7,
    weights_round_mode=RoundMode.FLOOR,
    output_round_mode=RoundMode.FLOOR,
)
for node in fixed_modelgraph.nodes:
    # A fresh Quantization object per node
    node.q = Quantization(**int16_settings)

fixed_converter = qualia_codegen_core.Converter(output_path=Path('gsc_output_fixed'))
fixed_res = fixed_converter.convert_model(fixed_modelgraph)

# Also keep the returned text as a single header file
with open('gsc_model_fixed.h', mode='w') as header:
    header.write(fixed_res)

Graphviz not available


## Compile the 16-bit fixed-point C code for x86 and evaluate on small dataset

In [49]:
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_fixed -include gsc_output_fixed/include/defines.h -Igsc_output_fixed/include gsc_model_fixed.h
!./gsc_fixed x_test_gsc_250.csv y_test_gsc_250.csv

/bin/bash: line 1: ./gsc_fixed: cannot execute binary file: Exec format error
