### Notebook Setup

In [3]:
import tensorflow as tf

from glob import glob

#from preprocessing import LABELS
from preprocessing import *
from functools import partial

In [2]:
#unzip dataset
#!unzip /datasets/minispeechcommands/msc-test.zip
#!unzip /datasets/minispeechcommands/msc-train.zip
#!unzip /datasets/minispeechcommands/msc-val.zip

In [4]:
train_ds = tf.data.Dataset.list_files(['msc-train/go*','msc-train/stop*'])
val_ds = tf.data.Dataset.list_files(['msc-train/go*','msc-train/stop*'])
test_ds = tf.data.Dataset.list_files(['msc-train/go*','msc-train/stop*'])
len(train_ds), len(val_ds), len(test_ds)

(1600, 1600, 1600)

### Reproducibility 

In [5]:
import os
import numpy as np
import random

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)


### Hyperparameters

In [6]:
TRAINING_ARGS = {
    'batch_size': 20,
    'initial_learning_rate': 0.01,
    'end_learning_rate': 1.e-5,
    'epochs': 20
}

LABELS = ['go','stop']

PREPROCESSING_MFCCS_ARGS = {
    'downsampling_rate': 16000,
    'frame_length_in_s': 0.016,
    'frame_step_in_s': 0.016,
    'num_mel_bins' : 40,
    'lower_frequency': 20,
    'upper_frequency': 8000,
    'num_mfccs_coefficients':10
}

alpha = 0.25
final_sparsity = 0.7

batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']

### Dataset mapping

In [7]:
def get_mfccs_and_label(filename, downsampling_rate, frame_length_in_s, 
frame_step_in_s, num_mel_bins, lower_frequency, upper_frequency, num_mfccs_coefficients):

    log_mel_spectrogram, label = get_log_mel_spectrogram(filename, 
    downsampling_rate, frame_length_in_s, frame_step_in_s, num_mel_bins, 
    lower_frequency, upper_frequency)
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    mfccs = mfccs[..., :num_mfccs_coefficients]
    
    return mfccs, label

get_frozen_mfccs = partial(get_mfccs_and_label, 
**PREPROCESSING_MFCCS_ARGS)

def preprocess_mfccs(filename):
    signal, label = get_frozen_mfccs(filename)
    signal = tf.expand_dims(signal, -1)
    label_id = tf.argmax(label == LABELS)

    return signal, label_id

In [8]:
go_stop_train_ds = train_ds.map(preprocess_mfccs).batch(batch_size).cache()
go_stop_val_ds = val_ds.map(preprocess_mfccs).batch(batch_size)
go_stop_test_ds = test_ds.map(preprocess_mfccs).batch(batch_size)

print(f'Number of batches in training set {len(go_stop_train_ds)}')

for example_batch, example_labels in go_stop_train_ds.take(1):
  #print(example_bat
  print('Batch Shape:', example_batch.shape)
  print('Data Shape:', example_batch.shape[1:])
  print('Labels:', example_labels)

2022-12-17 10:55:45.707028: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
2022-12-17 10:55:45.707259: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX2 AVX512F FMA
2022-12-17 10:55:46.113033: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2022-12-17 10:55:46.114589: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2022-12-17 10:55:46.114757: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2022-12-17 10:55:46.469909: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at functional_ops.cc:373 : INTERNAL: No function library
2022-12-17 10:55:46.471450: W tensorflow/core/framework/op_kernel.cc:1780] O

### Model

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=example_batch.shape[1:]),
    tf.keras.layers.Conv2D(filters=256*alpha, kernel_size=[3, 3], strides=[2, 2],
        use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], 
        use_bias=False, padding='same'),
    tf.keras.layers.Conv2D(filters=256*alpha, kernel_size=[1, 1], strides=[1, 1],   
       use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1],
        use_bias=False, padding='same'),
    tf.keras.layers.Conv2D(filters=256*alpha, kernel_size=[1, 1], strides=[1, 1],   
       use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=len(LABELS)),
    tf.keras.layers.Softmax()
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 4, 64)         576       
                                                                 
 batch_normalization (BatchN  (None, 30, 4, 64)        256       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                (None, 30, 4, 64)         0         
                                                                 
 depthwise_conv2d (Depthwise  (None, 30, 4, 64)        576       
 Conv2D)                                                         
                                                                 
 conv2d_1 (Conv2D)           (None, 30, 4, 64)         4096      
                                                                 
 batch_normalization_1 (Batc  (None, 30, 4, 64)        2

### Weight  Pruning 

In [15]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

begin_step = int(len(go_stop_train_ds) * epochs * 0.2)
end_step = int(len(go_stop_train_ds) * epochs)

pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,
        final_sparsity=final_sparsity,
        begin_step=begin_step,
        end_step=end_step
    )
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

### Model Training

In [18]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']

linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    end_learning_rate=end_learning_rate,
    decay_steps=len(go_stop_train_ds) * epochs,
)
optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.SparseCategoricalAccuracy()]


model_for_pruning.compile(loss=loss, optimizer=optimizer, metrics=metrics)

history = model_for_pruning.fit(go_stop_train_ds, 
                                epochs=epochs,
                                validation_data=go_stop_val_ds)


#ATTENTION, if a Graph execution error is raised, please re-run the cell

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
model_for_pruning.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d   (None, 30, 4, 64)        1154      
 (PruneLowMagnitude)                                             
                                                                 
 prune_low_magnitude_batch_n  (None, 30, 4, 64)        257       
 ormalization (PruneLowMagni                                     
 tude)                                                           
                                                                 
 prune_low_magnitude_re_lu (  (None, 30, 4, 64)        1         
 PruneLowMagnitude)                                              
                                                                 
 prune_low_magnitude_depthwi  (None, 30, 4, 64)        577       
 se_conv2d (PruneLowMagnitud                                     
 e)                                                     

The number of non-trainable params increase since they are parameters introduced by tensorflow to handle the pruning. They will be removed by tfmot.sparsity.keras.strip_pruning(model_for_pruning) function executed in the next cells.

### Final sparsity check

In [21]:
import numpy as np

for layer in model_for_pruning.layers:
    if isinstance(layer, tf.keras.layers.Wrapper):
        weights = layer.trainable_weights
    else:
        weights = layer.weights
    for weight in weights:        
        weight_size = weight.numpy().size
        zero_num = np.count_nonzero(weight == 0)
        print(
            f'{weight.name}: {zero_num/weight_size:.2%} sparsity ',
            f'({zero_num}/{weight_size})',
        )

conv2d/kernel:0: 69.97% sparsity  (403/576)
batch_normalization/gamma:0: 0.00% sparsity  (0/64)
batch_normalization/beta:0: 0.00% sparsity  (0/64)
depthwise_conv2d/depthwise_kernel:0: 0.00% sparsity  (0/576)
conv2d_1/kernel:0: 69.97% sparsity  (2866/4096)
batch_normalization_1/gamma:0: 0.00% sparsity  (0/64)
batch_normalization_1/beta:0: 0.00% sparsity  (0/64)
depthwise_conv2d_1/depthwise_kernel:0: 0.00% sparsity  (0/576)
conv2d_2/kernel:0: 69.97% sparsity  (2866/4096)
batch_normalization_2/gamma:0: 0.00% sparsity  (0/64)
batch_normalization_2/beta:0: 0.00% sparsity  (0/64)
dense/kernel:0: 70.31% sparsity  (90/128)
dense/bias:0: 0.00% sparsity  (0/2)


The convolutional and the dense layers reached the final sparsity desired.

### Model Testing

In [22]:
test_loss, test_accuracy = model_for_pruning.evaluate(go_stop_test_ds)



It seems that there is not overfitting. 

### Save the model

In [28]:
from time import time
import os

timestamp = int(time())

MODEL_NAME = f'model_{timestamp}'

#Strip the model before save it
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
saved_model_dir = f'./saved_models/{MODEL_NAME}'
if not os.path.exists(saved_model_dir):
    os.makedirs(saved_model_dir)
model_for_export.save(saved_model_dir)

#Prepare the folder for tflite models
tflite_models_dir = './tflite_models'
if not os.path.exists(tflite_models_dir):
    os.makedirs(tflite_models_dir)

INFO:tensorflow:Assets written to: ./saved_models/model_1671275148/assets
INFO:tensorflow:Assets written to: ./saved_models/model_1671275148/assets


### Tflite model conversion

In [29]:
converter = tf.lite.TFLiteConverter.from_saved_model(f'./saved_models/{MODEL_NAME}')
tflite_model = converter.convert()

2022-12-17 11:05:52.634891: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-12-17 11:05:52.634939: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-12-17 11:05:52.635070: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./saved_models/model_1671275148
2022-12-17 11:05:52.637517: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-12-17 11:05:52.637543: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: ./saved_models/model_1671275148
2022-12-17 11:05:52.643528: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-12-17 11:05:52.673093: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: ./saved_models/model_1671275148
2022-12-17 11:05:52.683054: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags

In [30]:
tflite_models_dir = './tflite_models'
if not os.path.exists(tflite_models_dir):
    os.makedirs(tflite_models_dir)

In [31]:
tflite_model_name = os.path.join(tflite_models_dir, f'{MODEL_NAME}.tflite')
tflite_model_name

'./tflite_models/model_1671275148.tflite'

In [32]:
#Save the tflite model
with open(tflite_model_name, 'wb') as fp:
    fp.write(tflite_model)

In [33]:
#Zip the tflite model
import zipfile

with zipfile.ZipFile(f'{tflite_model_name}.zip', 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(tflite_model_name)

In [34]:
#Check the size
tflite_size = os.path.getsize(tflite_model_name) / 1024.0
zipped_size = os.path.getsize(f'{tflite_model_name}.zip') / 1024.0

print(f'Original tflite size (pruned model): {tflite_size:.3f} KB')
print(f'Zipped tflite size (pruned model): {zipped_size:.3f} KB')

Original tflite size (pruned model): 44.020 KB
Zipped tflite size (pruned model): 20.626 KB


The memory constraint (<25KB) is reached.

### Test the tflite model

In [35]:
model_path = tflite_model_name
#------------Get the model-----------------
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input name:", input_details[0]['name'])
print("Input shape:", input_details[0]['shape'])
print("Output name:", output_details[0]['name'])
print("Output shape:", output_details[0]['shape'])

Input name: serving_default_input_1:0
Input shape: [ 1 62 10  1]
Output name: StatefulPartitionedCall:0
Output shape: [1 2]
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [36]:
#------------File preparation------------------
filenames = glob('msc-test/go*') + glob('msc-test/stop*')

#PREPROCESSING_MFCCS_ARGS is the one defined in the Hyperparameters section

#-----------Compute the linear to mel weight matrix--------------------
downsampling_rate = PREPROCESSING_MFCCS_ARGS['downsampling_rate']
sampling_rate_int64 = tf.cast(downsampling_rate, tf.int64)
frame_length = int(downsampling_rate * PREPROCESSING_MFCCS_ARGS['frame_length_in_s'])
frame_step = int(downsampling_rate * PREPROCESSING_MFCCS_ARGS['frame_step_in_s'])
spectrogram_width = (16000 - frame_length) // frame_step + 1
num_spectrogram_bins = frame_length // 2 + 1
num_mfccs_coefficients = PREPROCESSING_MFCCS_ARGS['num_mfccs_coefficients']

linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    PREPROCESSING_MFCCS_ARGS['num_mel_bins'],
    num_spectrogram_bins,
    downsampling_rate,
    PREPROCESSING_MFCCS_ARGS['lower_frequency'],
    PREPROCESSING_MFCCS_ARGS['upper_frequency']
)


avg_preprocessing_latency = 0.0
avg_model_latency = 0.0
latencies = []
accuracy = 0.0
#------------Test the model----------------
for filename in filenames:
    audio_binary = tf.io.read_file(filename)
    path_parts = tf.strings.split(filename, '/')
    path_end = path_parts[-1]
    file_parts = tf.strings.split(path_end, '_')
    true_label = file_parts[0]
    true_label = true_label.numpy().decode()
    
    start_preprocess = time()

    audio, sampling_rate = tf.audio.decode_wav(audio_binary)
    audio = tf.squeeze(audio)
    zero_padding = tf.zeros(sampling_rate-tf.shape(audio), dtype = tf.float32) #prova a ridurre 
    audio_padded = tf.concat([audio, zero_padding], axis = 0)

    stft = tf.signal.stft(
        audio_padded,
        frame_length = frame_length,
        frame_step = frame_step,
        fft_length = frame_length
    )

    spectrogram = tf.abs(stft)

    mel_spectrogram = tf.matmul(spectrogram, linear_to_mel_weight_matrix)
    log_mel_spectrogram = tf.math.log(mel_spectrogram+1.e-6)
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    mfccs = mfccs[:,:num_mfccs_coefficients]
    mfccs = tf.expand_dims(mfccs, 0)
    mfccs = tf.expand_dims(mfccs, -1)

    end_preprocess = time()


    interpreter.set_tensor(input_details[0]['index'], mfccs)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    end_inference = time()

    top_index = np.argmax(output[0])
    predicted_label = LABELS[top_index]

    accuracy += true_label == predicted_label
    avg_preprocessing_latency += end_preprocess - start_preprocess
    avg_model_latency += end_inference - end_preprocess
    latencies.append(end_inference - start_preprocess)

#-----------------Compute the metrics----------------------------
accuracy /= len(filenames)
avg_preprocessing_latency /= len(filenames)
avg_model_latency /= len(filenames)
median_total_latency = np.median(latencies)
RESULTS = {
    'accuracy': accuracy,
    'avg_preprocessing_latency': avg_preprocessing_latency,
    'avg_model_latency': avg_model_latency,
    'median_total_latency': median_total_latency
}



In [37]:
RESULTS

{'accuracy': 0.98,
 'avg_preprocessing_latency': 0.005781207084655762,
 'avg_model_latency': 0.00012733697891235352,
 'median_total_latency': 0.00571596622467041}

Both accuracy and latency respect the relative constraint.

### Results storage

In [38]:
import pandas as pd


output_dict = {
    'tag':'Final',
    'model_name': MODEL_NAME,
    **PREPROCESSING_MFCCS_ARGS,
    **TRAINING_ARGS,
    'alpha': alpha,
    'final_sparsity': final_sparsity,
    'test_accuracy': test_accuracy,
    **RESULTS,
    'tflite_size':tflite_size,
    'zipped_size':zipped_size 
}

df = pd.DataFrame([output_dict])

output_path=f'./team08_hw2_results.csv'
df.to_csv(output_path, mode='a', header=not os.path.exists(output_path), index=False)

In [39]:
output_dict

{'tag': 'Final',
 'model_name': 'model_1671275148',
 'downsampling_rate': 16000,
 'frame_length_in_s': 0.016,
 'frame_step_in_s': 0.016,
 'num_mel_bins': 40,
 'lower_frequency': 20,
 'upper_frequency': 8000,
 'num_mfccs_coefficients': 10,
 'batch_size': 20,
 'initial_learning_rate': 0.01,
 'end_learning_rate': 1e-05,
 'epochs': 20,
 'alpha': 0.25,
 'final_sparsity': 0.7,
 'test_accuracy': 0.9937499761581421,
 'accuracy': 0.98,
 'avg_preprocessing_latency': 0.005781207084655762,
 'avg_model_latency': 0.00012733697891235352,
 'median_total_latency': 0.00571596622467041,
 'tflite_size': 44.01953125,
 'zipped_size': 20.6259765625}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=81f45795-723f-4142-b302-f9037f9c1cf7' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>