**Warning**: the submitted model (model4.tflite) has been trained and saved as part of a hyperparameter search on another notebook. This notebook displays the exact same methodology: the same parameters, algorithms, architecture, optimizations, and order of operations. Unfortunately, running this notebook doesn't output a model able to reach the accuracy constraint. This is likely due to the randomness of the parameter initialization that is not reproducible. We are sorry for the inconvenience and can provide the hyperparameter search notebook if requested (it is called hp_finding on our group project on deepnote, Group 4). Thank you

In [1]:
import sys
#PREPROCESSING_TYPE = 'mel_spectrogram' # comment this line if working with MFCC
PREPROCESSING_TYPE = 'mfcc' # comment this line if working with mel spectrogram

if sys.version.split()[0] != '3.11.10':
    print(sys.version)
    raise RuntimeError('Wrong Python version. Go to ENVIRONMENT settings, Stop machine, Start machine')


import tensorflow as tf
if tf.__version__ != '2.13.0':
    raise RuntimeError('Wrong TF version. Go to ENVIRONMENT settings, Stop machine, Start machine')


print('\nPython version: OK')
print('TensorFlow version: OK')

2025-01-05 23:10:08.547321: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-05 23:10:08.552215: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-05 23:10:08.601672: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-05 23:10:08.602430: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

Python version: OK
TensorFlow version: OK


In [14]:
PREPROCESSING_ARGS = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.064,
    'frame_step_in_s': 0.032,
    'num_mel_bins': 100,
    'lower_frequency': 20,
    'upper_frequency': 1000,
    'num_coefficients': 10  # set num_coefficients to 0 if log-Mel Spectrogram features have been used
}

N_frames = int((1 - PREPROCESSING_ARGS['frame_length_in_s']) / PREPROCESSING_ARGS['frame_step_in_s']) + 1

TRAINING_ARGS = {
    'batch_size': 256,
    'learning_rate': 0.01,
    'end_learning_rate': 1.e-5,
    'epochs': 50,
    'initial_sparsity': 0.0, # The 4 next args are for weight pruning
    'final_sparsity': 0.6,
    'begin_step_%': 0.1,
    'end_step_%': 0.9,
    'num_clusters': 8,
    'width_multiplier' : 0.5,
}

if PREPROCESSING_TYPE != 'mfcc' and 'num_coefficients' in PREPROCESSING_ARGS:
    del PREPROCESSING_ARGS['num_coefficients']

LABELS = ['down', 'up']

In [4]:
from reader import AudioReader
from preprocessing import Padding, Normalization
from preprocessing import MelSpectrogram

class MFCC():
    def __init__(
        self, 
        sampling_rate,
        frame_length_in_s,
        frame_step_in_s,
        num_mel_bins,
        lower_frequency,
        upper_frequency,
        num_coefficients
    ):

        self.mel_spec_processor = MelSpectrogram(
            sampling_rate, frame_length_in_s, frame_step_in_s, num_mel_bins, lower_frequency, upper_frequency
        )
        self.num_coefficients = num_coefficients

    def get_mfccs(self, audio):
        log_mel_spectrogram = self.mel_spec_processor.get_mel_spec(audio)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]

        return mfccs

    def get_mfccs_and_label(self, audio, label):
        mfccs = self.get_mfccs(audio)

        return mfccs, label

if PREPROCESSING_TYPE == 'mfcc':
    mfcc_processor = MFCC(**PREPROCESSING_ARGS)
else:
    mel_spetrogram_processor = MelSpectrogram(**PREPROCESSING_ARGS)

audio_reader = AudioReader(tf.int16)

padding = Padding(PREPROCESSING_ARGS['sampling_rate'])
normalization = Normalization(tf.int16)


In [5]:
train_ds = tf.data.Dataset.list_files(['/tmp/msc-train/down*', '/tmp/msc-train/up*'])
val_ds = tf.data.Dataset.list_files(['/tmp/msc-val/down*', '/tmp/msc-val/up*'])
test_ds = tf.data.Dataset.list_files(['/tmp/msc-test/down*', '/tmp/msc-test/up*'])

In [6]:
def prepare_for_training(feature, label):
    feature = tf.expand_dims(feature, -1)
    label_id = tf.argmax(label == LABELS)

    return feature, label_id

if PREPROCESSING_TYPE == 'mel_spectrogram':

    train_ds = (train_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mel_spetrogram_processor.get_mel_spec_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))
    val_ds = (val_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mel_spetrogram_processor.get_mel_spec_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))
    test_ds = (test_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mel_spetrogram_processor.get_mel_spec_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))

else:

    train_ds = (train_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mfcc_processor.get_mfccs_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))
    val_ds = (val_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mfcc_processor.get_mfccs_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))
    test_ds = (test_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mfcc_processor.get_mfccs_and_label)
                .map(prepare_for_training)
                .batch(TRAINING_ARGS['batch_size']))
        

for example_batch, _ in train_ds.take(1):
    input_shape = example_batch.shape[1:]

print(input_shape)

2025-01-05 23:10:11.827113: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX AVX2 AVX512F FMA
2025-01-05 23:10:11.829791: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
(30, 10, 1)


In [7]:
wm = TRAINING_ARGS['width_multiplier']
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=example_batch.shape[1:]),
    tf.keras.layers.Conv2D(filters=int(32 * wm), kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=int(64 * wm), kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=int(64 * wm), kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=len(LABELS)),
    tf.keras.layers.Softmax()
])

from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
print(timestamp)

20250105-231015


In [8]:
import tensorflow_model_optimization as tfmot

begin_step = int(len(train_ds) * TRAINING_ARGS['epochs'] * TRAINING_ARGS['begin_step_%'])
end_step = int(len(train_ds) * TRAINING_ARGS['epochs'] * TRAINING_ARGS['end_step_%'])

pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=TRAINING_ARGS['initial_sparsity'],
        final_sparsity=TRAINING_ARGS['final_sparsity'],
        begin_step=begin_step,
        end_step=end_step
    )
}
model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
model.build(input_shape)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 14, 4, 16)         290       
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_batch_  (None, 14, 4, 16)         65        
 normalization (PruneLowMag                                      
 nitude)                                                         
                                                                 
 prune_low_magnitude_re_lu   (None, 14, 4, 16)         1         
 (PruneLowMagnitude)                                             
                                                                 
 prune_low_magnitude_conv2d  (None, 14, 4, 32)         9218      
 _1 (PruneLowMagnitude)                                          
                                                        

In [9]:
linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=TRAINING_ARGS['learning_rate'],
    end_learning_rate=TRAINING_ARGS['end_learning_rate'],
    decay_steps=len(train_ds)*TRAINING_ARGS['epochs']
)
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(linear_decay)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    mode='auto' # min
)

loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.SparseCategoricalAccuracy()]
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [10]:
try:
    history = model.fit(train_ds, epochs=TRAINING_ARGS['epochs'], validation_data=val_ds, callbacks=[lr_scheduler, early_stopping]) # this line cause an error only once, so we do this to not haivng the error.
except:
    print('here')
    history = model.fit(train_ds, epochs=TRAINING_ARGS['epochs'], validation_data=val_ds, callbacks=[lr_scheduler, early_stopping])

Epoch 1/50
here
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 19: early stopping


In [11]:
# Strip the pruning wrappers
model_for_export = tfmot.sparsity.keras.strip_pruning(model)
model_for_export.compile(loss=loss, metrics=metrics)

In [16]:
# Apply weight clustering
clustering_params = {
    'number_of_clusters': TRAINING_ARGS['num_clusters'],  # Choose the number of clusters for weight clustering
    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS
}

model = tfmot.clustering.keras.cluster_weights(model_for_export, **clustering_params)

# Compile the clustered model
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

# Fine-tune the clustered model
model.fit(train_ds, validation_data=val_ds, epochs=2)
model_for_export = tfmot.clustering.keras.strip_clustering(model)
model_for_export.compile(loss=loss, metrics=metrics)

Epoch 1/2
Epoch 2/2


In [17]:
training_loss = history.history['loss'][-1]
training_accuracy = history.history['sparse_categorical_accuracy'][-1]
val_loss = history.history['val_loss'][-1]
val_accuracy = history.history['val_sparse_categorical_accuracy'][-1]

test_loss, test_accuracy = model_for_export.evaluate(test_ds)

print(f'Training Loss: {training_loss:.4f}')
print(f'Training Accuracy: {training_accuracy*100.:.2f}%')
print()
print(f'Validation Loss: {val_loss:.4f}')
print(f'Validation Accuracy: {val_accuracy*100.:.2f}%')
print()
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy*100.:.2f}%')

Training Loss: 0.0268
Training Accuracy: 99.06%

Validation Loss: 0.1958
Validation Accuracy: 91.00%

Test Loss: 0.1078
Test Accuracy: 93.25%


In [18]:
import os

os.makedirs("saved_models", exist_ok=True)
save_path = f"saved_models/{timestamp}"
model_for_export.save(save_path)

def get_dir_size(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)

    return total_size

file_size_kb = get_dir_size(save_path) / 1024


print(f"Model size: {file_size_kb:.2f} KB")

INFO:tensorflow:Assets written to: saved_models/20250105-231015/assets
INFO:tensorflow:Assets written to: saved_models/20250105-231015/assets
Model size: 272.57 KB


In [19]:
quantization_ds = tf.data.Dataset.list_files(['/tmp/msc-train/down*', '/tmp/msc-train/up*'])

if PREPROCESSING_TYPE == 'mel_spectrogram':
    quantization_ds = (quantization_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mel_spetrogram_processor.get_mel_spec_and_label)
                .map(prepare_for_training)
                .batch(1))
else:
    quantization_ds = (quantization_ds
                .map(audio_reader.get_audio_and_label)
                .map(padding.pad)
                .map(normalization.normalize)
                .map(mfcc_processor.get_mfccs_and_label)
                .map(prepare_for_training)
                .batch(1))

def representative_data_gen():
    for input_value in quantization_ds.take(100):
        yield [input_value[0]]


converter = tf.lite.TFLiteConverter.from_saved_model(save_path)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()

2025-01-05 23:13:57.154664: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-01-05 23:13:57.155452: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-01-05 23:13:57.328554: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: saved_models/20250105-231015
2025-01-05 23:13:57.486477: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-01-05 23:13:57.486625: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: saved_models/20250105-231015
2025-01-05 23:13:57.492862: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2025-01-05 23:13:57.494020: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-01-05 23:13:57.893171: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: 

In [20]:
#gzip
import os
import zipfile

tflite_path = f'tflite_models/{timestamp}.tflite'
zip_path = f'tflite_models/{timestamp}.zip'
print(tflite_path)

with open(tflite_path, 'wb') as fp:
    fp.write(tflite_model)

file_size_kb = os.path.getsize(tflite_path) / 1024

with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.writestr(timestamp, tflite_model)

zipped_size = os.path.getsize(zip_path) / 1024.0
print(f"TFLite model size: {file_size_kb:.2f} KB")
print(f'Zipped tflite size (pruned model): {zipped_size:.3f} KB')

tflite_models/20250105-231015.tflite
TFLite model size: 31.73 KB
Zipped tflite size (pruned model): 12.853 KB


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b6b1fe17-4020-4f88-867f-48004baa1058' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>