In [None]:
import os
import zipfile
from time import perf_counter
from time import time

import numpy as np
import pandas as pd
import tensorflow as tf

from preprocessing import LABELS
from preprocessing import AudioReader
from preprocessing import MelSpectrogram, MFCC

2024-01-04 09:03:03.284608: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-04 09:03:03.286453: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 09:03:03.323137: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 09:03:03.324141: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Reproducibility setup: a single seed is shared by every random number
# generator below (each stage could use its own value instead).
seed_value = 2

# 1. Export the hash seed through the environment.
# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes, not this kernel -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# 2-4. Seed, in order: Python's built-in generator, NumPy's global generator and
# TensorFlow's global generator.
# (TF1-compat spelling of the last one: tf.compat.v1.set_random_seed(seed_value))
import random
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed_value)

# 5. Register a TF1-compat session limited to one intra-op and one inter-op
# thread as the Keras backend session.
# NOTE(review): whether this session config influences TF2 eager execution is
# not visible from this notebook -- confirm before relying on it.
session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.compat.v1.Session(
    graph=tf.compat.v1.get_default_graph(),
    config=session_conf,
)
tf.compat.v1.keras.backend.set_session(sess)

## 1. Defining hyper-parameters

In [None]:
# Feature-extraction settings; unpacked as keyword arguments into MFCC(...).
PREPROCESSING_ARGS = dict(
    sampling_rate=16000,
    frame_length_in_s=0.032,
    frame_step_in_s=0.016,
    # Original note: "Triet 32" -- presumably an alternative value of 32; confirm.
    num_mel_bins=16,
    lower_frequency=20,
    upper_frequency=4000,
    # Original note: "Triet 13" -- presumably an alternative value of 13; confirm.
    # NOTE(review): 20 exceeds num_mel_bins (16) and the example batch printed
    # later has a feature width of 16, so the effective coefficient count may
    # be capped by the number of mel bins -- confirm against MFCC.
    num_coefficients=20,
)

# Optimisation settings read by the training cells.
TRAINING_ARGS = dict(
    batch_size=20,
    initial_learning_rate=0.01,
    # An end learning rate of 1.e-5 was tried previously.
    end_learning_rate=0.001,
    epochs=20,
)

## 2. Create train / test datasets

In [None]:
# Datasets of file paths matching each glob; the files themselves are read
# later by the preprocessing pipeline.
TRAIN_FILES_GLOB = '/tmp/yn-train/*'
TEST_FILES_GLOB = '/tmp/yn-test/*'

train_ds = tf.data.Dataset.list_files(TRAIN_FILES_GLOB)
test_ds = tf.data.Dataset.list_files(TEST_FILES_GLOB)

In [None]:
audio_reader = AudioReader(tf.int16, 16000)
mfcc_spec_processor = MFCC(**PREPROCESSING_ARGS)

batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']


def prepare_for_training(feature, label):
    """Return `feature` with a trailing axis added and `label` as an index.

    The index is the argmax of the element-wise comparison `label == LABELS`.
    """
    label_id = tf.argmax(label == LABELS)
    feature = tf.expand_dims(feature, -1)
    return feature, label_id


def _to_feature_batches(files_ds):
    """Map a dataset of file paths to batches of (feature, label_id) pairs."""
    audio_ds = files_ds.map(audio_reader.get_audio_and_label)
    mfcc_ds = audio_ds.map(mfcc_spec_processor.get_mfccs_and_label)
    return mfcc_ds.map(prepare_for_training).batch(batch_size)


# NOTE: both names are rebound from file-path datasets to feature datasets, so
# re-running this cell without re-running the file-listing cell first would
# apply the pipeline to already-processed data.
# Only the training set is cached (after batching).
train_ds = _to_feature_batches(train_ds).cache()
test_ds = _to_feature_batches(test_ds)

2024-01-04 09:03:05.961317: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX AVX2 AVX512F FMA
2024-01-04 09:03:05.963125: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available


In [None]:
# Peek at a single training batch. `example_batch` stays bound afterwards and
# is used when the model is created to size its input layer.
# Reading only one batch of the cached dataset is what triggers the
# "did not fully read the dataset being cached" warning in this cell's output.
for example_batch, example_labels in train_ds.take(1):
    batch_shape = example_batch.shape
    print('Batch Shape:', batch_shape)
    print('Data Shape:', batch_shape[1:])
    print('Labels:', example_labels)

Batch Shape: (20, 61, 16, 1)
Data Shape: (61, 16, 1)
Labels: tf.Tensor([1 1 1 1 0 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1], shape=(20,), dtype=int64)
2024-01-04 09:03:07.449379: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


## 3. Train model

Workflow: train many models, saving each model together with its hyper-parameters. If a model reaches the required accuracy, move on to testing its latency.

### 3.1 Model creation

In [None]:
#depth-wise convolution
def _pool_norm_relu_dropout():
    """Return fresh MaxPool2D -> BatchNormalization -> ReLU -> Dropout(0.02) layers."""
    return [
        tf.keras.layers.MaxPool2D(padding="same"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dropout(0.02),
    ]


def _depthwise_then_pointwise():
    """Return a fresh 5x5 depthwise convolution followed by a 1x1, 32-filter convolution."""
    return [
        tf.keras.layers.DepthwiseConv2D(
            kernel_size=[5, 5], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.Conv2D(
            filters=32, kernel_size=[1, 1], strides=[1, 1], use_bias=False),
    ]


# Depthwise-convolution network: one strided 5x5 convolution stem, two
# depthwise + pointwise stages, then global average pooling and a 2-unit
# softmax head. The input shape is taken from the example batch (batch axis
# dropped). Layers are instantiated in the same top-to-bottom order as listed.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=example_batch.shape[1:]),
    tf.keras.layers.Conv2D(
        filters=32, kernel_size=[5, 5], strides=[2, 2], use_bias=False, padding='valid'),
    *_pool_norm_relu_dropout(),
    *_depthwise_then_pointwise(),
    *_pool_norm_relu_dropout(),
    *_depthwise_then_pointwise(),
    *_pool_norm_relu_dropout(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=2),
    tf.keras.layers.Softmax(),
])

In [None]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
final_sparsity = 0.9

# The sparsity schedule is expressed in optimizer steps:
# len(train_ds) batches per epoch, times the number of epochs.
total_training_steps = len(train_ds) * epochs
begin_step = int(total_training_steps * 0.1)   # start after 10% of the steps
end_step = int(total_training_steps)           # finish on the last step

# Sparsity goes from 10% at `begin_step` to `final_sparsity` at `end_step`.
sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
    initial_sparsity=0.10,
    final_sparsity=final_sparsity,
    begin_step=begin_step,
    end_step=end_step,
)
pruning_params = {'pruning_schedule': sparsity_schedule}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

### 3.2 Training

In [None]:
# The model ends in a Softmax layer, so the loss receives probabilities
# (from_logits=False) together with integer class ids.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)

initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']

# Learning rate decays from the initial to the end value over every training
# step (batches per epoch * epochs); no `power` is passed to the schedule.
total_decay_steps = len(train_ds) * epochs
linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    end_learning_rate=end_learning_rate,
    decay_steps=total_decay_steps,
)

optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.SparseCategoricalAccuracy()]
# UpdatePruningStep is the TF-MOT callback passed to fit() for the pruning-wrapped model.
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

model_for_pruning.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# The test split is also used as validation data during training.
history = model_for_pruning.fit(
    train_ds,
    validation_data=test_ds,
    epochs=epochs,
    callbacks=callbacks,
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 3.3 Testing

In [None]:
# Evaluate the pruned model on the test split; the result unpacks into the loss
# followed by the single compiled metric (sparse categorical accuracy).
evaluation = model_for_pruning.evaluate(test_ds)
test_loss, test_accuracy = evaluation



### 3.4 Saving model and hyper-parameters

In [None]:
# Minimum test accuracy a model must exceed before it is exported.
ACCURACY_THRESHOLD = 0.989

# Bound unconditionally: the latency cell reads `saved_path`, and would
# otherwise fail with a NameError whenever the threshold is not reached.
saved_path = None

if test_accuracy > ACCURACY_THRESHOLD:
    # Run identifier used in every output path. The seed is used instead of a
    # wall-clock timestamp (int(time())), so re-running with the same seed
    # writes to the same locations.
    timestamp = seed_value

    # Save the model. strip_pruning() removes the pruning wrappers and returns
    # the underlying model with its sparse weights; this is the documented way
    # to export a pruned model, rather than relying on `model` sharing state
    # with `model_for_pruning`.
    saved_model_dir = f'./triet/saved_models/{timestamp}'
    os.makedirs(saved_model_dir, exist_ok=True)
    model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
    model_for_export.save(saved_model_dir)

    # Save the hyper-parameters and the achieved accuracy as one CSV row.
    output_dict = {
        'timestamp': timestamp,
        **PREPROCESSING_ARGS,
        **TRAINING_ARGS,
        'test_accuracy': test_accuracy
    }
    df = pd.DataFrame([output_dict])

    parameter_dir = './triet/saved_hyperparameters/'
    os.makedirs(parameter_dir, exist_ok=True)
    output_path = os.path.join(parameter_dir, f'{timestamp}.csv')
    # Append mode: the header is only written when the file does not exist yet.
    df.to_csv(output_path, mode='a', header=not os.path.exists(output_path), index=False)

    # Convert the SavedModel to TFLite and write it to disk.
    tflite_models_dir = './triet/tflite_models'
    os.makedirs(tflite_models_dir, exist_ok=True)

    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    tflite_model = converter.convert()

    tflite_model_name = os.path.join(tflite_models_dir, f'{timestamp}.tflite')
    with open(tflite_model_name, 'wb') as fp:
        fp.write(tflite_model)
    saved_path = tflite_model_name

    # Zip the TFLite file with DEFLATE so the plain and compressed sizes can be compared.
    zipped_model_name = f'{tflite_model_name}.zip'
    with zipfile.ZipFile(zipped_model_name, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(tflite_model_name)

    # Sizes in KB (bytes / 1024).
    tflite_size = os.path.getsize(tflite_model_name) / 1024.0
    zipped_size = os.path.getsize(zipped_model_name) / 1024.0

    print(f'Original tflite size (pruned model): {tflite_size:.3f} KB')
    print(f'Zipped tflite size (pruned model): {zipped_size:.3f} KB')
else:
    print(f'Test accuracy {test_accuracy:.4f} does not exceed {ACCURACY_THRESHOLD}; '
          'no model was exported.')


INFO:tensorflow:Assets written to: ./triet/saved_models/2/assets
INFO:tensorflow:Assets written to: ./triet/saved_models/2/assets
2024-01-04 09:03:44.280055: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-04 09:03:44.280090: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-04 09:03:44.445932: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./triet/saved_models/2
2024-01-04 09:03:44.644760: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-04 09:03:44.644791: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: ./triet/saved_models/2
2024-01-04 09:03:44.648276: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-01-04 09:03:44.649387: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-0

## 4. Test model

### 4.1 Define reference model

In [None]:
# Mel-spectrogram settings of the reference model; unpacked into MelSpectrogram(...).
REF_PREPROCESSING_ARGS = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.04,
    'frame_step_in_s': 0.02,
    'num_mel_bins': 40,
    'lower_frequency': 20,
    'upper_frequency': 4000,
}

# All names in this cell carry a `ref_` prefix so that the variables set by the
# pruned-model export cell (tflite_model_name, saved_model_dir, ...) are not
# overwritten when this cell runs.
ref_tflite_models_dir = './tflite_models'
os.makedirs(ref_tflite_models_dir, exist_ok=True)
ref_tflite_model_name = os.path.join(ref_tflite_models_dir, 'ref_model.tflite')

# The reference model is never trained here: it is only built, saved and
# converted, and the conversion is skipped when the TFLite file already exists.
if not os.path.exists(ref_tflite_model_name):
    ref_model = tf.keras.Sequential([
        # 40 matches num_mel_bins. 49 is presumably the frame count for one
        # second of 16 kHz audio: 1 + (16000 - 640) // 320 with 0.04 s frames
        # and a 0.02 s step, assuming no padding -- confirm against MelSpectrogram.
        tf.keras.layers.Input(shape=[49, 40, 1]),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(units=2),
        tf.keras.layers.Softmax()
    ])

    ref_model.build()

    ref_saved_model_dir = './saved_models/ref_model'
    os.makedirs(ref_saved_model_dir, exist_ok=True)
    ref_model.save(ref_saved_model_dir)

    # Convert from the directory that was just written (single source for the path).
    ref_converter = tf.lite.TFLiteConverter.from_saved_model(ref_saved_model_dir)
    ref_tflite_model = ref_converter.convert()

    with open(ref_tflite_model_name, 'wb') as fp:
        fp.write(ref_tflite_model)


### 4.2 Measure Latency Saving

In [None]:
def measure_median_latency(model_path, extract_features, audio, num_runs=100):
    """Return the median end-to-end latency, in seconds, of a TFLite model.

    Each run times feature extraction, adding the batch and channel axes,
    copying the input into the interpreter, inference and reading the output.

    Args:
        model_path: path of the `.tflite` file to load.
        extract_features: callable mapping `audio` to the model's 2-D features.
        audio: waveform passed unchanged to `extract_features` on every run.
        num_runs: number of timed repetitions.

    Returns:
        Median of the `num_runs` measured durations.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    run_latencies = []
    for _ in range(num_runs):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short durations, unlike wall-clock time().
        start = perf_counter()

        features = extract_features(audio)
        features = tf.expand_dims(features, 0)    # batch axis
        features = tf.expand_dims(features, -1)   # channel axis
        interpreter.set_tensor(input_index, features)
        interpreter.invoke()
        interpreter.get_tensor(output_index)

        run_latencies.append(perf_counter() - start)

    return np.median(run_latencies)


# The pruned model path only exists if the export cell accepted the model;
# fail with an explicit message instead of a NameError otherwise.
pruned_model_path = globals().get('saved_path')
if pruned_model_path is None:
    raise RuntimeError(
        'No pruned TFLite model is available (`saved_path` is not set): '
        'the trained model was not exported, so there is nothing to benchmark.')

# 16000 random samples, i.e. one second at the 16 kHz sampling rate; the same
# waveform is fed to both models.
audio = tf.random.normal((16000,))

# Reference: log-mel-spectrogram front end + reference TFLite model.
mel_spec_processor = MelSpectrogram(**REF_PREPROCESSING_ARGS)
median_ref_latency = measure_median_latency(
    'tflite_models/ref_model.tflite', mel_spec_processor.get_mel_spec, audio)

# Optimized: MFCC front end + pruned TFLite model.
mfcc_spec_processor = MFCC(**PREPROCESSING_ARGS)
optimized_latency = measure_median_latency(
    pruned_model_path, mfcc_spec_processor.get_mfccs, audio)

# Latency reduction relative to the reference, in percent.
print(100 * (median_ref_latency - optimized_latency) / median_ref_latency)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
55.5087001450894


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=0ec8e575-19c8-4894-9160-01c0b36ec399' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>