In [1]:
import os
import random
import zipfile
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_model_optimization as tfmot


from time import time
from glob import glob
from preprocessing import compute_linear_matrix, get_mfccs_training, get_mfcc, LABELS
from itertools import product
from functools import partial
from typing import Iterable, Any

seed = 42

# Seed every random number generator this notebook touches (Python, NumPy,
# TensorFlow) and set the hash-seed / deterministic-ops environment flags,
# so that a Restart & Run All starts from the same state each time.
os.environ.update(PYTHONHASHSEED=str(seed), TF_DETERMINISTIC_OPS='1')
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)


# Root folder for everything the notebook writes (models, tflite files, results.csv).
SAVING_FOLDER = os.path.join(os.curdir, 'SAVING_FOLDER')

if not os.path.exists(SAVING_FOLDER):
    os.makedirs(SAVING_FOLDER)


2022-12-21 10:02:50.101126: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-21 10:02:50.312148: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-21 10:02:50.317212: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-21 10:02:50.317233: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if yo

In [2]:
# File-name datasets for the two keywords ('go' and 'stop') of each split.
# The generator is consumed in order, so the datasets are created as
# train -> val -> test.
train_ds_pure, val_ds_pure, test_ds_pure = (
    tf.data.Dataset.list_files(patterns)
    for patterns in (
        ['msc-train/go*', 'msc-train/stop*'],
        ['msc-val/go*', 'msc-val/stop*'],
        ['msc-test/go*', 'msc-test/stop*'],
    )
)

2022-12-21 10:02:55.250096: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-21 10:02:55.250125: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-21 10:02:55.250144: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (p-da3e1e27-78dd-422e-a631-d2ca46001cf1): /proc/driver/nvidia/version does not exist
2022-12-21 10:02:55.250555: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def preprocess(filename):
    '''Turns one audio file name into a (features, label index) pair.

    Reads the module-level ``get_frozen_spectrogram`` and ``SHAPE``, which
    ``training`` publishes through ``global`` before mapping this function
    over a dataset, so it cannot be used before ``training`` has set them.

    Returns:
        The features resized to 32x32 with a trailing channel axis, and the
        position of the file's label inside ``LABELS``.
    '''
    features, label = get_frozen_spectrogram(filename)

    # Pin the static shape first, then add the channel axis and resize.
    features.set_shape(SHAPE)
    features = tf.image.resize(tf.expand_dims(features, -1), [32,32])

    # Comparing against LABELS marks the matching entry; argmax picks its index.
    class_index = tf.argmax(label == LABELS)
    return features, class_index

def get_model(alpha, model_filter, input_shape):
    '''Builds the (uncompiled) three-block convolutional classifier.

    The network is Input -> 3 x (Conv2D 3x3 without bias -> BatchNormalization
    -> ReLU) -> GlobalAveragePooling2D -> Dense -> Softmax. The first
    convolution uses stride 2 with 'valid' padding, the other two use stride 1
    with 'same' padding.

    Args:
        alpha: width multiplier applied to ``model_filter``.
        model_filter: base number of filters shared by all convolutions.
        input_shape: shape of one input sample (no batch axis).

    Returns:
        A ``tf.keras.Sequential`` whose output is a softmax over
        ``len(LABELS)`` classes, i.e. probabilities rather than logits.
    '''
    # All three convolutions share the same scaled width; int() truncates.
    n_filters = int(model_filter * alpha)

    def conv_block(strides, padding):
        '''One Conv2D -> BatchNormalization -> ReLU stage.'''
        return [
            tf.keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], strides=strides, use_bias=False, padding=padding),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.ReLU(),
        ]

    return tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        *conv_block(strides=[2, 2], padding='valid'),
        *conv_block(strides=[1, 1], padding='same'),
        *conv_block(strides=[1, 1], padding='same'),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(units=len(LABELS)),
        tf.keras.layers.Softmax()
    ])

def save_model(model, path):
    '''Saves ``model`` under ``path``, creating the directory tree if needed.

    Args:
        model: object exposing ``save(path)``.
        path: destination directory; missing parent folders are created.
    '''
    # exist_ok avoids the check-then-create race and keeps re-runs idempotent.
    os.makedirs(path, exist_ok=True)
    model.save(path)

def convert_model(model, model_path, tflite_path, model_name):
    '''Converts a SavedModel to TFLite and stores it both raw and zipped.

    Args:
        model: unused; kept so existing callers keep working. The conversion
            reads the SavedModel stored at ``model_path``.
        model_path: directory of the SavedModel to convert.
        tflite_path: output directory (created if missing).
        model_name: base name of the output; the files written are
            ``<model_name>.tflite`` and ``<model_name>.tflite.zip``.

    Returns:
        ``(tflite_model_path, sizes)`` where ``sizes`` holds the size of the
        .tflite file and of its zip archive, both in KiB (bytes / 1024).
    '''
    converter = tf.lite.TFLiteConverter.from_saved_model(model_path)
    tflite_model = converter.convert()

    os.makedirs(tflite_path, exist_ok=True)
    tflite_file_name = f'{model_name}.tflite'
    tflite_model_path = os.path.join(tflite_path, tflite_file_name)
    zip_path = f'{tflite_model_path}.zip'

    with open(tflite_model_path, 'wb') as fp:
        fp.write(tflite_model)

    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        # Name the archive entry after the model, not after a fixed
        # 'model10.tflite', so the zip content matches whatever was converted.
        archive.write(tflite_model_path, tflite_file_name)

    sizes = {
        'tflite_model_size' : os.path.getsize(tflite_model_path) / 1024.0,
        'tflite_zip_model_size' : os.path.getsize(zip_path) / 1024.0
    }

    return tflite_model_path, sizes

def save_results(name, parameters, model_accuracies, sizes):
    '''Appends one run as a single row to ``results.csv`` in SAVING_FOLDER.

    The row starts with ``model_name`` followed by the entries of
    ``parameters``, ``model_accuracies`` and ``sizes`` in that order. The
    header line is written only when the file does not exist yet.
    '''
    csv_path = os.path.join(SAVING_FOLDER, 'results.csv')

    row = {'model_name': name}
    for section in (parameters, model_accuracies, sizes):
        row.update(section)

    # Decide about the header before writing: the write itself creates the file.
    write_header = not os.path.exists(csv_path)
    pd.DataFrame([row]).to_csv(csv_path, mode='a', header=write_header, index=False)

    
        


In [4]:
def training(downsampling_rate, frame_length_in_s, frame_step_in_s, num_mel_bins, num_coefficients, lower_frequency,
 upper_frequency, batch_size, initial_learning_rate, end_learning_rate, epochs, model_filter, alpha, initial_sparsity,
 final_sparsity):
    '''Trains the magnitude-pruned classifier and reports its accuracies.

    Steps: build the preprocessing function from the audio parameters, map it
    over the train/val/test file datasets, train ``get_model`` wrapped in
    ``prune_low_magnitude`` with a polynomially decaying learning rate and a
    polynomial sparsity schedule, evaluate on all three splits and plot the
    training/validation loss.

    Side effects: rebinds the module-level ``SHAPE`` and
    ``get_frozen_spectrogram`` (both read by ``preprocess``) and shows a plot.

    Args:
        downsampling_rate, frame_length_in_s, frame_step_in_s, num_mel_bins,
        num_coefficients, lower_frequency, upper_frequency: forwarded as
            keyword arguments to ``get_mfccs_training``.
        batch_size: batch size of the three datasets.
        initial_learning_rate, end_learning_rate: bounds of the learning-rate decay.
        epochs: number of training epochs.
        model_filter, alpha: base filter count and width multiplier for ``get_model``.
        initial_sparsity, final_sparsity: bounds of the pruning schedule.

    Returns:
        ``(model, accuracies)``: the trained model, still wrapped for pruning,
        and a dict with ``training_accuracy_tf``, ``validation_accuracy_tf``
        and ``test_accuracy_tf`` in percent.
    '''

    # ******************* Parameter definition ***********************************************************
    PREPROCESSING_ARGS = {
        'downsampling_rate': downsampling_rate,
        'frame_length_in_s': frame_length_in_s,
        'frame_step_in_s': frame_step_in_s, 
        'num_mel_bins': num_mel_bins,
        'num_coefficients': num_coefficients,
        'lower_frequency': lower_frequency,
        'upper_frequency': upper_frequency,
    }
    
    # ******************* Preprocessing ***********************************************************
    # preprocess() looks these two names up at module level, so they must be
    # published before the datasets below are mapped.
    global SHAPE, get_frozen_spectrogram

    get_frozen_spectrogram = partial(get_mfccs_training, **PREPROCESSING_ARGS)

    # Probe a single example to learn the static feature shape.
    # NOTE(review): the slice keeps the first num_coefficients entries of the
    # leading axis - confirm that this is the axis that was meant.
    for mfcc, _ in train_ds_pure.map(get_frozen_spectrogram).take(1):
        SHAPE = mfcc[:num_coefficients].shape


    train_ds = train_ds_pure.map(preprocess).batch(batch_size).cache()
    val_ds = val_ds_pure.map(preprocess).batch(batch_size)
    test_ds = test_ds_pure.map(preprocess).batch(batch_size)

    #get shape for model input (batch axis dropped)
    for example_batch, _ in train_ds.take(1):
        input_shape = example_batch.shape[1:]



    # ******************* Model And Training *******************************************************
    # get_model ends with a Softmax layer, hence from_logits=False.
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
    metrics = [tf.metrics.SparseCategoricalAccuracy()]
    callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

    # The optimizer and the pruning callback both advance once per BATCH, so
    # every schedule is expressed in batches: len(train_ds) is the number of
    # batches per epoch, whereas len(train_ds_pure) is the number of files.
    total_steps = len(train_ds) * epochs

    linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=initial_learning_rate,
        end_learning_rate=end_learning_rate,
        decay_steps=total_steps,
    )

    optimizer = tf.optimizers.Adam(learning_rate=linear_decay)

    # Start pruning after the first 20% of training and reach final_sparsity
    # on the last step. Counting files instead of batches here would push
    # begin_step past the end of training for any batch_size above 5 (where
    # 0.2 * batch_size exceeds 1), so pruning would never start.
    begin_step = int(total_steps * 0.2)
    end_step = total_steps

    # Pruning
    pruning_params = {
        'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
            initial_sparsity= initial_sparsity,
            final_sparsity= final_sparsity,
            begin_step=begin_step,
            end_step=end_step
        )
    }


    model = tfmot.sparsity.keras.prune_low_magnitude(get_model(alpha, model_filter, input_shape), **pruning_params)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    history = model.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=callbacks)


    # ******************* Results ***************************************************************
    _, test_accuracy = model.evaluate(test_ds)
    _, training_accuracy = model.evaluate(train_ds)
    _, validation_accuracy = model.evaluate(val_ds)

    accuracies = {
        'training_accuracy_tf': training_accuracy*100,
        'validation_accuracy_tf': validation_accuracy*100,
        'test_accuracy_tf': test_accuracy*100
    }

    _, ax = plt.subplots()
    ax.plot(history.history['loss'], c='r')
    ax.plot(history.history['val_loss'], c='b')
    ax.set_title("Training and Validation Loss")
    ax.legend(['train_loss', 'val_loss'])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    plt.show()

    return model, accuracies



In [5]:
# Keyword arguments for training(); the keys must match its parameter names.
ARGUMENTS = dict(
    # Audio preprocessing (forwarded to get_mfccs_training)
    downsampling_rate=16000,
    frame_length_in_s=0.016,
    frame_step_in_s=0.016,
    num_mel_bins=20,
    num_coefficients=20,
    lower_frequency=20,
    upper_frequency=8000,
    # Optimisation
    batch_size=20,
    initial_learning_rate=0.01,
    end_learning_rate=1e-5,
    epochs=20,
    # Model width: filters per convolution = int(model_filter * alpha)
    model_filter=64,
    alpha=0.2,
    # Pruning schedule bounds
    initial_sparsity=0.2,
    final_sparsity=0.6,
)


In [6]:
model, accuracies = training(**ARGUMENTS)

MODEL_NAME = 'model10'
MODEL_PATH = os.path.join(SAVING_FOLDER, 'model', f'{MODEL_NAME}')

# Remove the pruning wrappers before exporting: per the tfmot documentation
# they carry variables that are only needed during training and would
# otherwise end up in the exported model. `model` itself is left untouched
# (and compiled) in case it is evaluated again further down.
model_for_export = tfmot.sparsity.keras.strip_pruning(model)
save_model(model_for_export, MODEL_PATH)

TFLITE_PATH = os.path.join(SAVING_FOLDER, 'tflite_model')
# NOTE: despite its name, TFLITE_NAME receives the full path of the .tflite file.
TFLITE_NAME, sizes = convert_model(model_for_export, MODEL_PATH, TFLITE_PATH, MODEL_NAME)

save_results(MODEL_NAME, ARGUMENTS, accuracies, sizes)


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=da3e1e27-78dd-422e-a631-d2ca46001cf1' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>