# Notebook with model definitions

This notebook contains the model definitions. The original plan was to export a `.py` version of it and import that into the training notebook.
On second thought, we can use the ipynb library to import it directly into the Jupyter notebook.

[model inspiration paper](https://www.isca-archive.org/interspeech_2015/sainath15b_interspeech.html)

[model inspiration code from TF](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/speech_commands/models.py#L673)

^^^ two OG papers that are definitely good

[general keyword spotting on microcontroller paper](https://arxiv.org/abs/1711.07128)


[binary quantization paper, might not be possible with tf and espressif](https://www.isca-archive.org/interspeech_2022/wang22g_interspeech.html)

These two papers go into other possible model architectures:

[model with temporal convolutions](https://www.isca-archive.org/interspeech_2020/li20s_interspeech.html)

[depthwise conv](https://www.isca-archive.org/interspeech_2020/xu20d_interspeech.html) this might be very good

maybe for NAS:

[micronets maybe for NAS](https://arxiv.org/abs/2010.11267)

In [1]:
# imports
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers, models
from IPython import display

from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, GlobalAveragePooling2D, Dense, Activation, Input, Reshape, Multiply, AveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, ReLU, Dropout
from tensorflow.keras import Sequential

import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.python.core.keras.compat import keras
from keras.layers import Resizing
from tensorflow_model_optimization.quantization.keras import quantize_annotate_layer
import tempfile

import keras_tuner as kt
import nbimporter





In [2]:
# Needed for layers that are not supported by the quantization API.
class NoOpQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """Quantize config that leaves the annotated layer untouched.

    Every getter reports an empty collection and every setter ignores its
    argument. In this notebook it is attached to the ``Resizing`` layer via
    ``quantize_annotate_layer``.
    """

    def get_weights_and_quantizers(self, layer):
        """Return an empty list: no weights of ``layer`` are quantized."""
        return list()

    def get_activations_and_quantizers(self, layer):
        """Return an empty list: no activations of ``layer`` are quantized."""
        return list()

    def set_quantize_weights(self, layer, quantize_weights):
        """Ignore ``quantize_weights``; nothing is written back to ``layer``."""

    def set_quantize_activations(self, layer, quantize_activations):
        """Ignore ``quantize_activations``; nothing is written back to ``layer``."""

    def get_output_quantizers(self, layer):
        """Return an empty list: the output of ``layer`` gets no quantizer."""
        return list()

    def get_config(self):
        """Return an empty dict: this config has no state to serialize."""
        return dict()

## Simple CNNs

### Model 1

In [3]:
def create_model_1(input_shape, num_classes=6, is_training=True):
    """Assemble the 'CNN1' baseline network as a Sequential model.

    The layers are: input, a 32x32 ``Resizing`` layer annotated with
    ``NoOpQuantizeConfig``, two ReLU convolutions (32 and 64 filters, kernel
    size 3), max-pooling, dropout, flatten, a 128-unit ReLU dense layer,
    dropout, and a final dense layer without activation.

    Args:
        input_shape: Shape handed to the ``InputLayer``.
        num_classes: Number of units in the final ``Dense`` layer.
        is_training: Accepted for signature parity with the other builders;
            this function does not read it.

    Returns:
        The uncompiled ``keras.Sequential`` model named 'CNN1'.
    """
    # Layers are declared in the exact order in which they are added below.
    layer_stack = [
        keras.layers.InputLayer(input_shape=input_shape),
        # Resizing is wrapped with the no-op quantize config defined in this notebook.
        quantize_annotate_layer(
            keras.layers.Resizing(32, 32),
            quantize_config=NoOpQuantizeConfig(),
        ),
        # Feature extractor
        keras.layers.Conv2D(32, 3, activation='relu'),
        keras.layers.Conv2D(64, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(0.25),
        # Classifier head
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes),
    ]

    cnn = keras.Sequential(name='CNN1')
    for layer in layer_stack:
        cnn.add(layer)
    return cnn


### Model 2

In [4]:
# Reduced complexity for faster training and smaller size
def create_model_2(input_shape, num_classes=6, is_training=True):
    """Assemble the 'CNN2' network, a lighter variant with a single convolution.

    The layers are: input, a 32x32 ``Resizing`` layer annotated with
    ``NoOpQuantizeConfig``, one 32-filter ReLU convolution (kernel size 3),
    max-pooling, dropout, flatten, a 64-unit ReLU dense layer, dropout, and a
    final dense layer without activation.

    Args:
        input_shape: Shape handed to ``keras.layers.Input``.
        num_classes: Number of units in the final ``Dense`` layer.
        is_training: Accepted for signature parity with the other builders;
            this function does not read it.

    Returns:
        The uncompiled ``keras.Sequential`` model named 'CNN2'.
    """
    # Layers are declared in the exact order in which they are added below.
    layer_stack = [
        keras.layers.Input(shape=input_shape),
        # Resizing is wrapped with the no-op quantize config defined in this notebook.
        quantize_annotate_layer(
            keras.layers.Resizing(32, 32),
            quantize_config=NoOpQuantizeConfig(),
        ),
        keras.layers.Conv2D(32, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.25),
        keras.layers.Dense(num_classes),
    ]

    cnn = keras.Sequential(name='CNN2')
    for layer in layer_stack:
        cnn.add(layer)
    return cnn

## CNNs

### Model 3 Tiny Conv model from paper 

These models are defined twice: once following my interpretation of the models in the paper, and once as Keras `Sequential` models so that they work with quantization-aware training.

In [5]:
'''
def create_tiny_conv_model_small(input_shape, num_classes=6, is_training=True):
    """
    Builds a tiny convolutional model optimized for microcontrollers.

    Args:
        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).
        num_classes: Integer, number of output classes.
        is_training: Boolean, whether the model is being trained or deployed.

    Returns:
        model: Keras Model instance.
    """
    inputs = Input(shape=(124, 129, 1), name='input')

    # Convolutional Layer
    x = Conv2D(filters=8,
               kernel_size=(10, 8),
               strides=(2, 2),
               padding='same',
               use_bias=True,
               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
               bias_initializer='zeros',
               name='conv')(inputs)
    
    # ReLU Activation
    x = ReLU(name='relu1')(x)
    
    # Optional Dropout Layer
    if is_training:
        x = Dropout(rate=0.2, name='dropout1')(x)
    
    # Flatten the output
    x = Flatten(name='flatten')(x)
    
    # Output Layer
    outputs = Dense(units=num_classes,
                    activation='softmax',  # Use 'softmax' if you prefer probabilities
                    kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
                    bias_initializer='zeros',
                    name='fc2')(x)
    
    # Define the model
    model = Model(inputs=inputs, outputs=outputs, name='tiny_conv_model')
    
    return model
'''


'\ndef create_tiny_conv_model_small(input_shape, num_classes=6, is_training=True):\n    """\n    Builds a tiny convolutional model optimized for microcontrollers.\n\n    Args:\n        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).\n        num_classes: Integer, number of output classes.\n        is_training: Boolean, whether the model is being trained or deployed.\n\n    Returns:\n        model: Keras Model instance.\n    """\n    inputs = Input(shape=(124, 129, 1), name=\'input\')\n\n    # Convolutional Layer\n    x = Conv2D(filters=8,\n               kernel_size=(10, 8),\n               strides=(2, 2),\n               padding=\'same\',\n               use_bias=True,\n               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),\n               bias_initializer=\'zeros\',\n               name=\'conv\')(inputs)\n    \n    # ReLU Activation\n    x = ReLU(name=\'relu1\')(x)\n    \n    # Optional Dropout Layer\n    if is_tr

In [6]:
# Create tiny convolutional model using Sequential API
def create_tiny_conv_model_small(input_shape=(124, 129, 1), num_classes=6, is_training=True):
    """Build the single-convolution 'tiny_conv_model' as a Sequential model.

    Layers and initializers are all taken from the same ``keras`` namespace
    (the one imported at the top of the notebook), so objects from two
    different Keras implementations are never mixed inside one model.

    Args:
        input_shape: Input shape passed to the first convolution.
        num_classes: Number of units of the softmax output layer.
        is_training: When true, a ``Dropout(0.2)`` layer is added after the
            ReLU; when false that layer is left out of the model entirely.

    Returns:
        The uncompiled ``keras.Sequential`` model; its last layer applies a
        softmax over ``num_classes`` units.
    """
    def small_normal():
        # A fresh initializer instance per layer, so layers never share one object.
        return keras.initializers.TruncatedNormal(stddev=0.01)

    model = keras.Sequential(name='tiny_conv_model')
    model.add(keras.layers.Conv2D(filters=8,
                                  kernel_size=(10, 8),
                                  strides=(2, 2),
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=small_normal(),
                                  bias_initializer='zeros',
                                  input_shape=input_shape,
                                  name='conv'))
    model.add(keras.layers.ReLU(name='relu1'))

    # Dropout is a structural choice here: it is only part of the model when requested.
    if is_training:
        model.add(keras.layers.Dropout(rate=0.2, name='dropout1'))

    model.add(keras.layers.Flatten(name='flatten'))
    model.add(keras.layers.Dense(units=num_classes,
                                 activation='softmax',
                                 kernel_initializer=small_normal(),
                                 bias_initializer='zeros',
                                 name='fc2'))

    return model

### Model 4 Tiny Embed Conv model from paper

In [7]:
'''
def create_tiny_embed_conv_model_small(input_shape, num_classes=6, is_training=True):
    """
    Builds a tiny convolutional model optimized for microcontrollers.

    Args:
        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).
        num_classes: Integer, number of output classes.
        is_training: Boolean, whether the model is being trained or deployed.

    Returns:
        model: Keras Model instance.
    """
    inputs = Input(shape=(124, 129, 1), name='input')

    # Convolutional Layer
    x = Conv2D(filters=8,
               kernel_size=(10, 8),
               strides=(2, 2),
               padding='same',
               use_bias=True,
               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
               bias_initializer='zeros',
               name='conv1')(inputs)
    
    # ReLU Activation
    x = ReLU(name='relu1')(x)
    
    # Optional Dropout Layer
    if is_training:
        x = Dropout(rate=0.2, name='dropout1')(x)

    # Convolutional Layer
    x = Conv2D(filters=8,
               kernel_size=(10, 8),
               strides=(8, 8),
               padding='same',
               use_bias=True,
               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
               bias_initializer='zeros',
               name='conv2')(x)
    
    # ReLU Activation
    x = ReLU(name='relu2')(x)
    
    # Optional Dropout Layer
    if is_training:
        x = Dropout(rate=0.2, name='dropout2')(x)
    
    # Flatten the output
    x = Flatten(name='flatten')(x)
    
    # Output Layer
    outputs = Dense(units=num_classes,
                    activation='softmax',  # Use 'softmax' if you prefer probabilities
                    kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
                    bias_initializer='zeros',
                    name='fc2')(x)
    
    # Define the model
    model = Model(inputs=inputs, outputs=outputs, name='tiny_embed_conv_model')
    
    return model
    
'''


'\ndef create_tiny_embed_conv_model_small(input_shape, num_classes=6, is_training=True):\n    """\n    Builds a tiny convolutional model optimized for microcontrollers.\n\n    Args:\n        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).\n        num_classes: Integer, number of output classes.\n        is_training: Boolean, whether the model is being trained or deployed.\n\n    Returns:\n        model: Keras Model instance.\n    """\n    inputs = Input(shape=(124, 129, 1), name=\'input\')\n\n    # Convolutional Layer\n    x = Conv2D(filters=8,\n               kernel_size=(10, 8),\n               strides=(2, 2),\n               padding=\'same\',\n               use_bias=True,\n               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),\n               bias_initializer=\'zeros\',\n               name=\'conv1\')(inputs)\n    \n    # ReLU Activation\n    x = ReLU(name=\'relu1\')(x)\n    \n    # Optional Dropout Layer\n    i

In [8]:
# Create tiny embedded convolutional model using Sequential API
def create_tiny_embed_conv_model_small(input_shape=(124, 129, 1), num_classes=6, is_training=True):
    """Build the two-convolution 'tiny_embed_conv_model' as a Sequential model.

    Layers and initializers are all taken from the same ``keras`` namespace
    (the one imported at the top of the notebook), so objects from two
    different Keras implementations are never mixed inside one model.

    Args:
        input_shape: Input shape passed to the first convolution.
        num_classes: Number of units of the softmax output layer.
        is_training: When true, a ``Dropout(0.2)`` layer is added after each
            ReLU; when false those layers are left out of the model entirely.

    Returns:
        The uncompiled ``keras.Sequential`` model; its last layer applies a
        softmax over ``num_classes`` units.
    """
    def small_normal():
        # A fresh initializer instance per layer, so layers never share one object.
        return keras.initializers.TruncatedNormal(stddev=0.01)

    model = keras.Sequential(name='tiny_embed_conv_model')

    # Stage 1: stride-2 convolution directly on the input.
    model.add(keras.layers.Conv2D(filters=8,
                                  kernel_size=(10, 8),
                                  strides=(2, 2),
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=small_normal(),
                                  bias_initializer='zeros',
                                  input_shape=input_shape,
                                  name='conv1'))
    model.add(keras.layers.ReLU(name='relu1'))
    if is_training:
        model.add(keras.layers.Dropout(rate=0.2, name='dropout1'))

    # Stage 2: same kernel, stride 8 in both directions.
    model.add(keras.layers.Conv2D(filters=8,
                                  kernel_size=(10, 8),
                                  strides=(8, 8),
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=small_normal(),
                                  bias_initializer='zeros',
                                  name='conv2'))
    model.add(keras.layers.ReLU(name='relu2'))
    if is_training:
        model.add(keras.layers.Dropout(rate=0.2, name='dropout2'))

    # Classifier head
    model.add(keras.layers.Flatten(name='flatten'))
    model.add(keras.layers.Dense(units=num_classes,
                                 activation='softmax',
                                 kernel_initializer=small_normal(),
                                 bias_initializer='zeros',
                                 name='fc2'))

    return model

### Model 5: the tiny embed conv model with the best hyperparameters found by the hyperparameter search

Best hyperparameters: {'kernel_size_1_height': 9, 'kernel_size_1_width': 9, 'stride_1_height': 2, 'stride_1_width': 1, 'conv1_filters': 16, 'dropout_1': True, 'kernel_size_2_height': 7, 'kernel_size_2_width': 7, 'stride_2_height': 4, 'stride_2_width': 6, 'conv2_filters': 12, 'dropout_2': True, 'tuner/epochs': 15, 'tuner/initial_epoch': 5, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0013'}

In [9]:
'''
def create_tiny_embed_conv_model_small_best(input_shape, num_classes=6, is_training=True):
    """
    Builds a tiny convolutional model optimized for microcontrollers.

    Args:
        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).
        num_classes: Integer, number of output classes.
        is_training: Boolean, whether the model is being trained or deployed.

    Returns:
        model: Keras Model instance.
    """
    inputs = Input(shape=(124, 129, 1), name='input')

    # Convolutional Layer
    x = Conv2D(filters=16,
               kernel_size=(9, 9),
               strides=(2, 1),
               padding='same',
               use_bias=True,
               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
               bias_initializer='zeros',
               name='conv1')(inputs)
    
    # ReLU Activation
    x = ReLU(name='relu1')(x)
    
    # Optional Dropout Layer
    if is_training:
        x = Dropout(rate=0.2, name='dropout1')(x)

    # Convolutional Layer
    x = Conv2D(filters=12,
               kernel_size=(7, 7),
               strides=(4, 6),
               padding='same',
               use_bias=True,
               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
               bias_initializer='zeros',
               name='conv2')(x)
    
    # ReLU Activation
    x = ReLU(name='relu2')(x)
    
    # Optional Dropout Layer
    if is_training:
        x = Dropout(rate=0.2, name='dropout2')(x)
    
    # Flatten the output
    x = Flatten(name='flatten')(x)
    
    # Output Layer
    outputs = Dense(units=num_classes,
                    activation='softmax',  # Use 'softmax' if you prefer probabilities
                    kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
                    bias_initializer='zeros',
                    name='fc2')(x)
    
    # Define the model
    model = Model(inputs=inputs, outputs=outputs, name='tiny_embed_conv_model_best')
    
    return model
'''

'\ndef create_tiny_embed_conv_model_small_best(input_shape, num_classes=6, is_training=True):\n    """\n    Builds a tiny convolutional model optimized for microcontrollers.\n\n    Args:\n        input_shape: Tuple, the shape of the input data (time_steps, frequency_bins, channels).\n        num_classes: Integer, number of output classes.\n        is_training: Boolean, whether the model is being trained or deployed.\n\n    Returns:\n        model: Keras Model instance.\n    """\n    inputs = Input(shape=(124, 129, 1), name=\'input\')\n\n    # Convolutional Layer\n    x = Conv2D(filters=16,\n               kernel_size=(9, 9),\n               strides=(2, 1),\n               padding=\'same\',\n               use_bias=True,\n               kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),\n               bias_initializer=\'zeros\',\n               name=\'conv1\')(inputs)\n    \n    # ReLU Activation\n    x = ReLU(name=\'relu1\')(x)\n    \n    # Optional Dropout Layer\n

In [10]:
# Create tiny embedded convolutional model (best version) using Sequential API
def create_tiny_embed_conv_model_small_best(input_shape=(124, 129, 1), num_classes=6, is_training=True):
    """Build 'tiny_embed_conv_model_best', the two-convolution model with tuned sizes.

    The convolution settings (16 filters, 9x9 kernel, stride (2, 1); then
    12 filters, 7x7 kernel, stride (4, 6)) are hard-coded in this function.

    Layers and initializers are all taken from the same ``keras`` namespace
    (the one imported at the top of the notebook), so objects from two
    different Keras implementations are never mixed inside one model.

    Args:
        input_shape: Input shape passed to the first convolution.
        num_classes: Number of units of the softmax output layer.
        is_training: When true, a ``Dropout(0.2)`` layer is added after each
            ReLU; when false those layers are left out of the model entirely.

    Returns:
        The uncompiled ``keras.Sequential`` model; its last layer applies a
        softmax over ``num_classes`` units.
    """
    def small_normal():
        # A fresh initializer instance per layer, so layers never share one object.
        return keras.initializers.TruncatedNormal(stddev=0.01)

    model = keras.Sequential(name='tiny_embed_conv_model_best')

    # Stage 1
    model.add(keras.layers.Conv2D(filters=16,
                                  kernel_size=(9, 9),
                                  strides=(2, 1),
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=small_normal(),
                                  bias_initializer='zeros',
                                  input_shape=input_shape,
                                  name='conv1'))
    model.add(keras.layers.ReLU(name='relu1'))
    if is_training:
        model.add(keras.layers.Dropout(rate=0.2, name='dropout1'))

    # Stage 2
    model.add(keras.layers.Conv2D(filters=12,
                                  kernel_size=(7, 7),
                                  strides=(4, 6),
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=small_normal(),
                                  bias_initializer='zeros',
                                  name='conv2'))
    model.add(keras.layers.ReLU(name='relu2'))
    if is_training:
        model.add(keras.layers.Dropout(rate=0.2, name='dropout2'))

    # Classifier head
    model.add(keras.layers.Flatten(name='flatten'))
    model.add(keras.layers.Dense(units=num_classes,
                                 activation='softmax',
                                 kernel_initializer=small_normal(),
                                 bias_initializer='zeros',
                                 name='fc2'))

    return model

## Hyperparameter Tuning (grid search)

We can also try to implement some form of neural architecture search (NAS).

In [11]:
# Switch for the Keras Tuner search further down in this notebook;
# the search code only runs when this is set to True.
gridsearch = False

In [12]:
def build_model(hp):
    """Hypermodel builder: a two-convolution classifier with tunable layer sizes.

    Hyperparameters are registered on ``hp`` in this order: kernel height and
    width, stride height and width, filter count and dropout toggle for the
    first convolution, then the same six for the second convolution. The
    input shape (124, 129, 1) and the 6 output classes are fixed.

    Args:
        hp: Hyperparameter container exposing ``Int`` and ``Boolean``.

    Returns:
        The model, compiled with the 'adam' optimizer,
        'sparse_categorical_crossentropy' loss and the 'accuracy' metric.
    """
    def kernel_dim(hp_name):
        # Both convolutions share the same kernel-size search range.
        return hp.Int(hp_name, min_value=3, max_value=10, step=2)

    def filter_count(hp_name):
        # Both convolutions share the same filter-count search range.
        return hp.Int(hp_name, min_value=4, max_value=16, step=4)

    def first_stride(hp_name):
        return hp.Int(hp_name, min_value=1, max_value=2, step=1)

    def second_stride(hp_name):
        return hp.Int(hp_name, min_value=4, max_value=8, step=2)

    net_input = Input(shape=(124, 129, 1), name='input')

    # ---- first convolution stage ----
    k1 = (kernel_dim('kernel_size_1_height'), kernel_dim('kernel_size_1_width'))
    s1 = (first_stride('stride_1_height'), first_stride('stride_1_width'))
    f1 = filter_count('conv1_filters')
    features = Conv2D(filters=f1,
                      kernel_size=k1,
                      strides=s1,
                      padding='same',
                      use_bias=True,
                      name='conv1')(net_input)
    features = ReLU(name='relu1')(features)
    if hp.Boolean('dropout_1'):
        features = Dropout(rate=0.2, name='dropout1')(features)

    # ---- second convolution stage ----
    k2 = (kernel_dim('kernel_size_2_height'), kernel_dim('kernel_size_2_width'))
    s2 = (second_stride('stride_2_height'), second_stride('stride_2_width'))
    f2 = filter_count('conv2_filters')
    features = Conv2D(filters=f2,
                      kernel_size=k2,
                      strides=s2,
                      padding='same',
                      use_bias=True,
                      name='conv2')(features)
    features = ReLU(name='relu2')(features)
    if hp.Boolean('dropout_2'):
        features = Dropout(rate=0.2, name='dropout2')(features)

    # ---- classifier head ----
    features = Flatten(name='flatten')(features)
    class_probs = Dense(units=6, activation='softmax', name='fc2')(features)

    tuned = Model(inputs=net_input, outputs=class_probs)
    tuned.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return tuned

# Optional hyperparameter search, executed only when the `gridsearch` flag is set.
# NOTE(review): `train_spectrogram_ds` and `val_spectrogram_ds` are not defined
# anywhere in this notebook; they must already exist in the namespace before the
# flag is switched on, otherwise the search call raises NameError.
if gridsearch:
    # Initialize the tuner; trial results are stored under
    # hyperparameter_tuning_tests/hyperparameter_tuning.
    tuner = kt.Hyperband(build_model,
                         objective='val_accuracy',
                         max_epochs=15,
                         factor=3,
                         directory='hyperparameter_tuning_tests',
                         project_name='hyperparameter_tuning')

    # Perform the search.
    # NOTE(review): Hyperband is configured with max_epochs=15 above; confirm
    # whether the `epochs=10` passed here has any effect on individual trials.
    tuner.search(train_spectrogram_ds, validation_data=val_spectrogram_ds, epochs=10)

    # Keep the single best model and its hyperparameter set.
    best_model = tuner.get_best_models(num_models=1)[0]
    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

    print(f"Best hyperparameters: {best_hp.values}")


# Train Models

In [13]:
# Function to plot training history (accuracy and loss)
def plot_training_history(history, model_number):
    """Plot accuracy and loss curves of one training run side by side.

    The left panel shows accuracy, the right panel shows loss. Validation
    curves are drawn only when the history contains them, so a run trained
    without validation data can still be plotted.

    Args:
        history: Object with a ``history`` dict holding per-epoch lists under
            'accuracy' and 'loss', and optionally 'val_accuracy' and 'val_loss'.
        model_number: Identifier inserted into both panel titles.

    Raises:
        KeyError: If 'accuracy' or 'loss' is missing from ``history.history``.
    """
    metrics = history.history
    train_acc = metrics['accuracy']
    train_loss = metrics['loss']
    epochs_range = range(len(train_acc))

    # (panel label, training curve, optional validation curve, legend position)
    panels = (
        ('Accuracy', train_acc, metrics.get('val_accuracy'), 'lower right'),
        ('Loss', train_loss, metrics.get('val_loss'), 'upper right'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, (label, train_curve, val_curve, legend_loc) in zip(axes, panels):
        ax.plot(epochs_range, train_curve, label=f'Training {label}')
        if val_curve is not None:
            ax.plot(epochs_range, val_curve, label=f'Validation {label}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        ax.legend(loc=legend_loc)
        ax.set_title(f'Model {model_number} - Training and Validation {label}')
    plt.show()
