In [None]:
import tensorflow as tf
from reader import AudioReader
from preprocessing import Padding, Normalization
from preprocessing import MFCC
from glob import glob

### Hyperparameters

In [2]:
# Audio front-end settings. The padding stage reads 'sampling_rate', and the
# whole mapping is unpacked into MFCC(**PREPROCESSING_ARGS).
PREPROCESSING_ARGS = dict(
    sampling_rate=16000,
    frame_length_in_s=0.04,
    frame_step_in_s=0.02,
    num_mel_bins=40,
    lower_frequency=20,
    upper_frequency=4000,
    num_coefficients=10,
)

# Optimisation settings read by the batching, learning-rate-schedule and fit cells.
TRAINING_ARGS = dict(
    batch_size=20,
    learning_rate=0.01,
    end_learning_rate=1e-5,
    epochs=50,
)

### Create train/val/test Datasets

In [3]:
def _files_dataset(*patterns):
    """Glob every pattern in order and wrap the concatenated matches in a file dataset."""
    matched = []
    for pattern in patterns:
        matched.extend(glob(pattern))
    return tf.data.Dataset.list_files(matched)


# One dataset of file paths per split, restricted to the 'down' and 'up' recordings.
train_ds = _files_dataset('/tmp/msc-train/down*', '/tmp/msc-train/up*')
val_ds = _files_dataset('/tmp/msc-val/down*', '/tmp/msc-val/up*')
test_ds = _files_dataset('/tmp/msc-test/down*', '/tmp/msc-test/up*')

### Define the Data Pipeline

In [None]:
# Class names; a label's integer id is its position in this list
# (see prepare_for_training).
LABELS = ['down', 'up']

# Preprocessing stages from the project-local `reader` / `preprocessing` modules,
# listed in the order the pipelines apply them.
# NOTE(review): tf.int16 is presumably the sample dtype of the audio files — confirm.
audio_reader = AudioReader(tf.int16)
padding = Padding(PREPROCESSING_ARGS['sampling_rate'])
normalization = Normalization(tf.int16)
mfcc_processor = MFCC(**PREPROCESSING_ARGS)

def prepare_for_training(feature, label):
    """Turn one (feature, label) pair into what the model consumes.

    Appends a trailing axis of size 1 to ``feature`` and replaces ``label`` by
    the index of its match in ``LABELS``.

    NOTE(review): assumes ``label`` is a scalar string tensor equal to exactly
    one entry of ``LABELS`` — confirm against ``get_mfccs_and_label``.
    """
    label_id = tf.argmax(label == LABELS)
    return tf.expand_dims(feature, -1), label_id

def _to_batches(files_ds):
    """Apply the shared preprocessing chain to a dataset of file paths and batch it.

    Stages run in this order: read audio + label, pad, normalize, compute
    MFCCs, then `prepare_for_training`. Batches hold
    TRAINING_ARGS['batch_size'] examples.
    """
    stages = (
        audio_reader.get_audio_and_label,
        padding.pad,
        normalization.normalize,
        mfcc_processor.get_mfccs_and_label,
        prepare_for_training,
    )
    for stage in stages:
        files_ds = files_ds.map(stage)
    return files_ds.batch(TRAINING_ARGS['batch_size'])


# Only the training split is cached, and the cache sits after batching.
train_ds = _to_batches(train_ds).cache()
val_ds = _to_batches(val_ds)
test_ds = _to_batches(test_ds)

### Read a batch of data

In [None]:
# Pull a single batch; `example_batch` is reused further down to build the model.
for example_batch, example_labels in train_ds.take(1):
    batch_shape = example_batch.shape
    print('Batch Shape:', batch_shape)
    # Per-example shape, i.e. the batch shape without its leading axis.
    print('Data Shape:', batch_shape[1:])
    print('Labels:', example_labels)

### Create the Model

In [6]:
class SepConvReluBN(tf.keras.layers.Layer):
    """Separable 2-D convolution followed by ReLU and, optionally, batch norm.

    Args:
        num_outputs: number of filters of the separable convolution.
        kernel_size: kernel size forwarded to the convolution.
        stride: stride forwarded to the convolution.
        bn: when True a BatchNormalization layer is applied after the ReLU
            and the convolution is created without a bias term.
    """

    def __init__(self, num_outputs, kernel_size, stride, bn=False):
        super().__init__()
        self.stride = stride
        self.bn = bn

        # Sub-layers are created in the same order as they are applied.
        self.sep_conv = tf.keras.layers.SeparableConv2D(
            filters=num_outputs,
            kernel_size=kernel_size,
            strides=stride,
            padding="same",
            use_bias=not bn,
        )
        self.relu = tf.keras.layers.ReLU()
        if bn:
            self.bn_layer = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=False):
        """Run conv -> ReLU (-> batch norm when enabled) on ``inputs``."""
        activated = self.relu(self.sep_conv(inputs))
        if not self.bn:
            return activated
        return self.bn_layer(activated, training=training)


class SepResBlock(tf.keras.layers.Layer):
    """Residual block: two 3x3 SepConvReluBN units, a skip connection, then batch norm.

    The first unit uses batch norm, the second does not; normalization is
    applied once more after the block input has been added back.
    """

    def __init__(self, num_channels):
        super().__init__()
        self.sep_conv_relu_bn_1 = SepConvReluBN(num_channels, kernel_size=3, stride=1, bn=True)
        self.sep_conv_relu_bn_2 = SepConvReluBN(num_channels, kernel_size=3, stride=1, bn=False)
        self.bn_layer = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=False):
        """Return batch_norm(conv2(conv1(inputs)) + inputs)."""
        hidden = self.sep_conv_relu_bn_1(inputs, training=training)
        hidden = self.sep_conv_relu_bn_2(hidden, training=training)
        # Skip connection: the block input is added to the convolved features.
        return self.bn_layer(hidden + inputs, training=training)


class SepResNet(tf.keras.Model):
    """Stack of SepResBlocks between a strided separable conv and a softmax classifier.

    Args:
        num_classes: number of units of the final dense layer.
        num_blocks: how many SepResBlock instances to stack.
        num_channels: filter count shared by the first conv and every block.
    """

    def __init__(self, num_classes, num_blocks, num_channels):
        super().__init__()
        self.num_blocks = num_blocks

        self.first_conv = tf.keras.layers.SeparableConv2D(
            filters=num_channels, kernel_size=3, strides=2, padding='same'
        )

        self.blocks = [SepResBlock(num_channels) for _ in range(num_blocks)]

        self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=num_classes)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, inputs, training=False):
        """Return class probabilities (softmax over the dense layer's output)."""
        features = self.first_conv(inputs)
        for res_block in self.blocks:
            features = res_block(features, training=training)

        pooled = self.global_pool(features)
        return self.softmax(self.fc(pooled))


class SepResNet8(SepResNet):
    """SepResNet preset with two residual blocks of 64 channels each."""

    def __init__(self, num_classes):
        super().__init__(num_classes=num_classes, num_blocks=2, num_channels=64)


# One output unit per entry of LABELS.
model = SepResNet8(num_classes=len(LABELS))

In [None]:
# Build the model with the shape of the batch inspected above so that
# summary() can report the layers.
model.build(input_shape=example_batch.shape)
model.summary()

### Create callbacks

In [8]:
# Learning Rate scheduler
linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=TRAINING_ARGS['learning_rate'],
    end_learning_rate=TRAINING_ARGS['end_learning_rate'],
    decay_steps=len(list(train_ds)) * TRAINING_ARGS['epochs'],
)
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(linear_decay)

# Early Stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='auto'
)

In [None]:
# The model ends in a softmax layer, so the loss receives probabilities, not logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
# The optimizer applies the per-step decay schedule itself.
optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.SparseCategoricalAccuracy()]
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

# Relative path, like every other artefact this notebook writes; the former
# '/saved_models/...' pointed at the filesystem root.
checkpoint_filepath = './saved_models/best_model'
# Keep only the weights of the epoch with the best val_loss.
# ModelCheckpoint has no `save_format` argument (the on-disk format is inferred
# from `filepath`), so that keyword is not passed.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    train_ds,
    epochs=TRAINING_ARGS['epochs'],
    validation_data=val_ds,
    # `lr_scheduler` is intentionally not listed: the decay schedule is already
    # attached to the optimizer and advances once per training step, while a
    # LearningRateScheduler callback would call it with the epoch index and
    # try to overwrite the optimizer's learning rate.
    callbacks=[early_stopping, model_checkpoint],
)

### Evaluate the Model

In [None]:
# Train/validation figures come from the last epoch that ran, which is not
# necessarily the epoch at which the best checkpoint was written.
epoch_logs = history.history
training_loss, training_accuracy = (
    epoch_logs['loss'][-1],
    epoch_logs['sparse_categorical_accuracy'][-1],
)
val_loss, val_accuracy = (
    epoch_logs['val_loss'][-1],
    epoch_logs['val_sparse_categorical_accuracy'][-1],
)

# The test split is scored with the checkpointed model, not the in-memory one.
best_model = tf.keras.models.load_model(checkpoint_filepath)
test_loss, test_accuracy = best_model.evaluate(test_ds)

# One line per entry; the empty strings produce the blank separator lines.
print(
    f'Training Loss: {training_loss:.4f}',
    f'Training Accuracy: {training_accuracy*100.:.2f}%',
    '',
    f'Validation Loss: {val_loss:.4f}',
    f'Validation Accuracy: {val_accuracy*100.:.2f}%',
    '',
    f'Test Loss: {test_loss:.4f}',
    f'Test Accuracy: {test_accuracy*100.:.2f}%',
    sep='\n',
)

### Save the Model

In [None]:
import os
from time import time

model_name = 'group11'
saved_model_dir = f'./saved_models/{model_name}'
# Create the export directory if needed; no error when it already exists.
os.makedirs(saved_model_dir, exist_ok=True)

# Serving signature: a fixed batch of one example whose remaining dimensions
# are taken from the batch the model was built with. Deriving them keeps the
# export valid when PREPROCESSING_ARGS change the feature shape (the former
# hard-coded [1, 49, 10, 1] only matched one particular configuration).
serving_input_shape = [1] + example_batch.shape[1:].as_list()

@tf.function(input_signature=[tf.TensorSpec(shape=serving_input_shape, dtype=tf.float32)])
def inference_fn(features):
    """Run the checkpointed model in inference mode on a single-example batch."""
    return best_model(features, training=False)

# Export with `inference_fn` registered as the default serving signature.
best_model.save(saved_model_dir, signatures={'serving_default': inference_fn})

### Save Hyperparameters & Results

In [12]:
import pandas as pd

output_path='./mfcc_results_resnet.csv'
# Rows are appended, so the header is written only when the file does not exist yet.
write_header = not os.path.exists(output_path)

# One row: every hyperparameter of this run plus the resulting test accuracy.
output_dict = dict(PREPROCESSING_ARGS, **TRAINING_ARGS, test_accuracy=test_accuracy)
df = pd.DataFrame([output_dict])

df.to_csv(output_path, mode='a', header=write_header, index=False)

In [None]:
# Convert the exported SavedModel to TFLite. `saved_model_dir` is the directory
# the model was saved to, and it is the same variable the quantized conversion
# reads, so both conversions always use the same export.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model = converter.convert()

tf_lite_model_dir = './tflite_models'
# Create the output directory if needed; no error when it already exists.
os.makedirs(tf_lite_model_dir, exist_ok=True)

# The file is only written, never read back, so plain 'wb' is sufficient.
with open(f'{tf_lite_model_dir}/{model_name}.tflite', 'wb') as f:
    f.write(tflite_model)

### Quantize Model

In [None]:
# Destination of the quantized model, next to the non-quantized .tflite file.
tflite_quantized_filepath = f'{tf_lite_model_dir}/{model_name}_quantized.tflite'

# Convert the same SavedModel again, this time with the converter's default
# optimizations switched on.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()

# Save the quantized TFLite model
with open(tflite_quantized_filepath, 'wb') as f:
    f.write(tflite_quantized_model)

print(f'Quantized TFLite model with fixed shape saved to: {tflite_quantized_filepath}')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=43a3b7e8-f7b8-4917-aae7-dd72fffe3a57' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>